# Learning Multiprocessing

# Setup

In [None]:
import sys
# Install the third-party packages used below, via pip run as a module of the
# interpreter named by sys.executable (into the per-user site, see --user).
!{sys.executable} -m pip install pandas requests tqdm ratelimit --user

# Before Multiprocessing

In [13]:
import requests as req
import timeit
import time
import pandas as pd
from IPython.display import Image, HTML
import random
from tqdm import tqdm
from ratelimit import limits, sleep_and_retry



## Rate limit to help with overcalling
## pokemon api is 100 calls per 60 seconds max
@sleep_and_retry
@limits(calls=100, period=60)
def call_api(url):
    """GET ``url`` and return the response, or a sentinel when it is missing.

    The function is decorated with ``limits(calls=100, period=60)`` wrapped in
    ``sleep_and_retry``.

    Args:
        url: Address to request.

    Returns:
        The ``requests`` response object on HTTP 200, or the string
        ``'Not Found'`` on HTTP 404.

    Raises:
        Exception: For any other status code; the message is
            ``'API response: <status>'``.
    """
    response = req.get(url)

    if response.status_code == 404:
        return 'Not Found'
    if response.status_code != 200:
        # The status lives on the response object. The bare name
        # ``status_code`` used here before raised NameError, which hid the
        # real API error from callers.
        print('here', response.status_code, url)
        raise Exception('API response: {}'.format(response.status_code))
    return response


# URL template for the pokemon endpoint; the `{pokemon}` placeholder is filled
# with either an empty string or a query string by the code below.
API_POKEMON = 'https://pokeapi.co/api/v2/pokemon/{pokemon}'

def get_number_pokemon():
    """Return the detail URL of every pokemon the API lists.

    Despite the name, the result is a list of URLs, not a count. Two requests
    are made: the first reads the total ``count``, the second asks for that
    many entries in a single page. Both go through ``call_api`` so they share
    its status checking and rate limiting.

    Returns:
        list: The ``url`` field of each entry in the listing's ``results``.

    Raises:
        RuntimeError: If either listing request is answered with
            ``'Not Found'``.
        Exception: Whatever ``call_api`` raises for other failures.
    """
    count_response = call_api(API_POKEMON.format(pokemon=''))
    if count_response == 'Not Found':
        # call_api returns this string sentinel on HTTP 404; calling .json()
        # on it would fail with an unhelpful AttributeError.
        raise RuntimeError('Pokemon listing endpoint was not found')
    number_pokemon = count_response.json()['count']

    listing_query = '?offset=0&limit={limit}'.format(limit=number_pokemon)
    listing_response = call_api(API_POKEMON.format(pokemon=listing_query))
    if listing_response == 'Not Found':
        raise RuntimeError('Pokemon listing endpoint was not found')

    return [entry['url'] for entry in listing_response.json()['results']]

def get_pokemon(link='', max_attempts=3):
    """Fetch one pokemon and return the subset of fields shown in the table.

    Failed requests are retried up to ``max_attempts`` times. A failure whose
    message is ``'API response: 429'`` waits 60 seconds before the next
    attempt; any other failure waits a random 1-10 seconds.

    Args:
        link: URL of the pokemon resource.
        max_attempts: Upper bound on the number of requests made for this
            link. Values below 1 make no request at all.

    Returns:
        dict | None: A dict with the keys ``Image``, ``id``, ``name``,
        ``height``, ``base_experience``, ``weight`` and ``species``; ``None``
        when the resource is not found, every attempt failed, or the payload
        could not be read.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            res = call_api(link)
        except Exception as e:
            # call_api signals HTTP errors with a bare Exception, so the broad
            # catch is needed here. The previous code compared the exception
            # object itself to a string (always False) and then dereferenced
            # a None response, so it never actually retried.
            print(e)
            if attempt < max_attempts:
                if str(e) == 'API response: 429':
                    time.sleep(60)
                else:
                    time.sleep(random.randint(1, 10))
            continue

        if res == 'Not Found':
            return None

        try:
            pokemon_info = res.json()
            return {
                'Image': pokemon_info['sprites']['front_default'],
                'id': pokemon_info['id'],
                'name': pokemon_info['name'],
                'height': pokemon_info['height'],
                'base_experience': pokemon_info['base_experience'],
                'weight': pokemon_info['weight'],
                'species': pokemon_info['species']['name'],
            }
        except (KeyError, TypeError, ValueError) as e:
            # Undecodable body or unexpected payload shape: report and skip
            # this pokemon; retrying would fetch the same data.
            print(e)
            return None

    return None
            



def get_all_pokemon(links_pokemon=None):
    """Fetch every link one after another and collect the results.

    Args:
        links_pokemon: Iterable of pokemon URLs. ``None`` (the default) is
            treated as "no links" instead of failing on iteration.

    Returns:
        pandas.DataFrame: One row per pokemon for which ``get_pokemon``
        returned data; links that yielded ``None`` are left out.

    Side effects:
        Sets the pandas option ``display.max_colwidth`` to ``None``.
    """
    list_pokemon = []
    for link in tqdm(links_pokemon or []):
        pokemon = get_pokemon(link)
        if pokemon is not None:
            list_pokemon.append(pokemon)
        # Fixed pause between consecutive requests.
        time.sleep(0.3)

    pd.set_option('display.max_colwidth', None)

    return pd.DataFrame(list_pokemon)
    

def image_formatter(im):
    """Render an image URL as an HTML ``<img>`` tag for ``DataFrame.to_html``.

    Args:
        im: Image URL, or a missing value (``None``, ``''`` or NaN).

    Returns:
        str: ``'<img src="...">'`` with the URL escaped for use inside the
        attribute, or ``''`` for a missing value so that no broken
        ``<img src="None">`` tag is produced.
    """
    # Imported locally so the module name does not collide with the
    # notebook-level variable called ``html``.
    from html import escape

    # ``im != im`` is only true for NaN.
    if im is None or im == '' or (isinstance(im, float) and im != im):
        return ''
    return '<img src="{}">'.format(escape(str(im), quote=True))

def main_pokemon_run():
    """Fetch all pokemon sequentially and build the table plus a preview.

    Returns:
        tuple: ``(df_pokemon, preview)`` where ``df_pokemon`` is the DataFrame
        sorted by ``id`` and ``preview`` is an ``HTML`` object rendering the
        first four rows with the ``Image`` column shown as ``<img>`` tags.
    """
    links_pokemon = get_number_pokemon()

    df_pokemon = get_all_pokemon(links_pokemon=links_pokemon)

    # An empty frame has no 'id' column, so sorting it would raise KeyError.
    if not df_pokemon.empty:
        df_pokemon.sort_values(['id'], inplace=True)

    # escape=False lets the <img> markup produced by the formatter through.
    preview_html = df_pokemon.iloc[0:4].to_html(
        formatters={'Image': image_formatter}, escape=False)
    return df_pokemon, HTML(preview_html)
    

# Run the sequential version, then show the full table followed by the
# HTML preview returned alongside it.
df_pokemon, html = main_pokemon_run()
display(df_pokemon)
display(html)
    
    


100%|██████████| 964/964 [09:31<00:00,  1.69it/s]


Unnamed: 0,Image,id,name,height,base_experience,weight,species
0,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1.png,1,bulbasaur,7,64,69,bulbasaur
1,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/2.png,2,ivysaur,10,142,130,ivysaur
2,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/3.png,3,venusaur,20,236,1000,venusaur
3,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/4.png,4,charmander,6,62,85,charmander
4,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/5.png,5,charmeleon,11,142,190,charmeleon
...,...,...,...,...,...,...,...
959,,10153,araquanid-totem,31,159,2175,araquanid
960,,10154,togedemaru-totem,6,152,130,togedemaru
961,,10155,necrozma-dusk,38,306,4600,necrozma
962,,10156,necrozma-dawn,42,306,3500,necrozma


Unnamed: 0,Image,id,name,height,base_experience,weight,species
0,,1,bulbasaur,7,64,69,bulbasaur
1,,2,ivysaur,10,142,130,ivysaur
2,,3,venusaur,20,236,1000,venusaur
3,,4,charmander,6,62,85,charmander


### Results: 1.69 iterations per second

# With Multiprocessing


In [12]:
import requests as req
import timeit
import time
import pandas as pd
from IPython.display import Image, HTML
import random
from tqdm import tqdm
from ratelimit import limits, sleep_and_retry
from multiprocessing import Pool, Manager, cpu_count
from functools import partial


# URL template for the pokemon endpoint; the `{pokemon}` placeholder is filled
# with either an empty string or a query string by the code below.
API_POKEMON = 'https://pokeapi.co/api/v2/pokemon/{pokemon}'

#  To see how it ran
# def infoDebugger(title):
#     print(title)
#     print('module name:', __name__)
#     if hasattr(os, 'getppid'):
#         print('parent process:', os.getppid())
#     print('process id:', os.getpid())


@sleep_and_retry
@limits(calls=100, period=60)
def call_api(url):
    """Request ``url`` and classify the reply by its HTTP status.

    Returns the response object for status 200 and the string ``'Not Found'``
    for status 404. Any other status raises ``Exception`` with the message
    ``'API response: <status>'``.
    """
    reply = req.get(url)
    code = reply.status_code

    if code == 200:
        return reply
    if code == 404:
        return 'Not Found'
    raise Exception('API response: {}'.format(code))


# Reference: https://docs.python.org/3/library/multiprocessing.html

def get_number_pokemon():
    """Return the detail URL of every pokemon the API lists.

    Despite the name, the result is a list of URLs, not a count. Two requests
    are made: the first reads the total ``count``, the second asks for that
    many entries in a single page. Both go through ``call_api`` so they share
    its status checking and rate limiting.

    Returns:
        list: The ``url`` field of each entry in the listing's ``results``.

    Raises:
        RuntimeError: If either listing request is answered with
            ``'Not Found'``.
        Exception: Whatever ``call_api`` raises for other failures.
    """
    count_response = call_api(API_POKEMON.format(pokemon=''))
    if count_response == 'Not Found':
        # call_api returns this string sentinel on HTTP 404; calling .json()
        # on it would fail with an unhelpful AttributeError.
        raise RuntimeError('Pokemon listing endpoint was not found')
    number_pokemon = count_response.json()['count']

    listing_query = '?offset=0&limit={limit}'.format(limit=number_pokemon)
    listing_response = call_api(API_POKEMON.format(pokemon=listing_query))
    if listing_response == 'Not Found':
        raise RuntimeError('Pokemon listing endpoint was not found')

    return [entry['url'] for entry in listing_response.json()['results']]

def get_pokemon_multiprocess(listManager=None, links_pokemon=None, process=0,
                             max_attempts=3):
    """Fetch the pokemon at ``links_pokemon[process]`` and append it to the shared list.

    Written as a pool task: the first two arguments are bound with
    ``functools.partial`` and the pool supplies ``process``. Failed requests
    are retried up to ``max_attempts`` times. A failure whose message is
    ``'API response: 429'`` waits 60 seconds before the next attempt; any
    other failure waits a random 1-10 seconds.

    Args:
        listManager: Shared list that receives the result dict.
        links_pokemon: Sequence of pokemon URLs.
        process: Index into ``links_pokemon`` of the link to fetch.
        max_attempts: Upper bound on the number of requests made for this
            link. Values below 1 make no request at all.

    Returns:
        None. On success a dict with the keys ``Image``, ``id``, ``name``,
        ``height``, ``base_experience``, ``weight`` and ``species`` is
        appended to ``listManager``; nothing is appended when the resource is
        not found, every attempt failed, or the payload could not be read.
    """
    link = links_pokemon[process]
    info = None

    for attempt in range(1, max_attempts + 1):
        try:
            res = call_api(link)
        except Exception as e:
            # call_api signals HTTP errors with a bare Exception, so the broad
            # catch is needed here. The previous code compared the exception
            # object itself to a string (always False) and then dereferenced
            # a None response, so it never actually retried.
            print(e)
            if attempt < max_attempts:
                if str(e) == 'API response: 429':
                    time.sleep(60)
                else:
                    time.sleep(random.randint(1, 10))
            continue

        if res == 'Not Found':
            break

        try:
            pokemon_info = res.json()
            info = {
                'Image': pokemon_info['sprites']['front_default'],
                'id': pokemon_info['id'],
                'name': pokemon_info['name'],
                'height': pokemon_info['height'],
                'base_experience': pokemon_info['base_experience'],
                'weight': pokemon_info['weight'],
                'species': pokemon_info['species']['name'],
            }
        except (KeyError, TypeError, ValueError) as e:
            # Undecodable body or unexpected payload shape: report and skip
            # this pokemon; retrying would fetch the same data.
            print(e)
        break

    if info is not None:
        listManager.append(info)
        # Short pause after each successful fetch before taking the next task.
        time.sleep(0.5)


def image_formatter(im):
    """Render an image URL as an HTML ``<img>`` tag for ``DataFrame.to_html``.

    Args:
        im: Image URL, or a missing value (``None``, ``''`` or NaN).

    Returns:
        str: ``'<img src="...">'`` with the URL escaped for use inside the
        attribute, or ``''`` for a missing value so that no broken
        ``<img src="None">`` tag is produced.
    """
    # Imported locally so the module name does not collide with the
    # notebook-level variable called ``html``.
    from html import escape

    # ``im != im`` is only true for NaN.
    if im is None or im == '' or (isinstance(im, float) and im != im):
        return ''
    return '<img src="{}">'.format(escape(str(im), quote=True))


def main_pokemon_run_multiprocessing():
    """Fetch all pokemon with a process pool and build the table plus a preview.

    One task is submitted per link. Each task appends its result to a
    manager-backed list, which is copied into a plain list once the pool has
    finished.

    Returns:
        tuple: ``(df_pokemon, preview)`` where ``df_pokemon`` is the DataFrame
        sorted by ``id`` and ``preview`` is an ``HTML`` object rendering the
        first four rows with the ``Image`` column shown as ``<img>`` tags.
    """
    # NOTE(review): the rate limiter on call_api presumably keeps its state
    # per worker process, so the combined request rate may exceed the
    # configured 100 calls / 60 s. Confirm against the ratelimit docs.

    ## cannot be 0, so max(NUMBER,1) solves this
    workers = max(cpu_count() - 1, 1)

    links_pokemon = get_number_pokemon()

    # The context manager shuts the manager's server process down on exit;
    # previously it was never stopped.
    with Manager() as manager:
        ## Need a manager to help get the values async
        listManager = manager.list()
        part_get_clean_pokemon = partial(
            get_pokemon_multiprocess, listManager, links_pokemon)

        pool = Pool(workers)
        try:
            # imap yields as tasks complete, so wrapping it in tqdm gives a
            # progress bar; the yielded values themselves are not used.
            task_results = pool.imap(part_get_clean_pokemon,
                                     range(len(links_pokemon)))
            for _ in tqdm(task_results, total=len(links_pokemon)):
                pass
        finally:
            pool.close()
            pool.join()

        # Copy out of the proxy while the manager is still running.
        pokemonList = list(listManager)

    df_pokemon = pd.DataFrame(pokemonList)
    # An empty frame has no 'id' column, so sorting it would raise KeyError.
    if not df_pokemon.empty:
        df_pokemon.sort_values(['id'], inplace=True)

    # escape=False lets the <img> markup produced by the formatter through.
    preview_html = df_pokemon.iloc[0:4].to_html(
        formatters={'Image': image_formatter}, escape=False)
    return df_pokemon, HTML(preview_html)
    

# Run the multiprocessing version, then show the full table followed by the
# HTML preview returned alongside it.
df_pokemon, html = main_pokemon_run_multiprocessing()
display(df_pokemon)
display(html)
    
    
        
    


100%|██████████| 964/964 [01:04<00:00, 14.97it/s]


Unnamed: 0,Image,id,name,height,base_experience,weight,species
0,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1.png,1,bulbasaur,7,64,69,bulbasaur
2,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/2.png,2,ivysaur,10,142,130,ivysaur
1,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/3.png,3,venusaur,20,236,1000,venusaur
6,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/4.png,4,charmander,6,62,85,charmander
5,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/5.png,5,charmeleon,11,142,190,charmeleon
...,...,...,...,...,...,...,...
958,,10153,araquanid-totem,31,159,2175,araquanid
959,,10154,togedemaru-totem,6,152,130,togedemaru
961,,10155,necrozma-dusk,38,306,4600,necrozma
962,,10156,necrozma-dawn,42,306,3500,necrozma


Unnamed: 0,Image,id,name,height,base_experience,weight,species
0,,1,bulbasaur,7,64,69,bulbasaur
2,,2,ivysaur,10,142,130,ivysaur
1,,3,venusaur,20,236,1000,venusaur
6,,4,charmander,6,62,85,charmander


### Results: 14.97 iterations per second