![Callysto.ca Banner](https://github.com/callysto/curriculum-notebooks/blob/master/callysto-notebook-banner-top.jpg?raw=true)

<a href="https://hub.callysto.ca/jupyter/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2Fcallysto%2Fdata-science-and-artificial-intelligence&branch=main&subPath=06b-getting-data.ipynb&depth=1" target="_parent"><img src="https://raw.githubusercontent.com/callysto/curriculum-notebooks/master/open-in-callysto-button.svg?sanitize=true" width="123" height="24" alt="Open in Callysto"/></a>

# Retrieving Pokémon Data

We are going to query the [PokéAPI](https://pokeapi.co/) site to get a large Pokémon data set.

In [None]:
# query the PokeAPI for data about all Pokemon
import requests
import pandas as pd

# detail fields copied into the dataframe, in the order the columns are added
detail_fields = ['id', 'height', 'weight', 'base_experience', 'abilities', 'forms',
                 'species', 'is_default', 'location_area_encounters', 'order',
                 'stats', 'types']

# one session reuses the HTTP connection across the per-Pokemon requests
with requests.Session() as session:
    # the large limit asks for the whole name/url listing in a single page
    r = session.get('https://pokeapi.co/api/v2/pokemon?limit=100000')
    df = pd.DataFrame(r.json()['results'])

    details = []
    for pokemon_name, pokemon_url in zip(df['name'], df['url']):
        print(pokemon_name)
        try:
            # parse the response body once instead of once per field
            record = session.get(pokemon_url).json()
            details.append({field: record[field] for field in detail_fields})
        except (requests.RequestException, ValueError, KeyError) as err:
            # keep the row, filled with None, so it still lines up with df;
            # unlike a bare except this still lets the long loop be interrupted
            print('  could not fetch', pokemon_name, '-', err)
            details.append(dict.fromkeys(detail_fields))

# add one dataframe column per detail field
for field in detail_fields:
    df[field] = [detail[field] for detail in details]
df = df.drop('url', axis=1) # drop url column
df

In [None]:
# split the stats column into one column per stat
stat_columns = ['hp','attack','defense','special-attack','special-defense','speed']

def _base_stats(stats):
    """Return the base_stat values of one Pokemon, padded with None to one value per stat column.

    A row whose download failed holds None instead of a list and yields only None values.
    """
    values = [stat['base_stat'] for stat in stats] if isinstance(stats, list) else []
    return values + [None] * (len(stat_columns) - len(values))

def _entry_name(entries, position, key):
    """Return entries[position][key]['name'], or None when that slot does not exist."""
    try:
        return entries[position][key]['name']
    except (IndexError, KeyError, TypeError):
        # IndexError: fewer entries than slots; TypeError: entries is None (failed download)
        return None

# stats are taken by position, so the API order must match stat_columns
df_stats = pd.DataFrame([_base_stats(stats) for stats in df['stats']],
                        columns=stat_columns, index=df.index)
# join with the main dataframe
df = df.join(df_stats)
df = df.drop('stats', axis=1)
df = df.drop('location_area_encounters', axis=1)

# split the abilities to three columns
for position, column in enumerate(['ability1', 'ability2', 'ability3']):
    df[column] = [_entry_name(entries, position, 'ability') for entries in df['abilities']]
df = df.drop('abilities', axis=1)

# species column is a dictionary, so we need to extract the name
df['species'] = df['species'].apply(lambda x: x['name'] if x is not None else None)

# keep only the name of the first form; None or an empty list becomes None
df['forms'] = df['forms'].apply(lambda x: x[0]['name'] if x else None)

# split the types to two columns
for position, column in enumerate(['type1', 'type2']):
    df[column] = [_entry_name(entries, position, 'type') for entries in df['types']]
df = df.drop('types', axis=1)

# convert the moves column to a stringified list
#df['moves'] = df['moves'].apply(lambda x: [move['move']['name'] for move in x] if x is not None else None)

In [None]:
# reorder the columns: identifiers first, then measurements and battle stats,
# then form/species information, types and abilities
column_order = [
    'id', 'name', 'base_experience', 'height', 'weight',
    'speed', 'hp', 'attack', 'defense', 'special-attack', 'special-defense',
    'forms', 'species', 'is_default', 'order',
    'type1', 'type2',
    'ability1', 'ability2', 'ability3',
]
df = df[column_order]
df.head(3)

In [None]:
from pathlib import Path

# to_csv does not create missing folders, so make sure 'data' exists first
Path('data').mkdir(exist_ok=True)
# save the cleaned table without the row index
df.to_csv('data/pokemon.csv', index=False)

In [None]:
# reload the saved Pokemon table so the cells below can run without repeating the download
import pandas as pd
df = pd.read_csv('data/pokemon.csv')
df

## Display Images

In [None]:
# import display explicitly instead of relying on the notebook kernel to provide it
from IPython.display import Image, display

def display_pokemon(n):
    """Show the official artwork for the Pokemon at zero-based position n.

    The image file requested is numbered n+1, which assumes row n of the
    table holds the Pokemon whose id is n+1.
    """
    image_url = 'https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/other/official-artwork/'+str(n+1)+'.png'
    display(Image(url=image_url, width=200, height=200))

# show the first ten Pokemon, each followed by its name
for n in range(10):
    display_pokemon(n)
    print(df['name'][n])

## Species Characteristics

Species characteristics come from the PokéAPI species endpoint: `https://pokeapi.co/api/v2/pokemon-species/{id or name}/`

In [None]:
import requests

def _name_or_none(resource):
    """Return resource['name'], or None when the API reports the resource as null."""
    return resource['name'] if resource is not None else None

def _english(entries, field):
    """Collect `field` from the entries whose language is English."""
    return [entry[field] for entry in entries if entry['language']['name'] == 'en']

def get_species_characteristics(species):
    """Fetch one species from the PokeAPI pokemon-species endpoint and flatten it.

    species -- species name (or id as a string) appended to the endpoint URL

    Returns a dict of plain values: nested resources are reduced to their
    names, and translated lists are reduced to their English entries.
    Raises requests.HTTPError when the API answers with an error status.
    """
    r = requests.get('https://pokeapi.co/api/v2/pokemon-species/'+species)
    # fail with the HTTP status rather than a confusing JSON error further down
    r.raise_for_status()
    resp = r.json()
    sc = {}
    sc['base_happiness'] = resp['base_happiness']
    sc['capture_rate'] = resp['capture_rate']
    sc['color'] = resp['color']['name']
    sc['egg_groups'] = [egg_group['name'] for egg_group in resp['egg_groups']]
    sc['evolution_chain'] = resp['evolution_chain']['url']
    sc['evolves_from_species'] = _name_or_none(resp['evolves_from_species'])
    #sc['flavor_text_entries'] = [flavor_text['flavor_text'] for flavor_text in resp['flavor_text_entries'] if flavor_text['language']['name'] == 'en']
    sc['form_descriptions'] = _english(resp['form_descriptions'], 'description')
    sc['forms_switchable'] = resp['forms_switchable']
    sc['gender_rate'] = resp['gender_rate']
    sc['generation'] = resp['generation']['name']
    sc['growth_rate'] = resp['growth_rate']['name']
    sc['habitat'] = _name_or_none(resp['habitat'])
    sc['has_gender_differences'] = resp['has_gender_differences']
    sc['hatch_counter'] = resp['hatch_counter']
    sc['is_baby'] = resp['is_baby']
    sc['is_legendary'] = resp['is_legendary']
    sc['is_mythical'] = resp['is_mythical']
    sc['name'] = resp['name']
    sc['names'] = _english(resp['names'], 'name')
    sc['order'] = resp['order']
    sc['pal_park_encounters'] = [pal_park_encounter['area']['name'] for pal_park_encounter in resp['pal_park_encounters']]
    sc['shape'] = _name_or_none(resp['shape'])
    sc['varieties'] = [variety['pokemon']['name'] for variety in resp['varieties']]
    return sc

species_characteristics = {}
# skip missing values: rows whose download failed have no species name,
# and a NaN cannot be appended to the endpoint URL
for species in df['species'].dropna().unique():
    print(species)
    species_characteristics[species] = get_species_characteristics(species)
# one row per species, one column per characteristic
df_sc = pd.DataFrame(species_characteristics).T
df_sc.to_pickle('data/pokemon_species_characteristics.pkl')
df_sc

In [2]:
# reload the species characteristics saved earlier; pickle keeps the list-valued columns as lists
# NOTE: only unpickle files you created yourself, loading a pickle can run arbitrary code
import pandas as pd
df_sc = pd.read_pickle('data/pokemon_species_characteristics.pkl')
df_sc

Unnamed: 0,base_happiness,capture_rate,color,egg_groups,evolution_chain,evolves_from_species,form_descriptions,forms_switchable,gender_rate,generation,...,hatch_counter,is_baby,is_legendary,is_mythical,name,names,order,pal_park_encounters,shape,varieties
bulbasaur,50,45,green,"[monster, plant]",https://pokeapi.co/api/v2/evolution-chain/1/,,[],False,1,generation-i,...,20,False,False,False,bulbasaur,[Bulbasaur],1,[field],quadruped,[bulbasaur]
ivysaur,50,45,green,"[monster, plant]",https://pokeapi.co/api/v2/evolution-chain/1/,bulbasaur,[],False,1,generation-i,...,20,False,False,False,ivysaur,[Ivysaur],2,[field],quadruped,[ivysaur]
venusaur,50,45,green,"[monster, plant]",https://pokeapi.co/api/v2/evolution-chain/1/,ivysaur,[],True,1,generation-i,...,20,False,False,False,venusaur,[Venusaur],3,[field],quadruped,"[venusaur, venusaur-mega, venusaur-gmax]"
charmander,50,45,red,"[monster, dragon]",https://pokeapi.co/api/v2/evolution-chain/2/,,[],False,1,generation-i,...,20,False,False,False,charmander,[Charmander],4,[field],upright,[charmander]
charmeleon,50,45,red,"[monster, dragon]",https://pokeapi.co/api/v2/evolution-chain/2/,charmander,[],False,1,generation-i,...,20,False,False,False,charmeleon,[Charmeleon],5,[field],upright,[charmeleon]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
iron-valiant,0,10,white,[no-eggs],https://pokeapi.co/api/v2/evolution-chain/534/,,[],False,-1,generation-ix,...,50,False,False,False,iron-valiant,[Iron Valiant],993,[],,[iron-valiant]
koraidon,0,3,red,[no-eggs],https://pokeapi.co/api/v2/evolution-chain/535/,,[],False,-1,generation-ix,...,50,False,True,False,koraidon,[Koraidon],998,[],,"[koraidon, koraidon-limited-build, koraidon-sp..."
miraidon,0,3,blue,[no-eggs],https://pokeapi.co/api/v2/evolution-chain/536/,,[],False,-1,generation-ix,...,50,False,True,False,miraidon,[Miraidon],999,[],,"[miraidon, miraidon-low-power-mode, miraidon-d..."
walking-wake,0,5,blue,[no-eggs],https://pokeapi.co/api/v2/evolution-chain/537/,,[],False,-1,generation-ix,...,50,False,False,False,walking-wake,[Walking Wake],1011,[],,[walking-wake]


## Types

In [None]:
# get Pokemon types from the PokeAPI
import requests
import pandas as pd

# the large limit asks for the whole listing in a single page
r = requests.get('https://pokeapi.co/api/v2/type?limit=100000')
type_listing = r.json()
# one row per listed type, built from the 'results' entries
types = pd.DataFrame(type_listing['results'])

In [None]:
# detail fields copied from each type's record into the dataframe
type_fields = ['damage_relations', 'game_indices', 'generation', 'move_damage_class',
               'moves', 'pokemon', 'name', 'names']
type_details = {field: [] for field in type_fields}

for type_name, type_url in zip(types['name'], types['url']):
    print(type_name, type_url)
    # r is kept as a plain variable so the last response can be inspected afterwards
    r = requests.get(type_url)
    resp = r.json()
    for field in type_fields:
        type_details[field].append(resp[field])

# add the collected values as columns; the url column is kept
for field in type_fields:
    types[field] = type_details[field]

# convert index to a column
types = types.reset_index().rename(columns={'index': 'type'})

types

In [None]:
import pandas as pd
# NOTE(review): this rebinds `types` to the species characteristics pickle, so from here on
# `types` holds one row per species rather than one row per Pokemon type; confirm this is intended
types = pd.read_pickle('data/pokemon_species_characteristics.pkl')

In [None]:
def _egg_group_at(egg_groups, position):
    """Return the egg group at `position`, or None when the list is too short."""
    return egg_groups[position] if len(egg_groups) > position else None

# split the egg_groups column into two columns (first and second group)
types['egg_group1'] = types['egg_groups'].apply(_egg_group_at, args=(0,))
types['egg_group2'] = types['egg_groups'].apply(_egg_group_at, args=(1,))
types = types.drop('egg_groups', axis=1)

In [None]:
# drop evolution_chain, form_descriptions, and names columns
unused_columns = ['evolution_chain', 'form_descriptions', 'names']
types = types.drop(columns=unused_columns)

In [None]:
# roman numerals used in the generation names, with their integer values
roman_to_int = {'i': 1, 'ii': 2, 'iii': 3, 'iv': 4, 'v': 5,
                'vi': 6, 'vii': 7, 'viii': 8, 'ix': 9}

# remove 'generation-' from the generation column and replace the roman numeral
# with an integer; one lookup avoids a chain of replacements whose order matters,
# and an unknown numeral raises a KeyError that names it
types['generation'] = types['generation'].apply(lambda x: roman_to_int[x.replace('generation-', '')])
types['generation'] = types['generation'].astype(int)
types['generation'].unique()

In [None]:
def _percent_female(gender_rate):
    """Return the female percentage: 12.5 per gender_rate step, or 0 for a rate of -1."""
    if gender_rate == -1:
        return 0
    return float(12.5 * gender_rate)

def _percent_male(gender_rate):
    """Return the male percentage: the remainder up to 100, or 0 for a rate of -1."""
    if gender_rate == -1:
        return 0
    return 100 - float(12.5 * gender_rate)

# calculate female and male percentages
types['%_female'] = types['gender_rate'].apply(_percent_female)
types['% male'] = types['gender_rate'].apply(_percent_male)
types = types.drop('gender_rate', axis=1)

In [None]:
# convert 'pal_park_encounters' column to a string: the area names separated by ', '
encounter_lists = types['pal_park_encounters']
types['pal_park_encounters'] = encounter_lists.apply(', '.join)

In [None]:
# reorder columns
# the selection below needs a 'type' column; when the frame still carries its
# row labels in the index, move them into that column first
if 'type' not in types.columns:
    types = types.reset_index().rename(columns={'index': 'type'})
types = types[['type','name','order','base_happiness','capture_rate','color','evolves_from_species','forms_switchable','generation','growth_rate','habitat','shape','%_female','% male','has_gender_differences','hatch_counter','egg_group1','egg_group2','is_baby','is_legendary','is_mythical','pal_park_encounters','varieties']]

In [None]:
# save the cleaned table without the row index
types.to_csv('data/pokemon_types.csv', index=False)

In [None]:
# reload the table from the CSV file
# NOTE(review): list-valued columns such as 'varieties' come back from CSV as plain text,
# not as lists; confirm that is acceptable before relying on them
types = pd.read_csv('data/pokemon_types.csv')

In [None]:
# also store the table as a pickle
types.to_pickle('data/pokemon_types.pkl')

In [None]:
# print every top-level field of the most recent API response, with a
# separator line after each one
payload = r.json()
for key, value in payload.items():
    print(key)
    print(value)
    print('------')

In [None]:
# show the 'damage_relations' field of the most recent API response
r.json()['damage_relations']

The [next notebook](07-data-logging.ipynb) will introduce you to recording and using your own data.

[![Callysto.ca License](https://github.com/callysto/curriculum-notebooks/blob/master/callysto-notebook-banner-bottom.jpg?raw=true)](https://github.com/callysto/curriculum-notebooks/blob/master/LICENSE.md)