In [2]:
import requests
import time
import pandas as pd

In [3]:
# API endpoint for per-Pokemon details; the numeric id is appended to it.
url_detail = "https://pokeapi.co/api/v2/pokemon/"
# Rows of [id, name, height, weight, types], one per fetched Pokemon.
pokemon_detail = []

# Fetch ids 1..40 inclusive.
for pokemon_id in range(1, 41):
    new_url_detail = url_detail + str(pokemon_id)
    # A timeout keeps a stalled connection from hanging the notebook forever.
    http_response = requests.get(new_url_detail, timeout=10)
    # Fail loudly on a 4xx/5xx reply instead of hitting a confusing JSON
    # decoding error or KeyError further down.
    http_response.raise_for_status()
    response = http_response.json()

    data_detail = [
        response['id'],
        response['name'],
        response['height'],
        response['weight'],
        response['types'],  # still the raw nested list; cleaned in a later cell
    ]

    pokemon_detail.append(data_detail)
    time.sleep(2)  # pause between requests to go easy on the API

In [4]:
# API endpoint for per-species data; the numeric id is appended to it.
url_species = "https://pokeapi.co/api/v2/pokemon-species/"
# Rows of [name, capture_rate, color, habitat], one per fetched species.
pokemon_species = []

# Fetch ids 1..40 inclusive.
for species_id in range(1, 41):
    new_url_species = url_species + str(species_id)
    # A timeout keeps a stalled connection from hanging the notebook forever.
    http_response = requests.get(new_url_species, timeout=10)
    # Fail loudly on a 4xx/5xx reply instead of hitting a confusing JSON
    # decoding error or KeyError further down.
    http_response.raise_for_status()
    response = http_response.json()

    data_species = [
        response["name"],
        response["capture_rate"],
        response["color"],    # raw dict; reduced to its name in a later cell
        response["habitat"],  # raw dict; NOTE(review): may be null for some species - confirm against the API docs
    ]

    pokemon_species.append(data_species)
    time.sleep(2)  # pause between requests to go easy on the API

# Create DataFrame

In [5]:
# Turn the collected detail rows into a table; the column labels follow the
# order in which the values were gathered for each Pokemon.
df_detail = pd.DataFrame(
    data=pokemon_detail,
    columns=["id", "name", "height", "weight", "types"],
)

# Preview the first rows.
df_detail.head(5)

Unnamed: 0,id,name,height,weight,types
0,1,bulbasaur,7,69,"[{'slot': 1, 'type': {'name': 'grass', 'url': ..."
1,2,ivysaur,10,130,"[{'slot': 1, 'type': {'name': 'grass', 'url': ..."
2,3,venusaur,20,1000,"[{'slot': 1, 'type': {'name': 'grass', 'url': ..."
3,4,charmander,6,85,"[{'slot': 1, 'type': {'name': 'fire', 'url': '..."
4,5,charmeleon,11,190,"[{'slot': 1, 'type': {'name': 'fire', 'url': '..."


In [6]:
# Turn the collected species rows into a table; the column labels follow the
# order in which the values were gathered for each species.
df_species = pd.DataFrame(
    data=pokemon_species,
    columns=["name", "capture_rate", "color", "habitat"],
)

# Preview the first rows.
df_species.head(5)

Unnamed: 0,name,capture_rate,color,habitat
0,bulbasaur,45,"{'name': 'green', 'url': 'https://pokeapi.co/a...","{'name': 'grassland', 'url': 'https://pokeapi...."
1,ivysaur,45,"{'name': 'green', 'url': 'https://pokeapi.co/a...","{'name': 'grassland', 'url': 'https://pokeapi...."
2,venusaur,45,"{'name': 'green', 'url': 'https://pokeapi.co/a...","{'name': 'grassland', 'url': 'https://pokeapi...."
3,charmander,45,"{'name': 'red', 'url': 'https://pokeapi.co/api...","{'name': 'mountain', 'url': 'https://pokeapi.c..."
4,charmeleon,45,"{'name': 'red', 'url': 'https://pokeapi.co/api...","{'name': 'mountain', 'url': 'https://pokeapi.c..."


# Data Cleansing

#### Clean and extract the Pokemon types. Each value in the 'types' column is a list of nested dictionaries, e.g. [{'slot': 1, 'type': {'name': 'grass', 'url': ...}}, ...]; keep only the type names, e.g. 'grass'.

In [7]:
# Extract values associated with the key 'name' in the 'types' column
# Reduce each entry of 'types' to its type name, e.g.
# [{'slot': 1, 'type': {'name': 'grass', ...}}, ...] -> ['grass', ...].
# The column is overwritten in place, so entries that are already plain names
# are kept as-is; this lets the cell be re-run without raising a TypeError.
df_detail['types'] = df_detail['types'].apply(
    lambda entries: [
        entry['type']['name'] if isinstance(entry, dict) else entry
        for entry in entries
    ]
)

df_detail.head()

Unnamed: 0,id,name,height,weight,types
0,1,bulbasaur,7,69,"[grass, poison]"
1,2,ivysaur,10,130,"[grass, poison]"
2,3,venusaur,20,1000,"[grass, poison]"
3,4,charmander,6,85,[fire]
4,5,charmeleon,11,190,[fire]


#### Clean and extract the Pokemon 'color' and 'habitat' in the same way. Here each value is a single flat dictionary with no additional nesting.

In [8]:
def _resource_name(value):
    """Return the 'name' entry of a dict; return any other value unchanged."""
    return value.get('name') if isinstance(value, dict) else value


# Read the 'name' key straight from each dict rather than regex-matching the
# dict's string representation. Values that are not dicts (names extracted by
# an earlier run of this cell, or missing values) pass through untouched, so
# re-running the cell no longer wipes the columns.
df_species['color'] = df_species['color'].map(_resource_name)
df_species['habitat'] = df_species['habitat'].map(_resource_name)

df_species.head()

Unnamed: 0,name,capture_rate,color,habitat
0,bulbasaur,45,green,grassland
1,ivysaur,45,green,grassland
2,venusaur,45,green,grassland
3,charmander,45,red,mountain
4,charmeleon,45,red,mountain


# Join DataFrame

In [9]:
# Combine the detail and species tables, matching rows on 'name'.
# The inner join keeps only names present in both tables.
# validate="one_to_one" makes pandas raise a MergeError if a name is
# duplicated on either side, instead of silently multiplying rows.
full_pokemon = pd.merge(
    df_detail,
    df_species,
    how="inner",
    on="name",
    validate="one_to_one",
)
full_pokemon.head()

Unnamed: 0,id,name,height,weight,types,capture_rate,color,habitat
0,1,bulbasaur,7,69,"[grass, poison]",45,green,grassland
1,2,ivysaur,10,130,"[grass, poison]",45,green,grassland
2,3,venusaur,20,1000,"[grass, poison]",45,green,grassland
3,4,charmander,6,85,[fire],45,red,mountain
4,5,charmeleon,11,190,[fire],45,red,mountain


In [10]:
# Show the last rows of the merged frame to check the end of the join result.
full_pokemon.tail()

Unnamed: 0,id,name,height,weight,types,capture_rate,color,habitat
35,36,clefable,13,400,[fairy],25,pink,mountain
36,37,vulpix,6,99,[fire],190,brown,grassland
37,38,ninetales,11,199,[fire],75,yellow,grassland
38,39,jigglypuff,5,55,"[normal, fairy]",170,pink,grassland
39,40,wigglytuff,10,120,"[normal, fairy]",50,pink,grassland


# Finding Insights

#### Find the number of pokemon separated by habitat

In [11]:
# Count the non-null names in each habitat group and expose the result as a
# two-column table: 'habitat' and 'number of pokemon'.
count_by_habitat = (
    full_pokemon
    .groupby('habitat')['name']
    .count()
    .rename('number of pokemon')
    .reset_index()
)
count_by_habitat

Unnamed: 0,habitat,number of pokemon
0,forest,11
1,grassland,17
2,mountain,5
3,rough-terrain,4
4,waters-edge,3


#### Find the number of pokemon separated by color

In [12]:
# Per color: how many Pokemon there are and which distinct names they have.
color = (
    full_pokemon
    .groupby('color')['name']
    .agg(['count', 'unique'])
    .reset_index()
    .rename(columns={'count': 'number of pokemon', 'unique': 'pokemon list'})
)

In [13]:
# Display the per-color summary table.
color

Unnamed: 0,color,number of pokemon,pokemon list
0,blue,6,"[squirtle, wartortle, blastoise, nidoran-f, ni..."
1,brown,8,"[weedle, pidgey, pidgeotto, pidgeot, raticate,..."
2,green,5,"[bulbasaur, ivysaur, venusaur, caterpie, metapod]"
3,pink,4,"[clefairy, clefable, jigglypuff, wigglytuff]"
4,purple,6,"[rattata, ekans, arbok, nidoran-m, nidorino, n..."
5,red,3,"[charmander, charmeleon, charizard]"
6,white,1,[butterfree]
7,yellow,7,"[kakuna, beedrill, pikachu, raichu, sandshrew,..."


In [14]:
# Display the merged Pokemon frame.
full_pokemon

Unnamed: 0,id,name,height,weight,types,capture_rate,color,habitat
0,1,bulbasaur,7,69,"[grass, poison]",45,green,grassland
1,2,ivysaur,10,130,"[grass, poison]",45,green,grassland
2,3,venusaur,20,1000,"[grass, poison]",45,green,grassland
3,4,charmander,6,85,[fire],45,red,mountain
4,5,charmeleon,11,190,[fire],45,red,mountain
5,6,charizard,17,905,"[fire, flying]",45,red,mountain
6,7,squirtle,5,90,[water],45,blue,waters-edge
7,8,wartortle,10,225,[water],45,blue,waters-edge
8,9,blastoise,16,855,[water],45,blue,waters-edge
9,10,caterpie,3,29,[bug],255,green,forest


#### Find Basic Stats
- weight of Pokemon (hectograms)
- height of Pokemon (decimetres)

In [15]:
# Summary statistics for the two size columns, rounded to two decimals.
full_pokemon.loc[:, ['height', 'weight']].describe().round(decimals=2)

Unnamed: 0,height,weight
count,40.0,40.0
mean,9.57,249.35
std,6.16,253.92
min,3.0,18.0
25%,5.75,73.75
50%,8.5,157.5
75%,11.25,305.0
max,35.0,1000.0


#### Dealing with capture_rate column
The base capture rate ranges from 1 to 255, with higher values indicating an increased likelihood of successful capture.

In [16]:
def _catch_level(rate):
    """Map a capture rate to a difficulty label.

    Returns 'hard' for rates up to 85, 'normal' for rates above 85 up to 170,
    and 'easy' for rates above 170 up to 255. Returns 'unknown' when the value
    cannot be converted to a number, is NaN, or is above 255.
    """
    try:
        rate = float(rate)
    except (TypeError, ValueError):
        # TypeError covers None; ValueError covers non-numeric strings.
        return 'unknown'
    if rate <= 85:
        return 'hard'
    if rate <= 170:
        return 'normal'
    if rate <= 255:
        return 'easy'
    # NaN fails every comparison above and lands here, as do rates over 255.
    return 'unknown'


# Exactly one label per row, so the list always matches the frame's length
# and the column assignment below cannot fail on a length mismatch.
catch_level = [_catch_level(rate) for rate in full_pokemon['capture_rate']]

# Assign the catch_level list as a new column in the DataFrame
full_pokemon['catch_level'] = catch_level

# Preview the resulting DataFrame
full_pokemon.head()

Unnamed: 0,id,name,height,weight,types,capture_rate,color,habitat,catch_level
0,1,bulbasaur,7,69,"[grass, poison]",45,green,grassland,hard
1,2,ivysaur,10,130,"[grass, poison]",45,green,grassland,hard
2,3,venusaur,20,1000,"[grass, poison]",45,green,grassland,hard
3,4,charmander,6,85,[fire],45,red,mountain,hard
4,5,charmeleon,11,190,[fire],45,red,mountain,hard


#### Find the number of pokemon separated by catch_level

In [17]:
# Per catch level: how many Pokemon there are and which distinct names they have.
catchlevel = (
    full_pokemon
    .groupby('catch_level')['name']
    .agg(['count', 'unique'])
    .reset_index()
    .rename(columns={'count': 'number of pokemon', 'unique': 'pokemon list'})
)

In [18]:
# Display the per-catch-level summary table.
catchlevel

Unnamed: 0,catch_level,number of pokemon,pokemon list
0,easy,11,"[caterpie, weedle, pidgey, rattata, spearow, e..."
1,hard,18,"[bulbasaur, ivysaur, venusaur, charmander, cha..."
2,normal,11,"[metapod, kakuna, pidgeotto, raticate, fearow,..."


## Save File

In [21]:
#full_pokemon.to_csv("Pokemon.csv", index=False)