In [1]:
import requests
import json
import pandas as pd
import numpy as np
from pprint import pprint as pp

In [2]:
items = pd.read_json("API_datasets/item.json")

moves = pd.read_json("API_datasets/moves.json")

species = pd.read_json("API_datasets/species.json")

abilities = pd.read_json("API_datasets/abilities.json")

types = pd.read_json("API_datasets/types.json")

In [3]:
items_df = pd.DataFrame(items, columns=["attributes", "category", "effect_entries", "id", "name"])
moves_df = pd.DataFrame(moves, columns=["accuracy", "damage_class", "generation", "id", "learned_by_pokemon", "name", 
                                             "power", "pp", "type"])
species_df = pd.DataFrame(species, columns=["evolves_from_species", "generation", "has_gender_differences", "id", 
                                                   "is_baby", "is_legendary", "is_mythical", "name", "varieties"])
ability_df = pd.DataFrame(abilities, columns=["effect_changes", "effect_entries", "generation", "id", "is_main_series", 
                                                   "name", "pokemon"])
type_df = pd.DataFrame(types, columns=["damage_relations", "game_indices", "generation", "id", "name"])

In [4]:
items_df = items_df[["id", "name", "attributes", "category", "effect_entries"]]
items_df.columns = ["ID", "Name", "Attributes", "Category", "Effect"]

moves_df = moves_df[["id", "name", "type", "power", "accuracy", "pp", "damage_class", "generation", "learned_by_pokemon"]]
moves_df.columns = ["ID", "Name", "Type", "Power", "Accuracy", "PP", "Damage_class", "Introducted_in", "Learned_by"]

species_df = species_df[["id", "name", "varieties", "generation", "evolves_from_species", "has_gender_differences",  "is_baby", 
                         "is_legendary", "is_mythical"]]
species_df.columns = ["ID", "Name", "Varieties", "Generation", "Evolves_from", "Has_gender_diff", "Is_baby", "Is_legendary",
                     "Is_mythical"]
ability_df = ability_df[["id", "name", "generation", "is_main_series", "effect_changes", "effect_entries", "pokemon"]]
ability_df.columns = ["ID", "Name", "Generation", "Is_Main_Series", "Effect_changes", "Effect_entries", "Pokemon"]
type_df = type_df[["id", "game_indices", "name", "generation", "damage_relations"]]
type_df.columns = ["ID", "Game_index", "Name", "Introducted_in", "Damage_relations"]

### Items

In [10]:
#Capitalize names
items_df.Name = items_df.Name.str.capitalize()
items_df.head()

Unnamed: 0,ID,Name,Attributes,Category,Effect
0,1,Master-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Catches a wild...
1,2,Ultra-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Attempts to ca...
2,3,Great-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Attempts to ca...
3,4,Poke-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Attempts to ca...
4,5,Safari-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Attempts to ca...


In [14]:
#Ci sono degli ID mancanti nella colonna ID (per via di quelli skippati dalla API nell'ottenimento dei dati).
#Vado a resettare la colonna, da 1 a 1607 come dovrebbe essere
for i in range(0, items_df.shape[0]):
    items_df.loc[i, 'ID'] = i+1
    
items_df.tail()

Unnamed: 0,ID,Name,Attributes,Category,Effect
1602,1603,Shaderoot-carrot,[],"{'name': 'plot-advancement', 'url': 'https://p...",[]
1603,1604,Dynite-ore,[],"{'name': 'collectibles', 'url': 'https://pokea...",[]
1604,1605,Carrot-seeds,[],"{'name': 'plot-advancement', 'url': 'https://p...",[]
1605,1606,Ability-patch,[],"{'name': 'vitamins', 'url': 'https://pokeapi.c...",[]
1606,1607,Reins-of-unity,[],"{'name': 'plot-advancement', 'url': 'https://p...",[]


In [15]:
#I mantain just the short effect in the Effect attribute
for i in (items_df['ID'].values):
    try:
        items_df.loc[i-1, 'Effect'] = items_df[items_df['ID'] == i]['Effect'][i-1][0]['short_effect']
    except IndexError:
        items_df.loc[i-1, 'Effect'] = np.NaN

In [16]:
items_df.tail()

Unnamed: 0,ID,Name,Attributes,Category,Effect
1602,1603,Shaderoot-carrot,[],"{'name': 'plot-advancement', 'url': 'https://p...",
1603,1604,Dynite-ore,[],"{'name': 'collectibles', 'url': 'https://pokea...",
1604,1605,Carrot-seeds,[],"{'name': 'plot-advancement', 'url': 'https://p...",
1605,1606,Ability-patch,[],"{'name': 'vitamins', 'url': 'https://pokeapi.c...",
1606,1607,Reins-of-unity,[],"{'name': 'plot-advancement', 'url': 'https://p...",


In [17]:
items_df[items_df['Effect'].notnull()]

Unnamed: 0,ID,Name,Attributes,Category,Effect
0,1,Master-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Catches a wild Pokémon every time.
1,2,Ultra-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Tries to catch a wild Pokémon. Success rate i...
2,3,Great-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Tries to catch a wild Pokémon. Success rate i...
3,4,Poke-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Tries to catch a wild Pokémon.
4,5,Safari-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Tries to catch a wild Pokémon in the Great Mar...
...,...,...,...,...,...
949,950,Roto-stealth,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",XXX new effect for roto-stealth
950,951,Roto-hp-restore,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",XXX new effect for roto-hp-restore
951,952,Roto-pp-restore,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",XXX new effect for roto-pp-restore
952,953,Roto-boost,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",XXX new effect for roto-boost


In [18]:
items_df[items_df['Effect'].isnull()]

Unnamed: 0,ID,Name,Attributes,Category,Effect
954,955,Autograph,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
955,956,Pokemon-box,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
956,957,Medicine-pocket,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
957,958,Candy-jar,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
958,959,Power-up-pocket,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
...,...,...,...,...,...
1602,1603,Shaderoot-carrot,[],"{'name': 'plot-advancement', 'url': 'https://p...",
1603,1604,Dynite-ore,[],"{'name': 'collectibles', 'url': 'https://pokea...",
1604,1605,Carrot-seeds,[],"{'name': 'plot-advancement', 'url': 'https://p...",
1605,1606,Ability-patch,[],"{'name': 'vitamins', 'url': 'https://pokeapi.c...",


I primi 954 strumenti hanno l'effetto, i successivi no.

In [19]:
#Stessa cosa per la categoria, mantengo solo il nome
for i in (items_df['ID'].values):
        items_df.loc[i-1, 'Category'] = items_df[items_df['ID'] == i]['Category'][i-1]['name']

In [20]:
items_df.Category = items_df.Category.str.capitalize()
items_df.head()

Unnamed: 0,ID,Name,Attributes,Category,Effect
0,1,Master-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Catches a wild Pokémon every time.
1,2,Ultra-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
2,3,Great-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
3,4,Poke-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Tries to catch a wild Pokémon.
4,5,Safari-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Tries to catch a wild Pokémon in the Great Mar...


In [21]:
for i in (items_df['ID'].values):
    temp = []
    for j in range(0, len(items_df[items_df['ID'] == i]['Attributes'][i-1])):
        temp.append(items_df[items_df['ID'] == i]['Attributes'][i-1][j]['name'])
    #In questo caso uso .at perchè .loc da problemi con l'assegnamento delle liste
    items_df.at[i-1, 'Attributes'] = temp

In [22]:
items_df.head()

Unnamed: 0,ID,Name,Attributes,Category,Effect
0,1,Master-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Catches a wild Pokémon every time.
1,2,Ultra-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
2,3,Great-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
3,4,Poke-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon.
4,5,Safari-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon in the Great Mar...


In [23]:
#Salvataggio sia in .csv che in .json
items_df.to_json("API_datasets\DATASET PER INTEGRAZIONE\items.json.gz", orient='records', lines=True, compression='gzip')
items_df.to_csv("API_datasets\DATASET PER INTEGRAZIONE\items.csv")

### Moves

In [28]:
moves_df.Name = moves_df.Name.str.capitalize()
moves_df.tail()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
839,10014,Shadow-hold,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
840,10015,Shadow-mist,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,100.0,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
841,10016,Shadow-panic,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,90.0,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
842,10017,Shadow-shed,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
843,10018,Shadow-sky,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]


In [29]:
#Ci sono degli ID mancanti nella colonna ID (per via di quelli skippati dalla API nell'ottenimento dei dati)
for i in range(0, moves_df.shape[0]):
    moves_df.loc[i, 'ID'] = i+1
    
moves_df.tail()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
839,840,Shadow-hold,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
840,841,Shadow-mist,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,100.0,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
841,842,Shadow-panic,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,90.0,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
842,843,Shadow-shed,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
843,844,Shadow-sky,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]


In [30]:
#Extract just the type from 'Type' lists, damage class from 'Damage_class' lists and generation from 'Introducted_in' lists
for i in (moves_df['ID'].values):
    moves_df.loc[i-1, 'Type'] = moves_df[moves_df['ID'] == i]['Type'][i-1]['name']
    moves_df.loc[i-1, 'Damage_class'] = moves_df[moves_df['ID'] == i]['Damage_class'][i-1]['name']
    moves_df.loc[i-1, 'Introducted_in'] = moves_df[moves_df['ID'] == i]['Introducted_in'][i-1]['name']

In [31]:
moves_df.Type = moves_df.Type.str.capitalize()
moves_df.Damage_class = moves_df.Damage_class.str.capitalize()
moves_df.head()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
0,1,Pound,Normal,40.0,100.0,35.0,Physical,generation-i,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
1,2,Karate-chop,Fighting,50.0,100.0,25.0,Physical,generation-i,"[{'name': 'mankey', 'url': 'https://pokeapi.co..."
2,3,Double-slap,Normal,15.0,85.0,10.0,Physical,generation-i,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
3,4,Comet-punch,Normal,18.0,85.0,15.0,Physical,generation-i,"[{'name': 'hitmonchan', 'url': 'https://pokeap..."
4,5,Mega-punch,Normal,80.0,85.0,20.0,Physical,generation-i,"[{'name': 'charmander', 'url': 'https://pokeap..."


In [32]:
for index, row in moves_df.iterrows():
    if row['Introducted_in'] == 'generation-i':
        moves_df.loc[index,'Introducted_in'] = '1'
    elif row['Introducted_in'] == 'generation-ii':
        moves_df.loc[index,'Introducted_in'] = '2'
    elif row['Introducted_in'] == 'generation-iii':
        moves_df.loc[index,'Introducted_in'] = '3'
    elif row['Introducted_in'] == 'generation-iv':
        moves_df.loc[index,'Introducted_in'] = '4'
    elif row['Introducted_in'] == 'generation-v':
        moves_df.loc[index,'Introducted_in'] = '5'
    elif row['Introducted_in'] == 'generation-vi':
        moves_df.loc[index,'Introducted_in'] = '6'
    elif row['Introducted_in'] == 'generation-vii':
        moves_df.loc[index,'Introducted_in'] = '7'
    elif row['Introducted_in'] == 'generation-viii':
        moves_df.loc[index,'Introducted_in'] = '8'

In [33]:
moves_df.head()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
0,1,Pound,Normal,40.0,100.0,35.0,Physical,1,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
1,2,Karate-chop,Fighting,50.0,100.0,25.0,Physical,1,"[{'name': 'mankey', 'url': 'https://pokeapi.co..."
2,3,Double-slap,Normal,15.0,85.0,10.0,Physical,1,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
3,4,Comet-punch,Normal,18.0,85.0,15.0,Physical,1,"[{'name': 'hitmonchan', 'url': 'https://pokeap..."
4,5,Mega-punch,Normal,80.0,85.0,20.0,Physical,1,"[{'name': 'charmander', 'url': 'https://pokeap..."


In [34]:
moves_df.tail()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
839,840,Shadow-hold,Shadow,,,,Status,3,[]
840,841,Shadow-mist,Shadow,,100.0,,Status,3,[]
841,842,Shadow-panic,Shadow,,90.0,,Status,3,[]
842,843,Shadow-shed,Shadow,,,,Status,3,[]
843,844,Shadow-sky,Shadow,,,,Status,3,[]


In [35]:
#Salvataggio sia in .csv che in .json
moves_df.to_json("API_datasets\DATASET PER INTEGRAZIONE\moves.json.gz", orient='records', lines=True, compression='gzip')
moves_df.to_csv("API_datasets\DATASET PER INTEGRAZIONE\moves.csv")

### Species

In [53]:
#Questo dataframe contiene una riga per ciascuna specie, nella colonna 'Varieties' sono presenti le varie forme alternative
#Tengo la colonna 'Varieties' così com'è, la considero una relazione
species_df.Name = species_df.Name.str.capitalize()
species_df.head()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Is_baby,Is_legendary,Is_mythical
0,1,Bulbasaur,"[{'is_default': True, 'pokemon': {'name': 'bul...","{'name': 'generation-i', 'url': 'https://pokea...",,False,False,False,False
1,2,Ivysaur,"[{'is_default': True, 'pokemon': {'name': 'ivy...","{'name': 'generation-i', 'url': 'https://pokea...","{'name': 'bulbasaur', 'url': 'https://pokeapi....",False,False,False,False
2,3,Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...","{'name': 'generation-i', 'url': 'https://pokea...","{'name': 'ivysaur', 'url': 'https://pokeapi.co...",True,False,False,False
3,4,Charmander,"[{'is_default': True, 'pokemon': {'name': 'cha...","{'name': 'generation-i', 'url': 'https://pokea...",,False,False,False,False
4,5,Charmeleon,"[{'is_default': True, 'pokemon': {'name': 'cha...","{'name': 'generation-i', 'url': 'https://pokea...","{'name': 'charmander', 'url': 'https://pokeapi...",False,False,False,False


In [54]:
#Extract just the generation from 'Generation' lists 
for i in (species_df['ID'].values):
    species_df.loc[i-1, 'Generation'] = species_df[species_df['ID'] == i]['Generation'][i-1]['name']

In [55]:
species_df.head()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Is_baby,Is_legendary,Is_mythical
0,1,Bulbasaur,"[{'is_default': True, 'pokemon': {'name': 'bul...",generation-i,,False,False,False,False
1,2,Ivysaur,"[{'is_default': True, 'pokemon': {'name': 'ivy...",generation-i,"{'name': 'bulbasaur', 'url': 'https://pokeapi....",False,False,False,False
2,3,Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...",generation-i,"{'name': 'ivysaur', 'url': 'https://pokeapi.co...",True,False,False,False
3,4,Charmander,"[{'is_default': True, 'pokemon': {'name': 'cha...",generation-i,,False,False,False,False
4,5,Charmeleon,"[{'is_default': True, 'pokemon': {'name': 'cha...",generation-i,"{'name': 'charmander', 'url': 'https://pokeapi...",False,False,False,False


In [56]:
#Extract just the name from 'Evolves_from' lists 
for i in (species_df['ID'].values):
    try:
        species_df.loc[i-1, 'Evolves_from'] = species_df[species_df['ID'] == i]['Evolves_from'][i-1]['name']
    except TypeError:
        species_df.loc[i-1, 'Evolves_from'] = np.NaN

In [57]:
species_df.Evolves_from = species_df.Evolves_from.str.capitalize()
species_df.head()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Is_baby,Is_legendary,Is_mythical
0,1,Bulbasaur,"[{'is_default': True, 'pokemon': {'name': 'bul...",generation-i,,False,False,False,False
1,2,Ivysaur,"[{'is_default': True, 'pokemon': {'name': 'ivy...",generation-i,Bulbasaur,False,False,False,False
2,3,Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...",generation-i,Ivysaur,True,False,False,False
3,4,Charmander,"[{'is_default': True, 'pokemon': {'name': 'cha...",generation-i,,False,False,False,False
4,5,Charmeleon,"[{'is_default': True, 'pokemon': {'name': 'cha...",generation-i,Charmander,False,False,False,False


In [58]:
#Aggiungo una colonna che riassume le colonne 'Is_baby', 'Is_legendary' e 'Is_mythical' (se tutte sono false li chiamo 'Common')
for index, row in species_df.iterrows():
    if row['Is_baby'] == True:
        species_df.loc[index,'Rarity'] = 'Baby'
    elif row['Is_legendary'] == True:
        species_df.loc[index,'Rarity'] = 'Legendary'
    elif row['Is_mythical'] == True:
        species_df.loc[index,'Rarity'] = 'Mythical'
    else:
        species_df.loc[index,'Rarity'] = 'Common'

In [59]:
species_df.drop(['Is_baby', 'Is_legendary', 'Is_mythical'], axis=1, inplace=True)
species_df.tail(15)

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Rarity
883,884,Duraludon,"[{'is_default': True, 'pokemon': {'name': 'dur...",generation-viii,,False,Common
884,885,Dreepy,"[{'is_default': True, 'pokemon': {'name': 'dre...",generation-viii,,False,Common
885,886,Drakloak,"[{'is_default': True, 'pokemon': {'name': 'dra...",generation-viii,Dreepy,False,Common
886,887,Dragapult,"[{'is_default': True, 'pokemon': {'name': 'dra...",generation-viii,Drakloak,False,Common
887,888,Zacian,"[{'is_default': True, 'pokemon': {'name': 'zac...",generation-viii,,False,Legendary
888,889,Zamazenta,"[{'is_default': True, 'pokemon': {'name': 'zam...",generation-viii,,False,Legendary
889,890,Eternatus,"[{'is_default': True, 'pokemon': {'name': 'ete...",generation-viii,,False,Legendary
890,891,Kubfu,"[{'is_default': True, 'pokemon': {'name': 'kub...",generation-viii,,False,Legendary
891,892,Urshifu,"[{'is_default': True, 'pokemon': {'name': 'urs...",generation-viii,Kubfu,False,Legendary
892,893,Zarude,"[{'is_default': True, 'pokemon': {'name': 'zar...",generation-viii,,False,Mythical


In [60]:
for index, row in species_df.iterrows():
    if row['Generation'] == 'generation-i':
        species_df.loc[index,'Generation'] = '1'
    elif row['Generation'] == 'generation-ii':
        species_df.loc[index,'Generation'] = '2'
    elif row['Generation'] == 'generation-iii':
        species_df.loc[index,'Generation'] = '3'
    elif row['Generation'] == 'generation-iv':
        species_df.loc[index,'Generation'] = '4'
    elif row['Generation'] == 'generation-v':
        species_df.loc[index,'Generation'] = '5'
    elif row['Generation'] == 'generation-vi':
        species_df.loc[index,'Generation'] = '6'
    elif row['Generation'] == 'generation-vii':
        species_df.loc[index,'Generation'] = '7'
    elif row['Generation'] == 'generation-viii':
        species_df.loc[index,'Generation'] = '8'

In [61]:
species_df.head()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Rarity
0,1,Bulbasaur,"[{'is_default': True, 'pokemon': {'name': 'bul...",1,,False,Common
1,2,Ivysaur,"[{'is_default': True, 'pokemon': {'name': 'ivy...",1,Bulbasaur,False,Common
2,3,Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...",1,Ivysaur,True,Common
3,4,Charmander,"[{'is_default': True, 'pokemon': {'name': 'cha...",1,,False,Common
4,5,Charmeleon,"[{'is_default': True, 'pokemon': {'name': 'cha...",1,Charmander,False,Common


In [62]:
species_df.tail()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Rarity
893,894,Regieleki,"[{'is_default': True, 'pokemon': {'name': 'reg...",8,,False,Legendary
894,895,Regidrago,"[{'is_default': True, 'pokemon': {'name': 'reg...",8,,False,Legendary
895,896,Glastrier,"[{'is_default': True, 'pokemon': {'name': 'gla...",8,,False,Legendary
896,897,Spectrier,"[{'is_default': True, 'pokemon': {'name': 'spe...",8,,False,Legendary
897,898,Calyrex,"[{'is_default': True, 'pokemon': {'name': 'cal...",8,,False,Legendary


In [64]:
#Salvataggio sia in .csv che in .json
species_df.to_json("API_datasets\DATASET PER INTEGRAZIONE\species.json.gz", orient='records', lines=True, compression='gzip')
species_df.to_csv("API_datasets\DATASET PER INTEGRAZIONE\species.csv")

### Abilities

In [65]:
ability_df.Name = ability_df.Name.str.capitalize()

#Resetto gli IDs
for i in range(0, ability_df.shape[0]):
    ability_df.loc[i, 'ID'] = i+1
    
ability_df.tail()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
322,323,Run-up,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]
323,324,Conqueror,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]
324,325,Shackle,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]
325,326,Decoy,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]
326,327,Shield,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]


In [66]:
#Extract just the generation from 'Generation' lists 
for i in (ability_df['ID'].values):
    ability_df.loc[i-1, 'Generation'] = ability_df[ability_df['ID'] == i]['Generation'][i-1]['name']

In [67]:
ability_df.tail()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
322,323,Run-up,generation-v,False,[],[],[]
323,324,Conqueror,generation-v,False,[],[],[]
324,325,Shackle,generation-v,False,[],[],[]
325,326,Decoy,generation-v,False,[],[],[]
326,327,Shield,generation-v,False,[],[],[]


In [68]:
#A noi interessano solo le abilità disponibili nei giochi della serie principale, quindi:
mainseries_ability_df = ability_df[ability_df['Is_Main_Series'] == True]
mainseries_ability_df.tail()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
262,263,Dragons-maw,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'reg..."
263,264,Chilling-neigh,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'gla..."
264,265,Grim-neigh,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'spe..."
265,266,As-one-glastrier,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'cal..."
266,267,As-one-spectrier,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'cal..."


La colonna 'Effect_changes' contiene i cambiamenti degli effetti dell'abilità col passare delle versioni. Lascio la colonna per sicurezza, anche se a noi interesse l'effetto attuale, che è contenuto nella colonna 'Effect_entries'. Ha senso comunque fare integrazione coi dati dello scraping.

In [69]:
#Fino all'abilità con ID=191 abbiamo sia la descrizione in tedesco che in inglese. Dall' ID=192 fino alla fine abbiamo solo la 
#descrizione in inglese

for i in (mainseries_ability_df['ID'].values):
    try:
        if i <= 191:
            mainseries_ability_df.loc[i-1, 'Effect_entries'] = mainseries_ability_df[mainseries_ability_df['ID'] == i]['Effect_entries'][i-1][1]['effect']
        else:
            mainseries_ability_df.loc[i-1, 'Effect_entries'] = mainseries_ability_df[mainseries_ability_df['ID'] == i]['Effect_entries'][i-1][0]['effect']
    except IndexError:
        mainseries_ability_df.loc[i-1, 'Effect_entries'] = np.NaN

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [70]:
mainseries_ability_df.head()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
0,1,Stench,generation-iii,True,[{'effect_entries': [{'effect': 'Hat im Kampf ...,This Pokémon's damaging moves have a 10% chanc...,"[{'is_hidden': True, 'pokemon': {'name': 'gloo..."
1,2,Drizzle,generation-iii,True,[],The weather changes to rain when this Pokémon ...,"[{'is_hidden': True, 'pokemon': {'name': 'poli..."
2,3,Speed-boost,generation-iii,True,[],This Pokémon's Speed rises one stage after eac...,"[{'is_hidden': False, 'pokemon': {'name': 'yan..."
3,4,Battle-armor,generation-iii,True,[],Moves cannot score critical hits against this ...,"[{'is_hidden': True, 'pokemon': {'name': 'cubo..."
4,5,Sturdy,generation-iii,True,[{'effect_entries': [{'effect': 'Verhindert ke...,"When this Pokémon is at full HP, any hit that ...","[{'is_hidden': False, 'pokemon': {'name': 'geo..."


In [71]:
for index, row in mainseries_ability_df.iterrows():
    if row['Generation'] == 'generation-i':
        mainseries_ability_df.loc[index,'Generation'] = '1'
    elif row['Generation'] == 'generation-ii':
        mainseries_ability_df.loc[index,'Generation'] = '2'
    elif row['Generation'] == 'generation-iii':
        mainseries_ability_df.loc[index,'Generation'] = '3'
    elif row['Generation'] == 'generation-iv':
        mainseries_ability_df.loc[index,'Generation'] = '4'
    elif row['Generation'] == 'generation-v':
        mainseries_ability_df.loc[index,'Generation'] = '5'
    elif row['Generation'] == 'generation-vi':
        mainseries_ability_df.loc[index,'Generation'] = '6'
    elif row['Generation'] == 'generation-vii':
        mainseries_ability_df.loc[index,'Generation'] = '7'
    elif row['Generation'] == 'generation-viii':
        mainseries_ability_df.loc[index,'Generation'] = '8'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [72]:
mainseries_ability_df.head()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
0,1,Stench,3,True,[{'effect_entries': [{'effect': 'Hat im Kampf ...,This Pokémon's damaging moves have a 10% chanc...,"[{'is_hidden': True, 'pokemon': {'name': 'gloo..."
1,2,Drizzle,3,True,[],The weather changes to rain when this Pokémon ...,"[{'is_hidden': True, 'pokemon': {'name': 'poli..."
2,3,Speed-boost,3,True,[],This Pokémon's Speed rises one stage after eac...,"[{'is_hidden': False, 'pokemon': {'name': 'yan..."
3,4,Battle-armor,3,True,[],Moves cannot score critical hits against this ...,"[{'is_hidden': True, 'pokemon': {'name': 'cubo..."
4,5,Sturdy,3,True,[{'effect_entries': [{'effect': 'Verhindert ke...,"When this Pokémon is at full HP, any hit that ...","[{'is_hidden': False, 'pokemon': {'name': 'geo..."


In [73]:
mainseries_ability_df.tail()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
262,263,Dragons-maw,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'reg..."
263,264,Chilling-neigh,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'gla..."
264,265,Grim-neigh,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'spe..."
265,266,As-one-glastrier,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'cal..."
266,267,As-one-spectrier,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'cal..."


In [74]:
#Salvataggio sia in .csv che in .json
mainseries_ability_df.to_json("API_datasets\DATASET PER INTEGRAZIONE\Abilities.json.gz", orient='records', lines=True, compression='gzip')
mainseries_ability_df.to_csv("API_datasets\DATASET PER INTEGRAZIONE\Abilities.csv")

### Types

In [81]:
type_df.Name = type_df.Name.str.capitalize()

#Resetto gli indici
for i in range(0, type_df.shape[0]):
    type_df.loc[i, 'ID'] = i+1
    
type_df.tail()

Unnamed: 0,ID,Game_index,Name,Introducted_in,Damage_relations
15,16,"[{'game_index': 26, 'generation': {'name': 'ge...",Dragon,"{'name': 'generation-i', 'url': 'https://pokea...","{'double_damage_from': [{'name': 'ice', 'url':..."
16,17,"[{'game_index': 27, 'generation': {'name': 'ge...",Dark,"{'name': 'generation-ii', 'url': 'https://poke...","{'double_damage_from': [{'name': 'fighting', '..."
17,18,"[{'game_index': 17, 'generation': {'name': 'ge...",Fairy,"{'name': 'generation-vi', 'url': 'https://poke...","{'double_damage_from': [{'name': 'poison', 'ur..."
18,19,"[{'game_index': 19, 'generation': {'name': 'ge...",Unknown,"{'name': 'generation-ii', 'url': 'https://poke...","{'double_damage_from': [], 'double_damage_to':..."
19,20,[],Shadow,"{'name': 'generation-iii', 'url': 'https://pok...","{'double_damage_from': [], 'double_damage_to':..."


In [82]:
#Extract just the generation from 'Generation' lists 
for i in (type_df['ID'].values):
    type_df.loc[i-1, 'Introducted_in'] = type_df[type_df['ID'] == i]['Introducted_in'][i-1]['name']
    
type_df.tail()

Unnamed: 0,ID,Game_index,Name,Introducted_in,Damage_relations
15,16,"[{'game_index': 26, 'generation': {'name': 'ge...",Dragon,generation-i,"{'double_damage_from': [{'name': 'ice', 'url':..."
16,17,"[{'game_index': 27, 'generation': {'name': 'ge...",Dark,generation-ii,"{'double_damage_from': [{'name': 'fighting', '..."
17,18,"[{'game_index': 17, 'generation': {'name': 'ge...",Fairy,generation-vi,"{'double_damage_from': [{'name': 'poison', 'ur..."
18,19,"[{'game_index': 19, 'generation': {'name': 'ge...",Unknown,generation-ii,"{'double_damage_from': [], 'double_damage_to':..."
19,20,[],Shadow,generation-iii,"{'double_damage_from': [], 'double_damage_to':..."


In [83]:
for index, row in type_df.iterrows():
    if row['Introducted_in'] == 'generation-i':
        type_df.loc[index,'Introducted_in'] = '1'
    elif row['Introducted_in'] == 'generation-ii':
        type_df.loc[index,'Introducted_in'] = '2'
    elif row['Introducted_in'] == 'generation-iii':
        type_df.loc[index,'Introducted_in'] = '3'
    elif row['Introducted_in'] == 'generation-iv':
        type_df.loc[index,'Introducted_in'] = '4'
    elif row['Introducted_in'] == 'generation-v':
        type_df.loc[index,'Introducted_in'] = '5'
    elif row['Introducted_in'] == 'generation-vi':
        type_df.loc[index,'Introducted_in'] = '6'
    elif row['Introducted_in'] == 'generation-vii':
        type_df.loc[index,'Introducted_in'] = '7'
    elif row['Introducted_in'] == 'generation-viii':
        type_df.loc[index,'Introducted_in'] = '8'

In [84]:
#Salvataggio sia in .csv che in .json
type_df.to_json("API_datasets\DATASET PER INTEGRAZIONE\Types.json.gz", orient='records', lines=True, compression='gzip')
type_df.to_csv("API_datasets\DATASET PER INTEGRAZIONE\Types.csv")