In [None]:
import requests
import json
import pandas as pd
import numpy as np
from pprint import pprint as pp
import recordlinkage as rl
import re

### Cleaning

In [None]:
# Load the raw API dumps (one JSON file per resource) into DataFrames.
items, moves, species, abilities, types = map(
    pd.read_json,
    (
        "API_datasets/item.json",
        "API_datasets/moves.json",
        "API_datasets/species.json",
        "API_datasets/abilities.json",
        "API_datasets/types.json",
    ),
)

In [None]:
# Build one working frame per resource, keeping only the columns of interest.
# Each pair is (raw frame, columns to keep); the order matches the names on the left.
items_df, moves_df, species_df, ability_df, type_df = (
    pd.DataFrame(raw_frame, columns=wanted_columns)
    for raw_frame, wanted_columns in (
        (items, ["attributes", "category", "effect_entries", "id", "name"]),
        (moves, ["accuracy", "damage_class", "generation", "id", "learned_by_pokemon",
                 "name", "power", "pp", "type"]),
        (species, ["evolves_from_species", "generation", "has_gender_differences", "id",
                   "is_baby", "is_legendary", "is_mythical", "name", "varieties"]),
        (abilities, ["effect_changes", "effect_entries", "generation", "id",
                     "is_main_series", "name", "pokemon"]),
        (types, ["damage_relations", "game_indices", "generation", "id", "name"]),
    )
)

In [None]:
# Reorder the columns and give them presentation names.
# Each mapping is ordered: its keys select (and order) the columns, its values rename them.
item_names = {"id": "ID", "name": "Name", "attributes": "Attributes", "category": "Category",
              "effect_entries": "Effect"}
items_df = items_df[list(item_names)].rename(columns=item_names)

move_names = {"id": "ID", "name": "Name", "type": "Type", "power": "Power", "accuracy": "Accuracy",
              "pp": "PP", "damage_class": "Damage_class", "generation": "Introducted_in",
              "learned_by_pokemon": "Learned_by"}
moves_df = moves_df[list(move_names)].rename(columns=move_names)

species_names = {"id": "ID", "name": "Name", "varieties": "Varieties", "generation": "Generation",
                 "evolves_from_species": "Evolves_from", "has_gender_differences": "Has_gender_diff",
                 "is_baby": "Is_baby", "is_legendary": "Is_legendary", "is_mythical": "Is_mythical"}
species_df = species_df[list(species_names)].rename(columns=species_names)

ability_names = {"id": "ID", "name": "Name", "generation": "Generation",
                 "is_main_series": "Is_Main_Series", "effect_changes": "Effect_changes",
                 "effect_entries": "Effect_entries", "pokemon": "Pokemon"}
ability_df = ability_df[list(ability_names)].rename(columns=ability_names)

type_names = {"id": "ID", "game_indices": "Game_index", "name": "Name",
              "generation": "Introducted_in", "damage_relations": "Damage_relations"}
type_df = type_df[list(type_names)].rename(columns=type_names)

### Items

In [None]:
# Capitalise item names (first character upper-case, the rest lower-case).
items_df["Name"] = items_df["Name"].str.capitalize()
items_df.head()

Unnamed: 0,ID,Name,Attributes,Category,Effect
0,1,Master-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Catches a wild...
1,2,Ultra-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Attempts to ca...
2,3,Great-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Attempts to ca...
3,4,Poke-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Attempts to ca...
4,5,Safari-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",[{'effect': 'Used in battle : Attempts to ca...


In [None]:
# The ID column has gaps (some IDs were skipped by the API while downloading the data).
# Renumber it as 1..n in row order (the column is expected to be dropped later anyway).
# Assigning the whole column at once does not depend on the index labels, unlike a
# per-row `.loc[i, 'ID'] = ...` loop, which would append new rows if a label were missing.
items_df['ID'] = range(1, len(items_df) + 1)

items_df.tail()

Unnamed: 0,ID,Name,Attributes,Category,Effect
1602,1603,Shaderoot-carrot,[],"{'name': 'plot-advancement', 'url': 'https://p...",[]
1603,1604,Dynite-ore,[],"{'name': 'collectibles', 'url': 'https://pokea...",[]
1604,1605,Carrot-seeds,[],"{'name': 'plot-advancement', 'url': 'https://p...",[]
1605,1606,Ability-patch,[],"{'name': 'vitamins', 'url': 'https://pokeapi.c...",[]
1606,1607,Reins-of-unity,[],"{'name': 'plot-advancement', 'url': 'https://p...",[]


In [None]:
# In the "Effect" column keep only the "short_effect" text of the first effect entry.
# - an empty list of entries becomes NaN;
# - a value that is not a list (already extracted text, or NaN) is left untouched,
#   so the cell can be re-run safely.
# `np.nan` is used because the `np.NaN` alias no longer exists in NumPy 2.0.
items_df['Effect'] = items_df['Effect'].map(
    lambda entries: entries if not isinstance(entries, list)
    else (entries[0]['short_effect'] if entries else np.nan)
)

In [None]:
# Inspect the last rows to check the extracted "Effect" values.
items_df.tail()

Unnamed: 0,ID,Name,Attributes,Category,Effect
1602,1603,Shaderoot-carrot,[],"{'name': 'plot-advancement', 'url': 'https://p...",
1603,1604,Dynite-ore,[],"{'name': 'collectibles', 'url': 'https://pokea...",
1604,1605,Carrot-seeds,[],"{'name': 'plot-advancement', 'url': 'https://p...",
1605,1606,Ability-patch,[],"{'name': 'vitamins', 'url': 'https://pokeapi.c...",
1606,1607,Reins-of-unity,[],"{'name': 'plot-advancement', 'url': 'https://p...",


In [None]:
# Show the items whose "Effect" is not null.
items_df[items_df['Effect'].notnull()]

Unnamed: 0,ID,Name,Attributes,Category,Effect
0,1,Master-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Catches a wild Pokémon every time.
1,2,Ultra-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Tries to catch a wild Pokémon. Success rate i...
2,3,Great-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Tries to catch a wild Pokémon. Success rate i...
3,4,Poke-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Tries to catch a wild Pokémon.
4,5,Safari-ball,"[{'name': 'countable', 'url': 'https://pokeapi...","{'name': 'standard-balls', 'url': 'https://pok...",Tries to catch a wild Pokémon in the Great Mar...
...,...,...,...,...,...
949,950,Roto-stealth,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",XXX new effect for roto-stealth
950,951,Roto-hp-restore,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",XXX new effect for roto-hp-restore
951,952,Roto-pp-restore,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",XXX new effect for roto-pp-restore
952,953,Roto-boost,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",XXX new effect for roto-boost


In [None]:
# Show the items whose "Effect" is null (no effect entry was available).
items_df[items_df['Effect'].isnull()]

Unnamed: 0,ID,Name,Attributes,Category,Effect
954,955,Autograph,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
955,956,Pokemon-box,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
956,957,Medicine-pocket,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
957,958,Candy-jar,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
958,959,Power-up-pocket,[],"{'name': 'unused', 'url': 'https://pokeapi.co/...",
...,...,...,...,...,...
1602,1603,Shaderoot-carrot,[],"{'name': 'plot-advancement', 'url': 'https://p...",
1603,1604,Dynite-ore,[],"{'name': 'collectibles', 'url': 'https://pokea...",
1604,1605,Carrot-seeds,[],"{'name': 'plot-advancement', 'url': 'https://p...",
1605,1606,Ability-patch,[],"{'name': 'vitamins', 'url': 'https://pokeapi.c...",


I primi 954 strumenti hanno l'effetto, i successivi no.

In [None]:
# Same for the category: keep only its name.
# Values that are not dicts (e.g. names already extracted on a previous run) are left untouched.
items_df['Category'] = items_df['Category'].map(
    lambda category: category['name'] if isinstance(category, dict) else category
)

In [None]:
# Capitalise the category names, then preview the result.
items_df["Category"] = items_df["Category"].str.capitalize()
items_df.head()

Unnamed: 0,ID,Name,Attributes,Category,Effect
0,1,Master-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Catches a wild Pokémon every time.
1,2,Ultra-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
2,3,Great-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
3,4,Poke-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Tries to catch a wild Pokémon.
4,5,Safari-ball,"[{'name': 'countable', 'url': 'https://pokeapi...",Standard-balls,Tries to catch a wild Pokémon in the Great Mar...


In [None]:
# Replace each list of attribute references with the list of their names.
# Entries that are not dicts (names already extracted on a previous run) are kept as they are;
# values that are not lists are left untouched.
items_df['Attributes'] = items_df['Attributes'].map(
    lambda attributes: [
        attribute['name'] if isinstance(attribute, dict) else attribute
        for attribute in attributes
    ] if isinstance(attributes, list) else attributes
)

In [None]:
# Preview the items after flattening "Attributes".
items_df.head()

Unnamed: 0,ID,Name,Attributes,Category,Effect
0,1,Master-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Catches a wild Pokémon every time.
1,2,Ultra-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
2,3,Great-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
3,4,Poke-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon.
4,5,Safari-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon in the Great Mar...


In [None]:
# Save both as gzip-compressed JSON Lines and as CSV.
# Forward slashes are used as path separators: backslashes inside a normal string literal
# are (invalid) escape sequences and are not path separators outside Windows.
items_df.to_json("API_datasets/DATASET PER INTEGRAZIONE/items.json.gz", orient='records', lines=True, compression='gzip')
items_df.to_csv("API_datasets/DATASET PER INTEGRAZIONE/items.csv")

### Moves

In [None]:
# Capitalise move names (first character upper-case, the rest lower-case).
moves_df["Name"] = moves_df["Name"].str.capitalize()
moves_df.tail()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
839,10014,Shadow-hold,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
840,10015,Shadow-mist,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,100.0,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
841,10016,Shadow-panic,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,90.0,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
842,10017,Shadow-shed,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
843,10018,Shadow-sky,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]


In [None]:
# The ID column has gaps (some IDs were skipped by the API while downloading the data).
# Renumber it as 1..n in row order, even though the ID is not needed later here either.
# Whole-column assignment does not depend on the index labels, unlike a per-row
# `.loc[i, 'ID'] = ...` loop, which would append new rows if a label were missing.
moves_df['ID'] = range(1, len(moves_df) + 1)

moves_df.tail()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
839,840,Shadow-hold,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
840,841,Shadow-mist,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,100.0,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
841,842,Shadow-panic,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,90.0,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
842,843,Shadow-shed,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]
843,844,Shadow-sky,"{'name': 'shadow', 'url': 'https://pokeapi.co/...",,,,"{'name': 'status', 'url': 'https://pokeapi.co/...","{'name': 'generation-iii', 'url': 'https://pok...",[]


In [None]:
# Extract the type from 'Type', the damage class from 'Damage_class' and the generation
# from 'Introducted_in': each cell holds a reference dict, of which only 'name' is kept.
# Values that are not dicts (e.g. names already extracted on a previous run) are left untouched.
for column in ('Type', 'Damage_class', 'Introducted_in'):
    moves_df[column] = moves_df[column].map(
        lambda reference: reference['name'] if isinstance(reference, dict) else reference
    )

In [None]:
# Capitalise the type and damage-class names, then preview the result.
for column in ("Type", "Damage_class"):
    moves_df[column] = moves_df[column].str.capitalize()
moves_df.head()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
0,1,Pound,Normal,40.0,100.0,35.0,Physical,generation-i,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
1,2,Karate-chop,Fighting,50.0,100.0,25.0,Physical,generation-i,"[{'name': 'mankey', 'url': 'https://pokeapi.co..."
2,3,Double-slap,Normal,15.0,85.0,10.0,Physical,generation-i,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
3,4,Comet-punch,Normal,18.0,85.0,15.0,Physical,generation-i,"[{'name': 'hitmonchan', 'url': 'https://pokeap..."
4,5,Mega-punch,Normal,80.0,85.0,20.0,Physical,generation-i,"[{'name': 'charmander', 'url': 'https://pokeap..."


In [None]:
# Replace the generation label with its number. The number is stored as a string,
# exactly as the previous if/elif chain did; labels outside the table are left unchanged.
generation_numbers = {
    'generation-i': '1',
    'generation-ii': '2',
    'generation-iii': '3',
    'generation-iv': '4',
    'generation-v': '5',
    'generation-vi': '6',
    'generation-vii': '7',
    'generation-viii': '8',
}
moves_df['Introducted_in'] = moves_df['Introducted_in'].map(
    # Non-string values (e.g. NaN) cannot be labels, so they pass through untouched.
    lambda label: generation_numbers.get(label, label) if isinstance(label, str) else label
)

In [None]:
# Preview the first moves after converting the generation labels.
moves_df.head()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
0,1,Pound,Normal,40.0,100.0,35.0,Physical,1,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
1,2,Karate-chop,Fighting,50.0,100.0,25.0,Physical,1,"[{'name': 'mankey', 'url': 'https://pokeapi.co..."
2,3,Double-slap,Normal,15.0,85.0,10.0,Physical,1,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
3,4,Comet-punch,Normal,18.0,85.0,15.0,Physical,1,"[{'name': 'hitmonchan', 'url': 'https://pokeap..."
4,5,Mega-punch,Normal,80.0,85.0,20.0,Physical,1,"[{'name': 'charmander', 'url': 'https://pokeap..."


In [None]:
# Preview the last moves after converting the generation labels.
moves_df.tail()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
839,840,Shadow-hold,Shadow,,,,Status,3,[]
840,841,Shadow-mist,Shadow,,100.0,,Status,3,[]
841,842,Shadow-panic,Shadow,,90.0,,Status,3,[]
842,843,Shadow-shed,Shadow,,,,Status,3,[]
843,844,Shadow-sky,Shadow,,,,Status,3,[]


In [None]:
# Save both as gzip-compressed JSON Lines and as CSV.
# Forward slashes are used as path separators: backslashes inside a normal string literal
# are (invalid) escape sequences and are not path separators outside Windows.
moves_df.to_json("API_datasets/DATASET PER INTEGRAZIONE/moves.json.gz", orient='records', lines=True, compression='gzip')
moves_df.to_csv("API_datasets/DATASET PER INTEGRAZIONE/moves.csv")

### Species

In [None]:
# This frame has one row per species; the 'Varieties' column lists the alternative forms.
# 'Varieties' is kept as it is and treated as a relation.
species_df["Name"] = species_df["Name"].str.capitalize()
species_df.head()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Is_baby,Is_legendary,Is_mythical
0,1,Bulbasaur,"[{'is_default': True, 'pokemon': {'name': 'bul...","{'name': 'generation-i', 'url': 'https://pokea...",,False,False,False,False
1,2,Ivysaur,"[{'is_default': True, 'pokemon': {'name': 'ivy...","{'name': 'generation-i', 'url': 'https://pokea...","{'name': 'bulbasaur', 'url': 'https://pokeapi....",False,False,False,False
2,3,Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...","{'name': 'generation-i', 'url': 'https://pokea...","{'name': 'ivysaur', 'url': 'https://pokeapi.co...",True,False,False,False
3,4,Charmander,"[{'is_default': True, 'pokemon': {'name': 'cha...","{'name': 'generation-i', 'url': 'https://pokea...",,False,False,False,False
4,5,Charmeleon,"[{'is_default': True, 'pokemon': {'name': 'cha...","{'name': 'generation-i', 'url': 'https://pokea...","{'name': 'charmander', 'url': 'https://pokeapi...",False,False,False,False


In [None]:
# Extract the generation name from the reference dict stored in 'Generation'.
# Values that are not dicts (e.g. names already extracted on a previous run) are left untouched.
species_df['Generation'] = species_df['Generation'].map(
    lambda reference: reference['name'] if isinstance(reference, dict) else reference
)

In [None]:
# Preview the species after extracting the generation name.
species_df.head()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Is_baby,Is_legendary,Is_mythical
0,1,Bulbasaur,"[{'is_default': True, 'pokemon': {'name': 'bul...",generation-i,,False,False,False,False
1,2,Ivysaur,"[{'is_default': True, 'pokemon': {'name': 'ivy...",generation-i,"{'name': 'bulbasaur', 'url': 'https://pokeapi....",False,False,False,False
2,3,Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...",generation-i,"{'name': 'ivysaur', 'url': 'https://pokeapi.co...",True,False,False,False
3,4,Charmander,"[{'is_default': True, 'pokemon': {'name': 'cha...",generation-i,,False,False,False,False
4,5,Charmeleon,"[{'is_default': True, 'pokemon': {'name': 'cha...",generation-i,"{'name': 'charmander', 'url': 'https://pokeapi...",False,False,False,False


In [None]:
# Keep only the name of the species in 'Evolves_from'.
# - a reference dict becomes its 'name';
# - a string (a name already extracted on a previous run) is kept, so re-running the cell
#   does not wipe the column;
# - anything else (species with no pre-evolution) becomes NaN.
# `np.nan` is used because the `np.NaN` alias no longer exists in NumPy 2.0.
species_df['Evolves_from'] = species_df['Evolves_from'].map(
    lambda reference: reference['name'] if isinstance(reference, dict)
    else (reference if isinstance(reference, str) else np.nan)
)

In [None]:
# Capitalise the pre-evolution names, then preview the result.
species_df["Evolves_from"] = species_df["Evolves_from"].str.capitalize()
species_df.head()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Is_baby,Is_legendary,Is_mythical
0,1,Bulbasaur,"[{'is_default': True, 'pokemon': {'name': 'bul...",generation-i,,False,False,False,False
1,2,Ivysaur,"[{'is_default': True, 'pokemon': {'name': 'ivy...",generation-i,Bulbasaur,False,False,False,False
2,3,Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...",generation-i,Ivysaur,True,False,False,False
3,4,Charmander,"[{'is_default': True, 'pokemon': {'name': 'cha...",generation-i,,False,False,False,False
4,5,Charmeleon,"[{'is_default': True, 'pokemon': {'name': 'cha...",generation-i,Charmander,False,False,False,False


In [None]:
# Add a 'Rarity' column summarising 'Is_baby', 'Is_legendary' and 'Is_mythical'
# (a species with all three flags False is labelled 'Common').
# np.select uses the first matching condition, so the priority is the same as an
# if/elif chain: Baby, then Legendary, then Mythical.
species_df['Rarity'] = np.select(
    [
        species_df['Is_baby'] == True,
        species_df['Is_legendary'] == True,
        species_df['Is_mythical'] == True,
    ],
    ['Baby', 'Legendary', 'Mythical'],
    default='Common',
)

In [None]:
# The three boolean flags are now summarised by 'Rarity', so remove them.
species_df = species_df.drop(columns=['Is_baby', 'Is_legendary', 'Is_mythical'])
species_df.tail(15)

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Rarity
883,884,Duraludon,"[{'is_default': True, 'pokemon': {'name': 'dur...",generation-viii,,False,Common
884,885,Dreepy,"[{'is_default': True, 'pokemon': {'name': 'dre...",generation-viii,,False,Common
885,886,Drakloak,"[{'is_default': True, 'pokemon': {'name': 'dra...",generation-viii,Dreepy,False,Common
886,887,Dragapult,"[{'is_default': True, 'pokemon': {'name': 'dra...",generation-viii,Drakloak,False,Common
887,888,Zacian,"[{'is_default': True, 'pokemon': {'name': 'zac...",generation-viii,,False,Legendary
888,889,Zamazenta,"[{'is_default': True, 'pokemon': {'name': 'zam...",generation-viii,,False,Legendary
889,890,Eternatus,"[{'is_default': True, 'pokemon': {'name': 'ete...",generation-viii,,False,Legendary
890,891,Kubfu,"[{'is_default': True, 'pokemon': {'name': 'kub...",generation-viii,,False,Legendary
891,892,Urshifu,"[{'is_default': True, 'pokemon': {'name': 'urs...",generation-viii,Kubfu,False,Legendary
892,893,Zarude,"[{'is_default': True, 'pokemon': {'name': 'zar...",generation-viii,,False,Mythical


In [None]:
# Replace the generation label with its number. The number is stored as a string,
# exactly as the previous if/elif chain did; labels outside the table are left unchanged.
generation_numbers = {
    'generation-i': '1',
    'generation-ii': '2',
    'generation-iii': '3',
    'generation-iv': '4',
    'generation-v': '5',
    'generation-vi': '6',
    'generation-vii': '7',
    'generation-viii': '8',
}
species_df['Generation'] = species_df['Generation'].map(
    # Non-string values (e.g. NaN) cannot be labels, so they pass through untouched.
    lambda label: generation_numbers.get(label, label) if isinstance(label, str) else label
)

In [None]:
# Preview the first species after converting the generation labels.
species_df.head()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Rarity
0,1,Bulbasaur,"[{'is_default': True, 'pokemon': {'name': 'bul...",1,,False,Common
1,2,Ivysaur,"[{'is_default': True, 'pokemon': {'name': 'ivy...",1,Bulbasaur,False,Common
2,3,Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...",1,Ivysaur,True,Common
3,4,Charmander,"[{'is_default': True, 'pokemon': {'name': 'cha...",1,,False,Common
4,5,Charmeleon,"[{'is_default': True, 'pokemon': {'name': 'cha...",1,Charmander,False,Common


In [None]:
# Preview the last species after converting the generation labels.
species_df.tail()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Rarity
893,894,Regieleki,"[{'is_default': True, 'pokemon': {'name': 'reg...",8,,False,Legendary
894,895,Regidrago,"[{'is_default': True, 'pokemon': {'name': 'reg...",8,,False,Legendary
895,896,Glastrier,"[{'is_default': True, 'pokemon': {'name': 'gla...",8,,False,Legendary
896,897,Spectrier,"[{'is_default': True, 'pokemon': {'name': 'spe...",8,,False,Legendary
897,898,Calyrex,"[{'is_default': True, 'pokemon': {'name': 'cal...",8,,False,Legendary


In [None]:
# Save both as gzip-compressed JSON Lines and as CSV.
# Forward slashes are used as path separators: backslashes inside a normal string literal
# are (invalid) escape sequences and are not path separators outside Windows.
species_df.to_json("API_datasets/DATASET PER INTEGRAZIONE/species.json.gz", orient='records', lines=True, compression='gzip')
species_df.to_csv("API_datasets/DATASET PER INTEGRAZIONE/species.csv")

### Abilities

In [None]:
# Capitalise ability names (first character upper-case, the rest lower-case).
ability_df['Name'] = ability_df['Name'].str.capitalize()

# Renumber the IDs as 1..n in row order, even though they are expected to be dropped later.
# Whole-column assignment does not depend on the index labels, unlike a per-row
# `.loc[i, 'ID'] = ...` loop, which would append new rows if a label were missing.
ability_df['ID'] = range(1, len(ability_df) + 1)

ability_df.tail()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
322,323,Run-up,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]
323,324,Conqueror,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]
324,325,Shackle,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]
325,326,Decoy,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]
326,327,Shield,"{'name': 'generation-v', 'url': 'https://pokea...",False,[],[],[]


In [None]:
# Extract the generation name from the reference dict stored in 'Generation'.
# Values that are not dicts (e.g. names already extracted on a previous run) are left untouched.
ability_df['Generation'] = ability_df['Generation'].map(
    lambda reference: reference['name'] if isinstance(reference, dict) else reference
)

In [None]:
# Preview the last abilities after extracting the generation name.
ability_df.tail()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
322,323,Run-up,generation-v,False,[],[],[]
323,324,Conqueror,generation-v,False,[],[],[]
324,325,Shackle,generation-v,False,[],[],[]
325,326,Decoy,generation-v,False,[],[],[]
326,327,Shield,generation-v,False,[],[],[]


In [None]:
# Only abilities available in the main-series games are of interest, so rows whose
# Is_Main_Series is False are dropped.
# `.copy()` makes the result an independent frame: assigning into a bare boolean-mask
# slice is what raises pandas' SettingWithCopyWarning in the cells that modify it.
mainseries_ability_df = ability_df[ability_df['Is_Main_Series'] == True].copy()
mainseries_ability_df.tail()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
262,263,Dragons-maw,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'reg..."
263,264,Chilling-neigh,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'gla..."
264,265,Grim-neigh,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'spe..."
265,266,As-one-glastrier,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'cal..."
266,267,As-one-spectrier,generation-viii,True,[],[],"[{'is_hidden': False, 'pokemon': {'name': 'cal..."


La colonna 'Effect_changes' contiene i cambiamenti degli effetti dell'abilità col passare delle versioni. Lascio la colonna per sicurezza, anche se a noi interessa l'effetto attuale, che è contenuto nella colonna 'Effect_entries'. Ha senso comunque fare integrazione coi dati dello scraping.

In [None]:
# Up to the ability with ID=191 the entries carry both a German and an English description;
# from ID=192 onwards only the English one is present. Only the English text is kept.

def _english_effect(entries):
    """Return the English 'effect' text from a list of effect entries.

    - A value that is not a list (text already extracted on a previous run, or NaN)
      is returned unchanged.
    - An empty list gives NaN.
    - When the entries carry a 'language' reference, the entry whose language name is
      'en' is used (NaN if there is none), instead of relying on its position.
      NOTE(review): assumes the raw API shape {'effect': ..., 'language': {'name': ...}} — confirm.
    - When no entry carries language information, the last entry is used, which matches
      the layout described above (English second of two, or the only one).
    """
    if not isinstance(entries, list):
        return entries
    if not entries:
        return np.nan  # `np.NaN` no longer exists in NumPy 2.0
    tagged = [entry for entry in entries if isinstance(entry.get('language'), dict)]
    if tagged:
        for entry in tagged:
            if entry['language'].get('name') == 'en':
                return entry['effect']
        return np.nan
    return entries[-1]['effect']

# `assign` builds a new frame, so nothing is written into a slice of `ability_df`
# (no SettingWithCopyWarning).
mainseries_ability_df = mainseries_ability_df.assign(
    Effect_entries=mainseries_ability_df['Effect_entries'].map(_english_effect)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Preview the main-series abilities after extracting the effect text.
mainseries_ability_df.head()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
0,1,Stench,generation-iii,True,[{'effect_entries': [{'effect': 'Hat im Kampf ...,This Pokémon's damaging moves have a 10% chanc...,"[{'is_hidden': True, 'pokemon': {'name': 'gloo..."
1,2,Drizzle,generation-iii,True,[],The weather changes to rain when this Pokémon ...,"[{'is_hidden': True, 'pokemon': {'name': 'poli..."
2,3,Speed-boost,generation-iii,True,[],This Pokémon's Speed rises one stage after eac...,"[{'is_hidden': False, 'pokemon': {'name': 'yan..."
3,4,Battle-armor,generation-iii,True,[],Moves cannot score critical hits against this ...,"[{'is_hidden': True, 'pokemon': {'name': 'cubo..."
4,5,Sturdy,generation-iii,True,[{'effect_entries': [{'effect': 'Verhindert ke...,"When this Pokémon is at full HP, any hit that ...","[{'is_hidden': False, 'pokemon': {'name': 'geo..."


In [None]:
# Replace the generation label with its number. The number is stored as a string,
# exactly as the previous if/elif chain did; labels outside the table are left unchanged.
generation_numbers = {
    'generation-i': '1',
    'generation-ii': '2',
    'generation-iii': '3',
    'generation-iv': '4',
    'generation-v': '5',
    'generation-vi': '6',
    'generation-vii': '7',
    'generation-viii': '8',
}
# `assign` builds a new frame, so nothing is written into a slice of `ability_df`
# (no SettingWithCopyWarning).
mainseries_ability_df = mainseries_ability_df.assign(
    Generation=mainseries_ability_df['Generation'].map(
        # Non-string values (e.g. NaN) cannot be labels, so they pass through untouched.
        lambda label: generation_numbers.get(label, label) if isinstance(label, str) else label
    )
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Preview the first main-series abilities after converting the generation labels.
mainseries_ability_df.head()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
0,1,Stench,3,True,[{'effect_entries': [{'effect': 'Hat im Kampf ...,This Pokémon's damaging moves have a 10% chanc...,"[{'is_hidden': True, 'pokemon': {'name': 'gloo..."
1,2,Drizzle,3,True,[],The weather changes to rain when this Pokémon ...,"[{'is_hidden': True, 'pokemon': {'name': 'poli..."
2,3,Speed-boost,3,True,[],This Pokémon's Speed rises one stage after eac...,"[{'is_hidden': False, 'pokemon': {'name': 'yan..."
3,4,Battle-armor,3,True,[],Moves cannot score critical hits against this ...,"[{'is_hidden': True, 'pokemon': {'name': 'cubo..."
4,5,Sturdy,3,True,[{'effect_entries': [{'effect': 'Verhindert ke...,"When this Pokémon is at full HP, any hit that ...","[{'is_hidden': False, 'pokemon': {'name': 'geo..."


In [None]:
# Preview the last main-series abilities after converting the generation labels.
mainseries_ability_df.tail()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
262,263,Dragons-maw,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'reg..."
263,264,Chilling-neigh,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'gla..."
264,265,Grim-neigh,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'spe..."
265,266,As-one-glastrier,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'cal..."
266,267,As-one-spectrier,8,True,[],,"[{'is_hidden': False, 'pokemon': {'name': 'cal..."


In [None]:
# Save both as gzip-compressed JSON Lines and as CSV.
# Forward slashes are used as path separators: backslashes inside a normal string literal
# are (invalid) escape sequences and are not path separators outside Windows.
mainseries_ability_df.to_json("API_datasets/DATASET PER INTEGRAZIONE/Abilities.json.gz", orient='records', lines=True, compression='gzip')
mainseries_ability_df.to_csv("API_datasets/DATASET PER INTEGRAZIONE/Abilities.csv")

### Types

In [None]:
# Capitalise type names (first character upper-case, the rest lower-case).
type_df['Name'] = type_df['Name'].str.capitalize()

# Renumber the IDs as 1..n in row order.
# Whole-column assignment does not depend on the index labels, unlike a per-row
# `.loc[i, 'ID'] = ...` loop, which would append new rows if a label were missing.
type_df['ID'] = range(1, len(type_df) + 1)

type_df.tail()

Unnamed: 0,ID,Game_index,Name,Introducted_in,Damage_relations
15,16,"[{'game_index': 26, 'generation': {'name': 'ge...",Dragon,"{'name': 'generation-i', 'url': 'https://pokea...","{'double_damage_from': [{'name': 'ice', 'url':..."
16,17,"[{'game_index': 27, 'generation': {'name': 'ge...",Dark,"{'name': 'generation-ii', 'url': 'https://poke...","{'double_damage_from': [{'name': 'fighting', '..."
17,18,"[{'game_index': 17, 'generation': {'name': 'ge...",Fairy,"{'name': 'generation-vi', 'url': 'https://poke...","{'double_damage_from': [{'name': 'poison', 'ur..."
18,19,"[{'game_index': 19, 'generation': {'name': 'ge...",Unknown,"{'name': 'generation-ii', 'url': 'https://poke...","{'double_damage_from': [], 'double_damage_to':..."
19,20,[],Shadow,"{'name': 'generation-iii', 'url': 'https://pok...","{'double_damage_from': [], 'double_damage_to':..."


In [None]:
# Extract the generation name from the reference dict stored in 'Introducted_in'.
# Values that are not dicts (e.g. names already extracted on a previous run) are left untouched.
type_df['Introducted_in'] = type_df['Introducted_in'].map(
    lambda reference: reference['name'] if isinstance(reference, dict) else reference
)

type_df.tail()

Unnamed: 0,ID,Game_index,Name,Introducted_in,Damage_relations
15,16,"[{'game_index': 26, 'generation': {'name': 'ge...",Dragon,generation-i,"{'double_damage_from': [{'name': 'ice', 'url':..."
16,17,"[{'game_index': 27, 'generation': {'name': 'ge...",Dark,generation-ii,"{'double_damage_from': [{'name': 'fighting', '..."
17,18,"[{'game_index': 17, 'generation': {'name': 'ge...",Fairy,generation-vi,"{'double_damage_from': [{'name': 'poison', 'ur..."
18,19,"[{'game_index': 19, 'generation': {'name': 'ge...",Unknown,generation-ii,"{'double_damage_from': [], 'double_damage_to':..."
19,20,[],Shadow,generation-iii,"{'double_damage_from': [], 'double_damage_to':..."


In [None]:
# Replace the generation label with its number. The number is stored as a string,
# exactly as the previous if/elif chain did; labels outside the table are left unchanged.
generation_numbers = {
    'generation-i': '1',
    'generation-ii': '2',
    'generation-iii': '3',
    'generation-iv': '4',
    'generation-v': '5',
    'generation-vi': '6',
    'generation-vii': '7',
    'generation-viii': '8',
}
type_df['Introducted_in'] = type_df['Introducted_in'].map(
    # Non-string values (e.g. NaN) cannot be labels, so they pass through untouched.
    lambda label: generation_numbers.get(label, label) if isinstance(label, str) else label
)

In [None]:
# Save the cleaned types table both as gzip-compressed JSON Lines and as CSV.
# Forward slashes are used as separators: they work on every platform
# (Windows included) and avoid the invalid "\D" / "\T" escape sequences that
# the backslash-separated literals contained.
type_df.to_json("API_datasets/DATASET PER INTEGRAZIONE/Types.json.gz", orient='records', lines=True, compression='gzip')
type_df.to_csv("API_datasets/DATASET PER INTEGRAZIONE/Types.csv")

# INTEGRAZIONE API CON SCRAPING (POKEMON DATABASE)

# POKEMON

In [None]:
# Load the Pokédex table scraped from Pokémon Database and preview its last rows.
pokedex_path = 'Scraping_datasets/pokemondb/pokedex.json'
dex_scrap = pd.read_json(pokedex_path)
dex_scrap.tail()

Unnamed: 0,#,Name,Type1,Type2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,sprite_url,Variant
1070,902,Basculegion,Water,Ghost,530,120,92,65,100,75,78,https://img.pokemondb.net/s.png,Female
1071,903,Sneasler,Poison,Fighting,510,80,130,60,40,80,120,https://img.pokemondb.net/s.png,
1072,904,Overqwil,Dark,Poison,510,85,115,95,65,65,85,https://img.pokemondb.net/s.png,
1073,905,Enamorus,Fairy,Flying,580,74,115,70,135,80,106,https://img.pokemondb.net/s.png,Incarnate Forme
1074,905,Enamorus,Fairy,Flying,580,74,115,110,135,100,46,https://img.pokemondb.net/s.png,Therian Forme


In [None]:
# Load the cleaned API species table (gzip-compressed JSON Lines) and preview its last rows.
species_path = 'API_per_integrazione/species.json.gz'
species = pd.read_json(species_path, orient='records', lines=True, compression='gzip')
species.tail()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Rarity
893,894,Regieleki,"[{'is_default': True, 'pokemon': {'name': 'reg...",8,,False,Legendary
894,895,Regidrago,"[{'is_default': True, 'pokemon': {'name': 'reg...",8,,False,Legendary
895,896,Glastrier,"[{'is_default': True, 'pokemon': {'name': 'gla...",8,,False,Legendary
896,897,Spectrier,"[{'is_default': True, 'pokemon': {'name': 'spe...",8,,False,Legendary
897,898,Calyrex,"[{'is_default': True, 'pokemon': {'name': 'cal...",8,,False,Legendary


In [None]:
# Initialise the record-linkage indexer used for the integration.
indexer = rl.Index()
# Full indexing (no blocking): every row of one table is paired with every row of the other.
indexer.full()



<Index>

In [None]:
# Build the candidate (species row, scraped row) pairs and report how many have to be compared.
candidates = indexer.index(species, dex_scrap)
n_candidates = len(candidates)
print(n_candidates)

965350


In [None]:
# Two comparisons are computed for every candidate pair.
compare = rl.Compare()
# Fuzzy string comparison of the two 'Name' columns (threshold 0.85), stored as feature 'Name'.
compare.string('Name', 'Name', threshold=0.85, label='Name')
# Exact comparison of the API 'ID' with the scraped Pokédex number '#', stored as feature 'ID'.
compare.exact('ID', '#', label='ID')

# One row per candidate pair, one column per comparison ('Name', then 'ID').
features = compare.compute(candidates, species, dex_scrap)

In [None]:
# Number of candidate pairs for each total score (sum of the comparison features), highest score first.
pair_scores = features.sum(axis=1)
pair_scores.value_counts().sort_index(ascending=False)

2.0      1054
1.0        23
0.0    964273
dtype: int64

Abbiamo 1054 match completi (nome+ID) e 23 match singoli (o nome o ID)

In [None]:
# Keep only the pairs where at least one comparison succeeded; reset_index turns the
# pair index into the columns level_0 (species row) and level_1 (scraped row).
pair_scores = features.sum(axis=1)
potential_matches = features.loc[pair_scores > 0].reset_index()
# Total score of the pair: 'Name' feature + 'ID' feature.
potential_matches['Score'] = potential_matches[['Name', 'ID']].sum(axis=1)
potential_matches

Unnamed: 0,level_0,level_1,Name,ID,Score
0,0,0,1.0,1,2.0
1,1,1,1.0,1,2.0
2,2,2,1.0,1,2.0
3,2,3,1.0,1,2.0
4,3,4,1.0,1,2.0
...,...,...,...,...,...
1072,895,1061,1.0,1,2.0
1073,896,1062,1.0,1,2.0
1074,897,1063,1.0,1,2.0
1075,897,1064,1.0,1,2.0


In [None]:
# Pairs where only one of the two comparisons (name or ID) succeeded.
potential_matches.loc[potential_matches['Score'].eq(1.0)]

Unnamed: 0,level_0,level_1,Name,ID,Score
40,28,40,0.0,1,1.0
42,29,44,1.0,0,1.0
44,31,43,0.0,1,1.0
45,32,41,1.0,0,1.0
161,118,355,1.0,0,1.0
164,121,161,0.0,1,1.0
165,121,162,0.0,1,1.0
184,136,292,1.0,0,1.0
285,221,1022,1.0,0,1.0
297,232,180,1.0,0,1.0


Analizzo manualmente i match con score pari a 1:

In [None]:
# Match 1: female Nidoran (same ID, the name only differs in how the gender is written) ---> KEEP
print(species.loc[28], dex_scrap.loc[40], sep="\n")

ID                                                                29
Name                                                       Nidoran-f
Varieties          [{'is_default': True, 'pokemon': {'name': 'nid...
Generation                                                         1
Evolves_from                                                    None
Has_gender_diff                                                False
Rarity                                                        Common
Name: 28, dtype: object
#                                                            29
Name                                                   Nidoran♀
Type1                                                    Poison
Type2                                                       NaN
Total                                                       275
HP                                                           55
Attack                                                       47
Defense                                      

In [None]:
# Match 2: Nidorina paired with Nidorino (similar names, different IDs) ---> DISCARD
print(species.loc[29], dex_scrap.loc[44], sep="\n")

ID                                                                30
Name                                                        Nidorina
Varieties          [{'is_default': True, 'pokemon': {'name': 'nid...
Generation                                                         1
Evolves_from                                               Nidoran-f
Has_gender_diff                                                False
Rarity                                                        Common
Name: 29, dtype: object
#                                                            33
Name                                                   Nidorino
Type1                                                    Poison
Type2                                                       NaN
Total                                                       365
HP                                                           61
Attack                                                       72
Defense                                      

In [None]:
# Match 3: male Nidoran (same ID, the name only differs in how the gender is written) ---> KEEP
print(species.loc[31], dex_scrap.loc[43], sep="\n")

ID                                                                32
Name                                                       Nidoran-m
Varieties          [{'is_default': True, 'pokemon': {'name': 'nid...
Generation                                                         1
Evolves_from                                                    None
Has_gender_diff                                                False
Rarity                                                        Common
Name: 31, dtype: object
#                                                            32
Name                                                   Nidoran♂
Type1                                                    Poison
Type2                                                       NaN
Total                                                       273
HP                                                           46
Attack                                                       57
Defense                                      

In [None]:
# Match 4: Nidorino paired with Nidorina (similar names, different IDs) ---> DISCARD
print(species.loc[32], dex_scrap.loc[41], sep="\n")

ID                                                                33
Name                                                        Nidorino
Varieties          [{'is_default': True, 'pokemon': {'name': 'nid...
Generation                                                         1
Evolves_from                                               Nidoran-m
Has_gender_diff                                                False
Rarity                                                        Common
Name: 32, dtype: object
#                                                            30
Name                                                   Nidorina
Type1                                                    Poison
Type2                                                       NaN
Total                                                       365
HP                                                           70
Attack                                                       62
Defense                                      

In [None]:
# Match 5: Seaking and Slaking are different Pokémon ---> DISCARD
print(species.loc[118], dex_scrap.loc[355], sep="\n")

ID                                                               119
Name                                                         Seaking
Varieties          [{'is_default': True, 'pokemon': {'name': 'sea...
Generation                                                         1
Evolves_from                                                 Goldeen
Has_gender_diff                                                 True
Rarity                                                        Common
Name: 118, dtype: object
#                                                           289
Name                                                    Slaking
Type1                                                    Normal
Type2                                                       NaN
Total                                                       670
HP                                                          150
Attack                                                      160
Defense                                     

In [None]:
# Match 6: Mr. Mime (same ID on both sides) ---> KEEP
print(species.loc[121], dex_scrap.loc[161], sep="\n")

ID                                                               122
Name                                                         Mr-mime
Varieties          [{'is_default': True, 'pokemon': {'name': 'mr-...
Generation                                                         1
Evolves_from                                                 Mime-jr
Has_gender_diff                                                False
Rarity                                                        Common
Name: 121, dtype: object
#                                                           122
Name                                                        Mr.
Type1                                                   Psychic
Type2                                                     Fairy
Total                                                       460
HP                                                           40
Attack                                                       45
Defense                                     

In [None]:
# Match 7: Mr. Mime again, second scraped row with the same ID ---> KEEP
print(species.loc[121], dex_scrap.loc[162], sep="\n")

ID                                                               122
Name                                                         Mr-mime
Varieties          [{'is_default': True, 'pokemon': {'name': 'mr-...
Generation                                                         1
Evolves_from                                                 Mime-jr
Has_gender_diff                                                False
Rarity                                                        Common
Name: 121, dtype: object
#                                                           122
Name                                                        Mr.
Type1                                                       Ice
Type2                                                   Psychic
Total                                                       460
HP                                                           50
Attack                                                       65
Defense                                     

In [None]:
# Match 8: Porygon and Porygon2 are different Pokémon ---> DISCARD
print(species.loc[136], dex_scrap.loc[292], sep="\n")

ID                                                               137
Name                                                         Porygon
Varieties          [{'is_default': True, 'pokemon': {'name': 'por...
Generation                                                         1
Evolves_from                                                    None
Has_gender_diff                                                False
Rarity                                                        Common
Name: 136, dtype: object
#                                                           233
Name                                                   Porygon2
Type1                                                    Normal
Type2                                                       NaN
Total                                                       515
HP                                                           85
Attack                                                       80
Defense                                     

In [None]:
# Match 9: Corsola and Cursola are different Pokémon ---> DISCARD
print(species.loc[221], dex_scrap.loc[1022], sep="\n")

ID                                                               222
Name                                                         Corsola
Varieties          [{'is_default': True, 'pokemon': {'name': 'cor...
Generation                                                         2
Evolves_from                                                    None
Has_gender_diff                                                False
Rarity                                                        Common
Name: 221, dtype: object
#                                                           864
Name                                                    Cursola
Type1                                                     Ghost
Type2                                                       NaN
Total                                                       510
HP                                                           60
Attack                                                       95
Defense                                     

In [None]:
# Match 10: Porygon2 and Porygon are different Pokémon ---> DISCARD
print(species.loc[232], dex_scrap.loc[180], sep="\n")

ID                                                               233
Name                                                        Porygon2
Varieties          [{'is_default': True, 'pokemon': {'name': 'por...
Generation                                                         2
Evolves_from                                                 Porygon
Has_gender_diff                                                False
Rarity                                                        Common
Name: 232, dtype: object
#                                                           137
Name                                                    Porygon
Type1                                                    Normal
Type2                                                       NaN
Total                                                       395
HP                                                           65
Attack                                                       60
Defense                                     

In [None]:
# Match 11: Slaking and Seaking are different Pokémon ---> DISCARD
print(species.loc[288], dex_scrap.loc[158], sep="\n")

ID                                                               289
Name                                                         Slaking
Varieties          [{'is_default': True, 'pokemon': {'name': 'sla...
Generation                                                         3
Evolves_from                                                Vigoroth
Has_gender_diff                                                False
Rarity                                                        Common
Name: 288, dtype: object
#                                                           119
Name                                                    Seaking
Type1                                                     Water
Type2                                                       NaN
Total                                                       450
HP                                                           80
Attack                                                       92
Defense                                     

In [None]:
# Match 12: Mime Jr. is the same Pokémon on both sides (same ID) ---> KEEP
print(species.loc[438], dex_scrap.loc[534], sep="\n")

ID                                                               439
Name                                                         Mime-jr
Varieties          [{'is_default': True, 'pokemon': {'name': 'mim...
Generation                                                         4
Evolves_from                                                    None
Has_gender_diff                                                False
Rarity                                                          Baby
Name: 438, dtype: object
#                                                           439
Name                                                       Mime
Type1                                                   Psychic
Type2                                                     Fairy
Total                                                       310
HP                                                           20
Attack                                                       25
Defense                                     

In [None]:
# Matches 13 and 14: Minccino and Cinccino are different Pokémon ---> DISCARD
for api_row, scraped_row in ((571, 693), (572, 692)):
    print(species.loc[api_row])
    print(dex_scrap.loc[scraped_row])

ID                                                               572
Name                                                        Minccino
Varieties          [{'is_default': True, 'pokemon': {'name': 'min...
Generation                                                         5
Evolves_from                                                    None
Has_gender_diff                                                False
Rarity                                                        Common
Name: 571, dtype: object
#                                                           573
Name                                                   Cinccino
Type1                                                    Normal
Type2                                                       NaN
Total                                                       470
HP                                                           75
Attack                                                       95
Defense                                     

In [None]:
# Match 15: Flabébé, only the accents differ between the two names ---> KEEP
print(species.loc[668], dex_scrap.loc[799], sep="\n")

ID                                                               669
Name                                                         Flabebe
Varieties          [{'is_default': True, 'pokemon': {'name': 'fla...
Generation                                                         6
Evolves_from                                                    None
Has_gender_diff                                                False
Rarity                                                        Common
Name: 668, dtype: object
#                                                           669
Name                                                    Flabébé
Type1                                                     Fairy
Type2                                                       NaN
Total                                                       303
HP                                                           44
Attack                                                       38
Defense                                     

In [None]:
# Match 16: "Type-null" and "Type:" are the same Pokémon (same ID) ---> KEEP
print(species.loc[771], dex_scrap.loc[925], sep="\n")

ID                                                               772
Name                                                       Type-null
Varieties          [{'is_default': True, 'pokemon': {'name': 'typ...
Generation                                                         7
Evolves_from                                                    None
Has_gender_diff                                                False
Rarity                                                        Common
Name: 771, dtype: object
#                                                           772
Name                                                      Type:
Type1                                                    Normal
Type2                                                       NaN
Total                                                       534
HP                                                           95
Attack                                                       95
Defense                                     

In [None]:
# Matches 17, 18, 19 and 20: the Tapu Pokémon are the same on both sides ---> KEEP
for api_row, scraped_row in ((784, 939), (785, 940), (786, 941), (787, 942)):
    print(species.loc[api_row])
    print(dex_scrap.loc[scraped_row])

ID                                                               785
Name                                                       Tapu-koko
Varieties          [{'is_default': True, 'pokemon': {'name': 'tap...
Generation                                                         7
Evolves_from                                                    None
Has_gender_diff                                                False
Rarity                                                     Legendary
Name: 784, dtype: object
#                                                           785
Name                                                       Tapu
Type1                                                  Electric
Type2                                                     Fairy
Total                                                       570
HP                                                           70
Attack                                                      115
Defense                                     

In [None]:
# Matches 21 and 22: Cursola and Corsola are different Pokémon ---> DISCARD
for scraped_row in (279, 280):
    print(species.loc[863])
    print(dex_scrap.loc[scraped_row])

ID                                                               864
Name                                                         Cursola
Varieties          [{'is_default': True, 'pokemon': {'name': 'cur...
Generation                                                         8
Evolves_from                                                 Corsola
Has_gender_diff                                                False
Rarity                                                        Common
Name: 863, dtype: object
#                                                           222
Name                                                    Corsola
Type1                                                     Water
Type2                                                      Rock
Total                                                       410
HP                                                           65
Attack                                                       55
Defense                                     

In [None]:
# Match 23: Mr. Rime is the same Pokémon on both sides (same ID) ---> KEEP
print(species.loc[865], dex_scrap.loc[1024], sep="\n")

ID                                                               866
Name                                                         Mr-rime
Varieties          [{'is_default': True, 'pokemon': {'name': 'mr-...
Generation                                                         8
Evolves_from                                                 Mr-mime
Has_gender_diff                                                False
Rarity                                                        Common
Name: 865, dtype: object
#                                                           866
Name                                                        Mr.
Type1                                                       Ice
Type2                                                   Psychic
Total                                                       520
HP                                                           80
Attack                                                       85
Defense                                     

Dall'analisi manuale emerge che, tra i match con score pari a 1 (cioè quelli in cui non coincide il nome oppure non coincide l'ID), vanno scartati quelli in cui coincide il nome ma non l'ID: la coincidenza del nome è solo apparente, perché si tratta di Pokémon diversi con nomi molto simili, che magari differiscono per una sola lettera (come nel caso "SLAKING-SEAKING"). Vanno invece tenuti i match in cui non c'è coincidenza nel nome ma c'è coincidenza nell'ID. Basta quindi eliminare le righe in cui la colonna `ID` vale 0:

In [None]:
# Discard the pairs whose IDs do not coincide (feature 'ID' equal to 0): only ID matches are kept.
id_mismatch_rows = potential_matches.index[potential_matches['ID'] == 0]
potential_matches = potential_matches.drop(index=id_mismatch_rows)

In [None]:
# Preview the match table that will drive the merge.
potential_matches.head()

Unnamed: 0,level_0,level_1,Name,ID,Score
0,0,0,1.0,1,2.0
1,1,1,1.0,1,2.0
2,2,2,1.0,1,2.0
3,2,3,1.0,1,2.0
4,3,4,1.0,1,2.0


In [None]:
# Preview the API species table before merging it with the matches.
species.head()

Unnamed: 0,ID,Name,Varieties,Generation,Evolves_from,Has_gender_diff,Rarity
0,1,Bulbasaur,"[{'is_default': True, 'pokemon': {'name': 'bul...",1,,False,Common
1,2,Ivysaur,"[{'is_default': True, 'pokemon': {'name': 'ivy...",1,Bulbasaur,False,Common
2,3,Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...",1,Ivysaur,True,Common
3,4,Charmander,"[{'is_default': True, 'pokemon': {'name': 'cha...",1,,False,Common
4,5,Charmeleon,"[{'is_default': True, 'pokemon': {'name': 'cha...",1,Charmander,False,Common


Procedo con il merging:

In [None]:
# Attach the API species data to each match: level_0 holds the row label of `species`.
pokemon_complete = potential_matches.merge(species, left_on="level_0", right_index=True)

In [None]:
# Attach the scraped Pokédex data to each match: level_1 holds the row label of `dex_scrap`.
pokemon_complete = pokemon_complete.merge(dex_scrap, left_on="level_1", right_index=True)

In [None]:
# Remove the linkage bookkeeping (row labels, comparison features, score) and the
# API-side ID/Name duplicates; the scraped '#' and 'Name' columns are the ones kept.
linkage_columns = ["level_0", "level_1", "Name_x", "ID_x", "Score", "ID_y", "Name_y"]
pokemon_complete = pokemon_complete.drop(columns=linkage_columns)

In [None]:
# Put the columns in their final order, then move the current index into an 'index' column.
final_columns = [
    '#', 'Name', 'Generation', 'Rarity', 'Evolves_from', 'Has_gender_diff',
    'Type1', 'Type2', 'Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def',
    'Speed', 'Variant', 'Varieties', 'sprite_url',
]
pokemon_complete = pokemon_complete.reindex(columns=final_columns).reset_index()

In [None]:
# Display the integrated Pokémon table (the rendered output abbreviates the middle rows).
pokemon_complete

Unnamed: 0,index,#,Name,Generation,Rarity,Evolves_from,Has_gender_diff,Type1,Type2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Variant,Varieties,sprite_url
0,0,1,Bulbasaur,1,Common,,False,Grass,Poison,318,45,49,49,65,65,45,,"[{'is_default': True, 'pokemon': {'name': 'bul...",https://img.pokemondb.net/sprites/sword-shield...
1,1,2,Ivysaur,1,Common,Bulbasaur,False,Grass,Poison,405,60,62,63,80,80,60,,"[{'is_default': True, 'pokemon': {'name': 'ivy...",https://img.pokemondb.net/sprites/sword-shield...
2,2,3,Venusaur,1,Common,Ivysaur,True,Grass,Poison,525,80,82,83,100,100,80,,"[{'is_default': True, 'pokemon': {'name': 'ven...",https://img.pokemondb.net/sprites/sword-shield...
3,3,3,Venusaur,1,Common,Ivysaur,True,Grass,Poison,625,80,100,123,122,120,80,Mega Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...",https://img.pokemondb.net/sprites/sword-shield...
4,4,4,Charmander,1,Common,,False,Fire,,309,39,52,43,60,50,65,,"[{'is_default': True, 'pokemon': {'name': 'cha...",https://img.pokemondb.net/sprites/sword-shield...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1061,1072,896,Glastrier,8,Legendary,,False,Ice,,580,100,145,130,65,110,30,,"[{'is_default': True, 'pokemon': {'name': 'gla...",https://img.pokemondb.net/sprites/sword-shield...
1062,1073,897,Spectrier,8,Legendary,,False,Ghost,,580,100,65,60,145,80,130,,"[{'is_default': True, 'pokemon': {'name': 'spe...",https://img.pokemondb.net/sprites/sword-shield...
1063,1074,898,Calyrex,8,Legendary,,False,Psychic,Grass,500,100,80,80,80,80,80,,"[{'is_default': True, 'pokemon': {'name': 'cal...",https://img.pokemondb.net/sprites/sword-shield...
1064,1075,898,Calyrex,8,Legendary,,False,Psychic,Ice,680,100,165,150,85,130,50,Ice Rider,"[{'is_default': True, 'pokemon': {'name': 'cal...",https://img.pokemondb.net/sprites/sword-shield...


In [None]:
# Save the integrated Pokémon table both as gzip-compressed JSON Lines and as CSV.
# Forward slashes are used as separators: they work on every platform
# (Windows included) and avoid the invalid "\p" escape sequence that the
# backslash-separated literals contained.
pokemon_complete.to_json("Dataset integrati/pokemon_integrati.json.gz", orient='records', lines=True, compression='gzip')
pokemon_complete.to_csv("Dataset integrati/pokemon_integrati.csv")

# ITEMS

In [None]:
# Load the items table scraped from Pokémon Database and preview its first rows.
scraped_items_path = 'Scraping_datasets/pokemondb/items.json'
item_scrap = pd.read_json(scraped_items_path)
item_scrap.head()

Unnamed: 0,Name,Category,Effect,sprite_url
0,Ability Capsule,Hold items,A capsule that allows a Pokémon with two Abili...,https://img.pokemondb.net/sprites/items/abilit...
1,Ability Patch,General items,Changes a Pokémon's ability to its Hidden Abil...,https://img.pokemondb.net/s.png
2,Ability Urge,Battle items,"When used, it activates the Ability of an ally...",https://img.pokemondb.net/sprites/items/abilit...
3,Abomasite,Hold items,Enables Abomasnow to Mega Evolve during battle.,https://img.pokemondb.net/sprites/items/abomas...
4,Absolite,Hold items,Enables Absol to Mega Evolve during battle.,https://img.pokemondb.net/sprites/items/absoli...


In [None]:
# Load the cleaned API items table (gzip-compressed JSON Lines) and preview its first rows.
api_items_path = 'API_per_integrazione/items.json.gz'
item_api = pd.read_json(api_items_path, orient='records', lines=True, compression='gzip')
item_api.head()

Unnamed: 0,ID,Name,Attributes,Category,Effect
0,1,Master-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Catches a wild Pokémon every time.
1,2,Ultra-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
2,3,Great-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon. Success rate i...
3,4,Poke-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon.
4,5,Safari-ball,"[countable, consumable, usable-in-battle, hold...",Standard-balls,Tries to catch a wild Pokémon in the Great Mar...


In [None]:
# Replace every '-' with a space and capitalise the first letter of each word,
# so that the API names line up with the scraped ones and matching is easier.

# API dataset: same normalisation for both 'Name' and 'Category'.
for text_column in ('Name', 'Category'):
    item_api[text_column] = item_api[text_column].str.replace('-',' ').str.title()

# Sort alphabetically by name, renumber the rows from 0 and drop the API 'ID'.
item_api = (
    item_api
    .sort_values('Name')
    .reset_index(drop=True)
    .drop(columns=['ID'])
)
item_api.head()

Unnamed: 0,Name,Attributes,Category,Effect
0,Ability Capsule,[],Vitamins,Switches a Pokémon between its two possible (n...
1,Ability Patch,[],Vitamins,
2,Ability Urge,[],Miracle Shooter,Forcibly activates a friendly Pokémon's ability.
3,Abomasite,[],Mega Stones,Held: Allows Abomasnow to Mega Evolve into Meg...
4,Abra Candy,[],Species Candies,


In [None]:
# Scraped dataset: same name normalisation ('-' -> space, title case).
item_scrap['Name'] = item_scrap['Name'].str.replace('-',' ').str.title()
item_scrap.head()

Unnamed: 0,Name,Category,Effect,sprite_url
0,Ability Capsule,Hold items,A capsule that allows a Pokémon with two Abili...,https://img.pokemondb.net/sprites/items/abilit...
1,Ability Patch,General items,Changes a Pokémon's ability to its Hidden Abil...,https://img.pokemondb.net/s.png
2,Ability Urge,Battle items,"When used, it activates the Ability of an ally...",https://img.pokemondb.net/sprites/items/abilit...
3,Abomasite,Hold items,Enables Abomasnow to Mega Evolve during battle.,https://img.pokemondb.net/sprites/items/abomas...
4,Absolite,Hold items,Enables Absol to Mega Evolve during battle.,https://img.pokemondb.net/sprites/items/absoli...


In [None]:
# Initialise the record-linkage indexer used for the integration and the matching.
indexer = rl.Index()
# Full indexing (no blocking): every row of one table is paired with every row of the other.
indexer.full()



<Index>

In [None]:
# Build the candidate (API item row, scraped item row) pairs and report how many there are.
candidates = indexer.index(item_api, item_scrap)
n_candidates = len(candidates)
print(n_candidates)

1542720


In [None]:
# Keep every exact match between the items, but also keep the very similar ones.
compare = rl.Compare()
# Fuzzy string comparison of the item names (threshold 0.9), stored as feature 'Name'.
compare.string('Name', 'Name', threshold=0.9, label='Name')
# Exact equality of the item names, stored as feature 'Exact'.
compare.exact('Name', 'Name', label='Exact')

# One row per candidate pair, one column per comparison ('Name', then 'Exact').
features = compare.compute(candidates, item_api, item_scrap)

In [None]:
# Number of candidate pairs for each total score (sum of the comparison features), highest score first.
pair_scores = features.sum(axis=1)
pair_scores.value_counts().sort_index(ascending=False)

2.0        826
1.0         42
0.0    1541852
dtype: int64

Ci sono 826 match esatti, 42 match potenziali (stringhe di testo molto simili ma non esattamente identiche)

In [None]:
# Keep only the pairs where at least one comparison succeeded; reset_index turns the
# pair index into the columns level_0 (API item row) and level_1 (scraped item row).
pair_scores = features.sum(axis=1)
potential_matches = features.loc[pair_scores > 0].reset_index()
# Total score of the pair: 'Name' (fuzzy) feature + 'Exact' feature.
potential_matches['Score'] = potential_matches[['Name', 'Exact']].sum(axis=1)
potential_matches.head()

Unnamed: 0,level_0,level_1,Name,Exact,Score
0,0,0,1.0,1,2.0
1,1,1,1.0,1,2.0
2,2,2,1.0,1,2.0
3,3,3,1.0,1,2.0
4,5,4,1.0,1,2.0


In [None]:
# Split the pairs into exact matches (score 2) and merely similar ones (score 1).
exact_matches = potential_matches.loc[potential_matches['Score'].eq(2.0)]
temp = potential_matches.loc[potential_matches['Score'].eq(1.0)]

### Tratto i match potenziali

In [None]:
# The similarity threshold was set very high to keep the number of potential
# matches manageable. Attach the API record (via level_0) and the scraped
# record (via level_1) to every similar-but-not-exact pair.
item_similar = (
    temp
    .merge(item_api, left_on="level_0", right_index=True)
    .merge(item_scrap, left_on="level_1", right_index=True)
    .reset_index()
)
item_similar.head(50)

Unnamed: 0,index,level_0,level_1,Name_x,Exact,Score,Name_y,Attributes,Category_x,Effect_x,Name,Category_y,Effect_y,sprite_url
0,75,128,94,1.0,0,1.0,Charizardite X,[],Mega Stones,Held: Allows Charizard to Mega Evolve into Meg...,Charizardite Y,Hold items,Enables Charizard to Mega Evolve during battle.,https://img.pokemondb.net/sprites/items/chariz...
1,76,129,93,1.0,0,1.0,Charizardite Y,[],Mega Stones,Held: Allows Charizard to Mega Evolve into Meg...,Charizardite X,Hold items,Enables Charizard to Mega Evolve during battle.,https://img.pokemondb.net/sprites/items/chariz...
2,115,218,138,1.0,0,1.0,Dire Hit 2,[],Miracle Shooter,Raises critical hit rate by two stages in batt...,Dire Hit 3,Battle items,Greatly raises a Pokémon's critical-hit ratio ...,https://img.pokemondb.net/sprites/items/dire-h...
3,116,219,137,1.0,0,1.0,Dire Hit 3,[],Miracle Shooter,Raises critical hit rate by three stages in ba...,Dire Hit 2,Battle items,Raises a Pokémon's critical-hit ratio in battle.,https://img.pokemondb.net/sprites/items/dire-h...
4,153,582,179,1.0,0,1.0,Exp Candy L,[],Vitamins,,Exp. Candy L,Medicine,,https://img.pokemondb.net/s.png
5,154,583,180,1.0,0,1.0,Exp Candy M,[],Vitamins,,Exp. Candy M,Medicine,,https://img.pokemondb.net/s.png
6,155,584,181,1.0,0,1.0,Exp Candy S,[],Vitamins,,Exp. Candy S,Medicine,,https://img.pokemondb.net/s.png
7,156,585,182,1.0,0,1.0,Exp Candy Xl,[],Vitamins,,Exp. Candy Xl,Medicine,"Increases the Pokémon's Exp points by 30,000.",https://img.pokemondb.net/s.png
8,157,586,183,1.0,0,1.0,Exp Candy Xs,[],Vitamins,,Exp. Candy Xs,Medicine,,https://img.pokemondb.net/s.png
9,158,588,184,1.0,0,1.0,Exp Share,"[holdable, holdable-active]",Training,Held: Half the experience from a battle is spl...,Exp. Share,Hold items,An item to be held by a Pokémon. The holder ge...,https://img.pokemondb.net/sprites/items/exp-sh...


Guardando i nomi di questi match potenziali si vede che quelli che effettivamente coincidono sono quelli in cui da una parte abbiamo Exp, Spec, Sp, ... senza il . finale, mentre dall'altra abbiamo Exp., Spec., Sp., ... con il . finale

Vado a tenere questi e ad eliminare gli altri:

In [None]:
# Align the API spelling of abbreviations with the scraped one: the scraped
# names write "Exp.", "X Sp." and "Spec." with a trailing dot, the API names
# (column Name_y) omit it.
# Only whole-word abbreviations that are not already followed by a dot are
# rewritten, so re-running this cell is a no-op (no "Exp..") and longer words
# that merely start with an abbreviation are left intact.
item_similar['Name_y'] = item_similar['Name_y'].str.replace(
    r'\b(Exp|X Sp|Spec)\b(?!\.)', r'\1.', regex=True
)

In [None]:
# Similar pairs whose API name (Name_y) now equals the scraped name (Name)
# are accepted as real matches.
match = item_similar.loc[item_similar['Name_y'].eq(item_similar['Name'])]
match.head()

Unnamed: 0,index,level_0,level_1,Name_x,Exact,Score,Name_y,Attributes,Category_x,Effect_x,Name,Category_y,Effect_y,sprite_url
4,153,582,179,1.0,0,1.0,Exp. Candy L,[],Vitamins,,Exp. Candy L,Medicine,,https://img.pokemondb.net/s.png
5,154,583,180,1.0,0,1.0,Exp. Candy M,[],Vitamins,,Exp. Candy M,Medicine,,https://img.pokemondb.net/s.png
6,155,584,181,1.0,0,1.0,Exp. Candy S,[],Vitamins,,Exp. Candy S,Medicine,,https://img.pokemondb.net/s.png
7,156,585,182,1.0,0,1.0,Exp. Candy Xl,[],Vitamins,,Exp. Candy Xl,Medicine,"Increases the Pokémon's Exp points by 30,000.",https://img.pokemondb.net/s.png
8,157,586,183,1.0,0,1.0,Exp. Candy Xs,[],Vitamins,,Exp. Candy Xs,Medicine,,https://img.pokemondb.net/s.png


### Tratto i match esatti

In [None]:
# Enrich the exact matches with both sources:
#   level_0 is the row label in the API frame,
#   level_1 is the row label in the scraping frame.
item_complete = (
    exact_matches
    .merge(item_api, left_on="level_0", right_index=True)
    .merge(item_scrap, left_on="level_1", right_index=True)
)

item_complete

Unnamed: 0,level_0,level_1,Name_x,Exact,Score,Name_y,Attributes,Category_x,Effect_x,Name,Category_y,Effect_y,sprite_url
0,0,0,1.0,1,2.0,Ability Capsule,[],Vitamins,Switches a Pokémon between its two possible (n...,Ability Capsule,Hold items,A capsule that allows a Pokémon with two Abili...,https://img.pokemondb.net/sprites/items/abilit...
1,1,1,1.0,1,2.0,Ability Patch,[],Vitamins,,Ability Patch,General items,Changes a Pokémon's ability to its Hidden Abil...,https://img.pokemondb.net/s.png
2,2,2,1.0,1,2.0,Ability Urge,[],Miracle Shooter,Forcibly activates a friendly Pokémon's ability.,Ability Urge,Battle items,"When used, it activates the Ability of an ally...",https://img.pokemondb.net/sprites/items/abilit...
3,3,3,1.0,1,2.0,Abomasite,[],Mega Stones,Held: Allows Abomasnow to Mega Evolve into Meg...,Abomasite,Hold items,Enables Abomasnow to Mega Evolve during battle.,https://img.pokemondb.net/sprites/items/abomas...
4,5,4,1.0,1,2.0,Absolite,[],Mega Stones,Held: Allows Absol to Mega Evolve into Mega Ab...,Absolite,Hold items,Enables Absol to Mega Evolve during battle.,https://img.pokemondb.net/sprites/items/absoli...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
863,1597,955,1.0,1,2.0,Yellow Scarf,[holdable-active],Scarves,Raises the holder's Toughness while in a contest.,Yellow Scarf,General items,Raises holder's Tough aspect in a Contest.,https://img.pokemondb.net/sprites/items/yellow...
864,1598,956,1.0,1,2.0,Yellow Shard,[underground],Collectibles,No effect. Can be traded for items or moves.,Yellow Shard,General items,A small yellow shard. It appears to be from so...,https://img.pokemondb.net/sprites/items/yellow...
865,1601,957,1.0,1,2.0,Zap Plate,"[holdable, holdable-active, underground]",Plates,Held: Electric-Type moves from holder do 20% m...,Zap Plate,Hold items,Increases power of Electric-type moves. Change...,https://img.pokemondb.net/sprites/items/zap-pl...
866,1603,958,1.0,1,2.0,Zinc,"[countable, consumable, usable-overworld, usab...",Vitamins,Raises Special Defense and happiness.,Zinc,Medicine,Increases Special Defense EVs by 10.,https://img.pokemondb.net/sprites/items/zinc.png


### Concatenazione match esatti e simili

In [None]:
# Stack exact and accepted similar matches, order them by scraped name and
# discard the matching bookkeeping columns.
item_total = (
    pd.concat([item_complete, match])
    .sort_values('Name')
    .drop(columns=["level_0", "level_1", "Name_x", "Exact", "Score", "Name_y", "index"])
)
item_total.tail()

Unnamed: 0,Attributes,Category_x,Effect_x,Name,Category_y,Effect_y,sprite_url
863,[holdable-active],Scarves,Raises the holder's Toughness while in a contest.,Yellow Scarf,General items,Raises holder's Tough aspect in a Contest.,https://img.pokemondb.net/sprites/items/yellow...
864,[underground],Collectibles,No effect. Can be traded for items or moves.,Yellow Shard,General items,A small yellow shard. It appears to be from so...,https://img.pokemondb.net/sprites/items/yellow...
865,"[holdable, holdable-active, underground]",Plates,Held: Electric-Type moves from holder do 20% m...,Zap Plate,Hold items,Increases power of Electric-type moves. Change...,https://img.pokemondb.net/sprites/items/zap-pl...
866,"[countable, consumable, usable-overworld, usab...",Vitamins,Raises Special Defense and happiness.,Zinc,Medicine,Increases Special Defense EVs by 10.,https://img.pokemondb.net/sprites/items/zinc.png
867,"[holdable, holdable-active]",Held Items,Held: Provides a 1/5 (20%) boost in accuracy i...,Zoom Lens,Hold items,Raises a move's accuracy if the holder moves a...,https://img.pokemondb.net/sprites/items/zoom-l...


In [None]:
# Renumber the rows 0..n-1 after sorting, discarding the old labels.
item_total = item_total.reset_index(drop=True)
item_total.tail()

Unnamed: 0,Attributes,Category_x,Effect_x,Name,Category_y,Effect_y,sprite_url
835,[holdable-active],Scarves,Raises the holder's Toughness while in a contest.,Yellow Scarf,General items,Raises holder's Tough aspect in a Contest.,https://img.pokemondb.net/sprites/items/yellow...
836,[underground],Collectibles,No effect. Can be traded for items or moves.,Yellow Shard,General items,A small yellow shard. It appears to be from so...,https://img.pokemondb.net/sprites/items/yellow...
837,"[holdable, holdable-active, underground]",Plates,Held: Electric-Type moves from holder do 20% m...,Zap Plate,Hold items,Increases power of Electric-type moves. Change...,https://img.pokemondb.net/sprites/items/zap-pl...
838,"[countable, consumable, usable-overworld, usab...",Vitamins,Raises Special Defense and happiness.,Zinc,Medicine,Increases Special Defense EVs by 10.,https://img.pokemondb.net/sprites/items/zinc.png
839,"[holdable, holdable-active]",Held Items,Held: Provides a 1/5 (20%) boost in accuracy i...,Zoom Lens,Hold items,Raises a move's accuracy if the holder moves a...,https://img.pokemondb.net/sprites/items/zoom-l...


In [None]:
# Put the columns in presentation order: name first, then the API / scraping
# values side by side.
item_total = item_total.reindex(columns=['Name', 'Effect_x', 'Effect_y', 'Attributes', 'Category_x', 'Category_y',
                                         'sprite_url'])

# Label the merge suffixes by source. rename() returns a new frame, so the
# result must be assigned back; otherwise item_total keeps the *_x / *_y
# names and only the displayed preview is renamed.
item_total = item_total.rename(columns={"Effect_x": "Effect_API", "Effect_y": "Effect_SCRAPING",
                                        "Category_x": "Category_API", "Category_y": "Category_SCRAPING"})
item_total

Unnamed: 0,Name,Effect_API,Effect_SCRAPING,Attributes,Category_API,Category_SCRAPING,sprite_url
0,Ability Capsule,Switches a Pokémon between its two possible (n...,A capsule that allows a Pokémon with two Abili...,[],Vitamins,Hold items,https://img.pokemondb.net/sprites/items/abilit...
1,Ability Patch,,Changes a Pokémon's ability to its Hidden Abil...,[],Vitamins,General items,https://img.pokemondb.net/s.png
2,Ability Urge,Forcibly activates a friendly Pokémon's ability.,"When used, it activates the Ability of an ally...",[],Miracle Shooter,Battle items,https://img.pokemondb.net/sprites/items/abilit...
3,Abomasite,Held: Allows Abomasnow to Mega Evolve into Meg...,Enables Abomasnow to Mega Evolve during battle.,[],Mega Stones,Hold items,https://img.pokemondb.net/sprites/items/abomas...
4,Absolite,Held: Allows Absol to Mega Evolve into Mega Ab...,Enables Absol to Mega Evolve during battle.,[],Mega Stones,Hold items,https://img.pokemondb.net/sprites/items/absoli...
...,...,...,...,...,...,...,...
835,Yellow Scarf,Raises the holder's Toughness while in a contest.,Raises holder's Tough aspect in a Contest.,[holdable-active],Scarves,General items,https://img.pokemondb.net/sprites/items/yellow...
836,Yellow Shard,No effect. Can be traded for items or moves.,A small yellow shard. It appears to be from so...,[underground],Collectibles,General items,https://img.pokemondb.net/sprites/items/yellow...
837,Zap Plate,Held: Electric-Type moves from holder do 20% m...,Increases power of Electric-type moves. Change...,"[holdable, holdable-active, underground]",Plates,Hold items,https://img.pokemondb.net/sprites/items/zap-pl...
838,Zinc,Raises Special Defense and happiness.,Increases Special Defense EVs by 10.,"[countable, consumable, usable-overworld, usab...",Vitamins,Medicine,https://img.pokemondb.net/sprites/items/zinc.png


In [None]:
# Save the integrated items both as gzipped JSON lines and as CSV.
# Forward slashes are used as separator (as in the read paths of this
# notebook): a backslash followed by a letter is an invalid escape sequence in
# a normal string literal and only works as a path separator on Windows.
item_total.to_json("Dataset integrati/item_integrati.json.gz", orient='records', lines=True, compression='gzip')
item_total.to_csv("Dataset integrati/item_integrati.csv")

# ABILITIES

In [None]:
# Load the abilities scraped from pokemondb and preview the first rows.
ability_scrap = pd.read_json('Scraping_datasets/pokemondb/abilities.json')
ability_scrap.head()

Unnamed: 0,Name,Pokémon,Description,Gen.
0,Adaptability,16,Powers up moves of the same type.,4
1,Aerilate,2,Turns Normal-type moves into Flying-type moves.,6
2,Aftermath,10,Damages the attacker landing the finishing hit.,4
3,Air Lock,1,Eliminates the effects of weather.,3
4,Analytic,12,Boosts move power when the Pokémon moves last.,5


In [None]:
# As agreed, the 'Pokémon' attribute is removed from the scraped abilities.
ability_scrap = ability_scrap.drop(columns='Pokémon')
ability_scrap.head()

Unnamed: 0,Name,Description,Gen.
0,Adaptability,Powers up moves of the same type.,4
1,Aerilate,Turns Normal-type moves into Flying-type moves.,6
2,Aftermath,Damages the attacker landing the finishing hit.,4
3,Air Lock,Eliminates the effects of weather.,3
4,Analytic,Boosts move power when the Pokémon moves last.,5


In [None]:
# Load the API abilities (gzipped JSON lines, one record per line) and preview them.
ability_api = pd.read_json('API_per_integrazione/Abilities.json.gz', compression='gzip', orient='records', lines=True)
ability_api.head()

Unnamed: 0,ID,Name,Generation,Is_Main_Series,Effect_changes,Effect_entries,Pokemon
0,1,Stench,3,True,[{'effect_entries': [{'effect': 'Hat im Kampf ...,This Pokémon's damaging moves have a 10% chanc...,"[{'is_hidden': True, 'pokemon': {'name': 'gloo..."
1,2,Drizzle,3,True,[],The weather changes to rain when this Pokémon ...,"[{'is_hidden': True, 'pokemon': {'name': 'poli..."
2,3,Speed-boost,3,True,[],This Pokémon's Speed rises one stage after eac...,"[{'is_hidden': False, 'pokemon': {'name': 'yan..."
3,4,Battle-armor,3,True,[],Moves cannot score critical hits against this ...,"[{'is_hidden': True, 'pokemon': {'name': 'cubo..."
4,5,Sturdy,3,True,[{'effect_entries': [{'effect': 'Verhindert ke...,"When this Pokémon is at full HP, any hit that ...","[{'is_hidden': False, 'pokemon': {'name': 'geo..."


In [None]:
# As agreed, Is_Main_Series, Effect_changes and Effect_entries are removed
# from the API abilities.
ability_api = ability_api.drop(columns=['Is_Main_Series', 'Effect_changes', 'Effect_entries'])
ability_api.head()

Unnamed: 0,ID,Name,Generation,Pokemon
0,1,Stench,3,"[{'is_hidden': True, 'pokemon': {'name': 'gloo..."
1,2,Drizzle,3,"[{'is_hidden': True, 'pokemon': {'name': 'poli..."
2,3,Speed-boost,3,"[{'is_hidden': False, 'pokemon': {'name': 'yan..."
3,4,Battle-armor,3,"[{'is_hidden': True, 'pokemon': {'name': 'cubo..."
4,5,Sturdy,3,"[{'is_hidden': False, 'pokemon': {'name': 'geo..."


In [None]:
# As for the items: replace hyphens with spaces and capitalise every word, so
# that formatting differences do not prevent a match.

# API
ability_api['Name'] = ability_api['Name'].str.replace('-', ' ').str.title()

# Sort alphabetically; reset_index() keeps the old labels in an 'index' column.
ability_api = ability_api.sort_values('Name').reset_index()

In [None]:
# Remove the leftover 'index' column and the API 'ID'.
ability_api = ability_api.drop(columns=['index', 'ID'])

In [None]:
# SCRAPING: same name normalisation (hyphens -> spaces, Title Case).
ability_scrap['Name'] = ability_scrap['Name'].str.replace('-', ' ').str.title()

In [None]:
# Initialise the record-linkage indexer used for integration and matching.
# A "full" index pairs every record of one frame with every record of the other.
indexer = rl.Index()
indexer.full()



<Index>

In [None]:
# Build the candidate (API ability, scraped ability) pairs and report how many there are.
candidates = indexer.index(ability_api, ability_scrap)
print(len(candidates))

71022


In [None]:
# Only exact name equality is compared for abilities: unlike the items
# section, no fuzzy string comparison is configured here, so similar-but-not-
# identical names are not captured by this step.
compare = rl.Compare()
compare.exact('Name', 'Name', label='Exact')

features = compare.compute(candidates, ability_api, ability_scrap)

In [None]:
# Number of candidate pairs per score (1 = exact name match, 0 = no match).
features.sum(axis=1).value_counts().sort_index(ascending=False)

1      264
0    70758
dtype: int64

In [None]:
# Keep the matching pairs only and expose the pair index as the
# level_0 (API) / level_1 (scraping) columns.
potential_matches = features.loc[features.sum(axis=1) > 0].reset_index()
potential_matches.head()

Unnamed: 0,level_0,level_1,Exact
0,0,0,1
1,1,1,1
2,2,2,1
3,3,3,1
4,4,4,1


Si sono perse 3 o 4 abilità (quelle il cui nome non coincide esattamente fra le due fonti)

In [None]:
# Enrich the matched pairs with both sources:
#   level_0 is the row label in the API frame,
#   level_1 is the row label in the scraping frame.
abilities_complete = (
    potential_matches
    .merge(ability_api, left_on="level_0", right_index=True)
    .merge(ability_scrap, left_on="level_1", right_index=True)
)

abilities_complete

Unnamed: 0,level_0,level_1,Exact,Name_x,Generation,Pokemon,Name_y,Description,Gen.
0,0,0,1,Adaptability,4,"[{'is_hidden': False, 'pokemon': {'name': 'eev...",Adaptability,Powers up moves of the same type.,4
1,1,1,1,Aerilate,6,"[{'is_hidden': False, 'pokemon': {'name': 'pin...",Aerilate,Turns Normal-type moves into Flying-type moves.,6
2,2,2,1,Aftermath,4,"[{'is_hidden': True, 'pokemon': {'name': 'volt...",Aftermath,Damages the attacker landing the finishing hit.,4
3,3,3,1,Air Lock,3,"[{'is_hidden': False, 'pokemon': {'name': 'ray...",Air Lock,Eliminates the effects of weather.,3
4,4,4,1,Analytic,5,"[{'is_hidden': True, 'pokemon': {'name': 'magn...",Analytic,Boosts move power when the Pokémon moves last.,5
...,...,...,...,...,...,...,...,...,...
259,262,261,1,White Smoke,3,"[{'is_hidden': False, 'pokemon': {'name': 'tor...",White Smoke,Prevents other Pokémon from lowering its stats.,3
260,263,262,1,Wimp Out,7,"[{'is_hidden': False, 'pokemon': {'name': 'wim...",Wimp Out,Switches out when HP drops below half.,7
261,264,263,1,Wonder Guard,3,"[{'is_hidden': False, 'pokemon': {'name': 'she...",Wonder Guard,Only supereffective moves will hit.,3
262,265,264,1,Wonder Skin,5,"[{'is_hidden': True, 'pokemon': {'name': 'veno...",Wonder Skin,Makes status-changing moves more likely to miss.,5


In [None]:
# Discard the matching bookkeeping columns plus the API duplicates of the
# name and generation (the scraped 'Name_y' and 'Gen.' are kept).
abilities_complete = abilities_complete.drop(
    columns=["level_0", "level_1", "Exact", "Name_x", "Generation"]
)
abilities_complete.tail()

Unnamed: 0,Pokemon,Name_y,Description,Gen.
259,"[{'is_hidden': False, 'pokemon': {'name': 'tor...",White Smoke,Prevents other Pokémon from lowering its stats.,3
260,"[{'is_hidden': False, 'pokemon': {'name': 'wim...",Wimp Out,Switches out when HP drops below half.,7
261,"[{'is_hidden': False, 'pokemon': {'name': 'she...",Wonder Guard,Only supereffective moves will hit.,3
262,"[{'is_hidden': True, 'pokemon': {'name': 'veno...",Wonder Skin,Makes status-changing moves more likely to miss.,5
263,"[{'is_hidden': True, 'pokemon': {'name': 'darm...",Zen Mode,Changes form when HP drops below half.,5


In [None]:
# Reorder the columns, then give them their final names.
abilities_complete = (
    abilities_complete
    .reindex(columns=['Name_y', 'Gen.', 'Description', 'Pokemon'])
    .rename(columns={
        "Name_y": "Name",
        "Gen.": "Generation",
        "Description": "Description",
        "Pokemon": "Pokemon",
    })
)

In [None]:
# Show the integrated abilities table.
abilities_complete

Unnamed: 0,Name,Generation,Description,Pokemon
0,Adaptability,4,Powers up moves of the same type.,"[{'is_hidden': False, 'pokemon': {'name': 'eev..."
1,Aerilate,6,Turns Normal-type moves into Flying-type moves.,"[{'is_hidden': False, 'pokemon': {'name': 'pin..."
2,Aftermath,4,Damages the attacker landing the finishing hit.,"[{'is_hidden': True, 'pokemon': {'name': 'volt..."
3,Air Lock,3,Eliminates the effects of weather.,"[{'is_hidden': False, 'pokemon': {'name': 'ray..."
4,Analytic,5,Boosts move power when the Pokémon moves last.,"[{'is_hidden': True, 'pokemon': {'name': 'magn..."
...,...,...,...,...
259,White Smoke,3,Prevents other Pokémon from lowering its stats.,"[{'is_hidden': False, 'pokemon': {'name': 'tor..."
260,Wimp Out,7,Switches out when HP drops below half.,"[{'is_hidden': False, 'pokemon': {'name': 'wim..."
261,Wonder Guard,3,Only supereffective moves will hit.,"[{'is_hidden': False, 'pokemon': {'name': 'she..."
262,Wonder Skin,5,Makes status-changing moves more likely to miss.,"[{'is_hidden': True, 'pokemon': {'name': 'veno..."


SALVATAGGIO ABILITA' SOTTO DOPO CONTROLLO RIGHE NON MATCHATE

# MOSSE

In [None]:
# Load the moves scraped from pokemondb and preview the first rows.
moves_scrap = pd.read_json('Scraping_datasets/pokemondb/moves.json')
moves_scrap.head()

Unnamed: 0,Name,Type,Power,Acc.,PP,Effect,Prob. (%),Cat.
0,"10,000,000 Volt Thunderbolt",Electric,195,—,1,Pikachu-exclusive Z-Move. High critical hit ra...,—,
1,Absorb,Grass,20,100,25,User recovers half the HP inflicted on opponent.,—,
2,Accelerock,Rock,40,100,20,User attacks first.,—,
3,Acid,Poison,40,100,30,May lower opponent's Special Defense.,10,
4,Acid Armor,Poison,—,—,20,Sharply raises user's Defense.,—,


In [None]:
# Load the API moves (gzipped JSON lines, one record per line) and preview them.
moves_api = pd.read_json('API_per_integrazione/moves.json.gz', compression='gzip', orient='records', lines=True)
moves_api.head()

Unnamed: 0,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
0,1,Pound,Normal,40.0,100.0,35.0,Physical,1,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
1,2,Karate-chop,Fighting,50.0,100.0,25.0,Physical,1,"[{'name': 'mankey', 'url': 'https://pokeapi.co..."
2,3,Double-slap,Normal,15.0,85.0,10.0,Physical,1,"[{'name': 'clefairy', 'url': 'https://pokeapi...."
3,4,Comet-punch,Normal,18.0,85.0,15.0,Physical,1,"[{'name': 'hitmonchan', 'url': 'https://pokeap..."
4,5,Mega-punch,Normal,80.0,85.0,20.0,Physical,1,"[{'name': 'charmander', 'url': 'https://pokeap..."


In [None]:
# As for the items: replace hyphens with spaces and capitalise every word, so
# that formatting differences do not prevent a match.

# API
moves_api['Name'] = moves_api['Name'].str.replace('-', ' ').str.title()

# Sort alphabetically; reset_index() keeps the old labels in an 'index' column.
moves_api = moves_api.sort_values('Name').reset_index()

In [None]:
# SCRAPING: same name normalisation (hyphens -> spaces, Title Case).
moves_scrap['Name'] = moves_scrap['Name'].str.replace('-', ' ').str.title()

In [None]:
# Initialise the record-linkage indexer used for integration and matching.
# A "full" index pairs every record of one frame with every record of the other.
indexer = rl.Index()
indexer.full()



<Index>

In [None]:
# Build the candidate (API move, scraped move) pairs and report how many there are.
candidates = indexer.index(moves_api, moves_scrap)
print(len(candidates))

730060


In [None]:
# Compare the move names for exact equality only; the resulting feature
# column is labelled 'Name'.
compare = rl.Compare()
compare.exact('Name', 'Name', label='Name')

features = compare.compute(candidates, moves_api, moves_scrap)

In [None]:
# Number of candidate pairs per score (1 = exact name match, 0 = no match).
features.sum(axis=1).value_counts().sort_index(ascending=False)

1       783
0    729277
dtype: int64

In [None]:
# Keep the matching pairs only and expose the pair index as the
# level_0 (API) / level_1 (scraping) columns.
potential_matches = features.loc[features.sum(axis=1) > 0].reset_index()
potential_matches

Unnamed: 0,level_0,level_1,Name
0,1,1,1
1,2,2,1
2,3,3,1
3,4,4,1
4,7,6,1
...,...,...,...
778,839,860,1
779,840,861,1
780,841,862,1
781,842,863,1


In [None]:
# Enrich the matched pairs with both sources:
#   level_0 is the row label in the API frame,
#   level_1 is the row label in the scraping frame.
moves_complete = (
    potential_matches
    .merge(moves_api, left_on="level_0", right_index=True)
    .merge(moves_scrap, left_on="level_1", right_index=True)
)

moves_complete.head()

Unnamed: 0,level_0,level_1,Name_x,index,ID,Name_y,Type_x,Power_x,Accuracy,PP_x,...,Introducted_in,Learned_by,Name,Type_y,Power_y,Acc.,PP_y,Effect,Prob. (%),Cat.
0,1,1,1,70,71,Absorb,Grass,20.0,100.0,25.0,...,1,"[{'name': 'zubat', 'url': 'https://pokeapi.co/...",Absorb,Grass,20,100,25,User recovers half the HP inflicted on opponent.,—,
1,2,2,1,708,709,Accelerock,Rock,40.0,100.0,20.0,...,7,"[{'name': 'lycanroc-midday', 'url': 'https://p...",Accelerock,Rock,40,100,20,User attacks first.,—,
2,3,3,1,50,51,Acid,Poison,40.0,100.0,30.0,...,1,"[{'name': 'ekans', 'url': 'https://pokeapi.co/...",Acid,Poison,40,100,30,May lower opponent's Special Defense.,10,
3,4,4,1,150,151,Acid Armor,Poison,,,20.0,...,1,"[{'name': 'tentacool', 'url': 'https://pokeapi...",Acid Armor,Poison,—,—,20,Sharply raises user's Defense.,—,
4,7,6,1,490,491,Acid Spray,Poison,40.0,100.0,20.0,...,5,"[{'name': 'ekans', 'url': 'https://pokeapi.co/...",Acid Spray,Poison,40,100,20,Sharply lowers opponent's Special Defense.,100,


In [None]:
# Discard the matching bookkeeping columns, the API identifiers, the API copy
# of the name and the scraped 'Cat.' column.
moves_complete = moves_complete.drop(
    columns=["level_0", "level_1", "Name_x", "index", "ID", "Name_y", "Cat."]
)
moves_complete.tail()

Unnamed: 0,Type_x,Power_x,Accuracy,PP_x,Damage_class,Introducted_in,Learned_by,Name,Type_y,Power_y,Acc.,PP_y,Effect,Prob. (%)
778,Normal,,,10.0,Status,3,"[{'name': 'squirtle', 'url': 'https://pokeapi....",Yawn,Normal,—,—,10,Puts opponent to sleep in the next turn.,—
779,Electric,120.0,50.0,5.0,Special,2,"[{'name': 'pikachu', 'url': 'https://pokeapi.c...",Zap Cannon,Electric,120,50,5,Paralyzes opponent.,100
780,Psychic,80.0,90.0,15.0,Physical,4,"[{'name': 'squirtle', 'url': 'https://pokeapi....",Zen Headbutt,Psychic,80,90,15,May cause flinching.,20
781,Electric,80.0,100.0,10.0,Physical,7,"[{'name': 'togedemaru', 'url': 'https://pokeap...",Zing Zap,Electric,80,100,10,May cause flinching.,30
782,Electric,80.0,100.0,10.0,Physical,7,"[{'name': 'pikachu-starter', 'url': 'https://p...",Zippy Zap,Electric,50,100,15,Always results in a critical hit.,100


In [None]:
# Order the columns so that each API value sits next to its scraped counterpart.
moves_complete = moves_complete.reindex(
    columns=[
        'Name', 'Introducted_in',
        'Type_x', 'Type_y',
        'Power_x', 'Power_y',
        'Accuracy', 'Acc.',
        'PP_x', 'PP_y',
        'Damage_class', 'Effect', 'Prob. (%)',
        'Learned_by',
    ]
)

# Preview of the table with source-labelled column names.
# NOTE(review): the renamed frame is only displayed, not assigned, so
# moves_complete itself keeps the *_x / *_y names. Confirm whether later cells
# rely on the old names before assigning the result back.
moves_complete.rename(
    columns={
        "Name": "Name",
        "Introducted_in": "Generation",
        "Type_x": "Type_API", "Type_y": "Type_SCRAP",
        "Power_x": "Power_API", "Power_y": "Power_SCRAP",
        "Accuracy": "Accuracy_API", "Acc.": "Accuracy_SCRAP",
        "PP_x": "PP_API", "PP_y": "PP_SCRAP",
        "Damage_class": "Damage_class",
        "Effect": "Effect",
        "Prob. (%)": "Prob. (%)",
        "Learned_by": "Learned_by",
    }
)

Unnamed: 0,Name,Generation,Type_API,Type_SCRAP,Power_API,Power_SCRAP,Accuracy_API,Accuracy_SCRAP,PP_API,PP_SCRAP,Damage_class,Effect,Prob. (%),Learned_by
0,Absorb,1,Grass,Grass,20.0,20,100.0,100,25.0,25,Special,User recovers half the HP inflicted on opponent.,—,"[{'name': 'zubat', 'url': 'https://pokeapi.co/..."
1,Accelerock,7,Rock,Rock,40.0,40,100.0,100,20.0,20,Physical,User attacks first.,—,"[{'name': 'lycanroc-midday', 'url': 'https://p..."
2,Acid,1,Poison,Poison,40.0,40,100.0,100,30.0,30,Special,May lower opponent's Special Defense.,10,"[{'name': 'ekans', 'url': 'https://pokeapi.co/..."
3,Acid Armor,1,Poison,Poison,,—,,—,20.0,20,Status,Sharply raises user's Defense.,—,"[{'name': 'tentacool', 'url': 'https://pokeapi..."
4,Acid Spray,5,Poison,Poison,40.0,40,100.0,100,20.0,20,Special,Sharply lowers opponent's Special Defense.,100,"[{'name': 'ekans', 'url': 'https://pokeapi.co/..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
778,Yawn,3,Normal,Normal,,—,,—,10.0,10,Status,Puts opponent to sleep in the next turn.,—,"[{'name': 'squirtle', 'url': 'https://pokeapi...."
779,Zap Cannon,2,Electric,Electric,120.0,120,50.0,50,5.0,5,Special,Paralyzes opponent.,100,"[{'name': 'pikachu', 'url': 'https://pokeapi.c..."
780,Zen Headbutt,4,Psychic,Psychic,80.0,80,90.0,90,15.0,15,Physical,May cause flinching.,20,"[{'name': 'squirtle', 'url': 'https://pokeapi...."
781,Zing Zap,7,Electric,Electric,80.0,80,100.0,100,10.0,10,Physical,May cause flinching.,30,"[{'name': 'togedemaru', 'url': 'https://pokeap..."


SALVATAGGIO MOSSE SOTTO DOPO CONTROLLO RIGHE NON MATCHATE

# CONTROLLO NON MATCH

## Dataset Item, controllo intersezione nomi:

In [None]:
# Names that appear both in the integrated item table and in the scraped items.
intersection_arr_scrap = np.intersect1d(item_total['Name'], item_scrap['Name'])

In [None]:
# Scraped items whose name is not part of the integrated table.
item_scrap_unmatch = item_scrap[~item_scrap['Name'].isin(intersection_arr_scrap)]

In [None]:
# Preview the scraped items that found no API counterpart.
item_scrap_unmatch.head()

Unnamed: 0,Name,Category,Effect,sprite_url
6,Adamant Crystal,Hold items,,https://img.pokemondb.net/s.png
15,Aloraichium Z,Hold items,Allows Alolan Raichu to upgrade Thunderbolt to...,https://img.pokemondb.net/sprites/items/alorai...
21,Apricorn,General items,A fruit whose shell can be used to form the ca...,https://img.pokemondb.net/s.png
27,Aux Evasion,Battle items,Makes moves less likely to strike a Pokémon du...,https://img.pokemondb.net/s.png
28,Aux Guard,Battle items,Sharply boosts the defensive stats of a Pokémo...,https://img.pokemondb.net/s.png


Gli strumenti non matchati sono strumenti introdotti in Leggende Pokemon Arceus o Cristalli Z (comunque non utilizzabili nel competitivo attuale).

In [None]:
# Names shared by the integrated item table and the API items ...
intersection_arr_api = np.intersect1d(item_total['Name'], item_api['Name'])
# ... and the API items that are not among them.
item_api_unmatch = item_api[~item_api['Name'].isin(intersection_arr_api)]

In [None]:
# Show the API items that found no scraping counterpart.
item_api_unmatch

Unnamed: 0,Name,Attributes,Category,Effect
4,Abra Candy,[],Species Candies,
7,Acro Bike,[],Gameplay,"More maneuverable than the Mach Bike, and allo..."
11,Adventure Rules,[],Gameplay,Contains basic gameplay information.
12,Aerodactyl Candy,[],Species Candies,
17,Air Mail,[],All Mail,Lets a Trainer write a message and send it via...
...,...,...,...,...
1599,Z Power Ring,[],Unused,XXX new effect for z-power-ring
1600,Z Ring,[],Gameplay,Allows the player's Pokémon to use Z-moves.
1602,Zapdos Candy,[],Species Candies,
1605,Zubat Candy,[],Species Candies,


Per quanto riguarda gli strumenti non matchati dall'API sono tutti strumenti di trama.

## Dataset Abilities, controllo intersezione nomi:

In [None]:
# Names shared by the integrated abilities and the scraped abilities ...
intersection_abi_scrap = np.intersect1d(abilities_complete['Name'], ability_scrap['Name'])
# ... and the scraped abilities that are not among them.
abil_scrap_unmatch = ability_scrap[~ability_scrap['Name'].isin(intersection_abi_scrap)]

In [None]:
# Show the scraped abilities that found no API counterpart.
abil_scrap_unmatch

Unnamed: 0,Name,Description,Gen.
9,As One,Combines Unnerve and Chilling Neigh/Grim Neigh,8
47,Dragon'S Maw,Signature ability of Regidrago. Powers up Drag...,8


In [None]:
# Names shared by the integrated abilities and the API abilities ...
intersection_abi_api = np.intersect1d(abilities_complete['Name'], ability_api['Name'])
# ... and the API abilities that are not among them.
abil_api_unmatch = ability_api[~ability_api['Name'].isin(intersection_abi_api)]

In [None]:
# Show the API abilities that found no scraping counterpart.
abil_api_unmatch

Unnamed: 0,Name,Generation,Pokemon
9,As One Glastrier,8,"[{'is_hidden': False, 'pokemon': {'name': 'cal..."
10,As One Spectrier,8,"[{'is_hidden': False, 'pokemon': {'name': 'cal..."
48,Dragons Maw,8,"[{'is_hidden': False, 'pokemon': {'name': 'reg..."


Vado ad aggiungere le descrizioni a quello delle API, che è già a posto così:

In [None]:
# Descriptions for the API-only abilities, keyed by ability name rather than
# by hard-coded row label: assigning through .loc with a label that is not
# present would silently append a new row.
missing_descriptions = {
    'As One Glastrier': 'Combines Unnerve and Chilling Neigh.',
    'As One Spectrier': 'Combines Unnerve and Grim Neigh.',
    'Dragons Maw': 'Signature ability of Regidrago. Powers up Dragon-type moves.',
}

# abil_api_unmatch is a slice of ability_api; assign() builds a new frame
# instead of writing into that slice, which avoids SettingWithCopyWarning.
# Names without an entry in the mapping get a missing Description.
abil_api_unmatch = abil_api_unmatch.assign(
    Description=abil_api_unmatch['Name'].map(missing_descriptions)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Reorder the columns as Name, Generation, Description, Pokemon
# (the same layout used for abilities_complete).
abil_api_unmatch = abil_api_unmatch.reindex(columns=['Name', 'Generation', 'Description', 'Pokemon'])

In [None]:
# Show the API-only abilities with their descriptions filled in.
abil_api_unmatch

Unnamed: 0,Name,Generation,Description,Pokemon
9,As One Glastrier,8,Combines Unnerve and Chilling Neigh.,"[{'is_hidden': False, 'pokemon': {'name': 'cal..."
10,As One Spectrier,8,Combines Unnerve and Grim Neigh.,"[{'is_hidden': False, 'pokemon': {'name': 'cal..."
48,Dragons Maw,8,Signature ability of Regidrago. Powers up Drag...,"[{'is_hidden': False, 'pokemon': {'name': 'reg..."


In [None]:
# Append the API-only abilities to the matched ones and re-sort by name.
# drop=True discards the stale pre-sort row labels instead of keeping them as
# an extra, meaningless 'index' column in the final table.
abilities_total = (
    pd.concat([abilities_complete, abil_api_unmatch])
    .sort_values('Name')
    .reset_index(drop=True)
)

In [None]:
# Save the integrated abilities both as gzipped JSON lines and as CSV.
# Forward slashes are used as separator (as in the read paths of this
# notebook): a backslash followed by a letter is an invalid escape sequence in
# a normal string literal and only works as a path separator on Windows.
abilities_total.to_json("Dataset integrati/Abilities_integrati.json.gz", orient='records', lines=True, compression='gzip')
abilities_total.to_csv("Dataset integrati/Abilities_integrati.csv")

## Dataset Moves, controllo intersezione nomi:

In [None]:
# Names shared by the integrated moves and the scraped moves ...
intersection_moves_scrap = np.intersect1d(moves_complete['Name'], moves_scrap['Name'])
# ... and the scraped moves that are not among them.
moves_scrap_unmatch = moves_scrap[~moves_scrap['Name'].isin(intersection_moves_scrap)]

In [None]:
# Preview the last scraped moves that found no API counterpart.
moves_scrap_unmatch.tail()

Unnamed: 0,Name,Type,Power,Acc.,PP,Effect,Prob. (%),Cat.
818,Twinkle Tackle,Fairy,—,—,1,Fairy type Z-Move.,—,—
827,Victory Dance,,—,∞,10,,—,
829,Vise Grip,Normal,55,100,30,,—,
841,Wave Crash,Water,75,100,10,,—,
848,Wildbolt Storm,Electric,95,80,5,,—,


In [None]:
# Names shared by the integrated moves and the API moves ...
intersection_moves_api = np.intersect1d(moves_complete['Name'], moves_api['Name'])
# ... and the API moves that are not among them. The filter uses the API
# intersection computed just above (not the scraping one), mirroring the
# item and ability checks.
moves_api_unmatch = moves_api.loc[~moves_api['Name'].isin(intersection_moves_api), :]

In [None]:
# Preview the API moves that found no scraping counterpart.
moves_api_unmatch.head()

Unnamed: 0,index,ID,Name,Type,Power,Accuracy,PP,Damage_class,Introducted_in,Learned_by
0,718,719,10 000 000 Volt Thunderbolt,Electric,195.0,,1.0,Special,7,[]
5,627,628,Acid Downpour Physical,Poison,,,1.0,Physical,7,[]
6,628,629,Acid Downpour Special,Poison,,,1.0,Special,7,[]
16,623,624,All Out Pummeling Physical,Fighting,,,1.0,Physical,7,[]
17,624,625,All Out Pummeling Special,Fighting,,,1.0,Special,7,[]


In [None]:
# Initialise the recordlinkage indexer used for integration and matching.
# NOTE(review): full() presumably requests every API row x scraping row pair — confirm against the recordlinkage docs.
indexer = rl.Index()
indexer.full()



<Index>

In [None]:
# Build the candidate pairs between the unmatched API rows and the unmatched
# scraping rows, then report how many pairs there are.
candidates = indexer.index(moves_api_unmatch, moves_scrap_unmatch)
print(len(candidates))

5002


Valuto un possibile matching con una soglia più bassa fra tutte le righe scartate:

In [None]:
# Compare the 'Name' column of both frames for every candidate pair.
# With threshold=0.8 the resulting 'Name' feature takes the values 1.0 / 0.0
# (see the counts in the next cell).
compare = rl.Compare()
compare.string('Name', 'Name', threshold=0.8, label='Name')

# One row per candidate pair, one column per comparison ('Name' only here).
features = compare.compute(candidates, moves_api_unmatch, moves_scrap_unmatch)

In [None]:
# Count the candidate pairs per total score, highest score first.
features.sum(axis=1).value_counts().sort_index(ascending=False)

1.0       7
0.0    4995
dtype: int64

In [None]:
# Keep the pairs with a positive score; reset_index() turns the pair index
# into the columns level_0 (API row) and level_1 (scraping row).
pair_score = features.sum(axis=1)
potential_matches = features.loc[pair_score > 0].reset_index()
potential_matches

Unnamed: 0,level_0,level_1,Name
0,0,0,1.0
1,261,260,1.0
2,375,406,1.0
3,377,408,1.0
4,389,420,1.0
5,478,512,1.0
6,809,829,1.0


In [None]:
# Attach the API attributes (level_0 holds the API row label) and then the
# scraping attributes (level_1 holds the scraping row label) to every pair.
moves_unmatch = (
    potential_matches
    .merge(moves_api_unmatch, left_on="level_0", right_index=True)
    .merge(moves_scrap_unmatch, left_on="level_1", right_index=True)
)

moves_unmatch

Unnamed: 0,level_0,level_1,Name_x,index,ID,Name_y,Type_x,Power_x,Accuracy,PP_x,...,Introducted_in,Learned_by,Name,Type_y,Power_y,Acc.,PP_y,Effect,Prob. (%),Cat.
0,0,0,1.0,718,719,10 000 000 Volt Thunderbolt,Electric,195.0,,1.0,...,7,[],"10,000,000 Volt Thunderbolt",Electric,195,—,1,Pikachu-exclusive Z-Move. High critical hit ra...,—,
1,261,260,1.0,570,571,Forests Curse,Grass,,100.0,20.0,...,6,"[{'name': 'phantump', 'url': 'https://pokeapi....",Forest'S Curse,Grass,—,100,20,Adds Grass type to opponent.,—,
2,375,406,1.0,587,588,Kings Shield,Steel,,,10.0,...,6,"[{'name': 'aegislash-shield', 'url': 'https://...",King'S Shield,Steel,—,—,10,Protects the user and lowers opponent's Attack...,—,
3,377,408,1.0,615,616,Lands Wrath,Ground,90.0,100.0,10.0,...,6,"[{'name': 'zygarde-50', 'url': 'https://pokeap...",Land'S Wrath,Ground,90,100,10,,—,
4,389,420,1.0,725,726,Lets Snuggle Forever,Fairy,190.0,,1.0,...,7,[],Let'S Snuggle Forever,Fairy,190,—,1,Mimikyu-exclusive Z-Move.,—,
5,478,512,1.0,716,717,Natures Madness,Fairy,,90.0,10.0,...,7,"[{'name': 'tapu-koko', 'url': 'https://pokeapi...",Nature'S Madness,Fairy,—,90,10,Halves the foe's HP.,—,
6,809,829,1.0,10,11,Vice Grip,Normal,55.0,100.0,30.0,...,1,"[{'name': 'krabby', 'url': 'https://pokeapi.co...",Vise Grip,Normal,55,100,30,,—,


In [None]:
# Discard the pair labels, the similarity score (Name_x), the API row
# identifiers, the API spelling of the name (Name_y) and the scraped "Cat."
# column; the scraped "Name" is the one that is kept.
redundant_columns = ["level_0", "level_1", "Name_x", "index", "ID", "Name_y", "Cat."]
moves_unmatch = moves_unmatch.drop(columns=redundant_columns)

In [None]:
# Remove the pairs labelled 0 and 4 (the two matches whose Learned_by list is
# empty in the table displayed above).
moves_unmatch = moves_unmatch.drop(index=[0, 4])

In [None]:
# Put the columns in the final order, pairing each API field with its scraped counterpart.
moves_unmatch = moves_unmatch.reindex(columns=['Name', 'Introducted_in', 'Type_x', 'Type_y', 'Power_x', 'Power_y',
                                                'Accuracy', 'Acc.', 'PP_x', 'PP_y', 'Damage_class', 'Effect', 'Prob. (%)', 
                                                'Learned_by'])
# NOTE: the renamed frame is only displayed, not assigned, so moves_unmatch keeps
# the *_x / *_y column names (as seen in the concatenated moves_total below).
# NOTE(review): presumably moves_complete uses the same *_x / *_y names — confirm
# before persisting this rename, otherwise the concat would no longer align.
moves_unmatch.rename(columns={"Name": "Name", "Introducted_in": "Generation", "Type_x": "Type_API", "Type_y": "Type_SCRAP",
                               "Power_x": "Power_API", "Power_y": "Power_SCRAP", "Accuracy": "Accuracy_API", 
                               "Acc.": "Accuracy_SCRAP", "PP_x": "PP_API", "PP_y": "PP_SCRAP", "Damage_class": "Damage_class", 
                                "Effect": "Effect", "Prob. (%)": "Prob. (%)", "Learned_by": "Learned_by"})

Unnamed: 0,Name,Generation,Type_API,Type_SCRAP,Power_API,Power_SCRAP,Accuracy_API,Accuracy_SCRAP,PP_API,PP_SCRAP,Damage_class,Effect,Prob. (%),Learned_by
1,Forest'S Curse,6,Grass,Grass,,—,100.0,100,20.0,20,Status,Adds Grass type to opponent.,—,"[{'name': 'phantump', 'url': 'https://pokeapi...."
2,King'S Shield,6,Steel,Steel,,—,,—,10.0,10,Status,Protects the user and lowers opponent's Attack...,—,"[{'name': 'aegislash-shield', 'url': 'https://..."
3,Land'S Wrath,6,Ground,Ground,90.0,90,100.0,100,10.0,10,Physical,,—,"[{'name': 'zygarde-50', 'url': 'https://pokeap..."
5,Nature'S Madness,7,Fairy,Fairy,,—,90.0,90,10.0,10,Special,Halves the foe's HP.,—,"[{'name': 'tapu-koko', 'url': 'https://pokeapi..."
6,Vise Grip,1,Normal,Normal,55.0,55,100.0,100,30.0,30,Physical,,—,"[{'name': 'krabby', 'url': 'https://pokeapi.co..."


In [None]:
# Append the recovered matches to the integrated moves and order by name.
# reset_index() keeps the previous index as an extra "index" column.
moves_total = (
    pd.concat([moves_complete, moves_unmatch])
    .sort_values('Name')
    .reset_index()
)

In [None]:
# Display the integrated moves dataset.
moves_total

Unnamed: 0,index,Name,Introducted_in,Type_x,Type_y,Power_x,Power_y,Accuracy,Acc.,PP_x,PP_y,Damage_class,Effect,Prob. (%),Learned_by
0,0,Absorb,1,Grass,Grass,20.0,20,100.0,100,25.0,25,Special,User recovers half the HP inflicted on opponent.,—,"[{'name': 'zubat', 'url': 'https://pokeapi.co/..."
1,1,Accelerock,7,Rock,Rock,40.0,40,100.0,100,20.0,20,Physical,User attacks first.,—,"[{'name': 'lycanroc-midday', 'url': 'https://p..."
2,2,Acid,1,Poison,Poison,40.0,40,100.0,100,30.0,30,Special,May lower opponent's Special Defense.,10,"[{'name': 'ekans', 'url': 'https://pokeapi.co/..."
3,3,Acid Armor,1,Poison,Poison,,—,,—,20.0,20,Status,Sharply raises user's Defense.,—,"[{'name': 'tentacool', 'url': 'https://pokeapi..."
4,4,Acid Spray,5,Poison,Poison,40.0,40,100.0,100,20.0,20,Special,Sharply lowers opponent's Special Defense.,100,"[{'name': 'ekans', 'url': 'https://pokeapi.co/..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783,778,Yawn,3,Normal,Normal,,—,,—,10.0,10,Status,Puts opponent to sleep in the next turn.,—,"[{'name': 'squirtle', 'url': 'https://pokeapi...."
784,779,Zap Cannon,2,Electric,Electric,120.0,120,50.0,50,5.0,5,Special,Paralyzes opponent.,100,"[{'name': 'pikachu', 'url': 'https://pokeapi.c..."
785,780,Zen Headbutt,4,Psychic,Psychic,80.0,80,90.0,90,15.0,15,Physical,May cause flinching.,20,"[{'name': 'squirtle', 'url': 'https://pokeapi...."
786,781,Zing Zap,7,Electric,Electric,80.0,80,100.0,100,10.0,10,Physical,May cause flinching.,30,"[{'name': 'togedemaru', 'url': 'https://pokeap..."


In [None]:
# Extract the G-Max moves from the unmatched scraping rows so they can be
# saved separately.
is_gmax = moves_scrap_unmatch['Name'].str.contains('G Max')
gmax_moves = moves_scrap_unmatch[is_gmax]
gmax_moves

Unnamed: 0,Name,Type,Power,Acc.,PP,Effect,Prob. (%),Cat.
275,G Max Befuddle,Bug,—,∞,5,"Butterfree-exclusive G-Max Move. Poisons, para...",100,—
276,G Max Cannonade,Water,—,∞,10,Blastoise-exclusive G-Max Move. Damages non-Wa...,—,—
277,G Max Centiferno,Fire,—,∞,5,Centiskorch-exclusive G-Max Move. Traps oppone...,100,—
278,G Max Chi Strike,Fighting,—,∞,5,Machamp-exclusive G-Max Move. Increases critic...,—,—
279,G Max Cuddle,Normal,—,∞,5,Eevee-exclusive G-Max Move. Infatuates opponents.,100,—
280,G Max Depletion,Dragon,—,∞,5,Duraludon-exclusive G-Max Move. Reduces oppone...,—,—
281,G Max Drum Solo,Grass,—,∞,5,Rillaboom-exclusive G-Max Move. Ignores target...,—,—
282,G Max Finale,Fairy,—,∞,5,Alcremie-exclusive G-Max Move. Heals the user'...,—,—
283,G Max Fireball,Fire,—,∞,5,Cinderace-exclusive G-Max Move. Ignores target...,—,—
284,G Max Foam Burst,Water,—,∞,5,Kingler-exclusive G-Max Move. Harshly lowers o...,100,—


In [None]:
# Save the integrated moves both as gzipped JSON Lines and as CSV.
# Forward slashes replace the original backslash separator: "\m" is an invalid
# string escape and only resolves to a sub-folder on Windows, whereas the file
# is read back later with the path 'Dataset integrati/moves_integrati.json.gz'.
moves_total.to_json("Dataset integrati/moves_integrati.json.gz", orient='records', lines=True, compression='gzip')
moves_total.to_csv("Dataset integrati/moves_integrati.csv")

In [None]:
# Save the G-Max moves both as gzipped JSON Lines and as CSV.
# Forward slashes replace the original backslash separator: "\g" is an invalid
# string escape and only resolves to a sub-folder on Windows.
gmax_moves.to_json("Dataset integrati/gmax_moves.json.gz", orient='records', lines=True, compression='gzip')
gmax_moves.to_csv("Dataset integrati/gmax_moves.csv")

# CREAZIONE TABELLE PONTE

In [None]:
# Load the integrated Pokémon dataset (gzipped JSON Lines).
pokemon = pd.read_json(
    'Dataset integrati/pokemon_integrati.json.gz',
    orient='records',
    lines=True,
    compression='gzip',
)
pokemon.head()

Unnamed: 0,index,#,Name,Generation,Rarity,Evolves_from,Has_gender_diff,Type1,Type2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Variant,Varieties,sprite_url
0,0,1,Bulbasaur,1,Common,,False,Grass,Poison,318,45,49,49,65,65,45,,"[{'is_default': True, 'pokemon': {'name': 'bul...",https://img.pokemondb.net/sprites/sword-shield...
1,1,2,Ivysaur,1,Common,Bulbasaur,False,Grass,Poison,405,60,62,63,80,80,60,,"[{'is_default': True, 'pokemon': {'name': 'ivy...",https://img.pokemondb.net/sprites/sword-shield...
2,2,3,Venusaur,1,Common,Ivysaur,True,Grass,Poison,525,80,82,83,100,100,80,,"[{'is_default': True, 'pokemon': {'name': 'ven...",https://img.pokemondb.net/sprites/sword-shield...
3,3,3,Venusaur,1,Common,Ivysaur,True,Grass,Poison,625,80,100,123,122,120,80,Mega Venusaur,"[{'is_default': True, 'pokemon': {'name': 'ven...",https://img.pokemondb.net/sprites/sword-shield...
4,4,4,Charmander,1,Common,,False,Fire,,309,39,52,43,60,50,65,,"[{'is_default': True, 'pokemon': {'name': 'cha...",https://img.pokemondb.net/sprites/sword-shield...


In [None]:
# Load the integrated moves dataset (gzipped JSON Lines). This rebinds `moves`,
# replacing the raw API frame read at the top of the notebook.
moves = pd.read_json(
    'Dataset integrati/moves_integrati.json.gz',
    orient='records',
    lines=True,
    compression='gzip',
)
moves.head()

Unnamed: 0,index,Name,Introducted_in,Type_x,Type_y,Power_x,Power_y,Accuracy,Acc.,PP_x,PP_y,Damage_class,Effect,Prob. (%),Learned_by
0,0,Absorb,1,Grass,Grass,20.0,20,100.0,100,25,25,Special,User recovers half the HP inflicted on opponent.,—,"[{'name': 'zubat', 'url': 'https://pokeapi.co/..."
1,1,Accelerock,7,Rock,Rock,40.0,40,100.0,100,20,20,Physical,User attacks first.,—,"[{'name': 'lycanroc-midday', 'url': 'https://p..."
2,2,Acid,1,Poison,Poison,40.0,40,100.0,100,30,30,Special,May lower opponent's Special Defense.,10,"[{'name': 'ekans', 'url': 'https://pokeapi.co/..."
3,3,Acid Armor,1,Poison,Poison,,—,,—,20,20,Status,Sharply raises user's Defense.,—,"[{'name': 'tentacool', 'url': 'https://pokeapi..."
4,4,Acid Spray,5,Poison,Poison,40.0,40,100.0,100,20,20,Special,Sharply lowers opponent's Special Defense.,100,"[{'name': 'ekans', 'url': 'https://pokeapi.co/..."


In [None]:
# Load the integrated abilities dataset (gzipped JSON Lines). This rebinds
# `abilities`, replacing the raw API frame read at the top of the notebook.
abilities = pd.read_json(
    'Dataset integrati/Abilities_integrati.json.gz',
    orient='records',
    lines=True,
    compression='gzip',
)
abilities.head()

Unnamed: 0,index,Name,Generation,Description,Pokemon
0,0,Adaptability,4,Powers up moves of the same type.,"[{'is_hidden': False, 'pokemon': {'name': 'eev..."
1,1,Aerilate,6,Turns Normal-type moves into Flying-type moves.,"[{'is_hidden': False, 'pokemon': {'name': 'pin..."
2,2,Aftermath,4,Damages the attacker landing the finishing hit.,"[{'is_hidden': True, 'pokemon': {'name': 'volt..."
3,3,Air Lock,3,Eliminates the effects of weather.,"[{'is_hidden': False, 'pokemon': {'name': 'ray..."
4,4,Analytic,5,Boosts move power when the Pokémon moves last.,"[{'is_hidden': True, 'pokemon': {'name': 'magn..."


# PONTE ABILITA' - POKEMON

In [None]:
# Only the ability name and its list of Pokémon are needed for the bridge table.
bridge_columns = ["Name", "Pokemon"]
abil_poke = abilities[bridge_columns]
abil_poke.head()

Unnamed: 0,Name,Pokemon
0,Adaptability,"[{'is_hidden': False, 'pokemon': {'name': 'eev..."
1,Aerilate,"[{'is_hidden': False, 'pokemon': {'name': 'pin..."
2,Aftermath,"[{'is_hidden': True, 'pokemon': {'name': 'volt..."
3,Air Lock,"[{'is_hidden': False, 'pokemon': {'name': 'ray..."
4,Analytic,"[{'is_hidden': True, 'pokemon': {'name': 'magn..."


In [None]:
# Make sure every 'Pokemon' cell is a plain Python list before exploding.
# The original loop assigned to the rows yielded by iterrows(), which are
# copies, so the conversion never reached the DataFrame; a column-level
# assignment actually applies it.
abil_poke = abil_poke.assign(Pokemon=abil_poke['Pokemon'].apply(list))

In [None]:
# One row per (ability, Pokémon entry): each list element gets its own row and
# the original row label is repeated.
abil_poke = abil_poke.explode('Pokemon')
abil_poke.head()

Unnamed: 0,Name,Pokemon
0,Adaptability,"{'is_hidden': False, 'pokemon': {'name': 'eeve..."
0,Adaptability,"{'is_hidden': True, 'pokemon': {'name': 'corph..."
0,Adaptability,"{'is_hidden': True, 'pokemon': {'name': 'crawd..."
0,Adaptability,"{'is_hidden': True, 'pokemon': {'name': 'feeba..."
0,Adaptability,"{'is_hidden': False, 'pokemon': {'name': 'pory..."


In [None]:
# Replace the repeated labels left by explode() with a fresh 0..n-1 index;
# the old labels are kept in an "index" column.
abil_poke = abil_poke.reset_index()

In [None]:
# Turn every dict in 'Pokemon' into columns and put them in front of the
# remaining columns.
entry_fields = abil_poke['Pokemon'].apply(pd.Series)
remaining_columns = abil_poke.drop(columns='Pokemon')
ponte_abil_poke = pd.concat([entry_fields, remaining_columns], axis=1)

In [None]:
# Expand the nested 'pokemon' dict (name, url) into columns as well.
pokemon_fields = ponte_abil_poke['pokemon'].apply(pd.Series)
remaining_columns = ponte_abil_poke.drop(columns='pokemon')
ponte_abil_poke = pd.concat([pokemon_fields, remaining_columns], axis=1)

In [None]:
# Check the flattened ability/Pokémon table.
ponte_abil_poke.head()

Unnamed: 0,name,url,is_hidden,slot,index,Name
0,eevee,https://pokeapi.co/api/v2/pokemon/133/,False,2,0,Adaptability
1,corphish,https://pokeapi.co/api/v2/pokemon/341/,True,3,0,Adaptability
2,crawdaunt,https://pokeapi.co/api/v2/pokemon/342/,True,3,0,Adaptability
3,feebas,https://pokeapi.co/api/v2/pokemon/349/,True,3,0,Adaptability
4,porygon-z,https://pokeapi.co/api/v2/pokemon/474/,False,1,0,Adaptability


In [None]:
# Keep the ability name, the Pokémon name and the hidden flag, with the final
# bridge-table column names.
ponte_abil_poke = ponte_abil_poke[['Name', 'name', 'is_hidden']].rename(
    columns={'Name': 'Ability', 'name': 'Pokemon', 'is_hidden': 'Hidden'}
)
ponte_abil_poke.head()

Unnamed: 0,Ability,Pokemon,Hidden
0,Adaptability,eevee,False
1,Adaptability,corphish,True
2,Adaptability,crawdaunt,True
3,Adaptability,feebas,True
4,Adaptability,porygon-z,False


In [None]:
# Save the ability/Pokémon bridge table both as gzipped JSON Lines and as CSV.
ponte_abil_poke.to_json(
    "Tabelle ponte/ponte_abil_poke.json.gz",
    orient='records',
    lines=True,
    compression='gzip',
)
ponte_abil_poke.to_csv("Tabelle ponte/ponte_abil_poke.csv")

# PONTE MOSSE - POKEMON

In [None]:
# Only the move name and the list of Pokémon that learn it are needed for the
# bridge table.
bridge_columns = ["Name", "Learned_by"]
move_poke = moves[bridge_columns]
move_poke.head()

Unnamed: 0,Name,Learned_by
0,Absorb,"[{'name': 'zubat', 'url': 'https://pokeapi.co/..."
1,Accelerock,"[{'name': 'lycanroc-midday', 'url': 'https://p..."
2,Acid,"[{'name': 'ekans', 'url': 'https://pokeapi.co/..."
3,Acid Armor,"[{'name': 'tentacool', 'url': 'https://pokeapi..."
4,Acid Spray,"[{'name': 'ekans', 'url': 'https://pokeapi.co/..."


In [None]:
# Make sure every 'Learned_by' cell is a plain Python list before exploding.
# The original loop assigned to the rows yielded by iterrows(), which are
# copies, so the conversion never reached the DataFrame; a column-level
# assignment actually applies it.
move_poke = move_poke.assign(Learned_by=move_poke['Learned_by'].apply(list))

In [None]:
# One row per (move, Pokémon entry): each list element gets its own row and
# the original row label is repeated.
move_poke = move_poke.explode('Learned_by')
move_poke.head()

Unnamed: 0,Name,Learned_by
0,Absorb,"{'name': 'zubat', 'url': 'https://pokeapi.co/a..."
0,Absorb,"{'name': 'golbat', 'url': 'https://pokeapi.co/..."
0,Absorb,"{'name': 'oddish', 'url': 'https://pokeapi.co/..."
0,Absorb,"{'name': 'gloom', 'url': 'https://pokeapi.co/a..."
0,Absorb,"{'name': 'vileplume', 'url': 'https://pokeapi...."


In [None]:
# Replace the repeated labels left by explode() with a fresh 0..n-1 index;
# the old labels are kept in an "index" column.
move_poke = move_poke.reset_index()

In [None]:
# Turn every dict in 'Learned_by' into columns (name, url) and put them in
# front of the remaining columns.
learner_fields = move_poke['Learned_by'].apply(pd.Series)
remaining_columns = move_poke.drop(columns='Learned_by')
ponte_move_poke = pd.concat([learner_fields, remaining_columns], axis=1)

In [None]:
# Check the flattened move/Pokémon table.
ponte_move_poke.head()

Unnamed: 0,0,name,url,index,Name
0,,zubat,https://pokeapi.co/api/v2/pokemon/41/,0,Absorb
1,,golbat,https://pokeapi.co/api/v2/pokemon/42/,0,Absorb
2,,oddish,https://pokeapi.co/api/v2/pokemon/43/,0,Absorb
3,,gloom,https://pokeapi.co/api/v2/pokemon/44/,0,Absorb
4,,vileplume,https://pokeapi.co/api/v2/pokemon/45/,0,Absorb


In [None]:
# Keep the move name and the Pokémon name, with the final bridge-table column
# names.
ponte_move_poke = ponte_move_poke[['Name', 'name']].rename(
    columns={'Name': 'Move', 'name': 'Pokemon'}
)
ponte_move_poke.head()

Unnamed: 0,Move,Pokemon
0,Absorb,zubat
1,Absorb,golbat
2,Absorb,oddish
3,Absorb,gloom
4,Absorb,vileplume


In [None]:
# Save the move/Pokémon bridge table both as gzipped JSON Lines and as CSV.
ponte_move_poke.to_json(
    "Tabelle ponte/ponte_move_poke.json.gz",
    orient='records',
    lines=True,
    compression='gzip',
)
ponte_move_poke.to_csv("Tabelle ponte/ponte_move_poke.csv")

# CORREZIONE INFORMAZIONI VARIANTI

In [None]:
for index, row in pokemon.iterrows():
    if row['Variante'] == 'Mega':
        row['Generazione'] = 6
        row['Evolves_from'] = row['Species']
    if row['Variante'] == 'Alolan':
        row['Generazione'] = 7
    if row['Variante'] == 'Galarian':
        row['Generazione'] = 8 
    if row['Variante'] == 'Primal': #ArcheoGroudon e ArcheoKyogre
        row['Generazione'] = 6