In [1]:
import pandas as pd
import numpy as np
import json
import os

In [2]:
# Render every column of wide frames; the row limit keeps its default
# (to lift it as well: pd.options.display.max_rows = None).
pd.options.display.max_columns = None

In [3]:
def load_jsonfile(path, filename):
    """Read one JSON document from ``path/filename`` and return the parsed object.

    Parameters
    ----------
    path : str
        Directory that contains the file.
    filename : str
        Name of the JSON file inside ``path``.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the document (dict, list, ...).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, and let os.path build the path instead of string formatting.
    with open(os.path.join(path, filename), 'r', encoding='utf-8') as f:
        return json.load(f)

In [4]:
def get_dataframe_from_jsonfiles(path, file_list):
    """Load several JSON files from ``path`` into one DataFrame (one row per file).

    Parameters
    ----------
    path : str
        Directory holding the files.
    file_list : iterable of str
        File names to load, in row order.

    Returns
    -------
    pandas.DataFrame or None
        The assembled frame. Best effort: if anything fails, the error is
        printed and ``None`` is returned instead of raising.
    """
    try:
        records = []
        for filename in file_list:
            records.append(load_jsonfile(path, filename))
        # Nested objects stay as-is in their cells
        # (alternative: pd.json_normalize(records, sep='_')).
        return pd.DataFrame(records)
    except Exception as error:
        print("An exception occurred:", error)
        return None

# CAPA SILVER

# - TABLA POKEMON

In [5]:
# Bronze-layer input for the pokemon table.
# (presumably written earlier with: df_raw.to_parquet('./bronze/pokemon.parquet'))
df_raw = pd.read_parquet(path='./bronze/pokemon.parquet')

In [6]:
# Column names, dtypes and non-null counts of the raw pokemon table
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1302 entries, 0 to 1301
Data columns (total 20 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   abilities                 1302 non-null   object
 1   base_experience           1302 non-null   int64 
 2   cries                     1302 non-null   object
 3   forms                     1302 non-null   object
 4   game_indices              1302 non-null   object
 5   height                    1302 non-null   int64 
 6   held_items                1302 non-null   object
 7   id                        1302 non-null   int64 
 8   is_default                1302 non-null   bool  
 9   location_area_encounters  1302 non-null   object
 10  moves                     1302 non-null   object
 11  name                      1302 non-null   object
 12  order                     1302 non-null   int64 
 13  past_abilities            1302 non-null   object
 14  past_types              

In [7]:
# Base frame: clearer column names, rows ordered by id, and the id used both as
# index (for label-based fixes further down) and as a regular column.
df_clean = (
    df_raw
    .rename(columns={'name': 'pokemon_name', 'order': 'pokemon_order'})
    .sort_values('id')
    .set_index('id', drop=False)
)

# cries: one column per entry of the 'cries' dict ('latest' and 'legacy')
df_clean = (
    df_clean
    .assign(**{
        f'cry_{variant}': (
            lambda df, variant=variant: df['cries'].apply(lambda cry: cry.get(variant))
        )
        for variant in ('latest', 'legacy')
    })
    .drop(columns='cries')
)

# forms: keep only the name of every form entry
df_clean = (
    df_clean
    .assign(
        forms_list=lambda df: df['forms'].apply(
            lambda entries: [entry.get('name') for entry in entries]
        )
    )
    .drop(columns='forms')
)

# types: map slot -> type name, then expose slots 1 and 2 as columns
df_clean = (
    df_clean
    .assign(
        types_dic=lambda df: df['types'].apply(
            lambda entries: {
                entry.get('slot', 0): entry.get('type', {}).get('name')
                for entry in entries
            }
        )
    )
    .assign(
        pokemon_first_type=lambda df: df['types_dic'].apply(lambda by_slot: by_slot.get(1)),
        pokemon_second_type=lambda df: df['types_dic'].apply(lambda by_slot: by_slot.get(2)),
    )
    .drop(columns=['types', 'types_dic'])
)

# stats: index the stat entries by stat name, then spread base values and effort
# values into one column each (all base columns first, then all effort columns),
# and finally add the two totals.
df_clean = (
    df_clean
    .assign(
        stats_dic=lambda df: df['stats'].apply(
            lambda entries: {
                entry.get('stat', {}).get('name', 'unknown_stat'): {
                    'base': entry.get('base_stat', 0),
                    'effort': entry.get('effort', 0),
                }
                for entry in entries
            }
        )
    )
    .assign(**{
        # default arguments freeze stat/kind for each generated lambda
        f"stat_{stat.replace('-', '_')}_{kind}": (
            lambda df, stat=stat, kind=kind: df['stats_dic'].apply(
                lambda by_name: by_name.get(stat, {}).get(kind, 0)
            )
        )
        for kind in ('base', 'effort')
        for stat in ('hp', 'attack', 'defense', 'special-attack', 'special-defense', 'speed')
    })
    .assign(
        stats_total=lambda df: (
            df['stat_hp_base']
            + df['stat_attack_base']
            + df['stat_defense_base']
            + df['stat_special_attack_base']
            + df['stat_special_defense_base']
            + df['stat_speed_base']
        ),
        effort_total=lambda df: (
            df['stat_hp_effort']
            + df['stat_attack_effort']
            + df['stat_defense_effort']
            + df['stat_special_attack_effort']
            + df['stat_special_defense_effort']
            + df['stat_speed_effort']
        ),
    )
    .drop(columns=['stats', 'stats_dic'])
)

# abilities: map slot -> ability name; slot 1 feeds first_ability,
# slot 2 second_ability and slot 3 hidden_ability
df_clean = (
    df_clean
    .assign(
        abilities_dic=lambda df: df['abilities'].apply(
            lambda entries: {
                entry.get('slot', 0): entry.get('ability', {}).get('name')
                for entry in entries
            }
        )
    )
    .assign(**{
        column: (
            lambda df, slot=slot: df['abilities_dic'].apply(lambda by_slot: by_slot.get(slot))
        )
        for column, slot in {
            'first_ability': 1,
            'second_ability': 2,
            'hidden_ability': 3,
        }.items()
    })
    .drop(columns=['abilities', 'abilities_dic'])
)

# past abilities: for each slot, collect the ability names found across all
# 'past_abilities' records. Entries whose 'ability' is empty are skipped.
df_clean = df_clean.assign(**{
    column: (
        lambda df, slot=slot: df['past_abilities'].apply(
            lambda records: [
                item.get('ability', {}).get('name', None)
                for record in records
                for item in record.get('abilities', [])
                if item.get('ability') and item.get('slot') == slot
            ]
        )
    )
    for column, slot in {
        'past_first_abilities': 1,
        'past_second_abilities': 2,
        'past_hidden_abilities': 3,
    }.items()
})

# moves: list of move names (the original 'moves' column is kept)
df_clean = df_clean.assign(
    moves_list=lambda df: df['moves'].apply(
        lambda entries: [entry.get('move', {}).get('name') for entry in entries]
    )
)

# species name: replace the 'species' dict by its name
df_clean = (
    df_clean
    .assign(species_name=lambda df: df['species'].apply(lambda species: species.get('name')))
    .drop(columns='species')
)

# Boolean flags derived from the pokemon name.
suffixes_mega = ('-mega', '-mega-x', '-mega-y')
suffixes_gmax = '-gmax'
# Regional markers are matched anywhere in the name, not only at the end, so
# names such as 'pikachu-alola-cap' match too; that row is corrected further
# down in this cell.
suffixes_regional = ['-alola', '-galar', '-hisui', '-paldea']

df_clean = df_clean.assign(
    # is mega / gmax
    is_mega=lambda df: df['pokemon_name'].str.endswith(suffixes_mega),
    is_gmax=lambda df: df['pokemon_name'].str.endswith(suffixes_gmax),
    # is totem
    is_totem=lambda df: df['pokemon_name'].apply(lambda name: '-totem' in name),
    # is regional form
    is_regional=lambda df: df['pokemon_name'].apply(
        lambda name: any(marker in name for marker in suffixes_regional)
    ),
)

# separated from species: False everywhere except for the hand-picked ids below
df_clean = df_clean.assign(is_separated_from_species=False)
separated_pokemon_ids = [
    # floette eternal
    10061,
    # pikachu cosplay
    10080, 10081, 10082, 10083, 10084, 10085,
    # pikachu cap
    10094, 10095, 10096, 10097, 10098, 10099, 10148, 10160,
    # totem
    10093, 10121, 10122, 10128, 10129, 10144, 10145, 10146, 10149, 10150, 10153, 10154,
    # greninja ash
    10116, 10117,
    # zygarde
    10118, 10119, 10120,
    # pikachu & eevee starters
    10158, 10159,
    # zarude dada (10192) is deliberately left out
    # ursaluna bloodmoon
    10272,
]
df_clean.loc[separated_pokemon_ids, 'is_separated_from_species'] = True

# sprite default: front sprite found under sprites -> other -> home
df_clean['sprite_default'] = df_clean['sprites'].apply(
    lambda sprites: sprites.get('other', {}).get('home', {}).get('front_default')
)

# fixes for wrong or missing values in sprite_default
ids = [10061, 10080, 10081, 10082, 10083, 10084, 10144, 10158, 10159]
koraidon_ids = [10264, 10265, 10266, 10267]
miraidon_ids = [10268, 10269, 10270, 10271]

# these rows take the 'official-artwork' sprite instead
df_clean.loc[ids, 'sprite_default'] = (
    df_clean.loc[ids, 'sprites']
    .apply(lambda sprites: sprites['other']['official-artwork']['front_default'])
)
# pikachu cosplay
df_clean.at[10085, 'sprite_default'] = 'https://archives.bulbagarden.net/media/upload/b/b4/0025Pikachu-Cosplay.png'
# rockruff own tempo
df_clean.at[10151, 'sprite_default'] = df_clean.at[744, 'sprite_default']
# Order matters for the next two: 10117 must copy 10116's sprite before 10116
# itself is overwritten.
# greninja ash
df_clean.at[10117, 'sprite_default'] = df_clean.at[10116, 'sprite_default']
# greninja battle bond
df_clean.at[10116, 'sprite_default'] = df_clean.at[658, 'sprite_default']
# mimikyu totem
df_clean.at[10144, 'sprite_default'] = df_clean.at[778, 'sprite_default']
# koraidon
df_clean.loc[koraidon_ids, 'sprite_default'] = df_clean.at[1007, 'sprite_default']
# miraidon
df_clean.loc[miraidon_ids, 'sprite_default'] = df_clean.at[1008, 'sprite_default']

# pokemon base name: the species name, except for rows flagged as regional,
# which keep their own pokemon name
df_clean['pokemon_base_name'] = df_clean['pokemon_name'].where(
    df_clean['is_regional'], df_clean['species_name']
)

# regional rows whose own name carries an extra suffix get a hand-written base name
for pokemon_id, base_name in {
    10093: 'raticate-alola',
    10177: 'darmanitan-galar',
    10178: 'darmanitan-galar',
    10250: 'tauros-paldea',
    10251: 'tauros-paldea',
    10252: 'tauros-paldea',
}.items():
    df_clean.at[pokemon_id, 'pokemon_base_name'] = base_name

# rockruff own tempo (10151) and lycanroc dusk (10152) use their own name
for pokemon_id in (10151, 10152):
    df_clean.at[pokemon_id, 'pokemon_base_name'] = df_clean.at[pokemon_id, 'pokemon_name']

# Disabled experiment: the same "use own name" treatment for ids
# 413, 10004, 10005, 745, 10126, 10151, 849, 10184, 925, 10257, 678, 10025.



## error fixes
# marowak totem: rename it as an alolan totem and align every derived column
df_clean.at[10149, 'pokemon_name'] = 'marowak-totem-alola'
df_clean.at[10149, 'forms_list'] = ['marowak-totem-alola']
df_clean.at[10149, 'is_regional'] = True
df_clean.at[10149, 'pokemon_base_name'] = 'marowak-alola'
df_clean.at[10149, 'sprite_default'] = df_clean.at[10115, 'sprite_default']

df_clean.at[414, 'forms_list'] = ['mothim']  # mothim
df_clean.at[493, 'forms_list'] = [
    form for form in df_clean.at[493, 'forms_list'] if form != 'arceus-unknown'
]  # arceus

df_clean.at[716, 'forms_list'] = ['xerneas-neutral', 'xerneas-active']  # xerneas
df_clean.at[664, 'forms_list'] = ['scatterbug']  # scatterbug
df_clean.at[665, 'forms_list'] = ['spewpa']  # spewpa
df_clean.at[10272, 'is_default'] = False  # ursaluna bloodmoon

# pikachu alola-cap matched '-alola' but is not a regional form. Its base name
# was derived while the flag was still True, so it is reset to the species name
# together with the flag.
df_clean.at[10099, 'is_regional'] = False
df_clean.at[10099, 'pokemon_base_name'] = df_clean.at[10099, 'species_name']

# Missing weights: cast to float first so that NaN fits the column dtype
# (assigning None/NaN into an integer column is an implicit upcast that newer
# pandas versions deprecate), then blank every gmax row and eternatus eternamax.
df_clean['weight'] = df_clean['weight'].astype('float64')
df_clean.loc[df_clean['is_gmax'], 'weight'] = np.nan
df_clean.at[10190, 'weight'] = np.nan  # eternatus eternamax weight



# save: to_parquet does not create missing directories, so make sure the
# silver folder exists before writing
os.makedirs('./silver', exist_ok=True)
df_clean.to_parquet('./silver/pokemon.parquet')

In [8]:
# Column names, dtypes and non-null counts of the cleaned pokemon table
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1302 entries, 1 to 10277
Data columns (total 48 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   base_experience              1302 non-null   int64  
 1   game_indices                 1302 non-null   object 
 2   height                       1302 non-null   int64  
 3   held_items                   1302 non-null   object 
 4   id                           1302 non-null   int64  
 5   is_default                   1302 non-null   bool   
 6   location_area_encounters     1302 non-null   object 
 7   moves                        1302 non-null   object 
 8   pokemon_name                 1302 non-null   object 
 9   pokemon_order                1302 non-null   int64  
 10  past_abilities               1302 non-null   object 
 11  past_types                   1302 non-null   object 
 12  sprites                      1302 non-null   object 
 13  weight                

In [9]:
# Preview the first row of the cleaned pokemon table
df_clean.head(1)

Unnamed: 0_level_0,base_experience,game_indices,height,held_items,id,is_default,location_area_encounters,moves,pokemon_name,pokemon_order,past_abilities,past_types,sprites,weight,cry_latest,cry_legacy,forms_list,first_type,second_type,stat_hp_base,stat_attack_base,stat_defense_base,stat_special_attack_base,stat_special_defense_base,stat_speed_base,stat_hp_effort,stat_attack_effort,stat_defense_effort,stat_special_attack_effort,stat_special_defense_effort,stat_speed_effort,stats_total,effort_total,first_ability,second_ability,hidden_ability,past_first_abilities,past_second_abilities,past_hidden_abilities,moves_list,species_name,is_mega,is_gmax,is_totem,is_regional,is_separated_from_species,sprite_default,pokemon_base_name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1
1,64,"[{'game_index': 153, 'version': {'name': 'red'...",7,[],1,True,https://pokeapi.co/api/v2/pokemon/1/encounters,"[{'move': {'name': 'razor-wind', 'url': 'https...",bulbasaur,1,"[{'abilities': [{'ability': None, 'is_hidden':...",[],{'back_default': 'https://raw.githubuserconten...,69.0,https://raw.githubusercontent.com/PokeAPI/crie...,https://raw.githubusercontent.com/PokeAPI/crie...,[bulbasaur],grass,poison,45,49,49,65,65,45,0,0,0,1,0,0,318,1,overgrow,,chlorophyll,[],[],[],"[razor-wind, swords-dance, cut, bind, vine-whi...",bulbasaur,False,False,False,False,False,https://raw.githubusercontent.com/PokeAPI/spri...,bulbasaur


# - TABLA POKEMON-SPECIES

In [10]:
# Bronze-layer input for the pokemon-species table.
# (presumably written earlier with: df_species_raw.to_parquet('./bronze/pokemon-species.parquet'))
df_species_raw = pd.read_parquet(path='./bronze/pokemon-species.parquet')

In [11]:
# Column names, dtypes and non-null counts of the raw species table
df_species_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 27 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   base_happiness          1025 non-null   int64 
 1   capture_rate            1025 non-null   int64 
 2   color                   1025 non-null   object
 3   egg_groups              1025 non-null   object
 4   evolution_chain         1025 non-null   object
 5   evolves_from_species    484 non-null    object
 6   flavor_text_entries     1025 non-null   object
 7   form_descriptions       1025 non-null   object
 8   forms_switchable        1025 non-null   bool  
 9   gender_rate             1025 non-null   int64 
 10  genera                  1025 non-null   object
 11  generation              1025 non-null   object
 12  growth_rate             1025 non-null   object
 13  habitat                 386 non-null    object
 14  has_gender_differences  1025 non-null   bool  
 15  hatc

In [12]:
# Base frame: replace each reference dict by its 'name' (a missing reference
# stays None), drop the dict columns, order by id and rename the generic
# columns. The index stays positional here because later steps merge on it;
# the species id becomes the index at the end of the cell.
df_species_clean = (
    df_species_raw
    .assign(**{
        # default argument freezes the source column for each generated lambda
        target: (
            lambda df, source=source: df[source].apply(
                lambda ref: None if ref is None else ref.get('name')
            )
        )
        for target, source in {
            'color_name': 'color',
            'species_generation_name': 'generation',
            'growth_rate_name': 'growth_rate',
            'habitat_name': 'habitat',
            'shape_name': 'shape',
            'evolves_from_species_name': 'evolves_from_species',
        }.items()
    })
    .drop(columns=['color', 'generation', 'growth_rate', 'habitat', 'shape', 'evolves_from_species'])
    .sort_values('id')
    .reset_index(drop=True)
    .rename(columns={
        'id': 'species_id',
        'name': 'species_name',
        'names': 'species_names',
        'order': 'species_order',
    })
)

# egg_groups: names of the first and second entry (None when absent)
df_species_clean = (
    df_species_clean
    .assign(
        first_egg_group=lambda df: df['egg_groups'].apply(
            lambda groups: groups[0].get('name') if len(groups) > 0 else None
        ),
        second_egg_group=lambda df: df['egg_groups'].apply(
            lambda groups: groups[1].get('name') if len(groups) > 1 else None
        ),
    )
    .drop(columns='egg_groups')
)

# generation: 'generation-i' .. 'generation-ix' -> 1 .. 9
generations_dic = {
    f'generation-{numeral}': number
    for number, numeral in enumerate(
        ('i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix'), start=1
    )
}
df_species_clean = (
    df_species_clean
    .assign(
        species_generation_number=lambda df: df['species_generation_name'].map(generations_dic)
    )
    .drop(columns='species_generation_name')
)

# varieties: list with the pokemon name of every variety
df_species_clean = (
    df_species_clean
    .assign(
        varieties_list=lambda df: df['varieties'].apply(
            lambda entries: [entry.get('pokemon', {}).get('name') for entry in entries]
        )
    )
    .drop(columns='varieties')
)

# evolution_chain url to id
# The id is the last numeric path segment of the url, e.g.
# '.../evolution-chain/1/' -> 1. Extracting it with an anchored pattern avoids
# depending on a hard-coded host prefix and on the default of
# Series.str.replace(regex=...), which differs between pandas versions.
df_species_clean = (
    df_species_clean
    .assign(
        evolution_chain_id=lambda df: (
            df['evolution_chain']
            .apply(lambda x: x.get('url', None) if x is not None else x)
            .str.extract(r'/(\d+)/?$', expand=False)
            # as before, a row without a parsable url makes this cast fail loudly
            .astype(int)
        )
    )
    .drop(columns=['evolution_chain'])
)

# form_description: text of the first description entry, None when there is none
df_species_clean = (
    df_species_clean
    .assign(
        forms_description=lambda df: df['form_descriptions'].apply(
            lambda descriptions: (
                None if len(descriptions) == 0 else descriptions[0].get('description')
            )
        )
    )
    .drop(columns='form_descriptions')
)

# genus: index the genera by language name and keep the 'en' entry
df_species_clean = (
    df_species_clean
    .assign(
        genus=lambda df: df['genera'].apply(
            lambda entries: {
                entry.get('language', {}).get('name', 'default'): entry.get('genus')
                for entry in entries
            }.get('en')
        )
    )
    .drop(columns='genera')
)

# species_names: one 'species_name_<language>' column per language
# ('-' in the language name becomes '_')
species_names_df = pd.json_normalize(
    df_species_clean['species_names'].apply(
        lambda entries: {
            'species_name_' + entry.get('language', {}).get('name', 'default').replace('-', '_'): entry.get('name')
            for entry in entries
        }
    )
)

# Both frames share the same positional index at this point, so they are
# aligned row by row.
df_species_clean = (
    df_species_clean
    .merge(species_names_df, how='left', left_index=True, right_index=True)
    .drop(columns='species_names')
)

# flavor_text_entries: one '<version>_<language>' column per combination
# ('-' becomes '_'); when a combination repeats, the last text wins
flavor_text_entries_df = pd.json_normalize(
    df_species_clean['flavor_text_entries'].apply(
        lambda entries: {
            '{}_{}'.format(
                entry.get('version', {}).get('name', 'v'),
                entry.get('language', {}).get('name', 'lang'),
            ).replace('-', '_'): entry.get('flavor_text')
            for entry in entries
        }
    )
)

# Only these English columns are carried into the species table.
game_entries_columns = [
    f'{version}_en'
    for version in (
        'red', 'blue', 'yellow',
        'gold', 'silver', 'crystal',
        'ruby', 'sapphire', 'emerald', 'firered', 'leafgreen',
        'diamond', 'pearl', 'platinum', 'heartgold', 'soulsilver',
        'black', 'white', 'black_2', 'white_2',
        'x', 'y', 'omega_ruby', 'alpha_sapphire',
        'sun', 'moon', 'ultra_sun', 'ultra_moon', 'lets_go_pikachu', 'lets_go_eevee',
        'sword', 'shield', 'legends_arceus',
        'scarlet', 'violet',
    )
]

df_species_clean = (
    df_species_clean
    .merge(
        flavor_text_entries_df[game_entries_columns],
        how='left',
        left_index=True,
        right_index=True,
    )
    .drop(columns='flavor_text_entries')
)

# set index: species id as index, also kept as a regular column
df_species_clean = df_species_clean.set_index('species_id', drop=False)

# error fixes
# oinkologne
df_species_clean.at[916, 'gender_rate'] = 4
# marowak: list the totem under its alolan name
df_species_clean.at[105, 'varieties_list'] = ['marowak', 'marowak-alola', 'marowak-totem-alola']

# fill in empty fields: hand-written form descriptions, keyed by species id
df_species_clean.at[658, 'forms_description'] = 'Greninja with the Ability Battle Bond are treated as a separate form to other Greninja, and cannot breed.\n\nIn Generation VII, Greninja with the Ability Battle Bond will transform into Ash-Greninja after it directly causes an opponent to faint without ending the battle.'
df_species_clean.at[25, 'forms_description'] = 'Cosplay Pikachu can change forms by changing its costume at a Contest Hall.\n- Each costume can learn an exclusive move.\n- Exclusive to Omega Ruby and Alpha Sapphire.\n- Forms: Cosplay Pikachu, Pikachu Rock Star, Pikachu Belle, Pikachu Pop Star, Pikachu Ph.D., Pikachu Libre.\n\nPikachu in a cap cannot change forms.\n- Able to use the Z-Move 10,000,000 Volt Thunderbolt.\n- Have a different cry than regular Pikachu.\n- Forms: Original Cap, Hoenn Cap, Sinnoh Cap, Unova Cap, Kalos Cap, Alola Cap, Partner Cap, World Cap.\n\nThe partner Pikachu has a separate internal index from standard Pikachu. This allows it to have higher base stats than wild Pikachu.\n\nGigantamax Pikachu can only be obtained through special methods, and the Gigantamax Factor cannot be bred.'
df_species_clean.at[133, 'forms_description'] = 'The partner Eevee has a separate internal index from standard Eevee. This allows it to have higher base stats than wild Eevee.\n\nGigantamax Eevee can only be obtained through special methods, and the Gigantamax Factor cannot be bred.'
df_species_clean.at[744, 'forms_description'] = 'Rockruff with the Ability Own Tempo are treated as a separate form to other Rockruff, and will always evolve into Dusk Form Lycanroc.'
df_species_clean.at[745, 'forms_description'] = 'Rockruff with the Abilities Keen Eye, Vital Spirit, or Steadfast evolve into:\n- Midday Form Lycanroc when leveled up during the day.\n- Midnight Form Lycanroc when leveled up at night.\n\nRockruff with the Ability Own Tempo evolve into Dusk Form Lycanroc when leveled up during the evening.'
df_species_clean.at[52, 'forms_description'] = 'In Alola, Meowth has a Dark-type regional form. It evolves into Alolan Persian when leveled up with high friendship.\n\nIn Galar, Meowth has a Steel-type regional form. It evolves into Perrserker.\n\nKantonian Meowth can Gigantamax into Gigantamax Meowth if it has the Gigantamax Factor. Meowth with the Gigantamax Factor cannot evolve.'
df_species_clean.at[718, 'forms_description'] = 'Zygarde cannot change forms. Zygarde can be broken down and re-assembled into a different form by using the Zygarde Cube.\n- Internally, the 10% and 50% Formes of Zygarde with the Ability Power Construct are each treated as separate forms from the 10% and 50% Formes of Zygarde with the Ability Aura Break.'
df_species_clean.at[869, 'forms_description'] = """Alcremie's form depends on two different parameters: its cream and type of Sweet. Cream affects the color of Alcremie's body, while the Sweet affects its eye color and head ornaments. There are nine different types of cream and seven different types of Sweet. Any type of cream can be paired with any Sweet, resulting in 63 different non-Gigantamax forms of Alcremie, not including its Shiny coloration."""


# save: to_parquet does not create missing directories, so make sure the
# silver folder exists before writing
os.makedirs('./silver', exist_ok=True)
df_species_clean.to_parquet('./silver/pokemon-species.parquet')

In [13]:
# Column names, dtypes and non-null counts of the cleaned species table
df_species_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1025 entries, 1 to 1025
Data columns (total 72 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   base_happiness             1025 non-null   int64 
 1   capture_rate               1025 non-null   int64 
 2   forms_switchable           1025 non-null   bool  
 3   gender_rate                1025 non-null   int64 
 4   has_gender_differences     1025 non-null   bool  
 5   hatch_counter              1025 non-null   int64 
 6   species_id                 1025 non-null   int64 
 7   is_baby                    1025 non-null   bool  
 8   is_legendary               1025 non-null   bool  
 9   is_mythical                1025 non-null   bool  
 10  species_name               1025 non-null   object
 11  species_order              1025 non-null   int64 
 12  pal_park_encounters        1025 non-null   object
 13  pokedex_numbers            1025 non-null   object
 14  color_name   

In [14]:
# Preview the first row of the cleaned species table
df_species_clean.head(1)

Unnamed: 0_level_0,base_happiness,capture_rate,forms_switchable,gender_rate,has_gender_differences,hatch_counter,species_id,is_baby,is_legendary,is_mythical,species_name,species_order,pal_park_encounters,pokedex_numbers,color_name,growth_rate_name,habitat_name,shape_name,evolves_from_species_name,first_egg_group,second_egg_group,species_generation_number,varieties_list,evolution_chain_id,forms_description,genus,species_name_ja_Hrkt,species_name_roomaji,species_name_ko,species_name_zh_Hant,species_name_fr,species_name_de,species_name_es,species_name_it,species_name_en,species_name_ja,species_name_zh_Hans,red_en,blue_en,yellow_en,gold_en,silver_en,crystal_en,ruby_en,sapphire_en,emerald_en,firered_en,leafgreen_en,diamond_en,pearl_en,platinum_en,heartgold_en,soulsilver_en,black_en,white_en,black_2_en,white_2_en,x_en,y_en,omega_ruby_en,alpha_sapphire_en,sun_en,moon_en,ultra_sun_en,ultra_moon_en,lets_go_pikachu_en,lets_go_eevee_en,sword_en,shield_en,legends_arceus_en,scarlet_en,violet_en
species_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1
1,50,45,False,1,False,20,1,False,False,False,bulbasaur,1,"[{'area': {'name': 'field', 'url': 'https://po...","[{'entry_number': 1, 'pokedex': {'name': 'nati...",green,medium-slow,grassland,quadruped,,monster,plant,1,[bulbasaur],1,,Seed Pokémon,フシギダネ,Fushigidane,이상해씨,妙蛙種子,Bulbizarre,Bisasam,Bulbasaur,Bulbasaur,Bulbasaur,フシギダネ,妙蛙种子,A strange seed was\nplanted on its\nback at bi...,A strange seed was\nplanted on its\nback at bi...,It can go for days\nwithout eating a\nsingle m...,The seed on its\nback is filled\nwith nutrient...,It carries a seed\non its back right\nfrom bir...,"While it is young,\nit uses the\nnutrients tha...",BULBASAUR can be seen napping in\nbright sunli...,BULBASAUR can be seen napping in\nbright sunli...,BULBASAUR can be seen napping in bright\nsunli...,There is a plant seed on its back right\nfrom ...,A strange seed was planted on its back at\nbir...,"For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...",The seed on its back is filled\nwith nutrients...,It carries a seed on its back right\nfrom birt...,"For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...",A strange seed was planted on its back at birt...,"For some time after its birth, it grows by gai...",Bulbasaur can be seen napping in bright sunlig...,Bulbasaur can be seen napping in bright sunlig...,,,,,It can go for days without eating a single mor...,It can go for days without eating a single mor...,There is a plant seed on its back right from t...,"While it is young, it uses the nutrients that ...",,,


# - TABLA POKEMON-FORM

In [16]:
# Disabled export of the raw form table to the bronze layer
# (presumably the one-off step that created the file read below - TODO confirm).
#df_form_raw.to_parquet('./bronze/pokemon-form.parquet')

# Load the raw pokemon-form table from the bronze layer.
df_form_raw = pd.read_parquet('./bronze/pokemon-form.parquet')

In [17]:
# Inspect column names, non-null counts and dtypes of the raw form table.
df_form_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   form_name       1500 non-null   object
 1   form_names      1500 non-null   object
 2   form_order      1500 non-null   int64 
 3   id              1500 non-null   int64 
 4   is_battle_only  1500 non-null   bool  
 5   is_default      1500 non-null   bool  
 6   is_mega         1500 non-null   bool  
 7   name            1500 non-null   object
 8   names           1500 non-null   object
 9   order           1500 non-null   int64 
 10  pokemon         1500 non-null   object
 11  sprites         1500 non-null   object
 12  types           1500 non-null   object
 13  version_group   1500 non-null   object
dtypes: bool(3), int64(3), object(8)
memory usage: 133.4+ KB


In [20]:
def _reference_name(reference):
    """Return the 'name' entry of a nested reference, passing None through unchanged."""
    if reference is None:
        return reference
    return reference.get('name', None)


# Flatten the nested 'pokemon' / 'version_group' references into plain name columns.
df_form_clean = df_form_raw.assign(
    pokemon_name=df_form_raw['pokemon'].apply(_reference_name),
    version_group_name=df_form_raw['version_group'].apply(_reference_name),
)

# Remove the nested originals and the flags that are not carried into this table.
df_form_clean = df_form_clean.drop(columns=['pokemon', 'version_group', 'is_default', 'is_mega'])

# Order by id and use it as the index while also keeping it as a regular column.
df_form_clean = df_form_clean.sort_values(by='id').set_index('id', drop=False)

# Give the remaining columns form-specific names ('id' column becomes 'form_id';
# the index keeps the name 'id'). The index is intentionally not reset.
df_form_clean = df_form_clean.rename(
    columns={
        'id': 'form_id',
        'name': 'pokemon_form_name',
        'names': 'pokemon_form_names',
        'order': 'form_order_global',
        'form_order': 'form_order_species',
    }
)

# types
def _type_names_by_slot(type_entries):
    """Map each entry's 'slot' (0 when absent) to the name of its nested 'type'."""
    return {
        entry.get('slot', 0): entry.get('type', {}).get('name', None)
        for entry in type_entries
    }


# One {slot: type name} mapping per row; slot 1 is the first type, slot 2 the second.
_slot_type_names = df_form_clean['types'].apply(_type_names_by_slot)

df_form_clean = (
    df_form_clean
    .assign(
        first_type=_slot_type_names.apply(lambda slots: slots.get(1, None)),
        second_type=_slot_type_names.apply(lambda slots: slots.get(2, None)),
    )
    .drop(columns=['types'])
)

# generation
# Version groups listed per generation number; flattened below into the
# version-group -> generation lookup used for the mapping.
_version_groups_by_generation = {
    1: ['red-blue', 'red-green-japan', 'blue-japan', 'yellow'],
    2: ['gold-silver', 'crystal'],
    3: ['ruby-sapphire', 'emerald', 'firered-leafgreen', 'colosseum', 'xd'],
    4: ['diamond-pearl', 'platinum', 'heartgold-soulsilver'],
    5: ['black-white', 'black-2-white-2'],
    6: ['x-y', 'omega-ruby-alpha-sapphire'],
    7: ['sun-moon', 'ultra-sun-ultra-moon', 'lets-go-pikachu-lets-go-eevee'],
    8: [
        'sword-shield',
        'the-isle-of-armor',
        'the-crown-tundra',
        'brilliant-diamond-and-shining-pearl',
        'legends-arceus',
    ],
    9: ['scarlet-violet', 'the-teal-mask', 'the-indigo-disk'],
}

version_generation = {
    version_group: generation
    for generation, version_groups in _version_groups_by_generation.items()
    for version_group in version_groups
}

# Version groups missing from the lookup map to NaN.
# 'version_group_name' is kept on purpose (dropping it is left disabled).
df_form_clean = df_form_clean.assign(
    form_generation_number=df_form_clean['version_group_name'].map(version_generation)
)

# version names
# Replacement names for paired version groups: each listed key is rewritten
# with an explicit '-and-' between the two titles.
version_names = {
    'red-blue': 'red-and-blue',
    'red-green-japan': 'red-and-green-japan',
    'gold-silver': 'gold-and-silver',
    'ruby-sapphire': 'ruby-and-sapphire',
    'firered-leafgreen': 'firered-and-leafgreen',
    'diamond-pearl': 'diamond-and-pearl',
    'heartgold-soulsilver': 'heartgold-and-soulsilver',
    'black-white': 'black-and-white',
    'black-2-white-2': 'black-2-and-white-2',
    'x-y': 'x-and-y',
    'omega-ruby-alpha-sapphire': 'omega-ruby-and-alpha-sapphire',
    'sun-moon': 'sun-and-moon',
    'ultra-sun-ultra-moon': 'ultra-sun-and-ultra-moon',
    'lets-go-pikachu-lets-go-eevee': 'lets-go-pikachu-and-lets-go-eevee',
    'sword-shield': 'sword-and-shield',
    'scarlet-violet': 'scarlet-and-violet',
}

# Names without an entry in the table are kept exactly as they are.
df_form_clean = df_form_clean.assign(
    version_group_name=[
        version_names.get(group_name, group_name)
        for group_name in df_form_clean['version_group_name']
    ]
)

# sprite
# Keep only the 'front_default' entry of each sprites mapping, then discard the mapping.
_front_sprites = [
    sprite_map.get('front_default', None)
    for sprite_map in df_form_clean['sprites']
]
df_form_clean = df_form_clean.assign(sprite_form=_front_sprites).drop(columns=['sprites'])

# form name / pokemon_form_names
def _english_name(localized_entries):
    """Return the 'en' name from a list of localized name entries, or None.

    Every entry is first indexed by its language name (dashes replaced with
    underscores, 'lang' when the language has no name); the English entry is
    then looked up in that index.
    """
    names_by_language = {
        entry.get('language', {}).get('name', 'lang').replace('-', '_'): entry.get('name', None)
        for entry in localized_entries
    }
    return names_by_language.get('en', None)


# Both localized-name columns are reduced to their English text and then dropped.
for _source_column, _text_column in (
    ('form_names', 'form_name_text'),
    ('pokemon_form_names', 'pokemon_form_name_text'),
):
    df_form_clean = (
        df_form_clean
        .assign(**{_text_column: df_form_clean[_source_column].apply(_english_name)})
        .drop(columns=[_source_column])
    )


# error fixes
# (disabled) df_form_clean.at[10057, 'first_type'] = None  # arceus-unknown

# marowak totem
for _column, _value in (
    ('pokemon_name', 'marowak-totem-alola'),
    ('form_name', 'totem-alola'),
):
    df_form_clean.at[10268, _column] = _value

# minior
df_form_clean.at[774, 'is_battle_only'] = True

# form_name_text: display text for the mega and totem form names
_form_name_labels = {
    'mega': 'Mega',
    'mega-x': 'Mega X',
    'mega-y': 'Mega Y',
    'totem': 'Totem',
    'totem-alola': 'Totem',
    'totem-disguised': 'Totem',
    'totem-busted': 'Totem',
}
for _raw_form_name, _label in _form_name_labels.items():
    _matching_ids = df_form_clean.index[df_form_clean['form_name'] == _raw_form_name]
    df_form_clean.loc[_matching_ids, 'form_name_text'] = _label

# mothim: clear both form-name fields of form 414
for _column in ('form_name', 'pokemon_form_name'):
    df_form_clean.at[414, _column] = None

# remove rows
# The scatterbug and spewpa ids are contiguous runs, so they are written as ranges.
ids_to_drop = [
    10057,                   # arceus-unknown
    10269, 10270,            # mothim
    *range(10271, 10290),    # scatterbug
    *range(10290, 10309),    # spewpa
]
df_form_clean = df_form_clean.drop(index=ids_to_drop)


# xerneas
# Swap the contents of rows 716 and 10132: a copy of row 716 is set aside,
# row 716 is overwritten with row 10132's values, and row 10132 then receives
# the saved copy.
xerneas_serie = df_form_clean.loc[716].copy()
df_form_clean.loc[716] = df_form_clean.loc[10132]
df_form_clean.loc[10132] = xerneas_serie
# The swap also exchanged the 'form_id' values, so each row gets its own id back.
df_form_clean.at[716, 'form_id']=716
df_form_clean.at[10132, 'form_id']=10132

# fill in sprites
# These forms get a sprite URL built from their own id.
_sprite_url_prefix = 'https://raw.githubusercontent.com/ansmartin/poke-sprites/refs/heads/main/sprites/pk/'
for _form_id in (10132, 10065, 10344, 10345, 10447, 10448):
    df_form_clean.at[_form_id, 'sprite_form'] = f'{_sprite_url_prefix}{_form_id}.png'


# game versions
# Forms whose version group is overridden, keyed by the version group to assign.
_version_group_overrides = {
    'the-isle-of-armor': [
        891, 892, 893, 10323, 10324, 10360, 10361, 10364, 10366,
        10378, 10379, 10380, 10395, 10396,
    ],
    'the-crown-tundra': [
        894, 895, 896, 897, 898, 10328, 10329, 10330, 10331, 10362, 10363,
    ],
}
for _version_group, _form_ids in _version_group_overrides.items():
    df_form_clean.loc[_form_ids, 'version_group_name'] = _version_group

# save
df_form_clean.to_parquet('./silver/pokemon-form.parquet')

In [21]:
# Check columns, non-null counts and dtypes of the cleaned form table.
df_form_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1459 entries, 1 to 10475
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   form_name               1458 non-null   object
 1   form_order_species      1459 non-null   int64 
 2   form_id                 1459 non-null   int64 
 3   is_battle_only          1459 non-null   bool  
 4   pokemon_form_name       1458 non-null   object
 5   form_order_global       1459 non-null   int64 
 6   pokemon_name            1459 non-null   object
 7   version_group_name      1459 non-null   object
 8   first_type              1459 non-null   object
 9   second_type             758 non-null    object
 10  form_generation_number  1459 non-null   int64 
 11  sprite_form             1423 non-null   object
 12  form_name_text          497 non-null    object
 13  pokemon_form_name_text  467 non-null    object
dtypes: bool(1), int64(4), object(9)
memory usage: 193.3+ KB


In [22]:
# Preview the first row of the cleaned form table.
df_form_clean.head(1)

Unnamed: 0_level_0,form_name,form_order_species,form_id,is_battle_only,pokemon_form_name,form_order_global,pokemon_name,version_group_name,first_type,second_type,form_generation_number,sprite_form,form_name_text,pokemon_form_name_text
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,,1,1,False,bulbasaur,1,bulbasaur,red-and-blue,grass,poison,1,https://raw.githubusercontent.com/PokeAPI/spri...,,


In [12]:
#df_form_clean = pd.read_parquet('./silver/pokemon-form.parquet')

---

# - Merge de tablas

In [21]:
# Attach the species columns to every pokemon row (left join on the species name).
# Disabled alternatives kept for reference:
#   .merge(df_evo_clean, on='evolution_chain_id', how='left')
#   .merge(df_form_clean, on='pokemon_name', how='outer')  # or how='left'
df_merge = df_clean.merge(df_species_clean, how='left', on='species_name')

# Order by pokemon id and promote it to the index.
df_merge = df_merge.sort_values(by='id').set_index('id')


# creation of new columns

# evolves_from_pokemon_base_name
df_merge = df_merge.rename(
    columns={'evolves_from_species_name': 'evolves_from_pokemon_base_name'}
)

# Rows flagged as separated-from-species, mega or gmax get no pre-evolution.
_no_pre_evolution = (
    df_merge.is_separated_from_species | df_merge.is_mega | df_merge.is_gmax
)
df_merge.loc[_no_pre_evolution, 'evolves_from_pokemon_base_name'] = None

# Ids whose pre-evolution is the row with the immediately preceding id.
pokemon_ids = [
    10092, 10102, 10104, 10106, 10108, 10110, 10111, 10113,  # alolan
    10163, 10165, 10175,  # galar
    10230, 10232, 10239, 10242, 10248,  # hisui
]

# target id -> id of the row whose pokemon_name becomes the target's pre-evolution
_pre_evolution_source = {target_id: target_id - 1 for target_id in pokemon_ids}
_pre_evolution_source.update({
    862: 10175,    # galar linoone
    863: 10161,    # galar meowth
    864: 10173,    # galar corsola
    865: 10166,    # galar farfetch'd
    866: 10168,    # galar mr mime
    867: 10179,    # galar yamask
    10172: 10164,  # galar slowpoke
    10177: 10176,  # galar darumaka
    10178: 10176,  # galar darumaka
    10152: 10151,  # lycanroc dusk
    902: 10247,    # basculin-white-striped
    903: 10235,    # sneasel-hisui
    904: 10234,    # qwilfish-hisui
    980: 10253,    # wooper-paldea
})
# (disabled) df_merge.loc[10272, 'evolves_from_pokemon_base_name'] = None  # ursaluna bloodmoon

for _target_id, _source_id in _pre_evolution_source.items():
    df_merge.loc[_target_id, 'evolves_from_pokemon_base_name'] = df_merge.loc[_source_id].pokemon_name

# has mega / gmax / regional form
def _any_variety_has(df, flag_column):
    """Flag default rows whose varieties include at least one row with `flag_column` set.

    The previous implementation filtered the whole frame once per variety
    (`df[df.pokemon_name == x].iloc[0]`), which is quadratic in the number of
    rows. A name -> flag dictionary built once gives the same answer with
    constant-time lookups.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing `pokemon_name`, `varieties_list`, `is_default` and
        `flag_column`.
    flag_column : str
        Column whose value is looked up for every name in `varieties_list`.

    Returns
    -------
    pandas.Series
        For each row: True when any listed variety has a truthy `flag_column`
        AND the row itself has `is_default` set.

    Raises
    ------
    KeyError
        If a variety name that has to be inspected is missing from
        `pokemon_name` (the previous implementation raised IndexError in the
        same situation).
    """
    # keep='first' (the default) mirrors the former `.iloc[0]`: when a name is
    # repeated, the first matching row decides.
    flag_by_name = (
        df.drop_duplicates(subset='pokemon_name')
        .set_index('pokemon_name')[flag_column]
        .to_dict()
    )
    any_variety_flagged = df['varieties_list'].apply(
        lambda variety_names: any(flag_by_name[name] for name in variety_names)
    )
    return any_variety_flagged & df['is_default']


df_merge = df_merge.assign(
    has_mega=lambda df: _any_variety_has(df, 'is_mega'),
    has_gmax=lambda df: _any_variety_has(df, 'is_gmax'),
    has_regional=lambda df: _any_variety_has(df, 'is_regional'),
)

# number of generation
# Start from the species generation, then override it for these id ranges
# (label-based slices, both ends inclusive; the last range is open-ended).
df_merge['pokemon_generation_number'] = df_merge['species_generation_number']
_generation_by_id_range = (
    (slice(10033, 10090), 6),
    (slice(10091, 10157), 7),
    (slice(10158, 10249), 8),
    (slice(10250, None), 9),
)
for _id_range, _generation in _generation_by_id_range:
    df_merge.loc[_id_range, 'pokemon_generation_number'] = _generation

# evolutions
def _collect_evolutions(df):
    """Build, for every row, the list of base names that evolve from it.

    The previous implementation ran a nested `iterrows()` loop, materialising
    one row Series for every pair of rows. Here the parent -> children index is
    built in a single pass and each row only performs a dictionary lookup.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing `pokemon_base_name`, `evolves_from_pokemon_base_name`,
        `is_separated_from_species`, `is_mega` and `is_gmax`.

    Returns
    -------
    list of list
        One list per row, in row order. Rows flagged as separated-from-species,
        mega or gmax get an empty list; every other row gets the
        `pokemon_base_name` of each row whose `evolves_from_pokemon_base_name`
        equals its own `pokemon_base_name` (children keep the frame's row order).
    """
    children_by_parent = {}
    for parent_name, child_name in zip(
        df['evolves_from_pokemon_base_name'], df['pokemon_base_name']
    ):
        # Rows without a pre-evolution (None / NaN) are nobody's child.
        if pd.notna(parent_name):
            children_by_parent.setdefault(parent_name, []).append(child_name)

    evolutions = []
    for base_name, separated, mega, gmax in zip(
        df['pokemon_base_name'],
        df['is_separated_from_species'],
        df['is_mega'],
        df['is_gmax'],
    ):
        if separated or mega or gmax:
            evolutions.append([])
        else:
            # Copy so that rows sharing a base name never share one list object.
            evolutions.append(list(children_by_parent.get(base_name, [])))
    return evolutions


df_merge = df_merge.assign(evolutions=_collect_evolutions)



# error fixes

# gender differences
# (disabled) df_merge.loc[(df_merge[~df_merge.is_default]).index, 'has_gender_differences'] = False

# evolutions: hand-written replacements for the computed lists, keyed by pokemon id
_evolution_overrides = {
    206: ['dudunsparce'],
    412: ['wormadam', 'mothim'],
    554: ['darmanitan'],
    677: ['meowstic'],
    710: ['gourgeist'],
    744: ['lycanroc'],  # instead of ['lycanroc-midday', 'lycanroc-midnight']
    848: ['toxtricity'],  # instead of ['toxtricity-amped', 'toxtricity-low-key']
    891: ['urshifu'],
    915: ['oinkologne'],
    924: ['maushold'],
    963: ['palafin'],
    10027: ['gourgeist'],
    10028: ['gourgeist'],
    10029: ['gourgeist'],
    10176: ['darmanitan-galar'],
    10247: ['basculegion'],
}
for _pokemon_id, _evolution_names in _evolution_overrides.items():
    df_merge.at[_pokemon_id, 'evolutions'] = _evolution_names

# colors
# Manual colour overrides, keyed by pokemon id.
_color_overrides = {
    10004: 'brown', 10005: 'red', 10013: 'red', 10014: 'blue', 10015: 'white',
    10017: 'blue', 10025: 'white', 10034: 'black', 10062: 'purple', 10063: 'purple',
    10069: 'white', 10091: 'black', 10092: 'black', 10093: 'black', 10100: 'brown',
    10101: 'white', 10102: 'blue', 10103: 'white', 10104: 'blue', 10107: 'blue',
    10108: 'blue', 10109: 'gray', 10110: 'gray', 10111: 'gray', 10112: 'green',
    10113: 'green', 10115: 'purple', 10118: 'black', 10120: 'black', 10123: 'yellow',
    10124: 'pink', 10125: 'purple', 10126: 'red', 10136: 'red', 10137: 'red',
    10138: 'yellow', 10139: 'green', 10140: 'blue', 10141: 'blue', 10142: 'purple',
    10147: 'red', 10149: 'purple', 10155: 'yellow', 10156: 'blue', 10157: 'yellow',
    10161: 'brown', 10162: 'white', 10163: 'white', 10167: 'gray', 10168: 'white',
    10169: 'purple', 10171: 'red', 10173: 'white', 10174: 'white', 10176: 'white',
    10177: 'white', 10178: 'white', 10181: 'black', 10193: 'white', 10194: 'black',
    10250: 'black', 10251: 'black', 10252: 'black', 10253: 'brown', 10254: 'brown',
    978: 'red',
    10259: 'yellow', 10260: 'blue', 10261: 'yellow', 10262: 'white',
    999: 'red',
    10263: 'gray', 10273: 'blue', 10274: 'red', 10275: 'gray',
}
for _pokemon_id, _color in _color_overrides.items():
    df_merge.at[_pokemon_id, 'color_name'] = _color


# save
df_merge.to_parquet('./silver/pokemon-join-species.parquet')

In [22]:
# Summarise the merged table; max_cols is raised to 300 so that the
# per-column listing is not truncated for this wide frame.
df_merge.info(max_cols=300)

<class 'pandas.core.frame.DataFrame'>
Index: 1302 entries, 1 to 10277
Data columns (total 123 columns):
 #    Column                          Non-Null Count  Dtype  
---   ------                          --------------  -----  
 0    base_experience                 1302 non-null   int64  
 1    game_indices                    1302 non-null   object 
 2    height                          1302 non-null   int64  
 3    held_items                      1302 non-null   object 
 4    is_default                      1302 non-null   bool   
 5    location_area_encounters        1302 non-null   object 
 6    moves                           1302 non-null   object 
 7    pokemon_name                    1302 non-null   object 
 8    pokemon_order                   1302 non-null   int64  
 9    past_abilities                  1302 non-null   object 
 10   past_types                      1302 non-null   object 
 11   sprites                         1302 non-null   object 
 12   weight                

In [23]:
# Preview the first row of the pokemon + species table.
df_merge.head(1)

Unnamed: 0_level_0,base_experience,game_indices,height,held_items,is_default,location_area_encounters,moves,pokemon_name,pokemon_order,past_abilities,past_types,sprites,weight,cry_latest,cry_legacy,forms_list,first_type,second_type,stat_hp_base,stat_attack_base,stat_defense_base,stat_special_attack_base,stat_special_defense_base,stat_speed_base,stat_hp_effort,stat_attack_effort,stat_defense_effort,stat_special_attack_effort,stat_special_defense_effort,stat_speed_effort,stats_total,effort_total,first_ability,second_ability,hidden_ability,past_first_abilities,past_second_abilities,past_hidden_abilities,moves_list,species_name,is_mega,is_gmax,is_totem,is_regional,is_separated_from_species,sprite_default,pokemon_base_name,base_happiness,capture_rate,forms_switchable,gender_rate,has_gender_differences,hatch_counter,species_id,is_baby,is_legendary,is_mythical,species_order,pal_park_encounters,pokedex_numbers,color_name,growth_rate_name,habitat_name,shape_name,evolves_from_pokemon_base_name,first_egg_group,second_egg_group,species_generation_number,varieties_list,evolution_chain_id,forms_description,genus,species_name_ja_Hrkt,species_name_roomaji,species_name_ko,species_name_zh_Hant,species_name_fr,species_name_de,species_name_es,species_name_it,species_name_en,species_name_ja,species_name_zh_Hans,red_en,blue_en,yellow_en,gold_en,silver_en,crystal_en,ruby_en,sapphire_en,emerald_en,firered_en,leafgreen_en,diamond_en,pearl_en,platinum_en,heartgold_en,soulsilver_en,black_en,white_en,black_2_en,white_2_en,x_en,y_en,omega_ruby_en,alpha_sapphire_en,sun_en,moon_en,ultra_sun_en,ultra_moon_en,lets_go_pikachu_en,lets_go_eevee_en,sword_en,shield_en,legends_arceus_en,scarlet_en,violet_en,has_mega,has_gmax,has_regional,pokemon_generation_number,evolutions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1
1,64,"[{'game_index': 153, 'version': {'name': 'red'...",7,[],True,https://pokeapi.co/api/v2/pokemon/1/encounters,"[{'move': {'name': 'razor-wind', 'url': 'https...",bulbasaur,1,"[{'abilities': [{'ability': None, 'is_hidden':...",[],{'back_default': 'https://raw.githubuserconten...,69.0,https://raw.githubusercontent.com/PokeAPI/crie...,https://raw.githubusercontent.com/PokeAPI/crie...,[bulbasaur],grass,poison,45,49,49,65,65,45,0,0,0,1,0,0,318,1,overgrow,,chlorophyll,[],[],[],"[razor-wind, swords-dance, cut, bind, vine-whi...",bulbasaur,False,False,False,False,False,https://raw.githubusercontent.com/PokeAPI/spri...,bulbasaur,50,45,False,1,False,20,1,False,False,False,1,"[{'area': {'name': 'field', 'url': 'https://po...","[{'entry_number': 1, 'pokedex': {'name': 'nati...",green,medium-slow,grassland,quadruped,,monster,plant,1,[bulbasaur],1,,Seed Pokémon,フシギダネ,Fushigidane,이상해씨,妙蛙種子,Bulbizarre,Bisasam,Bulbasaur,Bulbasaur,Bulbasaur,フシギダネ,妙蛙种子,A strange seed was\nplanted on its\nback at bi...,A strange seed was\nplanted on its\nback at bi...,It can go for days\nwithout eating a\nsingle m...,The seed on its\nback is filled\nwith nutrient...,It carries a seed\non its back right\nfrom bir...,"While it is young,\nit uses the\nnutrients tha...",BULBASAUR can be seen napping in\nbright sunli...,BULBASAUR can be seen napping in\nbright sunli...,BULBASAUR can be seen napping in bright\nsunli...,There is a plant seed on its back right\nfrom ...,A strange seed was planted on its back at\nbir...,"For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...",The seed on its back is filled\nwith nutrients...,It carries a seed on its back right\nfrom birt...,"For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...",A strange seed was planted on 
its back at birt...,"For some time after its birth, it grows by gai...",Bulbasaur can be seen napping in bright sunlig...,Bulbasaur can be seen napping in bright sunlig...,,,,,It can go for days without eating a single mor...,It can go for days without eating a single mor...,There is a plant seed on its back right from t...,"While it is young, it uses the nutrients that ...",,,,False,False,False,1,[ivysaur]


In [26]:
#df_merge = pd.read_parquet('./silver/pokemon-join-species.parquet')

In [27]:
#df_form_clean = pd.read_parquet('./silver/pokemon-form.parquet')

In [25]:
# The form table already carries its own first_type / second_type, so those two
# columns are removed from the pokemon side before joining.
_pokemon_columns = df_merge.drop(columns=['first_type', 'second_type'])

# One row per form, enriched with the data of the pokemon it belongs to.
df_full = df_form_clean.merge(_pokemon_columns, how='left', on='pokemon_name')

# Order by form id and promote it to the index.
df_full = df_full.sort_values(by='form_id').set_index('form_id')


# error fixes

# is_default: form ids up to 9999 are marked as default, ids from 10000 on are not
# (label-based slices on the form_id index).
for _form_id_range, _is_default in (
    (slice(None, 9999), True),
    (slice(10000, None), False),
):
    df_full.loc[_form_id_range, 'is_default'] = _is_default

# sprites
# Every form listed after the first one in a pokemon's forms_list ...
_multi_form_lists = df_merge.loc[df_merge.forms_list.apply(len) > 1, 'forms_list']
lista_formas = _multi_form_lists.apply(lambda values: values[1:]).explode().to_list()

# ... is looked up by name in df_full; forms with no matching row are skipped.
lista_indices = [
    df_full.index[df_full.pokemon_form_name == form_name]
    for form_name in lista_formas
]
lista_ids = [int(matches[0]) for matches in lista_indices if len(matches) > 0]

# For those forms the form-specific sprite replaces the pokemon's default sprite.
for x in lista_ids:
    df_full.at[x, 'sprite_default'] = df_full.at[x, 'sprite_form']

# (disabled) df_full = df_full.drop(columns=['sprite_form'])

# evolutions
df_full.at[10065, 'evolutions'] = []

# save
df_full.to_parquet('./silver/merged.parquet')

In [26]:
# Display the first row of the merged silver table (df_full).
df_full.head(1)

Unnamed: 0_level_0,form_name,form_order_species,is_battle_only,pokemon_form_name,form_order_global,pokemon_name,version_group_name,first_type,second_type,form_generation_number,sprite_form,form_name_text,pokemon_form_name_text,base_experience,game_indices,height,held_items,is_default,location_area_encounters,moves,pokemon_order,past_abilities,past_types,sprites,weight,cry_latest,cry_legacy,forms_list,stat_hp_base,stat_attack_base,stat_defense_base,stat_special_attack_base,stat_special_defense_base,stat_speed_base,stat_hp_effort,stat_attack_effort,stat_defense_effort,stat_special_attack_effort,stat_special_defense_effort,stat_speed_effort,stats_total,effort_total,first_ability,second_ability,hidden_ability,past_first_abilities,past_second_abilities,past_hidden_abilities,moves_list,species_name,is_mega,is_gmax,is_totem,is_regional,is_separated_from_species,sprite_default,pokemon_base_name,base_happiness,capture_rate,forms_switchable,gender_rate,has_gender_differences,hatch_counter,species_id,is_baby,is_legendary,is_mythical,species_order,pal_park_encounters,pokedex_numbers,color_name,growth_rate_name,habitat_name,shape_name,evolves_from_pokemon_base_name,first_egg_group,second_egg_group,species_generation_number,varieties_list,evolution_chain_id,forms_description,genus,species_name_ja_Hrkt,species_name_roomaji,species_name_ko,species_name_zh_Hant,species_name_fr,species_name_de,species_name_es,species_name_it,species_name_en,species_name_ja,species_name_zh_Hans,red_en,blue_en,yellow_en,gold_en,silver_en,crystal_en,ruby_en,sapphire_en,emerald_en,firered_en,leafgreen_en,diamond_en,pearl_en,platinum_en,heartgold_en,soulsilver_en,black_en,white_en,black_2_en,white_2_en,x_en,y_en,omega_ruby_en,alpha_sapphire_en,sun_en,moon_en,ultra_sun_en,ultra_moon_en,lets_go_pikachu_en,lets_go_eevee_en,sword_en,shield_en,legends_arceus_en,scarlet_en,violet_en,has_mega,has_gmax,has_regional,pokemon_generation_number,evolutions
form_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1
1,,1,False,bulbasaur,1,bulbasaur,red-and-blue,grass,poison,1,https://raw.githubusercontent.com/PokeAPI/spri...,,,64,"[{'game_index': 153, 'version': {'name': 'red'...",7,[],True,https://pokeapi.co/api/v2/pokemon/1/encounters,"[{'move': {'name': 'razor-wind', 'url': 'https...",1,"[{'abilities': [{'ability': None, 'is_hidden':...",[],{'back_default': 'https://raw.githubuserconten...,69.0,https://raw.githubusercontent.com/PokeAPI/crie...,https://raw.githubusercontent.com/PokeAPI/crie...,[bulbasaur],45,49,49,65,65,45,0,0,0,1,0,0,318,1,overgrow,,chlorophyll,[],[],[],"[razor-wind, swords-dance, cut, bind, vine-whi...",bulbasaur,False,False,False,False,False,https://raw.githubusercontent.com/PokeAPI/spri...,bulbasaur,50,45,False,1,False,20,1,False,False,False,1,"[{'area': {'name': 'field', 'url': 'https://po...","[{'entry_number': 1, 'pokedex': {'name': 'nati...",green,medium-slow,grassland,quadruped,,monster,plant,1,[bulbasaur],1,,Seed Pokémon,フシギダネ,Fushigidane,이상해씨,妙蛙種子,Bulbizarre,Bisasam,Bulbasaur,Bulbasaur,Bulbasaur,フシギダネ,妙蛙种子,A strange seed was\nplanted on its\nback at bi...,A strange seed was\nplanted on its\nback at bi...,It can go for days\nwithout eating a\nsingle m...,The seed on its\nback is filled\nwith nutrient...,It carries a seed\non its back right\nfrom bir...,"While it is young,\nit uses the\nnutrients tha...",BULBASAUR can be seen napping in\nbright sunli...,BULBASAUR can be seen napping in\nbright sunli...,BULBASAUR can be seen napping in bright\nsunli...,There is a plant seed on its back right\nfrom ...,A strange seed was planted on its back at\nbir...,"For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...",The seed on its back is filled\nwith nutrients...,It carries a seed on its back right\nfrom birt...,"For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by ga...","For some time after its birth, it\ngrows by 
ga...","For some time after its birth, it\ngrows by ga...",A strange seed was planted on its back at birt...,"For some time after its birth, it grows by gai...",Bulbasaur can be seen napping in bright sunlig...,Bulbasaur can be seen napping in bright sunlig...,,,,,It can go for days without eating a single mor...,It can go for days without eating a single mor...,There is a plant seed on its back right from t...,"While it is young, it uses the nutrients that ...",,,,False,False,False,1,[ivysaur]


---

# CAPA GOLD

In [27]:
# Gold table "pokemon-species" (original note: pokemon 1-1025).
# Rows: form_id labels 1..9999 of df_full (label-based slice).
# Columns: the list below, in export order. Names kept as comments are not exported.
species_columns = [
    # identifiers and generations
    'species_name', 'pokemon_base_name', 'pokemon_name', 'species_id',
    #'pokemon_order', 'species_order',
    'species_generation_number', 'pokemon_generation_number',

    # general attributes
    'base_happiness', 'base_experience',
    'height', 'weight',
    'shape_name', 'color_name', 'growth_rate_name', 'habitat_name',

    # types and abilities
    'first_type', 'second_type',
    'first_ability', 'second_ability', 'hidden_ability',
    #'past_first_abilities', 'past_second_abilities', 'past_hidden_abilities',

    # moves, varieties and forms
    'moves_list',
    'varieties_list', 'forms_list',
    'forms_description',

    # base stats
    'stat_hp_base',
    'stat_attack_base',
    'stat_defense_base',
    'stat_special_attack_base',
    'stat_special_defense_base',
    'stat_speed_base',

    # effort values
    'stat_hp_effort',
    'stat_attack_effort',
    'stat_defense_effort',
    'stat_special_attack_effort',
    'stat_special_defense_effort',
    'stat_speed_effort',

    # totals
    'stats_total', 'effort_total',
    #'is_mega', 'is_gmax', 'is_totem', 'is_regional', 'is_separated_from_species',

    # sprite
    'sprite_default',
    #'forms_switchable',

    # capture, gender and breeding
    'capture_rate',
    'gender_rate', 'has_gender_differences',
    'hatch_counter', 'first_egg_group', 'second_egg_group',
    'is_baby', 'is_legendary', 'is_mythical',

    # localized species names
    #'species_name_ja_Hrkt', 'species_name_roomaji', 'species_name_ko',
    #'species_name_zh_Hant', 'species_name_fr', 'species_name_de',
    #'species_name_es', 'species_name_it', 'species_name_en',
    #'species_name_ja', 'species_name_zh_Hans',

    # genus
    'genus',
    #'genus_en', 'genus_ja_Hrkt','genus_ko', 'genus_zh_Hant', 'genus_fr', 'genus_de', 'genus_es','genus_it', 'genus_ja', 'genus_zh_Hans',

    # English per-game text columns
    'red_en', 'blue_en', 'yellow_en',
    'gold_en', 'silver_en', 'crystal_en',
    'ruby_en', 'sapphire_en', 'emerald_en',
    'firered_en', 'leafgreen_en',
    'diamond_en', 'pearl_en', 'platinum_en',
    'heartgold_en', 'soulsilver_en',
    'black_en', 'white_en', 'black_2_en', 'white_2_en',
    'x_en', 'y_en',
    'omega_ruby_en', 'alpha_sapphire_en',
    'sun_en', 'moon_en', 'ultra_sun_en', 'ultra_moon_en',
    'lets_go_pikachu_en', 'lets_go_eevee_en',
    'sword_en', 'shield_en', 'legends_arceus_en',
    'scarlet_en', 'violet_en',

    # form-availability flags and evolution links
    'has_mega', 'has_gmax', 'has_regional',
    'evolves_from_pokemon_base_name', 'evolutions',
]

gold_species = df_full.loc[1:9999, species_columns]

# save
gold_species.to_parquet('./gold/pokemon-species.parquet')
#gold_species.to_parquet('./streamlit/data/pokemon-species.parquet')
#gold_species.to_parquet('./streamlit/data/pokemon-species.parquet')

In [28]:
# Gold table "pokemon-forms" (original note: pokemon 1-1025 & all forms).
# Rows: every row of df_full. Columns: the list below, in export order.
forms_columns = [
    # pokemon-level attributes
    'pokemon_name', 'form_name',
    'first_type', 'second_type',
    'first_ability', 'second_ability', 'hidden_ability',
    'pokemon_base_name','pokemon_generation_number',
    'height','weight',
    'color_name',
    'base_experience',

    # base stats
    'stat_hp_base',
    'stat_attack_base',
    'stat_defense_base',
    'stat_special_attack_base',
    'stat_special_defense_base',
    'stat_speed_base',

    # effort values
    'stat_hp_effort',
    'stat_attack_effort',
    'stat_defense_effort',
    'stat_special_attack_effort',
    'stat_special_defense_effort',
    'stat_speed_effort',

    # totals
    'stats_total', 'effort_total',

    # evolution links and form flags
    'evolves_from_pokemon_base_name', 'evolutions',
    'is_default',
    'is_mega', 'is_gmax', 'is_regional', 'is_totem', 'is_separated_from_species',
    'is_battle_only',

    # species-level attributes
    'species_name','species_generation_number',
    'shape_name',
    'base_happiness', 'capture_rate', 'growth_rate_name',
    'hatch_counter', 'first_egg_group', 'second_egg_group',
    'gender_rate', 'has_gender_differences',
    'is_baby', 'is_legendary', 'is_mythical',
    'has_mega', 'has_gmax', 'has_regional',

    # form-level attributes
    'form_generation_number', 'version_group_name',
    'form_name_text','pokemon_form_name','pokemon_form_name_text',

    # sprite
    'sprite_default',
]

gold_list = df_full[forms_columns]

# save
gold_list.to_parquet('./gold/pokemon-forms.parquet')
#gold_list.to_parquet('./streamlit/data/pokemon-forms.parquet')
#gold_list.to_parquet('./streamlit/data/pokemon-forms.parquet')

In [30]:
# Display the first row of the gold forms table (gold_list).
gold_list.head(1)

Unnamed: 0_level_0,pokemon_name,form_name,first_type,second_type,first_ability,second_ability,hidden_ability,pokemon_base_name,pokemon_generation_number,height,weight,color_name,base_experience,stat_hp_base,stat_attack_base,stat_defense_base,stat_special_attack_base,stat_special_defense_base,stat_speed_base,stat_hp_effort,stat_attack_effort,stat_defense_effort,stat_special_attack_effort,stat_special_defense_effort,stat_speed_effort,stats_total,effort_total,evolves_from_pokemon_base_name,evolutions,is_default,is_mega,is_gmax,is_regional,is_totem,is_separated_from_species,is_battle_only,species_name,species_generation_number,shape_name,base_happiness,capture_rate,growth_rate_name,hatch_counter,first_egg_group,second_egg_group,gender_rate,has_gender_differences,is_baby,is_legendary,is_mythical,has_mega,has_gmax,has_regional,form_generation_number,version_group_name,form_name_text,pokemon_form_name,pokemon_form_name_text,sprite_default
form_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1
1,bulbasaur,,grass,poison,overgrow,,chlorophyll,bulbasaur,1,7,69.0,green,64,45,49,49,65,65,45,0,0,0,1,0,0,318,1,,[ivysaur],True,False,False,False,False,False,False,bulbasaur,1,quadruped,50,45,medium-slow,20,monster,plant,1,False,False,False,False,False,False,False,1,red-and-blue,,bulbasaur,,https://raw.githubusercontent.com/PokeAPI/spri...


In [None]:
# test

In [29]:
#gold_list = pd.read_parquet('./streamlit/data/pokemon-forms.parquet')

In [33]:
# (df_merge.is_default) | (df_merge.is_mega) | (df_merge.is_gmax) | (df_merge.is_regional & ~df_merge.is_totem) | (df_merge.pokemon_name=='eternatus-eternamax')