In [11]:
import pandas as pd
import numpy as np

# Load the processed records of sentences containing an "evolve" construction.
evolve_df = pd.read_csv('data-processed/pokemon_evolve.csv')

# Encode the surface form of the verb as an integer code. Series.map leaves
# NaN for any form that is not a key of the mapping.
word_mapping = {'evolve': 1, 'evolved': 2, 'evolving': 3}
evolve_df['evolve_word_code'] = evolve_df['evolve_word_form'].map(word_mapping)

# Preview goes last: a bare expression is only rendered when it is the final
# statement of a cell, and placing it here also shows the new code column.
evolve_df.head()

In [17]:
# Columns of the pokemon list that are needed for the analysis
columns_to_read = [
    'Dex_No', 'Name', 'Stage', 'Max_Stage', 'Is_Final_Stage',
    'Candy_Reqs', 'Evolution_Line', 'Generation', 'Is_Legendary',
]

# Load only those columns and lower-case every header in one pass
pokemon_metadata = pd.read_csv("pokemon_list.csv", usecols=columns_to_read).rename(columns=str.lower)

# Lower-case the text columns and cast the legendary flag to int64
pokemon_metadata = pokemon_metadata.assign(
    name=pokemon_metadata['name'].str.lower(),
    evolution_line=pokemon_metadata['evolution_line'].str.lower(),
    is_legendary=pokemon_metadata['is_legendary'].astype('int64'),
)

# Number each evolution line by looking up the dex number of the pokemon
# whose name matches the evolution_line value
dex_mapping = {
    pokemon_name: dex_number
    for pokemon_name, dex_number in zip(pokemon_metadata['name'], pokemon_metadata['dex_no'])
}
pokemon_metadata['evolution_line_no'] = pokemon_metadata['evolution_line'].map(dex_mapping)

pokemon_metadata.dtypes

dex_no                int64
name                 object
stage                 int64
max_stage             int64
is_final_stage        int64
candy_reqs            int64
evolution_line       object
generation            int64
is_legendary          int64
evolution_line_no     int64
dtype: object

In [19]:
# Attach the metadata to every evolve record by matching the lemma against
# the metadata name; the right-hand join key is redundant afterwards.
evolve_pokemon_df = evolve_df.merge(
    pokemon_metadata,
    left_on='pokemon_lemma',
    right_on='name',
).drop(columns='name')
evolve_pokemon_df.head()

Unnamed: 0,pokemon_lemma,evolve_word_form,evolve_frame,pokemon_name_in_text,sentence,evolve_word_code,dex_no,stage,max_stage,is_final_stage,candy_reqs,evolution_line,generation,is_legendary,evolution_line_no
0,gyarados,evolved,"['EVOLVE', '7', 'POKEMON']",garados,"['i', ""'ve"", 'evolved', '7', 'garados', ',', '...",2,130,2,2,1,400,magikarp,1,0,129
1,dratini,evolve,"['EVOLVE', 'my', 'very', 'first', 'POKEMON']",dratini,"['i', ""'m"", 'about', 'to', 'evolve', 'my', 've...",1,147,1,3,0,0,dratini,1,0,147
2,dragonite,evolve,"['EVOLVE', 'a', 'second', 'POKEMON']",dragonite,"['i', 'had', 'been', 'waiting', 'forever', 'fo...",1,149,3,3,1,100,dratini,1,0,147
3,dragonite,evolved,"['EVOLVE', 'my', 'very', 'first', 'POKEMON']",dragonite,"['i', 'just', 'evolved', 'my', 'very', 'first'...",2,149,3,3,1,100,dratini,1,0,147
4,geodude,evolve,"['EVOLVE', 'a', '35', 'POKEMON']",geodude,"['ivs', 'matter', ',', 'sure', ',', 'but', 'th...",1,74,1,3,0,0,geodude,1,0,74


In [20]:
# The 15 pokemon lemmas that occur most often, with their row counts
(
    evolve_pokemon_df['pokemon_lemma']
    .value_counts()
    .iloc[:15]
)

pokemon_lemma
eevee        522
pidgey       478
magikarp     258
gyarados     144
dragonite    108
pikachu       98
pidgeotto     80
dratini       71
vaporeon      57
hypno         53
jolteon       48
dragonair     47
weedle        44
squirtle      44
espeon        43
Name: count, dtype: int64

In [10]:
# The 15 most frequent evolution lines, as a share of all rows
(
    evolve_pokemon_df['evolution_line']
    .value_counts(normalize=True)
    .iloc[:15]
)

evolution_line
eevee         0.201738
pidgey        0.160467
magikarp      0.109150
dratini       0.061363
pikachu       0.035026
charmander    0.025523
drowzee       0.025523
squirtle      0.022536
zubat         0.017377
weedle        0.017377
larvitar      0.014662
bulbasaur     0.014662
gastly        0.014662
abra          0.012761
machop        0.011947
Name: proportion, dtype: float64

In [6]:
# The 15 syntactic frames that occur most often around the evolve verb
(
    evolve_pokemon_df['evolve_frame']
    .value_counts()
    .iloc[:15]
)

evolve_frame
['EVOLVE', 'POKEMON']                        604
['EVOLVE', 'a', 'POKEMON']                   538
['EVOLVE', 'my', 'POKEMON']                  287
['EVOLVE', 'an', 'POKEMON']                  133
['EVOLVE', 'my', 'first', 'POKEMON']          69
['EVOLVE', 'the', 'POKEMON']                  66
['EVOLVE', 'your', 'POKEMON']                 59
['EVOLVE', '4', 'POKEMON']                    35
['EVOLVE', '2', 'POKEMON']                    34
['EVOLVE', 'two', 'POKEMON']                  30
['EVOLVE', '3', 'POKEMON']                    29
['EVOLVE', 'another', 'POKEMON']              29
['EVOLVE', 'a', 'bunch', 'of', 'POKEMON']     25
['EVOLVE', 'one', 'POKEMON']                  25
['EVOLVE', 'that', 'POKEMON']                 19
Name: count, dtype: int64

In [7]:
# Count pokemon lemmas separately within each form of the evolve verb
(
    evolve_pokemon_df
    .groupby('evolve_word_form')['pokemon_lemma']
    .value_counts()
)

evolve_word_form  pokemon_lemma
evolve            pidgey           267
                  eevee            214
                  magikarp         133
                  pikachu           58
                  dragonite         54
                                  ... 
evolving          tyrogue            1
                  venusaur           1
                  wailmer            1
                  wartortle          1
                  weepinbell         1
Name: count, Length: 496, dtype: int64

In [8]:
# First rows where max_stage == 1.
# Filter with the boolean mask itself: np.where yields integer positions,
# whereas .loc selects by index label, so feeding positions to .loc is only
# correct while the frame still has a default 0..n-1 index.
evolve_pokemon_df.loc[evolve_pokemon_df['max_stage'] == 1].head(5)

Unnamed: 0,pokemon_lemma,evolve_word_form,evolve_frame,pokemon_name_in_text,sentence,evolve_word_code,dex_no,stage,max_stage,is_final_stage,candy_reqs,evolution_line,generation,is_legendary,evolution_line_no
42,mew,evolve,"['EVOLVE', 'your', 'POKEMON']",mew,"['i', 'seem', 'to', 'be', 'getting', 'more', '...",1,151,0,1,5,0,mew,1,0,151
185,celebi,evolve,"['EVOLVE', 'per', 'battleshiny', 'POKEMON']",celebi,"['100', 'candy', 'a', 'mega', 'evolve', 'per',...",1,251,0,1,5,0,celebi,2,0,251
295,absol,evolve,"['EVOLVE', 'itmawile', 'and', 'POKEMON']",absol,"['party', 'hat', 'pichu', 'also.why', 'is', 'a...",1,359,0,1,5,0,absol,3,0,359
312,luvdisc,evolved,"['EVOLVE', 'form', 'of', 'POKEMON']",luvdisc,"['dunno', 'that', 'much', 'lore', 'though', '....",2,370,0,1,5,0,luvdisc,3,0,370
568,mew,evolve,"['EVOLVE', 'POKEMON']",mew,"['so', 'is', 'there', 'a', 'way', 'to', 'evolv...",1,151,0,1,5,0,mew,1,0,151


In [9]:
# First rows where max_stage == 2 and is_final_stage == 1.
# Filter with the boolean mask itself: np.where yields integer positions,
# whereas .loc selects by index label, so feeding positions to .loc is only
# correct while the frame still has a default 0..n-1 index.
evolve_pokemon_df.loc[
    (evolve_pokemon_df['max_stage'] == 2)
    & (evolve_pokemon_df['is_final_stage'] == 1)
].head(5)

Unnamed: 0,pokemon_lemma,evolve_word_form,evolve_frame,pokemon_name_in_text,sentence,evolve_word_code,dex_no,stage,max_stage,is_final_stage,candy_reqs,evolution_line,generation,is_legendary,evolution_line_no
0,gyarados,evolved,"['EVOLVE', '7', 'POKEMON']",garados,"['i', ""'ve"", 'evolved', '7', 'garados', ',', '...",2,130,2,2,1,400,magikarp,1,0,130
7,jolteon,evolve,"['EVOLVE', 'more', 'than', 'one', 'POKEMON']",jolteon,"['nope', ',', 'it', 'only', 'works', 'once', '...",1,135,2,2,1,25,eevee,1,0,471
9,vaporeon,evolved,"['EVOLVE', 'POKEMON']",vaporeon,"['[', 'just', 'evolved', 'vaporeon', ',', '100...",2,134,2,2,1,25,eevee,1,0,471
14,umbreon,evolve,"['EVOLVE', 'your', 'POKEMON']",umbreon,"['so', 'you', 'can', 'evolve', 'your', 'umbreo...",1,197,2,2,1,25,eevee,2,0,471
23,gyarados,evolved,"['EVOLVE', 'my', 'fifth', 'POKEMON']",gyarados,"['!', 'bye', 'bye', 'bite', 'on', 'all', 'of',...",2,130,2,2,1,400,magikarp,1,0,130
