In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import os
import re
from scipy.stats import linregress

In [None]:
# Read the Pokemon CSV from the sibling Resources folder and preview the first rows.
csv_path_parts = ('..', 'Resources', 'pokemon.csv')
file_to_load = os.path.join(*csv_path_parts)
pokemon_df = pd.read_csv(file_to_load)
pokemon_df.head()

In [None]:
pokemon_df.columns

In [None]:
pokemon_df.isnull().sum()

In [None]:
pokemon_df[pokemon_df['weight_kg'].isna()]

In [None]:
len(pokemon_df)

In [None]:
# Percentage of rows with no weight_kg value; the author judged the result
# (about 2.5% of the dataset) large enough to be worth cleaning.
missing_weight_rows = int(pokemon_df['weight_kg'].isna().sum())
100 * missing_weight_rows / len(pokemon_df)

In [None]:
pokemon_df[pokemon_df['height_m'].isna()]

In [None]:
pokemon_df[pokemon_df['percentage_male'].isna()]

In [None]:
pokemon_df.columns.get_loc('percentage_male')

In [None]:
# # removed percentage_male column; will undo this in future
# clean_pokemon_df = pokemon_df.loc[:,pokemon_df.columns != 'percentage_male']
# 'percentage_male' in pokemon_df.columns


In [None]:
# Check that the rows missing a height are exactly the rows missing a weight.
# The two null masks share pokemon_df's index, so they can always be compared;
# comparing the two filtered 'name' Series with == raises ValueError as soon as
# the filtered rows differ, i.e. precisely when the answer would be "no".
height_missing = pokemon_df['height_m'].isna()
weight_missing = pokemon_df['weight_kg'].isna()
bool((height_missing == weight_missing).all())

In [None]:
# Hand-entered (height_m, weight_kg) values, keyed by row label in pokemon_df.
# Presumably these are the rows the null checks above reported as missing both
# measurements — TODO confirm against the isna() output.
manual_height_weight = {
    18: (0.3, 3.5),
    19: (0.7, 18.5),
    25: (0.8, 30.0),
    26: (0.6, 12.0),
    27: (1.0, 29.5),
    36: (0.6, 9.9),
    37: (1.1, 19.9),
    49: (0.2, 0.8),
    50: (0.7, 33.3),
    51: (0.4, 4.2),
    52: (1.0, 32.0),
    73: (0.4, 20.0),
    74: (1.0, 105.0),
    75: (1.4, 300.0),
    87: (0.9, 30.0),
    88: (1.2, 30.0),
    102: (2.0, 120.0),
    104: (1.0, 45.0),
    719: (0.5, 9.0),
    744: (0.9, 25.0),
}

# One table plus one loop instead of twenty copy-pasted cells: a value can be
# corrected in a single place and the cell can be re-run safely.
for row_label, (height_m, weight_kg) in manual_height_weight.items():
    pokemon_df.loc[row_label, 'height_m'] = height_m
    pokemon_df.loc[row_label, 'weight_kg'] = weight_kg

# Spot-check one of the values just written.
pokemon_df.loc[18, 'height_m']

In [None]:
# data is clean
pokemon_df.isnull().sum()

In [None]:
# pokemon_df[['percentage_male', 'percentage_female']]

In [None]:
pokemon_df['non-binary'] = 0
pokemon_df.head(2)

In [None]:
# Fill the missing percentage_male values with 0. Only that column is touched:
# replacing NaN across the whole selected rows also turned every other missing
# value in those rows (whatever the column) into 0.
# NOTE(review): the 'non-binary' column is not updated in this cell, so the
# filled rows cannot be told apart from rows whose percentage_male was already
# 0 — confirm whether they should be flagged.
pokemon_df['percentage_male'] = pokemon_df['percentage_male'].replace(np.nan, 0)

In [None]:
# Female share is the complement of the male share.
# NOTE(review): rows whose percentage_male is 0 only because a missing value was
# filled in come out as 100 here — confirm that this is intended.
male_share = pokemon_df['percentage_male']
pokemon_df['percentage_female'] = 100 - male_share
pokemon_df['percentage_female']

In [None]:
# pokemon_df.loc[pokemon_df['percentage_male'].isna()] = pokemon_df.loc[pokemon_df['percentage_male'].isna()].replace(False, True)

In [None]:
pokemon_df.iloc[490:493, 10:]

In [None]:
pokemon_df.isna().sum()

In [None]:
# clean_pokemon_df = clean_pokemon_df.replace(np.nan, 'none')
# Replace any remaining missing percentage_male value with 0, then re-count
# the nulls per column.
male_filled = pokemon_df.loc[:, 'percentage_male'].replace(np.nan, 0)
pokemon_df.loc[:, 'percentage_male'] = male_filled
pokemon_df.isnull().sum()

In [None]:
# Build the analysis frame used by the rest of the notebook. Its only earlier
# definition is commented out, so on a fresh kernel this cell raised NameError.
# The gender columns are left out, mirroring that commented-out definition
# (which dropped 'percentage_male'); 'percentage_female' and 'non-binary' are
# the two columns this notebook appended to pokemon_df afterwards.
# NOTE(review): later cells address columns by position (e.g. the ability
# columns at positions 40-45), which presumably assumes this layout — confirm.
clean_pokemon_df = pokemon_df.drop(
    columns=['percentage_male', 'percentage_female', 'non-binary'],
    errors='ignore',
)

# cleaned values in row 773: look the column up by name rather than by the
# hard-coded position 23.
capture_rate_position = clean_pokemon_df.columns.get_loc('capture_rate')
clean_pokemon_df.iloc[773, capture_rate_position] = '30'
clean_pokemon_df.iloc[773, capture_rate_position]

In [None]:
# Convert capture_rate to integers and list the resulting column dtypes.
capture_rate_dtype = {'capture_rate': int}
clean_pokemon_df = clean_pokemon_df.astype(capture_rate_dtype)
clean_pokemon_df.dtypes

In [None]:
clean_pokemon_df.isnull().sum()

In [None]:
# Least-squares line of speed against weight_kg, drawn in red over the scatter
# of the raw points; the correlation coefficient is printed.
weights = clean_pokemon_df['weight_kg'].values.tolist()
speeds = clean_pokemon_df['speed'].values.tolist()
slope, intercept, r_value, p_value, std_err = linregress(weights, speeds)
line_eq = "".join(["y = ", str(round(slope,2)), "x + ", str(round(intercept,2))])
regress_values = [(weight * slope + intercept) for weight in weights]
plt.scatter(weights, speeds, s = 0.6, alpha = 0.8)
plt.plot(weights, regress_values, "r")
plt.annotate(line_eq, (10,40), fontsize=15, color="red")
print(f"r value: {r_value}")
plt.show()

In [None]:
# Least-squares line of weight_kg against height_m, drawn in red over the
# scatter of the raw points; the correlation coefficient is printed.
heights = clean_pokemon_df['height_m'].values.tolist()
weights = clean_pokemon_df['weight_kg'].values.tolist()
slope, intercept, r_value, p_value, std_err = linregress(heights, weights)
line_eq = "".join(["y = ", str(round(slope,2)), "x + ", str(round(intercept,2))])
regress_values = [(height * slope + intercept) for height in heights]
plt.scatter(heights, weights, s = 0.6, alpha = 0.8)
plt.plot(heights, regress_values, "r")
plt.annotate(line_eq, (10,40), fontsize=15, color="red")
print(f"r value: {r_value}")
plt.show()

In [None]:
# Least-squares line of sp_attack against attack, drawn in red over the scatter
# of the raw points; the correlation coefficient is printed.
attacks = clean_pokemon_df['attack'].values.tolist()
special_attacks = clean_pokemon_df['sp_attack'].values.tolist()
slope, intercept, r_value, p_value, std_err = linregress(attacks, special_attacks)
line_eq = "".join(["y = ", str(round(slope,2)), "x + ", str(round(intercept,2))])
regress_values = [(a * slope + intercept) for a in attacks]
plt.scatter(attacks, special_attacks, s = 0.6, alpha = 0.8)
plt.plot(attacks, regress_values, "r")
plt.annotate(line_eq, (10,40), fontsize=15, color="red")
print(f"r value: {r_value}")
plt.show()

In [None]:
# Least-squares line of defense against attack, drawn in red over the scatter
# of the raw points; the correlation coefficient is printed.
attacks = clean_pokemon_df['attack'].values.tolist()
defenses = clean_pokemon_df['defense'].values.tolist()
slope, intercept, r_value, p_value, std_err = linregress(attacks, defenses)
line_eq = "".join(["y = ", str(round(slope,2)), "x + ", str(round(intercept,2))])
regress_values = [(a * slope + intercept) for a in attacks]
plt.scatter(attacks, defenses, s = 0.6, alpha = 0.8)
plt.plot(attacks, regress_values, "r")
plt.annotate(line_eq, (10,40), fontsize=15, color="red")
print(f"r value: {r_value}")
plt.show()

In [None]:
# Least-squares line of sp_defense against defense, drawn in red over the
# scatter of the raw points; the correlation coefficient is printed.
defenses = clean_pokemon_df['defense'].values.tolist()
special_defenses = clean_pokemon_df['sp_defense'].values.tolist()
slope, intercept, r_value, p_value, std_err = linregress(defenses, special_defenses)
line_eq = "".join(["y = ", str(round(slope,2)), "x + ", str(round(intercept,2))])
regress_values = [(a * slope + intercept) for a in defenses]
plt.scatter(defenses, special_defenses, s = 0.6, alpha = 0.8)
plt.plot(defenses, regress_values, "r")
plt.annotate(line_eq, (10,40), fontsize=15, color="red")
print(f"r value: {r_value}")
plt.show()

In [None]:
plt.scatter(clean_pokemon_df['defense'],clean_pokemon_df['attack'] / clean_pokemon_df['sp_attack'] ,s = 0.6, alpha = 0.8 )

In [None]:
plt.scatter(clean_pokemon_df['base_total'],clean_pokemon_df['attack'] / clean_pokemon_df['sp_attack'] ,s = 0.6, alpha = 0.8 )

In [None]:
# Mean base_happiness per is_legendary group (indexed by the 0/1 flag).
# The column is selected before aggregating: .mean() over the whole grouped
# frame raises TypeError on pandas >= 2.0 because of the text columns, and
# averaged every numeric column just to keep one.
happiness = clean_pokemon_df.groupby('is_legendary')['base_happiness'].mean()
happiness

In [None]:
happiness[0]

In [None]:
plt.bar(['not legendary', 'legendary'],[happiness[0], happiness[1]])

In [None]:
# Mean base_total per is_legendary group, shown as a two-bar chart.
# Selecting the column first avoids the TypeError that .mean() over the whole
# grouped frame raises on pandas >= 2.0 when text columns are present.
base_tot = clean_pokemon_df.groupby('is_legendary')['base_total'].mean()
plt.bar(['not legendary', 'legendary'],[base_tot[0], base_tot[1]])

In [None]:
# Mean capture_rate per is_legendary group, shown as a two-bar chart.
# Selecting the column first avoids the TypeError that .mean() over the whole
# grouped frame raises on pandas >= 2.0 when text columns are present.
capt_rate = clean_pokemon_df.groupby('is_legendary')['capture_rate'].mean()
plt.bar(['not legendary', 'legendary'],[capt_rate[0], capt_rate[1]])

In [None]:
generations = clean_pokemon_df.groupby('generation').count()
generations

In [None]:
generations_list = generations.index.tolist()

In [None]:
generations['name']

In [None]:
plt.bar(generations_list, generations['name'])

In [None]:
plt.scatter(generations_list, generations['name'])

In [None]:
clean_pokemon_df

In [None]:
# Pairwise correlations of the numeric columns, shown from column position 18 on.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops text
# columns silently and raises on them instead.
clean_pokemon_df.corr(numeric_only=True).iloc[:,18:]

In [None]:
# hp against attack; rows with is_legendary == 1 are red, rows with 0 are blue.
rarity_dict = {0:'blue', 1:'red'}
point_colors = clean_pokemon_df['is_legendary'].map(rarity_dict)
plt.scatter(
    clean_pokemon_df['hp'],
    clean_pokemon_df['attack'],
    c = point_colors,
    alpha=0.5,
)

In [None]:
# Per-type1 means of the numeric columns, keeping column position 18 onward.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError when
# .mean() meets the text columns instead of silently skipping them.
typing_info = clean_pokemon_df.groupby('type1').mean(numeric_only=True).iloc[:,18:]
typing_info

In [None]:
types = typing_info.index.tolist()
types

In [None]:
plt.bar(types,typing_info['defense'])
plt.xticks(rotation = 45)

In [None]:
typing_info

In [None]:
count_type = clean_pokemon_df.groupby('type1').count()
count_type

In [None]:
total = count_type['name'].sum()

In [None]:
type_pct = round(count_type['name'] / total * 100,1)
type_pct

In [None]:
type_pct.values.tolist()

In [None]:
plt.bar(type_pct.index.tolist(), type_pct.values.tolist())
plt.xticks(rotation = 60)

In [None]:
df = clean_pokemon_df.copy()

In [None]:
df.groupby(['type1','type2']).count().head(30)

In [None]:
clean_pokemon_df[(clean_pokemon_df['type1'] == 'fire') | (clean_pokemon_df['type2'] == 'fire')]

In [None]:
types = count_type.index.tolist()
types

In [None]:
# Map each type name to the rows that carry it as type1 or as type2.
type_dict = {
    _type: clean_pokemon_df[
        (clean_pokemon_df['type1'] == _type) | (clean_pokemon_df['type2'] == _type)
    ]
    for _type in types
}

In [None]:
len(type_dict['bug'])

In [None]:
types

In [None]:
# Number of rows per type, in the same order as `types`.
num_list = [len(type_dict[_type]) for _type in types]
num_list

In [None]:
# Per-type row counts as a percentage of `total`.
# num_list is a plain Python list; dividing it directly only worked because
# `total` happened to be a numpy scalar. Converting explicitly keeps the cell
# working (and returning an array with .tolist()) whatever numeric type
# `total` has.
pct_list = np.asarray(num_list) / total * 100
pct_list.tolist()

In [None]:
plt.bar(types, pct_list.tolist())
plt.xticks(rotation = 65)

In [None]:
clean_pokemon_df.base_total

In [None]:
type_dict['bug']['base_total'].mean()

In [None]:
# Mean base_total per type (rounded to whole numbers) as a bar chart.
types_base_total = [
    round(type_dict[_type]['base_total'].mean(), 0) for _type in types
]

plt.bar(types, types_base_total)
plt.xticks(rotation = 65)
plt.ylim(300, 520)

In [None]:
# Mean attack per type (rounded to whole numbers) as a bar chart.
types_attack = [
    round(type_dict[_type]['attack'].mean(), 0) for _type in types
]

plt.bar(types, types_attack)
plt.xticks(rotation = 65)
plt.ylim(40,110)

In [None]:
# Mean weight_kg per type (rounded to whole numbers) as a bar chart.
types_weight = [
    round(type_dict[_type]['weight_kg'].mean(), 0) for _type in types
]

plt.bar(types, types_weight)
plt.xticks(rotation = 65)

In [None]:
# Mean height_m per type (left unrounded) as a bar chart.
types_height = [type_dict[_type]['height_m'].mean() for _type in types]

plt.bar(types, types_height)
plt.xticks(rotation = 65)

In [None]:
clean_pokemon_df

In [None]:
the_list = clean_pokemon_df.abilities.values
the_list

In [None]:
# Collect every distinct ability name, in first-seen order.
# Each entry of the_list is split on commas and the surrounding bracket, quote
# and space characters are trimmed from each piece. The previous regex
# ([A-Z][a-z]+) kept only the first capitalised word, so multi-word names were
# truncated; the per-item print calls and the unused counter are gone too.
# NOTE(review): assumes ability names contain no commas — confirm against the data.
abilities_list = []
seen_abilities = set()
for raw_abilities in the_list:
    for raw_name in raw_abilities.split(','):
        ability_name = raw_name.strip(" []'\"")
        if ability_name and ability_name not in seen_abilities:
            seen_abilities.add(ability_name)
            abilities_list.append(ability_name)
        

In [None]:
abilities_list

In [None]:
clean_pokemon_df.head(20)

In [None]:
# Turn the abilities text into a list of entries: trim the quote/bracket
# characters from both ends, then split on commas.
# Not re-runnable: once the column holds lists, the .str.strip calls no longer
# apply to it.
abilities_text = df['abilities'].str.strip('\"[')
abilities_text = abilities_text.str.strip(']\"')
df['abilities'] = abilities_text.str.split(',')

In [None]:
df['abilities']

In [None]:
# Append six empty columns, ability1 .. ability6, to be filled per row later.
for slot in range(1, 7):
    df[f'ability{slot}'] = None

In [None]:
# Spread each row's ability list across the ability1..ability6 columns, while
# collecting every ability into all_list and tracking the longest list seen.
# Target columns are located by name: the previous hard-coded positions 40-45
# wrote into unrelated columns whenever the frame's layout differed.
# The list is read from df['abilities']; the original read column position 0 of
# each row, presumably the same column — TODO confirm.
ability_columns = ['ability1', 'ability2', 'ability3', 'ability4', 'ability5', 'ability6']
ability_positions = [df.columns.get_loc(column) for column in ability_columns]
last_slot = len(ability_positions) - 1

maximum = 0
all_list = []
for index, abilities in enumerate(df['abilities']):
    maximum = max(maximum, len(abilities))
    all_list.extend(abilities)
    for slot, ability in enumerate(abilities):
        # As before, anything past the sixth ability lands in the last column.
        df.iloc[index, ability_positions[min(slot, last_slot)]] = ability
print(maximum)

In [None]:
all_list

In [None]:
df.head(20)

In [None]:
against = df.columns.tolist()[1:19]

In [None]:
# Placeholder column; a later cell overwrites it with the per-row sums.
df['against_sum'] = 0
# Column-wise totals over the columns listed in `against`. They get their own
# name: rebinding `total` here replaced the Pokemon count that the type
# percentage cells divide by, so re-running those cells afterwards gave wrong
# results. The dead `total = 0` assignment is dropped as well.
against_totals = df[against].sum()

In [None]:
# Row position -> sum of that row's values over the `against` columns.
types_dict = {
    position: sum(row.values.tolist())
    for position, (_, row) in enumerate(df[against].iterrows())
}
types_dict

In [None]:
pd.Series(types_dict)

In [None]:
df['against_sum'] = pd.Series(types_dict)

In [None]:
df.head()

In [None]:
# against_sum entries that equal the column minimum.
lowest_against_sum = df['against_sum'].min()
df['against_sum'].loc[df['against_sum'] == lowest_against_sum]

In [None]:
# Show columns from position 31 onward for three hand-picked row positions.
# NOTE(review): 302, 706 and 800 look like they were copied from the output of
# the minimum-against_sum lookup — confirm they still match if the data changes.
df.iloc[[302,706,800]].iloc[:,31:]

In [None]:
df

In [None]:
# Per-generation means of the numeric columns, keeping column position 20 onward.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError when
# .mean() meets text/list columns instead of silently skipping them.
generation_means = df.groupby('generation').mean(numeric_only=True).iloc[:,20:]

In [None]:
plt.plot(generation_means.index, generation_means['is_legendary'])

In [None]:
plt.plot(generation_means.index, generation_means['base_happiness'])

In [None]:
plt.plot(generation_means.index, generation_means['against_sum'])

In [None]:
plt.plot(generation_means.index, generation_means['height_m'])

In [None]:
plt.plot(generation_means.index, generation_means['weight_kg'])

In [None]:
df.head()

In [None]:
all_list

In [None]:
# Normalise every collected ability: trim surrounding whitespace, then remove
# single quotes. Built directly as a list; the previous comprehension was used
# only for its append side effect and made the cell display a list of None.
all_abilities = [ability.strip().replace("'", '') for ability in all_list]

In [None]:
all_abilities

In [None]:
abilities_df = pd.DataFrame(all_abilities, columns = ['Ability'])

In [None]:
abilities_df['Count'] = 1

In [None]:
num_abilities = len(abilities_df)
tot_abilities = abilities_df.groupby('Ability').count().sum()

In [None]:
# Occurrences per ability, plus each ability's share of tot_abilities in
# percent (two decimals).
abl_stat = abilities_df.groupby('Ability').count()
ability_share = abl_stat / tot_abilities * 100
abl_stat['Percent'] = round(ability_share, 2)

In [None]:
abl_stat

In [None]:
abl_stat = abl_stat.sort_values(by='Count', ascending=False)

In [None]:
abl_stat.head(10)

In [None]:
# Distinct abilities in first-seen order. dict.fromkeys keeps insertion order
# and replaces the quadratic "if i not in list" scan.
each_ability = list(dict.fromkeys(all_abilities))
each_ability

In [None]:
# Number of rows that list each ability in any of the six ability columns.
# Both spaces and single quotes are trimmed before comparing: stripping only
# the quote left any leading space in place (presumably present on every entry
# after the first, since the lists were split on ',' — confirm), so those
# entries could never equal the cleaned names in each_ability.
ability_columns = ['ability1', 'ability2', 'ability3', 'ability4', 'ability5', 'ability6']
normalized_abilities = df[ability_columns].apply(lambda column: column.str.strip(" '"))
ability_dict = {
    a: int(normalized_abilities.eq(a).any(axis=1).sum())
    for a in each_ability
}

In [None]:
ability_dict

In [None]:
df.against_sum.max()

In [None]:
# Types of the rows with the largest against_sum. The maximum is computed here
# instead of hard-coding 26.0 copied from the previous cell's output.
df.loc[df['against_sum'] == df['against_sum'].max(), ['type1', 'type2']]

In [None]:
df.columns

In [None]:
# Set capture_rate for row position 773 with a single indexing call. The
# chained form df['capture_rate'].iloc[773] = ... may assign to a temporary
# copy (SettingWithCopyWarning; no effect at all under copy-on-write).
# A number is written instead of the string '30' so an integer column keeps
# its dtype; the following pd.to_numeric call handles either case.
df.iloc[773, df.columns.get_loc('capture_rate')] = 30

In [None]:
df['capture_rate'] = pd.to_numeric(df['capture_rate'])

In [None]:
# Mean capture_rate per generation as a line plot. The column is selected
# before aggregating: .mean() over the whole grouped frame raises TypeError on
# pandas >= 2.0 because of the non-numeric columns.
df.groupby('generation')['capture_rate'].mean().plot()

In [None]:
def _rows_with_type(type_name):
    """Return the rows of df whose type1 or type2 equals type_name."""
    return df[(df['type1'] == type_name) | (df['type2'] == type_name)]


# One frame per type; a row with two types appears in both of its frames.
dragon = _rows_with_type('dragon')
dark = _rows_with_type('dark')
bug = _rows_with_type('bug')
electric = _rows_with_type('electric')
fairy = _rows_with_type('fairy')
fighting = _rows_with_type('fighting')
fire = _rows_with_type('fire')
flying = _rows_with_type('flying')
ghost = _rows_with_type('ghost')
grass = _rows_with_type('grass')
ground = _rows_with_type('ground')
ice = _rows_with_type('ice')
normal = _rows_with_type('normal')
poison = _rows_with_type('poison')
psychic = _rows_with_type('psychic')
rock = _rows_with_type('rock')
steel = _rows_with_type('steel')
water = _rows_with_type('water')

In [None]:
# Mean capture_rate of each per-type frame, in this fixed type order.
cpt_rt = [
    type_frame['capture_rate'].mean()
    for type_frame in (
        dragon, dark, bug, electric, fairy, fighting,
        fire, flying, ghost, grass, ground, ice,
        normal, poison, psychic, rock, steel, water,
    )
]

In [None]:
cpt_rt

In [None]:
# Round every mean to the nearest integer. Rebuilding the list avoids looking
# entries up with list.index() while mutating the list being iterated, which
# targets the wrong slot as soon as two entries compare equal.
cpt_rt = [int(round(rate, 0)) for rate in cpt_rt]
cpt_rt

In [None]:
len(cpt_rt)


In [None]:
# Bar labels in the order used by cpt_rt (dragon first, bug third).
# Work on a copy: `reordered_types = types` only aliased the list, so the two
# assignments below silently rewrote `types` for every cell that uses it.
# NOTE(review): overwriting positions 0 and 2 presumes `types` starts with
# bug, dark, dragon — confirm.
reordered_types = list(types)
reordered_types[0] = 'dragon'
reordered_types[2] = 'bug'
plt.bar(reordered_types, cpt_rt)
plt.xticks(rotation = 60)
# plt.ylim(60,130)

In [None]:
len(reordered_types)

In [None]:
# Mean base_total of each per-type frame, in this fixed type order.
base_t = [
    type_frame['base_total'].mean()
    for type_frame in (
        dragon, dark, bug, electric, fairy, fighting,
        fire, flying, ghost, grass, ground, ice,
        normal, poison, psychic, rock, steel, water,
    )
]

In [None]:
# Round every mean to the nearest integer. Rebuilding the list avoids looking
# entries up with list.index() while mutating the list being iterated, which
# targets the wrong slot as soon as two entries compare equal.
base_t = [int(round(value, 0)) for value in base_t]
base_t

In [None]:
plt.bar(reordered_types,base_t)
plt.xticks(rotation = 60)
plt.ylim(350,530)

In [None]:
# All per-type frames, in the same order as the per-type mean lists.
df_list = [
    dragon, dark, bug, electric, fairy, fighting,
    fire, flying, ghost, grass, ground, ice,
    normal, poison, psychic, rock, steel, water,
]

len(df_list)

In [None]:
# Mean base_happiness per type frame, rounded to the nearest integer, as a bar
# chart. The values are rounded while the list is built; the previous loop
# looked entries up with list.index() while mutating the list, which targets
# the wrong slot as soon as two entries compare equal.
happiness_frames = (
    dragon, dark, bug, electric, fairy, fighting,
    fire, flying, ghost, grass, ground, ice,
    normal, poison, psychic, rock, steel, water,
)
base_h = [
    int(round(type_frame['base_happiness'].mean(), 0))
    for type_frame in happiness_frames
]

plt.bar(reordered_types,base_h)
plt.xticks(rotation = 60)
plt.ylim(40,75)
plt.ylabel('Happiness')

In [None]:
# Mean speed per type frame, rounded to the nearest integer, as a bar chart.
# The values are rounded while the list is built; the previous loop looked
# entries up with list.index() while mutating the list, which targets the
# wrong slot as soon as two entries compare equal.
speed_frames = (
    dragon, dark, bug, electric, fairy, fighting,
    fire, flying, ghost, grass, ground, ice,
    normal, poison, psychic, rock, steel, water,
)
base_s = [
    int(round(type_frame['speed'].mean(), 0))
    for type_frame in speed_frames
]

plt.bar(reordered_types,base_s)
plt.xticks(rotation = 60)
plt.ylim(50,85)
plt.ylabel('Speed')

In [None]:
dragon.base_total.plot.hist(bins=8)

In [None]:
df.base_total.plot.hist(bins=10)

In [None]:
df.attack.plot.hist(bins=10)

In [None]:
df.defense.plot.hist(bins=10)

In [None]:
df.capture_rate.plot.hist(bins=10)

In [None]:
df.weight_kg.plot.hist(bins=100)

In [None]:
dragon.base_happiness.plot.hist(bins=5)

In [None]:
df.height_m.plot.hist(bins=100)

In [None]:
df['weight_kg'].max()

In [None]:
# Name(s) of the heaviest rows. The maximum is computed here instead of
# comparing against 999.9 copied from the previous cell's output.
df.loc[df['weight_kg'] == df['weight_kg'].max(), 'name']

In [None]:
df['base_happiness'].max()

In [None]:
# Name(s) of the rows with the highest base_happiness. The maximum is computed
# here instead of comparing against 140 copied from the previous cell's output.
df.loc[df['base_happiness'] == df['base_happiness'].max(), 'name']

In [None]:
df.attack.max()

In [None]:
# Name(s) of the rows with the highest attack, computed the same way as the
# speed/defense lookups below rather than comparing against a hard-coded 185.
df[df.attack == (df.attack.max())]['name']

In [None]:

df[df.speed == (df.speed.max())]['name']

In [None]:
df[df.defense == (df.defense.max())]['name']

In [None]:
df[df.base_total == df.base_total.min()]['name']

In [None]:
normal.base_total.plot.hist(bins=12)

In [None]:
df

In [None]:
all_abilities

In [None]:
len(each_ability)

In [None]:
dragon

In [None]:
dragon.columns

In [None]:
dragon.describe().iloc[:, 20:]

In [None]:
# Correlations among the numeric columns of the dragon frame, shown from column
# position 20 on. numeric_only=True is explicit because pandas >= 2.0 raises
# on text columns instead of dropping them silently.
dragon.corr(numeric_only=True).iloc[:, 20:]

In [None]:
# Correlations among the numeric columns of df, from row/column position 18 on.
# numeric_only=True is explicit because pandas >= 2.0 raises on text columns
# instead of dropping them silently.
df.corr(numeric_only=True).iloc[18:, 18:]

In [None]:
# Mean experience_growth per generation as a line plot. The column is selected
# before aggregating: .mean() over the whole grouped frame raises TypeError on
# pandas >= 2.0 because of the non-numeric columns.
df.groupby('generation')['experience_growth'].mean().plot()

In [None]:
# Mean speed per generation as a line plot. The column is selected before
# aggregating: .mean() over the whole grouped frame raises TypeError on
# pandas >= 2.0 because of the non-numeric columns.
df.groupby('generation')['speed'].mean().plot()

In [None]:
# Mean base_total per generation as a line plot. The column is selected before
# aggregating: .mean() over the whole grouped frame raises TypeError on
# pandas >= 2.0 because of the non-numeric columns.
df.groupby('generation')['base_total'].mean().plot()

In [None]:
df.head()

In [None]:
df.columns.tolist()

In [None]:
# Clean the ability columns in place. ability1 only loses its single quotes;
# the other five get the quote / space / quote strip sequence.
df['ability1'] = df['ability1'].str.strip("'")
for ability_column in ('ability2', 'ability3', 'ability4', 'ability5', 'ability6'):
    quote_trimmed = df[ability_column].str.strip("'")
    space_trimmed = quote_trimmed.str.strip(' ')
    df[ability_column] = space_trimmed.str.strip("'")

In [None]:
df.head()

In [None]:
df['ability2'][0]

In [None]:
df['ability2'].values.tolist()