In [1]:
import os

import numpy as np
import pandas as pd
import scipy as sp
from matplotlib import pyplot as plt
from scipy import stats

# Location of the dataset. Set the POKEMON_CSV environment variable to point at
# your own copy; the fallback is the path originally used by this notebook.
POKEMON_CSV = os.environ.get('POKEMON_CSV', '/Users/talia/Downloads/pokemon.csv')
pokemon = pd.read_csv(POKEMON_CSV)

# Preview the first rows to check that the file loaded as expected.
pokemon.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [8]:
def t_test_features(s1, s2, features=('HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total'), equal_var=True):
    """Run an independent two-sample t-test on each feature of two samples.

    Parameters
    ----------
    s1, s2 : DataFrame-like
        The two samples to compare. Each must support ``sample[feature]``
        lookup for every name in ``features``.
    features : iterable of str
        Names of the features (columns) to test. The default is an immutable
        tuple so the default value cannot be modified between calls.
    equal_var : bool, default True
        Forwarded to ``scipy.stats.ttest_ind``. ``True`` keeps the original
        behavior (pooled-variance test); ``False`` requests Welch's t-test,
        which does not assume equal population variances.

    Returns
    -------
    dict
        Maps each feature name to the p-value reported by
        ``scipy.stats.ttest_ind`` for that feature, in the order given by
        ``features``.
    """
    return {
        feature: stats.ttest_ind(s1[feature], s2[feature], equal_var=equal_var).pvalue
        for feature in features
    }

In [9]:
# Split the rows by the value of the "Legendary" column, then t-test every stat.
po_legend = pokemon[pokemon["Legendary"] == True]
po_non_legend = pokemon[pokemon["Legendary"] == False]
t_test_features(
    po_legend,
    po_non_legend,
    ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total'],
)

{'HP': 3.330647684846191e-15,
 'Attack': 7.827253003205333e-24,
 'Defense': 1.5842226094427255e-12,
 'Sp. Atk': 6.314915770427266e-41,
 'Sp. Def': 1.8439809580409594e-26,
 'Speed': 2.3540754436898437e-21,
 'Total': 3.0952457469652825e-52}

In [None]:
# Legendary and non-Legendary Pokemon differ significantly on every stat (all p-values are far below 0.001).
# The smallest p-values are for Total and Sp. Atk (followed by Sp. Def), so the evidence of a difference is strongest there.

In [10]:
# Repeat the per-stat t-tests, this time comparing Generation 1 with Generation 2.

po_gen1 = pokemon[pokemon["Generation"] == 1]
po_gen2 = pokemon[pokemon["Generation"] == 2]
t_test_features(
    po_gen1,
    po_gen2,
    ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total'],
)

{'HP': 0.13791881412813622,
 'Attack': 0.24050968418101445,
 'Defense': 0.5407630349194362,
 'Sp. Atk': 0.14119788176331508,
 'Sp. Def': 0.1678122623160639,
 'Speed': 0.0028356954812578704,
 'Total': 0.5599140649014442}

In [None]:
# Generation 1 and Generation 2 Pokemon differ significantly only on Speed (p ≈ 0.003).
# On the other features the differences are not statistically significant (all p-values > 0.1).

In [11]:
# Single-type vs. dual-type Pokemon.
# Single-type group: rows where either type column is missing.
# Dual-type group: rows where both type columns are present.

po_1type = pokemon.loc[pokemon[["Type 1", "Type 2"]].isna().any(axis=1)]
po_2type = pokemon.loc[pokemon[["Type 1", "Type 2"]].notna().all(axis=1)]
t_test_features(
    po_1type,
    po_2type,
    ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total'],
)

{'HP': 0.11060643144431842,
 'Attack': 0.00015741395666164396,
 'Defense': 3.250594205757004e-08,
 'Sp. Atk': 0.0001454917404035147,
 'Sp. Def': 0.00010893304795534396,
 'Speed': 0.02405141079403746,
 'Total': 1.1749035008828752e-07}

In [None]:
# Single-type and dual-type Pokemon differ significantly on Attack, Sp. Atk and Sp. Def (p < 0.001) and, more weakly, on Speed (p ≈ 0.024); only HP shows no significant difference (p ≈ 0.11).
# The strongest evidence of a difference is on the Defense feature (p ≈ 3e-08).

In [16]:
# Distinct Pokemon names as a plain Python list, in order of first appearance.
poks = pokemon["Name"].drop_duplicates().tolist()
poks

['Bulbasaur',
 'Ivysaur',
 'Venusaur',
 'VenusaurMega Venusaur',
 'Charmander',
 'Charmeleon',
 'Charizard',
 'CharizardMega Charizard X',
 'CharizardMega Charizard Y',
 'Squirtle',
 'Wartortle',
 'Blastoise',
 'BlastoiseMega Blastoise',
 'Caterpie',
 'Metapod',
 'Butterfree',
 'Weedle',
 'Kakuna',
 'Beedrill',
 'BeedrillMega Beedrill',
 'Pidgey',
 'Pidgeotto',
 'Pidgeot',
 'PidgeotMega Pidgeot',
 'Rattata',
 'Raticate',
 'Spearow',
 'Fearow',
 'Ekans',
 'Arbok',
 'Pikachu',
 'Raichu',
 'Sandshrew',
 'Sandslash',
 'Nidoran♀',
 'Nidorina',
 'Nidoqueen',
 'Nidoran♂',
 'Nidorino',
 'Nidoking',
 'Clefairy',
 'Clefable',
 'Vulpix',
 'Ninetales',
 'Jigglypuff',
 'Wigglytuff',
 'Zubat',
 'Golbat',
 'Oddish',
 'Gloom',
 'Vileplume',
 'Paras',
 'Parasect',
 'Venonat',
 'Venomoth',
 'Diglett',
 'Dugtrio',
 'Meowth',
 'Persian',
 'Psyduck',
 'Golduck',
 'Mankey',
 'Primeape',
 'Growlithe',
 'Arcanine',
 'Poliwag',
 'Poliwhirl',
 'Poliwrath',
 'Abra',
 'Kadabra',
 'Alakazam',
 'AlakazamMega Alakaz

In [26]:
# Paired t-tests over all Pokemon: Attack vs. Defense here,
# Sp. Atk vs. Sp. Def in a later cell. Only the p-value is kept.

fA_fD = stats.ttest_rel(a=pokemon["Attack"], b=pokemon["Defense"]).pvalue
fA_fD

1.7140303479358558e-05

In [None]:
# Attack and Defense differ significantly across all Pokemon (paired t-test, p ≈ 1.7e-05).

In [28]:
# Paired t-test over all Pokemon: Sp. Atk vs. Sp. Def (p-value only).
fSA_fSD = stats.ttest_rel(a=pokemon["Sp. Atk"], b=pokemon["Sp. Def"]).pvalue
fSA_fSD

0.3933685997548122

In [None]:
# Sp. Atk and Sp. Def do not differ significantly across all Pokemon (paired t-test, p ≈ 0.39).