# Setup

In [75]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import ttest_ind, chisquare, normaltest

In [12]:
### Load the recent and the older usage tables, renaming any 'index' column to "name"
recentdf = pd.read_csv("Pokemon Usage/SeptemberData.csv", index_col=0)
recentdf = recentdf.rename(columns={'index': "name"})
olddf = pd.read_csv("Pokemon Usage/OldData.csv", index_col=0)
olddf = olddf.rename(columns={'index': "name"})

### Outer-merge the two tables (no `on=` is given, so pandas matches on the shared columns)
usagedf = pd.merge(recentdf, olddf, how='outer')
print(usagedf.shape)
usagedf.head()

(5916, 7)


Unnamed: 0,name,Moves,Usage,Gen,Format,Min Rating,Recent
0,Bronzong,"['gyroball', 'explosion', 'earthquake', 'steal...",0.124514,4.0,ou,0.0,1
1,Swampert,"['earthquake', 'stealthrock', 'waterfall', 'ic...",0.120051,4.0,ou,0.0,1
2,Snorlax,"['bodyslam', 'crunch', 'earthquake', 'curse', ...",0.040116,4.0,ou,0.0,1
3,Uxie,"['stealthrock', 'uturn', 'thunderwave', 'psych...",0.023561,4.0,ou,0.0,1
4,Machamp,"['dynamicpunch', 'payback', 'bulletpunch', 'ic...",0.102777,4.0,ou,0.0,1


In [13]:
### Load the moves table, then index it by the 'move' column so rows can be looked up by move name
movesdf = pd.read_csv("Pokedex and Moves/moves.csv", index_col=0)
movesdf = movesdf.set_index('move')

### Standardize the Moves
def standardize_moves(df, moves_lookup=None):
    """Add "Type 1" .. "Type 6" columns holding the type of each listed move.

    Each entry of ``df["Moves"]`` is expected to be the string form of a list
    of move names, e.g. ``"['earthquake', 'stealthrock']"``. The first six
    moves are looked up in ``moves_lookup`` and their types are written to
    the columns "Type 1" .. "Type 6".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a "Moves" column. It is modified in place; any index is
        accepted because rows are processed in order, not by integer label.
    moves_lookup : pandas.DataFrame, optional
        Table indexed by move name with a 'type' column. Defaults to the
        notebook-level ``movesdf``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the six type columns added/overwritten.

    Raises
    ------
    KeyError
        If a non-empty move name is not present in ``moves_lookup``.
    """
    if moves_lookup is None:
        ### Fall back to the moves table loaded at notebook level
        moves_lookup = movesdf

    ### Number of move slots tracked per Pokemon
    n_slots = 6

    ### One list per output column
    slot_types = [[] for _ in range(n_slots)]

    ### Iterate over the values themselves: indexing with range(len(...)) would
    ### treat positions as index labels and fail on a non-default index
    for raw in df["Moves"]:

        ### Strip the List Borders and Create a List by Splitting on Commas in the String;
        ### a non-string entry (e.g. NaN) is treated as having no moves
        names = raw.strip('][').split(', ') if isinstance(raw, str) else []

        for slot in range(n_slots):

            ### Missing slots and empty-string moves both become nan
            name = names[slot].strip("'") if slot < len(names) else ""

            if name == "":
                slot_types[slot].append(np.nan)
            else:
                ### Find Move in Move DF containing Type
                slot_types[slot].append(moves_lookup.loc[name, 'type'])

    ### Piecing Everything Together
    for number, column in enumerate(slot_types, start=1):
        df[f"Type {number}"] = column

    return df

### standardize_moves writes the type columns onto usagedf itself and returns that same frame
usagemovesdf = standardize_moves(df=usagedf)
usagemovesdf.head(n=5)

Unnamed: 0,name,Moves,Usage,Gen,Format,Min Rating,Recent,Type 1,Type 2,Type 3,Type 4,Type 5,Type 6
0,Bronzong,"['gyroball', 'explosion', 'earthquake', 'steal...",0.124514,4.0,ou,0.0,1,Steel,Normal,Ground,Rock,Psychic,Water
1,Swampert,"['earthquake', 'stealthrock', 'waterfall', 'ic...",0.120051,4.0,ou,0.0,1,Ground,Rock,Water,Ice,Ice,Normal
2,Snorlax,"['bodyslam', 'crunch', 'earthquake', 'curse', ...",0.040116,4.0,ou,0.0,1,Normal,Dark,Ground,Ghost,Psychic,Fire
3,Uxie,"['stealthrock', 'uturn', 'thunderwave', 'psych...",0.023561,4.0,ou,0.0,1,Rock,Bug,Electric,Psychic,Psychic,Psychic
4,Machamp,"['dynamicpunch', 'payback', 'bulletpunch', 'ic...",0.102777,4.0,ou,0.0,1,Fighting,Dark,Steel,Ice,Rock,Normal


In [14]:
### Rebuild the merged usage table from the two source tables
### (presumably to start from a copy without the move-type columns added to the earlier usagedf)
usagedf = pd.merge(recentdf, olddf, how='outer')

### Load the Pokemon table, index it by 'name', and keep only its two type columns
pokedexdf = pd.read_csv("Pokedex and Moves/pokemon.csv", index_col=0)
pokedexdf = pokedexdf.set_index('name')
pokedexdf = pokedexdf[['types','type2']]

### Join the type columns onto the usage rows via the "name" column, then rename them
usagetypesdf = usagedf.join(pokedexdf, on="name")
usagetypesdf = usagetypesdf.rename(columns={'types': "Type 1", "type2": "Type 2"})
usagetypesdf

Unnamed: 0,name,Moves,Usage,Gen,Format,Min Rating,Recent,Type 1,Type 2
0,Bronzong,"['gyroball', 'explosion', 'earthquake', 'steal...",0.124514,4.0,ou,0.0,1,Steel,Psychic
1,Swampert,"['earthquake', 'stealthrock', 'waterfall', 'ic...",0.120051,4.0,ou,0.0,1,Water,Ground
2,Snorlax,"['bodyslam', 'crunch', 'earthquake', 'curse', ...",0.040116,4.0,ou,0.0,1,Normal,
3,Uxie,"['stealthrock', 'uturn', 'thunderwave', 'psych...",0.023561,4.0,ou,0.0,1,Psychic,
4,Machamp,"['dynamicpunch', 'payback', 'bulletpunch', 'ic...",0.102777,4.0,ou,0.0,1,Fighting,
...,...,...,...,...,...,...,...,...,...
5911,Charizard,"['solarbeam', 'fireblast', 'hurricane', 'airsl...",0.030482,8.0,ubers,0.0,0,Fire,Flying
5912,Shuckle,"['stickyweb', 'stealthrock', 'encore', 'finalg...",0.045773,8.0,ubers,0.0,0,Bug,Rock
5913,Hatterene,"['mysticalfire', 'psychic', 'dazzlinggleam', '...",0.026064,8.0,ubers,0.0,0,Psychic,Fairy
5914,Zacian-Crowned,"['behemothblade', 'playrough', 'swordsdance', ...",0.921806,8.0,ubers,0.0,0,Fairy,Steel


In [17]:
### Finds all Super Effective Moves in the column
def SE_moves(move_type, df, col, binary=False):
    """Mark, for one column of move types, the entries super effective against `move_type`.

    Parameters
    ----------
    move_type : str
        "Dragon" selects the list ["Ice", "Dragon"]; any other value selects
        the Fairy list ["Steel", "Poison"].
    df : pandas.DataFrame
        Frame containing the column `col`.
    col : str
        Name of the column whose values are compared against the list.
    binary : bool, optional
        Accepted but not used by this function.

    Returns
    -------
    pandas.Series
        Per-row count of list entries equal to ``df[col]``.
    """
    ### Pick the attacking types that count as super effective
    attacking_types = ["Ice", "Dragon"] if move_type == "Dragon" else ["Steel", "Poison"]

    ### One 0/1 Series per attacking type
    flags = [(df[col] == attacking) * 1 for attacking in attacking_types]

    ### Add the per-type flags together
    counts = flags[0]
    for extra in flags[1:]:
        counts = counts + extra

    return counts

### Function that checks through all columns and types
def find_all_SE(df, binary=False):
    """Add "SE Dragon" and "SE Fairy" columns counting super effective moves per row.

    For each target type, the results of ``SE_moves`` over the columns
    "Type 1" .. "Type 6" are summed and stored in the matching output column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns "Type 1" .. "Type 6"; modified in place.
    binary : bool, optional
        When true, the summed counts are reduced to 1 (any match) or 0 (none).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the two columns added.
    """
    slot_columns = ("Type 1", "Type 2", "Type 3", "Type 4", "Type 5", "Type 6")
    targets = ("Dragon", "Fairy")
    output_columns = ("SE Dragon", "SE Fairy")

    for target, output in zip(targets, output_columns):

        ### Per-slot results for this target type
        per_slot = [SE_moves(target, df, slot) for slot in slot_columns]

        ### Total across the six slots
        total = per_slot[0]
        for counts in per_slot[1:]:
            total = total + counts

        ### Changes to binary results, if specified
        if binary:
            total = (total > 0) * 1

        df[output] = total

    return df

### Add the "SE Dragon" / "SE Fairy" count columns (counts, not binary flags)
usagemovesdf = find_all_SE(df=usagemovesdf, binary=False)
usagemovesdf.head(n=5)

Unnamed: 0,name,Moves,Usage,Gen,Format,Min Rating,Recent,Type 1,Type 2,Type 3,Type 4,Type 5,Type 6,SE Dragon,SE Fairy
0,Bronzong,"['gyroball', 'explosion', 'earthquake', 'steal...",0.124514,4.0,ou,0.0,1,Steel,Normal,Ground,Rock,Psychic,Water,0,1
1,Swampert,"['earthquake', 'stealthrock', 'waterfall', 'ic...",0.120051,4.0,ou,0.0,1,Ground,Rock,Water,Ice,Ice,Normal,2,0
2,Snorlax,"['bodyslam', 'crunch', 'earthquake', 'curse', ...",0.040116,4.0,ou,0.0,1,Normal,Dark,Ground,Ghost,Psychic,Fire,0,0
3,Uxie,"['stealthrock', 'uturn', 'thunderwave', 'psych...",0.023561,4.0,ou,0.0,1,Rock,Bug,Electric,Psychic,Psychic,Psychic,0,0
4,Machamp,"['dynamicpunch', 'payback', 'bulletpunch', 'ic...",0.102777,4.0,ou,0.0,1,Fighting,Dark,Steel,Ice,Rock,Normal,1,1


In [84]:
### Split the Pokemon-type table at generation 6: rows with Gen < 6 vs. rows with Gen >= 6
beforeFairy = usagetypesdf.loc[usagetypesdf["Gen"].lt(6)]
afterFairy = usagetypesdf.loc[usagetypesdf["Gen"].ge(6)]

In [86]:
### Mean of the "Usage" column in each of the two generation groups
beforemean, aftermean = beforeFairy["Usage"].mean(), afterFairy["Usage"].mean()
print(beforemean,aftermean)

0.11470787731746822 0.08413819640176587


In [87]:
### Independent two-sample t-test on "Usage"; equal_var=True is scipy's default, spelled out here
ttest_ind(beforeFairy["Usage"], afterFairy["Usage"], equal_var=True)

Ttest_indResult(statistic=10.650545173438335, pvalue=2.9941530577567206e-26)

In [88]:
### Re-split at generation 6, this time using the move-type table (replaces the earlier two groups)
beforeFairy = usagemovesdf.loc[usagemovesdf["Gen"].lt(6)]
afterFairy = usagemovesdf.loc[usagemovesdf["Gen"].ge(6)]

In [90]:
### Mean of the "SE Dragon" column in each of the two generation groups
beforemean, aftermean = beforeFairy["SE Dragon"].mean(), afterFairy["SE Dragon"].mean()
print(beforemean,aftermean)

0.6717171717171717 0.528476821192053


In [91]:
### Independent two-sample t-test on "SE Dragon"; equal_var=True is scipy's default, spelled out here
ttest_ind(beforeFairy["SE Dragon"], afterFairy["SE Dragon"], equal_var=True)


Ttest_indResult(statistic=5.477339385599182, pvalue=4.4957687612994866e-08)

In [92]:
### Mean of the "SE Fairy" column in each of the two generation groups
beforemean, aftermean = beforeFairy["SE Fairy"].mean(), afterFairy["SE Fairy"].mean()
print(beforemean,aftermean)

0.33116883116883117 0.5408388520971302


In [93]:
### Independent two-sample t-test on "SE Fairy"; equal_var=True is scipy's default, spelled out here
ttest_ind(beforeFairy["SE Fairy"], afterFairy["SE Fairy"], equal_var=True)


Ttest_indResult(statistic=-9.371191325236575, pvalue=9.977344019613692e-21)