In [161]:
import numpy as np
import pandas as pd
import matplotlib as plot
import math

# Pokemon Go Meta Analysis Notebook
As a statistically-driven individual, I care heavily about the Pokemon that are statistically significant to the Pokemon Go meta. However, there is no active site that combines the statistics of Pokemon that are prominent in both the PvP meta and the raid attacker meta. This notebook combines the CSV files for the PvP rankings from PvPoke.com with the raid attacker rankings from GamePress.

First, load in the Little League, Great League, Ultra League, and Master League csvs from PvPoke.com. Rename the score column to the correct league's name for the future step of combining the scores into one set.

In [162]:
def load_league_scores(csv_path, score_label):
    """Read one PvPoke rankings CSV and keep only the Pokemon name and score.

    Parameters
    ----------
    csv_path : str
        Path to a rankings export; its 'Pokemon' and 'Score' columns are used.
    score_label : str
        League-specific name given to the 'Score' column (e.g. 'GL Score'),
        so the per-league scores stay distinguishable once merged.

    Returns
    -------
    pandas.DataFrame
        A frame with exactly two columns: 'Pokemon' and ``score_label``.
    """
    rankings = pd.read_csv(csv_path)
    # rename() returns a new frame, so no in-place mutation is needed.
    return rankings.rename(columns={'Score': score_label})[['Pokemon', score_label]]


great_league = load_league_scores('cp1500_all_overall_rankings.csv', 'GL Score')
ultra_league = load_league_scores('cp2500_all_overall_rankings.csv', 'UL Score')
little_league = load_league_scores('custom_cp500_all_custom_rankings.csv', 'LC Score')
master_league = load_league_scores('cp10000_all_overall_rankings.csv', 'ML Score')

# Combining PvP Rankings
Now, merge the four sets into one big PvP Rankings dataset.

In [163]:
# Outer joins keep every Pokemon that is ranked in at least one league;
# leagues where it is absent are left as NaN.
pvp_score_columns = ['Pokemon', 'LC Score', 'GL Score', 'UL Score', 'ML Score']
PvP_Rankings = (
    great_league
    .merge(ultra_league, how='outer', on='Pokemon')
    .merge(master_league, how='outer', on='Pokemon')
    .merge(little_league, how='outer', on='Pokemon')
)[pvp_score_columns]

The CSV files provide only an overall score for each league. Using the `.rank()` function, add a rank column for each league, where rank 1 is the highest score.

In [164]:
# Each rank column is derived from the matching score column.
# ascending=False gives rank 1 to the highest score.
rank_from_score = {
    'GL Rank': 'GL Score',
    'UL Rank': 'UL Score',
    'ML Rank': 'ML Score',
    'LC Rank': 'LC Score',
}
for rank_column, score_column in rank_from_score.items():
    PvP_Rankings[rank_column] = PvP_Rankings[score_column].rank(ascending=False)
display(PvP_Rankings)

Unnamed: 0,Pokemon,LC Score,GL Score,UL Score,ML Score,GL Rank,UL Rank,ML Rank,LC Rank
0,Carbink,,96.0,64.4,,1.0,493.0,,
1,Registeel,,94.4,98.9,75.9,2.0,1.0,95.0,
2,Stunfisk (Galarian),,93.8,89.4,60.0,3.0,28.0,254.0,
3,Swampert (Shadow),,93.3,93.2,83.9,4.0,7.0,48.0,
4,Swampert,,93.1,92.9,86.2,5.0,8.5,37.5,
...,...,...,...,...,...,...,...,...,...
1138,Abra (Shadow),19.2,,,,,,,373.0
1139,Abra,19.0,,,,,,,374.0
1140,Cranidos,18.8,,,,,,,375.0
1141,Magikarp,5.6,,,,,,,377.0


# Loading in Raid Attacker Data
Now it is time to load in the raid rankings from GamePress. 

In [165]:
# Raid attacker table: one row per Pokemon / fast move / charged move combination.
raid_dps_csv = 'comprehensive_dps.csv'
counters = pd.read_csv(raid_dps_csv)
display(counters)

Unnamed: 0,Pokemon,Fast Move,Charged Move,DPS,TDO,ER,CP
0,Mega Rayquaza,Dragon Tail,Dragon Ascent,29.698,1122.2,73.63,5713
1,Mega Rayquaza,Air Slash,Dragon Ascent,28.503,1077.0,70.67,5713
2,Mega Mewtwo Y,Confusion,Psystrike,26.510,914.2,64.24,5610
3,Mega Mewtwo Y,Psycho Cut,Psystrike,26.374,909.5,63.91,5610
4,Mega Mewtwo X,Confusion,Psystrike,25.664,885.0,62.19,5429
...,...,...,...,...,...,...,...
14439,Shadow Shuckle,Rock Throw,Frustration,0.724,17.2,1.60,405
14440,Shadow Magikarp,Splash,Frustration,0.591,3.4,0.92,274
14441,Lycanroc (Dusk Form),Rock Throw,Crush Claw,0.369,7.8,0.79,2678
14442,Lycanroc (Dusk Form),Sucker Punch,Crush Claw,0.369,7.8,0.79,2678


As seen above, this data contains every moveset combination for every Pokemon in the game. This is great, but for the purpose of this analysis, we only care about the maximum ER value of each unique Pokemon. So we keep only the relevant columns and then condense the table to one row per Pokemon.

In [166]:
# Collapse the moveset rows to one row per Pokemon.
# 'Name' duplicates 'Pokemon' so it can serve as the group key while 'Pokemon'
# survives as an ordinary column for the later merge.
# DPS and ER are maximised independently, so they may come from different movesets.
counters = (
    counters[['Pokemon', 'ER', 'DPS']]
    .assign(Name=lambda moves: moves['Pokemon'])
    .groupby('Name')[['Pokemon', 'DPS', 'ER']]
    .max()
)
display(counters)


Unnamed: 0_level_0,Pokemon,DPS,ER
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Flabébé,Flabébé,7.460,14.25
5th Anniversary Pikachu,5th Anniversary Pikachu,8.366,14.77
Abomasnow,Abomasnow,12.607,28.65
Abra,Abra,11.707,19.22
Absol,Absol,15.134,30.58
...,...,...,...
Zubat,Zubat,6.319,10.71
Zweilous,Zweilous,10.036,21.19
Zygarde (10% Forme),Zygarde (10% Forme),13.620,29.07
Zygarde (50% Forme),Zygarde (50% Forme),13.508,34.70


At this point, the two main data sets contain the information we need, so we combine the raid attacker data with the PvP data. 

In [167]:
# Outer join on the raw names: a Pokemon labelled differently by the two sources
# ends up in two separate rows at this stage.
master_columns = ['Pokemon', 'LC Rank', 'GL Rank', 'UL Rank', 'ML Rank', 'ER']
Master_Rankings = PvP_Rankings.merge(counters, how='outer', on='Pokemon')[master_columns]
display(Master_Rankings)

Unnamed: 0,Pokemon,LC Rank,GL Rank,UL Rank,ML Rank,ER
0,Carbink,,1.0,493.0,,15.54
1,Registeel,,2.0,1.0,95.0,22.74
2,Stunfisk (Galarian),,3.0,28.0,254.0,
3,Swampert (Shadow),,4.0,7.0,48.0,
4,Swampert,,5.0,8.5,37.5,37.02
...,...,...,...,...,...,...
1724,Zacian - Crowned Sword,,,,,45.66
1725,Zacian - Hero of Many Battles,,,,,38.73
1726,Zamazenta - Crowned Shield,,,,,34.39
1727,Zamazenta - Hero of Many Battles,,,,,33.70


Now that everything is in one dataset, we add in the Raid Attacker rankings for each Pokemon.

In [168]:
pvp_rank_columns = ['LC Rank', 'GL Rank', 'UL Rank', 'ML Rank']

# One row per Pokemon name; rows sharing a name are averaged.
Master_Rankings = (
    Master_Rankings
    .groupby('Pokemon')[pvp_rank_columns + ['ER']]
    .mean()
)
# Highest ER receives raid rank 1.
Master_Rankings['Raid Rank'] = Master_Rankings['ER'].rank(ascending=False)
# Expose the group key (now the index) as a regular column for later renaming.
Master_Rankings['Name'] = Master_Rankings.index
Final_Rankings = Master_Rankings[['Name', *pvp_rank_columns, 'Raid Rank']]
display(Final_Rankings)

Unnamed: 0_level_0,Name,LC Rank,GL Rank,UL Rank,ML Rank,Raid Rank
Pokemon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Flabébé,Flabébé,,,,,1111.0
5th Anniversary Pikachu,5th Anniversary Pikachu,,,,,1075.0
Abomasnow,Abomasnow,,84.5,47.0,214.0,462.0
Abomasnow (Shadow),Abomasnow (Shadow),,48.0,36.0,217.0,
Abra,Abra,374.0,,,,881.0
...,...,...,...,...,...,...
Zubat (Shadow),Zubat (Shadow),42.0,,,,
Zweilous,Zweilous,,62.5,392.5,,797.0
Zygarde (10% Forme),Zygarde (10% Forme),,556.0,407.5,457.0,450.0
Zygarde (50% Forme),Zygarde (50% Forme),,202.5,142.5,46.0,258.0


# Combining the Mega and Shadow Stats
The final issue is that the two data sets label shadow and mega Pokemon inconsistently. PvPoke uses `Name (Shadow)`, whereas GamePress uses `Shadow Name`. Therefore, some modification is required to successfully combine the raid and PvP statistics.

This next code accounts for cases of: 
* Shadow
* Mega
* Hisuian forms
* Galarian forms
* Alolan Forms
* Shadow Alolan forms (Shadow Galarian forms currently receive only the `Shadow` prefix, not a combined one)

More cases will be added as PvPoke includes them in their rankings, as GamePress already has a large database created for future releases to the game.

In [169]:
def standardize_name(name):
    """Return ``name`` without any ' (Mega)' or ' (Shadow)' tag.

    Every occurrence of either tag is removed, then leading and trailing
    whitespace is trimmed. Other parenthesised tags are left untouched.
    """
    for tag in (' (Mega)', ' (Shadow)'):
        name = name.replace(tag, '')
    return name.strip()

# Work on an explicit copy: Final_Rankings was created by selecting columns from
# Master_Rankings, and assigning into such a selection triggers pandas'
# SettingWithCopyWarning (classic, non-copy-on-write mode).
Final_Rankings = Final_Rankings.copy()

# Standard_Name holds the name with its ' (Mega)' / ' (Shadow)' tag removed.
# It is shown for inspection; the later groupby does not select it.
Final_Rankings['Standard_Name'] = Final_Rankings['Name'].apply(standardize_name)

# (tag found in the name, prefix to prepend) pairs, checked in order; first match wins.
# ' (Alolan) (Shadow)' must come before ' (Shadow)' and ' (Alolan)', otherwise the
# combined form would only receive a single prefix.
# NOTE(review): names containing ' (Galarian) (Shadow)' or ' (Hisuian) (Shadow)' only
# get the 'Shadow' prefix here - confirm how GamePress labels those forms.
_FORM_PREFIXES = [
    (' (Alolan) (Shadow)', 'Shadow Alolan'),
    (' (Shadow)', 'Shadow'),
    (' (Mega)', 'Mega'),
    (' (Hisuian)', 'Hisuian'),
    (' (Galarian)', 'Galarian'),
    (' (Alolan)', 'Alolan'),
]


def _add_form_prefix(name):
    """Prepend the prefix of the first matching form tag to ``name``.

    The parenthesised tag itself is left in place. Names that contain none of
    the tags in ``_FORM_PREFIXES`` are returned unchanged.
    """
    for tag, prefix in _FORM_PREFIXES:
        if tag in name:
            return f"{prefix} {name}"
    return name


# Element-wise mapping replaces the row-by-row iterrows()/.at loop.
Final_Rankings['Name'] = Final_Rankings['Name'].map(_add_form_prefix)

display(Final_Rankings)

Unnamed: 0_level_0,Name,LC Rank,GL Rank,UL Rank,ML Rank,Raid Rank,Standard_Name
Pokemon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Flabébé,Flabébé,,,,,1111.0,Flabébé
5th Anniversary Pikachu,5th Anniversary Pikachu,,,,,1075.0,5th Anniversary Pikachu
Abomasnow,Abomasnow,,84.5,47.0,214.0,462.0,Abomasnow
Abomasnow (Shadow),Shadow Abomasnow (Shadow),,48.0,36.0,217.0,,Abomasnow
Abra,Abra,374.0,,,,881.0,Abra
...,...,...,...,...,...,...,...
Zubat (Shadow),Shadow Zubat (Shadow),42.0,,,,,Zubat
Zweilous,Zweilous,,62.5,392.5,,797.0,Zweilous
Zygarde (10% Forme),Zygarde (10% Forme),,556.0,407.5,457.0,450.0,Zygarde (10% Forme)
Zygarde (50% Forme),Zygarde (50% Forme),,202.5,142.5,46.0,258.0,Zygarde (50% Forme)


Finally, the names can be rewritten in the same format and the data can be combined for viewing. I personally find that viewing this data set in VS Code is the most beneficial, as you can easily search the table in the "Data Viewer" mode.

In [170]:
# Strip the parenthesised form tags now that the matching prefix is in front of the name.
form_suffix_pattern = r'\s*\(Mega\)$|\s*\(Shadow\)$|\s*\(Galarian\)$|\s*\(Hisuian\)$|\s*\(Alolan\)|\s*\(Alolan\)\s*|\s*\(Shadow\)\s*$'
Final_Rankings['Name'] = Final_Rankings['Name'].str.replace(form_suffix_pattern, '', regex=True)

# Rows that now share a name are collapsed into one; mean() ignores the NaN
# entries, so each rank comes from whichever rows actually had a value.
final_rank_columns = ['LC Rank', 'GL Rank', 'UL Rank', 'ML Rank', 'Raid Rank']
Final_Rankings = (
    Final_Rankings
    .groupby('Name')[final_rank_columns]
    .mean()
    .reset_index()
)
display(Final_Rankings)

Unnamed: 0,Name,LC Rank,GL Rank,UL Rank,ML Rank,Raid Rank
0,Flabébé,,,,,1111.0
1,5th Anniversary Pikachu,,,,,1075.0
2,Abomasnow,,84.5,47.0,214.0,462.0
3,Abra,374.0,,,,881.0
4,Absol,,736.0,553.0,439.0,399.0
...,...,...,...,...,...,...
1404,Zubat,36.0,,,,1263.0
1405,Zweilous,,62.5,392.5,,797.0
1406,Zygarde (10% Forme),,556.0,407.5,457.0,450.0
1407,Zygarde (50% Forme),,202.5,142.5,46.0,258.0


To export the combined data set, the following cell writes it to a CSV file:

In [171]:
# index=False keeps the positional row index out of the exported file.
export_path = 'PGO_combined_rankings.csv'
Final_Rankings.to_csv(export_path, index=False)