In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib
import ast
import os
import matplotlib.pyplot as plt

# Shared text styles. `font` becomes matplotlib's global default below;
# the two bold variants are handed to individual labels and titles.
font = dict(family = 'sans-serif', weight = 'normal', size = 18)
label_font = dict(family = 'sans-serif', weight = 'bold', size = 24)
title_font = dict(family = 'sans-serif', weight = 'bold', size = 28)

# Register `font` as the default for every text element matplotlib draws.
matplotlib.rc('font', **font)

def apply_plot_style(ax, x_label = "", y_label = "", x_range = "keep",  y_range = "keep", title = "", grid_axis = "y"):
    """Apply the notebook's common styling to ``ax`` in place.

    Parameters
    ----------
    ax : matplotlib Axes
        The axes to style.
    x_label, y_label : str
        Axis labels, drawn with ``label_font``.
    x_range, y_range : "keep", None, or a (min, max) pair
        ``"keep"`` (the default) or ``None`` leaves the current limits
        untouched; anything else is indexed as ``[0]`` / ``[1]`` and used
        as the new limits. Lists, tuples and arrays are all accepted.
    title : str
        Axes title, drawn with ``title_font``.
    grid_axis : str
        Forwarded to ``ax.grid(axis=...)``.
    """
    def _keeps_limits(limits):
        # Check the type before comparing: `array != "keep"` is evaluated
        # element-wise for NumPy arrays and cannot be used as an `if` test.
        return limits is None or (isinstance(limits, str) and limits == "keep")

    if not _keeps_limits(x_range):
        ax.set_xlim(x_range[0], x_range[1])
    if not _keeps_limits(y_range):
        ax.set_ylim(y_range[0], y_range[1])

    ax.set_xlabel(x_label, fontdict = label_font)
    ax.set_ylabel(y_label, fontdict = label_font)

    # Light grey background with thin dash-dot grid lines.
    ax.set_facecolor("#F2F3F4")
    ax.grid(axis = grid_axis, zorder = 0, color = "#404040", ls = "-.", linewidth = 0.3)

    ax.set_title(title, fontdict = title_font, pad = 10)

In [2]:
# The 18 type names; this order fixes the heatmap axes and the bar order
# of the per-generation charts.
poketypes = [
    "Normal", "Fire", "Water", "Electric", "Grass", "Ice",
    "Fighting", "Poison", "Ground", "Flying", "Psychic", "Bug",
    "Rock", "Ghost", "Dragon", "Dark", "Steel", "Fairy",
]

In [14]:
# Load the scraped Pokedex; "number" is read as text so it is used
# unchanged as the index.
pokedex = pd.read_csv("../pokescraper/data/pokedex.csv", dtype = {"number": str})

# "abilities" is stored as a string literal in the CSV; literal_eval turns
# each cell back into a Python object.
pokedex["abilities"] = pokedex["abilities"].apply(ast.literal_eval)

pokedex = pokedex.set_index("number")

# Rows whose second type is the placeholder "-" get their first type
# copied into type2, so a single-typed Pokemon reads as (type, type).
single_typed = pokedex["type2"] == "-"
pokedex.loc[single_typed, "type2"] = pokedex.loc[single_typed, "type1"]

In [15]:
# A type combination is analysed further once it has at least this many Pokemon.
MIN_POKEMON_PER_TYPE = 16
# Combinations left out of the study even when they reach the threshold.
# NOTE(review): the reason "Poison" is excluded is not visible in this
# notebook - confirm it is still wanted.
EXCLUDED_TYPES = ["Poison"]

# Count Pokemon per (type1, type2) pair and lay the counts out as a grid
# in `poketypes` order; pairs absent from the data are filled with 0.
pokedex_types = pokedex.groupby(["type1", "type2"])["name"].count()
types_heatmap = (pokedex_types.unstack(fill_value = 0)
                 .reindex(index = poketypes, columns = poketypes, fill_value = 0))

# Collect the combinations that are common enough, in row-major grid order.
# Single-typed Pokemon (type1 == type2) are labelled with the bare type name.
types_to_study = []
for type1 in poketypes:
    for type2 in poketypes:
        if types_heatmap.at[type1, type2] >= MIN_POKEMON_PER_TYPE:
            comb_type = type1 if type1 == type2 else type1 + " - " + type2
            # Filtering here (instead of list.remove afterwards) does not
            # raise when an excluded label never reached the threshold.
            if comb_type not in EXCLUDED_TYPES:
                types_to_study.append(comb_type)

# savefig does not create missing directories.
os.makedirs("type", exist_ok = True)

# A fresh, unnumbered figure: plt.figure(1, ...) would silently reuse an
# already-open figure 1 and ignore the requested size.
types = plt.figure(figsize = (12.5,12.5), facecolor = "white")
ax_types = types.add_axes([0, 0, 1, 1])

# Draw on the axes explicitly rather than on whatever is "current".
sns.heatmap(types_heatmap, annot = True, cmap = "viridis", ax = ax_types)

ax_types.set_xlabel("type2", fontdict = label_font)
ax_types.set_ylabel("type1", fontdict = label_font)
ax_types.set_title("Type Distribution - All Pokemon", fontdict = title_font, pad = 10)

filepath = os.path.join("type", "types_heatmap.png")
types.savefig(filepath, dpi = 200, bbox_inches='tight')
# The figure is only needed on disk; close it so pyplot releases it.
plt.close(types)

<Figure size 900x900 with 0 Axes>

In [5]:
# Number of Pokemon per (generation, primary type).
gens_types_data = pokedex.groupby(["generation", "type1"])["name"].count()

# matplotlib.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap is
# the same lookup and exists in old and new releases alike.
cmap = plt.get_cmap('plasma')
# One colour per type, evenly spaced along the colormap.
colors = [cmap(x) for x in np.linspace(0.0, 1.0, len(poketypes))]

# savefig does not create missing directories.
os.makedirs("type", exist_ok = True)

# One horizontal bar chart per generation, saved as type/Generation<N>.png.
for gen in range(1, 9):
    # Unnumbered figure: plt.figure(1, ...) would reuse an open figure 1
    # and ignore figsize.
    gens_types_graph = plt.figure(figsize = [5, 8], facecolor = "white")
    ax = gens_types_graph.add_axes([0,0,1,1])

    # Reindex so every chart lists all types in the same order; types with
    # no Pokemon in this generation become NaN and draw no bar.
    gen_types_data = gens_types_data.loc[gen].reindex(poketypes)

    gen_types_data.plot(kind = "barh", ax = ax, width = 1.0, xlabel = "Count",
                        color = colors, linewidth = 2.0, edgecolor = "k", zorder = 3)

    apply_plot_style(ax, y_range = [-0.5, 17.5], x_range = [0,30], x_label = "Count",
                     title = f"Generation {gen}: {int(gen_types_data.sum())} Pokemon",
                     grid_axis = "x")

    filepath = os.path.join("type", "Generation" + str(gen) + ".png")
    gens_types_graph.savefig(filepath, dpi = 200, bbox_inches='tight')
    # Release the figure before the next iteration creates a new one.
    plt.close(gens_types_graph)

<Figure size 360x576 with 0 Axes>

In [6]:
def get_comb_type(types):
    """Return the label for a (type1, type2) pair.

    Parameters
    ----------
    types : sequence or pandas Series
        The first two elements are the primary and secondary type.

    Returns
    -------
    str
        The bare type name when both types are equal, otherwise
        ``"<type1> - <type2>"``.
    """
    if hasattr(types, "iloc"):
        # DataFrame rows arrive as a Series labelled "type1"/"type2";
        # `row[0]` on such a Series is a deprecated positional lookup, so
        # ask for positions explicitly.
        first, second = types.iloc[0], types.iloc[1]
    else:
        first, second = types[0], types[1]

    if first == second:
        return first
    return first + " - " + second
    
# Label every Pokemon with its combined type ("Fire" or "Fire - Flying").
type_pairs = pokedex[["type1", "type2"]]
pokedex["comb_type"] = type_pairs.apply(get_comb_type, axis = 1)

# Keep an independent copy of only the combinations selected for study.
is_studied = pokedex["comb_type"].isin(types_to_study)
pokedex_to_study = pokedex[is_studied].copy()

In [7]:
# Base-stat columns and the short names used as tick labels (same order).
stats = ["base_HP", "base_attack", "base_defense", "base_sp_attack", "base_sp_defense", "base_speed"]
stats_abbrev = ["HP", "atk", "def", "sp_atk", "sp_def", "speed"]

# matplotlib.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap is
# the same lookup and exists in old and new releases alike.
cmap = plt.get_cmap('plasma')
# One sample more than there are stats: the boxes use colors[1:], skipping
# the first sample of the colormap.
colors = [cmap(x) for x in np.linspace(0.0, 1.0, len(stats_abbrev) + 1)]

# savefig does not create missing directories.
os.makedirs("stats", exist_ok = True)

# One box plot of the six base stats per studied type combination,
# saved as stats/<combination>.png.
for poketype in types_to_study:
    # Unnumbered figure: plt.figure(1, ...) would reuse an open figure 1
    # and ignore figsize.
    types_stats_graph = plt.figure(figsize = [8, 5], facecolor = "white")
    ax = types_stats_graph.add_axes([0,0,1,1])
    type_stats_data = pokedex_to_study[pokedex_to_study["comb_type"] == poketype][stats]
    bp = ax.boxplot(x = type_stats_data, zorder = 5, patch_artist = True,
                    boxprops = dict(linewidth = 2, color = "black"),
                    medianprops = dict(ls = "--", linewidth = 2, color = "black"),
                    flierprops = dict(marker = "o", markerfacecolor = "black"),
                    whiskerprops = dict(linewidth = 2, color = "black"),
                    capprops = dict(linewidth = 2, color = "black"))

    # Fill each stat's box with its own colour.
    for patch, color in zip(bp["boxes"], colors[1:]):
        patch.set(facecolor = color)

    apply_plot_style(ax, y_range = [0, 200], title = poketype)
    ax.set_xticklabels(stats_abbrev)

    filepath = os.path.join("stats", poketype + ".png")
    types_stats_graph.savefig(filepath, dpi = 200, bbox_inches='tight')
    # Release the figure before the next iteration creates a new one.
    plt.close(types_stats_graph)

<Figure size 576x360 with 0 Axes>

In [16]:
# Two side-by-side scatter plots: physical vs special attack (left) and
# physical vs special defense (right), coloured by type combination.
combat_stats_scatter, axes = plt.subplots(nrows = 1, ncols = 2, figsize = [20, 10], facecolor = "white")
combat_stats_scatter.suptitle("Combat Stats Distribution", size = 48, weight = "bold")

atk_ax = axes[0]
def_ax = axes[1]

# Both panels show the same subset of primary types; filter it once.
combat_types = ["Fighting", "Ground", "Rock", "Fairy", "Psychic"]
combat_data = pokedex_to_study[pokedex_to_study["type1"].isin(combat_types)]

sns.scatterplot(data = combat_data,
                x = "base_sp_attack", y = "base_attack", ax = atk_ax, hue = "comb_type", s = 100, legend = False, zorder = 2)

# Dashed diagonal: points on it have equal physical and special values.
atk_ax.plot([0, 180], [0, 180], ls = "--", color = "black", linewidth = 2, alpha = 0.5, zorder = 1)
apply_plot_style(atk_ax, x_range = [0, 180], x_label = "sp_attack", y_range = [0, 180], y_label = "attack", grid_axis = "both")


# Only the right-hand panel builds a legend; it is re-drawn below as one
# legend shared by both panels.
sct = sns.scatterplot(data = combat_data,
                x = "base_sp_defense", y = "base_defense", ax = def_ax, hue = "comb_type", s = 100, legend = "auto", zorder = 2)
def_ax.plot([0, 180], [0, 180], ls = "--", color = "black", linewidth = 2, alpha = 0.5, zorder = 1)
apply_plot_style(def_ax, x_range = [0, 180], x_label = "sp_defense", y_range = [0, 180], y_label = "defense", grid_axis = "both")


labels = sct.get_legend_handles_labels()

# NOTE(review): `_sizes` is a private attribute of the legend handles and
# is presumably meant to enlarge the legend markers - confirm it still has
# an effect with the installed seaborn/matplotlib versions.
for handle in labels[0]:
    handle._sizes = [256]

# Single-row legend anchored below the axes (position is in def_ax
# coordinates, so negative values place it outside the panel).
sct.legend(title = "Pokemon Type", handles = labels[0], labels = labels[1],
           ncol = len(labels[0]), bbox_to_anchor = (-0.1, -0.2), loc = "center",
           title_fontproperties = label_font, edgecolor = "black", handletextpad = 0)

# savefig does not create missing directories.
os.makedirs("stats", exist_ok = True)
filepath = os.path.join("stats", "combat stats.png")
combat_stats_scatter.savefig(filepath, dpi = 200, bbox_inches='tight')
# The figure is only needed on disk; close it so pyplot releases it.
plt.close(combat_stats_scatter)

<Figure size 1440x720 with 0 Axes>

In [9]:
def _ability_frame(counts):
    """Turn {ability: {comb_type: count}} into a frame with one row per ability."""
    return pd.DataFrame(list(counts.values()), index = list(counts.keys()),
                        columns = types_to_study)

# Tally in plain dicts and build the frames once at the end. Incrementing
# through `frame.loc[row][col] += 1` is chained assignment: it may update a
# temporary copy instead of the frame (and always does under pandas
# Copy-on-Write), and growing a frame one row at a time is slow.
reg_counts = {}
hidden_counts = {}

# Iterate over the rows' values rather than looking each index label up
# with .loc, which returns several rows when a number is duplicated.
for abilities, comb_type in zip(pokedex_to_study["abilities"], pokedex_to_study["comb_type"]):
    for ability in abilities:
        # Abilities whose text contains "Hidden Ability" are counted separately.
        counts = hidden_counts if "Hidden Ability" in ability else reg_counts
        if ability not in counts:
            # First sighting: start every studied combination at zero.
            counts[ability] = dict.fromkeys(types_to_study, 0)
        counts[ability][comb_type] += 1

# Rows: abilities in order of first appearance; columns: studied combinations.
reg_ability_counter = _ability_frame(reg_counts)
hidden_ability_counter = _ability_frame(hidden_counts)

In [17]:
# Number of studied Pokemon per type combination.
types_count = pokedex_to_study.groupby("comb_type")["comb_type"].count()

# Average abilities per Pokemon = total ability occurrences for the
# combination / number of Pokemon with that combination. The frame is built
# directly from both Series instead of assigning columns into an empty frame.
avg_n_abilities = pd.DataFrame({
    "Regular": reg_ability_counter.sum(axis = 0)/types_count,
    "Hidden": hidden_ability_counter.sum(axis = 0)/types_count,
})

avg_n_abilities_fig = plt.figure(figsize = [10, 8], facecolor = "white")
ax = avg_n_abilities_fig.add_axes([0,0,1,1])

# Stacked bars: regular abilities first, hidden abilities on top.
avg_n_abilities.plot(kind = "barh", stacked = True, ax = ax, width = 0.6,
                     color = ["#599ad3", "#f1595f"], edgecolor = "k", linewidth = 2,
                     zorder = 2)
apply_plot_style(ax, x_label = "Average Number of Abilities", grid_axis = "x")

# savefig does not create missing directories.
os.makedirs("abilities", exist_ok = True)
filepath = os.path.join("abilities", "avg n of abilities.png")
avg_n_abilities_fig.savefig(filepath, dpi = 200, bbox_inches='tight')
# The figure is only needed on disk; close it so pyplot releases it.
plt.close(avg_n_abilities_fig)

<Figure size 720x576 with 0 Axes>

In [18]:
# The ten regular abilities with the most occurrences across all studied
# type combinations.
top_reg_abilities = reg_ability_counter.sum(axis = 1).sort_values(ascending = False).head(10).index

reg_abilities_fig = plt.figure(figsize = [10, 8], facecolor = "white")
ax = reg_abilities_fig.add_axes([0,0,1,1])

# matplotlib.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap is
# the same lookup and exists in old and new releases alike.
cmap = plt.get_cmap('Paired')
# One colour per studied type combination.
colors = [cmap(x) for x in np.linspace(0.0, 1.0, len(types_to_study))]

# One stacked bar per ability, split by type combination.
reg_ability_counter.loc[top_reg_abilities].plot(kind = "barh", stacked = True, ax = ax, width = 0.6,
                                                color = colors, edgecolor = "k", linewidth = 2, zorder = 2)
apply_plot_style(ax, x_label = "Number of Pokemon", grid_axis = "x")

# savefig does not create missing directories.
os.makedirs("abilities", exist_ok = True)
filepath = os.path.join("abilities", "reg abilities distribution.png")
reg_abilities_fig.savefig(filepath, dpi = 200, bbox_inches='tight')
# The figure is only needed on disk; close it so pyplot releases it.
plt.close(reg_abilities_fig)

<Figure size 720x576 with 0 Axes>

In [19]:
# The ten hidden abilities with the most occurrences across all studied
# type combinations.
top_hidden_abilities = hidden_ability_counter.sum(axis = 1).sort_values(ascending = False).head(10).index

hidden_abilities_fig = plt.figure(figsize = [10, 8], facecolor = "white")
ax = hidden_abilities_fig.add_axes([0,0,1,1])

# matplotlib.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap is
# the same lookup and exists in old and new releases alike.
cmap = plt.get_cmap('Paired')
# One colour per studied type combination.
colors = [cmap(x) for x in np.linspace(0.0, 1.0, len(types_to_study))]

# One stacked bar per ability, split by type combination.
hidden_ability_counter.loc[top_hidden_abilities].plot(kind = "barh", stacked = True, ax = ax, width = 0.6,
                                                      color = colors, edgecolor = "k", linewidth = 2, zorder = 2)
apply_plot_style(ax, x_label = "Number of Pokemon", grid_axis = "x")
# Strip the " (Hidden Ability)" marker from the tick labels; the chart is
# about hidden abilities only, so the suffix is redundant.
ax.set_yticklabels([x.replace(" (Hidden Ability)", "") for x in top_hidden_abilities])

# savefig does not create missing directories.
os.makedirs("abilities", exist_ok = True)
filepath = os.path.join("abilities", "hidden abilities distribution.png")
hidden_abilities_fig.savefig(filepath, dpi = 200, bbox_inches='tight')
# The figure is only needed on disk; close it so pyplot releases it.
plt.close(hidden_abilities_fig)

<Figure size 720x576 with 0 Axes>