In [1]:
import json

import numpy as np
import pandas as pd

# `pandas.io.json.json_normalize` has been deprecated since pandas 1.0 in
# favour of the top-level `pandas.json_normalize`, and newer pandas releases
# no longer expose the old location. Prefer the top-level function and only
# fall back to the legacy import on pandas versions that predate it, so the
# name `json_normalize` is available to later cells either way.
try:
    json_normalize = pd.json_normalize
except AttributeError:
    from pandas.io.json import json_normalize

# https://github.com/route1rodent/swordshield-data
# Pin the encoding: without it open() uses the platform's locale default,
# which is not UTF-8 everywhere and can garble or reject non-ASCII text.
with open('data/pokemon_gen8.json', encoding='utf-8') as json_file:
    rows_json = json.load(json_file)

# number of records loaded
print(len(rows_json))

524


In [2]:
# Peek at a single raw record (index 110) to see which fields an entry carries.
rows_json[110]

{'id': 236,
 'name': 'Tyrogue',
 'stage': 1,
 'galar_dex': '107',
 'base_stats': [35, 35, 35, 35, 35, 35],
 'ev_yield': [0, 1, 0, 0, 0, 0],
 'abilities': ['Guts', 'Steadfast', 'Vital Spirit'],
 'types': ['Fighting'],
 'items': [],
 'exp_group': 'MediumFast',
 'egg_groups': ['Undiscovered'],
 'hatch_cycles': 25,
 'height': 0.7,
 'weight': 21,
 'color': 'Purple',
 'level_up_moves': [[1, 'Tackle'],
  [1, 'Helping Hand'],
  [1, 'Fake Out'],
  [1, 'Focus Energy']],
 'egg_moves': ['Rapid Spin',
  'High Jump Kick',
  'Mach Punch',
  'Mind Reader',
  'Counter',
  'Vacuum Wave',
  'Bullet Punch',
  'Feint'],
 'tms': [0, 1, 21, 22, 23, 24, 25, 31, 33, 34, 39, 40, 41, 43, 75, 76, 79, 81],
 'trs': [1, 7, 10, 13, 20, 26, 27, 35, 48, 85],
 'evolutions': [{'species': 'Hitmonchan-0',
   'method': 'LevelUpAeqD',
   'method_value': '0'},
  {'species': 'Hitmonlee-0', 'method': 'LevelUpATK', 'method_value': '0'},
  {'species': 'Hitmontop-0', 'method': 'LevelUpDEF', 'method_value': '0'}],

In [3]:
def get_max_lengths(rows_json, cols):
    """ find the longest array stored under each of the given keys

    rows_json: iterable of dict records; every record must contain each key
               in `cols`, and the value must support len()
    cols: list of keys to measure
    returns: dict mapping each key in `cols` to the largest length seen
             (0 when there are no records)
    """
    longest = dict.fromkeys(cols, 0)  # every key starts at zero
    for record in rows_json:
        for key in cols:
            longest[key] = max(longest[key], len(record[key]))
    return longest

# Report the longest array found for each of the list-valued fields.
get_max_lengths(
    rows_json,
    cols=['abilities', 'types', 'items', 'egg_groups', 'level_up_moves'],
)

{'abilities': 3, 'types': 2, 'items': 3, 'egg_groups': 2, 'level_up_moves': 31}

In [4]:
# One row per record; the list-valued fields are expanded into their own
# columns further down.
full_df = json_normalize(rows_json)

# columns copied over as they are
cols = [
    'id', 'name', 'stage', 'galar_dex', 'exp_group',
    'hatch_cycles', 'height', 'weight', 'color',
    'description',
]
df = full_df[cols]

# derived column names
stats = ['hp', 'atk', 'def', 'spa', 'spd', 'spe']
ev_stats = ['ev_{}'.format(stat) for stat in stats]
abilities = ['ability{}'.format(slot) for slot in (1, 2, 3)]
types = ['type{}'.format(slot) for slot in (1, 2)]
egg_groups = ['egg_group{}'.format(slot) for slot in (1, 2)]
# item1_name, item1_chance, item2_name, ... in that order
item_colnames = [
    'item{}_{}'.format(slot, field)
    for slot in (1, 2, 3)
    for field in ('name', 'chance')
]

def build_df_from_array_series(s, colnames):
    """ convert series of arrays [[1,2],[3,4]] into df [[1,2],[3,4]]

    s: Series whose elements are lists; shorter lists are padded with
       missing values by the DataFrame constructor
    colnames: names for the resulting columns, one per array position
    returns: DataFrame with one row per element of `s`, labelled with the
             same index as `s`
    """
    # https://stackoverflow.com/a/35491399
    # Keep the index of `s`: the result is concatenated column-wise with other
    # frames, and pd.concat(axis=1) aligns rows on index labels, so a fresh
    # 0..n-1 index would misalign rows whenever `s` is not default-indexed.
    return pd.DataFrame(s.tolist(), columns=colnames, index=s.index)


def convert_item_series_element(e, n_items=3):
    """
    flatten one element of the items series into a fixed-width flat list

    [['a', 50], ['b', 5], ['c', 1]] -> ['a', 50, 'b', 5, 'c', 1]
    [['a', 50]]                     -> ['a', 50, None, None, None, None]
    []                              -> [None, None, None, None, None, None]

    e: list of [item, chance] pairs; None is treated like an empty list
    n_items: number of (item, chance) slots to pad up to; the default of 3
             yields the 6 values used for the item columns
    returns: flat list [item, chance, item, chance, ...] padded with None up
             to 2 * n_items values (it is not truncated if `e` holds more
             than n_items pairs)

    Entries with fewer than two values are skipped with a warning.
    """
    import warnings  # local import: only used on the malformed-entry path

    if e is None:
        e = []

    output = []
    for item_chance in e:
        if len(item_chance) < 2:
            # report the bad entry with context instead of printing a bare value
            warnings.warn(
                'skipping malformed item entry: {!r}'.format(item_chance)
            )
            continue
        item, chance = item_chance
        output.extend((item, chance))

    # pad with None up to the fixed width; a non-positive count adds nothing
    output.extend([None] * (2 * n_items - len(output)))
    return output


# (series of arrays, names of the columns it is expanded into)
array_sources = [
    (full_df['base_stats'], stats),
    (full_df['ev_yield'], ev_stats),
    (full_df['abilities'], abilities),
    (full_df['types'], types),
    (full_df['items'].apply(convert_item_series_element), item_colnames),
    (full_df['egg_groups'], egg_groups),
]

# (output column, source column whose array length is recorded)
count_sources = [
    ('n_levelup_moves', 'level_up_moves'),
    ('n_egg_moves', 'egg_moves'),
    ('n_tms', 'tms'),
    ('n_trs', 'trs'),
    ('n_evolutions', 'evolutions'),
]

# scalar columns first, then the expanded arrays, then the length counts
frames = [df]
frames.extend(
    build_df_from_array_series(series, names)
    for series, names in array_sources
)
frames.extend(
    pd.DataFrame({out_name: full_df[src_name].apply(len)})
    for out_name, src_name in count_sources
)
df = pd.concat(frames, axis=1)

df.head()

Unnamed: 0,id,name,stage,galar_dex,exp_group,hatch_cycles,height,weight,color,description,...,item2_chance,item3_name,item3_chance,egg_group1,egg_group2,n_levelup_moves,n_egg_moves,n_tms,n_trs,n_evolutions
0,1,Bulbasaur,1,foreign,MediumSlow,20,0.7,6.9,Green,"While it is young, it uses the nutrients that ...",...,,,,Monster,Grass,15,6,19,13,1
1,2,Ivysaur,2,foreign,MediumSlow,20,1.0,13.0,Green,Exposure to sunlight adds to its strength. Sun...,...,,,,Monster,Grass,15,0,19,13,1
2,3,Venusaur,3,foreign,MediumSlow,20,2.0,100.0,Green,A bewitching aroma wafts from its flower. The ...,...,,,,Monster,Grass,18,0,23,16,0
3,4,Charmander,1,378,MediumSlow,20,0.6,8.5,Red,"From the time it is born, a flame burns at the...",...,,,,Monster,Dragon,12,8,27,17,1
4,5,Charmeleon,2,379,MediumSlow,20,1.1,19.0,Red,"If it becomes agitated during battle, it spout...",...,,,,Monster,Dragon,12,0,27,17,1


In [5]:
# Write the flattened table to disk; the row index is left out of the file.
df.to_csv('data/pokemon_gen8.csv', index=False)

In [6]:
# Load the CSV written above back into a frame and show its first rows
# (presumably a sanity check of the export).
ddf = pd.read_csv('data/pokemon_gen8.csv')
ddf.head()

Unnamed: 0,id,name,stage,galar_dex,exp_group,hatch_cycles,height,weight,color,description,...,item2_chance,item3_name,item3_chance,egg_group1,egg_group2,n_levelup_moves,n_egg_moves,n_tms,n_trs,n_evolutions
0,1,Bulbasaur,1,foreign,MediumSlow,20,0.7,6.9,Green,"While it is young, it uses the nutrients that ...",...,,,,Monster,Grass,15,6,19,13,1
1,2,Ivysaur,2,foreign,MediumSlow,20,1.0,13.0,Green,Exposure to sunlight adds to its strength. Sun...,...,,,,Monster,Grass,15,0,19,13,1
2,3,Venusaur,3,foreign,MediumSlow,20,2.0,100.0,Green,A bewitching aroma wafts from its flower. The ...,...,,,,Monster,Grass,18,0,23,16,0
3,4,Charmander,1,378,MediumSlow,20,0.6,8.5,Red,"From the time it is born, a flame burns at the...",...,,,,Monster,Dragon,12,8,27,17,1
4,5,Charmeleon,2,379,MediumSlow,20,1.1,19.0,Red,"If it becomes agitated during battle, it spout...",...,,,,Monster,Dragon,12,0,27,17,1
