In [87]:
import pandas as pd

poke_data = pd.read_csv('./poke_data.csv')
poke_data.head(10)

Unnamed: 0,Name,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Type
0,Koffing,40,65,95,60,45,35,"""Poison"""
1,Pikachu,35,55,40,50,50,90,"""Electric"""
2,Shellder,30,65,100,45,25,40,"""Water"""
3,Krabby,30,105,90,25,25,50,"""Water"""
4,Voltorb,40,30,50,55,55,100,"""Electric"""
5,Cubone,50,50,95,40,50,35,"""Ground"""
6,Magikarp,20,10,55,15,20,80,"""Water"""
7,Pineco,50,65,90,35,35,15,"""Bug"""
8,Misdreavus,60,60,60,85,85,85,"""Ghost"""
9,Phanpy,90,60,60,40,40,40,"""Ground"""


In [88]:
type_data = poke_data["Type"].to_list()
type_data

[' "Poison"',
 ' "Electric"',
 ' "Water"',
 ' "Water"',
 ' "Electric"',
 ' "Ground"',
 ' "Water"',
 ' "Bug"',
 ' "Ghost"',
 ' "Ground"']

In [89]:
import numpy as np

def strings_to_onehot(categories_column):
    # First, we will build a list containing the categories
    # for that, we create an array with the unique elements in 
    unique_categories = list(set(categories_column))
    
    # We will create a one-hot matrix, the first step is to create a zero-matrix 
    # of dimmensions number of datapoints X number of categories
    one_hot_matrix = np.zeros( (len(categories_column), len(unique_categories)), dtype=int )
    
    # This loop sets to 1 the right slot in each row for every example in our data
    for row, category in zip(one_hot_matrix, categories_column):
        category_index = unique_categories.index(category)
        row[category_index] = 1

    # Now, let's build and return a DataFrame with the values
    return pd.DataFrame(columns = unique_categories, data = one_hot_matrix)

# onehot_types is the DataFrame with the categories in one-hot encoding
onehot_types = strings_to_onehot(type_data)
onehot_types

Unnamed: 0,"""Bug""","""Poison""","""Ghost""","""Water""","""Electric""","""Ground"""
0,0,1,0,0,0,0
1,0,0,0,0,1,0
2,0,0,0,1,0,0
3,0,0,0,1,0,0
4,0,0,0,0,1,0
5,0,0,0,0,0,1
6,0,0,0,1,0,0
7,1,0,0,0,0,0
8,0,0,1,0,0,0
9,0,0,0,0,0,1


In [90]:
# Now we will concatenate both pieces of data 
final_pokedata = pd.concat([poke_data, onehot_types],axis=1)
final_pokedata

Unnamed: 0,Name,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Type,"""Bug""","""Poison""","""Ghost""","""Water""","""Electric""","""Ground"""
0,Koffing,40,65,95,60,45,35,"""Poison""",0,1,0,0,0,0
1,Pikachu,35,55,40,50,50,90,"""Electric""",0,0,0,0,1,0
2,Shellder,30,65,100,45,25,40,"""Water""",0,0,0,1,0,0
3,Krabby,30,105,90,25,25,50,"""Water""",0,0,0,1,0,0
4,Voltorb,40,30,50,55,55,100,"""Electric""",0,0,0,0,1,0
5,Cubone,50,50,95,40,50,35,"""Ground""",0,0,0,0,0,1
6,Magikarp,20,10,55,15,20,80,"""Water""",0,0,0,1,0,0
7,Pineco,50,65,90,35,35,15,"""Bug""",1,0,0,0,0,0
8,Misdreavus,60,60,60,85,85,85,"""Ghost""",0,0,1,0,0,0
9,Phanpy,90,60,60,40,40,40,"""Ground""",0,0,0,0,0,1


## Alternative solution: Use built-in onehot encoding


In [91]:
poke_data = pd.read_csv('./poke_data.csv')
onehot_types = pd.get_dummies(poke_data['Type'])
poke_data = poke_data.join( onehot_types )
poke_data

Unnamed: 0,Name,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Type,"""Bug""","""Electric""","""Ghost""","""Ground""","""Poison""","""Water"""
0,Koffing,40,65,95,60,45,35,"""Poison""",0,0,0,0,1,0
1,Pikachu,35,55,40,50,50,90,"""Electric""",0,1,0,0,0,0
2,Shellder,30,65,100,45,25,40,"""Water""",0,0,0,0,0,1
3,Krabby,30,105,90,25,25,50,"""Water""",0,0,0,0,0,1
4,Voltorb,40,30,50,55,55,100,"""Electric""",0,1,0,0,0,0
5,Cubone,50,50,95,40,50,35,"""Ground""",0,0,0,1,0,0
6,Magikarp,20,10,55,15,20,80,"""Water""",0,0,0,0,0,1
7,Pineco,50,65,90,35,35,15,"""Bug""",1,0,0,0,0,0
8,Misdreavus,60,60,60,85,85,85,"""Ghost""",0,0,1,0,0,0
9,Phanpy,90,60,60,40,40,40,"""Ground""",0,0,0,1,0,0
