# Reading database into csv

In [1]:
import sqlite3
conn = sqlite3.connect('final.db')
cur = conn.cursor()

In [2]:
# Methods
def fetch_table_names(cur: sqlite3.Cursor) -> [str]:
    '''
    Retrieves all tables in the database
    '''
    
    cur.execute('SELECT name FROM sqlite_master WHERE type = \'table\';')
    return [x[0] for x in cur.fetchall()]

def fetch_column_names(cur: sqlite3.Cursor, table: str) -> [str]:
    '''
    Retrieves all columns in a table
    '''
    
    cur.execute('PRAGMA table_info(' + table + ');')
    return [x[1] for x in cur.fetchall()]

def fetch_col_values(cur: sqlite3.Cursor, table: str, col: str) -> []:
    '''
    Returns all values for a specific column
    '''
    
    cur.execute('SELECT ' + col + ' FROM ' + table + ';')
    return [x[0] for x in cur.fetchall()]

In [3]:
fetch_table_names(cur)

['SCHOOL_TRAIN', 'SCHOOL_TEST', 'POKEMON_TRAIN', 'POKEMON_TEST']

In [4]:
names = fetch_column_names(cur, 'POKEMON_TRAIN')
names

['rowid',
 'unique_id',
 'types',
 'abilities',
 'base_happiness',
 'height_m',
 'weight_kg',
 'poke_stats',
 'is_legendary']

In [5]:
import pandas as pd
import numpy as np
unclean = pd.DataFrame()

In [6]:
for name in names:
    unclean[name] = fetch_col_values(cur, 'POKEMON_TRAIN', name)

In [8]:
unclean.to_csv('Pokemon_unclean.csv', index=False)

# Cleaning

In [15]:
unclean

Unnamed: 0,rowid,unique_id,abilities,base_happiness,height_m,weight_kg,poke_stats,is_legendary,fighting,flying,...,bug,ice,grass,dark,fairy,ghost,ground,poison,psychic,water
0,1,665,"['Guts', 'Sheer Force', 'Iron Fist']",70.0,0.6,12.5,"nan, 80.0, 55.0, 25.0, 35.0, 35.0",0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,288,"['Bulletproof', 'Soundproof', 'Overcoat']",70.0,0.6,29.7,"nan, nan, 65.0, 45.0, 45.0, 45.0",0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,349,"['Corrosion', 'Oblivious']",70.0,1.2,22.2,"nan, 64.0, 60.0, 111.0, 60.0, 117.0",0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,260,"['Rock Head', 'Lightningrod', 'Battle Armor']",70.0,0.4,6.5,"nan, 50.0, 95.0, 40.0, 50.0, 35.0",0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,5,536,"['Torrent', 'Sheer Force']",70.0,0.6,9.5,"nan, 65.0, 64.0, 44.0, 48.0, 43.0",0,0,0,...,0,0,0,0,0,0,0,0,0,1
5,6,293,['Turboblaze'],0.0,3.2,330.0,"nan, 120.0, 100.0, 150.0, 120.0, 90.0",1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,7,181,['Levitate'],70.0,1.1,15.3,"nan, 70.0, 50.0, 50.0, 50.0, 70.0",0,0,0,...,0,0,0,0,0,0,1,0,0,0
7,8,554,"['Poison Point', 'Rivalry', 'Sheer Force']",70.0,1.3,60.0,"nan, 92.0, 87.0, 75.0, 85.0, 76.0",0,0,0,...,0,0,0,0,0,0,1,1,0,0
8,9,507,['Battery'],70.0,0.5,10.5,"nan, 82.0, 95.0, 55.0, 75.0, 36.0",0,0,0,...,1,0,0,0,0,0,0,0,0,0
9,10,140,['Beast Boost'],0.0,1.2,55.5,"109, 53.0, 47.0, 127.0, 131.0, 103.0",1,0,0,...,0,0,0,0,0,0,0,1,0,0


### Get unique pokemon types in this df

In [10]:
unq = set()
main = []
for row in unclean['types']:
    temp = row.split(', ')
    main.append(temp)
    for typ in temp:
        unq.add(typ)
print(unq)

for typ in unq:
    unclean[typ] = 0

{'fighting', 'nan', 'flying', 'fire', 'steel', 'dragon', 'rock', 'normal', 'electric', 'bug', 'ice', 'grass', 'dark', 'fairy', 'ghost', 'ground', 'poison', 'psychic', 'water'}


### Fills in pokemon types in df
1 if pokemon is of that type, 0 else

In [12]:
for typ in unq:
    i = 0
    for var in unclean[typ]:
        first,second = main[i][0],main[i][1]
        if (first == typ) or (second == typ):
            unclean[typ][i] = 1
        i += 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [17]:
unclean['poke_stats'][0]

'nan, 80.0, 55.0, 25.0, 35.0, 35.0'

In [43]:
test = unclean['poke_stats'][0].split(', ')
test

['nan', '80.0', '55.0', '25.0', '35.0', '35.0']

In [44]:
for i in range(0,len(test)):
    ex = test[i][:-2]
print(test)

['nan', '80.0', '55.0', '25.0', '35.0', '35.0']


del unclean['nan']
del unclean['types']
unclean.to_csv('Pokemon_clean_0.csv', index=False)