In [1]:
import pandas as pd
import numpy as np

In [2]:
import sqlite3
conn = sqlite3.connect('final.db')
cur = conn.cursor()

In [3]:
def fetch_table_names(cur: sqlite3.Cursor) -> [str]:
    '''
    Retrieves all tables in the database
    '''
    
    cur.execute('SELECT name FROM sqlite_master WHERE type = \'table\';')
    return [x[0] for x in cur.fetchall()]

def fetch_column_names(cur: sqlite3.Cursor, table: str) -> [str]:
    '''
    Retrieves all columns in a table
    '''
    
    cur.execute('PRAGMA table_info(' + table + ');')
    return [x[1] for x in cur.fetchall()]

def fetch_col_values(cur: sqlite3.Cursor, table: str, col: str) -> []:
    '''
    Returns all values for a specific column
    '''
    
    cur.execute('SELECT ' + col + ' FROM ' + table + ';')
    return [x[0] for x in cur.fetchall()]

In [4]:
columns_train = fetch_column_names(cur, 'POKEMON_TRAIN')

In [5]:
columns_test = fetch_column_names(cur, 'POKEMON_TEST')

In [6]:
def create_df(cursor, db_name, columns):
    df = pd.DataFrame()
    for i in columns:
        df[i] = i
        df[i] = np.array(fetch_col_values(cursor, db_name, i))
    
    return df

In [7]:
testDF = create_df(cur, 'POKEMON_TEST', columns_test)

In [8]:
trainDF = create_df(cur, 'POKEMON_TRAIN', columns_train)

In [9]:
testDF.head()

Unnamed: 0,rowid,unique_id,types,abilities,base_happiness,height_m,weight_kg,poke_stats
0,1,602,"grass, nan","['Overgrow', 'Contrary']",70.0,0.6,8.1,"nan, nan, 55.0, 45.0, nan, 63.0"
1,2,756,"fairy, flying","['Hustle', 'Serene Grace', 'Super Luck']",70.0,1.5,38.0,"nan, nan, 95.0, 120.0, 115.0, 80.0"
2,3,191,"electric, nan","['Static', 'Plus']",70.0,1.4,61.5,"nan, 95.0, 105.0, 165.0, nan, 45.0"
3,4,16,"psychic, nan",['Levitate'],,0.2,0.6,"nan, 30.0, 50.0, 65.0, nan, 45.0"
4,5,718,"fighting, psychic","['Pure Power', 'Telepathy']",,1.3,31.5,"nan, 100.0, 85.0, 80.0, 85.0, 100.0"


In [10]:
poke_stats = trainDF['poke_stats']

In [11]:
#type(poke_stats), poke_stats.dtype

In [12]:
stat_type = ['HP', 'Atk', 'Def', 'SpA', 'SpD', 'Spe']

In [13]:
#def seperate_stats(stats, df, col_names):
#    col_pos = 0
#    for string in stats.size:
#        for type in stat_type:
#            for attr in string:
#                df[type] = attr
#            

In [14]:
#for i in stat_type:
#    trainDF[i] = i

In [15]:
stat_list = []
for i in range(520):
    stat_list.append(poke_stats.iloc[i].split(', '))

In [16]:
#hi[0]

In [17]:
trainDF1 = pd.DataFrame(stat_list, columns=stat_type)

In [18]:
trainDF1.head()


Unnamed: 0,HP,Atk,Def,SpA,SpD,Spe
0,,80.0,55.0,25.0,35.0,35.0
1,,,65.0,45.0,45.0,45.0
2,,64.0,60.0,111.0,60.0,117.0
3,,50.0,95.0,40.0,50.0,35.0
4,,65.0,64.0,44.0,48.0,43.0


In [20]:
def combine_dfs(df1, df2):
    #adds df2 columns to the end of df1
    cols = df2.columns
    for col_names in cols:
        df1[col_names] = df2[col_names]
    return df1

In [21]:
trainDF = combine_dfs(trainDF, trainDF1)

In [24]:
trainDF = trainDF.drop(columns='poke_stats')

In [25]:
trainDF.head()

Unnamed: 0,rowid,unique_id,types,abilities,base_happiness,height_m,weight_kg,is_legendary,HP,Atk,Def,SpA,SpD,Spe
0,1,665,"fighting, nan","['Guts', 'Sheer Force', 'Iron Fist']",70.0,0.6,12.5,0,,80.0,55.0,25.0,35.0,35.0
1,2,288,"dragon, nan","['Bulletproof', 'Soundproof', 'Overcoat']",70.0,0.6,29.7,0,,,65.0,45.0,45.0,45.0
2,3,349,"poison, fire","['Corrosion', 'Oblivious']",70.0,1.2,22.2,0,,64.0,60.0,111.0,60.0,117.0
3,4,260,"ground, nan","['Rock Head', 'Lightningrod', 'Battle Armor']",70.0,0.4,6.5,0,,50.0,95.0,40.0,50.0,35.0
4,5,536,"water, nan","['Torrent', 'Sheer Force']",70.0,0.6,9.5,0,,65.0,64.0,44.0,48.0,43.0


In [32]:
trainDF1.head()

Unnamed: 0,HP,Atk,Def,SpA,SpD,Spe
0,,80.0,55.0,25.0,35.0,35.0
1,,,65.0,45.0,45.0,45.0
2,,64.0,60.0,111.0,60.0,117.0
3,,50.0,95.0,40.0,50.0,35.0
4,,65.0,64.0,44.0,48.0,43.0


In [131]:
trainDF = trainDF.drop(columns='poke_stats')

Unnamed: 0,rowid,unique_id,types,abilities,base_happiness,height_m,weight_kg,HP,Atk,SpA,SpD,Spe
0,1,665,"fighting, nan","['Guts', 'Sheer Force', 'Iron Fist']",70.0,0.6,12.5,,134.0,95.0,100.0,100.0
1,2,288,"dragon, nan","['Bulletproof', 'Soundproof', 'Overcoat']",70.0,0.6,29.7,,134.0,95.0,100.0,100.0
2,3,349,"poison, fire","['Corrosion', 'Oblivious']",70.0,1.2,22.2,,134.0,95.0,100.0,100.0
3,4,260,"ground, nan","['Rock Head', 'Lightningrod', 'Battle Armor']",70.0,0.4,6.5,,134.0,95.0,100.0,100.0
4,5,536,"water, nan","['Torrent', 'Sheer Force']",70.0,0.6,9.5,,134.0,95.0,100.0,100.0


In [142]:
abilities = trainDF['abilities']
type(abilities)

pandas.core.series.Series

In [146]:
trainDF.HP[:50]

0     nan
1     nan
2     nan
3     nan
4     nan
5     nan
6     nan
7     nan
8     nan
9     nan
10    nan
11    nan
12    nan
13    nan
14    nan
15    nan
16    nan
17    nan
18    nan
19    nan
20    nan
21    nan
22    nan
23    nan
24    nan
25    nan
26    nan
27    nan
28    nan
29    nan
30    nan
31    nan
32    nan
33    nan
34    nan
35    nan
36    nan
37    nan
38    nan
39    nan
40    nan
41    nan
42    nan
43    nan
44    nan
45    nan
46    nan
47    nan
48    nan
49    nan
Name: HP, dtype: object

In [45]:
trainDF.isna().any()

rowid             False
unique_id         False
types             False
abilities         False
base_happiness     True
height_m           True
weight_kg          True
poke_stats        False
dtype: bool

AttributeError: 'numpy.ndarray' object has no attribute 'info'