In [2]:
import sqlite3
# Open the SQLite database file 'final.db' and create the cursor that the
# helper functions below use to run their queries.
conn = sqlite3.connect('final.db')
cur = conn.cursor()

In [3]:
def fetch_table_names(cur: sqlite3.Cursor) -> [str]:
    '''
    Lists the name of every table recorded in sqlite_master

    cur: cursor on the database to inspect
    Returns one name per table; views, indexes and triggers are excluded
    by the type = 'table' filter.
    '''

    query = "SELECT name FROM sqlite_master WHERE type = 'table';"
    cur.execute(query)

    # Each fetched row holds a single field: the table name.
    table_names = []
    for row in cur.fetchall():
        table_names.append(row[0])
    return table_names

def fetch_column_names(cur: sqlite3.Cursor, table: str) -> [str]:
    '''
    Retrieves all columns in a table

    cur: cursor on the database to inspect
    table: bare (unquoted) table name; it is quoted here, so names that
        contain spaces, punctuation or SQL keywords are handled
    Returns the column names in the order PRAGMA table_info reports them;
    an empty list when no such table exists.
    '''

    # PRAGMA arguments cannot be bound with "?" placeholders, so the name is
    # quoted as an SQL identifier instead (embedded double quotes are doubled).
    # This keeps unusual table names working and stops the argument from
    # being able to inject extra SQL.
    quoted_table = '"' + table.replace('"', '""') + '"'
    cur.execute('PRAGMA table_info(' + quoted_table + ');')

    # table_info rows are (cid, name, type, notnull, dflt_value, pk).
    return [x[1] for x in cur.fetchall()]

def fetch_col_values(cur: sqlite3.Cursor, table: str, col: str) -> []:
    '''
    Collects every value of one column of a table

    cur: cursor on the database to query
    table: table name, spliced into the SQL text as-is
    col: column name, spliced into the SQL text as-is
    Returns the first field of every row the query yields.

    NOTE: table and col are not quoted or escaped, so only pass trusted
    identifiers.
    '''

    query = ''.join(('SELECT ', col, ' FROM ', table, ';'))
    cur.execute(query)

    values = []
    for row in cur.fetchall():
        values.append(row[0])
    return values

In [4]:
# Review:
# A database contains tables.
# You can think of each table as the database version of a dataframe.
# Therefore, a table has columns and rows (one row per entry).

In [5]:
# List the name of every table stored in the database
fetch_table_names(cur)

['SCHOOL_TRAIN', 'SCHOOL_TEST', 'POKEMON_TRAIN', 'POKEMON_TEST']

In [6]:
# Show the column names of the POKEMON_TRAIN table (at most the first 10)
fetch_column_names(cur, 'POKEMON_TRAIN')[:10]

['rowid',
 'unique_id',
 'types',
 'abilities',
 'base_happiness',
 'height_m',
 'weight_kg',
 'poke_stats',
 'is_legendary']

In [7]:
# Same check for the POKEMON_TEST table (at most the first 10 column names);
# in the recorded output it has no is_legendary column.
fetch_column_names(cur, 'POKEMON_TEST')[:10]

['rowid',
 'unique_id',
 'types',
 'abilities',
 'base_happiness',
 'height_m',
 'weight_kg',
 'poke_stats']

In [8]:
# Fetch every value of the poke_stats column, then show only the first one
fetch_col_values(cur, 'POKEMON_TRAIN', 'poke_stats')[:1]

['nan, 80.0, 55.0, 25.0, 35.0, 35.0']

In [9]:
# Note: You can fetch all the values at once with SELECT * FROM TABLE_NAME;
# but this is NOT a good idea if you have a large amount of data.
# Since I have 1500 columns, I will not do that here.

In [10]:
import pandas as pd
# Empty placeholder frame; `train` is rebound to the query result in the
# read_sql_query cell further down.
train = pd.DataFrame()

# You now know how to easily query (fetch) column data from a database
# And also how to find all possible column names
# How can you use this knowledge to construct a dataframe?
# Good luck

In [11]:
# Load the training table into a dataframe, naming every column explicitly.
# is_legendary is selected for the training table only.
train = pd.read_sql_query(
    sql='''select
rowid,
unique_id,
types,
abilities,
base_happiness,
height_m,
weight_kg,
poke_stats,
is_legendary
from POKEMON_TRAIN''',
    con=conn,
)

# Load the test table the same way; the query omits is_legendary.
test = pd.read_sql_query(
    sql='''select
rowid,
unique_id,
types,
abilities,
base_happiness,
height_m,
weight_kg,
poke_stats
from POKEMON_TEST''',
    con=conn,
)

In [12]:
# Preview the first five rows of the training frame
train.head()

Unnamed: 0,rowid,unique_id,types,abilities,base_happiness,height_m,weight_kg,poke_stats,is_legendary
0,1,665,"fighting, nan","['Guts', 'Sheer Force', 'Iron Fist']",70.0,0.6,12.5,"nan, 80.0, 55.0, 25.0, 35.0, 35.0",0
1,2,288,"dragon, nan","['Bulletproof', 'Soundproof', 'Overcoat']",70.0,0.6,29.7,"nan, nan, 65.0, 45.0, 45.0, 45.0",0
2,3,349,"poison, fire","['Corrosion', 'Oblivious']",70.0,1.2,22.2,"nan, 64.0, 60.0, 111.0, 60.0, 117.0",0
3,4,260,"ground, nan","['Rock Head', 'Lightningrod', 'Battle Armor']",70.0,0.4,6.5,"nan, 50.0, 95.0, 40.0, 50.0, 35.0",0
4,5,536,"water, nan","['Torrent', 'Sheer Force']",70.0,0.6,9.5,"nan, 65.0, 64.0, 44.0, 48.0, 43.0",0


In [13]:
# Preview the first five rows of the test frame
test.head()

Unnamed: 0,rowid,unique_id,types,abilities,base_happiness,height_m,weight_kg,poke_stats
0,1,602,"grass, nan","['Overgrow', 'Contrary']",70.0,0.6,8.1,"nan, nan, 55.0, 45.0, nan, 63.0"
1,2,756,"fairy, flying","['Hustle', 'Serene Grace', 'Super Luck']",70.0,1.5,38.0,"nan, nan, 95.0, 120.0, 115.0, 80.0"
2,3,191,"electric, nan","['Static', 'Plus']",70.0,1.4,61.5,"nan, 95.0, 105.0, 165.0, nan, 45.0"
3,4,16,"psychic, nan",['Levitate'],,0.2,0.6,"nan, 30.0, 50.0, 65.0, nan, 45.0"
4,5,718,"fighting, psychic","['Pure Power', 'Telepathy']",,1.3,31.5,"nan, 100.0, 85.0, 80.0, 85.0, 100.0"


In [14]:
# Row counts of the training and test frames, in that order
print(len(train),len(test))

520 281


What to do: separate the list-valued columns (types, abilities, poke_stats) into individual features, plot the data, and fill in the null values.

'i'