In [1]:
### Utility Functions
from IPython.display import display, HTML
import pandas as pd
import sqlite3
from sqlite3 import Error

def create_connection(db_file, delete_db=False):
    """Open a SQLite connection to ``db_file`` with foreign keys switched on.

    Args:
        db_file: Path of the database file handed to ``sqlite3.connect``.
        delete_db: When truthy, an existing file at ``db_file`` is removed
            before connecting.

    Returns:
        The ``sqlite3.Connection``. If connecting raises ``sqlite3.Error``
        the error is printed and ``None`` is returned; if only the PRAGMA
        fails the error is printed and the open connection is still returned.
    """
    import os

    # Only touch the filesystem when a reset was actually requested.
    should_remove = delete_db and os.path.exists(db_file)
    if should_remove:
        os.remove(db_file)

    connection = None
    try:
        connection = sqlite3.connect(db_file)
        connection.execute("PRAGMA foreign_keys = 1")
    except Error as exc:
        # Report on stdout instead of raising; callers receive whatever
        # `connection` holds at this point.
        print(exc)
    return connection


def create_table(conn, create_table_sql, drop_table_name=None):
    """Run a CREATE TABLE statement, optionally dropping a table first.

    Args:
        conn: Open ``sqlite3`` connection.
        create_table_sql: The CREATE TABLE statement to execute.
        drop_table_name: If truthy, ``DROP TABLE IF EXISTS <name>`` is
            executed before the create statement.

    Any ``sqlite3.Error`` raised by either statement is printed and
    swallowed; the function always returns ``None``.
    """

    def _run_and_report(build_statement):
        # The cursor is created before the statement text is built, and both
        # happen inside the try so sqlite3 errors are printed, not raised.
        try:
            conn.cursor().execute(build_statement())
        except Error as exc:
            print(exc)

    if drop_table_name:
        _run_and_report(
            lambda: """DROP TABLE IF EXISTS %s""" % (drop_table_name)
        )

    _run_and_report(lambda: create_table_sql)
        
def execute_sql_statement(sql_statement, conn):
    """Execute ``sql_statement`` on ``conn`` and return every result row.

    Args:
        sql_statement: SQL text to run.
        conn: Open ``sqlite3`` connection.

    Returns:
        The list produced by ``fetchall()`` on a fresh cursor.
    """
    # Cursor.execute returns the cursor itself, so the calls can be chained.
    return conn.cursor().execute(sql_statement).fetchall()


In [3]:
def create_postal_table(data_filename, normalized_database_filename):
    """Load a state / median-household-income CSV into the ``income`` table.

    The ``income`` table in the SQLite database at
    ``normalized_database_filename`` is dropped and recreated, then filled
    with one row per data line of ``data_filename``.

    Args:
        data_filename: Path to a text file whose first line is a header and
            whose remaining lines are comma-separated
            ``State,Median_Household_Income`` records.
        normalized_database_filename: Path of the SQLite database file to
            write to.

    Raises:
        sqlite3.Error: If the database cannot be opened, or if the rows
            cannot be inserted (for example a line without exactly two
            fields, or a repeated state).
    """
    create_table_postal_sql = """ CREATE TABLE [income] (
    [State] TEXT NOT NULL PRIMARY KEY,
    [Median_Household_Income] TEXT NOT NULL
    ); """
    insert_postal_sql = ''' INSERT INTO income(State,Median_Household_Income) 
        VALUES(?, ?) '''

    # Read and parse the input before touching the database, so a missing or
    # unreadable file does not leave a half-built database behind.
    with open(data_filename, 'r') as f:
        next(f, None)  # the first line is the header, not data
        # Blank lines (e.g. a trailing newline at end of file) carry no
        # record and would otherwise abort the whole insert.
        raw_rows = sorted(
            line.rstrip('\n').split('\t') for line in f if line.strip()
        )

    # Tab characters are discarded and the remaining text is split on commas,
    # giving one tuple of field strings per record.
    income_rows = [tuple(''.join(parts).split(',')) for parts in raw_rows]

    conn = create_connection(normalized_database_filename)
    if conn is None:
        # create_connection has already printed the underlying sqlite3 error.
        raise Error(
            "could not open database %r" % (normalized_database_filename,)
        )
    try:
        # `with conn` commits on success and rolls back on error, but it
        # does not close the connection; the finally clause does that.
        with conn:
            create_table(conn, create_table_postal_sql, 'income')
            conn.executemany(insert_postal_sql, income_rows)
    finally:
        conn.close()

In [4]:
# Load the CSV into the income table, then read the table back and print it
# as a check on the load.
data_filename = 'median_household_income.csv'
normalized_database_filename = 'income.db'
create_postal_table(data_filename, normalized_database_filename)
# A separate connection is opened here for reading the result.
conn = create_connection(normalized_database_filename)
cur = conn.cursor()  # NOTE(review): not used anywhere in this cell
df = pd.read_sql_query("""SELECT * FROM income""", conn)
print(df)
conn.close()

                   State Median_Household_Income
0                Alabama                   54393
1                 Alaska                   74476
2                Arizona                   66628
3               Arkansas                   50540
4             California                   77358
5               Colorado                   82611
6            Connecticut                   79043
7               Delaware                   69132
8   District of Columbia                   88311
9                Florida                   57435
10               Georgia                   58952
11                Hawaii                   80729
12                 Idaho                   66499
13              Illinois                   73753
14               Indiana                   66360
15                  Iowa                   68469
16                Kansas                   72815
17              Kentucky                   56525
18             Louisiana                   50935
19                 M