In [1]:
### Utility Functions
from IPython.display import display, HTML
import pandas as pd
import sqlite3
from sqlite3 import Error

def create_connection(db_file, delete_db=False):
    import os
    if delete_db and os.path.exists(db_file):
        os.remove(db_file)

    conn = None
    try:
        conn = sqlite3.connect(db_file)
        conn.execute("PRAGMA foreign_keys = 1")
    except Error as e:
        print(e)

    return conn


def create_table(conn, create_table_sql, drop_table_name=None):
    
    if drop_table_name: # You can optionally pass drop_table_name to drop the table. 
        try:
            c = conn.cursor()
            c.execute("""DROP TABLE IF EXISTS %s""" % (drop_table_name))
        except Error as e:
            print(e)
    
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except Error as e:
        print(e)
        
def execute_sql_statement(sql_statement, conn):
    cur = conn.cursor()
    cur.execute(sql_statement)

    rows = cur.fetchall()

    return rows


In [9]:
def create_postal_table(data_filename, normalized_database_filename):

    conn = create_connection('postal.db')
    
    create_table_postal_sql = """ CREATE TABLE [postal] (
    [State] TEXT NOT NULL PRIMARY KEY,
    [State_Code] TEXT NOT NULL,
    [Region] TEXT NOT NULL,
    [Division] TEXT NOT NULL
    ); """
    
    def insert_postal(conn, values):
        insert_postal_sql = ''' INSERT INTO postal(State,State_Code,Region,Division) 
        VALUES(?, ?, ?, ?) '''
        cur = conn.cursor()
        cur.executemany(insert_postal_sql, values)
        return cur.lastrowid        

    with open(data_filename, 'r') as f:
        postal_lst = []
        for line in f:
            postal_lst.append(line.rstrip('\n').split('\t'))
        postal_lst=sorted(postal_lst[1:])
        print(postal_lst[1:10])
        
    with conn:
        lst_tup_reg=[]
        lst=[]
        create_table(conn, create_table_postal_sql,'postal')
        for i in range(len(postal_lst)):
            lst_tup_reg.append(tuple(''.join(postal_lst[i]).split(',')))
        #print(lst_tup_reg[1:10])
        insert_postal(conn,lst_tup_reg)

In [10]:
data_filename = 'us_census_bureau_regions_and_divisions.csv'
normalized_database_filename = 'postal.db'
create_postal_table(data_filename, normalized_database_filename)
conn = create_connection(normalized_database_filename)
cur = conn.cursor()
df = pd.read_sql_query("""SELECT * FROM postal""", conn)
print(df)
conn.close()

[['Alaska,AK,West,Pacific'], ['Arizona,AZ,West,Mountain'], ['Arkansas,AR,South,West South Central'], ['California,CA,West,Pacific'], ['Colorado,CO,West,Mountain'], ['Connecticut,CT,Northeast,New England'], ['Delaware,DE,South,South Atlantic'], ['District of Columbia,DC,South,South Atlantic'], ['Florida,FL,South,South Atlantic']]
                   State State_Code     Region            Division
0                Alabama         AL      South  East South Central
1                 Alaska         AK       West             Pacific
2                Arizona         AZ       West            Mountain
3               Arkansas         AR      South  West South Central
4             California         CA       West             Pacific
5               Colorado         CO       West            Mountain
6            Connecticut         CT  Northeast         New England
7               Delaware         DE      South      South Atlantic
8   District of Columbia         DC      South      South Atlantic