# Set Up the SQLite Database

In [1]:
import sqlite3
import pandas as pd
import numpy as np

# Raise pandas' display limits so wide/long frames are not truncated when shown.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# Record the versions in use for reproducibility.
# `sqlite3.version` (the Python DB-API module version) is deprecated since
# Python 3.12 and removed in 3.14, so read it defensively instead of letting
# the first cell of the notebook fail with AttributeError on newer interpreters.
print(getattr(sqlite3, 'version', 'n/a (removed in Python 3.14)'))
# Version of the underlying SQLite library.
print(sqlite3.sqlite_version)

2.6.0
3.36.0


In [2]:
# Load every source table from its CSV file.
#
# low_memory=False makes pandas parse each file in one pass and infer a single
# dtype per column, instead of inferring per chunk (which can leave a column
# holding a mix of numbers and strings).
# NOTE(review): the saved output of this cell shows two warnings that look like
# pandas' DtypeWarning ("Columns ... have mixed types") - confirm which files
# triggered them; an explicit `dtype=` for the affected columns would be the
# stricter fix.
product = pd.read_csv('../data/product.csv', low_memory=False)
product_price = pd.read_csv('../data/product_price.csv', low_memory=False)
vendor = pd.read_csv('../data/vendor.csv', low_memory=False)
store = pd.read_csv('../data/store.csv', low_memory=False)
transaction = pd.read_csv('../data/transaction.csv', low_memory=False)
county_table = pd.read_csv('../data/county.csv', low_memory=False)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
# Preview the first five rows of the county table.
county_table.head(n=5)

Unnamed: 0,County,Year,Population,Income_PerCapita,LABORFORCE,EMPLOYMENT,UNEMPLOYMENT,UNEMPLOYMENT RATE
0,adair,2010,7679,35209.0,4190,3970,220,5.2
1,adams,2010,4023,37156.0,2270,2170,100,4.5
2,allamakee,2010,14377,34590.0,7960,7410,550,6.9
3,appanoose,2010,12861,29930.0,6340,5930,410,6.5
4,audubon,2010,6094,39455.0,3570,3380,180,5.1


In [4]:
# Rename the columns that contain spaces so they match the Store table's
# column names (StoreID, StoreName, ZipCode).
store = store.rename(columns={'Store Number': 'StoreID',
                              'Store Name': 'StoreName',
                              'Zip Code': 'ZipCode'})
# Lower-case the names so the keyword patterns below match regardless of case.
store['StoreName'] = store['StoreName'].str.lower()

# Create store type
# Each rule is (regex of keywords, label). Rules are applied top to bottom, so
# when a name matches several rules the LAST matching rule wins; stores that
# match nothing keep the 'General Store' default.
store_type_rules = [
    ('liquor|spirits|wine|bottle|cellar|cork|casino|bar', 'Wine & Spirits'),
    ('food|supermarket|grocery|market|dahl|super valu|sam|pantry|target|wal-mart', 'Food/General Store'),
    ('drugstore|drug|pharmacy|drug store', 'Drugstore'),
    ('convenience|honk|beverage|quick|gas|quik|pit stop|news and tobacco', 'Convenience'),
]

store['StoreType'] = 'General Store'
for pattern, store_type in store_type_rules:
    # na=False: a missing StoreName counts as "no match". Without it the mask
    # contains NaN and boolean indexing with .loc raises a ValueError.
    matches = store['StoreName'].str.contains(pattern, na=False)
    store.loc[matches, 'StoreType'] = store_type

In [5]:
# Remove the category code/name columns; the Product table created later in
# this notebook has no columns for them.
# errors='ignore' keeps the cell re-runnable: `product` is rebound here, so a
# second run would otherwise raise KeyError because the columns are already gone.
product = product.drop(columns=['CategoryCode', 'CategoryName'], errors='ignore')

In [6]:
# Replace the space in 'UNEMPLOYMENT RATE' with an underscore so the column
# name matches the UNEMPLOYMENT_RATE column of the County table.
county_column_renames = {'UNEMPLOYMENT RATE': 'UNEMPLOYMENT_RATE'}
county_table = county_table.rename(columns=county_column_renames)

In [7]:
# Show the final column set of every frame so it can be checked against the
# CREATE TABLE statements further down.
frames_to_report = [
    ('product: ', product),
    ('product_price: ', product_price),
    ('vendor: ', vendor),
    ('store: ', store),
    ('transaction: ', transaction),
    ('county: ', county_table),
]
for label, frame in frames_to_report:
    print(label, frame.columns)

product:  Index(['ProductID', 'ProductDescription', 'CategoryGroup', 'Proof',
       'VendorID'],
      dtype='object')
product_price:  Index(['ProductID', 'Date', 'StateBottleRetail', 'StateBottleCost'], dtype='object')
vendor:  Index(['VendorID', 'VendorName'], dtype='object')
store:  Index(['StoreID', 'StoreName', 'Address', 'City', 'ZipCode', 'County', 'lat',
       'lng', 'max', 'min', 'StoreType'],
      dtype='object')
transaction:  Index(['TransactionID', 'Date', 'StoreID', 'ProductID', 'VendorID', 'Pack',
       'BottleVolume_ml', 'BottlesSold', 'VolumeSold_Liters', 'Sale_Dollars'],
      dtype='object')
county:  Index(['County', 'Year', 'Population', 'Income_PerCapita', 'LABORFORCE',
       'EMPLOYMENT', 'UNEMPLOYMENT', 'UNEMPLOYMENT_RATE'],
      dtype='object')


In [8]:
# Delete the database file (in the data folder) first if an earlier run has
# already added tables to it and you want to start from a clean file.
DB_PATH = '../data/IowaLiquorSales.db'

# Open the database; sqlite3 creates the file when it does not exist yet.
dbconn = sqlite3.connect(DB_PATH)
# Cursor used by the following cells to execute SQL statements.
cursor = dbconn.cursor()

In [9]:
# update to include final columns for each table
#
# Maps each table name to its CREATE TABLE statement. The column definitions
# are unchanged; only the way the statements are executed differs (see the
# loop below).
# NOTE(review): County.Year is declared text and Income_PerCapita integer,
# while the county preview shows Year as a whole number and Income_PerCapita
# with a decimal point - SQLite's type affinity will coerce on insert; confirm
# that this is what downstream queries expect.
table_ddl = {
    'Product': '''CREATE TABLE Product
                ([ProductID] TEXT PRIMARY KEY, 
                [ProductDescription] text, 
                [CategoryGroup] text, 
                [Proof] real, 
                [VendorID] integer)''',

    'Price': '''CREATE TABLE Price
                ([ProductID] text, 
                [Date] date, 
                [StateBottleRetail] real, 
                [StateBottleCost] real,
                PRIMARY KEY(ProductID, Date, StateBottleRetail))''',

    'Vendor': '''CREATE TABLE Vendor
                ([VendorID] INTEGER PRIMARY KEY, 
                [VendorName] text)''',

    'Store': '''CREATE TABLE Store
                ([StoreID] INTEGER PRIMARY KEY, 
                [StoreName] text, 
                [StoreType] text,
                [Address] text, 
                [City] text, 
                [ZipCode] text, 
                [County] text, 
                [lat] real, 
                [lng] real, 
                [max] date, 
                [min] date)''',

    'Sales': '''CREATE TABLE Sales
                ([TransactionID]  TEXT PRIMARY KEY, 
                [Date] date, 
                [StoreID] integer, 
                [ProductID] text, 
                [VendorID] integer, 
                [Pack] integer,
                [BottleVolume_ml] real,
                [BottlesSold] integer, 
                [VolumeSold_Liters] real, 
                [Sale_Dollars] real)''',

    'County': '''CREATE TABLE County      
                ([County] text, 
                [Year] text, 
                [Population] integer, 
                [Income_PerCapita] integer, 
                [LABORFORCE] integer, 
                [EMPLOYMENT] integer,
                [UNEMPLOYMENT] real,
                [UNEMPLOYMENT_RATE] real,
                PRIMARY KEY(County, Year))''',
}

for table_name, create_statement in table_ddl.items():
    # Drop any copy left behind by a previous run so this cell can be re-run
    # against an existing database file; a plain CREATE TABLE raises
    # "table ... already exists" otherwise. This discards the old rows, which
    # are reloaded from the DataFrames afterwards.
    # The table names are the fixed keys above, never user input.
    cursor.execute(f'DROP TABLE IF EXISTS {table_name}')
    cursor.execute(create_statement)

<sqlite3.Cursor at 0x7f166602c030>

In [10]:
# Insert each DataFrame into its table.
# if_exists='append' adds the rows to the already-created tables (keeping their
# declared schema) and index=False leaves the DataFrame index out.
append_opts = dict(con=dbconn, if_exists='append', index=False)

product.to_sql('Product', **append_opts)
product_price.to_sql('Price', **append_opts)
vendor.to_sql('Vendor', **append_opts)
store.to_sql('Store', **append_opts)
transaction.to_sql('Sales', **append_opts)
county_table.to_sql('County', **append_opts)

In [11]:
# Commit the open transaction so the changes made through this connection are saved.
dbconn.commit()

In [12]:
# Close the connection; `dbconn` and `cursor` cannot be used after this.
dbconn.close()