In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# Notebook-wide plot appearance: ggplot theme with a larger base font size.
plt.style.use('ggplot')
plt.rcParams['font.size'] = 20

In [None]:
# Let pandas render up to 500 rows and 500 columns before truncating output.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)

# Import Liquor Sales database

In [None]:
# Load the raw liquor sales export; the tables built below are all sliced from this frame.
sales = pd.read_csv(filepath_or_buffer='./data/Iowa_Liquor_Sales.csv')

In [None]:
# Display the column labels of the raw sales frame (still containing spaces at this point).
sales.columns

In [None]:
# Preview the first ten rows of the raw sales frame.
sales.head(n=10)

In [None]:
# Load the separate products export.
# NOTE(review): apart from the column-name clean-up in the next cell, product_df is not
# referenced again in the visible cells -- confirm whether it is still needed.
product_df = pd.read_csv(filepath_or_buffer='data/Iowa_Liquor_Products.csv')

In [None]:
# Remove every space from the column labels of both raw frames so that the
# column lists used below (e.g. 'ItemNumber', 'BottleVolume(ml)') match.
for frame in (sales, product_df):
    frame.columns = frame.columns.str.replace(' ','')

# Products Table

In [None]:
# Columns of the sales frame that describe a product.
product_col = ['ItemNumber',
            'ItemDescription',
            'Category',
            'CategoryName',
            'Pack',
            'BottleVolume(ml)',
            'VendorNumber']

# Slicing `sales` directly yields one product row per sales row, so the same
# product is repeated many times. Keep only the distinct combinations of the
# product columns, mirroring how the Stores table is built further down.
product = sales[product_col].drop_duplicates(keep = 'first').copy()

In [None]:
# Data Cleaning
# Drop the unit suffix from the volume column so the label matches the
# 'BottleVolume' column of the SQLite Product table created later.
product.rename(
    {'BottleVolume(ml)': 'BottleVolume'},
    axis = 'columns',
    inplace = True,
)


In [None]:
# Write the product table to disk for the database-loading section.
# index=False leaves the DataFrame row index out of the file altogether;
# index_label=False only suppressed the index *header*, so the index values were
# still written as an extra unlabeled first column (header and data rows then
# had different field counts).
product.to_csv('data/product_final.csv', index = False)

# Product Price Table

In [None]:
# Columns of the sales frame that describe a product's price on a given date.
product_price_col = ['ItemNumber',
                  'Date',
                  'StateBottleRetail',
                  'StateBottleCost']

# Slicing `sales` directly repeats the same (item, date, retail, cost) row for
# every sales row that shares it. Keep only the distinct combinations, mirroring
# how the Stores table is built further down.
product_price = sales[product_price_col].drop_duplicates(keep = 'first').copy()

In [None]:
# Data Cleaning (placeholder: this cell contains no cleaning steps yet)

In [None]:
# Write the price table to disk for the database-loading section.
# index=False leaves the DataFrame row index out of the file altogether;
# index_label=False only suppressed the index *header*, so the index values were
# still written as an extra unlabeled first column.
product_price.to_csv('data/product_price_final.csv', index = False)

# Vendors Table

In [None]:
# Columns of the sales frame that describe a vendor.
vendor_col = ['VendorNumber',
              'VendorName']

# Slicing `sales` directly repeats each vendor once per sales row. Keep only the
# distinct (number, name) pairs, mirroring how the Stores table is built further down.
vendor = sales[vendor_col].drop_duplicates(keep = 'first').copy()

In [None]:
# Data Cleaning (placeholder: this cell contains no cleaning steps yet)

In [None]:
# Write the vendor table to disk for the database-loading section.
# index=False leaves the DataFrame row index out of the file altogether;
# index_label=False only suppressed the index *header*, so the index values were
# still written as an extra unlabeled first column.
vendor.to_csv('data/vendor_final.csv', index = False)

# Stores Table

In [None]:
# Columns of the sales frame that describe a store.
store_col = [
    'StoreNumber', 'StoreName',
    'Address', 'City', 'ZipCode', 'StoreLocation',
    'CountyNumber', 'County',
]

# One row per distinct combination of the store columns (first occurrence wins),
# then discard every row that has a missing value in any of those columns.
# NOTE(review): a store with any missing field is removed entirely here while
# its sales rows remain in the transaction table -- confirm that is intended.
store = (
    sales[store_col]
    .drop_duplicates(subset = store_col, keep = 'first')
    .dropna()
)

In [None]:
# Data Cleaning (placeholder: this cell contains no cleaning steps yet)

In [None]:
# Write the store table to disk for the database-loading section.
# index=False leaves the DataFrame row index out of the file altogether;
# index_label=False only suppressed the index *header*, so the leftover row
# labels from the sales frame were still written as an extra unlabeled first column.
store.to_csv('data/store_final.csv', index = False)

# Transactions Table

In [None]:
# Columns of the sales frame that make up one transaction record: the invoice
# identifier, the date, the store/product/vendor/category keys and the sale measures.
transaction_col = [
    'Invoice/ItemNumber',
    'Date',
    'StoreNumber', 'ItemNumber', 'VendorNumber', 'Category',
    'BottlesSold', 'VolumeSold(Gallons)', 'Sale(Dollars)',
]

# Independent copy so later in-place edits do not touch `sales`.
transaction = sales.loc[:, transaction_col].copy()

In [None]:
# Data Cleaning
# Relabel the columns to the names used by the SQLite Transactions table.
# The mapping is applied to every label at once, so turning
# 'Invoice/ItemNumber' into 'ItemNumber' does not clash with the original
# 'ItemNumber' column, which becomes 'ProductNumber' in the same step.
transaction.rename(
    {
        'Invoice/ItemNumber': 'ItemNumber',
        'ItemNumber': 'ProductNumber',
        'VolumeSold(Gallons)': 'VolumeSold',
        'Sale(Dollars)': 'Sale',
    },
    axis = 'columns',
    inplace = True,
)

In [None]:
# Write the transaction table to disk for the database-loading section.
# index=False leaves the DataFrame row index out of the file altogether;
# index_label=False only suppressed the index *header*, so the index values were
# still written as an extra unlabeled first column.
transaction.to_csv('data/transaction_final.csv', index = False)

# Set Up SQLite Database

In [1]:
import sqlite3
import pandas as pd

# Report the versions in use, for reproducibility.
# sqlite3.version (the version of the Python DB-API module) is deprecated since
# Python 3.12 and removed in 3.14, so look it up defensively instead of letting
# the cell fail with AttributeError on newer interpreters.
print(getattr(sqlite3, 'version', 'n/a (sqlite3.version was removed in Python 3.14)'))
# Version of the underlying SQLite library.
print(sqlite3.sqlite_version)

2.6.0
3.35.4


In [2]:
# Reload the five tables from the CSV files written by the to_csv cells above,
# in the order: product, price, vendor, store, transaction.
# NOTE(review): the recorded output of this cell shows two warnings -- presumably
# pandas DtypeWarning for mixed-type columns; confirm and consider passing dtype=.
product, product_price, vendor, store, transaction = map(
    pd.read_csv,
    [
        'data/product_final.csv',
        'data/product_price_final.csv',
        'data/vendor_final.csv',
        'data/store_final.csv',
        'data/transaction_final.csv',
    ],
)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [3]:
# Open a connection to the SQLite database file 'IowaLiquorSales.db' (path is
# relative to the working directory) and obtain a cursor for running SQL.
# The connection is committed and closed in the final cells of the notebook.
dbconn = sqlite3.connect('IowaLiquorSales.db') # database
cursor = dbconn.cursor() # create a cursor to use to execute sql statements

In [4]:
# Table name -> column list for the five tables of the database. Columns are
# declared without types, exactly as before.
table_schemas = {
    'Product': 'ItemNumber, ItemDescription, Category, CategoryName, Pack, BottleVolume, VendorNumber',
    'Price': 'ItemNumber, Date, StateBottleRetail, StateBottleCost',
    'Vendor': 'VendorNumber, VendorName',
    'Store': 'StoreNumber, StoreName, Address, City, ZipCode, StoreLocation, CountyNumber, County',
    'Transactions': 'ItemNumber, Date, StoreNumber, ProductNumber, VendorNumber, Category, BottlesSold, VolumeSold, Sale',
}

# A bare CREATE TABLE fails with "table ... already exists" as soon as the
# notebook is run a second time against the same database file. Dropping each
# table first makes this cell re-runnable; the tables are refilled from the
# DataFrames by the to_sql cell below, so a re-run rebuilds them from scratch
# instead of erroring.
# The names and column lists interpolated here are the hard-coded constants
# above, never user input.
for table_name, column_list in table_schemas.items():
    cursor.execute(f'DROP TABLE IF EXISTS {table_name}')
    cursor.execute(f'CREATE TABLE {table_name} ({column_list})')

<sqlite3.Cursor at 0x7fb6391b4f10>

In [None]:
# Preview the first five transaction rows before loading them into the database.
transaction.head(n=5)

In [5]:
# Append each DataFrame to the SQLite table of the matching name. The tables are
# created beforehand, hence if_exists='append'; the DataFrame index is not written.
for sql_table, frame in (
    ('Product', product),
    ('Price', product_price),
    ('Vendor', vendor),
    ('Store', store),
    ('Transactions', transaction),
):
    frame.to_sql(sql_table, dbconn, if_exists = 'append', index = False)

In [7]:
# Commit the pending transaction so the inserted rows are saved to the database file.
dbconn.commit()

In [8]:
# NOTE(review): repeats the commit from the previous cell; nothing is written in
# between, so this second call has nothing left to commit.
dbconn.commit()

In [9]:
# Close the database connection; `dbconn` and `cursor` cannot be used after this.
dbconn.close()