In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sqlite3

# Apply the ggplot style sheet and a 12-point base font to every figure.
plt.style.use('ggplot')
plt.rcParams['font.size'] = 12

In [2]:
# Allow up to 500 rows and 500 columns to be rendered before pandas truncates.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

# Data Analysis

In [None]:
# Pull 2019 sales rows joined to their Product record and to the Price record
# that matches on both ProductID and Date.
query = '''SELECT Sales.Date, Sales.ProductID, Product.CategoryGroup, Sales.VendorID, Product.Proof, Sales.StoreID, Sales.VolumeSold_Liters, Sales.Sale_Dollars, Price.StateBottleRetail
              FROM ((Sales 
              INNER JOIN Product ON Sales.ProductID=Product.ProductID)
              INNER JOIN Price ON Sales.ProductID=Price.ProductID and Sales.Date=Price.Date)
              WHERE Sales.Date BETWEEN '2019-01-01' AND '2019-12-31';'''

# Connect SQLite. The try/finally blocks close the cursor and the connection
# even when the query raises, so a failed run does not leave the database open.
dbconn = sqlite3.connect('../data/IowaLiquorSales.db')
try:
    cursor = dbconn.cursor()
    try:
        cursor.execute(query)
        result = cursor.fetchall()
    finally:
        cursor.close()
finally:
    dbconn.close()

In [None]:
# Put the query result in a data frame. Labels follow the SELECT order; the
# volume column is selected as Sales.VolumeSold_Liters, so it is labeled in liters.
sales_columns = [
    'Date',
    'ProductID',
    'CategoryGroup',
    'VendorID',
    'Proof',
    'StoreID',
    'VolumeSold_Liters',
    'Sale_Dollars',
    'StateBottleRetail',
]
temp = pd.DataFrame(result, columns=sales_columns)

In [None]:
# Preview the first five rows of the 2019 sales frame.
temp.head(n=5)

In [None]:
# Number of rows returned by the sales query.
len(temp)

In [None]:
# Smallest Date value in the pull (sanity check on the BETWEEN filter).
temp.loc[:, 'Date'].min()

In [None]:
# Largest Date value in the pull (sanity check on the BETWEEN filter).
temp.loc[:, 'Date'].max()

In [None]:
# Bar chart of how many sales rows fall in each CategoryGroup, largest first.
fig, ax = plt.subplots(figsize=(12, 6))
rows_per_category = temp.groupby('CategoryGroup').size().sort_values(ascending=False)
rows_per_category.plot.bar(ax=ax)

ax.set_ylabel('Product Sold', fontsize=22, color='black')

# Store Density by County

In [None]:
# Pull 2019 sales rows joined to their Store record and to the County record
# (matched on county name, restricted to County.Year = '2019').
query = '''SELECT Sales.TransactionID, Sales.Date, Sales.StoreID, Sales.ProductID, Store.County, Store.lat, Store.lng, County.Year, County.Population, County.UNEMPLOYMENT_RATE
              FROM ((Sales 
              INNER JOIN Store ON Sales.StoreID=Store.StoreID)
              INNER JOIN County ON Store.County=County.County)
              WHERE Sales.Date BETWEEN '2019-01-01' AND '2019-12-31' 
              AND County.Year = '2019';'''

# Connect SQLite. The try/finally blocks close the cursor and the connection
# even when the query raises, so a failed run does not leave the database open.
dbconn = sqlite3.connect('../data/IowaLiquorSales.db')
try:
    cursor = dbconn.cursor()
    try:
        cursor.execute(query)
        result1 = cursor.fetchall()
    finally:
        cursor.close()
finally:
    dbconn.close()

In [None]:
# Wrap the store/county query rows in a data frame; labels follow the SELECT order.
store_county_columns = [
    'TransactionID', 'Date', 'StoreID', 'ProductID',
    'County', 'lat', 'lng',
    'Year', 'Population', 'Unemployment_Rate',
]
temp1 = pd.DataFrame(result1, columns=store_county_columns)

In [None]:
# Preview the first five rows of the store/county frame.
temp1.head(n=5)

In [None]:
# Load the county table, give the unemployment column an underscore name,
# and keep only the rows for Year 2019.
county = (
    pd.read_csv('../data/county.csv')
    .rename(columns={'UNEMPLOYMENT RATE': 'UNEMPLOYMENT_RATE'})
)
is_2019 = county['Year'] == 2019
county_2019 = county.loc[is_2019]
county_2019.head()

In [None]:
# Count the distinct StoreID values seen in each county, largest counts first.
StoreCount_temp = (
    temp1.groupby('County')['StoreID']
    .nunique()
    .sort_values(ascending=False)
    .to_frame(name='Store_Count')
    .reset_index()
)
StoreCount_temp.head()

In [None]:
# Attach the 2019 county attributes, then express store counts per 10,000 of Population.
StoreCount = pd.merge(StoreCount_temp, county_2019, on='County', how='left')
stores_per_person = StoreCount['Store_Count'].div(StoreCount['Population'])
StoreCount['StoresPer10K'] = stores_per_person.mul(10000)
StoreCount.head()

In [None]:
import geopandas as gpd
from shapely.geometry import Point, Polygon

In [None]:
import matplotlib.colors as colors
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Build a colormap from the slice of ``cmap`` between ``minval`` and ``maxval``.

    Parameters
    ----------
    cmap : colormap
        Callable colormap with a ``name`` attribute; it is sampled at ``n``
        evenly spaced positions.
    minval, maxval : float
        First and last sample positions passed to ``cmap``.
    n : int
        Number of samples taken between ``minval`` and ``maxval``.

    Returns
    -------
    The colormap produced by ``LinearSegmentedColormap.from_list`` from the
    sampled colors, named ``trunc(<cmap name>,<minval>,<maxval>)`` with both
    bounds formatted to two decimals.
    """
    label = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    sample_points = np.linspace(minval, maxval, n)
    sampled_colors = cmap(sample_points)
    return colors.LinearSegmentedColormap.from_list(label, sampled_colors)


# Sample the reversed magma colormap over [0, 0.6] only; new_cmap is used for the county maps.
cmap = plt.get_cmap('magma_r')
new_cmap = truncate_colormap(cmap, minval=0, maxval=0.6)

In [None]:
# County Boundaries Shape File
map_counties = gpd.read_file(
    '../data/County_Boundaries_of_Iowa/geo_export_b0962504-4b8e-4a81-9f03-586d790445ed.shp'
).rename(columns={'county_nam': 'County'})

# Normalize county names to lowercase once, then order the rows by name.
map_counties['County'] = map_counties['County'].str.lower()
map_counties = map_counties.sort_values(by='County')

In [None]:
# Preview the first five county boundary rows.
map_counties.head(n=5)

In [None]:
# The shapefile spells this county without the apostrophe; rewrite it as "o'brien"
# (presumably the spelling used by the store/county data - verify against StoreCount).
is_obrien = map_counties['County'] == "obrien"
map_counties['County'] = map_counties['County'].mask(is_obrien, "o'brien")

In [None]:
# Digging into the discrepancies: list the county names that appear in
# StoreCount but have no matching name in the shapefile.
# Dedicated names are used so the temp1 / temp2 data frames are not overwritten,
# which keeps the earlier cells re-runnable.
shapefile_county_names = set(map_counties['County'].unique())
store_county_names = list(StoreCount['County'].unique())
unmatched_counties = [
    name for name in store_county_names if name not in shapefile_county_names
]
unmatched_counties

In [None]:
# Left-join the store statistics onto the county polygons so every polygon row is kept.
map_counties2 = map_counties.merge(StoreCount, how='left', on='County')
map_counties2.head(n=5)

In [None]:
fig, ax = plt.subplots(figsize=(15, 15))

# Put both layers in the same CRS (EPSG:4326) so they line up on the shared axes.
counties_outline = map_counties.to_crs(epsg=4326)
counties_stores = map_counties2.to_crs(epsg=4326)

# County Boundaries: opaque white base layer with red outlines, drawn under the choropleth.
counties_outline.plot(color='white', edgecolor='red', ax=ax)

# Choropleth of Store_Count. legend=True already adds the color scale, and the
# axes hold no labeled artists, so no separate ax.legend() call is made.
counties_stores.plot(column='Store_Count', cmap=new_cmap, ax=ax, legend=True)

ax.set_title('Stores in 2019');

In [None]:
fig, ax = plt.subplots(figsize=(15, 15))

# Put both layers in the same CRS (EPSG:4326) so they line up on the shared axes.
counties_outline = map_counties.to_crs(epsg=4326)
counties_density = map_counties2.to_crs(epsg=4326)

# County Boundaries: opaque white base layer with red outlines, drawn under the choropleth.
counties_outline.plot(color='white', edgecolor='red', ax=ax)

# Choropleth of StoresPer10K. legend=True already adds the color scale, and the
# axes hold no labeled artists, so no separate ax.legend() call is made.
counties_density.plot(column='StoresPer10K', cmap=new_cmap, ax=ax, legend=True)

ax.set_title('Stores per 10K in 2019');

# Store Information (TODO: confirm the scope of this section)

In [None]:
# Pull 2019 sales rows joined to their Store record (store type, county, coordinates).
query = '''SELECT Sales.TransactionID, Sales.Date, Sales.StoreID, Store.StoreType, Sales.ProductID, Store.County, Store.lat, Store.lng
              FROM (Sales 
              INNER JOIN Store ON Sales.StoreID=Store.StoreID)
              WHERE Sales.Date BETWEEN '2019-01-01' AND '2019-12-31';'''

# Connect SQLite. The try/finally blocks close the cursor and the connection
# even when the query raises, so a failed run does not leave the database open.
dbconn = sqlite3.connect('../data/IowaLiquorSales.db')
try:
    cursor = dbconn.cursor()
    try:
        cursor.execute(query)
        result2 = cursor.fetchall()
    finally:
        cursor.close()
finally:
    dbconn.close()

In [None]:
# Wrap the store query rows in a data frame; labels follow the SELECT order.
store_columns = [
    'TransactionID', 'Date', 'StoreID', 'StoreType',
    'ProductID', 'County', 'lat', 'lng',
]
temp2 = pd.DataFrame(result2, columns=store_columns)

In [None]:
# Preview the first five rows of the store frame.
temp2.head(n=5)

In [None]:
# Number of distinct (non-null) StoreID values among the 2019 transactions.
temp2['StoreID'].nunique(dropna=True)