In [1]:
import os

from sqlalchemy.engine import create_engine

In [2]:
# Location of the BigQuery service-account key. The default is relative to the
# notebook's working directory; set BIGQUERY_CREDENTIALS_PATH to point elsewhere
# without editing the notebook.
credentials_path = os.environ.get(
    'BIGQUERY_CREDENTIALS_PATH',
    'key/iowa-liquor-sales-365322-12ff7a5498e6.json',
)

# Fail early with an actionable message rather than the bare
# "[Errno 2] No such file or directory" raised when the key file is missing.
if not os.path.isfile(credentials_path):
    raise FileNotFoundError(
        "BigQuery credentials file not found at {!r} (working directory: {!r}). "
        "Put the service-account key there or set BIGQUERY_CREDENTIALS_PATH.".format(
            credentials_path, os.getcwd()
        )
    )

# SQLAlchemy engine used by every pd.read_sql_query call in this notebook.
engine = create_engine('bigquery://', credentials_path=credentials_path)

FileNotFoundError: [Errno 2] No such file or directory: 'key/iowa-liquor-sales-365322-12ff7a5498e6.json'

In [None]:
import pandas as pd
import matplotlib.ticker as tick
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Total liters sold per county for 2018-2021, restricted to ten counties.
# The truncated county value 'POTTAWATTA' is rewritten to 'POTTAWATTAMIE'.
#
# The rewrite happens in a CTE under a distinct name (county_name). Selecting
# IF(...) AS county and then writing GROUP BY county makes "county" refer to both
# the table column and the SELECT alias, which GoogleSQL documents as an
# ambiguous-alias error. The result columns are still liters_sold and county.
sql_statement = """
WITH normalized_sales AS (
  SELECT
    volume_sold_liters,
    IF(county = 'POTTAWATTA', 'POTTAWATTAMIE', county) AS county_name
  FROM `bigquery-public-data.iowa_liquor_sales.sales`
  WHERE EXTRACT(YEAR FROM date) IN (2018, 2019, 2020, 2021)
    AND county IN ("POLK","LINN","SCOTT","JOHNSON","BLACK HAWK","WOODBURY","DUBUQUE","STORY","DALLAS","POTTAWATTA")
)
SELECT
  SUM(volume_sold_liters) AS liters_sold,
  county_name AS county
FROM normalized_sales
GROUP BY county_name
ORDER BY liters_sold DESC
""".strip()

In [None]:
# Run the county totals query through the BigQuery engine and load the result into a DataFrame.
df = pd.read_sql_query(sql_statement, engine)

In [None]:
# Scale all seaborn text elements by 1.4.
# NOTE(review): any later sns.set(...) call that does not pass font_scale puts it back to the default.
sns.set(font_scale=1.4)

def reformat_large_tick_values(tick_val, pos):
    """
    Format a tick value with a K / M / B suffix, for use with matplotlib's FuncFormatter.

    Values are rounded to one decimal place and a trailing ".0" is dropped, so
    4500 -> "4.5K", 4000 -> "4K", 2500000 -> "2.5M". Negative values are
    abbreviated by magnitude (-4500 -> "-4.5K"), and a value that rounds up to
    1000 of a unit moves to the next unit (999999 -> "1M", not "1000K").
    Values whose magnitude is below 1000 are only rounded (12.34 -> "12.3").

    Parameters
    ----------
    tick_val : int or float
        The tick value to format.
    pos : any
        Tick position supplied by FuncFormatter; not used.

    Returns
    -------
    str
        The formatted tick label.
    """
    # Largest unit first so the first threshold reached is the right one.
    units = ((1000000000, 'B'), (1000000, 'M'), (1000, 'K'))
    magnitude = abs(tick_val)

    new_tick_format = None
    for unit_index, (threshold, suffix) in enumerate(units):
        if magnitude >= threshold:
            val = round(tick_val / threshold, 1)
            # Rounding can push e.g. 999.999K up to 1000.0K; promote to the next unit.
            if abs(val) >= 1000 and unit_index > 0:
                threshold, suffix = units[unit_index - 1]
                val = round(tick_val / threshold, 1)
            new_tick_format = '{:}{}'.format(val, suffix)
            break

    if new_tick_format is None:
        # Magnitude below 1000 (or NaN, which fails every comparison above).
        new_tick_format = str(round(tick_val, 1))

    # Keep 4.5M as is, but turn 4.0M into 4M: the zero after the decimal isn't needed.
    index_of_decimal = new_tick_format.find(".")
    if index_of_decimal != -1 and new_tick_format[index_of_decimal + 1:index_of_decimal + 2] == "0":
        new_tick_format = new_tick_format[:index_of_decimal] + new_tick_format[index_of_decimal + 2:]

    return new_tick_format

In [None]:
# Display the query result: one row per county with its total liters sold.
df

In [None]:
# Configure the seaborn theme in one call. Every sns.set(...) call re-applies the
# complete theme, so font_scale must be passed here as well; otherwise it falls
# back to the default of 1 and the enlarged text is lost.
sns.set(
    style="whitegrid",               # white background with light grid lines
    font_scale=1.4,                  # enlarge all text elements
    rc={'axes.facecolor': 'white'},  # remove the default grey plot background
)


In [None]:
sns.set_palette("Set2")

# Horizontal bars: one per county, bar length = total liters sold.
# barplot returns the Axes it drew on, so keep that handle for the labelling below.
ax = sns.barplot(data=df, x='liters_sold', y='county')

ax.set_title("Liquor Liters Sold 2018-2021", size=24)
ax.set_xlabel("Liters Sold", size=20)
# The county names on the bars already identify the y axis, so no caption is set.
ax.set_ylabel(None, size=20)

# Abbreviate the liter counts on the x axis (K / M / B suffixes).
ax.xaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

plt.show()

In [None]:
sns.set_palette("Set2")

# Vertical version of the county totals chart; keep the Axes that barplot returns.
ax = sns.barplot(data=df, x='county', y='liters_sold')

ax.set_title("Liquor Liters Sold 2018-2021", size=24)
# The leading spaces are part of the label text and are kept as-is.
ax.set_xlabel("                           Iowa Counties", size=20)
ax.set_ylabel("Liters Sold", size=20)

# Abbreviate the liter counts on the y axis (K / M / B suffixes).
ax.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

# Turn the county names vertical so they do not overlap.
plt.xticks(rotation=90)

plt.show()

In [None]:
# Load the census table from a CSV stored next to the notebook.
# NOTE(review): later cells read 'county', 'population', 'percent_minors', 'income' and
# 'education' after merging this frame — presumably all of them come from this file; confirm.
df2 = pd.read_csv('data/census_data.csv')

In [None]:
# Attach the census columns to the county sales totals. The join is pandas' default
# inner join on 'county', so a county missing from either frame is dropped.
df3 = df.merge(df2, on='county')
df3

In [None]:
# Adult head-count: population scaled by the share of residents who are not minors.
# NOTE(review): assumes percent_minors is stored as a fraction (0-1) — confirm against the CSV.
adult_share = 1 - df3['percent_minors']
population_over_18 = df3['population'] * adult_share

# Stored as whole people; astype(int) truncates the fractional part.
df3['population_over_18'] = population_over_18.astype(int)

# Liters sold per adult resident, using the truncated adult head-count.
liters_sold_per_adult_capita = df3['liters_sold'].div(df3['population_over_18'])
df3['liters_sold_per_adult_capita'] = liters_sold_per_adult_capita

df3

In [None]:
# Render floats with thousands separators in every DataFrame displayed from here on.
pd.options.display.float_format = '{:,}'.format

# Keep only the columns needed for the per-capita summary table.
df31 = df3[['county','liters_sold', 'population_over_18', 'liters_sold_per_adult_capita']]

# round() and sort_values() both return new frames rather than modifying df31, so they
# are chained here; calling round() on its own line would discard the result.
# df31 itself is left unrounded for later cells.
df31.round(2).sort_values(by=['population_over_18'], ascending=False)

In [None]:
# Inspect the column dtypes of the per-capita summary table.
df31.dtypes

In [None]:
sns.set_palette("Set2")

# Horizontal bars of liters sold per adult resident, one bar per county.
ax2 = sns.barplot(data=df3, x='liters_sold_per_adult_capita', y='county')

ax2.set_title("Liquor Sales Per Capita (2018-2021)", size=24)
ax2.set_xlabel("Liters Sold per capita (18+) (2018-2021)", size=20)
ax2.set_ylabel("Counties", size=20)

plt.show()

In [None]:
sns.set_palette("Set2")

# One point per county: household income against liters sold per adult.
ax3 = sns.scatterplot(x='income', y='liters_sold_per_adult_capita', data=df3, hue='county', s=200)

# Set label for y-axis
ax3.set_ylabel("Liters Sold per capita (18+)", size=20)

# Set label for x-axis
ax3.set_xlabel("Median Household Income", size=20)

# Set title for plot. The per-capita figure is derived from the 2018-2021 sales totals,
# so the title names that period rather than a single year.
ax3.set_title("Iowa Liquor Sales vs. Household Income (2018-2021)", size=24)

# Abbreviate the income ticks (K / M / B suffixes).
ax3.xaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

# Place the county legend outside the plot area, to the right.
ax3.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, markerscale=2)

plt.show()

In [None]:
sns.set_palette("Set2")

# One point per county: education level against liters sold per adult.
ax4 = sns.scatterplot(x='education', y='liters_sold_per_adult_capita', data=df3, hue='county', s=200)

# Set label for y-axis
ax4.set_ylabel("Liters Sold per capita (18+)", size=20)

# Set label for x-axis
ax4.set_xlabel("Bachelor's Degree or higher level of education", size=20)

# Set title for plot. The per-capita figure is derived from the 2018-2021 sales totals,
# so the title names that period rather than a single year.
ax4.set_title("Iowa Liquor Sales vs. Education (2018-2021)", size=24)

# Same tick formatter as the other charts (drops a trailing ".0", abbreviates large values).
ax4.xaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

# Place the county legend outside the plot area, to the right.
ax4.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, markerscale=2)

plt.show()

In [None]:
# Set the palette before drawing so both panels use it.
sns.set_palette("Set2")

fig, axes = plt.subplots(1, 2)

# Address each panel through its own Axes object. After plt.subplots(1, 2),
# plt.gca() returns the last-created (right-hand) axes, so labelling through it
# would put the education panel's labels and title on the income panel.
ax4, ax3 = axes

# Left panel: education vs. liters per adult. Its legend is suppressed because the
# right panel's legend lists the same counties.
sns.scatterplot(x='education', y='liters_sold_per_adult_capita', data=df3, ax=ax4, legend=False, hue='county', s=200)
ax4.set_ylabel("Liters Sold per capita (18+)", size=20)
ax4.set_xlabel("Bachelor's Degree or higher level of education", size=20)

# Right panel: income vs. liters per adult, carrying the shared county legend.
sns.scatterplot(x='income', y='liters_sold_per_adult_capita', data=df3, ax=ax3, hue='county', s=200)
ax3.set_ylabel("Liters Sold per capita (18+)", size=20)
ax3.set_xlabel("Median Household Income", size=20)
ax3.xaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))
ax3.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, markerscale=2)

# One title for the whole figure; the per-capita metric covers the 2018-2021 totals.
fig.suptitle("Iowa Liquor Sales (2018-2021)", size=24)

plt.show()

In [None]:
# BigQuery query: liters sold per liquor category in Polk County for 2021.
#   source_table      - 2021 sales rows for the ten listed counties.
#   liquor_categories - tags each row with a LIQUOR_TYPE using LIKE patterns on
#                       category_name / item_description; the first matching WHEN wins,
#                       unmatched rows become 'OTHER'.
#   final SELECT      - sums liters per LIQUOR_TYPE, keeping only county = "POLK".
# NOTE(review): the unaliased IF(...) column in source_table is not selected downstream,
# and the final filter keeps only POLK, so the ten-county filter looks wider than needed — confirm intent.
cat_sql_statement = """
with source_table AS(
  SELECT
  volume_sold_liters,
  county,
 IF (county ='POTTAWATTA','POTTAWATTAMIE', county),
item_description,
category_name
  FROM `bigquery-public-data.iowa_liquor_sales.sales`
  WHERE EXTRACT(YEAR FROM date) = 2021 AND county IN ("POLK","LINN","SCOTT","JOHNSON","BLACK HAWK","WOODBURY","DUBUQUE","STORY","DALLAS","POTTAWATTA")
),

liquor_categories AS 
  (SELECT
  volume_sold_liters,
  county,
  CASE 
WHEN category_name LIKE '%AMAR%' OR category_name LIKE '%CREME%'OR category_name LIKE '%LIQUEUR%' OR category_name LIKE '%CORDIAL%' OR category_name LIKE '%ANIS%' OR category_name LIKE '%TRIPLE SEC%' OR item_description LIKE '%JAGERM%' OR item_description LIKE '%LIQUEUR%' OR item_description LIKE '%SAINTS N SINNERS APPLE PIE%' THEN 'LIQUEUR' 
  WHEN category_name LIKE '%WHIS%' OR category_name LIKE '%BOUR%'OR category_name LIKE '%RYE%' OR item_description LIKE '%RYE%' OR item_description LIKE '%WHIS%'OR item_description LIKE '%SCOTCH%' OR category_name LIKE '%SCOTCH%' OR item_description LIKE '%BEAM%' OR item_description LIKE '%BOUR%' OR item_description LIKE '%CROWN ROYAL%' OR item_description LIKE '%JACK DAN%'OR item_description LIKE '%EVAN WILL%'OR item_description LIKE '%MAKER%MARK%' OR item_description LIKE '%SIR WINSTON%' OR item_description LIKE '%ELIJAH%' OR item_description LIKE '%JOHNNIE WALKER%'THEN 'WHISKEY'
  WHEN category_name LIKE '%VOD%' OR item_description LIKE '%VOD%'OR item_description LIKE '%SMIRN%'THEN 'VODKA'
  WHEN category_name LIKE '%BRANDI%' OR category_name LIKE '%BRANDY%' OR category_name LIKE '%SCHNAPPS%' OR item_description LIKE '%COGNAC%' OR item_description LIKE '%HENNESSY%'THEN 'BRANDY'
  WHEN category_name LIKE '%RUM%' OR item_description LIKE '%RUM%' OR item_description LIKE '%CAPTAIN MOR%'THEN 'RUM'
  WHEN category_name LIKE '%TEQ%' OR category_name LIKE '%MEZC%' OR item_description LIKE '%JUAREZ%' OR item_description LIKE '%TEQU%' OR item_description LIKE '%REPOSADO%' OR item_description LIKE '%TORTILLA GOLD%' OR item_description LIKE '%MONTEZUMA%' THEN 'TEQUILA AND MEZCAL'
  WHEN category_name LIKE '%GIN%' OR item_description LIKE '%GIN%' THEN 'GIN'

  WHEN category_name LIKE '%COCKT%' OR item_description LIKE '%TARANTULA%' THEN 'COCKTAILS_RTD'
  ELSE 'OTHER' END AS LIQUOR_TYPE
  FROM source_table)

SELECT  
  LIQUOR_TYPE,
 county,
sum(volume_sold_liters) AS liters_sold_per_cat
FROM liquor_categories
WHERE county = "POLK"
GROUP BY LIQUOR_TYPE, county
""" .strip()

In [None]:
# Run the Polk County category query and display the result.
df4 = pd.read_sql_query(cat_sql_statement, engine)
df4

In [None]:
# Show the LIQUOR_TYPE column of the Polk County result.
df4['LIQUOR_TYPE']

In [None]:
sns.set_palette("Set2")

# Horizontal bars: liters sold in Polk County per liquor category.
ax4 = sns.barplot(data=df4, x='liters_sold_per_cat', y='LIQUOR_TYPE')

ax4.set_title("2021 Polk County Sales", size=24)
ax4.set_xlabel("Liters Sold", size=20)
# The category names on the bars already identify the y axis.
ax4.set(ylabel=None)

# Abbreviate the liter counts on the x axis (K / M / B suffixes).
ax4.xaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

plt.show()

In [None]:
# BigQuery query: liters sold per liquor category and year in Polk County, 2018-2021.
#   source_table      - Polk County sales rows for 2018-2021, with the year cast to a string.
#   liquor_categories - tags each row with a LIQUOR_TYPE using LIKE patterns on
#                       category_name / item_description; the first matching WHEN wins,
#                       unmatched rows become 'OTHER'.
#   final SELECT      - one row per (LIQUOR_TYPE, year) with summed liters, ordered by year.
cat_year_sql_statement = """
with source_table AS(
  SELECT
  volume_sold_liters,
  CAST(EXTRACT(YEAR FROM date) AS string) as year,
item_description,
category_name
  FROM `bigquery-public-data.iowa_liquor_sales.sales`
  WHERE EXTRACT(YEAR FROM date) IN (2018,2019,2020,2021) AND county = "POLK"),

liquor_categories AS 
  (SELECT
  volume_sold_liters,
year,
  CASE 
    WHEN category_name LIKE '%AMAR%' OR category_name LIKE '%CREME%'OR category_name LIKE '%LIQUEUR%' OR category_name LIKE '%CORDIAL%' OR category_name LIKE '%ANIS%' OR category_name LIKE '%TRIPLE SEC%' OR item_description LIKE '%JAGERM%' OR item_description LIKE '%LIQUEUR%' OR item_description LIKE '%SAINTS N SINNERS APPLE PIE%' THEN 'LIQUEUR' 
  WHEN category_name LIKE '%WHIS%' OR category_name LIKE '%BOUR%'OR category_name LIKE '%RYE%' OR item_description LIKE '%RYE%' OR item_description LIKE '%WHIS%'OR item_description LIKE '%SCOTCH%' OR category_name LIKE '%SCOTCH%' OR item_description LIKE '%BEAM%' OR item_description LIKE '%BOUR%' OR item_description LIKE '%CROWN ROYAL%' OR item_description LIKE '%JACK DAN%'OR item_description LIKE '%EVAN WILL%'OR item_description LIKE '%MAKER%MARK%' OR item_description LIKE '%SIR WINSTON%' OR item_description LIKE '%ELIJAH%' OR item_description LIKE '%JOHNNIE WALKER%'THEN 'WHISKEY'
  WHEN category_name LIKE '%VOD%' OR item_description LIKE '%VOD%'OR item_description LIKE '%SMIRN%'THEN 'VODKA'
  WHEN category_name LIKE '%BRANDI%' OR category_name LIKE '%BRANDY%' OR category_name LIKE '%SCHNAPPS%' OR item_description LIKE '%COGNAC%' OR item_description LIKE '%HENNESSY%'THEN 'BRANDY'
  WHEN category_name LIKE '%RUM%' OR item_description LIKE '%RUM%' OR item_description LIKE '%CAPTAIN MOR%'THEN 'RUM'
  WHEN category_name LIKE '%TEQ%' OR category_name LIKE '%MEZC%' OR item_description LIKE '%JUAREZ%' OR item_description LIKE '%TEQU%' OR item_description LIKE '%REPOSADO%' OR item_description LIKE '%TORTILLA GOLD%' OR item_description LIKE '%MONTEZUMA%' THEN 'TEQUILA AND MEZCAL'
  WHEN category_name LIKE '%GIN%' OR item_description LIKE '%GIN%' THEN 'GIN'

  WHEN category_name LIKE '%COCKT%' OR item_description LIKE '%TARANTULA%' THEN 'COCKTAILS_RTD'
  ELSE 'OTHER' END AS LIQUOR_TYPE
  FROM source_table)

SELECT  
  LIQUOR_TYPE,
year,
sum(volume_sold_liters) AS liters_sold_per_cat
FROM liquor_categories

GROUP BY LIQUOR_TYPE, year
ORDER BY year 

""" .strip()

In [None]:
# Run the Polk County per-year category query and load the result into a DataFrame.
df5 = pd.read_sql_query(cat_year_sql_statement, engine)

In [None]:
sns.set_palette("Set2")

# Fixed legend/colour order for the liquor categories in this chart.
scatter_hue_order = ["VODKA", "WHISKEY", "RUM", "TEQUILA AND MEZCAL", "GIN", "COCKTAILS_RTD", "LIQUEUR", "BRANDY", "OTHER"]

# One point per (year, category); scatterplot returns the Axes it drew on.
ax5 = sns.scatterplot(
    data=df5,
    x='year',
    y='liters_sold_per_cat',
    hue='LIQUOR_TYPE',
    hue_order=scatter_hue_order,
    s=200,
)

ax5.set_title("Polk County Sales", size=24)
# The year ticks speak for themselves, so the x axis gets no caption.
ax5.set(xlabel=None)
ax5.set_ylabel("Liters Sold", size=20)

# Abbreviate the liter counts on the y axis (K / M / B suffixes).
ax5.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

# Place the category legend outside the plot area, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, markerscale=2)

In [None]:
#calculate percent change column from previous year using window function OR Python, maybe numpy?


In [None]:
sns.set_palette("husl")

# Reshape to one row per year and one column per liquor type. That is the layout
# DataFrame.plot.bar needs to stack the categories inside each year's bar; the
# query returns one row per (LIQUOR_TYPE, year), so the pivot has no duplicates.
liters_by_year_and_type = df5.pivot(index='year', columns='LIQUOR_TYPE', values='liters_sold_per_cat')

# Draw the stacked bars. Without a plotting call the cell would only style an
# empty axes and the legend would have nothing to show.
ax5 = liters_by_year_and_type.plot.bar(
    stacked=True,
    color=sns.color_palette("husl", n_colors=liters_by_year_and_type.shape[1]),
)

# The year ticks speak for themselves, so the x axis gets no caption.
ax5.set(xlabel=None)
ax5.set_ylabel("Liters Sold", size=20)
ax5.set_title("Polk County Sales", size=24)

# Abbreviate the liter counts on the y axis (K / M / B suffixes).
ax5.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

# Place the category legend outside the plot area, to the right.
ax5.legend(bbox_to_anchor=(1.05, 1), title='Liquor Categories', loc='upper left', borderaxespad=0, markerscale=2)

plt.show()

In [None]:
sns.set_palette("husl")

# Fixed legend/colour order for the liquor categories in this chart.
line_hue_order = ["VODKA", "WHISKEY", "TEQUILA AND MEZCAL", "GIN", "RUM", "COCKTAILS_RTD", "LIQUEUR", "BRANDY", "OTHER"]

# One line per category across the years; lineplot returns the Axes it drew on.
ax5 = sns.lineplot(
    data=df5,
    x='year',
    y='liters_sold_per_cat',
    hue='LIQUOR_TYPE',
    hue_order=line_hue_order,
)

ax5.set_title("Polk County Sales", size=24)
# The year ticks speak for themselves, so the x axis gets no caption.
ax5.set(xlabel=None)
ax5.set_ylabel("Liters Sold", size=20)

# Abbreviate the liter counts on the y axis (K / M / B suffixes).
ax5.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

# Place the category legend outside the plot area, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, markerscale=2)

In [None]:
# BigQuery query: liters sold per liquor category and year for 2018-2021, with no county filter.
#   source_table      - all sales rows for 2018-2021, with the year cast to a string.
#   liquor_categories - tags each row with a LIQUOR_TYPE using LIKE patterns on
#                       category_name / item_description; the first matching WHEN wins,
#                       unmatched rows become 'OTHER'.
#   final SELECT      - one row per (LIQUOR_TYPE, year) with summed liters, ordered by year.
all_county_cat_year_sql_statement = """
with source_table AS(
  SELECT
  volume_sold_liters,
 CAST(EXTRACT(YEAR FROM date) AS string) as year,
item_description,
category_name
  FROM `bigquery-public-data.iowa_liquor_sales.sales`
  WHERE EXTRACT(YEAR FROM date) IN (2018,2019,2020,2021)),

liquor_categories AS 
  (SELECT
  volume_sold_liters,
year,
  CASE 
    WHEN category_name LIKE '%AMAR%' OR category_name LIKE '%CREME%'OR category_name LIKE '%LIQUEUR%' OR category_name LIKE '%CORDIAL%' OR category_name LIKE '%ANIS%' OR category_name LIKE '%TRIPLE SEC%' OR item_description LIKE '%JAGERM%' OR item_description LIKE '%LIQUEUR%' OR item_description LIKE '%SAINTS N SINNERS APPLE PIE%' THEN 'LIQUEUR' 
  WHEN category_name LIKE '%WHIS%' OR category_name LIKE '%BOUR%'OR category_name LIKE '%RYE%' OR item_description LIKE '%RYE%' OR item_description LIKE '%WHIS%'OR item_description LIKE '%SCOTCH%' OR category_name LIKE '%SCOTCH%' OR item_description LIKE '%BEAM%' OR item_description LIKE '%BOUR%' OR item_description LIKE '%CROWN ROYAL%' OR item_description LIKE '%JACK DAN%'OR item_description LIKE '%EVAN WILL%'OR item_description LIKE '%MAKER%MARK%' OR item_description LIKE '%SIR WINSTON%' OR item_description LIKE '%ELIJAH%' OR item_description LIKE '%JOHNNIE WALKER%'THEN 'WHISKEY'
  WHEN category_name LIKE '%VOD%' OR item_description LIKE '%VOD%'OR item_description LIKE '%SMIRN%'THEN 'VODKA'
  WHEN category_name LIKE '%BRANDI%' OR category_name LIKE '%BRANDY%' OR category_name LIKE '%SCHNAPPS%' OR item_description LIKE '%COGNAC%' OR item_description LIKE '%HENNESSY%'THEN 'BRANDY'
  WHEN category_name LIKE '%RUM%' OR item_description LIKE '%RUM%' OR item_description LIKE '%CAPTAIN MOR%'THEN 'RUM'
  WHEN category_name LIKE '%TEQ%' OR category_name LIKE '%MEZC%' OR item_description LIKE '%JUAREZ%' OR item_description LIKE '%TEQU%' OR item_description LIKE '%REPOSADO%' OR item_description LIKE '%TORTILLA GOLD%' OR item_description LIKE '%MONTEZUMA%' THEN 'TEQUILA AND MEZCAL'
  WHEN category_name LIKE '%GIN%' OR item_description LIKE '%GIN%' THEN 'GIN'

  WHEN category_name LIKE '%COCKT%' OR item_description LIKE '%TARANTULA%' THEN 'COCKTAILS_RTD'
  ELSE 'OTHER' END AS LIQUOR_TYPE
  FROM source_table)

SELECT  
  LIQUOR_TYPE,
year,
sum(volume_sold_liters) AS liters_sold_per_cat
FROM liquor_categories

GROUP BY LIQUOR_TYPE, year
ORDER BY year 

""" .strip()

In [None]:
# Run the all-county per-year category query and load the result into a DataFrame.
df6 = pd.read_sql_query(all_county_cat_year_sql_statement, engine)

In [None]:
# Inspect the column dtypes of the per-year category result (the query casts year to a string).
df6.dtypes

In [None]:
sns.set_palette("husl")

# Fixed legend/colour order for the liquor categories in this chart.
line_hue_order = ["VODKA", "WHISKEY", "TEQUILA AND MEZCAL", "GIN", "RUM", "COCKTAILS_RTD", "LIQUEUR", "BRANDY", "OTHER"]

# One line per category across the years; lineplot returns the Axes it drew on.
ax5 = sns.lineplot(
    data=df6,
    x='year',
    y='liters_sold_per_cat',
    hue='LIQUOR_TYPE',
    hue_order=line_hue_order,
)

ax5.set_title("Liters sold by category 2018-2021", size=24)
# The year ticks speak for themselves, so the x axis gets no caption.
ax5.set(xlabel=None)
ax5.set_ylabel("Liters Sold", size=20)

# Abbreviate the liter counts on the y axis (K / M / B suffixes).
ax5.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

# Place the category legend outside the plot area, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, markerscale=2)

In [None]:
# BigQuery query: rounded liters sold per liquor category, year and month for 2018-2021.
#   source_table      - every sales row with its year and month extracted (no county filter;
#                       the county lines inside the SQL are commented out).
#   liquor_categories - tags each row with a LIQUOR_TYPE using LIKE patterns on
#                       category_name / item_description; the first matching WHEN wins,
#                       unmatched rows become 'OTHER'.
#   final SELECT      - keeps 2018-2021, sums liters per (year, month, LIQUOR_TYPE),
#                       ordered by month and then year.
cat_month_year_sql_statement = """ 
with source_table AS(
  SELECT
  EXTRACT(YEAR FROM date) AS year,
   EXTRACT(MONTH FROM date) AS month,
  category_name, item_description,
  volume_sold_liters,
  date
  #,
 #IF (county ='POTTAWATTA','POTTAWATTAMIE', county)
  FROM `bigquery-public-data.iowa_liquor_sales.sales`
  #WHERE county IN ("POLK","LINN","SCOTT","JOHNSON","BLACK HAWK","WOODBURY","DUBUQUE","STORY","DALLAS","POTTAWATTA")
),

liquor_categories AS 
  (SELECT
  volume_sold_liters,
year, month,
  CASE
    WHEN category_name LIKE '%AMAR%' OR category_name LIKE '%CREME%'OR category_name LIKE '%LIQUEUR%' OR category_name LIKE '%CORDIAL%' OR category_name LIKE '%ANIS%' OR category_name LIKE '%TRIPLE SEC%' OR item_description LIKE '%JAGERM%' OR item_description LIKE '%LIQUEUR%' OR item_description LIKE '%SAINTS N SINNERS APPLE PIE%' THEN 'LIQUEUR' 
  WHEN category_name LIKE '%WHIS%' OR category_name LIKE '%BOUR%'OR category_name LIKE '%RYE%' OR item_description LIKE '%RYE%' OR item_description LIKE '%WHIS%'OR item_description LIKE '%SCOTCH%' OR category_name LIKE '%SCOTCH%' OR item_description LIKE '%BEAM%' OR item_description LIKE '%BOUR%' OR item_description LIKE '%CROWN ROYAL%' OR item_description LIKE '%JACK DAN%'OR item_description LIKE '%EVAN WILL%'OR item_description LIKE '%MAKER%MARK%' OR item_description LIKE '%SIR WINSTON%' OR item_description LIKE '%ELIJAH%' OR item_description LIKE '%JOHNNIE WALKER%'THEN 'WHISKEY'
  WHEN category_name LIKE '%VOD%' OR item_description LIKE '%VOD%'OR item_description LIKE '%SMIRN%'THEN 'VODKA'
  WHEN category_name LIKE '%BRANDI%' OR category_name LIKE '%BRANDY%' OR category_name LIKE '%SCHNAPPS%' OR item_description LIKE '%COGNAC%' OR item_description LIKE '%HENNESSY%'THEN 'BRANDY'
  WHEN category_name LIKE '%RUM%' OR item_description LIKE '%RUM%' OR item_description LIKE '%CAPTAIN MOR%'THEN 'RUM'
  WHEN category_name LIKE '%TEQ%' OR category_name LIKE '%MEZC%' OR item_description LIKE '%JUAREZ%' OR item_description LIKE '%TEQU%' OR item_description LIKE '%REPOSADO%' OR item_description LIKE '%TORTILLA GOLD%' OR item_description LIKE '%MONTEZUMA%' THEN 'TEQUILA AND MEZCAL'
  WHEN category_name LIKE '%GIN%' OR item_description LIKE '%GIN%' THEN 'GIN'

  WHEN category_name LIKE '%COCKT%' OR item_description LIKE '%TARANTULA%' THEN 'COCKTAILS_RTD'
  ELSE 'OTHER' END AS LIQUOR_TYPE
  FROM source_table)

SELECT ROUND(SUM(volume_sold_liters)) AS liters_sold, year, month, LIQUOR_TYPE
FROM liquor_categories
WHERE year IN (2018,2019,2020,2021)
GROUP BY year, month, LIQUOR_TYPE
ORDER BY month, year

   """ .strip()

In [None]:
df61 = pd.read_sql_query(cat_month_year_sql_statement, engine)

# Build a first-of-the-month timestamp for every row: pd.to_datetime assembles
# dates from a frame holding year / month / day columns.
month_start_parts = df61[['year', 'month']].assign(day=1)
df61['Date'] = pd.to_datetime(month_start_parts)

df61

In [None]:
sns.set_palette("husl")

# Fixed legend/colour order for the liquor categories in this chart.
line_hue_order = ["VODKA", "WHISKEY", "TEQUILA AND MEZCAL", "GIN", "RUM", "COCKTAILS_RTD", "LIQUEUR", "BRANDY", "OTHER"]

# One line per category over the monthly dates; lineplot returns the Axes it drew on.
ax5 = sns.lineplot(
    data=df61,
    x='Date',
    y='liters_sold',
    hue='LIQUOR_TYPE',
    hue_order=line_hue_order,
)

ax5.set_title("Liters sold by category 2018-2021", size=24)
# The date ticks speak for themselves, so the x axis gets no caption.
ax5.set(xlabel=None)
ax5.set_ylabel("Liters Sold", size=20)

# Abbreviate the liter counts on the y axis (K / M / B suffixes).
ax5.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

# Place the category legend outside the plot area, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, markerscale=2)

# Turn the date labels vertical so they do not overlap.
plt.xticks(rotation=90)

In [None]:
# BigQuery query: year-over-year percent change in liters sold per liquor category.
#   source_table            - sales rows for 2017-2021 (2017 is only the base year for 2018).
#   liquor_categories       - tags each row with a LIQUOR_TYPE using LIKE patterns; the first
#                             matching WHEN wins, unmatched rows become 'OTHER'.
#   liters_cat_sum          - liters summed per (LIQUOR_TYPE, year).
#   liters_cat_year_sum     - adds the previous year's liters per category via LAG.
#   percent_change_per_year - (current - previous) / previous * 100, rounded.
# The change is measured against the PREVIOUS year's volume. SAFE_DIVIDE returns NULL
# instead of failing if that volume is zero; the first year of each category has no
# previous value and is NULL as well.
liter_sold_per_cat_lag_year_sql_statement = """
WITH source_table AS(
  SELECT
    volume_sold_liters,
    CAST(EXTRACT(YEAR FROM date) AS string) AS year,
    item_description,
    category_name
  FROM
    `bigquery-public-data.iowa_liquor_sales.sales`
  WHERE
    EXTRACT(YEAR FROM date) IN (2017,2018,2019,2020,2021)),

  liquor_categories AS (
  SELECT
    volume_sold_liters,
    year,
    CASE
      WHEN category_name LIKE '%AMAR%' OR category_name LIKE '%CREME%'OR category_name LIKE '%LIQUEUR%' OR category_name LIKE '%CORDIAL%' OR category_name LIKE '%ANIS%' OR category_name LIKE '%TRIPLE SEC%' OR item_description LIKE '%JAGERM%' OR item_description LIKE '%LIQUEUR%' OR item_description LIKE '%SAINTS N SINNERS APPLE PIE%' THEN 'LIQUEUR'
      WHEN category_name LIKE '%WHIS%' OR category_name LIKE '%BOUR%'OR category_name LIKE '%RYE%' OR item_description LIKE '%RYE%' OR item_description LIKE '%WHIS%'OR item_description LIKE '%SCOTCH%' OR category_name LIKE '%SCOTCH%' OR item_description LIKE '%BEAM%' OR item_description LIKE '%BOUR%' OR item_description LIKE '%CROWN ROYAL%' OR item_description LIKE '%JACK DAN%'OR item_description LIKE '%EVAN WILL%'OR item_description LIKE '%MAKER%MARK%' OR item_description LIKE '%SIR WINSTON%' OR item_description LIKE '%ELIJAH%' OR item_description LIKE '%JOHNNIE WALKER%'THEN 'WHISKEY'
      WHEN category_name LIKE '%VOD%' OR item_description LIKE '%VOD%'OR item_description LIKE '%SMIRN%'THEN 'VODKA'
      WHEN category_name LIKE '%BRANDI%' OR category_name LIKE '%BRANDY%' OR category_name LIKE '%SCHNAPPS%' OR item_description LIKE '%COGNAC%' OR item_description LIKE '%HENNESSY%'THEN 'BRANDY'
      WHEN category_name LIKE '%RUM%' OR item_description LIKE '%RUM%' OR item_description LIKE '%CAPTAIN MOR%'THEN 'RUM'
      WHEN category_name LIKE '%TEQ%' OR category_name LIKE '%MEZC%' OR item_description LIKE '%JUAREZ%' OR item_description LIKE '%TEQU%' OR item_description LIKE '%REPOSADO%' OR item_description LIKE '%TORTILLA GOLD%' OR item_description LIKE '%MONTEZUMA%' THEN 'TEQUILA AND MEZCAL'
      WHEN category_name LIKE '%GIN%' OR item_description LIKE '%GIN%' THEN 'GIN'
      WHEN category_name LIKE '%COCKT%' OR item_description LIKE '%TARANTULA%' THEN 'COCKTAILS_RTD'
      ELSE 'OTHER' END AS LIQUOR_TYPE
  FROM
    source_table),

  liters_cat_sum AS (
  SELECT
    LIQUOR_TYPE,
    year,
    SUM(volume_sold_liters) AS liters_sold_per_cat
  FROM
    liquor_categories
  GROUP BY
    LIQUOR_TYPE,
    year),

  liters_cat_year_sum AS (
  SELECT
    LIQUOR_TYPE,
    year,
    liters_sold_per_cat,
    LAG(liters_sold_per_cat) OVER(PARTITION BY LIQUOR_TYPE ORDER BY year) AS prev_liter_sold_per_cat
  FROM
    liters_cat_sum),

  percent_change_per_year AS (
  SELECT
    LIQUOR_TYPE,
    year,
    ROUND(SAFE_DIVIDE(liters_sold_per_cat - prev_liter_sold_per_cat, prev_liter_sold_per_cat) * 100) AS percent_change_year
  FROM
    liters_cat_year_sum)

SELECT
  *
FROM
  percent_change_per_year
ORDER BY
  year,
  LIQUOR_TYPE
""" .strip()

In [None]:
# Run the year-over-year percent change query and display the result.
df7 = pd.read_sql_query(liter_sold_per_cat_lag_year_sql_statement, engine)
df7

In [None]:
sns.set_palette("husl")

# Fixed legend/colour order for the liquor categories in this chart.
line_hue_order = ["VODKA", "WHISKEY", "TEQUILA AND MEZCAL", "GIN", "RUM", "COCKTAILS_RTD", "LIQUEUR", "BRANDY", "OTHER"]

# One line per category showing its percent change per year; lineplot returns the Axes it drew on.
ax5 = sns.lineplot(
    data=df7,
    x='year',
    y='percent_change_year',
    hue='LIQUOR_TYPE',
    hue_order=line_hue_order,
)

ax5.set_title("Percent Change in Liters Sold by Category", size=24)
# The year ticks speak for themselves, so the x axis gets no caption.
ax5.set(xlabel=None)
ax5.set_ylabel("Percent Change in Liters Sold", size=20)

# Same tick formatter as the other charts (drops a trailing ".0" from the tick labels).
ax5.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

# Place the category legend outside the plot area, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0, markerscale=2)