# Exploratory Data Analysis - AGM's Products

AGM sells 8 different types of ready-made meals. 

In [1]:
# Import necessary packages
import math
import numpy as np
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    '''This function will run a selected query in Postgres and 
    return the rows in a pandas dataframe'''
    
    if rollback_before_flag:
        connection.rollback()
    
    df = pd.read_sql_query(query, connection)
    
    if rollback_after_flag:
        connection.rollback()
    
    # Below converts float columns that integer columns, for those that should be integer columns.
    for column in df:
    
        if df[column].dtype == "float64":

            fraction_flag = False

            for value in df[column].values:
                
                if not np.isnan(value):
                    if value - math.floor(value) != 0:
                        fraction_flag = True

            if not fraction_flag:
                df[column] = df[column].astype('Int64')
    
    return(df)

connection = psycopg2.connect(
    user = "postgres",
    password = "ucb",
    host = "postgres",
    port = "5432",
    database = "postgres"
)

cursor = connection.cursor()

### Total Meals Purchased

In [3]:
rollback_before_flag = True
rollback_after_flag = True

query = """
select sum(line_items.quantity) as total_meals_purchased
from line_items
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,total_meals_purchased
0,8228284


### Total Meals Purchased, by Meal

In [4]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select products.description, sum(line_items.quantity) as total_meals_purchased
from line_items
     join products
          on line_items.product_id = products.product_id
group by products.description
order by products.description asc
"""

df3 = my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)
df3

Unnamed: 0,description,total_meals_purchased
0,Brocolli Stir Fry,913984
1,Chicken Salad,228561
2,Curry Chicken,1368884
3,Eggplant Lasagna,1599058
4,Pistachio Salmon,1828778
5,Spinach Orzo,456769
6,Teriyaki Chicken,1145013
7,Tilapia Piccata,687237


### Total Meals Purchased, by Meal and Store

In [5]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select stores.city, products.description, sum(line_items.quantity) as total_meals_purchased
from line_items
     join products
          on line_items.product_id = products.product_id
     join stores
          on line_items.store_id = stores.store_id       
group by stores.city, products.description
order by stores.city asc, products.description asc
"""

df_3 = my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)
df_3

Unnamed: 0,city,description,total_meals_purchased
0,Berkeley,Brocolli Stir Fry,232038
1,Berkeley,Chicken Salad,57719
2,Berkeley,Curry Chicken,346508
3,Berkeley,Eggplant Lasagna,405637
4,Berkeley,Pistachio Salmon,464274
5,Berkeley,Spinach Orzo,115469
6,Berkeley,Teriyaki Chicken,290858
7,Berkeley,Tilapia Piccata,174252
8,Dallas,Brocolli Stir Fry,179885
9,Dallas,Chicken Salad,44756


### Total Meals Purchased, by Month

In [33]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select extract(month from sales.sale_date) as month_number,to_char(sales.sale_date, 'Month') as month, sum(line_items.quantity) as total_meals_purchased
from line_items
join sales 
     on line_items.sale_id = sales.sale_id
group by month_number, month
order by month_number, month
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,month_number,month,total_meals_purchased
0,1,January,3252637
1,2,February,3152564
2,3,March,3660355
3,4,April,3435052
4,5,May,3330027
5,6,June,3380074
6,7,July,3326135
7,8,August,3591531
8,9,September,2785597
9,10,October,2876508


### Total Meals Purchased, by Month and Meal

In [6]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select extract(month from sales.sale_date) as month_number,
       to_char(sales.sale_date, 'Month') as month,products.description, sum(line_items.quantity) as total_meals_purchased
from line_items
join sales 
     on line_items.sale_id = sales.sale_id
join products
     on line_items.product_id = products.product_id
group by month_number, month, products.description
order by month_number, month, products.description asc
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,month_number,month,description,total_meals_purchased
0,1,January,Brocolli Stir Fry,360924
1,1,January,Chicken Salad,90123
2,1,January,Curry Chicken,541187
3,1,January,Eggplant Lasagna,631818
4,1,January,Pistachio Salmon,720970
...,...,...,...,...
91,12,December,Eggplant Lasagna,371340
92,12,December,Pistachio Salmon,425360
93,12,December,Spinach Orzo,105496
94,12,December,Teriyaki Chicken,266347


### Total Meals Purchased, by Day of Week and Meal

In [7]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select extract(dow from sales.sale_date) as dow,
       to_char(sales.sale_date, 'Day') as day_of_week, products.description, sum(line_items.quantity) as total_meals_purchased
from line_items
join sales 
     on line_items.sale_id = sales.sale_id
join products
     on line_items.product_id = products.product_id
group by dow, day_of_week, products.description
order by dow, day_of_week, products.description asc
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,dow,day_of_week,description,total_meals_purchased
0,0,Sunday,Brocolli Stir Fry,767460
1,0,Sunday,Chicken Salad,192077
2,0,Sunday,Curry Chicken,1149614
3,0,Sunday,Eggplant Lasagna,1343248
4,0,Sunday,Pistachio Salmon,1537853
5,0,Sunday,Spinach Orzo,383486
6,0,Sunday,Teriyaki Chicken,961561
7,0,Sunday,Tilapia Piccata,577641
8,1,Monday,Brocolli Stir Fry,545188
9,1,Monday,Chicken Salad,136507


### Average Meals Purchased Per Sale

In [8]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select sum(line_items.quantity)/count(line_items.sale_id) as avg_meals_overall
from line_items
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,avg_meals_overall
0,1.518701


### Average Meals Per Sale, by Store

In [9]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select stores.city, sum(line_items.quantity)/count(sales.sale_id) as avg_meals_overall
from line_items
     join stores
          on line_items.store_id = stores.store_id
     join sales
          on line_items.sale_id = sales.sale_id
group by stores.city
order by stores.city asc

"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,city,avg_meals_overall
0,Berkeley,1.517387
1,Dallas,1.52047
2,Miami,1.519812
3,Nashville,1.515977
4,Seattle,1.519675
