# Exploratory Data Analysis - AGM's Sales

In [1]:
# Import necessary packages
import math
import numpy as np
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    """Run a select query in Postgres and return the rows in a pandas dataframe.

    The query is executed on the module-level ``connection``.

    Args:
        query: SQL text of the select statement to run.
        rollback_before_flag: When true, roll back the connection before
            running the query.
        rollback_after_flag: When true, roll back the connection after
            running the query.

    Returns:
        The query result as a DataFrame. Every ``float64`` column whose
        non-missing values are all whole numbers is converted to the
        nullable ``Int64`` dtype; all other columns are returned as read.
    """
    if rollback_before_flag:
        connection.rollback()

    df = pd.read_sql_query(query, connection)

    if rollback_after_flag:
        connection.rollback()

    # Convert float columns to integer columns, for those that should be
    # integer columns (i.e. no value carries a fractional part).
    for column in df:
        if df[column].dtype != "float64":
            continue

        values = df[column].to_numpy()
        present = values[~np.isnan(values)]

        # Infinite values and values outside the int64 range cannot be
        # stored as Int64, so such columns are left as float64 instead of
        # raising during the check or the cast.
        is_whole = (
            np.isfinite(present).all()
            and (np.abs(present) < 2.0 ** 63).all()
            and (present == np.floor(present)).all()
        )

        if is_whole:
            df[column] = df[column].astype("Int64")

    return df

# Open the shared Postgres connection used by my_select_query_pandas.
# NOTE(review): the credentials are hard-coded in the notebook; consider
# reading them from the environment before sharing this file.
connection = psycopg2.connect(
    host="postgres",
    port="5432",
    database="postgres",
    user="postgres",
    password="ucb",
)

# Cursor on the same connection, kept at module level.
cursor = connection.cursor()

### Total Sales for AGM

In [5]:
# Sum total_amount over every row of the sales table.
query = """

select sum(total_amount) as total_sales 
from sales
"""

# Roll back both before and after the query runs.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,total_sales
0,98739408


### Total Sales, by Month

In [6]:
# Sum total_amount per calendar month, ordered by month number.
# NOTE(review): per the PostgreSQL docs the 'Month' template is blank-padded
# to 9 characters, so month_name may carry trailing spaces ('FMMonth' would
# suppress them) - confirm whether that matters downstream.
query = """

select extract(month from sales.sale_date) as month_number, to_char(sale_date, 'Month') as month_name, sum(total_amount) as total_sales
from sales
group by month_number, month_name
order by month_number, month_name

"""

# Roll back both before and after the query runs.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,month_number,month_name,total_sales
0,1,January,7803828
1,2,February,7574280
2,3,March,8779620
3,4,April,8251284
4,5,May,7977840
5,6,June,8124108
6,7,July,7993044
7,8,August,9029808
8,9,September,7578960
9,10,October,8895108


### Total Sales, by Store and Month

In [8]:
# Sum total_amount per store city and calendar month, joining stores to
# sales on store_id and ordering by city, then month number.
# NOTE(review): per the PostgreSQL docs the 'Month' template is blank-padded
# to 9 characters, so month_name may carry trailing spaces ('FMMonth' would
# suppress them) - confirm whether that matters downstream.
query = """

select stores.city, extract(month from sales.sale_date) as month_number, to_char(sale_date, 'Month') as month_name, sum(sales.total_amount) as total_sales
from stores
    join sales
        on stores.store_id = sales.store_id
group by stores.city, month_number, month_name
order by stores.city asc, month_number, month_name

"""

# Roll back both before and after the query runs.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,city,month_number,month_name,total_sales
0,Berkeley,1,January,1988904
1,Berkeley,2,February,1930272
2,Berkeley,3,March,2224500
3,Berkeley,4,April,2092056
4,Berkeley,5,May,2019264
5,Berkeley,6,June,2065140
6,Berkeley,7,July,2034708
7,Berkeley,8,August,2286732
8,Berkeley,9,September,1922256
9,Berkeley,10,October,2248008


### Total Sales, by Day of Week

In [9]:
# Sum total_amount per day of week, ordered by the numeric dow value.
# NOTE(review): per the PostgreSQL docs the 'Day' template is blank-padded
# to 9 characters, so day_of_week may carry trailing spaces ('FMDay' would
# suppress them) - confirm whether that matters downstream.
query = """

select extract(dow from sale_date) as dow, to_char(sale_date, 'Day') as day_of_week, sum(total_amount) as total_sales
from sales
group by dow, day_of_week
order by dow, day_of_week

"""

# Roll back both before and after the query runs.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,dow,day_of_week,total_sales
0,0,Sunday,18589068
1,1,Monday,13167720
2,2,Tuesday,6895332
3,3,Wednesday,13952556
4,4,Thursday,13834644
5,5,Friday,12878628
6,6,Saturday,19421460


### Total Sales, by Store and Day of Week

In [10]:
# Sum total_amount per store city and day of week, joining stores to sales
# on store_id and ordering by city, then the numeric dow value.
# NOTE(review): per the PostgreSQL docs the 'Day' template is blank-padded
# to 9 characters, so day_of_week may carry trailing spaces ('FMDay' would
# suppress them) - confirm whether that matters downstream.
query = """

select stores.city, extract(dow from sales.sale_date) as dow, to_char(sales.sale_date, 'Day') as day_of_week, sum(sales.total_amount) as total_sales
from stores
    join sales
        on stores.store_id = sales.store_id
group by stores.city, dow, day_of_week
order by stores.city asc, dow, day_of_week

"""

# Roll back both before and after the query runs.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,city,dow,day_of_week,total_sales
0,Berkeley,0,Sunday,4694640
1,Berkeley,1,Monday,3340116
2,Berkeley,2,Tuesday,1752036
3,Berkeley,3,Wednesday,3546144
4,Berkeley,4,Thursday,3507660
5,Berkeley,5,Friday,3273240
6,Berkeley,6,Saturday,4927224
7,Dallas,0,Sunday,3650748
8,Dallas,1,Monday,2602980
9,Dallas,2,Tuesday,1352760


### Total Number of Sales 

In [11]:
# Count the sale_id values in the sales table.
query = """

select count(sale_id) as number_of_sales
from sales

"""

# Roll back both before and after the query runs.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,number_of_sales
0,1537617


### Total Number of Sales, by Store

In [12]:
# Count the sale_id values per store city, joining stores to sales on
# store_id.
query = """

select stores.city, count(sales.sale_id) as number_of_sales
from stores
    join sales
        on stores.store_id = sales.store_id
group by stores.city

"""

# Roll back both before and after the query runs.
rollback_before_flag, rollback_after_flag = True, True

# Keep the result in df_1 and show it as the cell output.
df_1 = my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)
df_1

Unnamed: 0,city,number_of_sales
0,Berkeley,390375
1,Dallas,302120
2,Miami,275074
3,Nashville,227721
4,Seattle,342327


### Average Dollar Amount Per Sale

In [13]:
# Divide the summed total_amount by the count of sale_id values to get the
# average amount per sale.
query = """

select sum(total_amount)/count(sale_id) as avg_usd_per_sale
from sales

"""

# Roll back both before and after the query runs.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,avg_usd_per_sale
0,64.215866


### Average Dollar Amount Per Sale, by Store

In [14]:
# Per store city, divide the summed total_amount by the count of sale_id
# values, joining stores to sales on store_id.
query = """

select stores.city, sum(sales.total_amount)/count(sales.sale_id) as avg_usd_per_sale
from stores
    join sales
        on stores.store_id = sales.store_id
group by stores.city


"""

# Roll back both before and after the query runs.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,city,avg_usd_per_sale
0,Berkeley,64.146167
1,Dallas,64.240236
2,Miami,64.318707
3,Nashville,63.995732
4,Seattle,64.337642
