# Preliminary analytics

In [1]:
import csv
import json
import math
import os

import numpy as np
import pandas as pd
import psycopg2

In [2]:
#
# function to run a select query and return rows in a pandas dataframe
# pandas reads numeric values from postgres as floats
# if every value in a float column is a whole number, the column is changed to integer
#

def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    """Run a select query and return the rows in a pandas dataframe.

    The query is executed with ``pd.read_sql_query`` on the module-level
    ``connection``. Afterwards, every ``float64`` column whose non-missing
    values are all whole numbers is converted to the nullable ``Int64`` dtype,
    so integer results that arrived as floats are shown as integers. Missing
    values in a converted column become ``<NA>``.

    Parameters
    ----------
    query : str
        SQL select statement to run.
    rollback_before_flag : bool
        If true, ``connection.rollback()`` is called before the query runs.
    rollback_after_flag : bool
        If true, ``connection.rollback()`` is called after the query runs.

    Returns
    -------
    pandas.DataFrame
        The query result, with whole-number float columns converted to
        ``Int64``. Columns holding fractional, infinite, or out-of-int64-range
        values are left as ``float64``.
    """

    if rollback_before_flag:
        connection.rollback()

    df = pd.read_sql_query(query, connection)

    if rollback_after_flag:
        connection.rollback()

    # Largest magnitude (exclusive) that is guaranteed to fit in a signed
    # 64-bit integer. Comparing against it also rejects +/- infinity.
    int64_limit = 2.0 ** 63

    # fix the float columns that really should be integers
    for column in df:

        if df[column].dtype == "float64":

            values = df[column].to_numpy()
            present = values[~np.isnan(values)]

            # np.all() of an empty array is True, so a column with no
            # non-missing values is converted as well.
            fits_int64 = np.all(np.abs(present) < int64_limit)
            whole_numbers = np.all(present == np.floor(present))

            if fits_int64 and whole_numbers:
                df[column] = df[column].astype('Int64')

    return df
    

In [3]:
# Connection settings are read from the standard libpq environment variables
# (PGUSER, PGPASSWORD, PGHOST, PGPORT, PGDATABASE) when they are set, so
# credentials do not have to live in the notebook. The fallbacks are the
# values this notebook was originally written with.
connection = psycopg2.connect(
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "ucb"),
    host=os.environ.get("PGHOST", "postgres"),
    port=os.environ.get("PGPORT", "5432"),
    database=os.environ.get("PGDATABASE", "postgres"),
)

In [4]:
# Open a cursor on the database connection.
cursor = connection.cursor()

# Total dollar amount of sales

Write a query to sum the total_amount in the stage_1_peak_sales table and present the sum in a Pandas dataframe.

In [5]:
# Sum every sale amount; total_amount is cast to numeric before summing.
query = """

select sum(total_amount::numeric) as total_sales from stage_1_peak_sales
"""

# Roll back the connection both before and after running the query.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,total_sales
0,6480


# Total number of sales

Write a query to count the total number of sales in the stage_1_peak_sales table and present the count in a Pandas dataframe. Each record in the stage_1_peak_sales table is a sale.

In [6]:
# Count the rows of stage_1_peak_sales.
query = """

select count(*) as total_number_of_sales
from stage_1_peak_sales

"""

# Roll back the connection both before and after running the query.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,total_number_of_sales
0,97


# Total dollar amount of sales, total cut paid to Peak, net to AGM

Write a query to calculate the total dollar amount of sales, the total cut paid to Peak, and the net to AGM.

In [7]:
rollback_before_flag = True
rollback_after_flag = True

# The sum is computed once in the `totals` CTE. Peak's cut is 18% of total
# sales and the net to AGM is the remainder, written as (1 - 0.18) so the two
# shares are derived from one rate and always add up to the total.
query = """

with totals as (
    select sum(total_amount::numeric) as total_sales
    from stage_1_peak_sales
)
select total_sales,
       0.18 * total_sales as cut_paid_to_peak,
       (1 - 0.18) * total_sales as net_to_agm
from totals

"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,total_sales,cut_paid_to_peak,net_to_agm
0,6480,1166.4,5313.6


# Total number of meals sold

Write a query to sum the quantity in the stage_1_peak_line_items table and present the sum in a Pandas dataframe with an appropriate column header name.

In [8]:
# Sum the line-item quantities; quantity is cast to numeric before summing.
query = """

select sum(quantity::numeric) as total_number_of_meals_sold from stage_1_peak_line_items

"""

# Roll back the connection both before and after running the query.
rollback_before_flag, rollback_after_flag = True, True

my_select_query_pandas(
    query,
    rollback_before_flag=rollback_before_flag,
    rollback_after_flag=rollback_after_flag,
)

Unnamed: 0,total_number_of_meals_sold
0,540


# Total number of meals sold by meal

Expanding on the last query, group the sum of quantity by meal.  Display the meal followed by the number of meals sold.

In [33]:
rollback_before_flag = True
rollback_after_flag = True

# Each line item is mapped from its Peak product id to a product through
# peak_product_mapping, and the product description is used as the meal name.
# Left joins keep line items that have no mapping or no product; those are
# grouped under a NULL meal. The second sort key (meal) makes the row order
# deterministic when two meals have the same total.
query = """

select product.description as meal,
       sum(line_item.quantity::numeric) as total_number_meals_sold
from stage_1_peak_line_items as line_item
left join peak_product_mapping as mapping
    on mapping.peak_product_id = line_item.product_id::numeric
left join products as product
    on product.product_id = mapping.product_id
group by product.description
order by total_number_meals_sold desc, meal

"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,meal,total_number_meals_sold
0,Pistachio Salmon,113
1,Eggplant Lasagna,107
2,Curry Chicken,101
3,Teriyaki Chicken,80
4,Brocolli Stir Fry,60
5,Tilapia Piccata,44
6,Spinach Orzo,27
7,Chicken Salad,8


# Average number of meals per sale

Write a query to find the average number of meals per sale, which should be equal to the total number of meals sold divided by the total number of sales, both of which we have calculated before.

In [36]:
rollback_before_flag = True
rollback_after_flag = True

# Average = total meals sold / total number of sales, rounded to one decimal.
# NULLIF turns a sale count of zero into NULL, so an empty stage_1_peak_sales
# table yields a NULL average instead of a division-by-zero error.
# Both CTEs return exactly one row, so the cross join returns one row.
query = """

WITH meals AS (
    SELECT SUM(quantity::numeric) AS total_number_of_meals_sold
    FROM stage_1_peak_line_items
),
sales AS (
    SELECT COUNT(*) AS total_number_of_sales
    FROM stage_1_peak_sales
)

SELECT round(
           meals.total_number_of_meals_sold / NULLIF(sales.total_number_of_sales, 0),
           1
       ) AS average_meals_per_sale
FROM meals
CROSS JOIN sales;

"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,average_meals_per_sale
0,5.6
