In [1]:
import os
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import polars as pl
import zipfile
from datetime import datetime
from pandas_gbq import to_gbq
from pandas_gbq import read_gbq
from google.cloud import bigquery
from google.oauth2 import service_account

In [2]:
## define project and set up paths for Google Big Query
service_path = "C:/Users/breni/Documents/"
service_file = 'niekampbreannawedge-8bbebeea1dda.json'
project_id = 'niekampbreannawedge'
data_id = 'wedge24'

beans_key = service_path + service_file

In [3]:

credentials = service_account.Credentials.from_service_account_file(beans_key)

client = bigquery.Client(credentials= credentials, project= project_id)

# Task 3: Building Summary Tables

In this task, summary files will be created that allow quick answers to certain questions. In order to do this, a single SQLite database will be created in python (in a .db file) containing three tables as follows: 
1. Sales by date by hour
2. Sales by Owner by Year by Month
3. Sales by Product Description by Year by Month

This database will be built with Python below!

In [4]:
## Query 1: Sales by Date by Hour: By calendar date (YYYY-MM-DD) and hour of the day, 
# determine the total spend in the store, 
# the number of transactions, 
# and a count of the number of items.

query1 = """
    SELECT 
        EXTRACT(DATE FROM datetime) as sale_date, 
        EXTRACT(HOUR FROM datetime) as sale_hour,
        ROUND(SUM(total), 2) as total_sales,
        COUNT(DISTINCT CONCAT(
            CAST(EXTRACT(DATE FROM datetime) AS STRING),
            CAST(register_no AS STRING),
            CAST(emp_no AS STRING),
            CAST(trans_no AS STRING)
        )) AS num_transactions,
        SUM(
            CASE
                WHEN trans_status IN ('V', 'R') THEN -1
                ELSE 1
            END
        ) AS total_items
    FROM `umt-msba.wedge_transactions.transArchive*` AS trans
    JOIN `umt-msba.wedge_transactions.department_lookup` AS depts
        ON trans.department = depts.department
    WHERE trans.department NOT IN (0, 15)
        AND card_no != 3
        AND (trans_status IS NULL
            OR trans_status IN ('V', 'R', '', ' '))
    GROUP BY sale_date, sale_hour
    ORDER BY sale_date, sale_hour;
    
"""

df_transactions = read_gbq(query1, project_id= project_id)

Downloading: 100%|[32m██████████[0m|


In [5]:
df_transactions

Unnamed: 0,sale_date,sale_hour,total_sales,num_transactions,total_items
0,2010-01-01,9,717.50,18,175
1,2010-01-01,10,2665.77,55,757
2,2010-01-01,11,2824.45,65,778
3,2010-01-01,12,2523.37,75,773
4,2010-01-01,13,3789.56,104,1091
...,...,...,...,...,...
38833,2017-01-31,18,5478.42,171,1312
38834,2017-01-31,19,3803.19,132,997
38835,2017-01-31,20,3577.06,102,954
38836,2017-01-31,21,2353.89,77,683


In [6]:
## TASK 2: Sales by Owner by Year by Month: A file that has the following columns: card_no, year, month, sales, transactions, and items.
query2 = """
    SELECT 
        EXTRACT(YEAR FROM datetime) as year, 
        EXTRACT(MONTH FROM datetime) as month,
        ROUND(SUM(total), 2) as total_sales,
        COUNT(DISTINCT CONCAT(
            CAST(EXTRACT(DATE FROM datetime) AS STRING),
            CAST(register_no AS STRING),
            CAST(emp_no AS STRING),
            CAST(trans_no AS STRING)
        )) AS num_transactions,
        SUM(
            CASE
                WHEN trans_status IN ('V', 'R') THEN -1
                ELSE 1
            END
        ) AS total_items
    FROM `umt-msba.wedge_transactions.transArchive*` AS trans
    JOIN `umt-msba.wedge_transactions.department_lookup` AS depts
        ON trans.department = depts.department
    WHERE trans.department NOT IN (0, 15)
        AND card_no != 3
        AND (trans_status IS NULL
            OR trans_status IN ('V', 'R', '', ' '))
    GROUP BY card_no, year, month
    ORDER BY year, month;
    
"""

df_sales_by_owner = read_gbq(query2, project_id= project_id)

Downloading: 100%|[32m██████████[0m|


In [7]:
df_sales_by_owner

Unnamed: 0,year,month,total_sales,num_transactions,total_items
0,2010,1,376.84,3,115
1,2010,1,184.47,4,72
2,2010,1,136.18,3,40
3,2010,1,194.51,2,61
4,2010,1,520.08,11,147
...,...,...,...,...,...
808806,2017,1,127.26,1,20
808807,2017,1,40.23,1,11
808808,2017,1,330.12,2,78
808809,2017,1,30.70,1,10


In [8]:
## TASK 3: Sales by Product Description by Year by Month: A file that has the following columns: 
# upc, description, department number, department name, year, month, sales, transactions, and items.