## Task 3

In [36]:
# Standard library
import sqlite3

# Third-party
import pandas as pd
from google.cloud import bigquery
from sqlalchemy import create_engine

# Shared BigQuery client bound to the "rhamre" project; run_bigquery_query
# submits every query through this object.
client = bigquery.Client(project="rhamre")



In [52]:
def run_bigquery_query(query, bq_client=None):
    """Run a SQL statement in BigQuery and return the result as a DataFrame.

    Args:
        query: SQL text to submit.
        bq_client: Optional object exposing ``query(sql)`` whose return value
            has ``result()``; defaults to the notebook-level ``client``.
            Passing it explicitly removes the dependency on hidden global
            state and lets the function be exercised with a stand-in client.

    Returns:
        Whatever ``to_dataframe()`` yields for the finished job (a pandas
        DataFrame when a real BigQuery client is used).
    """
    # Fall back to the notebook-wide client only when none was supplied.
    active_client = client if bq_client is None else bq_client
    query_job = active_client.query(query)  # API request
    results = query_job.result()  # Block until the job has completed.
    return results.to_dataframe()

In [53]:
# BigQuery SQL: one row per calendar date and hour of day across every table
# matching `rhamre.wedge.transArchive*`, ordered by date then hour.
#   total_spend      - SUM(total) over all rows in the group, rounded to 2 places.
#   num_items        - number of rows with trans_type = 'I' and unitPrice > 0.
#   num_transactions - number of distinct (trans_no, register_no, emp_no, date)
#                      combinations.
# The transaction key joins its parts with '|' so that different combinations
# can never concatenate to the same string (without a separator, trans_no=1 /
# register_no=23 and trans_no=12 / register_no=3 would both start with "123").
# NOTE(review): total_spend adds up `total` for every row regardless of
# trans_type, whereas the owner and product queries in this notebook sum
# ABS(total) over trans_type = 'T' rows only - confirm which definition of
# spend is intended here.
sales_by_date_by_hour = '''
SELECT
  CAST(datetime AS DATE) AS date,
  EXTRACT(HOUR FROM datetime) AS hour,
  ROUND(SUM(total), 2) AS total_spend,
  COUNT(CASE WHEN trans_type = 'I' AND unitPrice > 0 THEN 1 END) AS num_items,
  COUNT(DISTINCT CONCAT(CAST(trans_no AS STRING), '|', CAST(register_no AS STRING), '|', CAST(emp_no AS STRING), '|', CAST(DATE(datetime) AS STRING))) AS num_transactions
FROM `rhamre.wedge.transArchive*`
GROUP BY date, hour
ORDER BY date, hour

'''

In [54]:
# BigQuery SQL: one row per card_no, year and month across every table
# matching `rhamre.wedge.transArchive*`, ordered by card_no descending and
# then by year and month.
#   sales            - sum of ABS(total) over rows with trans_type = 'T',
#                      rounded to 2 places.
#   num_transactions - number of distinct (trans_no, register_no, emp_no, date)
#                      combinations.
#   num_items        - number of rows with trans_type = 'I' and unitPrice > 0.
# The transaction key joins its parts with '|' so that different combinations
# can never concatenate to the same string (without a separator, trans_no=1 /
# register_no=23 and trans_no=12 / register_no=3 would both start with "123").
sales_by_owner_by_year_by_month = '''
SELECT
  card_no,
  EXTRACT(YEAR FROM datetime) AS year,
  EXTRACT(MONTH FROM datetime) AS month,
  ROUND(SUM(CASE WHEN trans_type = 'T' THEN ABS(total) ELSE 0 END), 2) AS sales,
  COUNT(DISTINCT CONCAT(CAST(trans_no AS STRING), '|', CAST(register_no AS STRING), '|', CAST(emp_no AS STRING), '|', CAST(DATE(datetime) AS STRING))) AS num_transactions,
  COUNT(CASE WHEN trans_type = 'I' AND unitPrice > 0 THEN 1 END) AS num_items,
FROM `rhamre.wedge.transArchive*`
GROUP BY card_no, year, month
ORDER BY card_no DESC, year, month

'''

In [55]:
# BigQuery SQL: one row per year, month, upc, description and department
# across every table matching `rhamre.wedge.transArchive*`, with the
# department name looked up from `rhamre.wedge.department_lookup`.
#   sales            - sum of ABS(total) over rows with trans_type = 'T',
#                      rounded to 2 places.
#   num_transactions - number of distinct (trans_no, register_no, emp_no, date)
#                      combinations.
#   num_items        - number of rows with trans_type = 'I' and unitPrice > 0.
# The transaction key joins its parts with '|' so that different combinations
# can never concatenate to the same string (without a separator, trans_no=1 /
# register_no=23 and trans_no=12 / register_no=3 would both start with "123").
# NOTE(review): `sales` only adds up trans_type = 'T' rows while `num_items`
# counts trans_type = 'I' rows; at upc/description grain, confirm that the 'T'
# rows are the ones that carry product sales.
# NOTE(review): assumes department_lookup has one row per department; a
# duplicated department value would multiply rows through the LEFT JOIN -
# TODO confirm.
sales_by_product_description_by_year_by_month = '''
SELECT
    EXTRACT(YEAR FROM t.datetime) AS year,
    EXTRACT(MONTH FROM t.datetime) AS month,
    t.upc,
    t.description,
    t.department AS department_number,
    d.dept_name AS department_name,
    ROUND(SUM(CASE WHEN t.trans_type = 'T' THEN ABS(t.total) ELSE 0 END), 2) AS sales,
    COUNT(DISTINCT CONCAT(CAST(t.trans_no AS STRING), '|', CAST(t.register_no AS STRING), '|', CAST(t.emp_no AS STRING), '|', CAST(DATE(t.datetime) AS STRING))) AS num_transactions,
    COUNT(CASE WHEN t.trans_type = 'I' AND t.unitPrice > 0 THEN 1 END) AS num_items
FROM `rhamre.wedge.transArchive*`t
LEFT JOIN `rhamre.wedge.department_lookup` d
    ON t.department = d.department
GROUP BY 
    t.upc, 
    t.description, 
    t.department, 
    d.dept_name, 
    year, 
    month
ORDER BY 
    year, 
    month, 
    t.upc
'''

Create the SQLite database (.db file) from the query results

In [56]:
# Run the queries and store the results in DataFrames.
# NOTE: each assignment rebinds the name that held the SQL text to the
# resulting DataFrame, so the query-definition cells must be re-run before
# this cell is executed a second time.
sales_by_date_by_hour = run_bigquery_query(sales_by_date_by_hour)
sales_by_owner_by_year_by_month = run_bigquery_query(sales_by_owner_by_year_by_month)
sales_by_product_description_by_year_by_month = run_bigquery_query(sales_by_product_description_by_year_by_month)

# Destination SQLite table name -> DataFrame that fills it.
tables_to_write = {
    'sales_by_date_by_hour': sales_by_date_by_hour,
    'sales_by_owner_by_year_by_month': sales_by_owner_by_year_by_month,
    'sales_by_product_description_by_year_by_month': sales_by_product_description_by_year_by_month,
}

# Connect to the SQLite database (or create one if it doesn't exist)
sqlite_conn = sqlite3.connect('Task_3_bigquery_results.db')
try:
    # Write the DataFrames to SQLite tables, replacing any table left over
    # from an earlier run.
    for table_name, frame in tables_to_write.items():
        frame.to_sql(table_name, sqlite_conn, if_exists='replace', index=False)
finally:
    # Close the connection even when a write fails part-way, so the database
    # file handle is never leaked.
    sqlite_conn.close()

print("All tables successfully written to SQLite database!")



All tables successfully written to SQLite database!
