# Weekly Stats

We load the following libraries so the notebook can query, load, manipulate, and view the data.

In [None]:
import os
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import sqlalchemy
from cloud_sql_connector import DBConfig, getconn

%load_ext sql
%config SqlMagic.displaylimit = 5

This cell builds the SQLAlchemy engine used to connect to the Cloud SQL database and runs a quick connection test. (The Jupyter SQL magic is loaded in the imports cell above.)

In [None]:
# Database connection setup.
# Every deployment-specific value is read from the environment, so nothing
# sensitive is stored in the notebook; os.getenv yields None for unset names.
config = DBConfig(
    instance_name=os.getenv('DATABASE_INSTANCE_CONNECTION_NAME'),
    database=os.getenv('DATABASE_NAME'),
    user=os.getenv('DATABASE_USERNAME'),
    ip_type="public",
    schema=os.getenv('DATABASE_SCHEMA'),
)


def get_conn():
    """Connection factory handed to SQLAlchemy.

    Returns whatever ``getconn`` produces for the module-level ``config``.
    """
    return getconn(config)


# The URL only names the dialect and driver; actual connections are obtained
# through the ``creator`` callback.
engine = sqlalchemy.create_engine("postgresql+pg8000://", creator=get_conn)
print("Cloud SQL engine created:", engine)

# Smoke test: run a trivial SELECT. A failure is reported but does not stop the
# notebook, so later cells can still execute and report their own errors.
try:
    with engine.connect() as conn:
        probe = sqlalchemy.text("SELECT 1 AS test")
        print("✅ Connection test:", conn.execute(probe).fetchone())
except Exception as err:
    print("❌ Connection failed:", err)

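Weekly run: set up the output CSV path and define the query that counts the names submitted during the seven days ending yesterday.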

In [None]:
# Build the path of the CSV report written by the cells below.
# Path, datetime and timedelta are imported in the imports cell at the top of
# the notebook, so they are not re-imported here.

# Always use notebook folder as base. The relative path is resolved against the
# current working directory, which papermill sets when it runs the notebook.
notebook_dir = Path(".")

# Output directory (same as notebook folder, no extra 'daily')
output_dir = notebook_dir
# parents=True keeps this working if output_dir is later pointed at a nested folder.
output_dir.mkdir(parents=True, exist_ok=True)

# The report is named after yesterday's date (local clock of the machine running
# the notebook), matching the last day covered by the weekly queries.
datestr = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
filename = output_dir / f"weekly_totals_till_{datestr}.csv"
print("CSV will be saved to:", filename)


# %%
# Weekly submitted query.
# Returns a single row/column (number_of_names_submitted): the count of requests
# whose submitted_date, once converted and truncated to a date, falls within the
# seven calendar days ending yesterday (current_date - 1).
# NOTE(review): the double "at time zone" presumably treats the stored value as
# UTC and shifts it to Pacific time; 'pst' looks like a fixed offset that ignores
# daylight saving — confirm against the column type and reporting requirements.
weekly_submitted_query = """
SELECT count(r.*) AS number_of_names_submitted
FROM requests r
WHERE date(r.submitted_date at time zone 'utc' at time zone 'pst')  
      > date(current_date - 1 - interval '1 weeks')
  AND date(r.submitted_date at time zone 'utc' at time zone 'pst')  
      <= date(current_date - 1)
"""


In [None]:
# Run the weekly-submitted count and preview it. Any failure (query or display)
# is reported and replaced by an empty frame so the report file is still written.
try:
    stat_weekly_submitted_df = pd.read_sql(weekly_submitted_query, engine)
    display(stat_weekly_submitted_df.head())
except Exception as err:
    print("Error running weekly submitted query:", err)
    stat_weekly_submitted_df = pd.DataFrame()

# Save submitted section to CSV.
# Mode 'w' starts the report from scratch; the completed section is appended later.
submitted_has_rows = not stat_weekly_submitted_df.empty
with open(filename, 'w', newline='', encoding='utf-8') as report:
    if submitted_has_rows:
        stat_weekly_submitted_df.to_csv(report, sep=',', index=False)
    else:
        report.write('No Data Retrieved for Weekly Submitted\n')


In [None]:
# Weekly completed query: per-examiner counts of decision events whose event_dt
# (converted and truncated to a date) falls within the seven calendar days ending
# yesterday, followed by one 'SUM' row that totals every numeric column.
#   * The Detail CTE joins events to requests (r.id = e.nr_id), skips user_id 1,
#     keeps only APPROVED / REJECTED / CONDITIONAL / CANCELLED events and groups
#     them by the event's user_id.
#   * Only rows whose username contains 'idir' are returned and summed.
# NOTE(review): `total` is count(events) + count(priority requests), so an event
# on a priority request contributes twice — confirm this weighting is intended,
# because the percentage columns derived later divide by `total`.
# NOTE(review): user_id 1 is presumably a system/service account — confirm.
# NOTE(review): 'pst' looks like a fixed offset that ignores daylight saving —
# confirm against reporting requirements.
weekly_completed_query = """
WITH Detail AS 
(
    SELECT e.user_id,
           (SELECT username FROM users u WHERE u.id=e.user_id) AS examiner,
           count(e.*) FILTER (WHERE e.state_cd = 'APPROVED')  AS approved,
           count(e.*) FILTER (WHERE e.state_cd = 'REJECTED')  AS rejected,
           count(e.*) FILTER (WHERE e.state_cd = 'CONDITIONAL')  AS conditional,
           count(e.*) FILTER (WHERE e.state_cd = 'CANCELLED')  AS cancelled,
           count(r.*) FILTER (WHERE r.priority_cd = 'Y') AS priorities,
           count(e.*) + count(r.*) FILTER (WHERE r.priority_cd = 'Y') AS total
    FROM events e,
         requests r
    WHERE e.user_id != 1
      AND r.id = e.nr_id
      AND date(e.event_dt at time zone 'utc' at time zone 'pst')  
          > date(current_date - 1 - interval '1 weeks')
      AND date(e.event_dt at time zone 'utc' at time zone 'pst')  
          <= date(current_date - 1)
      AND e.state_cd IN ('APPROVED','REJECTED','CONDITIONAL','CANCELLED')
    GROUP BY e.user_id
)
SELECT * FROM Detail WHERE examiner LIKE '%idir%'
UNION ALL
SELECT null,
       'SUM' AS examiner,
       sum(approved) AS approved,
       sum(rejected) AS rejected,
       sum(conditional) AS conditional,
       sum(cancelled) AS cancelled,
       sum(priorities) AS priorities,
       sum(total) AS total
FROM Detail WHERE examiner LIKE '%idir%';
"""

In [None]:
# Load the per-examiner breakdown. Start from an empty frame so that a failed
# query leaves a well-defined (empty) result for the cells that follow.
stat_weekly_completed_df = pd.DataFrame()
try:
    stat_weekly_completed_df = pd.read_sql(weekly_completed_query, engine)
except Exception as err:
    print("Error running weekly completed query:", err)

In [None]:
# Add presentation columns, but only when the query actually returned rows.
if not stat_weekly_completed_df.empty:
    completed = stat_weekly_completed_df  # alias: columns are written onto the same frame

    # Drop the literal 'idir/' prefix from examiner names (plain text match, no regex).
    completed['examiner'] = (
        completed['examiner'].astype(str).str.replace('idir/', '', regex=False)
    )

    # Shares of the `total` column, as percentages rounded to two decimals.
    total = completed['total']
    approved_or_conditional = completed['approved'] + completed['conditional']
    completed['approved+conditional_%'] = ((approved_or_conditional / total) * 100).round(2)
    completed['rejected_%'] = ((completed['rejected'] / total) * 100).round(2)


Append the weekly completed results to the CSV file.

In [None]:
# Append the "names processed" section after what is already in the report file.
with open(filename, 'a', newline='', encoding='utf-8') as report:
    report.write('\n\nNumber of Names Processed:\n')
    if not stat_weekly_completed_df.empty:
        stat_weekly_completed_df.to_csv(report, sep=',', index=False)
    else:
        report.write('No Data Retrieved for Weekly Completed\n')

# %%
# Verification: confirm the report exists and show its size in bytes.
csv_exists = os.path.exists(filename)
csv_size = os.path.getsize(filename) if csv_exists else "N/A"
print("CSV saved at:", filename)
print("Exists:", csv_exists, "Size (bytes):", csv_size)
