# NameX Daily Stats

We need to load these libraries into the notebook in order to query, load, manipulate, and view the data.

In [None]:
import os
import csv
import pandas as pd
from datetime import datetime, timedelta
from cloud_sql_connector import DBConfig, getconn
import pg8000
import sqlalchemy

This creates the connection to the database: a SQLAlchemy engine that obtains its connections through the Cloud SQL connector, followed by a quick connectivity test.

In [None]:
# Connection settings come from environment variables so that nothing
# instance-specific is hard-coded in the notebook. A variable that is not set
# is passed through as None.
config = DBConfig(
    instance_name=os.environ.get("DATABASE_INSTANCE_CONNECTION_NAME"),
    database=os.environ.get("DATABASE_NAME"),
    user=os.environ.get("DATABASE_USERNAME"),
    ip_type="public",
    schema=os.environ.get("DATABASE_SCHEMA"),
)

def get_conn():
    """Open a Cloud SQL connection via the bcgov cloud-sql-connector.

    Takes no arguments; the connection is built from the module-level
    ``config`` object.
    """
    connection = getconn(config)
    return connection

# Build the SQLAlchemy engine. The URL only names the dialect and driver
# (postgresql + pg8000); the connections themselves are produced by get_conn.
engine = sqlalchemy.create_engine("postgresql+pg8000://", creator=get_conn)

print("Cloud SQL engine created.")
print(f"Engine: {engine}")

# Smoke-test the engine with a trivial query. A failure is reported via print
# rather than raised, so the notebook keeps running past this cell.
try:
    with engine.connect() as connection:
        probe_statement = sqlalchemy.text("SELECT 1 as test")
        result = connection.execute(probe_statement)
        print("✅ Database connection successful!")
        print(result.fetchone())
except Exception as exc:
    print(f"❌ Connection failed: {exc}")

Daily totals for the specified date. In the following queries, `current_date - 0` means today, `current_date - 1` means yesterday, `current_date - 2` means the day before yesterday, and so on.

In [None]:
from pathlib import Path
from datetime import datetime, timedelta

# Everything is resolved relative to the current working directory, which
# papermill is expected to set to the notebook's folder.
notebook_dir = Path(".")

# The CSV goes straight into the notebook folder (no extra 'daily' subfolder).
output_dir = notebook_dir
output_dir.mkdir(exist_ok=True)

# The report covers yesterday, so the file name carries yesterday's date.
# NOTE(review): this date comes from the local clock of the machine running the
# notebook, whereas the queries use the database's current_date — confirm the
# two always agree.
datestr = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
filename = output_dir / f"daily_totals_{datestr}.csv"
print("CSV will be saved to:", filename)


In [None]:
# Count the requests whose submitted_date (converted with
# "at time zone 'utc' at time zone 'pst'") falls on current_date - 1,
# i.e. yesterday.
query_submitted = """
SELECT count(r.*) AS number_of_names_submitted
FROM requests r
WHERE date(r.submitted_date at time zone 'utc' at time zone 'pst') = date(current_date - 1)
"""
df_submitted = pd.read_sql(sql=query_submitted, con=engine)

# Mode 'w' starts a fresh report file for this run.
with open(filename, mode='w', newline='', encoding='utf-8') as report:
    if not df_submitted.empty:
        df_submitted.to_csv(report, index=False)
    else:
        report.write('No Data Retrieved for Daily Submitted\n')

In [None]:
# Per-examiner count of names processed yesterday, plus a grand-total row.
#
# Detail CTE: joins events to requests (r.id = e.nr_id) and keeps events whose
# event_dt (converted with "at time zone 'utc' at time zone 'pst'") falls on
# current_date - 1, whose state_cd is APPROVED / REJECTED / CONDITIONAL /
# CANCELLED, and whose user_id is not 1. Rows are grouped by e.user_id.
#   total = count of events + count of those rows where r.priority_cd = 'Y',
#           so a priority request contributes twice.
# Outer query: keeps examiners whose username contains 'idir' and appends one
# row labelled 'SUM' that holds the sum of their totals.
#
# NOTE(review): the ORDER BY sits inside the CTE; SQL does not guarantee that
# ordering survives the outer UNION ALL — confirm row order is not relied upon.
query_completed = """
WITH Detail AS (
    SELECT
        to_char(date(current_date -1), 'YYYY-Mon-DD') AS examined_date,
        (SELECT username FROM users u WHERE u.id=e.user_id) AS examiner,
        count(e.*) + count(r.*) FILTER (WHERE r.priority_cd = 'Y') AS total
    FROM events e,
         requests r
    WHERE e.user_id != 1
      AND r.id = e.nr_id
      AND date(e.event_dt at time zone 'utc' at time zone 'pst') = date(current_date - 1)
      AND e.state_cd IN ('APPROVED','REJECTED','CONDITIONAL','CANCELLED')
    GROUP BY e.user_id
    ORDER BY total DESC
)
SELECT * FROM Detail WHERE examiner LIKE '%idir%'
UNION ALL
SELECT null, 'SUM' AS examiner, sum(total) AS total
FROM Detail WHERE examiner LIKE '%idir%'
"""

In [None]:
df_completed = pd.read_sql(query_completed, engine)

# The query ends with "UNION ALL SELECT null, 'SUM' AS examiner, sum(total)".
# An aggregate without GROUP BY returns one row even when nothing matched, so
# on a day with no examiner activity the frame still holds a lone 'SUM' row
# (with a null total) and `df_completed.empty` would never be True.
# Drop that orphan row so the "No Data Retrieved for Daily Completed" message
# can actually be written when there is nothing to report.
# Detail rows must contain 'idir' (the query's LIKE filter), so the label
# 'SUM' cannot collide with a real examiner.
is_summary_row = df_completed['examiner'].eq('SUM')
if is_summary_row.all():
    # No examiner rows at all (or no rows whatsoever): leave an empty frame
    # that keeps the original columns.
    df_completed = df_completed.loc[~is_summary_row]
else:
    # Strip the 'idir/' directory prefix so only the bare username is reported.
    df_completed['examiner'] = df_completed['examiner'].str.replace('idir/', '', regex=False)

Save to CSV: append the "Number of Names Processed" results to the daily totals file created above.
    

In [None]:
# Append (mode 'a') the processed-names section to the existing report file.
with open(filename, mode='a', newline='', encoding='utf-8') as report:
    report.write('\n\nNumber of Names Processed:\n')
    if not df_completed.empty:
        df_completed.to_csv(report, index=False)
    else:
        report.write('No Data Retrieved for Daily Completed\n')