# Monthly stats from warehouse


Load the libraries this notebook needs to query, load, manipulate, and view the data.

In [None]:
import os
import csv
import cloud_sql_connector
import pandas as pd
from datetime import datetime, timedelta
from cloud_sql_connector import DBConfig, getconn
import pg8000
import sqlalchemy

ModuleNotFoundError: No module named 'cloud_sql_connector'

This cell creates the database connection (a SQLAlchemy engine) and runs a quick test query against it.

In [None]:
# Cloud SQL connection settings.
# Instance name, database and user are read from the environment; a variable
# that is not set is passed through as None.
config = DBConfig(
    instance_name=os.environ.get('DB_INSTANCE_CONNECTION_NAME'),
    database=os.environ.get('DB_NAME'),
    user=os.environ.get('DB_USER'),
    ip_type="public",
    schema="auth",
)

def get_conn():
    """Return the result of ``getconn`` for the module-level ``config``.

    Takes no arguments so it can be handed to SQLAlchemy as the engine's
    ``creator`` callable.
    """
    new_connection = getconn(config)
    return new_connection

# Build the SQLAlchemy engine. The URL only selects the dialect/driver;
# actual connections are produced by get_conn.
engine = sqlalchemy.create_engine("postgresql+pg8000://", creator=get_conn)

print("Cloud SQL engine created successfully!")

# Smoke test: run a trivial query and report the outcome. A failure is printed
# rather than raised, so execution continues past this point either way.
probe_statement = sqlalchemy.text("SELECT 1")
try:
    with engine.connect() as connection:
        result = connection.execute(probe_statement)
        print("✅ DB connection OK:", result.fetchone())
except Exception as exc:
    print("❌ Connection failed:", exc)

In [None]:
#%% File setup
# The report is stamped with yesterday's date (local clock of the machine
# running the notebook).
datestr = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

# Reports are written to ./data relative to the current working directory.
# Create the directory up front so the first open(filename, 'w') does not fail
# with FileNotFoundError on a fresh checkout.
data_dir = os.path.join(os.getcwd(), 'data')
os.makedirs(data_dir, exist_ok=True)

filename = os.path.join(data_dir, 'dwh_monthly_stats_' + datestr + '.csv')


In [None]:
#%% Accounts created in the previous calendar month
# The query window is [start of last month, start of this month), where
# "this month" is evaluated in America/Vancouver time.
query = """
WITH bounds AS (
  SELECT
    DATE_TRUNC('month', NOW() AT TIME ZONE 'America/Vancouver') AS this_month_start
)
SELECT
    TO_CHAR(
        DATE_TRUNC('month', created),
        'FMMonth YYYY'
    ) AS month,
    COUNT(*) AS record_count
FROM auth.orgs o, bounds
WHERE created >= this_month_start - INTERVAL '1 month'
  AND created <  this_month_start
GROUP BY 1
"""
stat_accounts_monthly_completed = pd.read_sql(query, engine)

# Prepend the section-title column. A scalar '' is broadcast to every row, so
# this works for zero, one or several result rows; a one-element list would
# raise ValueError whenever the query does not return exactly one row.
stat_accounts_monthly_completed.insert(0, "Monthly Number of New Accounts:", '', True)

# 'w' starts a fresh report file; the later sections append to it.
# Encoding and newline handling are set on the handle itself: to_csv ignores
# its `encoding` argument for text handles and writes its own line
# terminators, so the file must be opened with newline=''.
with open(filename, 'w', encoding='utf-8', newline='') as f:
    stat_accounts_monthly_completed.to_csv(f, sep=',', index=False)


In [None]:
#%% Name requests submitted in the previous calendar month
# The query window is [start of last month, start of this month), where
# "this month" is evaluated in America/Vancouver time.
query = """
WITH bounds AS (
  SELECT
    DATE_TRUNC('month', NOW() AT TIME ZONE 'America/Vancouver') AS this_month_start
)
SELECT
    TO_CHAR(
        DATE_TRUNC('month', submitted_date),
        'FMMonth YYYY'
    ) AS month,
    COUNT(*) AS record_count
FROM namex.requests, bounds
WHERE submitted_date >= this_month_start - INTERVAL '1 month'
  AND submitted_date <  this_month_start
GROUP BY 1
"""
stat_nr_total_completed = pd.read_sql(query, engine)

# Prepend the section-title column. A scalar '' is broadcast to every row, so
# this works for zero, one or several result rows; a one-element list would
# raise ValueError whenever the query does not return exactly one row.
stat_nr_total_completed.insert(0, "Monthly Number of New NRs:", '', True)

# Append this section to the report file. Encoding and newline handling are
# set on the handle itself: to_csv ignores its `encoding` argument for text
# handles and writes its own line terminators, so the file must be opened
# with newline=''.
with open(filename, 'a', encoding='utf-8', newline='') as f:
    # Two blank lines separate sections; os.linesep keeps the separator
    # consistent with the row terminators to_csv emits.
    f.write(os.linesep * 2)
    stat_nr_total_completed.to_csv(f, sep=',', index=False)

In [None]:
#%% SP/GP registrations founded in the previous calendar month
# The query window is [start of last month, start of this month), where
# "this month" is evaluated in America/Vancouver time. Only businesses whose
# legal_type is 'SP' or 'GP' are counted.
query = """
WITH bounds AS (
  SELECT
    DATE_TRUNC('month', NOW() AT TIME ZONE 'America/Vancouver') AS this_month_start
)
SELECT
    TO_CHAR(
        DATE_TRUNC('month', founding_date),
        'FMMonth YYYY'
    ) AS month,
    COUNT(*) AS record_count
FROM business.businesses b, bounds
WHERE founding_date >= this_month_start - INTERVAL '1 month'
  AND founding_date <  this_month_start
  AND legal_type IN ('SP', 'GP')
GROUP BY 1
"""
stat_sp_gp_registrations_completed = pd.read_sql(query, engine)

# Prepend the section-title column. A scalar '' is broadcast to every row, so
# this works for zero, one or several result rows; a one-element list would
# raise ValueError whenever the query does not return exactly one row.
stat_sp_gp_registrations_completed.insert(0, "Monthly SP/GP registrations:", '', True)

# Append this section to the report file. Encoding and newline handling are
# set on the handle itself: to_csv ignores its `encoding` argument for text
# handles and writes its own line terminators, so the file must be opened
# with newline=''.
with open(filename, 'a', encoding='utf-8', newline='') as f:
    # Two blank lines separate sections; os.linesep keeps the separator
    # consistent with the row terminators to_csv emits.
    f.write(os.linesep * 2)
    stat_sp_gp_registrations_completed.to_csv(f, sep=',', index=False)