# Dye Durham Filing Statistics Report

In [None]:
import os
import pandas as pd
import csv
import matplotlib
from datetime import datetime, timedelta
from IPython.core.display import HTML
from cloud_sql_connector import DBConfig, getconn
import pg8000
import sqlalchemy

%load_ext sql
%config SqlMagic.displaylimit = 5

In [None]:
# Cloud SQL connection settings; instance, database and user are read from environment variables
config = DBConfig(
    ip_type="public",     # or "private", depending on the setup
    schema="business",    # the schema is fixed here in the config
    instance_name=os.environ.get('DB_INSTANCE_CONNECTION_NAME'),  # Cloud SQL instance connection name
    database=os.environ.get('DB_NAME'),
    user=os.environ.get('DB_USER'),
)

def get_conn():
    """Open a Google Cloud SQL connection through the custom cloud-sql-connector.

    The connection is requested with the module-level ``config`` and returned
    unchanged to the caller.
    """
    connection = getconn(config)
    return connection

## Database Connection Setup

This notebook now uses the custom bcgov cloud-sql-connector library to connect to the database in GCP. 

Required environment variables:
- `DB_USER`: Database username
- `DB_NAME`: Database name  
- `DB_INSTANCE_CONNECTION_NAME`: Cloud SQL instance connection name (format: `project:region:instance`)

The connection automatically uses the `business` schema and handles IAM authentication through the custom connector.

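The next cell creates the SQLAlchemy engine that connects to the Cloud SQL database through the custom bcgov cloud-sql-connector and runs a quick test query. (The Jupyter SQL magic is loaded in the imports cell at the top of the notebook.)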

In [None]:
# Build the SQLAlchemy engine; every connection is supplied by get_conn (custom cloud-sql-connector)
engine = sqlalchemy.create_engine("postgresql+pg8000://", creator=get_conn)

print(
    "Custom cloud-sql-connector engine created successfully!",
    f"Engine: {engine}",
    sep="\n",
)

# Smoke test: run a trivial SELECT. A failure is printed, not raised, so the notebook keeps going.
try:
    with engine.connect() as connection:
        result = connection.execute(
            sqlalchemy.text("SELECT 1 as test")
        )
        print("✅ Database connection test successful!")
        print(f"Test result: {result.fetchone()}")
except Exception as e:
    print(f"❌ Connection failed: {e}")

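Dye and Durham: filing statistics for Cooperative (CP), Sole Proprietorship (SP), and General Partnership (GP) entities. The counts cover the calendar month that contains the date 27 days before the notebook is run (America/Vancouver time).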

In [None]:
# Dye Durham Filing Query
# Counts filings per filing_type for businesses whose legal_type is CP, SP or GP,
# skipping the 'lear_epoch' filing type. A filing is counted when filing_date,
# taken AT TIME ZONE 'America/Vancouver' and formatted as yyyymm, equals the
# yyyymm of (now - 27 days) in the same time zone.
# NOTE(review): the 27-day offset presumably selects the previous calendar month
# when the notebook runs early in a month — confirm against the job schedule.
query = """
SELECT filing_type AS Filing, COUNT(*) AS Count
FROM business.filings f, business.businesses b
WHERE f.business_id = b.id
  AND b.legal_type IN ('CP','SP','GP') -- Benefit Companies will be included in the email from CPRD
  AND filing_type <> 'lear_epoch'
  AND TO_CHAR(filing_date AT TIME ZONE 'America/Vancouver','yyyymm') = TO_CHAR(now() AT TIME ZONE 'America/Vancouver' - INTERVAL '27 days','yyyymm')
GROUP BY Filing
ORDER BY filing
"""

# Run the query through the SQLAlchemy engine and preview the first rows.
# Any exception raised inside the try body (query, print or display) is printed
# rather than propagated, and dye_durham_filing is replaced by an empty DataFrame.
try:
    dye_durham_filing = pd.read_sql(query, engine)
    print(f"✅ Dye Durham filing query successful! Found {len(dye_durham_filing)} rows")
    if not dye_durham_filing.empty:
        display(dye_durham_filing.head())
except Exception as e:
    print(f"❌ Dye Durham filing query failed: {e}")
    dye_durham_filing = pd.DataFrame()  # Create empty DataFrame for error handling


In [None]:
# Write the per-filing-type counts to the dated report CSV, replacing any existing file of the same name.
dye_durham = dye_durham_filing.copy()  # Use the DataFrame from the previous query

# The file name and the heading are stamped with yesterday's date.
# NOTE(review): the query selects a whole month, so a single-day stamp in the heading
# may not describe the data precisely — confirm the intended wording with the report owner.
datestr = datetime.strftime(datetime.now()-timedelta(1), '%Y-%m-%d')
filename =  os.path.join(os.getcwd(), r'data/')+'Dye_and_Durham_' + datestr +'.csv'

# Make sure the output directory exists; open() raises FileNotFoundError otherwise
# (e.g. on a fresh checkout or a clean container).
os.makedirs(os.path.dirname(filename), exist_ok=True)

# The encoding is set on the file handle itself: to_csv's `encoding` argument is not
# applied when it is handed an already-open text-mode file, so it is dropped below.
with open(filename, 'w', encoding='utf-8') as f:
    if dye_durham.empty:
        f.write('No Data Retrieved for Dye and Durham filings ' + datestr)
    else:
        f.write('Coop, Sole Proprietorship, and General Partnership Filings for ' + datestr + ':\n')
        dye_durham.to_csv(f, sep=',', index=False)
    # Blank lines separate this section from the total that a later cell appends.
    f.write('\n\n')

In [None]:
# Dye Durham Total Query
# Returns a single row labelled 'GRAND TOTAL' with the overall count of filings,
# using the same filters as the per-filing-type query: legal_type in CP/SP/GP,
# filing_type other than 'lear_epoch', and filing_date (AT TIME ZONE
# 'America/Vancouver', formatted yyyymm) equal to the yyyymm of (now - 27 days).
query = """
SELECT 'GRAND TOTAL' AS Total, COUNT(*)
FROM business.filings f, business.businesses b
WHERE f.business_id = b.id
  AND b.legal_type IN ('CP','SP','GP') -- Benefit Companies will be included in the email from CPRD
  AND filing_type <> 'lear_epoch'
  AND TO_CHAR(filing_date AT TIME ZONE 'America/Vancouver','yyyymm') = TO_CHAR(now() AT TIME ZONE 'America/Vancouver' - INTERVAL '27 days','yyyymm')
"""

# Run the query through the SQLAlchemy engine and preview the result.
# Any exception raised inside the try body is printed rather than propagated,
# and dye_durham_total is replaced by an empty DataFrame.
try:
    dye_durham_total = pd.read_sql(query, engine)
    print(f"✅ Dye Durham total query successful! Found {len(dye_durham_total)} rows")
    if not dye_durham_total.empty:
        display(dye_durham_total.head())
except Exception as e:
    print(f"❌ Dye Durham total query failed: {e}")
    dye_durham_total = pd.DataFrame()  # Create empty DataFrame for error handling


In [None]:
# Append the grand-total row to the report file; `filename` and `datestr` come from the earlier write cell.
dye_durham_total_result = dye_durham_total.copy()  # Use the DataFrame from the previous query

# The encoding is set on the file handle itself: to_csv's `encoding` argument is not
# applied when it is handed an already-open text-mode file, so it is dropped below.
with open(filename, 'a', encoding='utf-8') as f:
    if dye_durham_total_result.empty:
        f.write('No Data Retrieved for Dye and Durham filings ' + datestr)
    else:
        dye_durham_total_result.to_csv(f, sep=',', index=False)
    f.write('\n\n')