# Filings Daily Stats

These libraries are needed to query, load, manipulate and view the data.

In [None]:
import os
import gzip
from pathlib import Path
from datetime import datetime
import pandas as pd
from cloud_sql_connector import DBConfig, getconn
import sqlalchemy

This creates the Cloud SQL connection settings and the SQLAlchemy engine that the notebook uses to run SQL queries.

In [None]:
# Every connection setting except the IP type is read from the environment;
# a variable that is not set is passed to DBConfig as None.
config = DBConfig(
    instance_name=os.environ.get('DATABASE_INSTANCE_CONNECTION_NAME'),
    database=os.environ.get('DATABASE_NAME'),
    user=os.environ.get('DATABASE_USERNAME'),
    ip_type='public',
    schema=os.environ.get('DATABASE_SCHEMA'),
)

def get_conn():
    """Return the connection that ``getconn`` builds from the notebook-level ``config``.

    Takes no arguments so it can be handed to SQLAlchemy as a ``creator``
    callable.
    """
    connection = getconn(config)
    return connection

# SQLAlchemy engine for the pg8000 PostgreSQL driver. The URL carries no host
# or credentials: every connection is supplied by get_conn() via `creator`.
engine = sqlalchemy.create_engine('postgresql+pg8000://', creator=get_conn)

print('✅ Cloud SQL engine created.')

In [None]:
# Request type code -> request class ('CORP', 'FIRM' or 'MISC'), grouped by
# class so it is easy to see which codes feed which report.
request_class_type_mapping = {
    **dict.fromkeys(
        (
            'AL', 'AS', 'ASO', 'BC', 'BEAM', 'BEC', 'BECR', 'BECT', 'BECV', 'BERE',
            'CC', 'CCC', 'CCCT', 'CCP', 'CCR', 'CCV', 'CFI', 'CLC', 'CP', 'CR',
            'CSO', 'CSSO', 'CT', 'CTC', 'CTSO', 'CUL', 'FI', 'LC', 'PA', 'RCC',
            'RCP', 'RCR', 'RFI', 'RLC', 'RSO', 'RUL', 'SO', 'UA', 'UC', 'UL',
            'ULCT', 'XASO', 'XCASO', 'XCCP', 'XCCR', 'XCP', 'XCR', 'XCSO', 'XCUL',
            'XRCP', 'XRCR', 'XRSO', 'XRUL', 'XSO', 'XUL', 'ULBE', 'ULCB',
        ),
        'CORP',
    ),
    **dict.fromkeys(
        ('CFR', 'CLL', 'CLP', 'FR', 'LL', 'LP', 'XCLL', 'XCLP', 'XLL', 'XLP'),
        'FIRM',
    ),
    **dict.fromkeys(
        ('CEM', 'LIB', 'NON', 'PAR', 'RLY', 'TMY'),
        'MISC',
    ),
}

# Name state -> three-character abbreviation used in the formatted record.
name_state_mapping = dict(
    APPROVED='App',
    CONDITION='Con',
    REJECTED='Rej',
    NE='Not',
)

In [None]:
# One row per name request (DISTINCT ON r.nr_num). The ORDER BY makes that row
# the one carrying the most recent matching 'patch' event. Only requests that
# are approved or conditional, not yet expired, and whose approved/conditional
# name has no corp_num are selected. Dates are rendered as YYYYMMDD in
# America/Vancouver time.
query = """
SELECT DISTINCT ON (r.nr_num)
    r.nr_num,
    to_char(r.last_update at time zone 'America/Vancouver', 'YYYYMMDD') as last_update,
    r.request_type_cd,
    n.state,
    to_char(e.event_dt at time zone 'America/Vancouver', 'YYYYMMDD') as event_dt,
    a.city,
    n.name
FROM requests r
JOIN events e ON r.id = e.nr_id
JOIN names n ON r.id = n.nr_id
JOIN applicants a ON a.nr_id = r.id
WHERE r.state_cd in ('APPROVED', 'CONDITIONAL')
  AND r.expiration_date::date > CURRENT_DATE
  AND n.corp_num IS NULL
  AND n.state in ('APPROVED', 'CONDITION')
  AND e.action = 'patch'
  AND e.state_cd in ('APPROVED', 'CONDITIONAL')
ORDER BY r.nr_num, e.event_dt DESC;
"""

# Run the statement through the engine and load the result set into a DataFrame.
df = pd.read_sql(sql=sqlalchemy.text(query), con=engine)
print(f'✅ Retrieved {len(df)} filings from the database.')

In [None]:
# Classify each request and abbreviate the name state. Codes that are missing
# from the lookup tables become 'UNKNOWN' instead of NaN.
df['mapped_request_type'] = df['request_type_cd'].map(request_class_type_mapping).fillna('UNKNOWN')
df['mapped_state'] = df['state'].map(name_state_mapping).fillna('UNKNOWN')

# The city field is exactly 40 characters wide. str.pad only ever adds
# characters, so longer values are cut to 40 first; otherwise an over-long city
# would shift every field that follows it in the record.
df['city'] = (
    df['city']
    .fillna(' ')
    .str.slice(0, 40)
    .str.pad(width=40, side='right', fillchar=' ')
)

# Assemble one fixed-width record per row:
#   'BC9' + nr_num[3:9] + '0' + last_update + nr_num[:2] + first letter of the
#   request class + state abbreviation + event_dt + city (40) + 'BC' + name
# The result is cut/padded to exactly 528 characters.
df['formatted_output'] = (
    'BC9' +
    df['nr_num'].astype(str).str[3:9] +
    '0' +
    df['last_update'] +
    df['nr_num'].astype(str).str[:2] +
    df['mapped_request_type'].astype(str).str[:1] +
    df['mapped_state'] +
    df['event_dt'] +
    df['city'] +
    'BC' +
    # A null name would turn the whole concatenation into NaN, so treat it as empty.
    df['name'].fillna('')
).str.slice(0, 528).str.pad(width=528, side='right', fillchar=' ')


In [None]:
# YYYYMMDD stamp used in the report file names.
# NOTE(review): this is the host's local date, while the query formats its
# dates in America/Vancouver -- confirm the job's timezone makes them agree.
datestr = f'{datetime.now():%Y%m%d}'

# Reports are written below the current working directory; create the folder
# (and any missing parents) if it is not there yet.
output_dir = Path.cwd().joinpath('sftp_nuans_report', 'data')
output_dir.mkdir(exist_ok=True, parents=True)

def write_and_compress(df_filtered, filename_base, out_dir=None):
    """Write the formatted report lines of ``df_filtered`` to a gzip archive.

    Every non-null value of the ``formatted_output`` column becomes one
    UTF-8 encoded line terminated by ``\\n``. The lines are compressed straight
    into ``<filename_base>.gz``; no uncompressed intermediate file is created.

    Args:
        df_filtered: DataFrame holding a ``formatted_output`` column of strings.
        filename_base: Archive file name without the ``.gz`` extension.
        out_dir: Directory to write into. Defaults to the notebook-level
            ``output_dir`` when omitted.

    Returns:
        The path of the archive, or ``None`` when there is nothing to write.
    """
    if df_filtered.empty:
        print(f'⚠️ No data to write for {filename_base}')
        return None

    # A null record cannot be rendered as a line (joining it raises TypeError),
    # so such rows are left out and reported instead of aborting the export.
    lines = df_filtered['formatted_output'].dropna().tolist()
    skipped = len(df_filtered) - len(lines)
    if skipped:
        print(f'⚠️ Skipped {skipped} row(s) without formatted output for {filename_base}')
    if not lines:
        print(f'⚠️ No data to write for {filename_base}')
        return None

    target_dir = Path(output_dir if out_dir is None else out_dir)
    base_path = target_dir / f'{filename_base}'
    gz_file = base_path.with_suffix(base_path.suffix + '.gz')

    # NOTE(review): the earlier implementation renamed the plain-text file to
    # '<filename_base>_<line count>_reserved' before compressing it, yet the
    # archive itself was always named '<filename_base>.gz'. The archive name is
    # kept unchanged here; confirm with the consumer of these files whether the
    # line count / '_reserved' suffix was meant to be part of it.

    # Encode up front and write bytes so line endings are always '\n',
    # regardless of the platform's text-mode newline translation.
    payload = ('\n'.join(lines) + '\n').encode('utf-8')
    with gzip.open(gz_file, 'wb') as f_out:
        f_out.write(payload)

    return gz_file

# Write CORP and FIRM reports
corp_file = write_and_compress(df[df['mapped_request_type']=='CORP'], f'bccn_mr_{datestr}')
firm_file = write_and_compress(df[df['mapped_request_type']=='FIRM'], f'bcbn_mr_{datestr}')

print(f'✅ Reports generated: {corp_file} and {firm_file}')