# Filings Daily Stats

Load the libraries this notebook needs to query the database and to load, manipulate, and view the data.

In [None]:
import csv
import gzip
import os
from datetime import datetime
from urllib.parse import quote

import pandas as pd

# Load the `sql` extension so the `%sql` magic is available in later cells.
%load_ext sql
# Set the SQL magic's display limit to 5
# (presumably the number of rows rendered for a result — confirm).
%config SqlMagic.displaylimit = 5
# NOTE(review): looks like a compatibility setting for how the SQL magic
# styles its result tables — confirm against the installed version.
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

Build the database connection string from environment variables and connect the Jupyter SQL magic to it.

In [None]:
user     = os.getenv('PG_USER', '')
pw       = os.getenv('PG_PASSWORD', '')
db       = os.getenv('PG_DB_NAME', '')
host     = os.getenv('PG_HOST', '')
port     = int(os.getenv('PG_PORT', '5432'))
socket   = os.getenv('PG_UNIX_SOCKET', None)
if socket:
  connect_to_db = f"postgresql+psycopg2://{user}:{pw}@/{db}?host={socket}"
else:
  connect_to_db = f"postgresql://{user}:{pw}@{host}:{port}/{db}"
    
%sql $connect_to_db

In [None]:
# Request type codes, grouped by the class each one maps to.
_CORP_REQUEST_TYPES = (
    "AL", "AS", "ASO", "BC", "BEAM", "BEC", "BECR", "BECT", "BECV", "BERE",
    "CC", "CCC", "CCCT", "CCP", "CCR", "CCV", "CFI", "CLC", "CP", "CR",
    "CSO", "CSSO", "CT", "CTC", "CTSO", "CUL", "FI", "LC", "PA", "RCC",
    "RCP", "RCR", "RFI", "RLC", "RSO", "RUL", "SO", "UA", "UC", "UL",
    "ULCT", "XASO", "XCASO", "XCCP", "XCCR", "XCP", "XCR", "XCSO", "XCUL", "XRCP",
    "XRCR", "XRSO", "XRUL", "XSO", "XUL", "ULBE", "ULCB",
)
_FIRM_REQUEST_TYPES = (
    "CFR", "CLL", "CLP", "FR", "LL", "LP", "XCLL", "XCLP", "XLL", "XLP",
)
_MISC_REQUEST_TYPES = (
    "CEM", "LIB", "NON", "PAR", "RLY", "TMY",
)

# Flatten the groups into a single request-type-code -> class lookup.
request_class_type_mapping = {
    **dict.fromkeys(_CORP_REQUEST_TYPES, "CORP"),
    **dict.fromkeys(_FIRM_REQUEST_TYPES, "FIRM"),
    **dict.fromkeys(_MISC_REQUEST_TYPES, "MISC"),
}

# Three-character tag for each name state.
name_state_mapping = dict(
    APPROVED="App",
    CONDITION="Con",
    REJECTED="Rej",
    NE="Not",
)

In [None]:
# Query producing one row per NR number: DISTINCT ON (r.nr_num) together with
# the ORDER BY keeps, for each nr_num, the row with the latest e.event_dt.
# Both dates are rendered as YYYYMMDD strings in America/Vancouver time.
# Filters: the request is APPROVED/CONDITIONAL and not yet expired, the name
# has no corp_num and is APPROVED/CONDITION, and the event is a 'patch' whose
# state is APPROVED/CONDITIONAL.
# NOTE(review): the ORDER BY covers only nr_num and event_dt, so if several
# names/applicants rows share the latest event it does not determine which
# one is kept — confirm that is acceptable.
query = """
select DISTINCT ON (r.nr_num)
		r.nr_num,
		to_char(r.last_update at time zone 'America/Vancouver', 'YYYYMMDD') last_update,
		r.request_type_cd,
		n.state,
		to_char(e.event_dt at time zone 'America/Vancouver', 'YYYYMMDD') event_dt,
		a.city,
		n.name
from requests r, events e, names n, applicants a
where r.id = e.nr_id
and r.id = n.nr_id
and a.nr_id = r.id
and r.state_cd in ('APPROVED', 'CONDITIONAL')
and r.expiration_date::date > CURRENT_DATE
and n.corp_num is null
and n.state in ('APPROVED', 'CONDITION')
and e.action = 'patch' and e.state_cd in ('APPROVED', 'CONDITIONAL')
ORDER BY 
    r.nr_num, e.event_dt DESC; 
"""

# Run the query with the %sql line magic and keep the result set.
result = %sql $query

# Convert the result to a DataFrame. When the magic returns a falsy value,
# fall back to an empty frame that still has the columns later cells read.
if result:
    df = result.DataFrame()
else:
    df = pd.DataFrame(columns=[
        'nr_num', 'last_update', 'request_type_cd', 'state', 
        'event_dt', 'city', 'name'
    ])

In [None]:
# Fixed widths of the city field and of a complete output record.
CITY_WIDTH = 40
RECORD_WIDTH = 528

# Translate request type codes and name states through the lookup tables;
# values missing from a table become 'UNKNOWN'.
df['mapped_request_type'] = df['request_type_cd'].map(request_class_type_mapping).fillna('UNKNOWN')
df['mapped_state'] = df['state'].map(name_state_mapping).fillna('UNKNOWN')

# RPAD equivalent for 'city': cut to the field width first, then right-pad
# with spaces. str.pad alone never shortens a value, so an over-long city
# would push every following field out of position.
df['city'] = (
    df['city']
    .fillna(' ')
    .str.slice(0, CITY_WIDTH)
    .str.pad(width=CITY_WIDTH, side='right', fillchar=' ')
)

# Assemble the record by plain concatenation, in this order:
#   'BC9', nr_num[3:9], '0', last_update, nr_num[:2],
#   first letter of the mapped request type, mapped state, event_dt,
#   padded city, 'BC', name
df['formatted_output'] = (
    'BC9' +
    df['nr_num'].astype(str).str[3:9] +
    '0' +
    df['last_update'] +
    df['nr_num'].astype(str).str[:2] +
    df['mapped_request_type'].astype(str).str[:1] +
    df['mapped_state'] +
    df['event_dt'] +
    df['city'] +
    'BC' +
    df['name']
)

# RPAD equivalent for the whole record: truncate to the record width, then
# right-pad with spaces, so every line is exactly RECORD_WIDTH characters.
df['formatted_output'] = (
    df['formatted_output']
    .str.slice(0, RECORD_WIDTH)
    .str.pad(width=RECORD_WIDTH, side='right', fillchar=' ')
)

In [None]:
def _write_report(records, path):
    """Create (or truncate) `path` and write one formatted record per line.

    Each record is newline-terminated. An empty `records` frame still
    produces the file, just with no content.
    """
    with open(path, 'w') as report:
        if records.empty:
            return
        report.write("\n".join(records['formatted_output'].tolist()) + "\n")


# Current date as YYYYMMDD; it is embedded in both report file names.
datestr = datetime.now().strftime('%Y%m%d')

# Reports are written under <cwd>/sftp_nuans_report/data, created on demand.
output_dir = os.path.join(os.getcwd(), 'sftp_nuans_report/data')
os.makedirs(output_dir, exist_ok=True)

# Rows whose request type maps to 'CORP'.
corp_filename = os.path.join(output_dir, f'bccn_mr_{datestr}')
is_corp = df['mapped_request_type'] == 'CORP'
df_corp = df[is_corp]
_write_report(df_corp, corp_filename)

# Rows whose request type maps to 'FIRM'.
firm_filename = os.path.join(output_dir, f'bcbn_mr_{datestr}')
is_firm = df['mapped_request_type'] == 'FIRM'
df_firm = df[is_firm]
_write_report(df_firm, firm_filename)

print(f"Reports generated: {corp_filename} and {firm_filename}")

In [None]:
def _rename_with_line_count(path):
    """Rename `path` to `<path>_<line count>_reserved` and return the new path.

    Lines are counted by streaming the file rather than loading it whole.
    The rename runs only after the handle is closed, because renaming a file
    that is still open fails on Windows.
    """
    with open(path, 'r') as report:
        line_count = sum(1 for _ in report)
    renamed = f'{path}_{line_count}_reserved'
    os.rename(path, renamed)
    return renamed


# Append the number of lines in each report to its file name.
new_corp_filename = _rename_with_line_count(corp_filename)
new_firm_filename = _rename_with_line_count(firm_filename)

In [None]:
def _gzip_and_remove(path):
    """Compress `path` into `<path>.gz`, then delete the uncompressed file."""
    with open(path, 'rb') as plain, gzip.open(path + '.gz', 'wb') as packed:
        packed.writelines(plain)
    # Only the .gz copy is kept.
    os.remove(path)


# Compress the corp report first, then the firm report.
for _report_path in (new_corp_filename, new_firm_filename):
    _gzip_and_remove(_report_path)