# Filings Daily Stats

Load the libraries this notebook needs to query, load, manipulate, and view the data.

In [None]:
import os
import csv
import gzip
from datetime import datetime

# Load the IPython extensions that provide the %sql and %dotenv magics used further down.
%load_ext sql
%load_ext dotenv
# Set the SqlMagic display limit to 5 (presumably rows shown when a result is rendered;
# TODO confirm it does not truncate results stored in variables).
%config SqlMagic.displaylimit = 5

In [None]:
from dotenv import find_dotenv
from dotenv import load_dotenv
# Load variables from the .env file located by find_dotenv() so os.getenv() can read them later.
load_dotenv(find_dotenv())

In [None]:
# Load .env again through the dotenv magic; -o presumably overrides variables that are
# already set in the environment (confirm against the python-dotenv docs).
%dotenv -o

### DB Connection 

In [None]:
import google.auth
from google.cloud.sql.connector import Connector
import sqlalchemy

# Database settings come from environment variables; missing values fall back to "".
DB_USER = os.environ.get("DATABASE_USERNAME", "")
DB_NAME = os.environ.get("DATABASE_NAME", "")
DB_PASSWORD = os.environ.get("DATABASE_PASSWORD", "")
DB_HOST = os.environ.get("DATABASE_HOST", "")
DB_PORT = os.environ.get("DATABASE_PORT", "")
# No default here: an unset value (None) makes get_pool() use the host/port URL instead.
DB_CONNECTION_NAME = os.environ.get("LEAR_DB_CONNECTION_NAME")  # project:region:instance-name

# Connector instance shared by every get_conn() call
connector = Connector()

# function to return the database connection object
def get_conn():
    """Open a new database connection through the shared ``connector``.

    Connects to ``DB_CONNECTION_NAME`` with the ``pg8000`` driver, using the
    public IP type, ``DB_USER`` / ``DB_NAME`` and IAM authentication enabled.

    Returns:
        The connection object returned by ``connector.connect``.
    """
    connect_kwargs = {
        "ip_type": "public",
        "user": DB_USER,
        "db": DB_NAME,
        "enable_iam_auth": True,
    }
    return connector.connect(DB_CONNECTION_NAME, "pg8000", **connect_kwargs)

def get_pool():
    """Create the SQLAlchemy engine used to query the database.

    When ``DB_CONNECTION_NAME`` is empty or unset, the engine is built from a
    ``postgresql://`` URL assembled from ``DB_USER``, ``DB_PASSWORD``,
    ``DB_HOST``, ``DB_PORT`` and ``DB_NAME``. Otherwise the engine uses
    ``get_conn`` as its connection ``creator``.

    The username and password are expected as raw (not pre-encoded) values;
    they are percent-encoded here before being placed in the URL.

    Returns:
        The engine returned by ``sqlalchemy.create_engine``.
    """
    if not DB_CONNECTION_NAME:
        # Function-scope import keeps this cell self-contained.
        from urllib.parse import quote

        # Percent-encode the credentials so reserved URL characters
        # (such as @ : / %) in them cannot corrupt the connection URL.
        user = quote(DB_USER, safe="")
        password = quote(DB_PASSWORD, safe="")
        connect_string = f'postgresql://{user}:{password}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
        return sqlalchemy.create_engine(connect_string)

    # NOTE: also need GOOGLE_APPLICATION_CREDENTIALS to be set in the .env
    #    - when running the notebook on its own, the ^ cred path starts in this folder instead of the root of the service
    #    - if this path is invalid or the creds are invalid the line below will fail
    # The return value is not needed; the call is kept only so bad credentials fail early.
    google.auth.default()

    # create connection pool with 'creator' argument to our connection object function
    return sqlalchemy.create_engine(
        "postgresql+pg8000://",
        creator=get_conn,
    )

In [None]:
# Build the engine, register it with the sql magic under the alias "lear",
# then (presumably) make "lear" the active connection for the following %sql cells.
lear_pool = get_pool()
%sql lear_pool --alias lear
%sql lear

In [None]:
# List the connections known to the sql magic, as a check that "lear" is registered.
%sql --connections

Run a minimal query to confirm that the libraries are loaded and the DB connection works.

In [None]:
%%sql 
select now() AT TIME ZONE 'America/Vancouver' as current_date  -- full zone name follows daylight saving, same zone as the extract query

### Collect data

In [None]:
%%sql  BCFN_MR  <<
select               -- CURRENT NAME AND ACTIVE
-- The result of this cell is stored in the variable BCFN_MR.
-- Every branch builds one fixed-width text value by concatenation, in this order
--   3 chars    record tag, a blank followed by FM or CH
--   up to 7    identifier characters starting at position 3
--   8 chars    first date field as yyyymmdd (founding date in the FM branches)
--   1 char     blank
--   3 chars    legal type padded or cut to width 3 (literal CH plus blank in the CH branch)
--   2 chars    a blank followed by 1 for ACTIVE rows and 2 for all other rows
--   8 chars    second date field (zeros in this branch)
--   42 chars   blank filler
--   454 chars  legal name padded or cut to width 454
' FM'
||substr(identifier,3,7)
||to_char(founding_date at time zone 'America/Vancouver','yyyymmdd')
||' '
||rpad(legal_type,3)
||' 1'
||'00000000'
||rpad(' ',42)
||rpad(legal_name,454)
from businesses
where legal_type in ('SP','GP')
and state='ACTIVE'
UNION ALL
select               -- CURRENT NAME AND HISTORICAL
-- Same layout as above, the second date field holds the dissolution date
-- as yyyymmdd, or 8 blanks when the dissolution date is NULL.
' FM'
||substr(identifier,3,7)
||to_char(founding_date at time zone 'America/Vancouver','yyyymmdd')
||' '
||rpad(legal_type,3)
||' 2'
||CASE WHEN dissolution_date at time zone 'America/Vancouver' is NULL THEN rpad('', 8)
       WHEN dissolution_date at time zone 'America/Vancouver' is NOT NULL THEN to_char(dissolution_date at time zone 'America/Vancouver','yyyymmdd')
  END 
||rpad(' ',42)
||rpad(legal_name,454)
from businesses
where legal_type in ('SP','GP')
and state='HISTORICAL'
UNION ALL
select               -- OLD NAME IN THE LAST 2 YEARS
-- One distinct CH record per previous legal name. Both date fields are zero filled.
-- A version row qualifies when its legal name differs from the current business name
-- and the filing that ended it (matched on end_transaction_id) has an effective date
-- later than two years before the current date.
distinct 
' CH'
||substr(b.identifier,3,7)
||'00000000'
||' '
||'CH '
||' 2'
||'00000000'
||rpad(' ',42)
||rpad(bv.legal_name,454)
from businesses         b
    ,businesses_version bv
    ,filings            f
where b.identifier=bv.identifier
and b.legal_name != bv.legal_name
and b.legal_type in ('SP','GP')
and f.transaction_id=bv.end_transaction_id
and f.effective_date at time zone 'America/Vancouver' > current_date at time zone 'America/Vancouver' - interval '2 years'; 

### Save data to file

In [None]:
# The extract is written to <cwd>/data/BCFN_MR_<YYYYMMDD>.
datestr = datetime.now().strftime('%Y%m%d')
data_dir = os.path.join(os.getcwd(), 'data')
# Create the output folder if it is missing so open() cannot fail on a fresh checkout.
os.makedirs(data_dir, exist_ok=True)
BCFN_MR_filename = os.path.join(data_dir, 'BCFN_MR_' + datestr)

# Convert the query result to a DataFrame once and reuse it below.
bcfn_mr_df = BCFN_MR.DataFrame()

# An empty result still produces an (empty) file so the later cells have something to rename.
with open(BCFN_MR_filename, 'w') as f:
    if not bcfn_mr_df.empty:
        # One row per line, without the header row or the index column.
        dfAsString = bcfn_mr_df.to_string(header=False, index=False)
        f.write(dfAsString)

In [None]:
# Count the lines in the extract and append that count to the filename.
with open(BCFN_MR_filename, 'r') as f:
    # Stream the file instead of loading every line into memory just to count them.
    line_count = sum(1 for _ in f)

# Rename only after the file is closed; renaming a file that is still open fails on Windows.
new_BCFN_MR_filename = BCFN_MR_filename + '_' + str(line_count)
os.rename(BCFN_MR_filename, new_BCFN_MR_filename)

In [None]:
# Compress the renamed extract into a sibling file with a .gz suffix.
gz_path = new_BCFN_MR_filename + '.gz'
with open(new_BCFN_MR_filename, 'rb') as f_in:
    with gzip.open(gz_path, 'wb') as f_out:
        # Copy the raw bytes across one line at a time.
        for chunk in f_in:
            f_out.write(chunk)
# Remove the uncompressed file so only the gzip archive remains.
os.remove(new_BCFN_MR_filename)