# Filings Daily Stats

Load the libraries this notebook needs to query, load, manipulate, and view the data.

In [None]:
import csv
import gzip
import os
from datetime import datetime
from urllib.parse import quote_plus

# Load the IPython extension named `sql`; the %sql / %%sql magics in the cells below rely on it.
%load_ext sql
# Set the SqlMagic `displaylimit` option to 5.
# NOTE(review): presumably this only caps how many result rows are rendered - confirm against the ipython-sql docs.
%config SqlMagic.displaylimit = 5

Create the connection to the database and prepare the Jupyter SQL magic to use it.

In [None]:
connect_to_db = 'postgresql://' + \
                os.getenv('PG_USER', '') + ":" + os.getenv('PG_PASSWORD', '') +'@' + \
                os.getenv('PG_HOST', '') + ':' + os.getenv('PG_PORT', '5432') + '/' + os.getenv('PG_DB_NAME', '');
    
%sql $connect_to_db

Run the simplest possible query to confirm that the libraries are loaded and the database connection is working.

In [None]:
%%sql
select now() AT TIME ZONE 'America/Vancouver' as current_date  -- connectivity check, reported in the same named zone the extract query uses so daylight saving is honoured

In [None]:
%%sql  BCFN_MR  <<
SELECT               -- CURRENT NAME AND ACTIVE
       -- every branch concatenates its fields into one text column per row
       ' FM'
    || substr(identifier, 3, 7)
    || to_char(founding_date AT TIME ZONE 'America/Vancouver', 'yyyymmdd')
    || ' '
    || rpad(legal_type, 3)
    || ' 1'
    || '00000000'
    || rpad(' ', 42)
    || rpad(legal_name, 454)
  FROM businesses
 WHERE state = 'ACTIVE'
   AND legal_type IN ('SP', 'GP')
UNION ALL
SELECT               -- CURRENT NAME AND HISTORICAL
       ' FM'
    || substr(identifier, 3, 7)
    || to_char(founding_date AT TIME ZONE 'America/Vancouver', 'yyyymmdd')
    || ' '
    || rpad(legal_type, 3)
    || ' 2'
       -- dissolution date as yyyymmdd, or eight blanks when there is none
    || CASE WHEN dissolution_date IS NULL
            THEN rpad('', 8)
            ELSE to_char(dissolution_date AT TIME ZONE 'America/Vancouver', 'yyyymmdd')
       END
    || rpad(' ', 42)
    || rpad(legal_name, 454)
  FROM businesses
 WHERE state = 'HISTORICAL'
   AND legal_type IN ('SP', 'GP')
UNION ALL
SELECT DISTINCT      -- OLD NAME IN THE LAST 2 YEARS
       ' CH'
    || substr(b.identifier, 3, 7)
    || '00000000'
    || ' '
    || 'CH '
    || ' 2'
    || '00000000'
    || rpad(' ', 42)
    || rpad(bv.legal_name, 454)
  FROM businesses b
       -- versions of the same business whose name differs from the current one
  JOIN businesses_version bv
    ON bv.identifier = b.identifier
   AND bv.legal_name != b.legal_name
       -- the filing whose transaction ended that version
  JOIN filings f
    ON f.transaction_id = bv.end_transaction_id
 WHERE b.legal_type IN ('SP', 'GP')
   AND f.effective_date AT TIME ZONE 'America/Vancouver'
       > current_date AT TIME ZONE 'America/Vancouver' - INTERVAL '2 years';

In [None]:
# Build the output path <cwd>/data/BCFN_MR_<YYYYMMDD>.
# NOTE(review): datetime.now() follows the kernel's local time zone, whereas the
# query converts to America/Vancouver - confirm which one the date stamp should use.
datestr = datetime.now().strftime('%Y%m%d')
data_dir = os.path.join(os.getcwd(), r'data/')
# Create the output directory when it is missing so that open() below does not
# raise FileNotFoundError on a fresh checkout.
os.makedirs(data_dir, exist_ok=True)
BCFN_MR_filename = data_dir + 'BCFN_MR_' + datestr

# Materialise the query result once instead of converting it for every use.
bcfn_mr_df = BCFN_MR.DataFrame()

# The file is always created; it is left empty when the query returned no rows.
with open(BCFN_MR_filename, 'w') as f:
    if not bcfn_mr_df.empty:
        # Render the rows without the DataFrame header or index column.
        # NOTE(review): to_string() is a display formatter - confirm that the
        # installed pandas version neither pads nor truncates the records.
        dfAsString = bcfn_mr_df.to_string(header=False, index=False)
        f.write(dfAsString)

In [None]:
# Count the lines in the extract, then rename the file so that its name ends
# with that line count.
with open(BCFN_MR_filename, 'r') as f:
    # Stream the file rather than loading every line into memory just to count.
    line_count = sum(1 for _ in f)

# Rename only after the handle has been closed: renaming a file that is still
# open fails on Windows.
new_BCFN_MR_filename = BCFN_MR_filename + '_' + str(line_count)
os.rename(BCFN_MR_filename, new_BCFN_MR_filename)

In [None]:
# Compress the renamed extract into <name>.gz next to it.
gz_path = new_BCFN_MR_filename + '.gz'
with open(new_BCFN_MR_filename, 'rb') as plain_file:
    with gzip.open(gz_path, 'wb') as gz_file:
        # Copy the content across one line-sized chunk at a time.
        for chunk in plain_file:
            gz_file.write(chunk)

# Remove the uncompressed copy so that only the .gz archive is kept.
os.remove(new_BCFN_MR_filename)