# Filings Daily Stats

Load the libraries and notebook extensions needed to query, load, manipulate, and view the data.

In [None]:
import os
import csv
import gzip
from datetime import datetime

%load_ext sql
%load_ext dotenv
%config SqlMagic.displaylimit = 5

In [None]:
from dotenv import find_dotenv
from dotenv import load_dotenv
load_dotenv(find_dotenv())

In [None]:
%dotenv -o

### DB Connection 

In [None]:
import google.auth
from google.cloud.sql.connector import Connector
import sqlalchemy

# Connection settings are read from environment variables (the dotenv calls in
# the earlier cells are expected to have loaded them from a .env file).
# Every setting except the connection name falls back to an empty string.
DB_USER = os.getenv("DATABASE_USERNAME", "")
DB_NAME = os.getenv("DATABASE_NAME", "")
DB_PASSWORD = os.getenv("DATABASE_PASSWORD", "")
DB_HOST = os.getenv("DATABASE_HOST", "")
DB_PORT = os.getenv("DATABASE_PORT", "")
# No default here: this is None when the variable is unset, and get_pool()
# tests it to choose between a direct host/port connection and the connector.
DB_CONNECTION_NAME = os.getenv("LEAR_DB_CONNECTION_NAME")  # project:region:instance-name

# initialize Connector object
# (module-level because get_conn() reuses this single instance for every connection)
connector = Connector()

def get_conn():
    """Open and return a new database connection through the Cloud SQL connector.

    The connection targets DB_CONNECTION_NAME using the pg8000 driver and the
    "public" IP type, logging in as DB_USER to DB_NAME with IAM authentication
    enabled (no password is passed).
    """
    connect_kwargs = {
        "ip_type": "public",
        "user": DB_USER,
        "db": DB_NAME,
        "enable_iam_auth": True,
    }
    return connector.connect(DB_CONNECTION_NAME, "pg8000", **connect_kwargs)

def get_pool():
    """Create the SQLAlchemy engine (connection pool) used by this notebook.

    When DB_CONNECTION_NAME is not set, the engine connects directly to
    DB_HOST:DB_PORT/DB_NAME with DB_USER and DB_PASSWORD. Those two values must
    be supplied raw (not pre-encoded); they are percent-encoded here.

    Otherwise every connection is created by get_conn() through the Cloud SQL
    connector.

    Returns:
        The engine returned by sqlalchemy.create_engine().

    Raises:
        Whatever google.auth.default() raises when application default
        credentials cannot be resolved (connector branch only).
    """
    if not DB_CONNECTION_NAME:
        # Function-scope import so this cell does not depend on the imports cell.
        from urllib.parse import quote

        # Percent-encode the credentials: a raw '@', ':', '/' or '%' in the
        # username or password would otherwise corrupt or misparse the URL.
        user = quote(DB_USER, safe="")
        password = quote(DB_PASSWORD, safe="")
        connect_string = f'postgresql://{user}:{password}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
        return sqlalchemy.create_engine(connect_string)

    # NOTE: also need GOOGLE_APPLICATION_CREDENTIALS to be set in the .env
    #    - when running the notebook on its own, the cred path starts in this folder instead of the root of the service
    #    - if this path is invalid or the creds are invalid the line below will fail
    # The result is not used; the call only checks that credentials can be resolved.
    google.auth.default()

    # create connection pool with 'creator' argument to our connection object function
    return sqlalchemy.create_engine(
        "postgresql+pg8000://",
        creator=get_conn,
    )

In [None]:
# Build the engine once and hand it to the SQL magic under the alias "lear".
# The final line references that alias, presumably to make it the active
# connection for the %%sql cells below (confirm against the SQL magic's docs).
lear_pool = get_pool()
%sql lear_pool --alias lear
%sql lear

In [None]:
%sql --connections

Run a simple query to confirm that the libraries are loaded and the DB connection is working.

In [None]:
%%sql 
select now() AT TIME ZONE 'PST' as current_date

### Collect data

In [None]:
%%sql  icbc_data  <<
select
-- Builds one text record per SP/GP business; the result is stored in icbc_data.
-- Record layout, in order:
--   state code: '1' for ACTIVE, '2' for HISTORICAL
--   legal_type, followed by a single space
--   up to 7 characters of identifier, starting at position 3
--   legal_name in upper case
-- NOTE(review): the CASE has no ELSE, so any other state yields NULL, and
-- NULL || text is NULL in PostgreSQL, so the whole record becomes NULL. The
-- same happens when legal_name is NULL. Confirm whether such rows can occur
-- and whether they should be filtered out here.
 CASE WHEN state='ACTIVE' THEN '1'
            WHEN state='HISTORICAL' THEN '2'
  END 
|| 
legal_type
||' '
||substr(identifier,3,7)
||upper(legal_name)
from businesses
where legal_type in ('SP','GP');

### Save data to file

In [None]:
import functools

# Destination of the fixed-width export: ./data/from_openshift.txt under the
# current working directory.
icbc_data_filename = os.path.join(os.getcwd(), 'data', 'from_openshift.txt')
# Create the target folder if needed so open() below cannot fail on a fresh checkout.
os.makedirs(os.path.dirname(icbc_data_filename), exist_ok=True)

# Materialise the query result once instead of once per use.
icbc_records = icbc_data.DataFrame()

# Drop rows whose record is NULL. Besides having nothing to write, a single
# NULL makes .str.len() return floats, which would turn the format spec below
# into e.g. "{:<45.0s}" (precision 0) and blank out every value in the file.
df = icbc_records.dropna()
skipped_rows = len(icbc_records) - len(df)
if skipped_rows:
    print(f'Skipped {skipped_rows} row(s) with no record text')

# The file is always (re)created, even when there is nothing to write.
with open(icbc_data_filename, 'w') as f:
    if not df.empty:
        # The following loop changes the string output from right alignment to
        # left alignment: each column is padded to the length of its longest value.
        formatters = {}
        for column in df.columns:
            # `width` rather than `max`, so the builtin max() is not shadowed
            # for the rest of the notebook.
            width = int(df[column].str.len().max())
            form = "{{:<{}s}}".format(width)
            formatters[column] = functools.partial(str.format, form)

        dfAsString = df.to_string(formatters=formatters, header=False, index=False)
        f.write(dfAsString)
