# Filings Daily Stats

Load the libraries this notebook needs to query, load, manipulate, and view the data.

In [None]:
import os
import pandas as pd
import gzip
from datetime import datetime, timedelta
from cloud_sql_connector import DBConfig, getconn
import pg8000
import sqlalchemy

In [None]:
from dotenv import find_dotenv
from dotenv import load_dotenv
# Locate the .env file first, then load it so the os.getenv() lookups in the
# following cells can read the database settings.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

### DB Connection 

In [None]:
# Connection settings: the instance, database and user come from environment
# variables; the IP type and schema are fixed for this notebook.
_db_settings = {
    'instance_name': os.getenv('LEAR_DB_CONNECTION_NAME'),
    'database': os.getenv('DATABASE_NAME'),
    'user': os.getenv('DATABASE_USER'),
    'ip_type': "public",
    'schema': "business",
}
config = DBConfig(**_db_settings)

def get_conn():
    """Open a Google Cloud SQL connection through the custom cloud-sql-connector.

    Takes no arguments: the connection is built from the module-level
    ``config`` object and returned as-is from ``getconn``.
    """
    connection = getconn(config)
    return connection

In [None]:
# Echo the connection-related environment variables and the resulting
# DBConfig so a misconfigured environment is easy to spot.
for env_name in (
    'LEAR_DB_CONNECTION_NAME',
    'DATABASE_NAME',
    'DATABASE_USER',
    'GOOGLE_APPLICATION_CREDENTIALS',
):
    print(f'{env_name}:', os.getenv(env_name))
print('DBConfig:', config)

# Build the SQLAlchemy engine. The URL carries only the dialect/driver;
# actual connections are produced by get_conn via the `creator` hook.
engine = sqlalchemy.create_engine("postgresql+pg8000://", creator=get_conn)

print("Cloud SQL engine created successfully!")
print("Engine:", engine)

# Smoke-test the engine with a trivial query. Any failure is reported
# rather than raised, so the notebook keeps running either way.
try:
    probe_sql = sqlalchemy.text("SELECT 1 as test")
    with engine.connect() as conn:
        probe = conn.execute(probe_sql)
        print("✅ Database connection test successful!")
        print(f"Test result: {probe.fetchone()}")
except Exception as exc:
    print(f"❌ Connection failed: {exc}")

### Collect data

In [None]:
# SQL for the BCFN_MR extract. Each SELECT returns a single text column that
# concatenates fixed-width fields into one record, for businesses whose
# legal_type is 'SP' or 'GP' (presumably sole proprietorships and general
# partnerships -- confirm against the business schema).
#
# Record layout, in concatenation order:
#   3 chars  record tag: ' FM' (current name) or ' CH' (old name)
#   <=7      substr(identifier, 3, 7)
#   8        founding date as yyyymmdd in America/Vancouver ('00000000' for CH)
#   1        space
#   3        legal_type padded to 3 ('CH ' for CH rows)
#   2        ' 1' for ACTIVE rows, ' 2' for HISTORICAL and CH rows
#   8        '00000000', or for HISTORICAL rows the dissolution date as
#            yyyymmdd (8 spaces when dissolution_date is NULL)
#   42       spaces
#   454      legal_name padded to 454
#
# The three SELECTs are combined with UNION ALL:
#   1. ACTIVE businesses under their current name.
#   2. HISTORICAL businesses under their current name.
#   3. Distinct earlier names from businesses_version that differ from the
#      current name, where the filing that ended that version has an
#      effective_date within the last 2 years.
bcfn_mr_query = '''
select               -- CURRENT NAME AND ACTIVE
' FM'
||substr(identifier,3,7)
||to_char(founding_date at time zone 'America/Vancouver','yyyymmdd')
||' '
||rpad(legal_type,3)
||' 1'
||'00000000'
||rpad(' ',42)
||rpad(legal_name,454)
from businesses
where legal_type in ('SP','GP')
and state='ACTIVE'
UNION ALL
select               -- CURRENT NAME AND HISTORICAL
' FM'
||substr(identifier,3,7)
||to_char(founding_date at time zone 'America/Vancouver','yyyymmdd')
||' '
||rpad(legal_type,3)
||' 2'
||CASE WHEN dissolution_date at time zone 'America/Vancouver' is NULL THEN rpad('', 8)
       WHEN dissolution_date at time zone 'America/Vancouver' is NOT NULL THEN to_char(dissolution_date at time zone 'America/Vancouver','yyyymmdd')
  END 
||rpad(' ',42)
||rpad(legal_name,454)
from businesses
where legal_type in ('SP','GP')
and state='HISTORICAL'
UNION ALL
select               -- OLD NAME IN THE LAST 2 YEARS
distinct 
' CH'
||substr(b.identifier,3,7)
||'00000000'
||' '
||'CH '
||' 2'
||'00000000'
||rpad(' ',42)
||rpad(bv.legal_name,454)
from businesses         b
    ,businesses_version bv
    ,filings            f
where b.identifier=bv.identifier
and b.legal_name != bv.legal_name
and b.legal_type in ('SP','GP')
and f.transaction_id=bv.end_transaction_id
and f.effective_date at time zone 'America/Vancouver' > current_date at time zone 'America/Vancouver' - interval '2 years';
'''

# Run the query through the SQLAlchemy engine; the result is a DataFrame with
# one text column holding one record per row.
bcfn_mr_df = pd.read_sql(bcfn_mr_query, engine)

### Save data to file

In [None]:
# Output path: <cwd>/data/BCFN_MR_<YYYYMMDD>, stamped with today's local date.
datestr = datetime.now().strftime('%Y%m%d')
BCFN_MR_filename = os.path.join(os.getcwd(), 'data', f'BCFN_MR_{datestr}')

# Make sure the target folder is there before opening the file.
os.makedirs(os.path.dirname(BCFN_MR_filename), exist_ok=True)

# The file is always created; it stays empty when the query returned no rows.
with open(BCFN_MR_filename, 'w') as out_file:
    has_rows = not bcfn_mr_df.empty
    if has_rows:
        # Dump the records as plain text, without header or index column.
        out_file.write(bcfn_mr_df.to_string(header=False, index=False))

In [None]:
# Count the lines in the exported file, then append that count to the
# filename: BCFN_MR_<date> becomes BCFN_MR_<date>_<line count>.
with open(BCFN_MR_filename, 'r') as f:
    # Stream the file rather than loading every line into a list just to
    # take its length.
    line_count = sum(1 for _ in f)

# Rename only after the handle has been closed: renaming a file that is still
# open fails on Windows.
new_BCFN_MR_filename = f'{BCFN_MR_filename}_{line_count}'
os.rename(BCFN_MR_filename, new_BCFN_MR_filename)

In [None]:
# Compress the export to <name>.gz, then delete the uncompressed copy.
gz_path = new_BCFN_MR_filename + '.gz'
with open(new_BCFN_MR_filename, 'rb') as raw, gzip.open(gz_path, 'wb') as packed:
    for chunk in raw:
        packed.write(chunk)
os.remove(new_BCFN_MR_filename)