### Load Business AR Data - Cohort 7

This will gather data, create the nanoid and load data into the Business AR database

## Setup

In [1]:
%load_ext sql
%load_ext dotenv
%config SqlMagic.named_parameters="enabled" 

There's a new jupysql version available (0.10.13), you're running 0.10.12. To upgrade: pip install jupysql --upgrade
Deploy Streamlit apps for free on Ploomber Cloud! Learn more: https://ploomber.io/s/signup


In [2]:
import pandas as pd
import numpy as np
from dotenv import find_dotenv
from dotenv import load_dotenv
from nanoid import generate
load_dotenv(find_dotenv())

True

In [3]:
import string
nanoid_charset = string.ascii_letters + string.digits

In [4]:
import google.auth
credentials, project_id = google.auth.default()

In [5]:
import os
from google.cloud.sql.connector import Connector
import sqlalchemy

# initialize Connector object
connector = Connector()

# function to return the database connection object
def get_conn():
    conn = connector.connect(
        INSTANCE_CONNECTION_NAME,
        "pg8000",
        user=DB_USER,
        password=DB_PASSWORD,
        db=DB_NAME
    )
    return conn

def get_pool():
    # create connection pool with 'creator' argument to our connection object function
    pool = sqlalchemy.create_engine(
        "postgresql+pg8000://",
        creator=get_conn,
    )
    
    return pool

## Reset Environment Variables

In [6]:
%dotenv -o

## Create Business Connection

In [7]:
DB_USER = os.getenv("BUSINESS_USERNAME", "")
DB_PASSWORD = os.getenv("BUSINESS_PASSWORD", "")
DB_NAME = os.getenv("BUSINESS_NAME", "")
INSTANCE_CONNECTION_NAME = os.getenv("BUSINESS_CONNECTION", "")

business_pool = get_pool()
%sql business_pool --alias business
%sql business

In [8]:
%%sql
SELECT current_database(), :INSTANCE_CONNECTION_NAME as Connection, now();

current_database,connection,now
business-ar,a083gt-dev:northamerica-northeast1:businesses-db-dev,2024-09-16 22:23:36.841589+00:00


## List Database Connections

In [9]:
%sql --connections

current,url,alias
*,postgresql+pg8000://,business


## Query Business Database

In [None]:
%sql business

In [None]:
%%sql business_data <<
select * from business where id >= 104

In [None]:
business_data

## Create Warehouse Connection

In [16]:
DB_USER = os.getenv("WAREHOUSE_USERNAME", "")
DB_PASSWORD = os.getenv("WAREHOUSE_PASSWORD", "")
DB_NAME = os.getenv("WAREHOUSE_NAME", "")
INSTANCE_CONNECTION_NAME = os.getenv("WAREHOUSE_CONNECTION", "")

warehouse_pool = get_pool()
%sql warehouse_pool --alias warehouse
%sql warehouse

In [17]:
%%sql
SELECT current_database(), :INSTANCE_CONNECTION_NAME as Connection, now();

current_database,connection,now
fin_warehouse,mvnjri-prod:northamerica-northeast1:fin-warehouse-prod,2024-09-18 21:27:09.068706+00:00


## Query Warehouse Database

In [18]:
%sql warehouse

In [20]:
%%sql colin_data <<
SELECT co.corp_num
     , co.recognition_dts
     , EXTRACT(YEAR FROM co.last_ar_filed_dt) AS last_ar_filed_year
     , co.admin_email
     , cn.CORP_NME
     , ct.corp_class
FROM "colin"."corporation"   co
   , "colin".corp_type       ct
   , "colin".corp_state      cs
   , "colin".corp_name       cn
WHERE co.corp_typ_cd    = ct.corp_typ_cd
  AND co.corp_num       = cs.corp_num
  AND co.corp_num       = cn.corp_num
  AND cs.end_event_id   IS NULL
  AND cs.state_typ_cd   = 'ACT'                                                                                -- active
  AND ct.corp_class     = 'BC'                                                                                 -- BC Corporations
  AND co.corp_typ_cd   <> 'BEN'                                                                                -- no Benefit Companies
  AND co.admin_email IS NOT NULL                                                                               -- they have an email
  AND co.send_ar_ind = 'Y'                                                                                     -- AR reminder indicator is "Y"
  AND NOT EXISTS (SELECT 'x'
                  FROM "colin".filing f, "colin".event e, "colin".filing_user u
                  WHERE f.event_id = e.event_id
                    AND f.event_id = u.event_id                                                                -- no previous BCOL filings
                    AND e.corp_num = co.corp_num
                    AND u.role_typ_cd = 'bcol')
  AND NOT EXISTS (SELECT 'x'
                  FROM "colin".corporation
                  WHERE admin_email = co.admin_email
                    AND corp_num <> co.corp_num)                                                               -- no other business using the same email
  AND TO_CHAR(co.recognition_dts, 'MMDD') = TO_CHAR(current_date, 'MMDD')                                      -- AR reminder on the anniversary date
  AND (
       -- Exclude companies founded in the current year, include those from previous year
       (EXTRACT(YEAR FROM co.recognition_dts) = EXTRACT(YEAR FROM current_date) - 1 AND co.last_ar_filed_dt IS NULL)
       -- Or include if last_ar_filed_dt is not NULL and was filed in the previous year
       OR (co.last_ar_filed_dt IS NOT NULL AND EXTRACT(YEAR FROM co.last_ar_filed_dt) = EXTRACT(YEAR FROM current_date) - 1)
      );


/opt/conda/lib/python3.11/site-packages/sql/connection/connection.py:867: JupySQLRollbackPerformed: Found invalid transaction. JupySQL executed a ROLLBACK operation.


In [21]:
colin_df = colin_data.DataFrame()
colin_df

Unnamed: 0,corp_num,recognition_dts,last_ar_filed_year,admin_email,corp_nme,corp_class
0,1179777,2018-09-18 11:24:17,2023,melissaxchen@outlook.com,LUNAR RAIN JEWELLERY LTD.,BC
1,1179777,2018-09-18 11:24:17,2023,melissaxchen@outlook.com,LUNAR RAIN JEWELLERY LTD.,BC
2,1134394,2017-09-18 15:24:14,2023,spa.thaivancouver@gmail.com,HATTHA BY KT SALES AND SERVICES LIMITED,BC
3,1266211,2020-09-18 08:09:09,2023,heritagesignworks1@gmail.com,1266211 B.C. LTD.,BC
4,1014000,2014-09-18 15:51:18,2023,lovetoski78@gmail.com,ELKHORN ENTERPRISES LTD.,BC
...,...,...,...,...,...,...
165,0980805,2013-09-18 22:40:13,2023,immcanda2013@gmail.com,SOUTH OF FRASER CONSULTING LTD.,BC
166,0980805,2013-09-18 22:40:13,2023,immcanda2013@gmail.com,0980805 B.C. LTD.,BC
167,1089865,2016-09-18 13:31:29,2023,macs@lakesideenvironmental.com,RAYSUNSHINE SECURITY DARALUZ INC.,BC
168,1089865,2016-09-18 13:31:29,2023,macs@lakesideenvironmental.com,1089865 B.C. LTD.,BC


In [24]:
# Print original columns to verify names
print("Original columns:", colin_df.columns.tolist())

# Rename columns (adjusted based on actual column names)
colin_df.rename(columns={
    'corp_num': 'identifier',
    'corp_nme': 'legal_name',  # Adjusted to match actual column name
    'recognition_dts': 'founding_date',
    'admin_email': 'email',
    'last_ar_filed_year': 'last_ar_reminder_year',
    'corp_class': 'legal_type'  # Use 'corp_class' as 'legal_type'
}, inplace=True)

# Print columns after renaming to confirm
print("Columns after renaming:", colin_df.columns.tolist())

# Ensure 'last_ar_reminder_year' is of string type
colin_df['last_ar_reminder_year'] = colin_df['last_ar_reminder_year'].astype(str)

# Format 'founding_date' as 'YYYY-MM-DD'
colin_df['founding_date'] = pd.to_datetime(colin_df['founding_date']).dt.strftime('%Y-%m-%d')

# Select the required columns
colin_df = colin_df[['identifier', 'legal_name', 'legal_type', 'founding_date', 'last_ar_reminder_year', 'email']]

# Escape single quotes in strings to prevent SQL syntax errors
def escape_quotes(s):
    return s.replace("'", "''") if isinstance(s, str) else s

colin_df['legal_name'] = colin_df['legal_name'].apply(escape_quotes)
colin_df['email'] = colin_df['email'].apply(escape_quotes)
colin_df['legal_type'] = colin_df['legal_type'].apply(escape_quotes)

# Generate the VALUES part of the INSERT statement
values = ",\n".join([
    f"('{row['identifier']}', '{row['legal_name']}', '{row['legal_type']}', '{row['founding_date']}', '{row['last_ar_reminder_year']}', '{row['email']}')"
    for _, row in colin_df.iterrows()
])

# Step 9: Complete the INSERT statement
insert_statement = f"""INSERT INTO "public"."business" (identifier, legal_name, legal_type, founding_date, last_ar_reminder_year, email)
VALUES 
{values};"""

# Print the INSERT statement
print(insert_statement)

Original columns: ['identifier', 'founding_date', 'last_ar_reminder_year', 'email', 'corp_nme', 'legal_type']
Columns after renaming: ['identifier', 'founding_date', 'last_ar_reminder_year', 'email', 'legal_name', 'legal_type']
INSERT INTO "public"."business" (identifier, legal_name, legal_type, founding_date, last_ar_reminder_year, email)
VALUES 
('1179777', 'LUNAR RAIN JEWELLERY LTD.', 'BC', '2018-09-18', '2023', 'melissaxchen@outlook.com'),
('1179777', 'LUNAR RAIN JEWELLERY LTD.', 'BC', '2018-09-18', '2023', 'melissaxchen@outlook.com'),
('1134394', 'HATTHA BY KT SALES AND SERVICES LIMITED', 'BC', '2017-09-18', '2023', 'spa.thaivancouver@gmail.com'),
('1266211', '1266211 B.C. LTD.', 'BC', '2020-09-18', '2023', 'heritagesignworks1@gmail.com'),
('1014000', 'ELKHORN ENTERPRISES LTD.', 'BC', '2014-09-18', '2023', 'lovetoski78@gmail.com'),
('1134424', 'MEL YACHT SERVICES LTD.', 'BC', '2017-09-18', '2023', 'MELYACHTSERVICES@GMAIL.COM'),
('1439961', 'GREENLINK EXPLORATION CORP.', 'BC', '202

In [None]:
%sql business
colin_df.to_sql(name='business', con=business_pool, if_exists='append', index=False)

In [None]:
colin_df

## Load from CSV

In [None]:
colin_df = pd.read_csv('cohort_7.csv') 
colin_df

In [None]:
# colin_df["identifier"] = colin_df["Jurisdiction"] + colin_df['corp_num'].apply('{:0>7}'.format)
# colin_df['nano_id'] = colin_df.apply(lambda row: generate(nanoid_charset), axis = 1)
# colin_df['tax_id'] = None
# colin_df['id'] = np.arange(1, colin_df.shape[0] + 1) + 7
#to_bar_df = pd.DataFrame()
#to_bar_df[['id','legal_name','legal_type','identifier','tax_id','nano_id']] = colin_df[['id','Name','Jurisdiction','identifier','tax_id','nano_id']]
#to_bar_df


In [None]:
# colin_df['tax_id'] = None
# colin_df['ar_reminder_flag'] = None
# colin_df['state'] = None
# colin_df['op_state'] = None
# colin_df['corp_class'] = None
to_bar_df = colin_df

In [None]:
to_bar_df.to_sql(name='b2', con=business_pool, if_exists='append', index=False)

1

In [None]:
%%sql
INSERT INTO business (id, legal_name, legal_type, identifier, email, founding_date, last_ar_reminder_year, state)
SELECT 
    (SELECT COALESCE(MAX(id), 0) FROM business) + ROW_NUMBER() OVER (), 
    legal_name, 
    legal_type, 
    identifier, 
    email, 
    founding_date::date, 
    last_ar_reminder_year, 
    'ACT'
FROM b2;


In [None]:
to_inv_email_df = pd.DataFrame()
to_inv_email_df[['id','recipients']] = colin_df[['id','email']]
to_inv_email_df['business_id'] = to_inv_email_df['id']
to_inv_email_df['token'] = colin_df.apply(lambda row: generate(nanoid_charset), axis = 1)
to_inv_email_df['message'] = colin_df.apply(lambda row: 'AR Invitation', axis = 1)
to_inv_email_df['status'] = colin_df.apply(lambda row: 'SENT', axis = 1)
to_inv_email_df

In [None]:
%%sql business_data <<
select * from business where id = 9

## If using csv files then no need to run following two cells

In [None]:
business_df = business_data.DataFrame()

In [None]:
to_inv_email_df = pd.DataFrame()
to_inv_email_df[['id','recipients']] = business_df[['id','email']]
to_inv_email_df['business_id'] = to_inv_email_df['id']
to_inv_email_df['token'] = business_df.apply(lambda row: generate(nanoid_charset), axis = 1)
to_inv_email_df['message'] = business_df.apply(lambda row: 'AR Invitation', axis = 1)
to_inv_email_df['status'] = business_df.apply(lambda row: 'SENT', axis = 1)
to_inv_email_df

Writes into inv_ch6 table

In [None]:
to_inv_email_df.to_sql(name='inv_ch6', con=business_pool, if_exists='append', index=False)

1

Insert into invitations table from inv_ch6 table

In [None]:
%%sql
insert into invitations (id,recipients,message,sent_date,token,status,business_id)
(select nextval('invitations_id_seq'::regclass), recipients, message, now(), token, 'SENT', business_id from inv_ch6)

## Output to CSV

In [None]:
%%sql business_data <<
select b.identifier, b.legal_name, b.legal_type, i.token, i.recipients from business b, invitations i where b.id=i.business_id and b.id >= 104

In [None]:
business_data

In [None]:
bdf = business_data.DataFrame()
# bdf = bdf[['legal_name','legal_type','identifier','nano_id']]
base_url='https://annualreport.business.bcregistry.gov.bc.ca/en-CA'
bdf['url'] = f'{base_url}?nanoid=' + bdf['token']
bdf

In [None]:
import time
from datetime import datetime
from datetime import timezone

time_stamp = time.time()
now = datetime.utcfromtimestamp(time_stamp).replace(tzinfo=timezone.utc)
# local_now = now.astimezone(Pacific)
local_now = now.astimezone()
local_now.strftime("%Y.%m.%d.%H")

In [None]:
with open('business-ar.'+local_now.strftime("%Y.%m.%d.%H")+'.csv', 'a') as f:      
    f.write('\n\n Business Annual Report\n')
    bdf.to_csv(f, sep=',', encoding='utf-8', index=False)    

## Set Invitations

In [None]:
%%sql
INSERT INTO invitations (id, recipients, message, sent_date, token, status, additional_message, business_id)
SELECT 
    nextval('invitations_id_seq'::regclass), 
    'test@example.com', 
    'Annual Report Due', 
    now(), 
    b2.nano_id,
    'SENT', 
    NULL, 
    b.id
FROM 
    business b
JOIN 
    b2 ON b.id = b2.id;
