### Load Business AR Data - Cohort 6

This notebook gathers business data, generates a nanoid token for each business, and loads the results into the Business AR database.

## Setup

In [None]:
# Load the JupySQL (%sql / %%sql) and python-dotenv (%dotenv) IPython extensions.
%load_ext sql
%load_ext dotenv
# Turn on named parameters so SQL cells can reference notebook variables (used below as INSTANCE_CONNECTION_NAME).
%config SqlMagic.named_parameters="enabled" 

Deploy Panel apps for free on Ploomber Cloud! Learn more: https://ploomber.io/s/signup


In [None]:
import pandas as pd
import numpy as np
from dotenv import find_dotenv
from dotenv import load_dotenv
from nanoid import generate
load_dotenv(find_dotenv())

True

In [None]:
import string

# Alphabet handed to nanoid's generate(): a-z, then A-Z, then 0-9.
nanoid_charset = string.ascii_lowercase + string.ascii_uppercase + string.digits

In [None]:
import google.auth
# Resolve the ambient Google credentials and project id via google.auth.default().
# NOTE(review): neither name is referenced again in the cells visible here — confirm this cell is still needed.
credentials, project_id = google.auth.default()

In [None]:
import os
from google.cloud.sql.connector import Connector
import sqlalchemy

# initialize Connector object
connector = Connector()

# function to return the database connection object
def get_conn(instance_connection_name=None, user=None, password=None, db=None):
    """Open one pg8000 connection through the Cloud SQL connector.

    Every argument is optional. An argument left as ``None`` falls back to the
    matching notebook global (``INSTANCE_CONNECTION_NAME``, ``DB_USER``,
    ``DB_PASSWORD``, ``DB_NAME``) as it is set at call time, so the original
    zero-argument ``get_conn()`` keeps working unchanged.

    Returns:
        The connection object returned by ``connector.connect``.
    """
    return connector.connect(
        INSTANCE_CONNECTION_NAME if instance_connection_name is None else instance_connection_name,
        "pg8000",
        user=DB_USER if user is None else user,
        password=DB_PASSWORD if password is None else password,
        db=DB_NAME if db is None else db,
    )

def get_pool(instance_connection_name=None, user=None, password=None, db=None):
    """Create a SQLAlchemy engine tied to one database's connection settings.

    The settings are captured when the pool is created. Previously the engine's
    ``creator`` read the ``DB_*`` globals each time it opened a connection, so
    rebinding those globals for a second database silently redirected every new
    connection of an earlier pool.

    Args:
        instance_connection_name, user, password, db: optional explicit
            settings; each one left as ``None`` is taken from the matching
            notebook global at the moment ``get_pool`` is called.

    Returns:
        The engine created by ``sqlalchemy.create_engine``.
    """
    # Snapshot the settings now so later reassignment of the globals cannot
    # change where this pool connects.
    settings = {
        "instance_connection_name": (
            INSTANCE_CONNECTION_NAME if instance_connection_name is None else instance_connection_name
        ),
        "user": DB_USER if user is None else user,
        "password": DB_PASSWORD if password is None else password,
        "db": DB_NAME if db is None else db,
    }

    def _creator():
        # Called by SQLAlchemy whenever the pool needs a new DBAPI connection.
        return get_conn(**settings)

    # create connection pool with 'creator' argument to our connection object function
    pool = sqlalchemy.create_engine(
        "postgresql+pg8000://",
        creator=_creator,
    )

    return pool

## Reset Environment Variables

In [None]:
# Re-read the .env file; -o lets its values override variables already set in the environment.
%dotenv -o

## Create Business Connection

In [None]:
DB_USER = os.getenv("BUSINESS_USERNAME", "")
DB_PASSWORD = os.getenv("BUSINESS_PASSWORD", "")
DB_NAME = os.getenv("BUSINESS_NAME", "")
INSTANCE_CONNECTION_NAME = os.getenv("BUSINESS_CONNECTION", "")

business_pool = get_pool()
%sql business_pool --alias business
%sql business

In [None]:
%%sql
-- Show which database and instance the active connection points at.
SELECT
    current_database(),
    :INSTANCE_CONNECTION_NAME AS Connection,
    now();

current_database,connection,now
business-ar,a083gt-test:northamerica-northeast1:businesses-db-test,2024-08-29 20:23:03.787894+00:00


## List Database Connections

In [None]:
%sql --connections

current,url,alias
*,postgresql+pg8000://,business


## Query Business Database

In [None]:
%sql business

In [None]:
%%sql business_data <<
-- Business rows with id 104 or higher, stored in business_data.
SELECT *
FROM business
WHERE id >= 104

In [None]:
business_data

id,legal_name,legal_type,identifier,tax_id,email,founding_date,ar_reminder_flag,last_ar_reminder_year,state,op_state,corp_class
104,105377 B.C. LTD.,BC,1053777,,michael.kagis@gov.bc.ca,2022-12-16 00:00:00+00:00,True,,,,
105,BURGER FISHERIES LTD.,BC,1054220,,michael.kagis@gov.bc.ca,2023-01-04 00:00:00+00:00,True,,,,
106,LEBLANC INDUSTRIES,BC,1055486,,darci.denis@gov.bc.ca,2023-02-22 00:00:00+00:00,True,,,,
107,1056121 B.C. LTD.,BC,1056121,,darci.denis@gov.bc.ca,2023-03-20 00:00:00+00:00,False,,,,
108,1057322 B.C. LTD.,BC,1057322,,darci.denis@gov.bc.ca,2023-05-05 00:00:00+00:00,True,,,,
109,1059306 B.C. LTD.,BC,1059306,,michael.kagis@gov.bc.ca,2023-07-21 00:00:00+00:00,True,,,,


## Create Warehouse Connection

In [None]:
DB_USER = os.getenv("WAREHOUSE_USERNAME", "")
DB_PASSWORD = os.getenv("WAREHOUSE_PASSWORD", "")
DB_NAME = os.getenv("WAREHOUSE_NAME", "")
INSTANCE_CONNECTION_NAME = os.getenv("WAREHOUSE_CONNECTION", "")

warehouse_pool = get_pool()
%sql warehouse_pool --alias warehouse
%sql warehouse

UsageError: An error happened while creating the connection: Arg `instance_connection_string` must have format: PROJECT:REGION:INSTANCE, got ..

Perhaps you meant to use the 'postgresql' db 
To find more information regarding connection: https://jupysql.ploomber.io/en/latest/integrations/postgres-connect.html

To fix it:

Pass a valid connection string:
    Example: %sql postgresql://username:password@hostname/dbname

OR

Pass a connection key (one of: 'business')
    Example: %sql 'business'

For more details, see: https://jupysql.ploomber.io/en/latest/connecting.html
If you need help solving this issue, send us a message: https://ploomber.io/community


In [None]:
%%sql
-- Show which database and instance the active connection points at.
SELECT
    current_database(),
    :INSTANCE_CONNECTION_NAME AS Connection,
    now();

## Query Warehouse Database

In [None]:
%sql warehouse

RuntimeError: If using snippets, you may pass the --with argument explicitly.
For more details please refer: https://jupysql.ploomber.io/en/latest/compose.html#with-argument


Original error message from DB driver:
(pg8000.exceptions.DatabaseError) {'S': 'ERROR', 'V': 'ERROR', 'C': '42601', 'M': 'syntax error at or near "warehouse"', 'P': '1', 'F': 'scan.l', 'L': '1192', 'R': 'scanner_yyerror'}
[SQL: warehouse]
(Background on this error at: https://sqlalche.me/e/20/4xp6)

If you need help solving this issue, send us a message: https://ploomber.io/community


In [None]:
%%sql colin_data <<
-- Name, type, identifier and BN15 for the listed COLIN corporations,
-- using only name rows whose end_event_id is null.
SELECT
    n.corp_nme AS legal_name,
    c.corp_typ_cd AS legal_type,
    -- BC corporations get their type code prefixed to the corp number
    CASE
        WHEN c.corp_typ_cd = 'BC' THEN c.corp_typ_cd || c.corp_num
        ELSE c.corp_num
    END AS identifier,
    c.bn_15 AS tax_id
FROM colin.corporation AS c
JOIN colin.corp_name AS n
    ON n.corp_num = c.corp_num
WHERE n.end_event_id IS NULL
  AND c.corp_num IN ('1161685', '1249291', '1363120', '0787872', '0910741', '1357564', '0966461')
ORDER BY c.corp_num
LIMIT 10;


In [None]:
# Materialise the COLIN query result as a pandas DataFrame.
colin_df = colin_data.DataFrame()
# One freshly generated nanoid per row.
colin_df['nano_id'] = [generate(nanoid_charset) for _ in range(len(colin_df))]
# 1-based id derived from the row index.
colin_df['id'] = colin_df.index + 1
colin_df

In [None]:
# Make "business" the active connection, then append colin_df to the business table.
# NOTE(review): colin_df carries a nano_id column and ids starting at 1, while the
# business rows displayed earlier show no nano_id column and ids above 100 — confirm
# this append is still valid before running it.
%sql business
colin_df.to_sql(name='business', con=business_pool, if_exists='append', index=False)

In [None]:
colin_df

## Load from CSV

In [None]:
# Load the cohort from a CSV file in the notebook's working directory.
# Later cells read its id, email, legal_name, legal_type, identifier,
# founding_date and last_ar_reminder_year columns.
colin_df = pd.read_csv('cohort_7.csv') 
colin_df

Unnamed: 0,id,legal_name,legal_type,identifier,tax_id,email,founding_date,ar_reminder_flag,last_ar_reminder_year,state,op_state,corp_class
0,110,test6,BC,1059306,,michael.kagis@gov.bc.ca,2023-07-21 00:00:00+00:00,True,,,,


In [None]:
# colin_df["identifier"] = colin_df["Jurisdiction"] + colin_df['corp_num'].apply('{:0>7}'.format)
# colin_df['nano_id'] = colin_df.apply(lambda row: generate(nanoid_charset), axis = 1)
# colin_df['tax_id'] = None
# colin_df['id'] = np.arange(1, colin_df.shape[0] + 1) + 7
#to_bar_df = pd.DataFrame()
#to_bar_df[['id','legal_name','legal_type','identifier','tax_id','nano_id']] = colin_df[['id','Name','Jurisdiction','identifier','tax_id','nano_id']]
#to_bar_df


In [None]:
# colin_df['tax_id'] = None
# colin_df['ar_reminder_flag'] = None
# colin_df['state'] = None
# colin_df['op_state'] = None
# colin_df['corp_class'] = None
# to_bar_df is another name for the same colin_df object (no copy is made).
to_bar_df = colin_df

In [None]:
# Stage the cohort rows in table b2 (append mode, DataFrame index not written).
# NOTE(review): append keeps any rows already in b2 — confirm the table is empty first,
# since the INSERT into business that follows selects every row of b2.
to_bar_df.to_sql(name='b2', con=business_pool, if_exists='append', index=False)

1

In [None]:
%%sql
-- Copy the staged cohort rows from b2 into business with state 'ACT'.
-- New ids continue from the current MAX(id) of business. The row numbering is
-- ordered by b2.id so the id each staged row receives is repeatable; without an
-- ORDER BY the window numbers rows in arbitrary order, and the invitation cells
-- below rely on the staged id lining up with the business id.
INSERT INTO business (id, legal_name, legal_type, identifier, email, founding_date, last_ar_reminder_year, state)
SELECT 
    (SELECT COALESCE(MAX(id), 0) FROM business) + ROW_NUMBER() OVER (ORDER BY b2.id), 
    legal_name, 
    legal_type, 
    identifier, 
    email, 
    founding_date::date, 
    last_ar_reminder_year, 
    'ACT'
FROM b2;


In [None]:
# Build one invitation row per cohort business: the business id doubles as
# business_id, the email becomes the recipient, and each row gets its own token.
to_inv_email_df = (
    colin_df[['id', 'email']]
    .rename(columns={'email': 'recipients'})
    .assign(
        business_id=lambda frame: frame['id'],
        token=[generate(nanoid_charset) for _ in range(len(colin_df))],
        message='AR Invitation',
        status='SENT',
    )
)
to_inv_email_df

Unnamed: 0,id,recipients,business_id,token,message,status
0,110,michael.kagis@gov.bc.ca,110,T8kMWGA35PPySGuDEDf3W,AR Invitation,SENT


In [None]:
%%sql business_data <<
-- The single business row with id 9, stored in business_data.
SELECT *
FROM business
WHERE id = 9

## If loading from a CSV file, skip the following two cells

In [None]:
business_df = business_data.DataFrame()

In [None]:
# Build one invitation row per queried business: the business id doubles as
# business_id, the email becomes the recipient, and each row gets its own token.
to_inv_email_df = (
    business_df[['id', 'email']]
    .rename(columns={'email': 'recipients'})
    .assign(
        business_id=lambda frame: frame['id'],
        token=[generate(nanoid_charset) for _ in range(len(business_df))],
        message='AR Invitation',
        status='SENT',
    )
)
to_inv_email_df

Unnamed: 0,id,recipients,business_id,token,message,status
0,25,Omid.X.Zamani@gov.bc.ca,25,ai4kJp39D6c5B7X3H4Z71,AR Invitation,SENT


Write the invitation rows into the `inv_ch6` staging table.

In [None]:
# Stage the invitation rows in table inv_ch6 (append mode, DataFrame index not written).
# NOTE(review): append keeps rows from earlier runs in inv_ch6 — confirm the insert
# into invitations that follows is meant to see those older rows as well.
to_inv_email_df.to_sql(name='inv_ch6', con=business_pool, if_exists='append', index=False)

1

Insert the staged rows from the `inv_ch6` table into the `invitations` table.

In [None]:
%%sql
-- Copy staged invitations from inv_ch6 into invitations.
-- inv_ch6 is filled in append mode, so it still holds rows from earlier runs;
-- the NOT EXISTS guard skips any staged token that is already in invitations,
-- which makes this cell safe to re-run without creating duplicate invitations.
insert into invitations (id,recipients,message,sent_date,token,status,business_id)
(select nextval('invitations_id_seq'::regclass), s.recipients, s.message, now(), s.token, 'SENT', s.business_id
 from inv_ch6 s
 where not exists (select 1 from invitations i where i.token = s.token))

## Output to CSV

In [None]:
%%sql business_data <<
-- Invitation token and recipients for each business with id 104 or higher.
SELECT
    b.identifier,
    b.legal_name,
    b.legal_type,
    i.token,
    i.recipients
FROM business AS b
JOIN invitations AS i
    ON i.business_id = b.id
WHERE b.id >= 104

In [None]:
business_data

identifier,legal_name,legal_type,token,recipients
1053777,105377 B.C. LTD.,BC,Z6kF3DpM7w1wCHEabflch,test@example.com
1054220,BURGER FISHERIES LTD.,BC,IRv8EmjSIs64ME2HJWYPQ,test@example.com
1055486,LEBLANC INDUSTRIES,BC,NxvtyncFqpVt8Sob5EHmB,test@example.com
1056121,1056121 B.C. LTD.,BC,1Se32XORA6zLBIrhRwf65,test@example.com
1057322,1057322 B.C. LTD.,BC,U1LhAgjJ1AxtVaR7xBn75,test@example.com
1059306,1059306 B.C. LTD.,BC,HeYdLOTQBioPMWz855R0q,test@example.com


In [None]:
# Turn the query result into a DataFrame and add an invitation link per row.
bdf = business_data.DataFrame()
base_url = 'https://annualreport.business.bcregistry.gov.bc.ca/en-CA'
# The invitation token travels in the nanoid query parameter.
bdf['url'] = base_url + '?nanoid=' + bdf['token']
bdf

Unnamed: 0,identifier,legal_name,legal_type,token,recipients,url
0,733495,INGENOUS CONSULTING INC.,BC,A7xM7NzYu8bYNTmkWkMEJ,test@example.com,https://annualreport.business.bcregistry.gov.b...
1,733495,INGENOUS CONSULTING INC.,BC,A7xM7NzYu8bYNTmkWkMEJ,test@example.com,https://annualreport.business.bcregistry.gov.b...
2,733495,INGENOUS CONSULTING INC.,BC,A7xM7NzYu8bYNTmkWkMEJ,test@example.com,https://annualreport.business.bcregistry.gov.b...


In [None]:
import time
from datetime import datetime
from datetime import timezone

# Capture the current moment once as a POSIX timestamp.
time_stamp = time.time()
# Build an aware UTC datetime directly. datetime.utcfromtimestamp() is deprecated
# since Python 3.12 and returns a naive value that needs tzinfo patched on afterwards.
now = datetime.fromtimestamp(time_stamp, tz=timezone.utc)
# local_now = now.astimezone(Pacific)
# Convert to the machine's local timezone; the export cell uses this value in the CSV file name.
local_now = now.astimezone()
local_now.strftime("%Y.%m.%d.%H")

'2024.08.28.18'

In [None]:
# Append the report to a CSV file whose name carries the local date and hour.
# The handle is opened as UTF-8 with newline='' because to_csv does not apply its
# own encoding argument to an already-open text handle and expects newline
# translation to be disabled (otherwise Windows output gains blank lines).
# NOTE(review): append mode means a second run within the same hour adds another
# title and header block to the same file — confirm that is intended.
report_path = 'business-ar.' + local_now.strftime("%Y.%m.%d.%H") + '.csv'
with open(report_path, 'a', encoding='utf-8', newline='') as f:
    f.write('\n\n Business Annual Report\n')
    bdf.to_csv(f, sep=',', index=False)

## Set Invitations

In [None]:
%%sql
-- Create one SENT 'Annual Report Due' invitation for every business row that has
-- a row with the same id in b2, addressed to the fixed test recipient.
-- NOTE(review) the cohort CSV shown earlier has no nano_id column, so confirm
-- that b2 really provides nano_id before running this cell.
INSERT INTO invitations (id, recipients, message, sent_date, token, status, additional_message, business_id)
SELECT
    nextval('invitations_id_seq'::regclass),
    'test@example.com',
    'Annual Report Due',
    now(),
    staged.nano_id,
    'SENT',
    NULL,
    b.id
FROM business AS b
INNER JOIN b2 AS staged
    ON staged.id = b.id;
