# WorkSafe and Motor Vehicle audit report

In [None]:
import os
import pandas as pd
import csv
import matplotlib
from datetime import datetime, timedelta
from IPython.core.display import HTML
from cloud_sql_connector import DBConfig, getconn
import pg8000
import sqlalchemy

# Load the IPython extension named "sql" (expected to register the %sql / %%sql magics).
# Keep comments on their own lines: text after a line magic is passed to the magic.
%load_ext sql
# Set SqlMagic's displaylimit option to 5.
# NOTE(review): presumably caps the rows rendered for %sql results only; the pandas
# queries below do their own .head() preview -- confirm against the extension docs.
%config SqlMagic.displaylimit = 5

In [None]:
# Initialize DBConfig for Cloud SQL connection
def _require_env(name):
    """Return the value of the environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. Failing here gives a
            clear message instead of passing ``None`` into ``DBConfig`` and
            getting an unrelated-looking connection error later.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Required environment variable {name!r} is not set")
    return value


config = DBConfig(
    instance_name=_require_env('DB_INSTANCE_CONNECTION_NAME'),  # Cloud SQL instance connection name
    database=_require_env('DB_NAME'),
    user=_require_env('DB_USER'),
    ip_type="public",  # or "private" depending on your setup
    schema="business"  # Set the schema directly in config
)

def get_conn():
    """Create a connection to Google Cloud SQL using the custom cloud-sql-connector.

    Takes no arguments so it can be handed to SQLAlchemy as a ``creator``
    callable; the connection settings come from the module-level ``config``.
    """
    return getconn(config)

## Database Connection Setup

This notebook now uses the custom bcgov cloud-sql-connector library to connect to the database in GCP. 

Required environment variables:
- `DB_USER`: Database username
- `DB_NAME`: Database name  
- `DB_INSTANCE_CONNECTION_NAME`: Cloud SQL instance connection name (format: `project:region:instance`)

The connection automatically uses the `business` schema and handles IAM authentication through the custom connector.

This will create the connection to the Cloud SQL database using the custom bcgov cloud-sql-connector and prepare the Jupyter SQL magic.

In [None]:
# Build the SQLAlchemy engine. No URL details are needed beyond the dialect and
# driver because every DBAPI connection is produced by get_conn().
engine = sqlalchemy.create_engine("postgresql+pg8000://", creator=get_conn)

# These messages only confirm that the engine object exists; the first real
# connection is opened by the smoke test below.
print("Custom cloud-sql-connector engine created successfully!")
print(f"Engine: {engine}")

# Smoke test: run a trivial SELECT and report the outcome. A failure is printed
# rather than raised, so the notebook keeps running.
try:
    with engine.connect() as connection:
        result = connection.execute(sqlalchemy.text("SELECT 1 as test"))
        print("✅ Database connection test successful!")
        print(f"Test result: {result.fetchone()}")
except Exception as err:
    print(f"❌ Connection failed: {err}")

WorkSafe: Audit of businesses for malicious activity

In [None]:
# WorkSafe business audit: every filing dated yesterday (America/Vancouver
# calendar day) for the business identifiers on the WorkSafe watch list.
# NOTE(review): 'BC0842217' is listed twice in the IN clause below. This does not
# change the result, but confirm a different identifier was not intended.
query = """
SELECT b.identifier, f.filing_type, f.transaction_id, 
       f.filing_date AT TIME ZONE 'America/Vancouver' as filing_date
FROM business.filings f, business.businesses b
WHERE f.business_id = b.id
  AND b.identifier IN ('BC1381431', 'S0044626', 'S0034315','BC0786328','BC0455835','FM0329275','FM0326476','FM0165333','FM0232910','FM0556598','BC0443045',
                       'BC0812666','BC0613715', 'S0037540','BC0047143','BC0937794','BC1182816','FM0753730','BC1118811','BC1196208','BC0842217','BC0559626',
                       'BC1162353','BC0961510','BC0282129','BC1307937', 'A0120144','BC0527597','BC0842217','BC1155160')
  AND TO_CHAR(filing_date AT TIME ZONE 'America/Vancouver','yyyymmdd') = TO_CHAR(now() AT TIME ZONE 'America/Vancouver' - INTERVAL '1 day','yyyymmdd')
"""

# Run the query. Any failure is printed and replaced by an empty frame so the
# following cells can still execute.
# NOTE(review): the report-writing cell prints "No Data Retrieved" for an empty
# frame, so a failed query and a quiet day look the same in the CSV.
try:
    worksafe_corp_audit = pd.read_sql(sql=query, con=engine)
    print(f"✅ WorkSafe corp audit query successful! Found {len(worksafe_corp_audit)} rows")
    if not worksafe_corp_audit.empty:
        display(worksafe_corp_audit.head())  # preview of the first rows only
except Exception as err:
    print(f"❌ WorkSafe corp audit query failed: {err}")
    worksafe_corp_audit = pd.DataFrame()  # empty stand-in used by the next cell


In [None]:
# Start yesterday's audit report file with the WorkSafe business section.
corp_audit = worksafe_corp_audit.copy()  # Use the DataFrame from the previous query

# Report date = yesterday according to the kernel's local clock.
# NOTE(review): the SQL filters use America/Vancouver time; if the kernel runs in
# another time zone this label can differ from the day actually queried.
datestr = datetime.strftime(datetime.now() - timedelta(1), '%Y-%m-%d')

# Create the output directory if needed so open() cannot fail on a fresh checkout.
report_dir = os.path.join(os.getcwd(), 'data')
os.makedirs(report_dir, exist_ok=True)
filename = os.path.join(report_dir, 'worksafe_audit_' + datestr + '.csv')

# Open the handle as UTF-8 explicitly: to_csv() does not apply its own `encoding`
# argument to an already-open text handle, so the handle decides the encoding.
# Mode 'w' truncates any earlier report for the same date; later cells append.
with open(filename, 'w', encoding='utf-8') as f:
    if corp_audit.empty:
        f.write('No Data Retrieved for WorkSafe businesses of interest ' + datestr)
    else:
        f.write('WorkSafe: Audit of businesses for malicious activity ' + datestr + ':\n')
        corp_audit.to_csv(f, sep=',', index=False)
    f.write('\n\n')  # blank lines separate this section from the next one

WorkSafe: Audit of people for malicious activity
Names to watch out for: Andrew GALLO, Dean GALLO, Brandie BLOOR, Anthony HOLLER, Stephanie MCMILLAN, Norman LABONTE, Amber THOMPSON, Lindsay SWANSON, Marie TRONSON, Susan MYERS

In [None]:
# WorkSafe people audit (Parties table): filings dated yesterday
# (America/Vancouver calendar day) whose party rows carry one of the watched names.
# NOTE(review): the markdown above lists names that this WHERE clause does not
# check (e.g. Andrew GALLO, Anthony HOLLER), while MARK FORREST is checked here but
# is not on that list -- confirm which of the two is current.
query = """
SELECT b.identifier, f.filing_type, f.transaction_id, 
       f.filing_date AT TIME ZONE 'America/Vancouver' as filing_date, 
       p.first_name, p.last_name
FROM business.parties_version p, business.filings f, business.businesses b
WHERE (
       (UPPER(first_name) = 'BRANDIE' AND UPPER(last_name) = 'BLOOR')
    OR (UPPER(first_name) = 'NORMAN' AND UPPER(last_name) = 'LABONTE')
    OR (UPPER(first_name) = 'NORM' AND UPPER(last_name) = 'LABONTE')
    OR (UPPER(first_name) = 'DEAN' AND UPPER(last_name) = 'GALLO')
    OR (UPPER(first_name) = 'MARK' AND UPPER(last_name) = 'FORREST')
)
AND p.transaction_id = f.transaction_id
AND f.business_id = b.id
AND TO_CHAR(filing_date AT TIME ZONE 'America/Vancouver','yyyymmdd') = TO_CHAR(now() AT TIME ZONE 'America/Vancouver' - INTERVAL '1 day','yyyymmdd')
"""

# Run the query. A failure is printed and replaced by an empty frame so the
# report-writing cell that follows still has something to work with.
try:
    worksafe_name_audit = pd.read_sql(sql=query, con=engine)
    print(f"✅ WorkSafe name audit query successful! Found {len(worksafe_name_audit)} rows")
    if not worksafe_name_audit.empty:
        display(worksafe_name_audit.head())  # preview of the first rows only
except Exception as err:
    print(f"❌ WorkSafe name audit query failed: {err}")
    worksafe_name_audit = pd.DataFrame()  # empty stand-in used by the next cell


In [None]:
# Append the WorkSafe people (Parties table) section to the report.
# `filename` and `datestr` are set by the cell that starts the report file.
name_audit = worksafe_name_audit.copy()  # Use the DataFrame from the previous query

# Open the handle as UTF-8 explicitly: to_csv() does not apply its own `encoding`
# argument to an already-open text handle, and party names may be non-ASCII.
with open(filename, 'a', encoding='utf-8') as f:
    if name_audit.empty:
        f.write('No Data Retrieved for WorkSafe names of interest from Parties table ' + datestr)
    else:
        f.write('WorkSafe: Audit of people for malicious activity from Parties table ' + datestr + ':\n')
        name_audit.to_csv(f, sep=',', index=False)
    f.write('\n\n')  # blank lines separate this section from the next one

In [None]:
# WorkSafe people audit (Users table): user version rows created yesterday
# (America/Vancouver calendar day) whose first/last name is on the watch list.
query = """
SELECT username, firstname, lastname, transaction_id, 
       creation_date AT TIME ZONE 'America/Vancouver' as creation_date
FROM business.users_version u
WHERE (
       (UPPER(firstname) = 'BRANDIE' AND UPPER(lastname) = 'BLOOR')
    OR (UPPER(firstname) = 'NORMAN' AND UPPER(lastname) = 'LABONTE')
    OR (UPPER(firstname) = 'NORM' AND UPPER(lastname) = 'LABONTE')
    OR (UPPER(firstname) = 'DEAN' AND UPPER(lastname) = 'GALLO')
    OR (UPPER(firstname) = 'MARK' AND UPPER(lastname) = 'FORREST')
)
AND TO_CHAR(creation_date AT TIME ZONE 'America/Vancouver','yyyymmdd') = TO_CHAR(now() AT TIME ZONE 'America/Vancouver' - INTERVAL '1 day','yyyymmdd')
"""

# Run the query. A failure is printed and replaced by an empty frame so the
# report-writing cell that follows still has something to work with.
try:
    worksafe_name_audit2 = pd.read_sql(sql=query, con=engine)
    print(f"✅ WorkSafe name audit (users) query successful! Found {len(worksafe_name_audit2)} rows")
    if not worksafe_name_audit2.empty:
        display(worksafe_name_audit2.head())  # preview of the first rows only
except Exception as err:
    print(f"❌ WorkSafe name audit (users) query failed: {err}")
    worksafe_name_audit2 = pd.DataFrame()  # empty stand-in used by the next cell



In [None]:
# Append the WorkSafe people (Users table) section to the report.
# `filename` and `datestr` are set by the cell that starts the report file.
name_audit2 = worksafe_name_audit2.copy()  # Use the DataFrame from the previous query

# Open the handle as UTF-8 explicitly: to_csv() does not apply its own `encoding`
# argument to an already-open text handle, and user names may be non-ASCII.
with open(filename, 'a', encoding='utf-8') as f:
    if name_audit2.empty:
        f.write('No Data Retrieved for WorkSafe names of interest from Users table ' + datestr)
    else:
        f.write('WorkSafe: Audit of people for malicious activity from Users table ' + datestr + ':\n')
        name_audit2.to_csv(f, sep=',', index=False)
    f.write('\n\n')  # blank lines separate this section from the next one

Motor Vehicle: Audit of businesses for malicious activity

In [None]:
# Motor Vehicle business audit: every filing dated yesterday (America/Vancouver
# calendar day) for the single watched identifier BC1367734.
query = """
SELECT b.identifier, f.filing_type, f.transaction_id, 
       f.filing_date AT TIME ZONE 'America/Vancouver' as filing_date
FROM business.filings f, business.businesses b
WHERE f.business_id = b.id
  AND b.identifier = 'BC1367734'
  AND TO_CHAR(filing_date AT TIME ZONE 'America/Vancouver','yyyymmdd') = TO_CHAR(now() AT TIME ZONE 'America/Vancouver' - INTERVAL '1 day','yyyymmdd')
"""

# Run the query. A failure is printed and replaced by an empty frame so the
# report-writing cell that follows still has something to work with.
try:
    motor_vehicle_audit = pd.read_sql(sql=query, con=engine)
    print(f"✅ Motor Vehicle audit query successful! Found {len(motor_vehicle_audit)} rows")
    if not motor_vehicle_audit.empty:
        display(motor_vehicle_audit.head())  # preview of the first rows only
except Exception as err:
    print(f"❌ Motor Vehicle audit query failed: {err}")
    motor_vehicle_audit = pd.DataFrame()  # empty stand-in used by the next cell


In [None]:
# Append the Motor Vehicle business section to the report.
# `filename` and `datestr` are set by the cell that starts the report file.
motor_vehicle_audit_result = motor_vehicle_audit.copy()  # Use the DataFrame from the previous query

# Open the handle as UTF-8 explicitly: to_csv() does not apply its own `encoding`
# argument to an already-open text handle, so the handle decides the encoding.
with open(filename, 'a', encoding='utf-8') as f:
    if motor_vehicle_audit_result.empty:
        f.write('No Data Retrieved for Motor Vehicle businesses of interest ' + datestr)
    else:
        f.write('Motor Vehicle: Audit of businesses for malicious activity ' + datestr + ':\n')
        motor_vehicle_audit_result.to_csv(f, sep=',', index=False)
    f.write('\n\n')  # blank lines separate this section from the next one

Motor Vehicle: Audit of people for malicious activity

Names to watch out for: Mike TEPERTO, Caitlyn THOMSON


In [None]:
# Motor Vehicle people audit (Parties table): filings dated yesterday
# (America/Vancouver calendar day) whose party rows match a watched surname plus
# a first-name prefix ('MI%' / 'CA%'), newest filing first.
query = """
SELECT b.identifier, f.filing_type, f.transaction_id, 
       f.filing_date AT TIME ZONE 'America/Vancouver' as filing_date, 
       p.first_name, p.last_name
FROM business.parties_version p, business.filings f, business.businesses b
WHERE (
       (UPPER(last_name) = 'TEPERTO' AND UPPER(first_name) LIKE 'MI%')
    OR (UPPER(last_name) = 'THOMSON' AND UPPER(first_name) LIKE 'CA%')
)
AND p.transaction_id = f.transaction_id
AND f.business_id = b.id
AND TO_CHAR(filing_date AT TIME ZONE 'America/Vancouver','yyyymmdd') = TO_CHAR(now() AT TIME ZONE 'America/Vancouver' - INTERVAL '1 day','yyyymmdd')
ORDER BY filing_date DESC
"""

# Run the query. A failure is printed and replaced by an empty frame so the
# report-writing cell that follows still has something to work with.
try:
    motor_vehicle_name_audit1 = pd.read_sql(sql=query, con=engine)
    print(f"✅ Motor Vehicle name audit (parties) query successful! Found {len(motor_vehicle_name_audit1)} rows")
    if not motor_vehicle_name_audit1.empty:
        display(motor_vehicle_name_audit1.head())  # preview of the first rows only
except Exception as err:
    print(f"❌ Motor Vehicle name audit (parties) query failed: {err}")
    motor_vehicle_name_audit1 = pd.DataFrame()  # empty stand-in used by the next cell


In [None]:
# Append the Motor Vehicle people (Parties table) section to the report.
# `filename` and `datestr` are set by the cell that starts the report file.
motor_vehicle_name_audit1_result = motor_vehicle_name_audit1.copy()  # Use the DataFrame from the previous query

# Open the handle as UTF-8 explicitly: to_csv() does not apply its own `encoding`
# argument to an already-open text handle, and party names may be non-ASCII.
with open(filename, 'a', encoding='utf-8') as f:
    if motor_vehicle_name_audit1_result.empty:
        f.write('No Data Retrieved for Motor Vehicle names of interest from Parties table ' + datestr)
    else:
        f.write('Motor Vehicle: Audit of people for malicious activity from Parties table ' + datestr + ':\n')
        motor_vehicle_name_audit1_result.to_csv(f, sep=',', index=False)
    f.write('\n\n')  # blank lines separate this section from the next one

In [None]:
# Motor Vehicle people audit (Users table): user version rows created yesterday
# (America/Vancouver calendar day) that match a watched surname plus a
# first-name prefix ('MI%' / 'CA%').
query = """
SELECT username, firstname, lastname, transaction_id, 
       creation_date AT TIME ZONE 'America/Vancouver' as creation_date
FROM business.users_version u
WHERE (
       (UPPER(lastname) = 'TEPERTO' AND UPPER(firstname) LIKE 'MI%')
    OR (UPPER(lastname) = 'THOMSON' AND UPPER(firstname) LIKE 'CA%')
)
AND TO_CHAR(creation_date AT TIME ZONE 'America/Vancouver','yyyymmdd') = TO_CHAR(now() AT TIME ZONE 'America/Vancouver' - INTERVAL '1 day','yyyymmdd')
"""

# Run the query. A failure is printed and replaced by an empty frame so the
# report-writing cell that follows still has something to work with.
try:
    motor_vehicle_name_audit2 = pd.read_sql(sql=query, con=engine)
    print(f"✅ Motor Vehicle name audit (users) query successful! Found {len(motor_vehicle_name_audit2)} rows")
    if not motor_vehicle_name_audit2.empty:
        display(motor_vehicle_name_audit2.head())  # preview of the first rows only
except Exception as err:
    print(f"❌ Motor Vehicle name audit (users) query failed: {err}")
    motor_vehicle_name_audit2 = pd.DataFrame()  # empty stand-in used by the next cell


In [None]:
# Append the Motor Vehicle people (Users table) section to the report.
# `filename` and `datestr` are set by the cell that starts the report file.
motor_vehicle_name_audit2_result = motor_vehicle_name_audit2.copy()  # Use the DataFrame from the previous query

# Open the handle as UTF-8 explicitly: to_csv() does not apply its own `encoding`
# argument to an already-open text handle, and user names may be non-ASCII.
with open(filename, 'a', encoding='utf-8') as f:
    if motor_vehicle_name_audit2_result.empty:
        f.write('No Data Retrieved for Motor Vehicle names of interest from Users table ' + datestr)
    else:
        f.write('Motor Vehicle: Audit of people for malicious activity from Users table ' + datestr + ':\n')
        motor_vehicle_name_audit2_result.to_csv(f, sep=',', index=False)
    f.write('\n\n')  # blank lines separate this section from the next one

Richmond RCMP: Audit of businesses for malicious activity (file 2023-1790)

In [None]:
# Richmond RCMP business audit (file 2023-1790): every filing dated yesterday
# (America/Vancouver calendar day) for the single watched identifier BC1374478.
query = """
SELECT b.identifier, f.filing_type, f.transaction_id, 
       f.filing_date AT TIME ZONE 'America/Vancouver' as filing_date
FROM business.filings f, business.businesses b
WHERE f.business_id = b.id
  AND b.identifier = 'BC1374478'
  AND TO_CHAR(filing_date AT TIME ZONE 'America/Vancouver','yyyymmdd') = TO_CHAR(now() AT TIME ZONE 'America/Vancouver' - INTERVAL '1 day','yyyymmdd')
"""

# Run the query. A failure is printed and replaced by an empty frame so the
# report-writing cell that follows still has something to work with.
try:
    rcmp_audit = pd.read_sql(sql=query, con=engine)
    print(f"✅ RCMP audit query successful! Found {len(rcmp_audit)} rows")
    if not rcmp_audit.empty:
        display(rcmp_audit.head())  # preview of the first rows only
except Exception as err:
    print(f"❌ RCMP audit query failed: {err}")
    rcmp_audit = pd.DataFrame()  # empty stand-in used by the next cell


In [None]:
# Append the Richmond RCMP business section, the last section of the report.
# `filename` and `datestr` are set by the cell that starts the report file.
rcmp_audit_result = rcmp_audit.copy()  # Use the DataFrame from the previous query

# Open the handle as UTF-8 explicitly: to_csv() does not apply its own `encoding`
# argument to an already-open text handle, so the handle decides the encoding.
# Unlike the earlier sections, no trailing blank lines are written here.
with open(filename, 'a', encoding='utf-8') as f:
    if rcmp_audit_result.empty:
        f.write('No Data Retrieved for RCMP businesses of interest ' + datestr)
    else:
        f.write('Richmond RCMP: Audit of businesses for malicious activity (file 2023-1790) ' + datestr + ':\n')
        rcmp_audit_result.to_csv(f, sep=',', index=False)
