# AR filings data from LEAR

In [None]:
import os
import pandas as pd
import csv
import matplotlib
from datetime import datetime, timedelta
from IPython.core.display import HTML
from cloud_sql_connector import DBConfig, getconn
import pg8000
import sqlalchemy

# Load the IPython extension named `sql` so SQL magics are available in this notebook.
%load_ext sql
# Set SqlMagic's `displaylimit` option to 5 (presumably caps how many result rows are rendered — confirm against the installed extension's docs).
%config SqlMagic.displaylimit = 5

## Database Connection Setup

This notebook now uses the custom bcgov cloud-sql-connector library to connect to the database in GCP. 

Required environment variables:
- `DB_USER`: Database username
- `DB_NAME`: Database name  
- `DB_INSTANCE_CONNECTION_NAME`: Cloud SQL instance connection name (format: `project:region:instance`)

The connection automatically uses the `business` schema and handles IAM authentication through the custom connector.

The following cells create the connection to the Cloud SQL database using the custom bcgov cloud-sql-connector and prep the Jupyter magic for SQL.

In [None]:
# Cloud SQL connection settings, read from the environment when this cell runs.
# A variable that is not set is passed through as None.
config = DBConfig(
    instance_name=os.environ.get('DB_INSTANCE_CONNECTION_NAME'),  # project:region:instance
    database=os.environ.get('DB_NAME'),
    user=os.environ.get('DB_USER'),
    ip_type="public",  # use "private" instead if that matches your setup
    schema="business",  # schema is set directly on the config
)

def get_conn():
    """Open a Google Cloud SQL connection via the custom cloud-sql-connector.

    Uses the module-level ``config`` and returns whatever ``getconn`` produces.
    """
    connection = getconn(config)
    return connection

In [None]:
# Build the SQLAlchemy engine. The URL only names the dialect/driver because
# every connection is produced by get_conn().
engine = sqlalchemy.create_engine("postgresql+pg8000://", creator=get_conn)

print("Cloud SQL engine created successfully!")
print(f"Engine: {engine}")

# Smoke-test the engine with a trivial query; a failure is reported, not raised.
try:
    with engine.connect() as conn:
        probe = conn.execute(sqlalchemy.text("SELECT 1 as test"))
        print("✅ Database connection test successful!")
        first_row = probe.fetchone()
        print(f"Test result: {first_row}")
except Exception as exc:
    print(f"❌ Connection failed: {exc}")

## AR Filing Sync Failure Detection

The query below finds completed filings that are missing `colin_event_ids` (sync failures between LEAR and COLIN).

In [None]:
# Sync-failure detection: completed filings with no matching colin_event_ids row,
# skipping the listed filing types and the SP / GP / CP legal types.
query = """
SELECT b.identifier, f.filing_type, f.id, TO_CHAR(f.filing_date,'DD-MON-YYYY') AS filing_date
FROM business.filings f
LEFT JOIN business.colin_event_ids c ON f.id = c.filing_id
JOIN business.businesses b ON b.id = f.business_id
WHERE 
    c.filing_id IS NULL AND 
    f.filing_type NOT IN ('lear_epoch', 'adminFreeze', 'courtOrder', 'registrarsNotation', 'registrarsOrder') AND
    f.status = 'COMPLETED' AND
    b.legal_type NOT IN ('SP', 'GP', 'CP')
"""

try:
    ar_filings_data = pd.read_sql(query, engine)
    row_count = len(ar_filings_data)
    print(f"✅ AR filings data query successful! Found {row_count} rows")
    has_rows = not ar_filings_data.empty
    if has_rows:
        # Preview only the first few rows.
        display(ar_filings_data.head())
except Exception as exc:
    print(f"❌ AR filings data query failed: {exc}")
    # Fall back to an empty frame so later cells still have a DataFrame to work with.
    ar_filings_data = pd.DataFrame()


In [None]:
# Export the sync-failure rows (or a "no data" notice) to a dated CSV report.
corp_audit = ar_filings_data.copy()  # work on a copy of the frame from the query cell

# The report file name is stamped with yesterday's date.
datestr = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

# Create the output directory if needed so a fresh checkout does not fail
# with FileNotFoundError when the report is opened for writing.
output_dir = os.path.join(os.getcwd(), 'data')
os.makedirs(output_dir, exist_ok=True)
filename = os.path.join(output_dir, 'ar_filings_data_' + datestr + '.csv')

# Set the encoding on open(): to_csv's own `encoding` argument is not supported
# for an already-open text handle, so it previously had no effect.
# newline='' follows the pandas guidance for text handles and avoids doubled
# carriage returns on Windows.
with open(filename, 'w', encoding='utf-8', newline='') as f:
    if corp_audit.empty:
        f.write('No Data Retrieved for Sync Failure Notification')
    else:
        f.write('Sync Failure Notification :\n')
        corp_audit.to_csv(f, sep=',', index=False)
    f.write('\n\n')