# Fix submitter_roles for migrated staff filings script


In [None]:
%pip install pandas
%pip install sqlalchemy>=2.0
%pip install dotenv

## Load migrated group IDs from the environment

In [None]:
import os
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError, OperationalError
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# MIG_GROUP_IDS is read as a comma-separated list, e.g. "1,2,3".
# Defaulting to '' makes a missing variable reach the explicit ValueError below
# instead of failing with AttributeError on None.split().
# Only all-digit tokens are kept, so the joined string built below contains
# nothing but integers and commas.
MIG_GROUP_IDS = [int(x.strip()) for x in os.getenv('MIG_GROUP_IDS', '').split(',') if x.strip().isdigit()]
if not MIG_GROUP_IDS:
    raise ValueError("MIG_GROUP_IDS is empty! Need at least one group id.")

# Comma-joined form, ready to drop into an SQL "IN (...)" list.
mig_group_ids = ','.join(str(x) for x in MIG_GROUP_IDS)
print("Libraries imported and configuration loaded successfully.")

## Configure database connections

In [None]:
from urllib.parse import quote


def build_database_uri(db_config):
    """Return the postgresql:// connection URI for one connection-settings dict.

    Args:
        db_config: mapping with the keys 'username', 'password', 'host',
            'port' and 'name'.

    Returns:
        The URI string. Username and password are percent-encoded so that
        characters with a meaning inside a URI ('@', ':', '/', '#', ...) cannot
        be mistaken for delimiters. Unset values are rendered as the text
        'None', exactly as plain f-string interpolation would.
    """
    # safe='' also encodes '/', and quote() never emits a literal '+', so the
    # result decodes the same whether the consumer un-quotes '+' or not.
    username = quote(str(db_config['username']), safe='')
    password = quote(str(db_config['password']), safe='')
    return (
        f"postgresql://{username}:{password}"
        f"@{db_config['host']}:{db_config['port']}/{db_config['name']}"
    )


# Connection settings per database, read from the environment.
DATABASE_CONFIG = {
    'colin_extract': {
        'username': os.getenv("DATABASE_COLIN_EXTRACT_USERNAME"),
        'password': os.getenv("DATABASE_COLIN_EXTRACT_PASSWORD"),
        'host': os.getenv("DATABASE_COLIN_EXTRACT_HOST"),
        'port': os.getenv("DATABASE_COLIN_EXTRACT_PORT"),
        'name': os.getenv("DATABASE_COLIN_EXTRACT_NAME")
    },
    'lear': {
        'username': os.getenv("DATABASE_LEAR_USERNAME"),
        'password': os.getenv("DATABASE_LEAR_PASSWORD"),
        'host': os.getenv("DATABASE_LEAR_HOST"),
        'port': os.getenv("DATABASE_LEAR_PORT"),
        'name': os.getenv("DATABASE_LEAR_NAME")
    }
}

# Collapse each settings dict to {'uri': ...}; only the URI is kept from here on.
DATABASE_CONFIG = {
    db_key: {'uri': build_database_uri(db_config)}
    for db_key, db_config in DATABASE_CONFIG.items()
}

print("Database configurations successfully.")

In [None]:
# One SQLAlchemy engine per configured database, keyed like DATABASE_CONFIG.
engines = {}

for db_key, config in DATABASE_CONFIG.items():
    try:
        engine = create_engine(config['uri'])

        # Open a connection and run a trivial statement so that connection
        # problems are reported in this cell.
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))

        engines[db_key] = engine
        print(f"{db_key.upper()} database engine created and tested successfully.")

    except Exception as e:
        # Pick the message by exception type, most specific class first, then
        # re-raise so the failure still stops execution.
        if isinstance(e, OperationalError):
            print(f"{db_key.upper()} database connection failed: {e}")
        elif isinstance(e, SQLAlchemyError):
            print(f"{db_key.upper()} database engine creation failed: {e}")
        else:
            print(f"{db_key.upper()} unexpected error: {e}")
        raise

# Reverse lookup: engine object -> its configuration key.
ENGINE_NAMES = dict(zip(engines.values(), engines.keys()))

print("All database engines ready for use.")

## Find migrated filings and their event IDs
- Find the migrated corps
- Find their filings (except lear_tombstone & hide_in_ledger = false)
- Find the event_id related to each filing_id

In [None]:
# Get migrated corps identifiers.
# mig_group_ids is built from integer values only, so interpolating it here
# cannot alter the statement.
get_corps_query = f"""
    SELECT DISTINCT mcb.corp_num 
    FROM mig_corp_batch mcb
    JOIN mig_batch mb ON mcb.mig_batch_id = mb.id
    WHERE mb.mig_group_id IN ({mig_group_ids})
    ORDER BY mcb.corp_num
"""

corps_df = pd.read_sql(get_corps_query, engines['colin_extract'])
corp_count = len(corps_df)
if corp_count == 0:
    # An empty list would render as a bare "ARRAY[]", which PostgreSQL rejects
    # because it cannot infer the element type. Stop with a clear message.
    raise ValueError(f"No migrated corps found for mig_group_id(s) {mig_group_ids}.")

# Render the identifiers as a PostgreSQL text array literal. Single quotes are
# doubled so an unexpected quote in a value cannot break the statement.
identifiers_str = ','.join(
    "'" + corp_num.replace("'", "''") + "'"
    for corp_num in corps_df['corp_num'].astype(str)
)
identifiers_str = f"ARRAY[{identifiers_str}]"
print(f"found total {corp_count} corps")
print("-"*100)

# Get migrated filings -> event ids
get_filings_query = f"""
SELECT 
    b.identifier,
    f.id AS filing_id,
    cei.colin_event_id AS event_id
FROM businesses b
JOIN filings f ON b.id = f.business_id
JOIN colin_event_ids cei ON f.id = cei.filing_id
WHERE b.identifier = ANY({identifiers_str})
AND f.source = 'COLIN'
"""

event_id_df = pd.read_sql(get_filings_query, engines['lear'])
event_id_count = len(event_id_df)
print(f"found total {event_id_count} migrated filings / event_id")
if event_id_count == 0:
    # Same reasoning as above: event_id_str would become the invalid "ARRAY[]"
    # and the next lookup would fail with an opaque SQL error.
    raise ValueError("No migrated COLIN filings with an event_id were found for these corps.")

# Array literal of event ids, consumed by the submitter-role lookup.
event_id_str = ','.join(f"{event_id}" for event_id in event_id_df['event_id'].astype(str))
event_id_str = f"ARRAY[{event_id_str}]"

display(event_id_df)

## Fill in submitter_roles for staff filings
- Find the submitter role for each event_id
- Where the submitter is staff, write that role to the matching entries in the filings table

In [None]:
# Without any event ids the array literal below would be the invalid "ARRAY[]",
# so fail early with a readable message instead of an SQL error.
if event_id_df.empty:
    raise ValueError("No migrated filings / event ids available to look up submitter roles for.")

# Find out staff filings with the given event_ids
event_id_submitter_roles_query = f"""
SELECT
    event_id,
    role_typ_cd
FROM filing_user
WHERE event_id = ANY({event_id_str})
AND role_typ_cd = 'staff'
"""

event_id_submitter_roles_df = pd.read_sql(event_id_submitter_roles_query, engines['colin_extract'])
print(f"Get total {len(event_id_submitter_roles_df)} staff submitters for the given event_ids")
display(event_id_submitter_roles_df)
print('-'*100)

# Merge back to filings df. Only filings whose event_id has a staff row are
# wanted, which is exactly an inner join. drop_duplicates() guards against the
# same (event_id, role) pair being returned more than once, should that occur.
filings_results_df = (
    event_id_df
    .merge(event_id_submitter_roles_df.drop_duplicates(), on='event_id', how='inner')
    [['event_id', 'filing_id', 'role_typ_cd']]
    .drop_duplicates()
)

if filings_results_df.empty:
    print("No staff filings found to update. Exiting.")
    # Raise SystemExit directly: execution stops at this line, and a
    # plain-script run ends with status 0. The exit() helper is intended for
    # interactive sessions, not as program control flow.
    raise SystemExit(0)

print(f"Found {len(filings_results_df)} staff filings to update")
print('-'*100)

# Lift the row limit so every filing about to be updated can be reviewed.
with pd.option_context('display.max_rows', None):
    display(filings_results_df)

### Write the staff role to the matching rows of the filings table

In [None]:
# Distinct ids of the filings to flag. int() ensures that only integers are
# interpolated into the statement; the set removes repeated ids.
staff_filing_ids = sorted({int(filing_id) for filing_id in filings_results_df['filing_id']})

if not staff_filing_ids:
    # An empty id list would produce invalid SQL; there is nothing to do.
    print("No staff filings to update.")
else:
    filing_ids_str = ','.join(str(filing_id) for filing_id in staff_filing_ids)

    # A plain UPDATE is used because the goal is only to change existing rows.
    # INSERT ... ON CONFLICT DO UPDATE would try to create a partial row for
    # any id that is missing, and it raises when the same id appears twice in
    # one statement.
    update_query = f"""
        UPDATE filings
        SET submitter_roles = 'staff'
        WHERE id IN ({filing_ids_str})
    """
    try:
        # engine.begin() commits on success and rolls back if execute() raises.
        with engines['lear'].begin() as conn:
            updated_count = conn.execute(text(update_query)).rowcount
    except Exception as e:
        print(f"Error when updating filings table: {e}")
        # Re-raise so a failed update is not mistaken for a successful run.
        raise
    else:
        print("Updating filings table completed")
        print(f"Updated {updated_count} of {len(staff_filing_ids)} filings")