In [1]:
import os
import pandas as pd
import warnings
from komodo.snowflake import get_snowflake_connection
from komodo.client import Client

# Komodo API client; a later cell calls client.data_catalog.get_dataset through it.
client = Client()

# Notebook-wide settings, then the Snowflake connection.
# Suppress every warning of category UserWarning from this point on.
warnings.filterwarnings("ignore", category=UserWarning)
# None removes the cap on how many columns pandas displays.
pd.set_option("display.max_columns", None)
# os.getenv returns None when KOMODO_ACCOUNT_ID is unset; the value is not checked here.
# NOTE(review): confirm get_snowflake_connection handles a None account id sensibly.
account_id = os.getenv("KOMODO_ACCOUNT_ID")
conn = get_snowflake_connection(account_id)
curs = conn.cursor()
# Switch the session to CUSTOMER_ROLE before any queries are issued.
curs.execute("USE ROLE CUSTOMER_ROLE")

KeyboardInterrupt: 

In [None]:
!pip install streamlit polars plotly 


Looking in indexes: https://pulp.onkomodo.com/pypi/internal/simple/


In [None]:
# Function to output the Snowflake table location for our PLAID datasets
def get_snowflake_location_from_dataset_id(dataset_id):
    """Look up the Snowflake location of a dataset through the data catalog.

    Fetches the dataset with ``client.data_catalog.get_dataset`` and reads the
    ``fully_qualified_name`` attribute of the first entry in the response's
    ``manifestations``.

    Args:
        dataset_id: Identifier passed unchanged to ``get_dataset``.

    Returns:
        The ``fully_qualified_name`` of the first manifestation. ``None`` is
        returned when the lookup raises, the response is empty, there are no
        manifestations, or the attribute is absent; a diagnostic message is
        printed in each of those cases. Exceptions from the lookup are caught
        and reported, not propagated.
    """
    # Bind the result up front: previously it was only assigned on the success
    # path, so every failure path ended in UnboundLocalError at the return.
    snowflake_location = None

    try:
        # Retrieves a dataset. Requires permission platform_data_integration:dataset:read
        get_dataset_response = client.data_catalog.get_dataset(dataset_id)
    except Exception as e:
        print(f"Exception when calling DatasetsApi->get_dataset: {e}")
        get_dataset_response = None

    if not get_dataset_response:
        print("Dataset response is empty or invalid.")
        return snowflake_location

    try:
        # A missing attribute is treated the same as an empty list.
        manifestations = getattr(get_dataset_response, "manifestations", [])

        if manifestations and len(manifestations) > 0:
            # Only the first manifestation is consulted.
            snowflake_location = getattr(
                manifestations[0], "fully_qualified_name", None
            )
            if not snowflake_location:
                print(
                    "Snowflake location not found in the first manifestation object."
                )
        else:
            print("No data found in the dataset response.")
    except AttributeError as attr_error:
        print(f"Error accessing table name: {attr_error}")

    return snowflake_location

In [None]:
# PLAID dataset IDs for the cancer cohort. Replace these values to point the
# notebook at a different cohort.
medical_events_id = "4aa719bf-ede6-471a-aef5-73c5c43f8aea"
pharmacy_events_id = "f96771d3-0300-48e0-a96a-0a97debc33f7"
patient_mortality_id = "7fd3aa7a-3a46-4255-a985-f41abb34d454"
patient_demographics_id = "fc518f94-c2ed-4038-ada9-3f0873577acf"
race_ethnicity_id = "5c89a860-c53f-4be5-827f-747a95391c98"
providers_id = "413ee509-d67a-45c1-bd1e-83c00839f62f"
insurance_id = "c2211b91-c4f7-41c1-a80c-62a4119a01f4"
patient_geographic_id = "a04b9d6c-abdc-4c0c-bb78-82d17db39672"
plans_id = "04fb0851-4b0a-4948-bbc3-c880c24b5ef0"
closed_patients = "a352fa1e-0fe1-4a87-b8eb-baaaed273bbc"

# Resolve every ID to its Snowflake location. The targets on the left are
# matched positionally with the IDs on the right, so the two lists must stay
# in the same order.
(
    plaid_mx,
    plaid_rx,
    morty,
    demo,
    race,
    providers,
    insurance,
    geo,
    plans,
    patients,
) = [
    get_snowflake_location_from_dataset_id(dataset_id)
    for dataset_id in (
        medical_events_id,
        pharmacy_events_id,
        patient_mortality_id,
        patient_demographics_id,
        race_ethnicity_id,
        providers_id,
        insurance_id,
        patient_geographic_id,
        plans_id,
        closed_patients,
    )
]

In [None]:
# Sampling percentage. No cell visible here references it.
# NOTE(review): presumably meant to drive a sampling clause in the query — confirm or remove.
sample_pct = 5

In [None]:
# SQL text for the extract: rows from the {plaid_mx} table joined to the
# providers (on rendering NPI), demographics and geography (on patient_id),
# and plans (on kh_plan_id) tables. Table names are interpolated from the
# location variables when this cell runs.
# NOTE(review): no column from the `g` (geo) alias is selected, so that join
# only affects which rows come back and how many — confirm it is intentional.
# NOTE(review): sample_pct is not used in this query.
behemoth = f"""
SELECT
    mx.patient_id,
    mx.medical_event_id,
    mx.procedure_code,
    mx.service_date,
    mx.place_of_service,
    mx.rendering_npi,
    mx.diagnosis_codes,
    pr.first_name,
    pr.primary_specialty,
    pr.provider_state,
    pr.provider_zip,
    pl.payer_name,
    pl.kh_plan_id,
    pl.parent_name,
    d.patient_yob,
    d.patient_gender
FROM
    {plaid_mx} AS mx
JOIN
    {providers} AS pr ON mx.rendering_npi = pr.npi
JOIN
    {demo} AS d ON mx.patient_id = d.patient_id
JOIN
    {geo} AS g ON mx.patient_id = g.patient_id
JOIN
    {plans} AS pl ON mx.kh_plan_id = pl.kh_plan_id
"""

In [None]:
# Run the query over the open Snowflake connection and load the result set
# into a DataFrame.
komodo_df = pd.read_sql(sql=behemoth, con=conn)

# Write the extract to a CSV in the working directory, without the index column.
komodo_df.to_csv(path_or_buf="komodo_procedure_adoption.csv", index=False)