## HCP Filtering
- Get Datasets from the results of Dataset Builder
- Identify prescriber NPIs that appear in the 2025 Rx dataset but not in the 2024 Rx dataset, and join them to the 2025 providers table
- Upload final dataset to MapLab

### Import Libraries & Connect to Snowflake

In [36]:
### Import Libraries
import os
import pprint
from datetime import datetime

import pandas as pd
from dotenv import load_dotenv, set_key
from komodo.analytics import (
    AnalyticDefinitionVersion,
    AnalyticDefinition,
    AnalyticDialect,
    InputType,
    create_analytic_definition,
    create_dataflow_from_analytic_definition_version,
    run_dataflow,
    check_dataflow_run_status,
)
from komodo.client import Client
from komodo.dataset import upload_dataset_to_maplab
from komodo.definitions.models.cohorts.cohort_create import CohortCreate
from komodo.snowflake import get_snowflake_connection

# Session timestamp, captured once here; the upload cell uses it to suffix the dataset name.
now = datetime.now()

# Work from the cookbook directory so relative paths (such as ".env") resolve there.
os.chdir("/home/dragon/workspaces/current/workspace/src/cookbook/")

# Komodo client used for the data-catalog lookups further down.
client = Client()

### Connect to Snowflake
print("--- Connecting to Snowflake ---")

# The account ID is read from the environment rather than hard-coded in the notebook.
account_id = os.environ.get("KOMODO_ACCOUNT_ID")
conn = get_snowflake_connection(account_id)

# Switch the session to the customer role before any queries are issued.
curs = conn.cursor()
curs.execute("USE ROLE CUSTOMER_ROLE")

print("--- Success connecting to Snowflake ---")

--- Connecting to Snowflake ---


DEBUG:komodo_connector.connection_creators.snowflake.connect:REST API object was created: f82bd78b-1a87-4b6c-a121-dd977d140a9d.snowflakecomputing.com:443


--- Success connecting to Snowflake ---


### Set Variables

In [37]:
### Cohort ID

# NOTE(review): cohort_id is not referenced by any other cell shown in this notebook -- confirm it is still needed.
cohort_id = "fltr_def_NRKYCATAYYCAFIKS"  # This is the Entyvia Cohort ID which you can get from either the UI or the Komodo Extensions tab in a Workspace

### Dataset IDs from Dataset Builder or through Cookbook 3-retrieve-cohort-data
# Each ID is resolved to a Snowflake table name by get_snowflake_location in a later cell.

# Rx datasets: the tables behind these IDs are queried for PRESCRIBER_NPI.
rx_2024 = "18a767b3-f04d-49d3-936a-1484f06ecf01"
rx_2025 = "79d88eff-5173-4e4a-b592-08a88eddf90c"
# Provider datasets: only the 2025 table is loaded into pandas below; the 2024 table is resolved and printed only.
providers_2024 = "fda09832-d237-4966-bd72-ad33fed772bd"
providers_2025 = "5daa29e2-4e05-4fe3-a08e-04ce4b056919"


### Get Snowflake Locations & Load into a pandas DataFrame

In [38]:
def get_snowflake_location(dataset_id: str) -> str:
    """Return the fully qualified name of a dataset's first manifestation.

    Looks the dataset up in the data catalog through the module-level
    ``client`` and reads ``fully_qualified_name`` from the first entry of
    its ``manifestations``.

    Args:
        dataset_id: ID of the dataset to look up in the data catalog.

    Returns:
        The ``fully_qualified_name`` of ``manifestations[0]``.
    """
    dataset = client.data_catalog.get_dataset(dataset_id)
    first_manifestation = dataset.manifestations[0]
    return first_manifestation.fully_qualified_name

# Resolve each dataset ID to the fully qualified name of its Snowflake table.
rx_2024_table = get_snowflake_location(rx_2024)
rx_2025_table = get_snowflake_location(rx_2025)
providers_2024_table = get_snowflake_location(providers_2024)
providers_2025_table = get_snowflake_location(providers_2025)


# Print each location under a label that matches the dataset it belongs to.
# The previous labels (Providers / Pharmacy Events / Patient Geography /
# Patient Demographics) did not correspond to these four tables.
print(f"Rx 2024 Snowflake Location: {rx_2024_table}")
print(f"Rx 2025 Snowflake Location: {rx_2025_table}")
print(f"Providers 2024 Snowflake Location: {providers_2024_table}")
print(f"Providers 2025 Snowflake Location: {providers_2025_table}")


Providers Snowflake Location: RESULTS.DATAFLOWS.DFB_18A767B3_F04D_49D3_936A_1484F06ECF01
Pharmacy Events Snowflake Location: RESULTS.DATAFLOWS.DFB_79D88EFF_5173_4E4A_B592_08A88EDDF90C
Patient Geography Snowflake Location: RESULTS.DATAFLOWS.DFB_FDA09832_D237_4966_BD72_AD33FED772BD
Patient Demographics Snowflake Location: RESULTS.DATAFLOWS.DFB_5DAA29E2_4E05_4FE3_A08E_04CE4B056919


In [42]:
# Load every row of the 2025 providers table from Snowflake into a pandas DataFrame.
providers_2025_query = f"select * from {providers_2025_table}"
providers_2025_data = pd.read_sql(providers_2025_query, conn)

  providers_2025_data = pd.read_sql(f"select * from {providers_2025_table}", conn)


### Additional Analysis / Transformations

In [51]:
# Prescriber NPIs present in the 2025 Rx table but absent from the 2024 Rx table.
# EXCEPT is a set operator, so each NPI comes back at most once.
unique_query = f"""
SELECT PRESCRIBER_NPI
FROM {rx_2025_table}
EXCEPT
SELECT PRESCRIBER_NPI
FROM {rx_2024_table}
"""

uniques = pd.read_sql(unique_query, conn)
print("--- Success querying data ---")

# Attach provider details; the inner join keeps only NPIs that are found in the 2025 providers table.
unique_providers = uniques.merge(providers_2025_data, left_on='PRESCRIBER_NPI', right_on='NPI', how='inner')
print("--- Success joining unique NPIs with providers_2025 ---")

# Preview through the cell's last expression so the notebook renders a table
# rather than the wrapped plain text that print() produces for a wide DataFrame.
unique_providers.head(25)

  uniques = pd.read_sql(unique_query, conn)


--- Success querying data ---
--- Success joining unique NPIs with providers_2025 ---
   PRESCRIBER_NPI FIRST_NAME HCO_PRIMARY_NPI     LAST_NAME         NPI  \
0      1376809616       RUTH      1689919599          CHAN  1376809616   
1      1740467521       ANNA      1912070756       KOEPPEL  1740467521   
2      1740771013      DEREK      1174578249         MEANS  1740771013   
3      1699203083  SHIVANTHA            None      AMARNATH  1699203083   
4      1437618246     HIMESH      1891782470         ZAVER  1437618246   
5      1699260554      NAJLA      1700824455          KHAN  1699260554   
6      1427028554       MARK      1831540822       MAHONEY  1427028554   
7      1467911149    RACHAEL      1821442518         SOLES  1467911149   
8      1043756794  BILLIE JO      1649203431     DAGENHART  1043756794   
9      1659997161     LAVANA            None  GREENE-HIGGS  1659997161   
10     1487807111  ALEXANDER      1437292927          PHAM  1487807111   
11     1013235464       SU

### Final Preview

In [52]:
# Show the merged result (new-in-2025 prescriber NPIs with their provider details) as the cell output.
unique_providers

Unnamed: 0,PRESCRIBER_NPI,FIRST_NAME,HCO_PRIMARY_NPI,LAST_NAME,NPI,ORGANIZATION_NAME,PRIMARY_SPECIALTY,PROVIDER_ADDRESS,PROVIDER_CITY,PROVIDER_PHONE_NUMBER,PROVIDER_STATE,PROVIDER_TYPE,PROVIDER_ZIP,SECONDARY_SPECIALTY
0,1376809616,RUTH,1689919599,CHAN,1376809616,,Internal Medicine,3691 RIDGE MILL DR,HILLIARD,6142934837,OH,INDIVIDUAL,43026,Rheumatology
1,1740467521,ANNA,1912070756,KOEPPEL,1740467521,,Pediatrics,"3606 MACLAY BLVD, SUITE 102",TALLAHASSEE,8508771162,FL,INDIVIDUAL,32312,
2,1740771013,DEREK,1174578249,MEANS,1740771013,,Family Medicine,12455 E 100TH ST N STE 350,OWASSO,9182745510,OK,INDIVIDUAL,74055,
3,1699203083,SHIVANTHA,,AMARNATH,1699203083,,Student in an Organized Health Care Education/...,"475 SEAVIEW AVE, STATEN ISLAND UNIVERSITY HOSP...",STATEN ISLAND,7182266205,NY,INDIVIDUAL,10305,
4,1437618246,HIMESH,1891782470,ZAVER,1437618246,,Student in an Organized Health Care Education/...,"1215 LEE ST., BOX # 800708",CHARLOTTESVILLE,4349242959,VA,INDIVIDUAL,22908,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207,1619283439,TEMITOPE,1639128069,OYERINDE,1619283439,,Registered Nurse,1000 LOCUST ST,RENO,7757867200,NV,INDIVIDUAL,89502,
208,1508160870,TANIA,1336265263,POSA,1508160870,,Family Medicine,9505 S STEELE ST,TACOMA,6094423064,WA,INDIVIDUAL,98444,
209,1265503221,SHAILAJA,1275840027,GUNNALA,1265503221,,Internal Medicine,"19636 N 27TH AVE, #207",PHOENIX,6235807240,AZ,INDIVIDUAL,85027,
210,1154785343,JEFFREY,1174582282,BANKS,1154785343,,Family Medicine,1111 S SAINT LOUIS AVE,TULSA,9186194400,OK,INDIVIDUAL,74120,


### Save Analysis Dataset(s) to MapLab

In [53]:
# Name of the dataset to upload: a fixed prefix followed by the session timestamp
# (`now`, captured in the setup cell) to make the name more distinct.
final_dataset_datetime = now.strftime("%Y%m%d_%H%M%S")
final_dataset_dataset_name = "ENTYVIA_UNIQUE_HCPS" + final_dataset_datetime

# Upload the joined provider DataFrame under that name.
dataset_upload_dataset = upload_dataset_to_maplab(unique_providers, final_dataset_dataset_name)

# Keep the new dataset's ID and print it explicitly: a bare `dataset_id`
# expression in the middle of a cell is never displayed.
dataset_id = dataset_upload_dataset.id
print(f"Uploaded dataset ID: {dataset_id}")

# Store the dataset ID in .env so subsequent cookbook files can use it.
# `set_key` is imported in the import cell at the top of the notebook.
set_key(".env", "dataset_id", dataset_id)

DEBUG:komodo_connector.connection_creators.snowflake.connect:REST API object was created: f82bd78b-1a87-4b6c-a121-dd977d140a9d.snowflakecomputing.com:443


(True, 'dataset_id', 'f2d58179-edc7-45a4-9a2c-3a495a115c7e')