In [1]:
import os
import pandas as pd

def load_parquet_files(directory):
    """
    Load every ``.parquet`` entry found directly inside ``directory``.

    Only the immediate entries of ``directory`` are examined (subfolders are
    not walked), and an entry is selected when its name ends with
    ``.parquet`` (case-sensitive).

    Parameters
    ----------
    directory : str or os.PathLike
        Folder to scan for Parquet files.

    Returns
    -------
    dict
        Maps each loaded entry's name without its extension (for example
        ``claims`` for ``claims.parquet``) to the DataFrame read from it.
        Empty when ``directory`` is missing, is not a directory, or when no
        entry could be loaded.

    Notes
    -----
    Loading is best-effort: an entry that cannot be read is reported with
    ``print`` and skipped, so one bad file does not abort the others.
    """
    dataframes = {}
    # isdir (not exists): a path that exists but is a regular file would pass
    # an existence check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(directory):
        print(f"Directory not found: {directory}")
        return dataframes

    for filename in os.listdir(directory):
        if not filename.endswith('.parquet'):
            continue
        file_path = os.path.join(directory, filename)
        try:
            df = pd.read_parquet(file_path)
        except Exception as e:
            # Deliberately broad: report the failure and keep loading the rest.
            print(f"Failed to load {filename}: {e}")
        else:
            dataframes[os.path.splitext(filename)[0]] = df
            print(f"Loaded {filename}")

    return dataframes

# Folder holding the standardized data that the analysis below starts from:
# <current working directory>/../data/processed
stndz_dir = os.path.join(os.getcwd(), os.pardir, 'data', 'processed')

# Read each Parquet file in that folder into a dict of DataFrames keyed by
# the file name without its extension.
dataframes = load_parquet_files(directory=stndz_dir)


Loaded demographics.parquet
Loaded facilities.parquet
Loaded claims.parquet


In [3]:
# Pull the three loaded tables out of the dictionary by name.
claims = dataframes['claims']
demographics = dataframes['demographics']
facilities = dataframes['facilities']

# Attach clinician demographics to each claim (clinician_npi -> npi), then
# attach the facility record (facility_npi -> npi). Both joins use the
# default inner join.
joined_df = (
    claims
    .merge(demographics, left_on='clinician_npi', right_on='npi')
    .merge(facilities, left_on='facility_npi', right_on='npi')
)

# Keep only the rows belonging to the specialties of interest.
specialties = ['INTERNAL MEDICINE', 'UROLOGY', 'ANESTHESIOLOGY']
filtered_df = joined_df.loc[joined_df['specialty'].isin(specialties)]

# For each specialty, collect the distinct organization names.
grouped = (
    filtered_df
    .groupby('specialty')['organization_name']
    .unique()
)

# Print one section per specialty: a header line, one bullet per
# organization, then a blank separator line.
for specialty, organizations in grouped.items():
    report_lines = [f"Specialty: {specialty}"]
    report_lines.extend(f" - {organization}" for organization in organizations)
    print("\n".join(report_lines), end="\n\n")


Specialty: ANESTHESIOLOGY
 - LUTHERAN HOSPITAL
 - FAIRVIEW HOSPITAL
 - ST VINCENT CHARITY MEDICAL CENTER
 - THE CLEVELAND CLINIC FOUNDATION
 - UNIVERSITY HOSPITALS CLEVELAND MEDICAL CENTER
 - THE METROHEALTH SYSTEM
 - CLEVELAND CLINIC CHILDREN'S HOSPITAL FOR REHABILITATION

Specialty: INTERNAL MEDICINE
 - LUTHERAN HOSPITAL
 - FAIRVIEW HOSPITAL
 - ST VINCENT CHARITY MEDICAL CENTER
 - THE CLEVELAND CLINIC FOUNDATION
 - UNIVERSITY HOSPITALS CLEVELAND MEDICAL CENTER
 - THE METROHEALTH SYSTEM
 - CLEVELAND CLINIC CHILDREN'S HOSPITAL FOR REHABILITATION
 - SELECT SPECIALTY HOSPITAL - CLEVELAND, LLC

Specialty: UROLOGY
 - LUTHERAN HOSPITAL
 - FAIRVIEW HOSPITAL
 - THE CLEVELAND CLINIC FOUNDATION
 - UNIVERSITY HOSPITALS CLEVELAND MEDICAL CENTER
 - THE METROHEALTH SYSTEM

