In [46]:
import pandas as pd
import numpy as np

In [47]:
# Load data

# Use a forward slash in the relative path: it is accepted on every platform, whereas a
# backslash in a normal string literal forms an (invalid) "\m" escape sequence and only
# acts as a directory separator on Windows.
df = pd.read_csv('data/minnesota_hospitals.csv')

# Report header plus the number of rows that were read from the CSV.
print("=" * 80)
print("MINNESOTA HOSPITAL READMISSION ANALYSIS")
print("=" * 80)
print(f"\nAnalyzing {len(df)} hospital records from Minnesota\n")

MINNESOTA HOSPITAL READMISSION ANALYSIS

Analyzing 276 hospital records from Minnesota



In [48]:
# Understanding the data structure

# List each distinct measure (in order of first appearance) with the number of rows it has
print("AVAILABLE READMISSION MEASURES\n")
measure_column = df['Measure Name']
measures = measure_column.unique()
for position, measure in enumerate(measures, start=1):
    # Summing the boolean mask counts the matching rows without building a filtered frame
    count = (measure_column == measure).sum()
    print(f"{position}. {measure} ({count} hospitals)")

AVAILABLE READMISSION MEASURES

1. READM-30-PN-HRRP (46 hospitals)
2. READM-30-HIP-KNEE-HRRP (46 hospitals)
3. READM-30-HF-HRRP (46 hospitals)
4. READM-30-COPD-HRRP (46 hospitals)
5. READM-30-CABG-HRRP (46 hospitals)
6. READM-30-AMI-HRRP (46 hospitals)


In [49]:
# Cleaning data

banner = "=" * 80
print("\n" + banner)
print("DATA CLEANING")
print(banner)

# Every category below is derived from the excess readmission ratio used by Medicare
excess_ratio = df['Excess Readmission Ratio']

print("\nPerformance Categories:\n")
worse_than_average = excess_ratio.gt(1).sum()
average = excess_ratio.eq(1).sum()
better_than_average = excess_ratio.lt(1).sum()
no_average = excess_ratio.isna().sum()

category_counts = (
    ("Worse Than Average (greater than 1.0)", worse_than_average),
    ("Average (approx. 1.0)", average),
    ("Better Than Average (less than 1.0)", better_than_average),
    ("No Average Given", no_average),
)
for label, value in category_counts:
    print(f"{label}: {value}")

# Rows without a ratio cannot be classified, so keep only the rows that have one.
# The explicit copy makes df_clean independent of df.
df_clean = df.loc[excess_ratio.notna()].copy()
print(f"\nAfter removing 'no averages given': {len(df_clean)} records remain.")


DATA CLEANING

Performance Categories:

Worse Than Average (greater than 1.0): 57
Average (approx. 1.0): 0
Better Than Average (less than 1.0): 115
No Average Given: 104

After removing 'no averages given': 172 records remain.


In [50]:
# Section banner: blank line, rule, title, rule
rule = "=" * 80
print("\n" + rule, "ANALYSIS 1: WORST PERFORMING HOSPITALS", rule, sep="\n")


ANALYSIS 1: WORST PERFORMING HOSPITALS


In [51]:
# Keep only the cleaned rows whose excess readmission ratio is above 1
is_worse = df_clean['Excess Readmission Ratio'].gt(1)
df_worse_than_average = df_clean.loc[is_worse]

print(f"\nHospitals performing worse than the national average: {len(df_worse_than_average)}")
print("\nThese hospitals need improvement:\n")

# Count the "worse" rows per facility and list the facilities with the most rows first
worse_rows_per_facility = df_worse_than_average.groupby('Facility Name').size()
problem_hospitals = worse_rows_per_facility.sort_values(ascending=False)
print(problem_hospitals.head(10))


Hospitals performing worse than the national average: 57

These hospitals need improvement:

Facility Name
PARK NICOLLET METHODIST HOSPITAL      6
REGIONS HOSPITAL                      4
NORTH MEMORIAL HEALTH HOSPITAL        4
M HEALTH FAIRVIEW UNIVERSITY OF MN    2
ALLINA UNITED HOSPITAL                2
ST LUKES HOSPITAL                     2
ST FRANCIS REGIONAL MEDICAL CENTER    2
ST CLOUD HOSPITAL                     2
OLMSTED MEDICAL CENTER                2
MERCY HOSPITAL                        2
dtype: int64


In [52]:
# Section banner: blank line, rule, title, rule
rule = "=" * 80
print("\n" + rule, "ANALYSIS 2: BEST PERFORMING HOSPITALS", rule, sep="\n")

# Keep only the cleaned rows whose excess readmission ratio is below 1
is_better = df_clean['Excess Readmission Ratio'].lt(1)
df_better_than_average = df_clean.loc[is_better]

print(f"\nHospitals performing better than the national average: {len(df_better_than_average)}")
print("\nTop performing hospitals:\n")

# Count the "better" rows per facility and list the facilities with the most rows first
better_rows_per_facility = df_better_than_average.groupby('Facility Name').size()
best_hospitals = better_rows_per_facility.sort_values(ascending=False)
print(best_hospitals.head(10))


ANALYSIS 2: BEST PERFORMING HOSPITALS

Hospitals performing better than the national average: 115

Top performing hospitals:

Facility Name
ABBOTT NORTHWESTERN HOSPITAL            5
M HEALTH FAIRVIEW SOUTHDALE HOSPITAL    5
RIDGEVIEW MEDICAL CENTER                5
MAYO CLINIC HOSPITAL ROCHESTER          5
MAYO CLINIC HEALTH SYSTEM - MANKATO     5
ST LUKES HOSPITAL                       4
ST CLOUD HOSPITAL                       4
SANFORD BEMIDJI MEDICAL CENTER          4
MERCY HOSPITAL                          4
M HEALTH FAIRVIEW ST JOHN'S HOSPITAL    4
dtype: int64


In [53]:
print("\n" + "=" * 80)
print("ANALYSIS 3: PERFORMANCE BY MEDICAL CONDITION")
print("=" * 80)

# For each measure, count how many cleaned rows have a ratio above, below, or equal to 1
# and report each count as a percentage of that measure's cleaned rows.
for measure in measures:
    measure_data = df_clean[df_clean['Measure Name'] == measure]
    total = len(measure_data)

    print(f"\n{measure}:")
    print(f"  Total hospitals: {total}")

    ratios = measure_data['Excess Readmission Ratio']
    performance = {
        "Worse Than Average": (ratios > 1).sum(),
        "Better Than Average": (ratios < 1).sum(),
        "Average": (ratios == 1).sum()
    }
    for category, count in performance.items():
        # A measure can have no rows left in df_clean (all of its ratios were missing);
        # show "n/a" instead of dividing by zero, which would print "nan%" with a warning.
        share = f"{(count / total) * 100:.1f}%" if total else "n/a"
        print(f"  {category}: {count} ({share})")
        
    


ANALYSIS 3: PERFORMANCE BY MEDICAL CONDITION

READM-30-PN-HRRP:
  Total hospitals: 43
  Worse Than Average: 16 (37.2%)
  Better Than Average: 27 (62.8%)
  Average: 0 (0.0%)

READM-30-HIP-KNEE-HRRP:
  Total hospitals: 24
  Worse Than Average: 11 (45.8%)
  Better Than Average: 13 (54.2%)
  Average: 0 (0.0%)

READM-30-HF-HRRP:
  Total hospitals: 44
  Worse Than Average: 11 (25.0%)
  Better Than Average: 33 (75.0%)
  Average: 0 (0.0%)

READM-30-COPD-HRRP:
  Total hospitals: 29
  Worse Than Average: 9 (31.0%)
  Better Than Average: 20 (69.0%)
  Average: 0 (0.0%)

READM-30-CABG-HRRP:
  Total hospitals: 13
  Worse Than Average: 4 (30.8%)
  Better Than Average: 9 (69.2%)
  Average: 0 (0.0%)

READM-30-AMI-HRRP:
  Total hospitals: 19
  Worse Than Average: 6 (31.6%)
  Better Than Average: 13 (68.4%)
  Average: 0 (0.0%)
