In [63]:
import pandas as pd
import numpy as np

In [64]:
# Load data
#
# The path uses a forward slash. The earlier spelling 'data\minnesota_hospitals.csv'
# only worked because '\m' is not a recognised escape sequence: Python keeps the
# backslash but warns about it, and the backslash separator is Windows-specific.
df = pd.read_csv('data/minnesota_hospitals.csv')

print("=" * 80)
print("MINNESOTA HOSPITAL READMISSION ANALYSIS")
print("=" * 80)
print(f"\nAnalyzing {len(df)} hospital records from Minnesota\n")

MINNESOTA HOSPITAL READMISSION ANALYSIS

Analyzing 276 hospital records from Minnesota



In [65]:
# Understanding the data structure

# List every distinct measure (in order of first appearance) together with the
# number of rows that carry it.
print("AVAILABLE READMISSION MEASURES\n")
measure_column = df['Measure Name']
measures = measure_column.unique()
for position, measure in enumerate(measures, start=1):
    # Summing the boolean mask counts the matching rows
    count = (measure_column == measure).sum()
    print(f"{position}. {measure} ({count} hospitals)")

AVAILABLE READMISSION MEASURES

1. READM-30-PN-HRRP (46 hospitals)
2. READM-30-HIP-KNEE-HRRP (46 hospitals)
3. READM-30-HF-HRRP (46 hospitals)
4. READM-30-COPD-HRRP (46 hospitals)
5. READM-30-CABG-HRRP (46 hospitals)
6. READM-30-AMI-HRRP (46 hospitals)


In [66]:
# Cleaning data

print("\n" + "=" * 80, "DATA CLEANING", "=" * 80, sep="\n")

# Bucket every record by its Excess Readmission Ratio (ERR), the ratio used by Medicare:
# above 1 is reported as worse than average, below 1 as better, missing as "no average".
print("\nPerformance Categories:\n")
err = df['Excess Readmission Ratio']
worse_than_average = err.gt(1).sum()
# NOTE(review): the label printed below says "approx. 1.0" but this is an exact equality test
average = err.eq(1).sum()
better_than_average = err.lt(1).sum()
no_average = err.isnull().sum()
print(
    f"Worse Than Average (greater than 1.0): {worse_than_average}",
    f"Average (approx. 1.0): {average}",
    f"Better Than Average (less than 1.0): {better_than_average}",
    f"No Average Given: {no_average}",
    sep="\n",
)

# Records without an ERR cannot be classified, so the rest of the analysis works on
# an independent copy that excludes them.
df_clean = df.loc[err.notna()].copy()
print(f"\nAfter removing 'no averages given': {len(df_clean)} records remain.")


DATA CLEANING

Performance Categories:

Worse Than Average (greater than 1.0): 57
Average (approx. 1.0): 0
Better Than Average (less than 1.0): 115
No Average Given: 104

After removing 'no averages given': 172 records remain.


In [67]:
# Section banner: blank line, rule, title, rule
print("\n" + "=" * 80, "ANALYSIS 1: WORST PERFORMING HOSPITALS", "=" * 80, sep="\n")


ANALYSIS 1: WORST PERFORMING HOSPITALS


In [68]:
# Keep only the cleaned records whose Excess Readmission Ratio is above 1
df_worse_than_average = df_clean[df_clean['Excess Readmission Ratio'] > 1]

# len() counts hospital/measure rows, and one hospital can contribute several rows,
# so the rows are reported as measure results and the number of distinct hospitals
# is given separately instead of calling the row count "hospitals".
print(
    f"\nMeasure results worse than the national average: {len(df_worse_than_average)} "
    f"(across {df_worse_than_average['Facility Name'].nunique()} hospitals)"
)
print("\nThese hospitals need improvement:\n")

# Grouping by hospital to see which hospitals appear most in "worse" category
problem_hospitals = df_worse_than_average.groupby('Facility Name').size().sort_values(ascending=False)
print(problem_hospitals.head(10))


Hospitals performing worse than the national average: 57

These hospitals need improvement:

Facility Name
PARK NICOLLET METHODIST HOSPITAL      6
REGIONS HOSPITAL                      4
NORTH MEMORIAL HEALTH HOSPITAL        4
M HEALTH FAIRVIEW UNIVERSITY OF MN    2
ALLINA UNITED HOSPITAL                2
ST LUKES HOSPITAL                     2
ST FRANCIS REGIONAL MEDICAL CENTER    2
ST CLOUD HOSPITAL                     2
OLMSTED MEDICAL CENTER                2
MERCY HOSPITAL                        2
dtype: int64


In [69]:
print("\n" + "=" * 80)
print("ANALYSIS 2: BEST PERFORMING HOSPITALS")
print("=" * 80)

# Keep only the cleaned records whose Excess Readmission Ratio is below 1
df_better_than_average = df_clean[df_clean['Excess Readmission Ratio'] < 1]

# len() counts hospital/measure rows, and one hospital can contribute several rows,
# so the rows are reported as measure results and the number of distinct hospitals
# is given separately instead of calling the row count "hospitals".
print(
    f"\nMeasure results better than the national average: {len(df_better_than_average)} "
    f"(across {df_better_than_average['Facility Name'].nunique()} hospitals)"
)
print("\nTop performing hospitals:\n")

# Rank hospitals by how many of their measures fall in the "better" category
best_hospitals = df_better_than_average.groupby('Facility Name').size().sort_values(ascending=False)
print(best_hospitals.head(10))


ANALYSIS 2: BEST PERFORMING HOSPITALS

Hospitals performing better than the national average: 115

Top performing hospitals:

Facility Name
ABBOTT NORTHWESTERN HOSPITAL            5
M HEALTH FAIRVIEW SOUTHDALE HOSPITAL    5
RIDGEVIEW MEDICAL CENTER                5
MAYO CLINIC HOSPITAL ROCHESTER          5
MAYO CLINIC HEALTH SYSTEM - MANKATO     5
ST LUKES HOSPITAL                       4
ST CLOUD HOSPITAL                       4
SANFORD BEMIDJI MEDICAL CENTER          4
MERCY HOSPITAL                          4
M HEALTH FAIRVIEW ST JOHN'S HOSPITAL    4
dtype: int64


In [70]:
print("\n" + "=" * 80, "ANALYSIS 3: PERFORMANCE BY MEDICAL CONDITION", "=" * 80, sep="\n")

# For every measure, tally the cleaned records on each side of a ratio of 1
for measure in measures:
    measure_data = df_clean.loc[df_clean['Measure Name'] == measure]
    ratios = measure_data['Excess Readmission Ratio']
    n_records = len(measure_data)

    print(f"\n{measure}:")
    print(f"  Total hospitals: {n_records}")

    performance = {
        "Worse Than Average": ratios.gt(1).sum(),
        "Better Than Average": ratios.lt(1).sum(),
        "Average": ratios.eq(1).sum(),
    }

    # Report each tally along with its share of this measure's records
    for category, count in performance.items():
        pct = (count / n_records) * 100
        print(f"  {category}: {count} ({pct:.1f}%)")
        
    


ANALYSIS 3: PERFORMANCE BY MEDICAL CONDITION

READM-30-PN-HRRP:
  Total hospitals: 43
  Worse Than Average: 16 (37.2%)
  Better Than Average: 27 (62.8%)
  Average: 0 (0.0%)

READM-30-HIP-KNEE-HRRP:
  Total hospitals: 24
  Worse Than Average: 11 (45.8%)
  Better Than Average: 13 (54.2%)
  Average: 0 (0.0%)

READM-30-HF-HRRP:
  Total hospitals: 44
  Worse Than Average: 11 (25.0%)
  Better Than Average: 33 (75.0%)
  Average: 0 (0.0%)

READM-30-COPD-HRRP:
  Total hospitals: 29
  Worse Than Average: 9 (31.0%)
  Better Than Average: 20 (69.0%)
  Average: 0 (0.0%)

READM-30-CABG-HRRP:
  Total hospitals: 13
  Worse Than Average: 4 (30.8%)
  Better Than Average: 9 (69.2%)
  Average: 0 (0.0%)

READM-30-AMI-HRRP:
  Total hospitals: 19
  Worse Than Average: 6 (31.6%)
  Better Than Average: 13 (68.4%)
  Average: 0 (0.0%)


In [71]:
print("\n" + "=" * 80, "ANALYSIS 4: MINNESOTA SUMMARY STATISTICS", "=" * 80, sep="\n")

# Distinct facility names overall, and among the worse/better subsets (a facility is
# counted in a subset as soon as one of its measures falls there).
# dropna=False keeps the count equal to the number of values .unique() would return.
total_hospitals_measured = df_clean['Facility Name'].nunique(dropna=False)
hospitals_worse = df_worse_than_average['Facility Name'].nunique(dropna=False)
hospitals_better = df_better_than_average['Facility Name'].nunique(dropna=False)

print(f"\nTotal Minnesota hospitals with readmission data: {total_hospitals_measured}")

# Shares are expressed relative to all hospitals that have readmission data
share_worse = hospitals_worse / total_hospitals_measured * 100
print(f"Hospitals with at least one 'worse' measure: {hospitals_worse} ({share_worse:.1f}%)")
share_better = hospitals_better / total_hospitals_measured * 100
print(f"Hospitals with at least one 'better' measure: {hospitals_better} ({share_better:.1f}%)")


ANALYSIS 4: MINNESOTA SUMMARY STATISTICS

Total Minnesota hospitals with readmission data: 45
Hospitals with at least one 'worse' measure: 34 (75.6%)
Hospitals with at least one 'better' measure: 41 (91.1%)


In [72]:
print("\n" + "=" * 80)
print("ANALYSIS 5: IMPROVEMENT OPPORTUNITIES")
print("=" * 80)

# Aggregating all hospitals with at least ONE worse measure: count the worse measures
# per facility name and keep the first Facility ID seen for that name.
# NOTE(review): grouping is by name only; confirm facility names are unique per Facility ID.
df_hospitals_to_improve = (
    df_worse_than_average
    .groupby('Facility Name')
    .agg({
        'Measure Name': 'count',
        'Facility ID': 'first'
    })
    # Rename by label rather than by position so the result does not depend on column order
    .rename(columns={'Measure Name': 'Worse Measures Count'})
    .sort_values('Worse Measures Count', ascending=False)
)

# The filter keeps counts of 2 or more, so the heading says "at least 2"
# (it previously read "more than 2", which did not match the >= 2 condition).
print("\nHospitals with at least 2 worse than average measures:")
print(df_hospitals_to_improve[df_hospitals_to_improve['Worse Measures Count'] >= 2])


ANALYSIS 5: IMPROVEMENT OPPORTUNITIES

Hospitals with more than 2 worse than average measures:
                                                  Worse Measures Count  \
Facility Name                                                            
PARK NICOLLET METHODIST HOSPITAL                                     6   
REGIONS HOSPITAL                                                     4   
NORTH MEMORIAL HEALTH HOSPITAL                                       4   
M HEALTH FAIRVIEW UNIVERSITY OF MN                                   2   
ALLINA UNITED HOSPITAL                                               2   
ST LUKES HOSPITAL                                                    2   
ST FRANCIS REGIONAL MEDICAL CENTER                                   2   
ST CLOUD HOSPITAL                                                    2   
OLMSTED MEDICAL CENTER                                               2   
MERCY HOSPITAL                                                       2   
MAPLE GROVE HOSP

In [73]:
# Count the better-than-average measures per facility name and keep the first
# Facility ID seen for that name.
# NOTE(review): grouping is by name only; confirm facility names are unique per Facility ID.
df_best_hospitals = (
    df_better_than_average
    .groupby('Facility Name')
    .agg({
        'Measure Name': 'count',
        'Facility ID': 'first'
    })
    # Rename by label rather than by position so the result does not depend on column order
    .rename(columns={'Measure Name': 'Better Measures Count'})
    .sort_values('Better Measures Count', ascending=False)
)

# The filter keeps counts of 2 or more, so the heading says "at least 2"
# (it previously read "more than 2", which did not match the >= 2 condition).
print("\nHospitals with at least 2 better than average measures:")
print(df_best_hospitals[df_best_hospitals['Better Measures Count'] >= 2])


Hospitals with more than 2 better than average measures:
                                                   Better Measures Count  \
Facility Name                                                              
ABBOTT NORTHWESTERN HOSPITAL                                           5   
M HEALTH FAIRVIEW SOUTHDALE HOSPITAL                                   5   
RIDGEVIEW MEDICAL CENTER                                               5   
MAYO CLINIC HOSPITAL ROCHESTER                                         5   
MAYO CLINIC HEALTH SYSTEM - MANKATO                                    5   
ST LUKES HOSPITAL                                                      4   
ST CLOUD HOSPITAL                                                      4   
SANFORD BEMIDJI MEDICAL CENTER                                         4   
MERCY HOSPITAL                                                         4   
M HEALTH FAIRVIEW ST JOHN'S HOSPITAL                                   4   
M HEALTH FAIRVIEW UNIVERSITY O

In [74]:
# Saving data of hospitals that have one or more worse/better than average measures
#
# Both paths use forward slashes. The first one was previously written with a bare
# backslash ('\h' is not a recognised escape sequence, so Python warns about it) while
# the second needed a raw string to stop '\t' becoming a tab; forward slashes avoid
# both problems and are not tied to Windows.
df_hospitals_to_improve.to_csv('data/hospitals_needing_improvement.csv')
df_best_hospitals.to_csv('data/top_performing_hospitals.csv')

In [75]:
# Finding condition with the most problems

# Worse-than-average record counts per measure, largest first; the leading entry
# supplies both the measure name and its count.
worse_counts_by_measure = (
    df_worse_than_average
    .groupby('Measure Name')
    .size()
    .sort_values(ascending=False)
)
worst_condition = worse_counts_by_measure.index[0]
worst_condition_count = worse_counts_by_measure.values[0]

In [76]:
print("\n" + "=" * 80, "KEY FINDINGS SUMMARY", "=" * 80, sep="\n")

# Headline numbers gathered from the earlier analysis cells
print("\nWHAT WAS DISCOVERED:\n")
findings = (
    f"1. Analyzed {total_hospitals_measured} Minnesota hospitals",
    f"2. {hospitals_worse} hospitals have at least one worse-than-average measure",
    f"3. {hospitals_better} hospitals have at least one better-than-average measure",
    "4. Identified specific hospitals needing targeted interventions",
    f"5. '{worst_condition}' has the most hospitals ({worst_condition_count}) performing worse than the national average.",
)
print(*findings, sep="\n")


# Suggested follow-up actions; only the second one depends on the data
print("\nRECOMMENDATIONS:\n")
recommendations = (
    "1. Focus improvement efforts on hospitals with multiple poor measures",
    f"2. Prioritize interventions for '{worst_condition}'",
    "3. Study best practices from top-performing hospitals",
    "4. Implement targeted quality improvement programs",
)
print(*recommendations, sep="\n")


KEY FINDINGS SUMMARY

WHAT WAS DISCOVERED:

1. Analyzed 45 Minnesota hospitals
2. 34 hospitals have at least one worse-than-average measure
3. 41 hospitals have at least one better-than-average measure
4. Identified specific hospitals needing targeted interventions
5. 'READM-30-PN-HRRP' has the most hospitals (16) performing worse than the national average.

RECOMMENDATIONS:

1. Focus improvement efforts on hospitals with multiple poor measures
2. Prioritize interventions for 'READM-30-PN-HRRP'
3. Study best practices from top-performing hospitals
4. Implement targeted quality improvement programs


In [77]:
# Closing banner: blank line, rule, title, rule
print("\n" + "=" * 80, "ANALYSIS COMPLETE!", "=" * 80, sep="\n")


ANALYSIS COMPLETE!
