In [32]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [33]:
# Global plot appearance: apply the 'seaborn-v0_8-whitegrid' matplotlib style
# sheet, then select seaborn's 'husl' palette.
plt.style.use(['seaborn-v0_8-whitegrid'])
sns.set_palette(palette='husl')

In [34]:
# Load data
# The path is built with pathlib so it resolves on every OS. A literal
# 'data\minnesota_hospitals.csv' relies on the invalid escape '\m' (a warning on
# modern Python) and on the Windows-only backslash separator.
df = pd.read_csv(Path('data') / 'minnesota_hospitals.csv')

# Keep only the records that report an Excess Readmission Ratio; .copy() gives an
# independent frame so the label assignments below do not touch `df`.
df_clean = df[df['Excess Readmission Ratio'].notna()].copy()

# Label every record by comparing its ratio against 1:
#   ratio > 1  -> 'Worse than National Average'
#   ratio == 1 -> 'National Average'
#   ratio < 1  -> 'Better than National Average'
ratio = df_clean['Excess Readmission Ratio']
df_clean.loc[ratio > 1, 'Compared to National Average'] = 'Worse than National Average'
df_clean.loc[ratio == 1, 'Compared to National Average'] = 'National Average'
df_clean.loc[ratio < 1, 'Compared to National Average'] = 'Better than National Average'

In [35]:
# Section banner: an 80-character rule above and below the heading.
rule = "=" * 80
print(rule, "CREATING VISUALIZATIONS", rule, sep="\n")

CREATING VISUALIZATIONS


In [43]:
# Bar chart: number of hospital-measure records in each
# 'Compared to National Average' category, saved to charts/performance_distribution.png.
performance_counts = df_clean['Compared to National Average'].value_counts()

# Colors are keyed by category label rather than bar position. value_counts()
# orders the bars by frequency, so a positional color list would paint the wrong
# category whenever the ranking changes, and would leave a third category
# ('National Average') without an assigned color.
performance_colors = {
    'Better than National Average': '#2ecc71',  # green
    'National Average': '#95a5a6',              # grey
    'Worse than National Average': '#e74c3c',   # red
}
bar_colors = [performance_colors.get(label, '#95a5a6') for label in performance_counts.index]

# Creating figure
fig, ax = plt.subplots(figsize=(10, 6))

# Adding bars, colored per category (face and edge, matching Patch.set_color)
bars = ax.bar(performance_counts.index, performance_counts.values,
              color=bar_colors, edgecolor=bar_colors)

# Setting labels/title
ax.set_xlabel('Performance Compared to National Average', fontsize=12, fontweight='bold')
ax.set_ylabel('Number of Hospital-Measure Records', fontsize=12, fontweight='bold')
ax.set_title('Minnesota Hospital Readmission Performance vs National Average', fontsize=14, fontweight='bold', pad=20)

# Adding data values above each bar
ax.bar_label(bars, fmt='%d', fontweight='bold')

# Saving and closing chart (to save memory). The output directory is created if
# missing, and the path is built with pathlib instead of a backslash literal
# ('charts\p...' contains the invalid escape '\p' and only works on Windows).
charts_dir = Path('charts')
charts_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(charts_dir / 'performance_distribution.png', dpi=300, bbox_inches='tight')
plt.close(fig)

# Report success only after the file has actually been written.
print("\nOverall Performance Distribution Visualization Created...")


Overall Performance Distribution Visualization Created...


In [55]:
# Stacked horizontal bar chart: one bar per readmission measure, split by
# 'Compared to National Average' category, saved to charts/performance_by_measure.png.

# Creating crosstab to show how many hospital records have measures that are better/worse than national average
performance_by_measure = pd.crosstab(
    df_clean['Measure Name'],
    df_clean['Compared to National Average'])

# Colors are looked up by column label. A fixed two-color list is applied by
# column position, so an extra 'National Average' column would take the red
# meant for 'Worse than National Average' and shift the remaining colors.
performance_colors = {
    'Better than National Average': '#2ecc71',  # green
    'National Average': '#95a5a6',              # grey
    'Worse than National Average': '#e74c3c',   # red
}
segment_colors = [performance_colors.get(category, '#95a5a6')
                  for category in performance_by_measure.columns]

fig, ax = plt.subplots(figsize=(12, 8))

# Adding horizontal stacked bar charts with one measure per bar (split by category)
performance_by_measure.plot(kind='barh', stacked=True, ax=ax, color=segment_colors)

ax.set_xlabel('Number of Hospital Records', fontsize=12, fontweight='bold')
ax.set_ylabel('Readmission Measure', fontsize=12, fontweight='bold')
ax.set_title('Minnesota Hospitals Performance by Readmission Measure', fontsize=14, fontweight='bold', pad=20)

# Moving legend out of graph
ax.legend(title="Performance", bbox_to_anchor=(1.05, 1), loc='upper left')

fig.tight_layout()

# Save and close the figure. The output directory is created if missing, and the
# path is built with pathlib instead of a backslash literal ('charts\p...'
# contains the invalid escape '\p' and only works on Windows).
charts_dir = Path('charts')
charts_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(charts_dir / 'performance_by_measure.png', dpi=300, bbox_inches='tight')
plt.close(fig)

# Report success only after the file has actually been written.
print("\nPerformance by Medical Condition Visualization Created...")


Performance by Medical Condition Visualization Created...
