In [90]:
import pandas as pd

In [91]:
# Load the data

df = pd.read_csv("data\hospital_readmissions.csv")

print('=' * 80)
print('MEDICARE HOSPITAL READMISSIONS DATA - INITIAL EXPLORATION')
print('=' * 80)

MEDICARE HOSPITAL READMISSIONS DATA - INITIAL EXPLORATION


In [92]:
# Basic information about dataset

print(f"\nDataset size: {df.shape[0]:,} rows and {df.shape[1]} columns")
print("\nColumn names:")
print(df.columns.tolist())


Dataset size: 18,510 rows and 12 columns

Column names:
['Facility Name', 'Facility ID', 'State', 'Measure Name', 'Number of Discharges', 'Footnote', 'Excess Readmission Ratio', 'Predicted Readmission Rate', 'Expected Readmission Rate', 'Number of Readmissions', 'Start Date', 'End Date']


In [93]:
# First 10 rows of data

print("\nFirst 10 rows:")
print(df.head(10))


First 10 rows:
                     Facility Name  Facility ID State            Measure Name  \
0  SOUTHEAST HEALTH MEDICAL CENTER        10001    AL       READM-30-AMI-HRRP   
1  SOUTHEAST HEALTH MEDICAL CENTER        10001    AL      READM-30-CABG-HRRP   
2  SOUTHEAST HEALTH MEDICAL CENTER        10001    AL        READM-30-HF-HRRP   
3  SOUTHEAST HEALTH MEDICAL CENTER        10001    AL  READM-30-HIP-KNEE-HRRP   
4  SOUTHEAST HEALTH MEDICAL CENTER        10001    AL        READM-30-PN-HRRP   
5  SOUTHEAST HEALTH MEDICAL CENTER        10001    AL      READM-30-COPD-HRRP   
6         MARSHALL MEDICAL CENTERS        10005    AL      READM-30-CABG-HRRP   
7         MARSHALL MEDICAL CENTERS        10005    AL  READM-30-HIP-KNEE-HRRP   
8         MARSHALL MEDICAL CENTERS        10005    AL        READM-30-HF-HRRP   
9         MARSHALL MEDICAL CENTERS        10005    AL        READM-30-PN-HRRP   

   Number of Discharges  Footnote  Excess Readmission Ratio  \
0                 296.0      

In [94]:
# Checking data types

print("\nData types:")
print(df.dtypes)


Data types:
Facility Name                  object
Facility ID                     int64
State                          object
Measure Name                   object
Number of Discharges          float64
Footnote                      float64
Excess Readmission Ratio      float64
Predicted Readmission Rate    float64
Expected Readmission Rate     float64
Number of Readmissions         object
Start Date                     object
End Date                       object
dtype: object


In [95]:
# Checking for missing data

print("\nMissing values per column:")
missing = df.isnull().sum()
print(missing[missing > 0])


Missing values per column:
Number of Discharges          10170
Footnote                      11927
Excess Readmission Ratio       6583
Predicted Readmission Rate     6583
Expected Readmission Rate      6583
Number of Readmissions         6583
dtype: int64


In [96]:
# Checking all hospitals per state found in data

print("\nAll states by number of hospitals:")
print(df['State'].value_counts().head(50))


All states by number of hospitals:
TX    1704
CA    1674
FL    1002
PA     804
NY     792
OH     714
IL     696
GA     576
MI     546
NC     492
IN     486
AL     486
LA     486
TN     480
OK     456
VA     426
WI     390
MO     390
AZ     384
KY     366
NJ     366
MS     348
MA     330
SC     306
CO     306
MN     276
WA     276
KS     264
AR     264
MD     258
OR     198
UT     192
IA     180
CT     168
NM     156
NE     150
WV     150
NV     132
SD     108
ID      96
ME      96
NH      78
HI      72
WY      66
MT      66
RI      60
AK      48
ND      42
VT      36
DE      36
Name: State, dtype: int64


In [97]:
# Filtering to JUST Minnesota

mn_df = df[df['State'] == 'MN']
print(f"\nMinneosta hospitals: {len(mn_df)}")


Minneosta hospitals: 276


In [98]:
# Checking some sample Minnesota hospital names

print('\nSample Minnesota hospitals:')
print(mn_df['Facility Name'].drop_duplicates().head(10).tolist())


Sample Minnesota hospitals:
['NORTH MEMORIAL HEALTH HOSPITAL', "ESSENTIA HEALTH ST MARY'S MEDICAL CENTER", 'HENNEPIN COUNTY MEDICAL CENTER', 'OLMSTED MEDICAL CENTER', 'MAYO CLINIC HOSPITAL ROCHESTER', 'NORTHFIELD HOSPITAL', 'MAYO CLINIC HEALTH SYSTEM IN RED WING', 'ESSENTIA HEALTH DULUTH', 'CAMBRIDGE MEDICAL CENTER', 'SANFORD WORTHINGTON MEDICAL CENTER']


In [99]:
# Check what measures we currently have

print("\nReadmission measures available:")
print(df['Measure Name'].unique())
print("\nDefinitions:")
print("READM-30-AMI-HRRP: 30-Day Readmission after Acute Myocardial Infarction (Heart Attack)")
print("READM-30-CABG-HRRP: 30-Day Readmission after Coronary Artery Bypass Graft Surgery (Open-Heart Surgery)")
print("READM-30-HF-HRRP: 30-Day Readmission after Heart Failure")
print("READM-30-HIP-KNEE-HRRP: 30-Day Readmission after Hip or Knee Replacement")
print("READM-30-PN-HRRP: 30-Day Readmission after Pneumonia")
print("READM-30-COPD-HRRP: 30-Day Readmission after Chronic Obstructive Pulmonary Disease")


Readmission measures available:
['READM-30-AMI-HRRP' 'READM-30-CABG-HRRP' 'READM-30-HF-HRRP'
 'READM-30-HIP-KNEE-HRRP' 'READM-30-PN-HRRP' 'READM-30-COPD-HRRP']

Definitions:
READM-30-AMI-HRRP: 30-Day Readmission after Acute Myocardial Infarction (Heart Attack)
READM-30-CABG-HRRP: 30-Day Readmission after Coronary Artery Bypass Graft Surgery (Open-Heart Surgery)
READM-30-HF-HRRP: 30-Day Readmission after Heart Failure
READM-30-HIP-KNEE-HRRP: 30-Day Readmission after Hip or Knee Replacement
READM-30-PN-HRRP: 30-Day Readmission after Pneumonia
READM-30-COPD-HRRP: 30-Day Readmission after Chronic Obstructive Pulmonary Disease


In [100]:
# Saving filtered MN data to new file

mn_df.to_csv('data\minnesota_hospitals.csv', index=False)
print("\n Saved Minnesota data to minnesota_hospitals.csv")


 Saved Minnesota data to minnesota_hospitals.csv


In [101]:
print("\n" + "=" * 80)
print('EXPLORATION COMPLETE')
print("=" * 80)



EXPLORATION COMPLETE
