In [99]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [100]:
# Load the hospital cost data set from the CSV file in the working directory.
hosp = pd.read_csv("hosp_cost.csv")

In [101]:
# (rows, columns) of the freshly loaded table.
hosp.shape

(163065, 12)

In [102]:
# Column names, non-null counts and dtypes of the raw table.
hosp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163065 entries, 0 to 163064
Data columns (total 12 columns):
DRG Definition                          163065 non-null object
Provider Id                             163065 non-null int64
Provider Name                           163065 non-null object
Provider Street Address                 163065 non-null object
Provider City                           163065 non-null object
Provider State                          163065 non-null object
Provider Zip Code                       163065 non-null int64
Hospital Referral Region Description    163065 non-null object
 Total Discharges                       163065 non-null int64
 Average Covered Charges                163065 non-null object
 Average Total Payments                 163065 non-null object
Average Medicare Payments               163065 non-null object
dtypes: int64(3), object(9)
memory usage: 14.9+ MB


In [103]:
# Preview the first five rows of the raw data.
hosp.head()

Unnamed: 0,DRG Definition,Provider Id,Provider Name,Provider Street Address,Provider City,Provider State,Provider Zip Code,Hospital Referral Region Description,Total Discharges,Average Covered Charges,Average Total Payments,Average Medicare Payments
0,039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,AL - Dothan,91,$32963.07,$5777.24,$4763.73
1,039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10005,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,AL - Birmingham,14,$15131.85,$5787.57,$4976.71
2,039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10006,ELIZA COFFEE MEMORIAL HOSPITAL,205 MARENGO STREET,FLORENCE,AL,35631,AL - Birmingham,24,$37560.37,$5434.95,$4453.79
3,039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10011,ST VINCENT'S EAST,50 MEDICAL PARK EAST DRIVE,BIRMINGHAM,AL,35235,AL - Birmingham,25,$13998.28,$5417.56,$4129.16
4,039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10016,SHELBY BAPTIST MEDICAL CENTER,1000 FIRST STREET NORTH,ALABASTER,AL,35007,AL - Birmingham,18,$31633.27,$5658.33,$4851.44


In [104]:
# Some column names contain unnecessary surrounding spaces, which can cause problems when querying the data later on, so we strip that whitespace and replace the remaining spaces with underscores.

In [105]:
# Build the cleaned column labels: trim spaces at both ends of each name,
# then turn every remaining space into an underscore.
cols = hosp.columns.str.strip(' ').str.replace(' ', '_')

In [106]:
# Replace the frame's column labels with the cleaned names held in `cols`.
hosp.columns= cols

In [107]:
# Display the current column labels to verify the renaming.
hosp.columns

Index(['DRG_Definition', 'Provider_Id', 'Provider_Name',
       'Provider_Street_Address', 'Provider_City', 'Provider_State',
       'Provider_Zip_Code', 'Hospital_Referral_Region_Description',
       'Total_Discharges', 'Average_Covered_Charges', 'Average_Total_Payments',
       'Average_Medicare_Payments'],
      dtype='object')

In [108]:
# Show the first two rows under the current column labels.
hosp.head(2)

Unnamed: 0,DRG_Definition,Provider_Id,Provider_Name,Provider_Street_Address,Provider_City,Provider_State,Provider_Zip_Code,Hospital_Referral_Region_Description,Total_Discharges,Average_Covered_Charges,Average_Total_Payments,Average_Medicare_Payments
0,039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,AL - Dothan,91,$32963.07,$5777.24,$4763.73
1,039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10005,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,AL - Birmingham,14,$15131.85,$5787.57,$4976.71


In [109]:
# The last three columns hold different kinds of charge and payment amounts, but they are stored as object (string) dtype, so we need to convert them to float before performing exploratory data analysis.

In [110]:
# Pull the three monetary columns out into their own frame for conversion.
charges = hosp.loc[
    :,
    ['Average_Covered_Charges', 'Average_Total_Payments', 'Average_Medicare_Payments'],
]

In [111]:
# Remove the leading '$' from every value so the strings can be parsed as
# numbers. This goes column by column through the vectorised `.str` accessor
# rather than `DataFrame.applymap`, which is deprecated in recent pandas
# releases and invokes a Python lambda once per cell.
charges = charges.apply(lambda col: col.str.lstrip('$'))

In [112]:
# Cast the columns of `charges` to 64-bit floats.
charges = charges.astype('float64')

In [113]:
# Write the converted numeric columns back into the main frame, replacing
# the original string versions of the same three columns.
hosp[['Average_Covered_Charges', 'Average_Total_Payments','Average_Medicare_Payments']]= charges

In [114]:
# Preview the first five rows of the converted charge columns.
charges.head()

Unnamed: 0,Average_Covered_Charges,Average_Total_Payments,Average_Medicare_Payments
0,32963.07,5777.24,4763.73
1,15131.85,5787.57,4976.71
2,37560.37,5434.95,4453.79
3,13998.28,5417.56,4129.16
4,31633.27,5658.33,4851.44


In [115]:
# Check the dtypes and non-null counts of the converted charge columns.
charges.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163065 entries, 0 to 163064
Data columns (total 3 columns):
Average_Covered_Charges      163065 non-null float64
Average_Total_Payments       163065 non-null float64
Average_Medicare_Payments    163065 non-null float64
dtypes: float64(3)
memory usage: 3.7 MB


In [116]:
# Show the first two rows of the main frame after the charge columns were written back.
hosp.head(2)

Unnamed: 0,DRG_Definition,Provider_Id,Provider_Name,Provider_Street_Address,Provider_City,Provider_State,Provider_Zip_Code,Hospital_Referral_Region_Description,Total_Discharges,Average_Covered_Charges,Average_Total_Payments,Average_Medicare_Payments
0,039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,AL - Dothan,91,32963.07,5777.24,4763.73
1,039 - EXTRACRANIAL PROCEDURES W/O CC/MCC,10005,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,AL - Birmingham,14,15131.85,5787.57,4976.71


In [121]:
# Final check of column dtypes and non-null counts on the cleaned frame.
hosp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163065 entries, 0 to 163064
Data columns (total 12 columns):
DRG_Definition                          163065 non-null object
Provider_Id                             163065 non-null int64
Provider_Name                           163065 non-null object
Provider_Street_Address                 163065 non-null object
Provider_City                           163065 non-null object
Provider_State                          163065 non-null object
Provider_Zip_Code                       163065 non-null int64
Hospital_Referral_Region_Description    163065 non-null object
Total_Discharges                        163065 non-null int64
Average_Covered_Charges                 163065 non-null float64
Average_Total_Payments                  163065 non-null float64
Average_Medicare_Payments               163065 non-null float64
dtypes: float64(3), int64(3), object(6)
memory usage: 14.9+ MB
