In [1]:
import os, sys
sys.path.append('/data/spark15/python/')

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (18, 9)
import seaborn as sb
import numpy as np
import pandas as pd

from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import HiveContext
from pyspark.sql.types import *
import pyspark.sql.functions

sc = SparkContext("local", "hospital_compare")
sqlContext = SQLContext(sc)
hc = HiveContext(sc)

### What states are models of high-quality care?

In [3]:
print(hc.tableNames())

[u'effective_care', u'effective_care_scores', u'hospitals', u'hospitals_reduced', u'measures', u'readmissions', u'readmissions_reduced', u'survey_responses', u'survey_responses_reduced']


In [4]:
hospitals = hc.sql('select * from hospitals_reduced').toPandas()
effective_care = hc.sql('select * from effective_care_scores').toPandas()

In [13]:
hospitals['hospital_overall_rating'] = hospitals['hospital_overall_rating'].replace('Not Available', np.nan).apply(pd.to_numeric)

In [20]:
hospitals.groupby('state').mean().sort_values(by='hospital_overall_rating', ascending=False).head(10)

Unnamed: 0_level_0,hospital_overall_rating
state,Unnamed: 1_level_1
SD,3.857143
DE,3.666667
WI,3.611111
MN,3.531646
ID,3.526316
IN,3.481481
NH,3.48
CO,3.45098
UT,3.448276
ME,3.4375


In [23]:
df = pd.merge(hospitals, effective_care, how='inner', left_on='provider_id', right_on='provider_id')

In [34]:
results = df.set_index(['state', 'provider_id', 'hospital_name', 'measure_id'])[['score']].unstack()
results = results.apply(pd.to_numeric)
results.loc[:, 'Average_Score'] = results.mean(axis=1)
results.loc[:, 'StDev_Score'] = results.std(axis=1)

In [41]:
results.mean(level='state', axis=0).sort_values(by='Average_Score', ascending=False).head(10)

Unnamed: 0_level_0,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,Average_Score,StDev_Score
measure_id,ED_1b,ED_2b,IMM_2,IMM_3_OP_27_FAC_ADHPCT,OP_1,OP_18b,OP_2,OP_20,OP_21,OP_22,...,OP_31,OP_3b,OP_4,OP_5,PC_01,STK_4,VTE_5,VTE_6,Unnamed: 20_level_1,Unnamed: 21_level_1
state,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
VI,,,,,,,,,,,...,,,,,6.0,,,,6.0,0.0
WV,,,,,,,,8.0,,1.945946,...,,,,6.695652,2.333333,,,1.444444,3.371795,1.65877
AK,,,,,,,,8.5,,1.0,...,,,,7.6,1.428571,,,4.0,3.333333,2.180966
WY,,5.0,,,,,,8.333333,,0.692308,...,,,,5.625,2.333333,,,8.0,3.217544,1.159381
NC,,0.0,,,,,,9.0,,2.448276,...,,,,5.952381,1.438356,,,1.5625,3.068841,2.053104
KY,,,,,,,,6.0,,1.917808,...,0.0,,,5.568627,1.418605,,,2.0,2.990506,1.473358
ME,,,,,,,,7.5,,1.28,...,,,,6.0,1.684211,,,0.5,2.974138,1.69685
OK,,,,,,,,6.588235,,1.817073,...,,,,5.782609,1.4375,0.0,,1.625,2.944624,1.507994
VT,,,,,,,,4.0,,1.166667,...,,,,6.0,2.5,,,8.0,2.904762,1.520182
WI,,2.5,,,,,,7.0,,0.633333,...,,,,6.106061,1.597222,0.0,,1.565217,2.891141,1.986965
