# Awesome Apps
1. ScatterPlotter for analysis
2. HospitalIndicator for showing users the highly rated hospitals

## Requirements

This project is tested under the following environment.

* Mac OS (10.13.4)
* Python (3.6.2)
* Jupyter Notebook (5.0.0)

## Usage
1. Create a `plots` directory in the same directory as this file (the PNG files are saved there)
2. Run Jupyter Notebook
```
$ jupyter notebook
```
3. Edit `csvSource` in the next cell
4. Run all cells
5. Select a state from the dropdown list in the output; the plot changes according to the selection.

In [1]:
###### EDIT THIS #######
# Path of the CSV file that the data-cleaning cell loads with pd.read_csv.
csvSource = 'main_data.csv'

### Dependency

In [2]:
from bokeh.io import output_notebook, push_notebook
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.models import HoverTool,PanTool,WheelZoomTool,BoxZoomTool,ResetTool,UndoTool,RedoTool, Range1d
from IPython.display import display
import ipywidgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

### Function Definition

In [3]:
# dataframe checker
def checkDF(df):
    """Check that ``df`` contains the columns required by this notebook.

    Every missing column is reported with its own message, so a malformed
    input can be fixed in one pass instead of one rerun per column.

    Args:
        df: Object that supports ``in`` for column labels (a pandas
            DataFrame in this notebook).

    Returns:
        ``df`` itself when all required columns are present, otherwise
        ``None``.
    """
    requirements = ['Provider State', 'Cost', 'BEDS', 'Provider Name']
    missing = [req for req in requirements if req not in df]
    for req in missing:
        print(f'DF structure error: dataframe must include "{req}". please confirm your dataframe:-(')
    return None if missing else df

# plot setter
def setPlot(df, state, rating=None):
    """Draw the Cost-vs-BEDS seaborn scatter plot for one state.

    Prints a message and draws nothing when the inputs are invalid.

    Args:
        df: DataFrame with 'Provider State', 'Cost', 'BEDS' and
            'Hospital overall rating' columns.
        state: Value of 'Provider State' to plot.
        rating: List of ratings to include. ``None`` (the default) selects
            all ratings 1-5.

    Returns:
        None. The plot is drawn on the current matplotlib figure.
    """
    # Build the default per call instead of sharing one mutable list.
    if rating is None:
        rating = [1, 2, 3, 4, 5]

    # validate the inputs first
    if not (df['Provider State'] == state).any():
        print('your state input might be wrong. please check your input!')
        return
    if not isinstance(rating, list) or not rating:
        print('rating must be a list containing at least one numeric element. please check your input!')
        return

    # df processing
    # extract the data of the `state`
    dfbyState = df[df['Provider State'] == state]
    # extract the data of the `rating`
    dfbyState_Rating = dfbyState[dfbyState["Hospital overall rating"].isin(rating)]
    # set seaborn
    # NOTE(review): newer seaborn releases name the `size` argument `height`;
    # confirm against the installed seaborn version before upgrading.
    sns.lmplot(x='Cost', y="BEDS", hue="Hospital overall rating", data=dfbyState_Rating, fit_reg=False, legend=True, size = 8, scatter_kws={'alpha':0.9}, palette={1:'#fc8d59', 2:'#fee08b',3:'#d9ef8b',4:'#91cf60',5:'#1a9850'})
    # NOTE(review): this runs after lmplot has drawn the plot - confirm whether
    # the font scale is meant for this plot or only for later ones.
    sns.set(font_scale=2)
    # set axis and titles
    plt.ylim(0,1400)
    plt.xlim(0,4000)
    plt.title(f'Cost-Size relationship in {state}')

### Data Cleaning

In [4]:
# read the csv as df and change the rating value to float
df = checkDF(pd.read_csv(csvSource))
if df is None:
    # checkDF prints the missing columns; stop here with a clear error
    # instead of failing on the next line with an unrelated TypeError.
    raise ValueError(f'"{csvSource}" does not contain the required columns')
df['Hospital overall rating'] = df['Hospital overall rating'].astype(float)

# drop duplicates: keep one row per provider
df_uniqueProv = df.drop_duplicates('Provider Id')

# create buffer df that includes Provider Id and average cost per provider.
# 'Cost' is selected before aggregating so only that column is averaged
# (non-numeric columns are never touched) and the groupby runs once.
df_buff = (
    df.groupby('Provider Id')['Cost']
    .mean()
    .rename('average Cost')
    .reset_index()
)

# Add average cost to df_uniqueProv using pd.merge
# NOTE(review): the plotting cells read 'Cost', not 'average Cost' - confirm
# which column is meant to be plotted.
mergedDF = pd.merge(df_uniqueProv, df_buff, how='outer')

## **Scatter Plotter for Analysis**
This app returns the Cost-Size scatter plot of the state selected.

In [5]:
def f(STATE):
    """Draw and show the Cost-Size scatter plot for the selected state."""
    sns.set_style('whitegrid')
    setPlot(mergedDF,STATE)
    plt.show()


# State list for the dropdown. size() is used only to obtain the group keys
# (sorted by groupby's default); unlike mean() it does not aggregate the
# data columns, so non-numeric columns are not a problem.
states = mergedDF.groupby('Provider State').size().index
ipywidgets.interact(f, STATE=states);

A Jupyter Widget

### Save Plots as png file
**Make sure you have a `plots` directory in the same directory as this file!**

It takes a few minutes to finish.

In [6]:
import os

# make sure the output directory exists so savefig does not fail
os.makedirs('plots', exist_ok=True)

for state in states:
    sns.set_style('whitegrid')
    setPlot(mergedDF, state)
    plt.savefig(f'plots/app_{state}.png')
    # Close the figure once it is saved; otherwise one figure per state
    # stays open for the rest of the loop. The PNG file is the output here.
    plt.close()



## HospitalIndicator for showing users highly rated hospitals
This app returns the Cost-Size scatter plot for the higher-rated hospitals of the selected state.
Hovering the cursor over a dot in the plot shows the hospital's name and rating.

In [6]:
# data processing
# keep only the rows with a positive bed count ('BEDS' > 0)
df_hasbeds = mergedDF[mergedDF['BEDS'] > 0]

# get state list and set dropdown
# size() is used only to obtain the group keys; it does not aggregate the
# data columns, so non-numeric columns are not a problem.
states = df_hasbeds.groupby('Provider State').size().index
# NOTE(review): 'CA' is assumed to be present in the data - confirm.
d = ipywidgets.Dropdown(options=states, value='CA')
# ATTEMPTS TO COLOR DOTS BASED ON RATINGS 
# exg_r = []

# set bokeh tools for an interactive plot
hover = HoverTool(tooltips=[
    ("rating:", "@rating"),
    ("name:", "@prov"),
    ("cost:", "$x"),
    ("size:", "$y")    
])
pan = PanTool()
box = BoxZoomTool()
undo = UndoTool() 
redo = RedoTool()
reset = ResetTool()

# ATTEMPTS TO COLOR DOTS BASED ON RATINGS 
# colormap = {1:'#d7191c', 2:'#fdae61', 3:'#ffffbf', 4:'#a6d96a', 5:'#1a9641'}
# colors = [colormap[x] for x in df_hasbeds['Hospital overall rating']]
# colors = ['#d7191c', '#fdae61', '#ffffbf', '#a6d96a', '#1a9641']

# arrange figure and assign source
fig = figure(width=640, height=400, tools=[hover,box,pan,undo,redo,reset], title='Cost-Size relationship')
# start with every column the hover tooltips refer to, including 'rating',
# so the source has the same columns before and after the first update
source= ColumnDataSource(data=dict(x=[],y=[],prov=[],rating=[]))

# plot drawer
def setPlot(df, state, ratings=[np.random.randint(2,4)]):
    """Load one state's hospitals into the shared bokeh data source.

    Filters ``df`` to the rows of ``state`` whose 'Hospital overall rating'
    is in ``ratings``, then replaces ``source.data`` with their cost, bed
    count, provider name and rating. The circle glyph is added to ``fig``
    on the first call only.

    NOTE(review): this definition reuses the name of the seaborn ``setPlot``
    defined in an earlier cell and replaces it; rerunning the scatter-plot
    cells after this one will call this function instead.

    Args:
        df: DataFrame with 'Provider State', 'Hospital overall rating',
            'Cost', 'BEDS' and 'Provider Name' columns.
        state: Value of 'Provider State' to show.
        ratings: Ratings to keep. NOTE(review): the default is drawn at
            random once, when this ``def`` is executed, not per call -
            confirm that is intended; the calls in this cell pass
            ``[4,5]`` explicitly.
    """
    # extract data of the state inputted
    df_State = df[df['Provider State'] == state]
    # extract data rated as the argument rating
    df_State_Rating = df_State[df_State["Hospital overall rating"].isin(ratings)]

# ATTEMPTS TO COLOR DOTS BASED ON RATINGS     
#     df_State['Color'] = list[colormap[x] for x in df_State['Hospital overall rating']]

    # update source.data with the filtered columns
    source.data = dict(
        x=df_State_Rating['Cost'],
        y=df_State_Rating['BEDS'],
        prov=df_State_Rating['Provider Name'],
        rating=df_State_Rating['Hospital overall rating'],
    )

    # draw scatter plot
    # The glyph is bound to `source`, so it only has to be added once; later
    # calls just swap source.data. Calling fig.circle on every update would
    # stack one more renderer on the figure each time the state changes.
    if not getattr(setPlot, '_glyph_added', False):
        fig.circle(x='x', y='y', source=source, fill_alpha=0.7, color='#5ab4ac')
        setPlot._glyph_added = True

# event handler
def on_value_change(change):
    """Redraw the plot for the newly selected state and push it to the notebook.

    `change` is the change notification passed by the dropdown; its `new`
    attribute is used as the state to plot.
    """
    # (an earlier attempt hid old renderers here to color dots by rating)
    selected_state = change.new
    shown_ratings = [4,5]
    setPlot(df_hasbeds, selected_state, shown_ratings)
    # send the updated figure to the output identified by handle `t`
    push_notebook(handle=t)

# register on_value_change to be called when the dropdown's `value` changes
d.observe(on_value_change, names='value')
# show dropdown
display(d)

# draw the default plot for CA, restricted to ratings 4 and 5
setPlot(df_hasbeds, 'CA', [4,5])

# configure bokeh to render its output inside the notebook
output_notebook()

# axis settings
fig.xaxis.axis_label = 'Cost /$'
fig.yaxis.axis_label = 'Size (Bed count)'
# fixed axis ranges, currently disabled:
# fig.x_range=Range1d(0,4000)
# fig.y_range=Range1d(0,700)

# show the plot and keep the returned handle in `t`;
# on_value_change passes this handle to push_notebook
t = show(fig, notebook_handle=True)

A Jupyter Widget