Before you turn this problem in, make sure everything runs as expected. First, **restart the kernel** (in the menubar, select Kernel$\rightarrow$Restart) and then **run all cells** (in the menubar, select Cell$\rightarrow$Run All).

Make sure you fill in any place that says `YOUR CODE HERE` or "YOUR ANSWER HERE", as well as your name and collaborators below:

In [None]:
# Fill in your own name between the quotes.
NAME = ""
# Fill in the names of your collaborators between the quotes.
COLLABORATORS = ""

---


* Exploratory plots with pandas
  * Histograms
  * Scatter Plots
  
* Descriptive and presentable plots with matplotlib
  * Multiple plots per figure
  * Adjusting axes and formatting
  

Use `%matplotlib notebook` instead of `%matplotlib inline` to make the plots interactive.

In [None]:
# Render matplotlib figures as static images inside the notebook output.
%matplotlib inline

Plotting from pandas
---

In [None]:
import pandas as pd

# Load the influenza surveillance export; fields are wrapped in double quotes.
data = pd.read_csv('/data/flu_2016_2017.csv', quotechar='"')

# Keep only the rows reported for the United States in 2016.
is_usa = data['Country'] == 'United States of America'
is_2016 = data['Year'] == 2016
usa = data[is_usa & is_2016]

# (rows, columns) of the filtered frame.
usa.shape

In [None]:
# Peek at the first two rows of the full data set.
data.iloc[:2]

In [None]:
# Column dtypes of the filtered frame.
usa.dtypes

In [None]:
# Show only the first row of the filtered frame.
usa.iloc[:1]

---

Histogram
---

Plot a histogram of the number of Influenza B cases per week

In [None]:
# Distribution of the Influenza B counts, using pandas' default binning.
influenza_b = usa['INF_B']
influenza_b.hist()

In [None]:
# One 5-bin histogram per selected column.
hist_columns = ['INF_B', 'INF_A', 'SPEC_RECEIVED_NB']
usa[hist_columns].hist(bins=5)


---

Line Plot (default)
---

Plot the number of cases by week

In [None]:
import seaborn as sns
# One line per count column, plotted against the SDATE column.
case_columns = ['INF_B', 'INF_A', 'SPEC_RECEIVED_NB', 'SPEC_PROCESSED_NB']
usa.plot(x='SDATE', y=case_columns)

In [None]:
# Same line plot, restricted to the first ten rows of `usa`.
first_ten = usa.iloc[:10]
first_ten.plot(x='SDATE', y=['INF_B', 'INF_A', 'SPEC_RECEIVED_NB', 'SPEC_PROCESSED_NB'])

---

Scatter Plot
---

Use a scatter plot to see whether there is a possible relationship between the number of specimens received and the number of positive cases

In [None]:
# Overlay Influenza A and Influenza B positives against specimens received.
# The first call creates the axes (legend label spelling fixed: 'Influenza A').
axes = usa.plot.scatter(x='SPEC_RECEIVED_NB', y='INF_A', color='DarkRed', label='Influenza A')
# Passing ax=axes draws the second series onto the axes created above.
axes = usa.plot.scatter(x='SPEC_RECEIVED_NB', y='INF_B', color='DarkBlue', label='Influenza B', ax=axes)
# Both series share the y-axis, so give it a label that covers both.
axes.set_ylabel('Positive Cases')
axes.set_xlabel('Specimens Received')

In [None]:
import seaborn as sns
# Flu A: scatter plus a second-order (order=2) polynomial fit; this call creates the axes.
ax = sns.regplot(x='SPEC_RECEIVED_NB', y='INF_A', data=usa, label='Flu A', order=2)
# Flu B: scatter plus regplot's default fit, drawn on the same axes.
# NOTE(review): only the Flu A fit sets order=2 - confirm the difference is intentional.
sns.regplot(x='SPEC_RECEIVED_NB', y='INF_B', data=usa, label='Flu B', ax=ax)

ax.set_ylabel('Positive Cases')
ax.legend()
ax.set_xlabel('Specimens Received')

In [None]:
import seaborn as sns
# Joint view of specimens received vs. Influenza B positives.
# NOTE(review): jointplot returns a seaborn JointGrid rather than a matplotlib
# Axes, despite the `ax` name.
ax = sns.jointplot(
    x='SPEC_RECEIVED_NB',
    y='INF_B',
    data=usa,
)


---

See whether there is always a similar ratio of type A to type B cases of influenza

In [None]:
# Influenza A counts on x against Influenza B counts on y, one point per row.
ax = usa.plot(kind='scatter', x='INF_A', y='INF_B')

In [None]:
%matplotlib inline
import seaborn as sns
# Scatter Influenza A against Influenza B with one hue per SDATE value;
# fit_reg=False turns off the regression fit so only the points are drawn.
# x and y are passed by keyword: current seaborn releases no longer accept
# them positionally, and the other seaborn calls in this notebook use keywords.
sns.lmplot(data=usa, x='INF_A', y='INF_B', hue='SDATE', fit_reg=False)


Other Examples
===

Plot by State
---

In [None]:
import matplotlib.pyplot as plt

# Load the home-health survey export and turn every 'Not Available' cell into 0.
# NOTE(review): zero-filling places those rows at 0 in the plots that follow; the
# time-series cells further down drop such rows instead - confirm which is intended.
d = pd.read_csv('/data/hhcaps.csv', quotechar='"').replace(to_replace='Not Available', value=0)

# Restrict to the three states being compared, then group rows by state.
states_of_interest = ['WY', 'CA', 'AL']
d = d[d['State'].isin(states_of_interest)]
by_state = d.groupby('State')

In [None]:
# Density of the "would definitely recommend" percentage, one curve per state.
data = pd.DataFrame()
data['Year Certified'] = pd.to_datetime(d['Date Certified'].str[0:4] + '-01-01', format='%Y-%m-%d')
data['Percent Recommend'] = d['Percent of patients who reported YES, they would definitely recommend the home health agency to friends and family'].astype(float)

fig, ax = plt.subplots()
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling

for name, group in by_state:
    # `data` is built from columns of `d`, so it shares d's row index and
    # group.index selects exactly this state's rows. Plotting the whole column
    # on every iteration would draw the same curve once per state.
    data.loc[group.index, 'Percent Recommend'].plot.kde(ax=ax, label=name, alpha=.5)
ax.set_xlabel('Percent Recommend')
ax.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Survey columns compared in this figure.
recommend_col = 'Percent of patients who reported YES, they would definitely recommend the home health agency to friends and family'
professional_col = 'Percent of patients who reported that their home health team gave care in a professional way'

# Markers only (empty linestyle), semi-transparent so overlapping points stay visible.
marker_style = dict(marker='o', linestyle='', ms=5, alpha=0.5)

fig, ax = plt.subplots()
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
for name, group in by_state:
    # One series per state: recommend percentage on x, professional-care percentage on y.
    recommend = group[recommend_col].astype(float)
    professional = group[professional_col].astype(float)
    ax.plot(recommend, professional, label=name, **marker_style)
ax.legend()

plt.show()

Time Series Plots
---

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


# Reload the full home-health survey export (all states).
d = pd.read_csv('/data/hhcaps.csv', quotechar='"')

# Drop the rows whose recommend percentage is 'Not Available'.
# Alternative kept for reference (zero-fill instead of dropping):
#d.replace(to_replace='Not Available', value=0, inplace=True)
recommend_col = 'Percent of patients who reported YES, they would definitely recommend the home health agency to friends and family'
has_rating = d[recommend_col] != 'Not Available'
d = d[has_rating]


In [None]:
# Column dtypes of the survey frame after the 'Not Available' filter.
d.dtypes

In [None]:
# Two-column frame: certification year (first four characters of the date,
# pinned to January 1st) and the recommend percentage as a float.
data = pd.DataFrame({
    'Year Certified': pd.to_datetime(d['Date Certified'].str[0:4] + '-01-01', format='%Y-%m-%d'),
    'Percent Recommend': d['Percent of patients who reported YES, they would definitely recommend the home health agency to friends and family'].astype(float),
})

# Mean recommend percentage per certification year, drawn as a solid line.
yearly_mean = data.groupby(['Year Certified']).mean()
g = yearly_mean.plot(style='-')
# Fix the y-axis to the full 0-100 percentage range.
g.set_ylim(0,100)
g

In [None]:
# Two-column frame: full certification date and the recommend percentage as a float.
data = pd.DataFrame({
    'Date Certified': pd.to_datetime(d['Date Certified'], format='%Y-%m-%d'),
    'Percent Recommend': d['Percent of patients who reported YES, they would definitely recommend the home health agency to friends and family'].astype(float),
})

# Mean recommend percentage per certification date, drawn as faint points.
daily_mean = data.groupby(['Date Certified']).mean()
g = daily_mean.plot(style='.', alpha=.1)
# Fix the y-axis to the full 0-100 percentage range.
g.set_ylim(0,100)

In [None]:
# Mean recommend percentage per certification date, overlaid with a rolling
# mean and a +/- 2 rolling-standard-deviation band.
recommend = pd.DataFrame()
recommend['Date Certified'] = pd.to_datetime(d['Date Certified'], format='%Y-%m-%d')
recommend['Percent Recommend'] = d['Percent of patients who reported YES, they would definitely recommend the home health agency to friends and family'].astype(float)

data = recommend.groupby(['Date Certified']).mean()

# The window is 20 rows of `data` (20 distinct certification dates), not 20 calendar days.
ma = data.rolling(20).mean()
mstd = data.rolling(20).std()

plt.figure()

# Black: per-date mean. Blue: rolling mean.
plt.plot(data.index, data, 'k')
plt.plot(ma.index, ma, 'b')

# fill_between needs 1-D y values, so pass the single column as a Series
# rather than the one-column DataFrames.
band_center = ma['Percent Recommend']
band_half_width = 2 * mstd['Percent Recommend']
plt.fill_between(mstd.index, band_center - band_half_width, band_center + band_half_width, color='b', alpha=0.2)


# Bootstrap Plot

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from pandas.plotting import bootstrap_plot


# Local import: only this cell needs it, to seed the bootstrap resampling.
import random

# Reload the survey export and drop the rows whose recommend percentage is
# 'Not Available' (dropped rather than zero-filled).
d = pd.read_csv('/data/hhcaps.csv', quotechar='"')
d = d[d['Percent of patients who reported YES, they would definitely recommend the home health agency to friends and family'] != 'Not Available']

data = pd.DataFrame()
data['Date Certified'] = pd.to_datetime(d['Date Certified'], format='%Y-%m-%d')
data['Percent Recommend'] = d['Percent of patients who reported YES, they would definitely recommend the home health agency to friends and family'].astype(float)

# bootstrap_plot draws its resamples with Python's `random` module, so seed it
# to get the same figure on every Restart & Run All.
random.seed(0)
# 500 resamples of 50 values each.
bootstrap_plot(data['Percent Recommend'], size=50, samples=500, color='grey')