In [1]:
# Dependencies and Setup
%matplotlib inline
import pandas as pd
import random
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import sem


# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
mouse_drug_data_to_load = "mouse_drug_data.csv"
clinical_trial_data_to_load = "clinicaltrial_data.csv"

# Read the Mouse and Drug Data and the Clinical Trial Data

mouse_data= pd.read_csv(mouse_drug_data_to_load)
mouse_data.head()




Unnamed: 0,Mouse ID,Drug
0,f234,Stelasyn
1,x402,Stelasyn
2,a492,Stelasyn
3,w540,Stelasyn
4,v764,Stelasyn


In [2]:
# Read the Mouse and Drug Data and the Clinical Trial Data

clinical_data= pd.read_csv(clinical_trial_data_to_load)
clinical_data.head()



Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.0,0
1,f932,0,45.0,0
2,g107,0,45.0,0
3,a457,0,45.0,0
4,c819,0,45.0,0


In [14]:
# Combine the data into a single dataset
# Display the data table for preview

merged_df=pd.merge(mouse_data, clinical_data, on='Mouse ID', how='left')
pd.DataFrame(merged_df).head()



Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


## Tumor Response to Treatment

In [16]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 
mean_df=pd.pivot_table(merged_df, values='Tumor Volume (mm3)',
               index=['Drug'], 
               columns='Timepoint',aggfunc='mean').T

# Convert to DataFrame
pd.DataFrame(mean_df)


# Preview DataFrame
pd.DataFrame(mean_df).head()


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [17]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint

stderr_df=pd.pivot_table(merged_df, values='Tumor Volume (mm3)',
               index=['Drug'], 
               columns='Timepoint',aggfunc='sem').T
stderr_df

# Convert to DataFrame
pd.DataFrame(stderr_df)

# Preview DataFrame
pd.DataFrame(stderr_df).head()


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.448593,0.164505,0.235102,0.264819,0.202385,0.218091,0.231708,0.482955,0.239862,0.18895
10,0.702684,0.236144,0.282346,0.357421,0.319415,0.402064,0.376195,0.720225,0.433678,0.263949
15,0.838617,0.332053,0.357705,0.580268,0.444378,0.614461,0.466109,0.770432,0.493261,0.370544
20,0.909731,0.359482,0.47621,0.726484,0.59526,0.839609,0.555181,0.786199,0.621889,0.533182


In [19]:
# Minor Data Munging to Re-Format the Data Frames

# 1) Mean Dataframe Formatted
pd.DataFrame(np.round(mean_df, decimals=2))


# Preview that Reformatting worked
pd.DataFrame(np.round(mean_df, decimals=2)).head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.27,46.5,47.06,47.39,46.8,47.13,47.25,43.94,47.53,46.85
10,43.08,48.29,49.4,49.58,48.69,49.42,49.1,42.53,49.46,48.69
15,42.06,50.09,51.3,52.4,50.93,51.36,51.07,41.5,51.53,50.78
20,40.72,52.16,53.2,54.92,53.64,54.36,53.35,40.24,54.07,53.17


In [13]:
# Minor Data Munging to Re-Format the Data Frames

# 2) Standard Error DataFrame Formatted
pd.DataFrame(np.round(stderr_df, decimals=2))


# Preview that Reformatting worked
pd.DataFrame(np.round(stderr_df, decimals=2)).head()


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.45,0.16,0.24,0.26,0.2,0.22,0.23,0.48,0.24,0.19
10,0.7,0.24,0.28,0.36,0.32,0.4,0.38,0.72,0.43,0.26
15,0.84,0.33,0.36,0.58,0.44,0.61,0.47,0.77,0.49,0.37
20,0.91,0.36,0.48,0.73,0.6,0.84,0.56,0.79,0.62,0.53


In [8]:
means = [sample.stderr_df.mean() for sample in vehicle_sample_set]
standard_errors = [sem(sample.Combined_MPG) for sample in vehicle_sample_set]
x_axis = np.arange(0, len(vehicle_sample_set), 1) + 1



NameError: name 'vehicle_sample_set' is not defined

In [None]:
# Generate the Plot (with Error Bars)

fig, ax = plt.subplots()
ax.errorbar(mean_df, stderr_df, fmt="o")
ax.set_xlim(0,45)
ax.set_ylim(0,2)


ax.set_xlabel("Time (Days)")
ax.set_ylabel("Tumor Volume(mm3)")
plt.title('Tumor Response to Treatment')

plt.show()


# Save the Figure

In [None]:
# Show the Figure
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [21]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 



mean_df3=pd.pivot_table(merged_df, values='Metastatic Sites',
               index=['Drug'], 
               columns='Timepoint',aggfunc='mean').T
mean_df3

# Convert to DataFrame
pd.DataFrame(mean_df3)


# Preview DataFrame
pd.DataFrame(mean_df3).head()


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.16,0.380952,0.28,0.304348,0.26087,0.375,0.32,0.12,0.24,0.166667
10,0.32,0.6,0.666667,0.590909,0.52381,0.833333,0.565217,0.25,0.478261,0.5
15,0.375,0.789474,0.904762,0.842105,0.857143,1.25,0.764706,0.333333,0.782609,0.809524
20,0.652174,1.111111,1.05,1.210526,1.15,1.526316,1.0,0.347826,0.952381,1.294118


In [23]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 

stderr_df3=pd.pivot_table(merged_df, values='Metastatic Sites',
               index=['Drug'], 
               columns='Timepoint',aggfunc='sem').T
stderr_df3

# Convert to DataFrame

pd.DataFrame(stderr_df3)

# Preview DataFrame
pd.DataFrame(stderr_df3).head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.074833,0.108588,0.091652,0.0981,0.093618,0.100947,0.095219,0.066332,0.087178,0.077709
10,0.125433,0.152177,0.159364,0.142018,0.163577,0.115261,0.10569,0.090289,0.123672,0.109109
15,0.132048,0.180625,0.194015,0.191381,0.158651,0.190221,0.136377,0.115261,0.153439,0.111677
20,0.161621,0.241034,0.234801,0.23668,0.181731,0.234064,0.171499,0.11943,0.200905,0.166378


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview that Reformatting worked


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure

# Show the Figure


![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [27]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)

mice_count=pd.pivot_table(merged_df, values='Mouse ID',
               index=['Drug'], 
               columns='Timepoint',aggfunc='count').T
mice_count


# Convert to DataFrame
pd.DataFrame(mice_count)

# Preview DataFrame
pd.DataFrame(mice_count).head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17


In [None]:
# Minor Data Munging to Re-Format the Data Frames


# Preview the Data Frame


In [None]:
# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
plt.show()

![Metastatic Spread During Treatment](../Images/survival.png)

## Summary Bar Graph

In [32]:
# Calculate the percent changes for each drug
mice_count=pd.pivot_table(merged_df, values='Mouse ID',
               index=['Drug'], 
               columns='Timepoint',aggfunc='percent').T

# Display the data to confirm
merged_df.pct_change()

TypeError: unsupported operand type(s) for /: 'str' and 'str'

In [None]:
# Store all Relevant Percent Changes into a Tuple


# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
fig.show()

![Metastatic Spread During Treatment](../Images/change.png)