In [55]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations, relative to the notebook's working directory (edit as needed)
mouse_drug_data_to_load, clinical_trial_data_to_load = (
    "data/mouse_drug_data.csv",
    "data/clinicaltrial_data.csv",
)

# Load each CSV into its own DataFrame (mouse/drug file first, then the trial file)
mouse_drug_data, clinical_trial_data = (
    pd.read_csv(csv_path)
    for csv_path in (mouse_drug_data_to_load, clinical_trial_data_to_load)
)




In [56]:
# Build the single working dataset: every row of the mouse/drug table joined to
# its clinical-trial rows on "Mouse ID". The left join keeps rows of
# mouse_drug_data even when no trial row matches.
# NOTE(review): the blanket fillna(0) turns any missing value into a literal 0,
# which then feeds the per-group means and standard errors computed further
# down -- confirm that is the intended treatment of missing measurements.
combine_data_df = pd.merge(
    left=mouse_drug_data,
    right=clinical_trial_data,
    on="Mouse ID",
    how="left",
).fillna(0)

# Preview the first rows of the combined table
combine_data_df.head()

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


## Tumor Response to Treatment

In [57]:
# Keep only the columns needed for the tumor-volume analysis
tumor_data_timepoint = combine_data_df.loc[:, ["Drug", "Timepoint", "Tumor Volume (mm3)"]]

# One group per (Drug, Timepoint) pair; the same GroupBy object is reused by
# the standard-error cell
tumor_volume_data = tumor_data_timepoint.groupby(by=["Drug", "Timepoint"])

# Mean tumor volume inside each group -> Series indexed by (Drug, Timepoint)
tumor_volume_data_mean = tumor_volume_data["Tumor Volume (mm3)"].mean()

# Wrap the Series in a one-column DataFrame and display it
tumor_response_treatment_df = tumor_volume_data_mean.to_frame()
tumor_response_treatment_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,45.000000
Capomulin,5,44.266086
Capomulin,10,43.084291
Capomulin,15,42.064317
Capomulin,20,40.716325
...,...,...
Zoniferol,25,55.432935
Zoniferol,30,57.713531
Zoniferol,35,60.089372
Zoniferol,40,62.916692


In [58]:
# Standard error of the mean (pandas .sem()) of tumor volume for each
# (Drug, Timepoint) group. The "_sd" suffix is historical: the values are
# standard errors, not standard deviations.
tumor_volume_data_sd = tumor_volume_data["Tumor Volume (mm3)"].sem()

# One-column DataFrame holding the standard errors
tumor_volume_data_sd_df = tumor_volume_data_sd.to_frame()

# Preview the first rows
tumor_volume_data_sd_df.head()



Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.448593
Capomulin,10,0.702684
Capomulin,15,0.838617
Capomulin,20,0.909731


In [59]:
# Reshape the (Drug, Timepoint)-indexed means into a wide table:
# one row per Timepoint, one column per Drug, cells = mean tumor volume.
tumor_response_treatment_sum_df = (
    tumor_response_treatment_df
    .reset_index()  # move Drug / Timepoint out of the index so pivot can use them
    .pivot(index="Timepoint", columns="Drug", values="Tumor Volume (mm3)")
)

# Preview that the reformatting worked
tumor_response_treatment_sum_df.head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure



In [None]:
# Generate and Show the Figure (with Error Bars)
# The figure is built and shown in this one cell: previously plt.show() ran
# with no figure ever created, so nothing was rendered.

# Reshape the standard errors into the same Timepoint x Drug layout as the means
tumor_sem_by_drug = tumor_volume_data_sd_df.reset_index().pivot(
    index="Timepoint", columns="Drug", values="Tumor Volume (mm3)"
)

fig, ax = plt.subplots(figsize=(10, 6))

# One error-bar series per drug: mean tumor volume +/- its standard error
for drug_name in tumor_response_treatment_sum_df.columns:
    ax.errorbar(
        tumor_response_treatment_sum_df.index,
        tumor_response_treatment_sum_df[drug_name],
        yerr=tumor_sem_by_drug[drug_name],
        marker="o",
        markersize=4,
        linestyle="--",
        linewidth=1,
        capsize=3,
        label=drug_name,
    )

ax.set_title("Tumor Response to Treatment")
ax.set_xlabel("Timepoint")
ax.set_ylabel("Mean Tumor Volume (mm3)")
ax.grid(True)
ax.legend(loc="best", fontsize="small")

# NOTE(review): the figure is not saved here -- no output path is defined in
# this notebook; add fig.savefig(...) once one is agreed.
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [60]:
# Keep only the columns needed for the metastatic-site analysis
met_data_timepoint = combine_data_df.loc[:, ["Drug", "Timepoint", "Metastatic Sites"]]

# One group per (Drug, Timepoint) pair; the same GroupBy object is reused by
# the standard-error cell
met_volume_data = met_data_timepoint.groupby(by=["Drug", "Timepoint"])

# Mean number of metastatic sites inside each group
met_volume_data_mean = met_volume_data["Metastatic Sites"].mean()

# Wrap the Series in a one-column DataFrame and show its last 20 rows
met_response_treatment_df = met_volume_data_mean.to_frame()
met_response_treatment_df.tail(20)


Unnamed: 0_level_0,Unnamed: 1_level_0,Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1
Stelasyn,0,0.0
Stelasyn,5,0.24
Stelasyn,10,0.478261
Stelasyn,15,0.782609
Stelasyn,20,0.952381
Stelasyn,25,1.157895
Stelasyn,30,1.388889
Stelasyn,35,1.5625
Stelasyn,40,1.583333
Stelasyn,45,1.727273


In [61]:
# Standard error of the mean (pandas .sem()) of the metastatic-site counts for
# each (Drug, Timepoint) group. The "_sd" suffix is historical: the values are
# standard errors, not standard deviations.
met_volume_data_sd = met_volume_data["Metastatic Sites"].sem()

# One-column DataFrame holding the standard errors
met_volume_data_sd_df = met_volume_data_sd.to_frame()

# Preview the first rows
met_volume_data_sd_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.074833
Capomulin,10,0.125433
Capomulin,15,0.132048
Capomulin,20,0.161621


In [62]:
# Reshape the (Drug, Timepoint)-indexed means into a wide table:
# one row per Timepoint, one column per Drug, cells = mean metastatic sites.
met_response_treatment_sum_df = (
    met_response_treatment_df
    .reset_index()  # move Drug / Timepoint out of the index so pivot can use them
    .pivot(index="Timepoint", columns="Drug", values="Metastatic Sites")
)

# Preview that the reformatting worked
met_response_treatment_sum_df.head()


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.16,0.380952,0.28,0.304348,0.26087,0.375,0.32,0.12,0.24,0.166667
10,0.32,0.6,0.666667,0.590909,0.52381,0.833333,0.565217,0.25,0.478261,0.5
15,0.375,0.789474,0.904762,0.842105,0.857143,1.25,0.764706,0.333333,0.782609,0.809524
20,0.652174,1.111111,1.05,1.210526,1.15,1.526316,1.0,0.347826,0.952381,1.294118


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure

# Show the Figure


![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [66]:


# Keep only the columns needed to count mice
mouse_data_timepoint = combine_data_df.loc[:, ["Drug", "Timepoint", "Mouse ID"]]

# One group per (Drug, Timepoint) pair
mouse_data = mouse_data_timepoint.groupby(by=["Drug", "Timepoint"])

# Number of "Mouse ID" entries recorded in each group
mouse_data_count = mouse_data["Mouse ID"].count()

# One-column DataFrame with the count column labelled "Mouse Count"
mouse_survival_df = mouse_data_count.to_frame(name="Mouse Count")

# Preview DataFrame
mouse_survival_df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Mouse Count
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,25
Capomulin,5,25
Capomulin,10,25
Capomulin,15,24
Capomulin,20,23


Unnamed: 0,Drug,Timepoint,Mouse Count
0,Capomulin,0,25
1,Capomulin,5,25
2,Capomulin,10,25
3,Capomulin,15,24
4,Capomulin,20,23


In [70]:
# Reshape the (Drug, Timepoint)-indexed counts into a wide table:
# one row per Timepoint, one column per Drug, cells = mouse count.
mouse_survival_sum_df = (
    mouse_survival_df
    .reset_index()  # move Drug / Timepoint out of the index so pivot can use them
    .pivot(index="Timepoint", columns="Drug", values="Mouse Count")
)

# Preview the Data Frame
mouse_survival_sum_df.head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17


In [None]:
# Generate the Plot (Accounting for percentages)
# Previously this cell only called plt.show() with nothing plotted, so no
# figure was rendered. The plot is now built here before it is shown.

# Express each drug's mouse count as a percentage of its count at the first
# timepoint (first row of the Timepoint x Drug table)
survival_rate_pct = mouse_survival_sum_df / mouse_survival_sum_df.iloc[0] * 100

fig, ax = plt.subplots(figsize=(10, 6))

# One line per drug
for drug_name in survival_rate_pct.columns:
    ax.plot(
        survival_rate_pct.index,
        survival_rate_pct[drug_name],
        marker="o",
        markersize=4,
        linestyle="--",
        linewidth=1,
        label=drug_name,
    )

ax.set_title("Survival During Treatment")
ax.set_xlabel("Timepoint")
ax.set_ylabel("Survival Rate (%)")
ax.grid(True)
ax.legend(loc="best", fontsize="small")

# Save the Figure
# NOTE(review): not saved -- no output path is defined in this notebook; add
# fig.savefig(...) once one is agreed.

# Show the Figure
plt.show()

![Survival Rates](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


Drug
Capomulin   -19.475303
Ceftamin     42.516492
Infubinol    46.123472
Ketapril     57.028795
Naftisol     53.923347
Placebo      51.297960
Propriva     47.241175
Ramicane    -22.320900
Stelasyn     52.085134
Zoniferol    46.579751
dtype: float64

In [None]:
# Summary bar chart of the percent change in mean tumor volume per drug.
# Previously this cell called fig.show() without ever creating `fig`, which
# raises NameError on a fresh kernel. Everything the chart needs is now
# computed here from the Timepoint x Drug table of mean tumor volumes.

# Percent change from the first to the last timepoint, one value per drug
first_mean_volume = tumor_response_treatment_sum_df.iloc[0]
last_mean_volume = tumor_response_treatment_sum_df.iloc[-1]
tumor_percent_change = (last_mean_volume - first_mean_volume) / first_mean_volume * 100

# Store all Relevant Percent Changes into a Tuple
percent_changes = tuple(tumor_percent_change)
drug_labels = tuple(tumor_percent_change.index)

# Splice the data between passing and failing drugs:
# a negative change (mean tumor volume went down) is drawn green, anything else red
bar_colors = ["g" if change < 0 else "r" for change in percent_changes]

# Orient widths. Add labels, tick marks, etc.
bar_positions = np.arange(len(percent_changes))
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(bar_positions, percent_changes, width=0.9, color=bar_colors, edgecolor="black")
ax.set_xticks(bar_positions)
ax.set_xticklabels(drug_labels, rotation=45, ha="right")
ax.axhline(0, color="black", linewidth=1)
ax.set_title("Tumor Change Over Treatment")
ax.set_ylabel("% Tumor Volume Change")
ax.grid(True, axis="y")


# Use functions to label the percentages of changes
def label_percent_change(rects):
    """Write each bar's height, as a whole-number percentage, at the bar's vertical midpoint."""
    for rect in rects:
        height = rect.get_height()
        ax.text(
            rect.get_x() + rect.get_width() / 2,
            height / 2,
            "{:.0f}%".format(height),
            ha="center",
            va="center",
            color="white",
        )


# Call functions to implement the function calls
label_percent_change(bars)

# Save the Figure
# NOTE(review): not saved -- no output path is defined in this notebook; add
# fig.savefig(...) once one is agreed.

# Show the Figure
plt.show()

![Summary Bar Graph](../Images/change.png)