In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')


In [2]:
# Read the Mouse and Drug Data and the Clinical Trial Data
mouse_trials_df = pd.read_csv("./data/mouse_drug_data.csv")
clinical_trials_df = pd.read_csv("./data/clinicaltrial_data.csv")

# Combine the data into a single dataset
all_trials_df = pd.merge(mouse_trials_df, clinical_trials_df, on = ["Mouse ID", "Mouse ID"])

# Display the data table for preview
all_trials_df.head()


Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


## Tumor Response to Treatment

In [6]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint
mean_tumor_volume_gp = all_trials_df.groupby(["Drug", "Timepoint"]).mean()[["Tumor Volume (mm3)"]]
# Double brackets are StackOverflow's idea to make a groupby object into a dataframe in one line.

# Preview new dataframe
mean_tumor_volume_gp

Unnamed: 0_level_0,Tumor Volume (mm3)
Drug,Unnamed: 1_level_1
Capomulin,40.675741
Ceftamin,52.591172
Infubinol,52.957935
Ketapril,55.235638
Naftisol,54.331565
Placebo,54.033581
Propriva,52.322552
Ramicane,40.216745
Stelasyn,54.106357
Zoniferol,53.236507


In [8]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
# Define standard errors
from scipy.stats import sem

standard_errors = all_trials_df.groupby(["Drug", "Timepoint"]).sem()["Tumor Volume (mm3)"]


Drug
Capomulin    0.329346
Ceftamin     0.469821
Infubinol    0.483691
Ketapril     0.603860
Naftisol     0.596466
Placebo      0.581331
Propriva     0.512884
Ramicane     0.320955
Stelasyn     0.543853
Zoniferol    0.516398
Name: Tumor Volume (mm3), dtype: float64

In [15]:
# Minor Data Munging to Re-Format the Data Frames
mean_tumor_volume_gp.pivot(index = "Timepoint",
                            columns = "Drug",
                            values = "Tumor Volume (mm3)")

# Preview that Reformatting worked
mean_tumor_volume_gp.head()

KeyError: 'Timepoint'

In [None]:
# Generate the Plot (with Error Bars)
plt.errorbar("Drug", "Tumor Volume (mm3)", standard_errors, fmt = 'o')
plt.title("Tumor Response to Treatment")
plt.xlabel("Drug Used")
plt.ylabel("Tumor Volume")
plt.legend(loc="best")

# Save the Figure
plt.savefig(Tumor_Response.png)


In [None]:
# Show the Figure
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [27]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint
metastasis_gp = all_trials_df.groupby(["Drug", "Timepoint"]).mean()[["Metastatic Sites"]]
# Double brackets are StackOverflow's idea to make a groupby object into a dataframe in one line.

# Preview new dataframe

Unnamed: 0_level_0,Unnamed: 1_level_0,Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.16
Capomulin,10,0.32
Capomulin,15,0.375
Capomulin,20,0.652174


In [29]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 
drug_name_meta = all_trials_df["Drug"]
time_point_meta = all_trials_df["Timepoint"]
standard_error_meta = all_trials_df.groupby(["Drug", "Timepoint"]).sem()["Metastatic Sites"]


In [7]:
# Convert to DataFrame

# Preview DataFrame


Unnamed: 0_level_0,Unnamed: 1_level_0,Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.074833
Capomulin,10,0.125433
Capomulin,15,0.132048
Capomulin,20,0.161621


In [None]:
# Minor Data Munging to Re-Format the Data Frames
metastasis_gp.pivot(index = "Timepoint",
                            columns = "Drug",
                            values = "Metastatic Sites")

# Preview that Reformatting worked
metastasis_gp.head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [None]:
# Generate the Plot (with Error Bars)
plt.errorbar("Drug", "Metastatic Sites", standard_errors, fmt = 'o')
plt.title("Metastatic Spread During Treatment")
plt.xlabel("Drug Used")
plt.ylabel("Metastatic Sites")
plt.legend(loc="best")

# Save the Figure
plt.savefig(Metastatic_Sites.png)

# Show the Figure
plt.show()

![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)
mice_count = all_trials_df.groupby(["Drug", "Timepoint"]).countn()[["Mouse ID"]]
# Convert to DataFrame

# Preview DataFrame


Unnamed: 0,Drug,Timepoint,Mouse Count
0,Capomulin,0,25
1,Capomulin,5,25
2,Capomulin,10,25
3,Capomulin,15,24
4,Capomulin,20,23


In [None]:
# Minor Data Munging to Re-Format the Data Frames
mice_count.pivot(index = "Timepoint",
                            columns = "Drug",
                            values = mice_count)

# Preview the Data Frame


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17


In [None]:
# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
plt.show()

![Metastatic Spread During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


Drug
Capomulin   -19.475303
Ceftamin     42.516492
Infubinol    46.123472
Ketapril     57.028795
Naftisol     53.923347
Placebo      51.297960
Propriva     47.241175
Ramicane    -22.320900
Stelasyn     52.085134
Zoniferol    46.579751
dtype: float64

In [None]:
# Store all Relevant Percent Changes into a Tuple


# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
fig.show()

![Metastatic Spread During Treatment](../Images/change.png)