In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Read in the data
mouse_data = pd.read_csv("Data/mouse_drug_data.csv")
trial_data = pd.read_csv("Data/clinical_trial_data.csv")

In [21]:
# Combine the data into a single dataset
mouse_trial_data = pd.merge(trial_data, mouse_data, on="Mouse ID")
mouse_trial_data.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


In [196]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 
drug_time_tumor = mouse_trial_data.groupby(["Drug", "Timepoint"])
mean_tumor_volume = drug_time_tumor["Tumor Volume (mm3)"].mean()

# Convert to DataFrame
dtt_df = pd.DataFrame({"Tumor Volume (mm3)":mean_tumor_volume}).groupby(["Drug", "Timepoint"])
dtt_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,45.000000
Capomulin,5,44.266086
Capomulin,10,43.084291
Capomulin,15,42.064317
Capomulin,20,40.716325
Capomulin,25,39.939528
Capomulin,30,38.769339
Capomulin,35,37.816839
Capomulin,40,36.958001
Capomulin,45,36.236114


In [197]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
tum_vol_sem = drug_time_tumor.sem()

# Convert to DataFrame
dtt_sem_df = pd.DataFrame({"Tumor Volume (mm3)":tum_vol_sem["Tumor Volume (mm3)"]})
dtt_sem_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.448593
Capomulin,10,0.702684
Capomulin,15,0.838617
Capomulin,20,0.909731


In [205]:
# Minor Data Munging to Re-Format the Data Frames
# Drug/Time/Tumor
dtt_reformatted = mouse_trial_data.groupby(["Timepoint", "Drug"])
mtv_2 = dtt_reformatted["Tumor Volume (mm3)"].mean()
dtt_unstacked = mtv_2.unstack()

# Standard Error
sem_reformatted = mouse_trial_data.groupby(["Timepoint", "Drug"])
sem_2 = sem_reformatted["Tumor Volume (mm3)"].sem()
sem_unstacked = dtt_sem_df.unstack()

# Extract column names for use in the errorbar plot
dtt_columns = list(dtt_unstacked)
sem_columns = list(sem_unstacked)

# Preview that Reformatting worked
sem_reformatted.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.000000,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin
5,b128,25,43.262145,1,Capomulin
6,b128,30,40.605335,1,Capomulin
7,b128,35,37.967644,1,Capomulin
8,b128,40,38.379726,2,Capomulin
9,b128,45,38.982878,2,Capomulin


In [None]:
# Generate the Plot (with Error Bars)
fig, ax = plt.subplots()

ax.errorbar(np.arange(0, len(dtt_unstacked)), dtt_unstacked, yerr=sem, fmt="o", color="b",
            alpha=0.5, label="Tumor Response to Treatment")

ax.set_xlim(-0.5, len(means))

ax.set_xlabel("Time (days)")
ax.set_ylabel("Tumor Volume (mm3)")

plt.legend(loc="best", fontsize="small", fancybox=True)

# Save the Figure
