In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import sem
import os

# Hide warning messages in notebook
# NOTE: called with only the 'ignore' action, this filter matches every warning
# category and module, so it silences all warnings raised after this point in
# the kernel session -- including library deprecation notices. Comment it out
# when debugging or when upgrading dependencies.
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations, given relative to the notebook's working directory
# (adjust these two paths if the data lives elsewhere)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Load the mouse/drug table first, then the clinical-trial table
mouse_data, clinical_data = (
    pd.read_csv(csv_path)
    for csv_path in (mouse_drug_data_to_load, clinical_trial_data_to_load)
)

# Attach the mouse/drug columns to the clinical-trial rows by matching on
# 'Mouse ID' (pandas' default inner join), keeping the clinical columns first
mouse_clin = clinical_data.merge(mouse_data, on='Mouse ID')

# Show the first few merged rows as the cell output
mouse_clin.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


In [2]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint
# The grouped object is kept under its own name because the standard-error
# cell below reuses it.
tumorGrp = mouse_clin.groupby(['Drug', 'Timepoint'])

# Aggregate only the tumor-volume column. Calling .mean() on the whole group
# would also try to average the string column 'Mouse ID', which newer pandas
# releases reject with a TypeError instead of silently dropping it.
# Selecting the column up front also means there is no 'Metastatic Sites'
# column to delete afterwards, and reset_index() on the resulting Series
# already yields a DataFrame with columns: Drug, Timepoint, Tumor Volume (mm3).
tumorVol = tumorGrp['Tumor Volume (mm3)'].mean().reset_index()

# Preview DataFrame
tumorVol.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325


In [3]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
# Only the tumor-volume column is aggregated. The previous approach ran .sem()
# over every column and then deleted 'Metastatic Sites' and 'Mouse ID' by name;
# that depends on how the installed pandas treats the non-numeric 'Mouse ID'
# column (kept, silently dropped -> KeyError on the delete, or TypeError).
# Selecting the column first gives the same three-column result
# (Drug, Timepoint, Tumor Volume (mm3)) regardless of pandas version.
tumorErr = tumorGrp['Tumor Volume (mm3)'].sem().reset_index()

# Preview DataFrame
tumorErr.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.0
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731


In [4]:
# Minor Data Munging to Re-Format the Data Frames
def _pivot_by_drug(frame):
    """Reshape a long per-drug frame into one row per timepoint.

    Parameters
    ----------
    frame : pandas.DataFrame
        Long-format data with 'Drug', 'Timepoint' and 'Tumor Volume (mm3)'
        columns.

    Returns
    -------
    pandas.DataFrame
        A 'Timepoint' column followed by one column per drug holding that
        drug's 'Tumor Volume (mm3)' value at the timepoint.
    """
    # Passing `values` as a plain column name (not a one-element list) keeps
    # the resulting columns single-level, so no droplevel() step is needed.
    # pivot_table aggregates with the mean by default; the frames passed in
    # below were produced by grouping on (Drug, Timepoint), so each cell is
    # expected to hold exactly one value and the aggregation leaves it as is.
    wide = frame.pivot_table(index='Timepoint',
                             columns='Drug',
                             values='Tumor Volume (mm3)')
    # Move 'Timepoint' out of the index so it is available as a regular column
    return wide.reset_index()


# Same reshaping applied to the means and to their standard errors
tumorVolTab = _pivot_by_drug(tumorVol)
tumorVolErrTab = _pivot_by_drug(tumorErr)

# Preview that Reformatting worked
tumorVolTab

Drug,Timepoint,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
0,0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
1,5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
2,10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
3,15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
4,20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334
5,25,39.939528,54.287674,55.715252,57.678982,56.731968,57.482574,55.504138,38.9743,56.166123,55.432935
6,30,38.769339,56.769517,58.299397,60.994507,59.559509,59.809063,58.196374,38.703137,59.826738,57.713531
7,35,37.816839,58.827548,60.742461,63.371686,62.685087,62.420615,60.350199,37.451996,62.440699,60.089372
8,40,36.958001,61.467895,63.162824,66.06858,65.600754,65.052675,63.045537,36.574081,65.356386,62.916692
9,45,36.236114,64.132421,65.755562,70.662958,69.265506,68.084082,66.258529,34.955595,68.43831,65.960888
