In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)

mouse_drug_data_to_load = os.path.join("data", "mouse_drug_data.csv")
clinical_trial_data_to_load = os.path.join("data", "clinicaltrial_data.csv")

# Read the Mouse and Drug Data and the Clinical Trial Data

mouse_drug_data = pd.read_csv(mouse_drug_data_to_load)
clinical_trial_data = pd.read_csv(clinical_trial_data_to_load)                                           

# Combine the data into a single dataset
data = pd.merge(mouse_drug_data, clinical_trial_data, on="Mouse ID")

# Display the data table for preview
data.head()


Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


In [2]:
data["Drug"].unique() 


array(['Stelasyn', 'Propriva', 'Naftisol', 'Ketapril', 'Capomulin',
       'Infubinol', 'Ceftamin', 'Zoniferol', 'Ramicane', 'Placebo'],
      dtype=object)

In [3]:
tumor_response = pd.DataFrame(data.groupby(["Drug", "Timepoint"]).mean()["Tumor Volume (mm3)"])
tumor_response


Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,45.000000
Capomulin,5,44.266086
Capomulin,10,43.084291
Capomulin,15,42.064317
Capomulin,20,40.716325
...,...,...
Zoniferol,25,55.432935
Zoniferol,30,57.713531
Zoniferol,35,60.089372
Zoniferol,40,62.916692


In [4]:
tumor_response.drop(['Stelasyn', 'Propriva', 'Naftisol', 'Ceftamin', 'Zoniferol', 'Ramicane' ])


Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,45.0
Capomulin,5,44.266086
Capomulin,10,43.084291
Capomulin,15,42.064317
Capomulin,20,40.716325
Capomulin,25,39.939528
Capomulin,30,38.769339
Capomulin,35,37.816839
Capomulin,40,36.958001
Capomulin,45,36.236114


In [5]:
standard_error = data.sem(axis=1)
standard_error
data = data.assign(Sem=standard_error)
data.head()


Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites,Sem
0,f234,Stelasyn,0,45.0,0,15.0
1,f234,Stelasyn,5,47.313491,0,15.007402
2,f234,Stelasyn,10,47.904324,0,14.589879
3,f234,Stelasyn,15,48.735197,1,14.167083
4,f234,Stelasyn,20,51.112713,2,14.345072


In [6]:
data.sort_values(by=["Drug", "Timepoint", "Tumor Volume (mm3)"])


Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites,Sem
581,b128,Capomulin,0,45.000000,0,15.000000
591,r944,Capomulin,0,45.000000,0,15.000000
601,s185,Capomulin,0,45.000000,0,15.000000
611,w914,Capomulin,0,45.000000,0,15.000000
621,l897,Capomulin,0,45.000000,0,15.000000
...,...,...,...,...,...,...
1437,g296,Zoniferol,45,68.163195,4,18.759367
1424,w575,Zoniferol,45,68.401286,3,19.132503
1339,q511,Zoniferol,45,68.611061,3,19.186647
1410,q633,Zoniferol,45,70.827796,2,20.073947


In [10]:
response_treatment = data[["Drug", "Timepoint", "Tumor Volume (mm3)"]]
response_treatment.sort_values(by=["Drug"])


Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
584,Capomulin,15,43.784893
727,Capomulin,5,45.595685
728,Capomulin,10,43.421014
729,Capomulin,15,37.978778
730,Capomulin,20,38.531136
...,...,...,...
1422,Zoniferol,35,63.686445
1421,Zoniferol,30,60.153065
1420,Zoniferol,25,58.587322
1392,Zoniferol,25,54.209836


In [8]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint and convert to a DataFrame
tumor_response_treatment = pd.DataFrame(data.groupby(["Drug", "Timepoint"]).agg(["mean", "sem"])["Tumor Volume (mm3)"])

tumor_response_treatment


Unnamed: 0_level_0,Unnamed: 1_level_0,mean,sem
Drug,Timepoint,Unnamed: 2_level_1,Unnamed: 3_level_1
Capomulin,0,45.000000,0.000000
Capomulin,5,44.266086,0.448593
Capomulin,10,43.084291,0.702684
Capomulin,15,42.064317,0.838617
Capomulin,20,40.716325,0.909731
...,...,...,...
Zoniferol,25,55.432935,0.602513
Zoniferol,30,57.713531,0.800043
Zoniferol,35,60.089372,0.881426
Zoniferol,40,62.916692,0.998515
