In [100]:
# Render matplotlib figures inline, directly in the notebook's cell output
%matplotlib inline

In [101]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [102]:
# Suppress every warning for the remainder of the session so that library
# notices do not clutter the rendered cell output.
# NOTE(review): this is a blanket filter -- it also hides deprecation and
# chained-assignment warnings raised by later cells.
import warnings
warnings.filterwarnings(action='ignore')

# Relative paths of the two input CSV files
clinical_trial_data = "Data/clinicaltrial_data.csv"
mouse_drug_data = "Data/mouse_drug_data.csv"

In [103]:
# Read the Mouse and Drug Data and the Clinical Trial Data into DataFrames.
# Both paths are the relative CSV locations defined in the previous cell.
mouse_data = pd.read_csv(mouse_drug_data)
clinical_data = pd.read_csv(clinical_trial_data)

# Preview only the first rows instead of rendering the whole table
mouse_data.head(10)

Unnamed: 0,Mouse ID,Drug
0,f234,Stelasyn
1,x402,Stelasyn
2,a492,Stelasyn
3,w540,Stelasyn
4,v764,Stelasyn
5,o848,Stelasyn
6,z314,Stelasyn
7,g989,Stelasyn
8,m269,Stelasyn
9,a963,Stelasyn


In [104]:
# Combine the data into a single dataset.
# The outer join on 'Mouse ID' keeps every mouse that appears in either file.
combined_data_df = pd.merge(clinical_data, mouse_data, how='outer', on='Mouse ID')

# Reduce data to look at effects of Capomulin, Infubinol, Ketapril, and Placebo
drugs = ['Capomulin', 'Infubinol', 'Ketapril', 'Placebo']

# .copy() makes the subset an independent DataFrame, so any later column
# assignment on it is unambiguous and never touches combined_data_df.
reduced_data_df = combined_data_df.loc[combined_data_df['Drug'].isin(drugs)].copy()

# Preview only the first rows instead of rendering the whole table
reduced_data_df.head(10)

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.000000,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin
5,b128,25,43.262145,1,Capomulin
6,b128,30,40.605335,1,Capomulin
7,b128,35,37.967644,1,Capomulin
8,b128,40,38.379726,2,Capomulin
9,b128,45,38.982878,2,Capomulin


In [95]:
# Mean tumor volume for every (Drug, Timepoint) group.
# The column is selected *before* aggregating so that the non-numeric
# 'Mouse ID' column is never passed to mean(); recent pandas releases raise
# a TypeError instead of silently dropping such columns, and older ones
# averaged every numeric column only to throw the extras away.
tumor_mean = reduced_data_df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)'].mean()

# Convert the Series to a one-column DataFrame (the Drug/Timepoint index is kept)
tumor_mean_df = tumor_mean.to_frame()

# Preview the first rows of the DataFrame
tumor_mean_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,45.0
Capomulin,5,44.266086
Capomulin,10,43.084291
Capomulin,15,42.064317
Capomulin,20,40.716325
Capomulin,25,39.939528
Capomulin,30,38.769339
Capomulin,35,37.816839
Capomulin,40,36.958001
Capomulin,45,36.236114


In [105]:
# Standard error of the mean of tumor volume for every (Drug, Timepoint) group.
# The column is selected *before* aggregating so that the non-numeric
# 'Mouse ID' column is never passed to sem(); recent pandas releases raise
# a TypeError instead of silently dropping such columns.
tumor_error = reduced_data_df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)'].sem()

# Convert the Series to a one-column DataFrame (the Drug/Timepoint index is kept)
tumor_error_df = tumor_error.to_frame()

# Preview the first rows of the DataFrame
tumor_error_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.448593
Capomulin,10,0.702684
Capomulin,15,0.838617
Capomulin,20,0.909731
Capomulin,25,0.881642
Capomulin,30,0.93446
Capomulin,35,1.052241
Capomulin,40,1.223608
Capomulin,45,1.223977


In [108]:
# Reshape both summaries into wide form: one row per Timepoint, one column
# per Drug, cells holding the tumor-volume statistic (mean or standard error).

# Flatten the (Drug, Timepoint) index back into ordinary columns
mean_reformat = tumor_mean_df.reset_index()
error_reformat = tumor_error_df.reset_index()

# Spread the Drug values across the columns
mean_pivot = mean_reformat.pivot(
    index='Timepoint', columns='Drug', values='Tumor Volume (mm3)'
)
error_pivot = error_reformat.pivot(
    index='Timepoint', columns='Drug', values='Tumor Volume (mm3)'
)

# Preview the reshaped means
mean_pivot.head()

Drug,Capomulin,Infubinol,Ketapril,Placebo
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,45.0,45.0,45.0,45.0
5,44.266086,47.062001,47.389175,47.125589
10,43.084291,49.403909,49.582269,49.423329
15,42.064317,51.296397,52.399974,51.359742
20,40.716325,53.197691,54.920935,54.364417
