In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Read the Mouse and Drug Data and the Clinical Trial Data

clinical_data = pd.read_csv(clinical_trial_data_to_load)
mouse_data = pd.read_csv(mouse_drug_data_to_load)


# Combine the data into a single dataset

combined_data_df = pd.merge(clinical_data, mouse_data, how="left", on=["Mouse ID", "Mouse ID"])


# Display the data table for preview

combined_data_df.head()



Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,f932,0,45.0,0,Ketapril
2,g107,0,45.0,0,Ketapril
3,a457,0,45.0,0,Ketapril
4,c819,0,45.0,0,Ketapril


## Tumor Response to Treatment

In [2]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 
mean_tumor_volume = combined_data_df.groupby(['Drug', 'Timepoint'])
output_df_mean = mean_tumor_volume['Tumor Volume (mm3)'].mean()
output_df_mean.head()
# Convert to DataFrame

output_df = pd.DataFrame(output_df_mean)
output_df = output_df.reset_index()

# Preview DataFrame

output_df

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.000000
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325
...,...,...,...
95,Zoniferol,25,55.432935
96,Zoniferol,30,57.713531
97,Zoniferol,35,60.089372
98,Zoniferol,40,62.916692


In [3]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
tumor_standard_errors = mean_tumor_volume.sem()['Tumor Volume (mm3)']

# Convert to DataFrame
avg_tumor_df = pd.DataFrame(tumor_standard_errors).reset_index()

# Preview DataFrame
avg_tumor_df.head() 


Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.0
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731


In [4]:
# Minor Data Munging to Re-Format the Data Frames
mung_avg_tumor_df = avg_tumor_df.pivot(
    index='Timepoint',
    columns='Drug',
    values='Tumor Volume (mm3)')

# Preview that Reformatting worked
mung_avg_tumor_df.head(20)


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.448593,0.164505,0.235102,0.264819,0.202385,0.218091,0.231708,0.482955,0.239862,0.18895
10,0.702684,0.236144,0.282346,0.357421,0.319415,0.402064,0.376195,0.720225,0.433678,0.263949
15,0.838617,0.332053,0.357705,0.580268,0.444378,0.614461,0.466109,0.770432,0.493261,0.370544
20,0.909731,0.359482,0.47621,0.726484,0.59526,0.839609,0.555181,0.786199,0.621889,0.533182
25,0.881642,0.439356,0.550315,0.755413,0.813706,1.034872,0.577401,0.746991,0.741922,0.602513
30,0.93446,0.49062,0.631061,0.934121,0.975496,1.218231,0.746045,0.864906,0.899548,0.800043
35,1.052241,0.692248,0.984155,1.127867,1.013769,1.287481,1.084929,0.967433,1.003186,0.881426
40,1.223608,0.708505,1.05522,1.158449,1.118567,1.370634,1.564779,1.128445,1.410435,0.998515
45,1.223977,0.902358,1.144427,1.453186,1.416363,1.351726,1.888586,1.226805,1.576556,1.003576


In [5]:
# Generate the Plot (with Error Bars)
# x_axis = np.arange(0, 50, 5)

# capo_st_err = plt.errorbar(x_axis, mung_avg_tumor_df["Capomulin"], marker="o", ls="dashed", color ="red", label="Capomulin")
# infu_st_err =  plt.errorbar(x_axis, mung_avg_tumor_df["Infubinol"], marker="^", ls="dashed", color ="blue", label="Infubinol")
# keta_st_err = plt.errorbar(x_axis, mung_avg_tumor_df["Ketapril"], marker="s", ls="dashed", color ="green", label="Ketapril")
# placebo_st_err = plt.errorbar(x_axis, mung_avg_tumor_df["Placebo"], marker="d", ls="dashed",color ="black", label="Placebo")

# plt.ylim(35, 75)
# plt.xlim(-1, 50)

# plt.title("Tumor Response to Treatment", weight='bold', fontsize=15)
# plt.xlabel("Time (Days)", weight='bold', fontsize=10)
# plt.ylabel("Tumor Volume (mm3)", weight='bold', fontsize=10)
# plt.legend()
# plt.grid()
# plt.show()

plt.figure(figsize = (10, 5))


plt.errorbar(x=mung_avg_tumor_df['Timepoint'],y=mung_avg_tumor_df['Capomulin'], yerr=None, linestyle="--", fmt='o')
plt.errorbar(x=mung_avg_tumor_df['Timepoint'],y=mung_avg_tumor_df['Infubinol'], yerr=None, linestyle="--", fmt='o')
plt.errorbar(x=mung_avg_tumor_df['Timepoint'],y=mung_avg_tumor_df['Ketapril'], yerr=None, linestyle="--",fmt='o')
plt.errorbar(x=mung_avg_tumor_df['Timepoint'],y=mung_avg_tumor_df['Placebo'], yerr=None, linestyle="--", fmt='o')

plt.ylabel('Tumor Volume(mm3)')
plt.xlabel('Time (Days)')
plt.title('Tumor Response to Treatment')
plt.grid()
plt.legend()       
plt.show()

# Save the Figure
plt.savefig(os.path.join('figures','tumor_response_to_treatment.png'))


KeyError: 'Timepoint'

<Figure size 720x360 with 0 Axes>

In [6]:
# Show the Figure
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [7]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 
grouped_met_df = combined_data_df.groupby(['Drug','Timepoint'])['Metastatic Sites']
avg_met_df = grouped_met_df.mean()
# Convert to DataFrame
avg_met_df = avg_met_df.reset_index()

# Preview DataFrame
avg_met_df.head()


Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.16
2,Capomulin,10,0.32
3,Capomulin,15,0.375
4,Capomulin,20,0.652174


In [8]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 
met_standard_errors = combined_data_df.sem()

# Convert to DataFrame
met_standard_errors_df = pd.DataFrame(met_standard_errors)

# Preview DataFrame
met_standard_errors_df.reset_index(inplace=True)
met_standard_errors_df.head()

Unnamed: 0,index,0
0,Timepoint,0.32216
1,Tumor Volume (mm3),0.203161
2,Metastatic Sites,0.02602


In [9]:
# Minor Data Munging to Re-Format the Data Frames
pivot_avg_met_df = avg_met_df.pivot(index='Timepoint',columns='Drug',values='Metastatic Sites')

# Preview that Reformatting worked
pivot_avg_met_df.head(20)


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.16,0.380952,0.28,0.304348,0.26087,0.375,0.32,0.12,0.24,0.166667
10,0.32,0.6,0.666667,0.590909,0.52381,0.833333,0.565217,0.25,0.478261,0.5
15,0.375,0.789474,0.904762,0.842105,0.857143,1.25,0.764706,0.333333,0.782609,0.809524
20,0.652174,1.111111,1.05,1.210526,1.15,1.526316,1.0,0.347826,0.952381,1.294118
25,0.818182,1.5,1.277778,1.631579,1.5,1.941176,1.357143,0.652174,1.157895,1.6875
30,1.090909,1.9375,1.588235,2.055556,2.066667,2.266667,1.615385,0.782609,1.388889,1.933333
35,1.181818,2.071429,1.666667,2.294118,2.266667,2.642857,2.3,0.952381,1.5625,2.285714
40,1.380952,2.357143,2.1,2.733333,2.466667,3.166667,2.777778,1.1,1.583333,2.785714
45,1.47619,2.692308,2.111111,3.363636,2.538462,3.272727,2.571429,1.25,1.727273,3.071429


In [10]:
# Generate the Plot (with Error Bars)
drug_format_list = [('Capomulin','o','red'),('Infubinol','^','blue'),('Ketapril','s','green'),('Placebo','d','black')]

for drug,marker,colors in drug_format_list:
    ste = met_standard_errors[drug]
    met_treatment_plt = plt.errorbar(pivot_avg_met_df.index,pivot_avg_met_df[drug],ste,
                                       fmt=marker,ls='--',color=colors,linewidth=0.5)
# Save the Figure
plt.legend(loc='best')
plt.title('Metastatic Spread During Treatment')
plt.xlabel('Treatment Duration (Days)')
plt.ylabel('Met. Sites')
plt.grid()
# Show the Figure
plt.savefig(os.path.join('figures','metastic_spread_during_treatment.png'))


KeyError: 'Capomulin'

![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [11]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)
mouse_grouped_df = complete_df.groupby(['Drug','Timepoint'])['Mouse ID']
count_mouse_df = mouse_grouped_df.nunique()
count_mouse_df = pd.DataFrame(count_mouse_df)
# Convert to DataFrame
count_mouse_df.reset_index(inplace=True)
count_mouse_df=count_mouse_df.rename(columns={'Mouse ID':'Mouse Count'})
# Preview DataFrame
count_mouse_df.head()


NameError: name 'complete_df' is not defined

In [12]:
# Minor Data Munging to Re-Format the Data Frames
pivot_count_mouse_df = count_mouse_df.pivot(index='Timepoint',columns='Drug',values='Mouse Count')

# Preview the Data Frame
pivot_count_mouse_df.head()


NameError: name 'count_mouse_df' is not defined

In [13]:
# Generate the Plot (Accounting for percentages)
for drug,marker,colors in drug_format_list:
    total_mouse = pivot_count_mouse_df[drug][0]
    survival_rate = (pivot_count_mouse_df[drug]/total_mouse)*100
    survival_treatment_plt = plt.plot(pivot_count_mouse_df.index,survival_rate,
                                       marker=marker,ls='--',color=colors,linewidth=0.5)
# Save the Figure
plt.legend(loc='best')
plt.title('Survival During Treatment')
plt.xlabel('Times (Days)')
plt.ylabel('Survival Rate (%)')
plt.grid()

# Show the Figure
plt.savefig(os.path.join('figures','survival_during_treatment.png'))


plt.show()

NameError: name 'pivot_count_mouse_df' is not defined

![Metastatic Spread During Treatment](../Images/survival.png)

## Summary Bar Graph

In [14]:
# Calculate the percent changes for each drug
percentage_change = (pivot_avg_tumor_df.iloc[-1]/(pivot_avg_tumor_df.iloc[0])-1)*100

# Display the data to confirm
percentage_change


NameError: name 'pivot_avg_tumor_df' is not defined

In [15]:
# Store all Relevant Percent Changes into a Tuple
passing = percentage_change < 0


# Splice the data between passing and failing drugs
drug_list = ['Capomulin','Infubinol','Ketapril','Placebo']
change_list = [(percentage_change[durg])for durg in drug_list]
change_plt = plt.bar(drug_list,change_list,width=-1,align='edge',color=passing.map({True:'g',False:'r'}))
plt.grid()
plt.ylim(-30,70)
plt.ylabel('% Tumor Volume Change')
plt.title('Tumor Change over 45 Day Treatment')

# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
fig.show()

NameError: name 'percentage_change' is not defined

![Metastatic Spread During Treatment](../Images/change.png)