In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import sem

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Locations of the two input CSV files (adjust these to match your checkout)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Load each CSV into its own DataFrame
mouse_data = pd.read_csv(mouse_drug_data_to_load)
clinical_data = pd.read_csv(clinical_trial_data_to_load)

# Join the two tables on their shared "Mouse ID" column. The outer join keeps
# rows whose Mouse ID appears in only one of the two files; the clinical-trial
# table is the left-hand side of the join.
merged_data = pd.merge(
    left=clinical_data,
    right=mouse_data,
    how="outer",
    on="Mouse ID",
)

# Leave the combined table as the cell output for a quick preview
merged_data


In [None]:
# Pull the "Drug" and "Timepoint" columns out of the merged table as Series.
# NOTE(review): neither name is referenced by the other cells visible in this
# notebook - confirm they are still needed.
drug, timepoint = merged_data["Drug"], merged_data["Timepoint"]

In [None]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint.
# The GroupBy object is kept in `grouped_data` because the standard-error cell
# that follows reads it again.
grouped_data = merged_data.groupby(["Drug", "Timepoint"])

# Mean tumor volume for every (Drug, Timepoint) group, as a Series
grouped_data1 = grouped_data["Tumor Volume (mm3)"].mean()

# Convert to a one-column DataFrame indexed by (Drug, Timepoint)
tumor_df = pd.DataFrame(grouped_data1)

# Preview with Drug and Timepoint shown as ordinary columns. reset_index()
# returns a new frame, so tumor_df itself keeps its (Drug, Timepoint) index.
tumor_df.reset_index()

In [None]:
# Standard error of the mean tumor volume within each (Drug, Timepoint) group,
# computed from the GroupBy object built in the mean-tumor-volume cell.
standard_error = grouped_data["Tumor Volume (mm3)"].sem()

# Wrap the resulting Series in a one-column DataFrame
sterr_df = pd.DataFrame(data=standard_error)

# Leave the frame as the cell output
sterr_df


In [None]:
# Reshape the mean tumor volumes into a wide table:
# one row per Timepoint, one column per Drug.
tumor_df_pivot = pd.pivot_table(
    data=tumor_df,
    index=['Timepoint'],
    columns=["Drug"],
    values='Tumor Volume (mm3)',
)

# Show the reshaped table to confirm the layout
tumor_df_pivot

In [None]:
# Group once by treatment and timepoint; both metrics below read from this.
# NOTE: this rebinds `grouped_data1` (previously the mean-volume Series) to a
# GroupBy object, and rebuilds `tumor_df` from it.
grouped_data1 = merged_data.groupby(by=['Drug', 'Timepoint'])

# --- Tumor volume: per-group mean (as a DataFrame) and standard error ---
tumor_df = pd.DataFrame(data=grouped_data1['Tumor Volume (mm3)'].mean())
tumor_sem = grouped_data1['Tumor Volume (mm3)'].sem()

# --- Metastatic sites: per-group mean (as a DataFrame) and standard error ---
mean_met_df = pd.DataFrame(data=grouped_data1['Metastatic Sites'].mean())
sterr_met_sem = grouped_data1['Metastatic Sites'].sem()

# Move index level 0 (Drug) into the columns so each drug becomes its own
# column and the rows are indexed by Timepoint only.
tumor_unstack = tumor_df.unstack(level=0)
mean_met_unstack = mean_met_df.unstack(level=0)

# Timepoints shared by the plots below as their x coordinates
x_axis = tumor_unstack.index

In [None]:
# Plot mean tumor volume over time (with standard-error bars) for four drugs.
# The three lists are parallel: position n gives the name, line color and
# marker style for the same drug.
drugs = ['Capomulin','Ketapril','Infubinol','Placebo']
colors = ['red','green','blue','black']
markers = ['o','^','s','D']

fig, ax = plt.subplots()

# zip() walks the parallel lists together, so no index lookup is needed.
for drug_name, color, marker in zip(drugs, colors, markers):
    ax.errorbar(
        x_axis,
        tumor_unstack['Tumor Volume (mm3)'][drug_name],
        # tumor_sem is indexed by (Drug, Timepoint); selecting the drug leaves
        # one standard-error value per timepoint.
        yerr=tumor_sem[drug_name],
        # The label is what ax.legend() picks up; without it the legend is empty.
        label=drug_name,
        marker=marker,
        color=color,
        linestyle='--',
        capsize=5,
    )

ax.set_title('Response to Treatment')
ax.legend()
ax.set_xlim(0, 45)
ax.set_ylim(35, 75)
ax.grid(linestyle=':')
ax.set_ylabel('Tumor Volume (mm3)')
ax.set_xlabel('Time Elapsed (Days)')



## Metastatic Response to Treatment

In [None]:
# Group the merged data by treatment and timepoint; the standard-error cell
# for metastatic sites reads `met` again.
met = merged_data.groupby(by=["Drug", "Timepoint"])

# Mean number of metastatic sites in each (Drug, Timepoint) group
mean_met = met["Metastatic Sites"].mean()

# Wrap the Series in a one-column DataFrame
mean_met_df = pd.DataFrame(data=mean_met)

# Show the first five rows as the cell output
mean_met_df.head(n=5)

In [None]:
# Standard error of the metastatic-site counts within each (Drug, Timepoint)
# group, computed from the `met` GroupBy object.
standard_err_met = met["Metastatic Sites"].sem()

# Wrap the Series in a DataFrame; its single column is 'Metastatic Sites' and
# its index is (Drug, Timepoint).
sterr_met_df = pd.DataFrame(data=standard_err_met)

# Leave the frame as the cell output
sterr_met_df

In [None]:
# Reshape the mean metastatic-site counts into a wide table:
# one row per Timepoint, one column per Drug.
mean_met_df_pivot = pd.pivot_table(
    data=mean_met_df,
    index=['Timepoint'],
    columns=["Drug"],
    values='Metastatic Sites',
)

# Show the reshaped table to confirm the layout
mean_met_df_pivot

In [None]:
# Generate the Plot (with Error Bars): mean metastatic sites over time for
# four drugs. The three lists are parallel: position n gives the name, line
# color and marker style for the same drug.
drugs = ['Capomulin','Ketapril','Infubinol','Placebo']
colors = ['red','green','blue','black']
markers = ['o','^','s','D']

fig, ax = plt.subplots()

# zip() walks the parallel lists together, so no index lookup is needed.
for drug_name, color, marker in zip(drugs, colors, markers):
    ax.errorbar(
        x_axis,
        mean_met_unstack['Metastatic Sites'][drug_name],
        # sterr_met_df has a single 'Metastatic Sites' column indexed by
        # (Drug, Timepoint). Indexing the frame directly with a drug name
        # raises KeyError, so select the column first and then the drug.
        yerr=sterr_met_df['Metastatic Sites'].loc[drug_name],
        # The label is what ax.legend() picks up; without it the legend is empty.
        label=drug_name,
        marker=marker,
        color=color,
        linestyle='--',
        capsize=5,
    )

# Title names the metric shown, so this figure is distinguishable from the
# tumor-volume plot.
ax.set_title('Metastatic Response to Treatment')
ax.legend()
ax.set_xlim(0, 45)
ax.set_ylim(0.0,4.0)
ax.grid(linestyle=':')
ax.set_ylabel('Metastatic Sites')
ax.set_xlabel('Time Elapsed (Days)')


## Survival Rates

In [None]:
# Store the Count of Mice Grouped by Drug and Timepoint (we can pass any metric)
mice = merged_data.groupby(["Drug", "Timepoint"])

# nunique() yields the number of distinct Mouse IDs in each group. unique()
# would instead store the array of IDs themselves, which is not a count.
mouse_count = mice["Mouse ID"].nunique()

# Convert to DataFrame (single "Mouse ID" column holding the per-group count)
mouse_df = pd.DataFrame(mouse_count)

# Preview DataFrame
mouse_df

In [None]:
# Count the non-null tumor-volume measurements for every Timepoint (rows) and
# Drug (columns). The survival plot treats these counts as the number of mice
# still in the study at each timepoint.
survival_rate = pd.pivot_table(
    data=merged_data,
    index=['Timepoint'],
    columns=['Drug'],
    values='Tumor Volume (mm3)',
    aggfunc='count',
)

# Keep only the four drugs that are plotted
sr_df = survival_rate[['Capomulin', 'Ketapril', 'Infubinol', 'Placebo']]

# Show the full count table (all drugs) as the cell output
survival_rate


In [None]:
# Generate the Plot (Accounting for percentages): share of each drug's
# timepoint-0 count that is still present at every later timepoint.
# The three lists are parallel: position n gives the name, line color and
# marker style for the same drug.
drugs = ['Capomulin','Ketapril','Infubinol','Placebo']
colors = ['red','green','blue','black']
markers = ['o','^','s','D']

fig, ax = plt.subplots()

# zip() walks the parallel lists together, so no index lookup is needed.
for drug_name, color, marker in zip(drugs, colors, markers):
    mice_alive = sr_df[drug_name]
    ax.errorbar(
        x_axis,
        # .loc[0] is the count at Timepoint 0 (label-based lookup), used as
        # the 100% baseline.
        (mice_alive / mice_alive.loc[0]) * 100,
        # The label is what ax.legend() picks up; without it the legend is empty.
        label=drug_name,
        marker=marker,
        color=color,
        linestyle='--',
        capsize=5,
        mec='black',
    )

ax.set_title('Survival During Treatment')
ax.legend()
ax.set_xlim(0, 45)
ax.set_ylim(30, 100)
ax.grid()
ax.set_ylabel('Survival Rate (%)')
ax.set_xlabel('Time Elapsed (Days)')

## Summary Bar Graph

In [None]:
# Calculate the percent change in mean tumor volume for each drug between
# timepoint 0 and timepoint 45
percent_change = (tumor_df_pivot.loc[45, :] - tumor_df_pivot.loc[0, :])/tumor_df_pivot.loc[0, :] * 100

# Display the data to confirm. An explicit print is required here because a
# bare expression is only echoed when it is the last statement of a cell.
print(percent_change)

# Plotting the Bar Graph

# Drugs shown in the summary chart. Defined in this cell because no other
# visible cell creates `drugs_to_plot`.
drugs_to_plot = ['Capomulin','Ketapril','Infubinol','Placebo']

summary_pt = pd.pivot_table(merged_data, values='Tumor Volume (mm3)', columns=['Drug'], index=['Timepoint'], aggfunc='mean')
summary_df = summary_pt[drugs_to_plot]

# Parallel lists, one entry per drug in drugs_to_plot order:
# percent change, bar color, and the text label drawn on the bar.
summary_pct_list = []
growth_dir = []
pct_labels = []

first_timepoint = summary_df.index[0]
last_timepoint = summary_df.index[-1]

for drug_name in drugs_to_plot:
    start = summary_df.loc[first_timepoint, drug_name]
    end = summary_df.loc[last_timepoint, drug_name]
    summary_pct = ((end - start) / start) * 100
    summary_pct_list.append(summary_pct)
    # Green only when the tumor shrank; anything else (including exactly 0)
    # is red. Every drug always gets a color so the lists stay aligned.
    growth_dir.append('green' if summary_pct < 0 else 'red')
    pct_labels.append(str(summary_pct.round(1))+'%')

summary_fig, summary_ax = plt.subplots()
find_xlim = len(summary_pct_list) -0.5
summary_bar = summary_ax.bar(drugs_to_plot, summary_pct_list, align='center',width=1)

summary_ax.set_ylabel('Percent Tumor Volume Change')
summary_ax.grid()
# Bars sit at x = 0..len-1 with width 1, so these limits frame them exactly.
summary_ax.set_xlim(-0.5, find_xlim)
summary_ax.set_title('Tumor Change Over 45 Day Treatment')

# Style and annotate each bar exactly once.
for bar, pct, bar_color, label in zip(summary_bar, summary_pct_list, growth_dir, pct_labels):
    bar.set_color(bar_color)
    bar.set_label(pct)
    # Fixed y position for the text: inside the bar just above the axis for
    # growth, below the axis for shrinkage.
    height = -10 if pct < 0 else 6
    summary_ax.text(bar.get_x() + bar.get_width() / 2, height, label, ha='center', va='bottom', color='white')



## Observations
1. Of the four drugs that were plotted, Capomulin appeared to give mice the best survival rate.
2. Tumor volume did not decrease (it stayed flat or increased) for Infubinol, Ketapril, and the placebo.
3. The pivot table of tumor volume also shows that Ramicane had a positive effect, reducing tumor volume over time.