In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random

In [None]:
# Hide warning messages in notebook
# NOTE(review): 'ignore' is installed with no category or module restriction,
# so every warning raised after this cell runs is silenced, including
# deprecation notices from the libraries imported above.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Input CSV locations (edit these if the data folder moves)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Read both files; the targets are unpacked in the same order as the paths
mouse_drug_df, clinical_trial_df = (
    pd.read_csv(csv_path)
    for csv_path in (mouse_drug_data_to_load, clinical_trial_data_to_load)
)

In [None]:
# Left-join the drug assignments onto the clinical-trial rows.
# No `on=` is given, so pandas joins on the column names the two frames share.
combined_df = clinical_trial_df.merge(mouse_drug_df, how="left")
# Show the merged table
combined_df

## Tumor Response to Treatment

In [None]:
# Keep only the columns needed for the tumor-volume analysis
tumor_columns = ["Timepoint", "Tumor Volume (mm3)", "Drug"]
grouped_drug = combined_df.loc[:, tumor_columns]
grouped_drug


In [None]:
# Mean tumor volume for every (Drug, Timepoint) pair, with the group keys kept
# as ordinary columns
grouped_drug_mean = (
    grouped_drug
    .groupby(["Drug", "Timepoint"], as_index=False)
    .mean()
)
grouped_drug_mean

In [None]:
# Standard error of the tumor volume for every (Drug, Timepoint) pair
grouped_drug_sem = (
    grouped_drug
    .groupby(["Drug", "Timepoint"])
    .sem()
    .reset_index()
)
# Reshape: one row per timepoint, one column per drug
grouped_sem_reform = grouped_drug_sem.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
)
grouped_sem_reform

In [None]:
# Reshape the mean tumor volumes: one row per timepoint, one column per drug
grouped_mean_reform = grouped_drug_mean.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
)

# Show the reshaped frame
grouped_mean_reform

In [None]:
# Timepoints (the pivot's row labels) shared by all the line plots
x_axis = grouped_mean_reform.index.tolist()

In [None]:
# One list of mean tumor volumes per drug, in the pivot's column order
mean_drug = [
    grouped_mean_reform[drug].tolist()
    for drug in grouped_mean_reform.columns
]


In [None]:
# One list of tumor-volume standard errors per drug, in the pivot's column order
standard_errors_drug = [
    grouped_sem_reform[drug].tolist()
    for drug in grouped_sem_reform.columns
]


In [None]:
# Drug names, in the same order as the per-drug lists built from the pivot
columns = grouped_mean_reform.columns.tolist()


In [None]:
# Line/marker styles. They are handed out in column order (wrapping around when
# there are more drugs than styles) so the saved figure is identical on every
# run; picking them at random changed the figure per run and could give several
# drugs the same style.
formats = [':o','--v','-^','-.<','-s']

fig, ax = plt.subplots(figsize=(15, 10))

# One error-bar series per drug; label= feeds the legend directly
for i, drug in enumerate(columns):
    ax.errorbar(x_axis, mean_drug[i], yerr=standard_errors_drug[i],
                fmt=formats[i % len(formats)], label=drug)

ax.set_title('Tumor Response to Treatment', fontweight = "bold", fontsize = 15)
ax.set_xlim(-1,46)
ax.set_xlabel("Time (Days)")
ax.set_ylabel("Tumor Volume (mm3)")
ax.legend(loc="best", prop={'size': 12})
ax.grid(axis = "y")
plt.savefig("Images/01_tumor_response.png", dpi=fig.dpi)
plt.show()

## Metastatic Response to Treatment

In [None]:
# Keep only the columns needed for the metastatic-site analysis
metastatic_columns = ["Timepoint", "Metastatic Sites", "Drug"]
grouped_met = combined_df.loc[:, metastatic_columns]
grouped_met

In [None]:
# Mean number of metastatic sites for every (Drug, Timepoint) pair, with the
# group keys kept as ordinary columns
grouped_met_mean = (
    grouped_met
    .groupby(["Drug", "Timepoint"], as_index=False)
    .mean()
)
grouped_met_mean

In [None]:
# Standard error of the metastatic-site count for every (Drug, Timepoint) pair
grouped_met_sem = (
    grouped_met
    .groupby(["Drug", "Timepoint"])
    .sem()
    .reset_index()
)
grouped_met_sem

In [None]:
# Reshape the standard errors: one row per timepoint, one column per drug
grouped_met_sem_reform = grouped_met_sem.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
)

# Show the reshaped frame
grouped_met_sem_reform

In [None]:
# Reshape the mean metastatic-site counts: one row per timepoint, one column per drug
grouped_met_mean_reform = grouped_met_mean.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
)

# Show the reshaped frame
grouped_met_mean_reform

In [None]:
# One list of mean metastatic-site counts per drug, in the pivot's column order
mean_met = [
    grouped_met_mean_reform[drug].tolist()
    for drug in grouped_met_mean_reform.columns
]


In [None]:
# One list of metastatic-site standard errors per drug, in the pivot's column order
standard_errors_met = [
    grouped_met_sem_reform[drug].tolist()
    for drug in grouped_met_sem_reform.columns
]


In [None]:
# Line/marker styles. They are handed out in column order (wrapping around when
# there are more drugs than styles) so the saved figure is identical on every
# run; picking them at random changed the figure per run and could give several
# drugs the same style.
formats = [':o','--v','-^','-.<','-s']

fig, ax = plt.subplots(figsize=(15, 10))

# One error-bar series per drug; label= feeds the legend directly
for i, drug in enumerate(columns):
    ax.errorbar(x_axis, mean_met[i], yerr=standard_errors_met[i],
                fmt=formats[i % len(formats)], label=drug)

ax.set_title('Metastatic Spread During Treatment', fontweight = "bold", fontsize = 15)
ax.set_xlim(-1,46)
ax.set_xlabel("Treatment Duration (Days)")
ax.set_ylabel("Metastatic Sites")
ax.legend(loc="best", prop={'size': 12})
ax.grid(axis = "y")
plt.tight_layout()
plt.savefig("Images/02_spread.png", dpi=fig.dpi)
plt.show()

## Survival Rates

In [None]:
# Keep only the columns needed to count the mice recorded at each timepoint
survival_columns = ["Timepoint", "Mouse ID", "Drug"]
grouped_mouse_count = combined_df.loc[:, survival_columns]
grouped_mouse_count

In [None]:
# Number of "Mouse ID" entries recorded for every (Drug, Timepoint) pair.
# The counts are built from combined_df rather than from grouped_mouse_count
# itself, so re-running this cell gives the same result instead of counting the
# already-aggregated frame.
grouped_mouse_count = (
    combined_df[["Timepoint", "Mouse ID", "Drug"]]
    .groupby(["Drug", "Timepoint"], as_index=False)
    .count()
)
grouped_mouse_count

In [None]:
# Reshape the counts: one row per timepoint, one column per drug
grouped_mouse_count_reform = grouped_mouse_count.pivot(
    index="Timepoint",
    columns="Drug",
    values="Mouse ID",
)
grouped_mouse_count_reform

In [None]:
# One list of per-timepoint mouse counts per drug, in the pivot's column order
mouse_count = [
    grouped_mouse_count_reform[drug].tolist()
    for drug in grouped_mouse_count_reform.columns
]

In [None]:
# Survival per drug, expressed as a percentage of that drug's count at the
# first timepoint (e.g. a count of 22 against a starting count of 25 -> 88.0).
# Iterating mouse_count directly ties the result to the survival data itself
# rather than to the drug list built for the tumor section.
count_percent = [
    [(count / counts[0]) * 100 for count in counts]
    for counts in mouse_count
]

In [None]:
# Marker shapes. They are handed out in column order (wrapping around when there
# are more drugs than markers) so the saved figure is identical on every run;
# picking them at random changed the figure per run and could give several
# drugs the same marker.
formats = ['o','v','^','<','s']

fig, ax = plt.subplots(figsize=(15, 10))

# One survival curve per drug; label= feeds the legend directly
for i, drug in enumerate(columns):
    ax.plot(x_axis, count_percent[i], marker=formats[i % len(formats)], ls="-", label=drug)

ax.set_title('Survival During Treatment', fontweight = "bold", fontsize = 15)
ax.set_xlim(-1,47)
ax.set_xlabel("Treatment Duration (Days)")
ax.set_ylabel("Survival Rates (%)")
ax.legend(loc="best", prop={'size': 12})
plt.tight_layout()
ax.grid()
plt.savefig("Images/03_survival_rates.png", dpi=fig.dpi)
plt.show()

## Summary Bar Graph

In [None]:
# Re-display the pivoted mean tumor volumes (rows: Timepoint, columns: Drug)
grouped_mean_reform

In [None]:
# Percent change in mean tumor volume between the first and the last timepoint,
# one value per drug. The last timepoint is addressed with [-1], so the result
# does not depend on how many drugs or timepoints the study contains.
drug_percent = [
    ((volumes[-1] - volumes[0]) / volumes[0]) * 100
    for volumes in mean_drug
]

# One row per drug (drug name as the index), single column "change"
drug_percent_df = pd.DataFrame({"change": drug_percent}, index=columns)
drug_percent_df

In [None]:
# Keep only the drugs that either shrank the tumors (negative change) or did no
# better than the placebo. The cut-off is read from the Placebo row instead of
# being typed in by hand, so it stays correct if the data change.
placebo_change = drug_percent_df.loc["Placebo", "change"]

selection = (drug_percent_df["change"] < 0) | (drug_percent_df["change"] >= placebo_change)
# reset_index() moves the drug names out of the index into a column named "index"
relevant_drug_df = drug_percent_df[selection].reset_index()


In [None]:
# Give the former index column a meaningful name
relevant_drug_df = relevant_drug_df.rename({"index": "Drug"}, axis="columns")

In [None]:
# Display the filtered drugs with their percent change
relevant_drug_df

In [None]:
# Map a percent change to a bar colour:
# passing drugs are green, failing drugs are red

def drug_result(value, threshold=51.297960):
    """Return the matplotlib colour code for a tumor-volume percent change.

    Parameters
    ----------
    value : float
        Percent change in tumor volume for one drug.
    threshold : float, optional
        Change at or above which a drug counts as failing. The default is the
        placebo's change that was previously hard-coded in the function body.

    Returns
    -------
    str
        "r" (red) when ``value >= threshold``, otherwise "g" (green).
    """
    return "r" if value >= threshold else "g"

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))

# One bar per drug, coloured by drug_result. The rounded percentage is written
# just above a positive bar and just below a negative one.
for drug, change in zip(relevant_drug_df["Drug"], relevant_drug_df["change"]):
    ax.bar(drug, change, align="center", facecolor=drug_result(change))
    label_y = change + 1 if change > 0 else change - 2
    ax.text(drug, label_y, str(round(change, 2)) + "%",
            color='black', fontweight='bold', horizontalalignment='center')

ax.set_title('Tumor Change Over 45 Days of Treatment', fontweight = "bold", fontsize = 15)
ax.set_ylabel("% Tumor Volume Change")
ax.grid(axis = "y")
plt.savefig("Images/04_change.png", dpi=fig.dpi)
plt.show()

        

# Results Observations

 - During the treatment period, only 2 drugs were effective in reducing tumor volume. As shown in the chart below, Capomulin and Ramicane reduced tumor volume by 19% and 22%, respectively. On the other hand, 3 drugs performed worse than the Placebo in terms of tumor volume reduction: Ketapril, Naftisol and Stelasyn showed the worst performances, with tumor size increases of 57%, 52.92% and 52.08%, respectively.

![Tumor Change](Images/04_change.png)

 - Considering survival rate, Capomulin and Ramicane again achieved the best results, with over 90% of subjects surviving at the end of the treatment period. Although some drugs obtained a result above the Placebo in terms of tumor volume reduction, they did not perform well when analyzing the survival rate. Infubinol (34%) and Propriva (26%) achieved the two worst survival rates in the study, as shown in the chart below. Using only these two criteria, Survival Rate and Tumor Volume, one can assume that while these drugs are more efficient than the Placebo in reducing tumor volume, they may also be very aggressive to the subject's body.
   

![Survival Rates During Treatment](Images/03_survival_rates.png)

 - Regarding metastatic spread during the treatment, all but one drug performed better than the Placebo. The metastatic sites for the two most efficient drugs in the study, Capomulin and Ramicane, remained at levels below 1. This again indicates that these drugs are a good general choice as cancer medication.

![Tumor Spread](Images/02_spread.png)