In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Input CSV locations, relative to the notebook's working directory.
# Clinical trial measurements (joined to the mouse/drug table on "Mouse ID").
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"
# Mouse -> drug assignment table.
mouse_drug_data_to_load = "data/mouse_drug_data.csv"

In [None]:
# Read both CSV files into DataFrames.
mousedf = pd.read_csv(mouse_drug_data_to_load)
clinicaldf = pd.read_csv(clinical_trial_data_to_load)

# Join the two tables on their shared "Mouse ID" column (inner join by default).
merged = pd.merge(mousedf, clinicaldf, on="Mouse ID")

# Unique timepoints as a plain, ascending list of Python scalars.
# Sorting matters: the pivoted frames built from `merged` have a sorted
# "Timepoint" index, so an x-axis list in first-appearance order could
# silently misalign with the y values if the CSV rows are not already ordered.
timelist = sorted(merged["Timepoint"].unique().tolist())

# Preview the merged frame. This must be the last expression of the cell,
# otherwise the notebook does not render it.
merged.head()

In [None]:
# Average tumor volume for every (Drug, Timepoint) pair.
# The measurement column is selected BEFORE aggregating: calling .mean() on the
# whole grouped frame also tries to average every other column, which raises
# TypeError on pandas >= 2.0 as soon as one of them is non-numeric
# (presumably "Mouse ID" holds string identifiers; verify against the CSV).
itsnotatuma = merged.groupby(["Drug", "Timepoint"])[["Tumor Volume (mm3)"]].mean()

# reset_index() turns the group keys back into ordinary columns and already
# returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
dfmean = itsnotatuma.reset_index()
dfmean.head()

In [None]:
# Standard error of the mean of tumor volume for every (Drug, Timepoint) pair.
# The measurement column is selected BEFORE aggregating so that .sem() never
# sees other columns; aggregating the whole grouped frame raises TypeError on
# pandas >= 2.0 when any remaining column is non-numeric
# (presumably "Mouse ID" holds string identifiers; verify against the CSV).
itsnotatuma = merged.groupby(["Drug", "Timepoint"])[["Tumor Volume (mm3)"]].sem()

# reset_index() turns the group keys back into ordinary columns and already
# returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
dfsem = itsnotatuma.reset_index()
dfsem.head()

In [None]:
# Reshape the long table of means into wide form:
# one row per Timepoint, one column per Drug, cells hold the mean tumor volume.
dfmunging = dfmean.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
)
dfmunging.head()

In [None]:
# Reshape the long table of standard errors into wide form:
# one row per Timepoint, one column per Drug, cells hold the tumor-volume SEM.
dfmungingsem = dfsem.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
)
dfmungingsem.head()

In [None]:
# Drugs to be analyzed, plus one line color and one marker per drug.
# The three lists are positional: entry k of each belongs to the same drug.
drugs = ["Capomulin","Infubinol","Ketapril","Placebo"]
colors = ["red", "blue","darkgreen","black"]
markers = ["o","^","s","d"]

# One error-bar series per drug: mean tumor volume with SEM as the y error.
# zip() walks the three lists in lockstep, avoiding a drugs.index() lookup
# on every iteration.
for drug, color, marker in zip(drugs, colors, markers):
    plt.errorbar(
        dfmunging.index,          # x comes from the plotted frame itself, so x and y always align
        dfmunging[drug],
        yerr=dfmungingsem[drug],
        label=drug,               # legend entry is bound to this series, not to draw order
        marker=marker,
        color=color,
        linestyle=":",
        alpha=.85,
    )

# Chart attributes.
plt.legend(loc="best")
plt.ylabel("Tumor Volume(mm3)")
plt.xlabel("Time(Days)")
plt.title("Tumor Response to Treatment")
# Fixed axis window chosen for this data set (days on x, mm3 on y).
plt.xlim(-2,47)
plt.ylim(33,75)
plt.grid()
# Save before show(); the file name is kept unchanged so existing references
# to the image keep working.
plt.savefig("Images/TumorResonsetoTreatment.png")
plt.show()

In [None]:
# Mean number of metastatic sites for every (Drug, Timepoint) pair.
# Only the two group keys and the measurement column are kept before grouping.
metasdfavg = (
    merged[["Drug","Timepoint", "Metastatic Sites"]]
    .groupby(["Drug","Timepoint"])
    .mean()
)
metasdfavg.head()

In [None]:
# Standard error of the metastatic-site count for every (Drug, Timepoint) pair.
# Only the two group keys and the measurement column are kept before grouping.
metasdfsem = (
    merged[["Drug","Timepoint", "Metastatic Sites"]]
    .groupby(["Drug","Timepoint"])
    .sem()
)
metasdfsem.head()

In [None]:
# Move the (Drug, Timepoint) group keys out of the index into regular columns,
# then reshape to wide form: rows are Timepoints, columns are Drugs,
# cells are the mean metastatic-site counts.
metasdfavg = metasdfavg.reset_index()
metasmungavg = metasdfavg.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
)
metasmungavg

In [None]:
# Move the (Drug, Timepoint) group keys out of the index into regular columns,
# then reshape to wide form: rows are Timepoints, columns are Drugs,
# cells are the standard errors of the metastatic-site counts.
metasdfsem = metasdfsem.reset_index()
metasdfmungsem = metasdfsem.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
)
metasdfmungsem

In [None]:
# One error-bar series per drug: mean metastatic-site count with SEM as y error.
# drugs / colors / markers are the positional lists defined with the first plot.
for drug, color, marker in zip(drugs, colors, markers):
    plt.errorbar(
        metasmungavg.index,       # x comes from the plotted frame itself, so x and y always align
        metasmungavg[drug],
        yerr=metasdfmungsem[drug],
        label=drug,               # legend entry is bound to this series, not to draw order
        marker=marker,
        color=color,
        linestyle=":",
        alpha=.85,
    )

# Chart attributes. The legend is built once, after all series exist.
plt.legend(loc="best")
plt.ylabel("Metastatic Sites")
plt.xlabel("Treatment Duration (Days)")
plt.title("Metastatic Spread During Treatment")
# Grid must be enabled before savefig, otherwise the saved image lacks it.
plt.grid()
# This figure gets its own file; it previously reused the tumor-response
# file name and overwrote that image.
plt.savefig("Images/Metastatic Spread During Treatment.png")
plt.show()

In [None]:
# Count of non-null tumor-volume measurements per Timepoint (rows) and
# Drug (columns).
# NOTE(review): presumably one measurement per living mouse, so this is the
# number of surviving mice; confirm against the data.
mouse_survival = merged.pivot_table(
    index=["Timepoint"],
    columns=["Drug"],
    values="Tumor Volume (mm3)",
    aggfunc='count',
)
mouse_survival

In [None]:
# The axis is labelled as a percentage, so convert the raw counts to a
# percentage of each drug's count at the first timepoint (row 0 of the
# Timepoint-indexed table). Division aligns on the Drug columns.
survival_pct = mouse_survival / mouse_survival.iloc[0] * 100

# One line per drug; drugs / colors / markers are the positional lists
# defined with the first plot.
for drug, color, marker in zip(drugs, colors, markers):
    plt.plot(
        survival_pct.index,       # x comes from the plotted frame itself, so x and y always align
        survival_pct[drug],
        label=drug,               # legend entry is bound to this series, not to draw order
        marker=marker,
        color=color,
        linestyle=":",
        alpha=.85,
    )

# Chart attributes. The legend is built once, after all series exist.
plt.legend(loc="best")
plt.ylabel("Survival Rate (%)")
plt.xlabel("Time (Days)")
plt.title("Survival During Treatment")
# Grid must be enabled before savefig, otherwise the saved image lacks it.
plt.grid()
plt.savefig("Images/Survival During Treatment.png")
plt.show()

In [None]:
# Restrict the wide table of mean tumor volumes to the drugs under test.
dfmungingC = dfmunging[drugs]

# Percent change between the first and last timepoint for each drug,
# computed for all columns at once; kept as a plain list in `drugs` order.
first_volume = dfmungingC.iloc[0]
last_volume = dfmungingC.iloc[-1]
y = ((last_volume - first_volume) / first_volume * 100).tolist()

# Bar positions along the x axis.
x = np.arange(len(y))
# Green for shrinkage, red otherwise. A separate name is used so the
# line-plot `colors` palette defined earlier in the notebook is not
# overwritten (re-running the earlier plot cells would otherwise pick up
# these bar colors).
bar_colors = ["green" if item < 0 else "red" for item in y]

# Chart attributes.
plt.ylabel("% Tumor Volume Change")
plt.title("Tumor Change Over 45 Day Treatment")
plt.bar(x, y, color=bar_colors)
plt.axhline(linewidth=1, color='black', y=0)
plt.xticks(x, drugs)

# Write each bar's value inside the bar: just below zero for negative bars,
# just above zero for the rest. enumerate() supplies the bar position
# directly, so the tick array `x` is not shadowed and no list search is needed.
for position, pct_change in enumerate(y):
    ybar = -5.5 if pct_change < 0 else 2
    plt.text(position, ybar, str(round(pct_change, 1)) + "%", ha='center', color='white')

plt.savefig("Images/Tumor Change Over 45 Day Treatment.png")
plt.show()