In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
import scipy.stats as st

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations, relative to the notebook's working directory
# (edit these if the data lives elsewhere).
mouse_drug_datapath, clinicaltrial_datapath = (
    "Data/mouse_drug_data.csv",
    "Data/clinicaltrial_data.csv",
)

In [2]:
# Read the mouse -> drug assignment table and display it.
mousedrug_datadf = pd.read_csv(filepath_or_buffer=mouse_drug_datapath)
mousedrug_datadf

Unnamed: 0,Mouse ID,Drug
0,f234,Stelasyn
1,x402,Stelasyn
2,a492,Stelasyn
3,w540,Stelasyn
4,v764,Stelasyn
...,...,...
245,i669,Placebo
246,r850,Placebo
247,a262,Placebo
248,q787,Placebo


In [3]:
# Read the per-timepoint clinical-trial measurements and display them.
clinicaltrial_datadf = pd.read_csv(filepath_or_buffer=clinicaltrial_datapath)
clinicaltrial_datadf

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.000000,0
1,f932,0,45.000000,0
2,g107,0,45.000000,0
3,a457,0,45.000000,0
4,c819,0,45.000000,0
...,...,...,...,...
1888,r944,45,41.581521,2
1889,u364,45,31.023923,3
1890,p438,45,61.433892,1
1891,x773,45,58.634971,4


In [4]:
# Left-join the clinical-trial rows onto the drug table by "Mouse ID",
# so every mouse listed in the drug table is kept.
merged_dataset = mousedrug_datadf.merge(
    right=clinicaltrial_datadf,
    on="Mouse ID",
    how="left",
)

# Preview the first rows of the combined table
merged_dataset.head()

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


## Tumor Response to Treatment

In [5]:
# Group the merged dataset by the prescribed drug and the timepoint.
DrugTime_data = merged_dataset.groupby(by=["Drug", "Timepoint"])
# On a GroupBy, head() returns the first five rows of *each* group.
DrugTime_data.head(5)

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.000000,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2
...,...,...,...,...,...
1780,x773,Placebo,35,55.192736,4
1781,x773,Placebo,40,55.987676,4
1782,x773,Placebo,45,58.634971,4
1802,y478,Placebo,40,65.971120,1


# The right output isn't coming out because the variables that stored the stats were reused for the 'summarydict'

    #Minor Data Munging to Re-Format the Data Frames
# NOTE: this code has to be executed before the cell that builds
# `summarydict`; that cell reads the *_mtumorvol names assigned below.
#
# Select the tumor-volume column *before* aggregating. Aggregating the whole
# grouped frame first computes each statistic for every column and fails on
# the non-numeric 'Mouse ID' column in pandas >= 2.0 (numeric_only=False).
tumorvol_grouped = DrugTime_data['Tumor Volume (mm3)']

# One value per (Drug, Timepoint) group for each statistic.
meantumorvol_mean = tumorvol_grouped.mean()
meantumorvol_count = tumorvol_grouped.count()
meantumorvol_median = tumorvol_grouped.median()
meantumorvol_variance = tumorvol_grouped.var()
meantumorvol_StdDev = tumorvol_grouped.std()
meantumorvol_StdErr = tumorvol_grouped.sem()

    #Using the range of stat list to retrieve all the required values
# Positional slice: keeps at most the first 100 (Drug, Timepoint) groups.
mean_mtumorvol = meantumorvol_mean.iloc[0:100]
count_mtumorvol = meantumorvol_count.iloc[0:100]
median_mtumorvol = meantumorvol_median.iloc[0:100]
variance_mtumorvol = meantumorvol_variance.iloc[0:100]
StdDev_mtumorvol = meantumorvol_StdDev.iloc[0:100]
StdErr_mtumorvol = meantumorvol_StdErr.iloc[0:100]

In [6]:
# Show the first five rows of every (Drug, Timepoint) group again.
DrugTime_data.head(n=5)

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.000000,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2
...,...,...,...,...,...
1780,x773,Placebo,35,55.192736,4
1781,x773,Placebo,40,55.987676,4
1782,x773,Placebo,45,58.634971,4
1802,y478,Placebo,40,65.971120,1


# Put the per-group mean and count side by side as two columns.
# The second positional argument of pd.DataFrame is `index`, so passing the
# count Series there would use the counts as row labels instead of adding
# them as a column; a dict of Series aligns both on their shared index.
df = pd.DataFrame({
    'Mean Tumor Volume Mean': mean_mtumorvol,
    'Mean Tumor Volume Count': count_mtumorvol,
})
df

In [7]:
    #Collect every tumor-volume statistic under a descriptive column label
# The six *_mtumorvol Series must already exist when this runs.
summarydict = dict([
    ('Mean Tumor Volume Mean', mean_mtumorvol),
    ('Mean Tumor Volume Count', count_mtumorvol),
    ('Mean Tumor Volume Median', median_mtumorvol),
    ('Mean Tumor Volume Variance', variance_mtumorvol),
    ('Mean Tumor Volume Standard Deviation', StdDev_mtumorvol),
    ('Mean Tumor Volume Standard Error', StdErr_mtumorvol),
])
summarydict

NameError: name 'mean_mtumorvol' is not defined

In [None]:
# Build one DataFrame from the statistics dictionary: each key becomes a
# column and the Series are aligned on their shared index.
summarydf = pd.DataFrame.from_dict(summarydict)
summarydf

In [None]:
#Merging the Original Dataframe and the summarydf
# summarydf holds one row per (Drug, Timepoint) group, so the join must use
# both keys. Joining on 'Drug' alone is many-to-many: it attaches every
# timepoint's statistics to every observation of that drug and multiplies
# the number of rows.
merged_summarydf = merged_dataset.merge(
    summarydf.reset_index(),      # expose the group keys as ordinary columns
    how='left',
    on=['Drug', 'Timepoint'],
    validate='many_to_one',       # fail loudly if a group appears twice
)
merged_summarydf

In [None]:
# Group the merged summary dataframe by 'Drug' and preview five rows per drug.
msummarydf = merged_summarydf.groupby('Drug')
msummarydf.head(n=5)

#From the Solution

    #Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen
    #This method produces everything in a single groupby function
# Adapted to this notebook's names: the merged frame is `merged_dataset` and
# its regimen column is "Drug". The solution's `study_data_complete` and
# "Drug Regimen" are not defined anywhere in this notebook.
summary_table = merged_dataset.groupby("Drug").agg({"Tumor Volume (mm3)":["mean","median","var","std","sem"]})
summary_table

In [None]:
# Number of rows recorded for each drug.
uniquevalues = merged_summarydf.loc[:, 'Drug'].value_counts()
# Bar chart of the per-drug row counts, drawn through the pandas plot accessor.
uniquevalues.plot.bar()
plt.xticks(rotation=90)
plt.xlabel("Drug")
plt.ylabel("# of Data Points")
plt.show()
#Save the Figure#

In [None]:
# Same per-drug row-count bar chart, this time drawn directly with pyplot.
uniquevalues = merged_summarydf.loc[:, 'Drug'].value_counts()
plt.bar(x=uniquevalues.index.values, height=uniquevalues.values)
plt.xticks(rotation=90)
plt.xlabel("Drug")
plt.ylabel("# of Data Points")
plt.show()
#Save the Figure#

In [None]:
#Generate a pie plot showing the distribution of the drugs prescribed using pandas
uniquevalues = merged_summarydf.Drug.value_counts()
# Draw the pie once, through pandas. Calling plt.pie as well would paint a
# second, identical pie on top of the same axes.
uniquevalues.plot(kind="pie",autopct='%1.1f%%')
plt.show()

## Metastatic Response to Treatment

In [None]:
# Bucket the observations by drug and timepoint
msummarydf_grouped = merged_summarydf.groupby(by=['Drug', 'Timepoint'])

# Average number of metastatic sites within each bucket
metsite_mean = msummarydf_grouped['Metastatic Sites'].mean()

# Promote the Series to a one-column DataFrame
mean_metsitedf = metsite_mean.to_frame()

# Give the column a descriptive label, then preview the result
meanmetsitedf = mean_metsitedf.rename(columns={'Metastatic Sites':"Mean of the Metastatic Sites"})
meanmetsitedf.head(n=5)

In [None]:
# Standard error of the metastatic-site counts for each (Drug, Timepoint) bucket
StdErr_metsite = msummarydf_grouped['Metastatic Sites'].sem()
# Promote the Series to a one-column DataFrame
StdErr_metsitedf = StdErr_metsite.to_frame()
# Give the column a descriptive label, then preview the result
StdErr_mdf = StdErr_metsitedf.rename(columns={'Metastatic Sites':"Standard Error of the Metastatic Sites"})
StdErr_mdf.head(n=5)

In [None]:
# Minor Data Munging to Re-Format the Data Frames
# Both statistic frames are indexed by (Drug, Timepoint), so they are joined
# on both keys. Joining on "Drug" alone is many-to-many and pairs each row
# with the statistics of every timepoint of that drug.
metsitemerged_df = (
    merged_summarydf
    .merge(meanmetsitedf.reset_index(), how="left", on=["Drug", "Timepoint"])
    .merge(StdErr_mdf.reset_index(), how="left", on=["Drug", "Timepoint"])
)
# Preview that Reformatting worked
metsitemerged_df.head()

In [None]:
# Generate the Plot (with Error Bars)
# Pivot the (Drug, Timepoint)-indexed statistics so each drug is a column
# and each row is a timepoint. The mean and SEM tables then share the same
# layout, which lets pandas match error bars to lines by column name.
mean_by_time = meanmetsitedf["Mean of the Metastatic Sites"].unstack("Drug")
sem_by_time = StdErr_mdf["Standard Error of the Metastatic Sites"].unstack("Drug")

# One line per drug: mean metastatic sites over time, with SEM error bars.
ax = mean_by_time.plot(yerr=sem_by_time, marker="o", capsize=3)
ax.set_xlabel("Timepoint")
ax.set_ylabel("Mean of the Metastatic Sites")
ax.set_title("Metastatic Spread During Treatment")
ax.legend(title="Drug")

# Save the Figure

# Show the Figure
plt.show()


![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Store the Count of Mice Grouped by Drug and Timepoint (We can pass any metric)

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview the Data Frame


In [None]:
# Generate the Plot (Accounting for percentages)
# TODO: no plotting call has been written in this cell yet, so the
# plt.show() below has no figure from this cell to display.

# Save the Figure

# Show the Figure
plt.show()

![Survival During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [None]:
# Store all Relevant Percent Changes into a Tuple


# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
# Use pyplot directly, as the other cells do: no `fig` object is created
# anywhere in this notebook, so `fig.show()` would raise a NameError.
plt.show()

![Tumor Change Over Treatment](../Images/change.png)