In [2]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats

In [3]:
# Silence every warning category for the remainder of the notebook session
import warnings
warnings.simplefilter('ignore')

In [4]:
# Locations of the two input CSV files
mouse_data_path = 'data/mouse_drug_data.csv'
clinical_data_path = 'data/clinicaltrial_data.csv'

# Load the mouse-to-drug assignments and the clinical trial measurements
mouse_drug_df = pd.read_csv(mouse_data_path)
clinical_trial_df = pd.read_csv(clinical_data_path)

# Left-join the drug assignment onto every clinical measurement via "Mouse ID"
complete_df = clinical_trial_df.merge(mouse_drug_df, how='left', on='Mouse ID')

# Preview the first rows of the combined table
complete_df.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,f932,0,45.0,0,Ketapril
2,g107,0,45.0,0,Ketapril
3,a457,0,45.0,0,Ketapril
4,c819,0,45.0,0,Ketapril


## Tumor Response to Treatment

In [13]:
# Mean tumor volume and metastatic-site count for each (Drug, Timepoint) pair.
# The numeric columns are selected explicitly: "Mouse ID" holds strings, and
# pandas >= 2.0 raises a TypeError on mean() instead of silently dropping
# such columns.
tumor_response_mean_df = (
    complete_df
    .groupby(["Drug", "Timepoint"])[["Tumor Volume (mm3)", "Metastatic Sites"]]
    .mean()
)

# Preview DataFrame (the groupby result is already a DataFrame)
tumor_response_mean_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3),Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1,Unnamed: 3_level_1
Capomulin,0,45.0,0.0
Capomulin,5,44.266086,0.16
Capomulin,10,43.084291,0.32
Capomulin,15,42.064317,0.375
Capomulin,20,40.716325,0.652174


In [20]:
# Standard error of the mean tumor volume per (Drug, Timepoint) group,
# kept as a one-column DataFrame
tumor_response_sem = (
    complete_df
    .groupby(["Drug", "Timepoint"])['Tumor Volume (mm3)']
    .sem()
    .to_frame()
)

# Show the first few rows
tumor_response_sem.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.448593
Capomulin,10,0.702684
Capomulin,15,0.838617
Capomulin,20,0.909731


In [None]:
# Minor Data Munging to Re-Format the Data Frames
# "Drug" and "Timepoint" are index levels after the groupby, so they are moved
# back into columns before pivoting to one row per timepoint and one column
# per drug.
tumor_pivot = tumor_response_mean_df.reset_index().pivot(
    index="Timepoint", columns="Drug", values="Tumor Volume (mm3)"
)

# Same layout for the standard errors, read from the SEM frame computed above
stdErrVsTime = tumor_response_sem.reset_index().pivot(
    index="Timepoint", columns="Drug", values="Tumor Volume (mm3)"
)

# Preview the formatted table
tumor_pivot.head()


In [None]:
# Mean tumor volume for the four drugs under comparison, one row per timepoint.
# Built straight from the grouped means: unstack() spreads the "Drug" index
# level into columns and reset_index() turns "Timepoint" into a regular column,
# which the plotting and percent-change cells read by name.
table_fourdrugs = (
    tumor_response_mean_df["Tumor Volume (mm3)"]
    .unstack("Drug")
    .reset_index()
    .loc[:, ["Timepoint", "Capomulin", "Infubinol", "Ketapril", "Placebo"]]
)
table_fourdrugs.head()

In [None]:
# Generate the Plot (with Error Bars)
plt.figure(figsize=(10, 5))

# Standard errors laid out as timepoint rows x drug columns, to match the means
tumor_sem_by_drug = tumor_response_sem["Tumor Volume (mm3)"].unstack("Drug")

for drug in ["Capomulin", "Infubinol", "Ketapril", "Placebo"]:
    plt.errorbar(
        x=table_fourdrugs["Timepoint"],
        y=table_fourdrugs[drug],
        # Look the errors up by timepoint so they line up with the plotted means
        yerr=tumor_sem_by_drug[drug].reindex(table_fourdrugs["Timepoint"]).to_numpy(),
        linestyle="--",
        fmt="o",
        label=drug,  # without labels plt.legend() has nothing to display
    )

plt.ylabel('Tumor Volume(mm3)')
plt.xlabel('Time (Days)')
plt.title('Tumor Response to Treatment')
plt.grid()
plt.legend()

# Show the Figure
plt.show()


In [None]:
# Show the Figure

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [None]:
# Mean of the numeric measurements for each (Drug, Timepoint) pair.
# The columns are named explicitly because "Mouse ID" holds strings and
# pandas >= 2.0 raises a TypeError on mean() rather than dropping it.
combine_group_mean_met = (
    complete_df
    .groupby(["Drug", "Timepoint"])[["Tumor Volume (mm3)", "Metastatic Sites"]]
    .mean()
)

# Keep only the metastatic-site means, as a one-column DataFrame
met_response_mean_df = combine_group_mean_met[["Metastatic Sites"]]

# Preview DataFrame
met_response_mean_df.head()

In [None]:
# Standard error of the numeric measurements for each (Drug, Timepoint) pair.
# The string column "Mouse ID" is excluded up front so sem() only sees numbers.
combine_group_met_sem = (
    complete_df
    .groupby(["Drug", "Timepoint"])[["Tumor Volume (mm3)", "Metastatic Sites"]]
    .sem()
)

# Keep only the metastatic-site standard errors, as a one-column DataFrame.
# This reads the variable assigned just above.
met_response_sem_df = combine_group_met_sem[["Metastatic Sites"]]

# Preview DataFrame
met_response_sem_df.head()

In [None]:
# Minor Data Munging to Re-Format the Data Frames
# Move "Drug"/"Timepoint" out of the index so pivot() can address them as
# columns. The source is the grouped-mean frame computed in the cell above.
met_response_mean_df2 = combine_group_mean_met.reset_index()

# One row per timepoint, one column per drug; "Timepoint" is restored as a
# regular column because later cells select it by name.
pivot_table_met = met_response_mean_df2.pivot(
    index="Timepoint", columns="Drug", values="Metastatic Sites"
).reset_index()

# Preview the formatted table
pivot_table_met.head()

In [None]:

# Narrow the metastatic pivot to the timepoint column plus the four drugs compared
met_columns = ["Timepoint", "Capomulin", "Infubinol", "Ketapril", "Placebo"]
met_table_fourdrugs = pivot_table_met.loc[:, met_columns]
met_table_fourdrugs.head()

In [None]:
# Generate the Plot (with Error Bars)
plt.figure(figsize=(10, 5))

# Standard errors of the metastatic-site counts, timepoint rows x drug columns
met_sem_by_drug = combine_group_met_sem["Metastatic Sites"].unstack("Drug")

for drug in ["Capomulin", "Infubinol", "Ketapril", "Placebo"]:
    plt.errorbar(
        x=met_table_fourdrugs["Timepoint"],
        y=met_table_fourdrugs[drug],
        # Look the errors up by timepoint so they line up with the plotted means
        yerr=met_sem_by_drug[drug].reindex(met_table_fourdrugs["Timepoint"]).to_numpy(),
        linestyle="--",
        fmt="o",
        label=drug,  # without labels plt.legend() has nothing to display
    )

plt.ylabel("Met Sites")
plt.xlabel('Time (Days)')
plt.title('Metastatic Response to Treatment')
plt.grid()
plt.legend()

# Save the Figure before showing it: once plt.show() has rendered the figure,
# a later savefig() writes a new, empty canvas.
plt.savefig("../MetSiteResponse.png")
plt.show()

![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Per-(Drug, Timepoint) tallies; count() reports the non-null entries of every
# remaining column, so any of them could serve as the metric
micecount = complete_df.groupby(["Drug", "Timepoint"]).count()

# Keep the "Mouse ID" tally and turn the group keys back into ordinary columns
micecount_df = micecount[["Mouse ID"]].reset_index()

# Display dataframe
micecount_df.head()

In [None]:
# Reshape the counts to one row per timepoint and one column per drug, then
# bring "Timepoint" back as a regular column
pivot_table_mice = (
    micecount_df
    .pivot(index="Timepoint", columns='Drug', values="Mouse ID")
    .reset_index()
)

# Preview the formatted table
pivot_table_mice.head()

In [None]:
# Narrow the mouse-count pivot to the timepoint column plus the four drugs compared
mice_columns = ["Timepoint", "Capomulin", "Infubinol", "Ketapril", "Placebo"]
mice_table_fourdrugs = pivot_table_mice.loc[:, mice_columns]
mice_table_fourdrugs.head()

In [None]:

# Survival rate: each drug's count at a timepoint as a percentage of that
# drug's count in the first row
survival_fourdrugs_df = mice_table_fourdrugs.astype(float)

for drug_name in ["Capomulin", "Infubinol", "Ketapril", "Placebo"]:
    mouse_counts = survival_fourdrugs_df[drug_name]
    survival_fourdrugs_df[drug_name + "_percent"] = mouse_counts / mouse_counts.iloc[0] * 100

survival_fourdrugs_df

In [None]:
# Generate the Plot (Accounting for percentages)
plt.figure(figsize=(10, 5))

for drug in ["Capomulin", "Infubinol", "Ketapril", "Placebo"]:
    plt.errorbar(
        x=survival_fourdrugs_df['Timepoint'],
        y=survival_fourdrugs_df[drug + '_percent'],
        linestyle="--",
        fmt='o',
        label=drug,  # without labels plt.legend() has nothing to display
    )

plt.ylabel("Survival Rate (%)")
plt.xlabel('Time (Days)')
plt.title(' Survival During Treatment')
plt.grid()
plt.legend()

# Save the Figure before showing it: once plt.show() has rendered the figure,
# a later savefig() writes a new, empty canvas.
plt.savefig("../SurvivalRespnse.png")
plt.show()

![Survival During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Percent change in Capomulin's mean tumor volume between the first row and
# row 9 (presumably the final, day-45 timepoint -- TODO confirm)
capomulin_volume = table_fourdrugs["Capomulin"]
capomulin_start = capomulin_volume.iloc[0]
Capomulin_percent = (capomulin_volume.iloc[9] - capomulin_start) / capomulin_start * 100
# Echo the value to check it
Capomulin_percent

In [None]:
# Percent change in Infubinol's mean tumor volume between the first row and
# row 9 (presumably the final, day-45 timepoint -- TODO confirm)
infubinol_volume = table_fourdrugs["Infubinol"]
infubinol_start = infubinol_volume.iloc[0]
Infubinol_percent = (infubinol_volume.iloc[9] - infubinol_start) / infubinol_start * 100
# Echo the value to check it
Infubinol_percent

In [None]:
# Percent change in Ketapril's mean tumor volume between the first row and
# row 9 (presumably the final, day-45 timepoint -- TODO confirm)
ketapril_volume = table_fourdrugs["Ketapril"]
ketapril_start = ketapril_volume.iloc[0]
Ketapril_percent = (ketapril_volume.iloc[9] - ketapril_start) / ketapril_start * 100
# Echo the value to check it
Ketapril_percent

In [None]:
# Percent change in the Placebo group's mean tumor volume between the first row
# and row 9 (presumably the final, day-45 timepoint -- TODO confirm)
placebo_volume = table_fourdrugs["Placebo"]
placebo_start = placebo_volume.iloc[0]
Placebo_percent = (placebo_volume.iloc[9] - placebo_start) / placebo_start * 100
# Echo the value to check it
Placebo_percent

In [None]:
# Outline of the summary bar graph steps. This cell is a placeholder only; the
# cells that follow build the percent-change Series and draw the bar chart.

# Store all Relevant Percent Changes into a Tuple


# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
# NOTE: the former `fig.show()` call was dropped here. No `fig` object is
# created in this cell or any visible earlier cell, so it raised NameError.

In [None]:
# Collect the four percent changes keyed by drug name (a dict, despite the
# variable's name), then wrap them in a Series indexed by drug
percent_tuple = dict(
    Capomulin=Capomulin_percent,
    Infubinol=Infubinol_percent,
    Ketapril=Ketapril_percent,
    Placebo=Placebo_percent,
)
percentchange_tumorvolume = pd.Series(percent_tuple)
percentchange_tumorvolume

![Tumor Change Over 45 Day Treatment](../Images/change.png)

In [None]:
# Drug names, taken from the index of the percent-change Series
testdrugs = percentchange_tumorvolume.index
testdrugs

In [None]:
# Bar chart of the percent tumor-volume change for each of the four drugs
summary_bar = plt.subplot()
x_axis = np.arange(0, len(testdrugs))

# matplotlib's default bar width, named so the tick positions can be derived
# from it instead of from an unrelated constant
bar_width = 0.8

# Bars are anchored at their left edge (align="edge"), so each drug label goes
# half a bar width to the right of x to sit under the middle of its bar
tick_locations = [x + bar_width / 2 for x in x_axis]
plt.xticks(tick_locations, testdrugs)

# Red when the tumor grew (or did not change), green when it shrank
colors = ['r' if value >= 0 else 'g' for value in percentchange_tumorvolume]

# Plot
percent_change = summary_bar.bar(
    x_axis,
    percentchange_tumorvolume,
    width=bar_width,
    color=colors,
    align="edge",
)

plt.title("Tumor Change Over 45 Days Treatment")
plt.ylabel("% Tumor Volume Change")
plt.xlim(-0.25, len(testdrugs))
plt.ylim(-30, max(percentchange_tumorvolume) + 20)
plt.grid()

plt.show()