In [None]:
# Dependencies and Setup
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

mouse = "data/mouse_drug_data.csv"
results = "data/clinicaltrial_data.csv"

# Read the mouse data and the study results

mouse = pd.read_csv("data/mouse_drug_data.csv")
results = pd.read_csv("data/clinicaltrial_data.csv")


# Combine the data into a single dataset

pymaceuticals_complete = pd.merge(results, mouse, how="left", on=["Mouse ID", "Mouse ID"])


mouse.head()
results.head()
pymaceuticals_complete.head()


In [None]:

#Rename columns
pymaceuticals_complete = pymaceuticals_complete.rename(columns={"Tumor Volume (mm3)": "Volume"})

pymaceuticals_complete = pymaceuticals_complete[pymaceuticals_complete["Drug"].isin(["Capomulin", "Infubinol", "Ketapril", "Placebo"])]

pymaceuticals_complete.head()                           

In [None]:
#Standard Error....this is count in the homework
sem_drug = pymaceuticals_complete.groupby('Drug').sem()

drug_results_grp = pymaceuticals_complete.groupby(['Drug', 'Timepoint']).sem()
drug_results_grp.head()

In [None]:
drug_tumor_sem_df = pd.DataFrame(drug_results_grp.reset_index())
drug_tumor_sem_df.head()


In [7]:
#Capomulin
capo_means = pymaceuticals_complete.loc[:, ['Capomulin']]
capo_means = capo_means['Capomulin']

capo_sem = pymaceuticals_complete.loc[:, ['Capomulin']]
capo_sem = capo_sem['Capomulin']

#Infubinol
infub_means = pymaceuticals_complete.loc[:, ['Infubinol']]
infub_means = infub_means['Infubinol']

infub_sem = pymaceuticals_complete.loc[:, ['Infubinol']]
infub_sem = infub_sem['Infubinol']

#Ketapril
keta_means = pymaceuticals_complete.loc[:, ['Ketapril']]
keta_means = keta_means['Ketapril']

keta_sem = drug_tumor_sem_df.loc[:, ['Ketapril']]
keta_sem = keta_sem['Ketapril']

#Placebo
placebo_means = drug_tumor_avg_df.loc[:, ['Placebo']]
placebo_means = placebo_means['Placebo']

placebo_sem = drug_tumor_sem_df.loc[:, ['Placebo']]
placebo_sem = placebo_sem['Placebo']

# Generate the Plot (with Error Bars)

#Get min and max values for the avg tumor volume so we set limits on the plot
min_volume = drug_tumor_sum_df.min()['Average Volume']
max_volume = drug_tumor_sum_df.max()['Average Volume']

#add/subtract 5 from the min and max volumes to give some additional white space on the plot
min_volume = min_volume - 5
max_volume = max_volume + 5

#Use the hls color space from Seaborn to give the plot a prettier look
current_palette_4 = sns.color_palette("hls", 4)
sns.set_palette(current_palette_4)

#Assign limits, turn on the background grid and create a title and labels
plt.grid(True)
plt.ylim(min_volume, max_volume)
plt.ylabel("Tumor Volume (mm3)")
plt.xlabel("Treatment Duration (Days)")
plt.title("Tumor Response to Treatment")


#Plot the average tumor size over time for each drug (with error bars)

#The values for the x-axis will be the timepoints
time = list(drug_tumor_avg_df.index.values)

#Plot Capomulin
plt.errorbar(time, capo_means, capo_sem, marker = "*", ms = 10, label = "Capomulin")

#Plot Infubinol
plt.errorbar(time, infub_means, infub_sem, marker = "s", ms = 8, label = "Infubinol")

#Plot Ketapril
plt.errorbar(time, keta_means, keta_sem, marker = "o", ms= 8, label = "Ketapril")

#Plot Placebo
plt.errorbar(time, placebo_means, placebo_sem, marker = "x", ms= 8, label = "Placebo")

#Add a legend, save the plot as a png file and then show the plot
plt.legend(frameon=True)

plt.savefig('Tumor Response.png') 

plt.show()


NameError: name 'drug_tumor_avg_df' is not defined

In [None]:
#set x_axis
x_axis=np.arange(0,50,5)

#set a marker list 
marker_list=['.', ',', 'o', 'v', '^', 's', 'p', '*','D','d']

#set up a figure with a decent size
fig1=plt.figure(figsize=(10,10))

#set ax1 as add subplot
ax1 = fig1.add_subplot(111)

#use a for loop to extract all the mean for all timepoints for each drug one by one
#use i to access different markers in marker_list
i=0
for drug in mean_tumor_volume.columns:
    ax1.errorbar(x_axis,mean_tumor_volume[drug],yerr=sem_tumor_volume[drug],fmt=marker_list[i],\
                 barsabove=True,capsize=2,linestyle='dotted',label=drug)
    i+=1

#adjust plot properties
plt.legend(loc='upper left')
plt.title('Tumor Response to Treatment',fontsize=14,fontweight='bold')
plt.xlim(min(x_axis),max(x_axis)+1)
plt.xticks(x_axis)
plt.xlabel('Time (Days)',fontsize=12)
plt.ylabel('Tumor Volume(mm3)',fontsize=12)
plt.grid(linestyle='dotted')
plt.show()


In [None]:
#Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 

#First get rid of the number of metastatic sites and mice for this analysis because we don't need them

drug_results_tumor_df = pd.DataFrame({'Drug' : drug_results_df['Drug'], 'Timepoint' : drug_results_df['Timepoint'], 
                                     'Volume' : drug_results_df['Volume']})

#Get the average tumor volume for each drug/timepoint group and then put results into a new dataframe
drug_results_grp = drug_results_tumor_df.groupby(['Drug', 'Timepoint'])
drug_results_grp.mean()

drug_tumor_sum_df = pd.DataFrame(drug_results_grp.mean().reset_index())
drug_tumor_sum_df.head(10)

#rename the Volume column Average Volume

drug_tumor_sum_df.columns = ['Drug', 'Timepoint', 'Average Volume']
drug_tumor_sum_df.head(10)

In [None]:
#Creating a scatter plot that shows how the tumor volume changes over time for each treatment.

#First get rid of metastatic

pymaceuticals_tumor = pd.DataFrame({"Drug" : pymaceuticals_complete["Drug"], "Timepoint" : pymaceuticals_complete["Timepoint"], 
                                     "Volume" : pymaceuticals_complete["Volume"]})

#Get the average tumor volume for drug/timepoint 
pymaceuticals_tumor_group = pymaceuticals_tumor.groupby(["Drug", "Timepoint"])
pymaceuticals_tumor_group.mean()

pymaceuticals_tumor_sum = pd.DataFrame(pymaceuticals_tumor_group.mean().reset_index())
pymaceuticals_tumor.head()

#Rename the Volume column Average Volume

pymaceuticals_tumor_sum.columns = ["Drug", "Timepoint", "Average Volume"]
pymaceuticals_tumor_sum.head()


In [None]:
sem_pymaceuticals = pymaceuticals_tumor_sum.groupby('Drug').sem()

pymaceuticals_tumor_group = pymaceuticals_tumor.groupby(['Drug', 'Timepoint']).sem()
pymaceuticals_tumor_group

#Convert the grouped object into a dataframe and then display a preview
drug_tumor_sem_df = pd.DataFrame(drug_results_grp.reset_index())
drug_tumor_sem_df.head(20)

#rename the Volume column to SEM

drug_tumor_sem_df.columns = ['Drug', 'Timepoint', 'SEM Volume']
drug_tumor_sem_df.head(10)

In [None]:
x_axis = [0,5,10,15,20,25,30,35,40,45]
x_limit = 45
plt.figure(figsize=(10,7))

error = tre_df["Tumor Volume (mm3)"]["Capomulin"]
cap = plt.errorbar(x_axis, tumor_plot_df["Capomulin"], yerr=error, fmt="o", ls="dashed", linewidth=1, alpha=1, capsize=3,color ="red")

error = tre_df["Tumor Volume (mm3)"]["Infubinol"]
infu = plt.errorbar(x_axis, tumor_plot_df["Infubinol"], yerr=error, fmt="^", ls="dashed", linewidth=1, alpha=1, capsize=3,color ="blue")

error = tre_df["Tumor Volume (mm3)"]["Ketapril"]
keta = plt.errorbar(x_axis, tumor_plot_df["Ketapril"], yerr=error, fmt="s", ls="dashed", linewidth=1, alpha=1, capsize=3,color = "green")

error = tre_df["Tumor Volume (mm3)"]["Placebo"]
plac = plt.errorbar(x_axis, tumor_plot_df["Placebo"], yerr=error, fmt="D", ls="dashed", linewidth=1, alpha=1, capsize=3,color = "black")

plt.ylim(20, 80)
plt.xlim(0, 45)

plt.title("Tumor Response to Treatment", fontsize=20)
plt.xlabel("Time (Days)", fontsize=14)
plt.ylabel("Tumor Volume (mm3)", fontsize=14)

plt.grid(linestyle="dashed")
plt.legend((cap, infu, keta, plac), ("Capomulin", "Infubinol", "Ketapril", "Placebo"), fontsize=12)
plt.show()

In [None]:
#Creating a scatter plot that shows how the number of metastatic sites changes over time for each treatment.



In [None]:
#Creating a scatter plot that shows the Survival Rate



In [None]:
#Creating a bar graph that compares the total % tumor volume change for each drug across the full 45 days



In [None]:
#Include 3 observations about the results of the study



## Tumor Response to Treatment

In [None]:
#Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 

mean_for_tumor = pd.DataFrame({"Drug" : pymaceuticals_complete["Drug"], "Timepoint" : pymaceuticals_complete["Timepoint"], 
                                     "Volume" : pymaceuticals_complete["Volume"]})

mean_for_tumor

#Convert DataFrame

pymaceuticals_tumor = pymaceuticals_tumor.groupby(["Drug", "Timepoint"])

pymaceuticals_tumor.mean()

pymaceuticals_tumor_sum_df = pd.DataFrame(pymaceuticals_tumor.mean().reset_index())
pymaceuticals_tumor_sum_df.head(10)



In [None]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint

# Convert to DataFrame

# Preview DataFrame



In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview that Reformatting worked


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure



In [None]:
# Show the Figure
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [None]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview that Reformatting worked


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure

# Show the Figure


![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview the Data Frame


In [None]:
# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
plt.show()

![Metastatic Spread During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [None]:
# Store all Relevant Percent Changes into a Tuple


# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
fig.show()

![Metastatic Spread During Treatment](../Images/change.png)