# Pymaceuticals - by Caroline Miller age: 23
### 1. 
### 2. 
### 3. 

In [32]:
%matplotlib notebook

In [33]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [34]:
# Read the clinical-trial measurements and the mouse/drug assignment table.
clinicaltrial = pd.read_csv('clinicaltrial_data.csv')
mousedata = pd.read_csv('mouse_drug_data.csv')

# Left join on Mouse ID: every clinical-trial row is kept and gains the
# columns of the mouse table.
clinical_mouse = clinicaltrial.merge(mousedata, how='left', on='Mouse ID')
clinical_mouse.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,f932,0,45.0,0,Ketapril
2,g107,0,45.0,0,Ketapril
3,a457,0,45.0,0,Ketapril
4,c819,0,45.0,0,Ketapril


## Tumor Response to Treatment

In [35]:
# Group the merged data by treatment and timepoint; later cells reuse this grouping.
drug_timepoint = clinical_mouse.groupby(['Drug', 'Timepoint'])

# Average tumor volume within each (Drug, Timepoint) group.
mean_vol = drug_timepoint['Tumor Volume (mm3)'].mean()

# Flatten the grouped result into a frame with Drug, Timepoint and Mean Volume columns.
mean_vol_df = mean_vol.to_frame(name="Mean Volume").reset_index()
mean_vol_df.head()

Unnamed: 0,Drug,Timepoint,Mean Volume
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325


In [36]:
# Standard error of the mean tumor volume within each (Drug, Timepoint) group.
sem_vol = drug_timepoint['Tumor Volume (mm3)'].sem()

# Flatten the grouped result into a frame with one row per drug and timepoint.
sem_vol_df = sem_vol.to_frame(name="Standard Error Volume by Drug and Timepoint").reset_index()
sem_vol_df.head()

Unnamed: 0,Drug,Timepoint,Standard Error Volume by Drug and Timepoint
0,Capomulin,0,0.0
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731


In [37]:
# Reshape so each drug becomes a column (via `columns`) and each row is a timepoint.
# Passing `values` as a list keeps the two-level ("Mean Volume", drug) column header.
mean_vol_df_restructured = mean_vol_df.pivot_table(
    values=['Mean Volume'],
    index='Timepoint',
    columns='Drug',
)

# Preview the first five timepoints.
mean_vol_df_restructured.head()

Unnamed: 0_level_0,Mean Volume,Mean Volume,Mean Volume,Mean Volume,Mean Volume,Mean Volume,Mean Volume,Mean Volume,Mean Volume,Mean Volume
Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [38]:
# Per-drug slices of the standard-error and mean frames, used by the plotting cell.
sem_drug = sem_vol_df['Drug']
mean_drug = mean_vol_df['Drug']

# Capomulin
cap_error = sem_vol_df.loc[sem_drug == 'Capomulin']
cap_mean = mean_vol_df.loc[mean_drug == 'Capomulin']

# Infubinol
inf_error = sem_vol_df.loc[sem_drug == 'Infubinol']
inf_mean = mean_vol_df.loc[mean_drug == 'Infubinol']

# Ketapril
ket_error = sem_vol_df.loc[sem_drug == 'Ketapril']
ket_mean = mean_vol_df.loc[mean_drug == 'Ketapril']

# Placebo
pla_error = sem_vol_df.loc[sem_drug == 'Placebo']
pla_mean = mean_vol_df.loc[mean_drug == 'Placebo']

In [39]:
# Tumor volume over time for Capomulin, Infubinol, Ketapril and Placebo,
# drawn as mean lines with standard-error bars.

# Resolve the whitegrid style under whichever name this matplotlib provides
# (the bundled seaborn styles were renamed "seaborn-v0_8-*" in matplotlib 3.6),
# and apply it before the figure exists so it actually styles this plot.
whitegrid_style = next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if name in plt.style.available),
    'default',
)
plt.style.use(whitegrid_style)

# Start a dedicated figure so nothing is drawn onto a figure left active by another cell.
tumor_fig, tumor_ax = plt.subplots()

# Nominal measurement schedule (0 to 45 in steps of 5); used for the x-limits.
x_axis = np.arange(0, 50, 5)
tumor_ax.set_xlim(x_axis[0], x_axis[-1])
tumor_ax.set_ylim(20, 80)

tumor_ax.set_title("Tumor Response to Treatment")
tumor_ax.set_xlabel("Time (Days)")
tumor_ax.set_ylabel("Tumor Volume (mm3)")

# Column of sem_vol_df that holds the standard error.
sem_col = 'Standard Error Volume by Drug and Timepoint'

# Each series is plotted against its own Timepoint values (not a hard-coded range),
# so a drug with a different number of timepoints still lines up correctly.
# `.values` hands matplotlib plain arrays, independent of the frames' row labels.

#Capomulin
capomulin = tumor_ax.errorbar(cap_mean['Timepoint'].values, cap_mean['Mean Volume'].values,
                              yerr=cap_error[sem_col].values,
                              color='red', marker='.', capsize=3, label="Capomulin")

#Infubinol
infubinol = tumor_ax.errorbar(inf_mean['Timepoint'].values, inf_mean['Mean Volume'].values,
                              yerr=inf_error[sem_col].values,
                              color='blue', marker='^', capsize=3, label="Infubinol")

#Ketapril
ketapril = tumor_ax.errorbar(ket_mean['Timepoint'].values, ket_mean['Mean Volume'].values,
                             yerr=ket_error[sem_col].values,
                             color='green', marker='s', capsize=3, label="Ketapril")

#Placebo
placebo = tumor_ax.errorbar(pla_mean['Timepoint'].values, pla_mean['Mean Volume'].values,
                            yerr=pla_error[sem_col].values,
                            color='black', marker='d', capsize=3, label="Placebo")

tumor_ax.legend(handles=[capomulin, infubinol, ketapril, placebo], loc='upper left')

# To save the visual, uncomment the next line (the ../Images directory must exist).
# It is placed before plt.show() because some backends discard the figure once shown.
#tumor_fig.savefig("../Images/Tumor_Response.png")

#display the chart
plt.show()

<IPython.core.display.Javascript object>

## Metastatic Response to Treatment

In [40]:
# Average number of metastatic sites within each (Drug, Timepoint) group.
mean_metastatic = drug_timepoint['Metastatic Sites'].mean()

# Flatten the grouped result into a frame with one row per drug and timepoint.
metastatic_df = mean_metastatic.to_frame(name="Mean Metastatic Site").reset_index()

# Preview the first rows.
metastatic_df.head()

Unnamed: 0,Drug,Timepoint,Mean Metastatic Site
0,Capomulin,0,0.0
1,Capomulin,5,0.16
2,Capomulin,10,0.32
3,Capomulin,15,0.375
4,Capomulin,20,0.652174


In [41]:
# Standard error of the metastatic-site counts within each (Drug, Timepoint) group.
sem_metastatic = drug_timepoint['Metastatic Sites'].sem()

# Flatten into a frame; reset_index turns the group keys back into ordinary columns.
metastatic_sem_df = sem_metastatic.to_frame(name="Standard Error").reset_index()

# Preview the first rows.
metastatic_sem_df.head()


Unnamed: 0,Drug,Timepoint,Standard Error
0,Capomulin,0,0.0
1,Capomulin,5,0.074833
2,Capomulin,10,0.125433
3,Capomulin,15,0.132048
4,Capomulin,20,0.161621


In [42]:
# Reshape so each drug becomes a column and each row is a timepoint.
# Passing `values` as a list keeps the two-level ("Mean Metastatic Site", drug) header.
metastatic_vol_df_restructured = metastatic_df.pivot_table(
    values=['Mean Metastatic Site'],
    index='Timepoint',
    columns='Drug',
)

# Preview that the reshaping worked.
metastatic_vol_df_restructured.head()

Unnamed: 0_level_0,Mean Metastatic Site,Mean Metastatic Site,Mean Metastatic Site,Mean Metastatic Site,Mean Metastatic Site,Mean Metastatic Site,Mean Metastatic Site,Mean Metastatic Site,Mean Metastatic Site,Mean Metastatic Site
Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.16,0.380952,0.28,0.304348,0.26087,0.375,0.32,0.12,0.24,0.166667
10,0.32,0.6,0.666667,0.590909,0.52381,0.833333,0.565217,0.25,0.478261,0.5
15,0.375,0.789474,0.904762,0.842105,0.857143,1.25,0.764706,0.333333,0.782609,0.809524
20,0.652174,1.111111,1.05,1.210526,1.15,1.526316,1.0,0.347826,0.952381,1.294118


In [43]:
# Per-drug slices of the metastatic standard-error and mean frames, used for plotting.
met_sem_drug = metastatic_sem_df['Drug']
met_mean_drug = metastatic_df['Drug']

# Capomulin
cap_error2 = metastatic_sem_df.loc[met_sem_drug == 'Capomulin']
cap_mean2 = metastatic_df.loc[met_mean_drug == 'Capomulin']

# Infubinol
inf_error2 = metastatic_sem_df.loc[met_sem_drug == 'Infubinol']
inf_mean2 = metastatic_df.loc[met_mean_drug == 'Infubinol']

# Ketapril
ket_error2 = metastatic_sem_df.loc[met_sem_drug == 'Ketapril']
ket_mean2 = metastatic_df.loc[met_mean_drug == 'Ketapril']

# Placebo
pla_error2 = metastatic_sem_df.loc[met_sem_drug == 'Placebo']
pla_mean2 = metastatic_df.loc[met_mean_drug == 'Placebo']

# Show the placebo means as a sanity check on the slicing.
pla_mean2

Unnamed: 0,Drug,Timepoint,Mean Metastatic Site
50,Placebo,0,0.0
51,Placebo,5,0.375
52,Placebo,10,0.833333
53,Placebo,15,1.25
54,Placebo,20,1.526316
55,Placebo,25,1.941176
56,Placebo,30,2.266667
57,Placebo,35,2.642857
58,Placebo,40,3.166667
59,Placebo,45,3.272727


In [45]:
# Mean number of metastatic sites over time for Capomulin, Infubinol, Ketapril
# and Placebo, drawn as mean lines with standard-error bars.

# Resolve the whitegrid style under whichever name this matplotlib provides
# (the bundled seaborn styles were renamed "seaborn-v0_8-*" in matplotlib 3.6),
# and apply it before the figure exists so it actually styles this plot.
whitegrid_style = next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if name in plt.style.available),
    'default',
)
plt.style.use(whitegrid_style)

# Start a dedicated figure; without this, pyplot draws onto whichever figure is
# still active (e.g. the tumor-response figure) instead of producing a new chart.
met_fig, met_ax = plt.subplots()

# Nominal measurement schedule (0 to 45 in steps of 5); used for the x-limits.
x_axis = np.arange(0, 50, 5)
met_ax.set_xlim(x_axis[0], x_axis[-1])
# Upper limit raised from 3.5 to 4 to leave headroom for the error bars on the
# highest means (the placebo mean reaches about 3.27).
met_ax.set_ylim(0, 4)

met_ax.set_title("Metastatic Spread During Treatment")
met_ax.set_xlabel("Time (Days)")
met_ax.set_ylabel("Met Sites")

# Column of metastatic_sem_df that holds the standard error.
met_sem_col = 'Standard Error'

# Each series is plotted against its own Timepoint values (not a hard-coded range).
# `.values` hands matplotlib plain arrays, independent of the frames' row labels.

#Capomulin
capomulin2 = met_ax.errorbar(cap_mean2['Timepoint'].values, cap_mean2['Mean Metastatic Site'].values,
                             yerr=cap_error2[met_sem_col].values,
                             color='red', marker='.', capsize=3, label="Capomulin")

#Infubinol
infubinol2 = met_ax.errorbar(inf_mean2['Timepoint'].values, inf_mean2['Mean Metastatic Site'].values,
                             yerr=inf_error2[met_sem_col].values,
                             color='blue', marker='^', capsize=3, label="Infubinol")

#Ketapril
ketapril2 = met_ax.errorbar(ket_mean2['Timepoint'].values, ket_mean2['Mean Metastatic Site'].values,
                            yerr=ket_error2[met_sem_col].values,
                            color='green', marker='s', capsize=3, label="Ketapril")

#Placebo
placebo2 = met_ax.errorbar(pla_mean2['Timepoint'].values, pla_mean2['Mean Metastatic Site'].values,
                           yerr=pla_error2[met_sem_col].values,
                           color='black', marker='d', capsize=3, label="Placebo")

met_ax.legend(handles=[capomulin2, infubinol2, ketapril2, placebo2], loc='upper left')

# To save the visual, uncomment the next line (the ../Images directory must exist).
# The filename is specific to this chart so it cannot overwrite the tumor-response image.
#met_fig.savefig("../Images/Metastatic_Spread.png")

#display the chart
plt.show()

## Survival Rates

In [133]:
# Number of Mouse ID entries recorded in each (Drug, Timepoint) group.
mouse_count = drug_timepoint['Mouse ID'].count()

# Flatten the grouped result into a frame with one row per drug and timepoint.
mouse_count_df = mouse_count.to_frame(name="Mouse Count").reset_index()

# Preview the first rows.
mouse_count_df.head()

Unnamed: 0,Drug,Timepoint,Mouse Count
0,Capomulin,0,25
1,Capomulin,5,25
2,Capomulin,10,25
3,Capomulin,15,24
4,Capomulin,20,23


In [134]:
# Reshape so each drug becomes a column and each row is a timepoint.
# Passing `values` as a list keeps the two-level ("Mouse Count", drug) column header.
mouse_count_restructured = mouse_count_df.pivot_table(
    values=['Mouse Count'],
    index='Timepoint',
    columns='Drug',
)

# Preview the reshaped frame.
mouse_count_restructured.head()

Unnamed: 0_level_0,Mouse Count,Mouse Count,Mouse Count,Mouse Count,Mouse Count,Mouse Count,Mouse Count,Mouse Count,Mouse Count,Mouse Count
Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17


In [135]:
# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
#plt.show()

## Summary Bar Graph

In [161]:
# Percent change in mean tumor volume for each drug, from the first timepoint
# to the last one. mean_vol_df_restructured already holds the mean volume per
# drug (columns) and timepoint (rows).

# Use the first and last rows by position instead of hard-coding row 9, so the
# calculation still works if the number of timepoints changes.
first_vol = mean_vol_df_restructured.iloc[0]
last_vol = mean_vol_df_restructured.iloc[-1]

# Series indexed by ("Mean Volume", drug): one percent change per drug.
pct_change_vol = (last_vol - first_vol) * 100 / first_vol

# Pick the third entry by position (Infubinol in the column order shown in the
# pivot output). `.iloc` is used because the Series is indexed by labels, and
# plain `series[2]` relies on a positional fallback that pandas has deprecated.
pct_change_vol_split = pct_change_vol.iloc[2]

pct_change_vol_split

46.123471727851836

In [156]:
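# TODO: split pct_change_vol into passing and failing drugs.
# Presumably "passing" means a negative percent change (the tumor shrank); confirm.
# A boolean mask such as `pct_change_vol < 0` would separate the two groups;
# str.split() does not apply to numeric data.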

In [None]:
#Graph Stuff

# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
#fig.show()