Skip to content

davidwjones/Matplotlib_homework

Repository files navigation

Four treatments (Capomulin, Infubinol, Ketapril, and Placebo) for comparison. tasked with:

  • Creating a scatter plot that shows how the tumor volume changes over time for each treatment.
  • Creating a scatter plot that shows how the number of metastatic site changes over time for each treatment.
  • Creating a scatter plot that shows the number of mice still alive through the course of treatment
  • Creating a bar graph that compares the total % tumor volume change for each drug across the full 45 days.

#Observations:

  1. Ketapril actually does more harm than good. The mice who received it did worse than the mice who received nothing.
  2. Capomulin is the only one of the four who reduced the tumor size, number of metastatic sites and had the best survival rate.
  3. Looking at all of the drugs, Ramicane was as successful as at Capomulin, and they are the only two in the entire study with positive results.
# Dependencies

import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
# Read data

trial_data = pd.read_csv("raw_data/clinicaltrial_data.csv")
mouse_data = pd.read_csv("raw_data/mouse_drug_data.csv")
#load the data into dataframes

trial_df=pd.DataFrame(trial_data)
mouse_df=pd.DataFrame(mouse_data)
#check for duplicate mouse data and clean it prior to merging the datasets

duplicates = mouse_df.set_index('Mouse ID').index.get_duplicates()
bad_mouse = duplicates[0]
clean_mice=mouse_df[mouse_df['Mouse ID'] != bad_mouse]
clean_trial=trial_df[trial_df['Mouse ID'] != bad_mouse]
dataset=pd.merge(clean_mice,clean_trial, on = "Mouse ID")
dataset.head()
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Mouse ID Drug Timepoint Tumor Volume (mm3) Metastatic Sites
0 f234 Stelasyn 0 45.000000 0
1 f234 Stelasyn 5 47.313491 0
2 f234 Stelasyn 10 47.904324 0
3 f234 Stelasyn 15 48.735197 1
4 f234 Stelasyn 20 51.112713 2
#table of tumer size over time per drug

tumor=dataset[['Mouse ID', 'Drug', 'Timepoint', 'Tumor Volume (mm3)']]
tumor_data=pd.pivot_table(tumor, index= ["Drug", "Timepoint"] )
tumor_data.head()
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Tumor Volume (mm3)
Drug Timepoint
Capomulin 0 45.000000
5 44.266086
10 43.084291
15 42.064317
20 40.716325
#table of all drugs over timepoints
tumor_table=tumor.pivot_table( index= "Timepoint", columns = "Drug", values = "Tumor Volume (mm3)")
tumor_table
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Drug Capomulin Ceftamin Infubinol Ketapril Naftisol Placebo Propriva Ramicane Stelasyn Zoniferol
Timepoint
0 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000
5 44.266086 46.503051 47.062001 47.389175 46.796098 47.125589 47.168130 43.944859 47.470830 46.851818
10 43.084291 48.285125 49.403909 49.582269 48.694210 49.423329 48.938560 42.531957 49.335368 48.689881
15 42.064317 50.094055 51.296397 52.399974 50.933018 51.359742 50.891769 41.495061 51.448025 50.779059
20 40.716325 52.157049 53.197691 54.920935 53.644087 54.364417 53.127384 40.238325 53.970080 53.170334
25 39.939528 54.287674 55.715252 57.678982 56.731968 57.482574 55.462490 38.974300 56.172821 55.432935
30 38.769339 56.769517 58.299397 60.994507 59.559509 59.809063 58.122548 38.703137 59.870528 57.713531
35 37.816839 58.827548 60.742461 63.371686 62.685087 62.420615 60.103457 37.451996 62.432021 60.089372
40 36.958001 61.467895 63.162824 66.068580 65.600754 65.052675 63.045537 36.574081 65.356386 62.916692
45 36.236114 64.132421 65.755562 70.662958 69.265506 68.084082 66.258529 34.955595 68.438310 65.960888
#This is where I set numerous counters and data that I will use for all charts and data, such as 
# set the x_axis, colors, makers, and xlim for the various scatter charts, and a list and number of drugs
x_axis = np.arange(0, 50, 5)
colors = ['red','blue','green','yellow','tan','pink','violet','skyblue','orange','aqua']
markers = ['o','v','^','>','<','1','2','3','4','8']    
plt.xlim(0,45)
drugs=dataset["Drug"].unique()
review_drugs=["Capomulin","Infubinol","Ketapril","Placebo"]
review_count=[0,1,2,3]
count = np.arange(0,len(drugs))
plt.style.use('seaborn-darkgrid')
# First Chart of drug impact on tumors
plt.style.use('seaborn-darkgrid')
plt.title("Tumor Response to Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Tumor Volume in (mm3)")
plt.grid(alpha = 0.5)
for i in review_count:
    chart_data = stats.sem(tumor_table[review_drugs[i]])
    plt.errorbar(x_axis, tumor_table[review_drugs[i]], yerr = chart_data, marker= markers[i], color= colors[i], alpha = 0.5, label = review_drugs[i])

plt.legend(bbox_to_anchor=(1.05,1),loc= 2, borderaxespad = 0.)
plt.show()

# Chart of all drugs impact on tumors
plt.style.use('seaborn-darkgrid')
plt.title("Tumor Response to Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Tumor Volume in (mm3)")
plt.grid(alpha = 0.5)
for i in count:
    chart_data = stats.sem(tumor_table[drugs[i]])
    plt.errorbar(x_axis, tumor_table[drugs[i]], yerr = chart_data, marker= markers[i], color= colors[i], alpha = 0.5, label = drugs[i])

plt.legend(bbox_to_anchor=(1.05,1),loc= 2, borderaxespad = 0.)
plt.show()

png

png

# Metastatic Sites over time per drug

metastatic=dataset[['Mouse ID', 'Drug', 'Timepoint', 'Metastatic Sites']]
metastatic_data=pd.pivot_table(metastatic, index= ["Drug", "Timepoint"] )
metastatic_data.head(10)
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Metastatic Sites
Drug Timepoint
Capomulin 0 0.000000
5 0.160000
10 0.320000
15 0.375000
20 0.652174
25 0.818182
30 1.090909
35 1.181818
40 1.380952
45 1.476190
#table of Metastatic results of all drugs over timepoints
metastatic_table=metastatic.pivot_table( index= "Timepoint", columns = "Drug", values = "Metastatic Sites")
metastatic_table
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Drug Capomulin Ceftamin Infubinol Ketapril Naftisol Placebo Propriva Ramicane Stelasyn Zoniferol
Timepoint
0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
5 0.160000 0.380952 0.280000 0.304348 0.260870 0.375000 0.347826 0.120000 0.260870 0.166667
10 0.320000 0.600000 0.666667 0.590909 0.523810 0.833333 0.619048 0.250000 0.523810 0.500000
15 0.375000 0.789474 0.904762 0.842105 0.857143 1.250000 0.800000 0.333333 0.809524 0.809524
20 0.652174 1.111111 1.050000 1.210526 1.150000 1.526316 1.000000 0.347826 0.947368 1.294118
25 0.818182 1.500000 1.277778 1.631579 1.500000 1.941176 1.384615 0.652174 1.166667 1.687500
30 1.090909 1.937500 1.588235 2.055556 2.066667 2.266667 1.666667 0.782609 1.411765 1.933333
35 1.181818 2.071429 1.666667 2.294118 2.266667 2.642857 2.333333 0.952381 1.533333 2.285714
40 1.380952 2.357143 2.100000 2.733333 2.466667 3.166667 2.777778 1.100000 1.583333 2.785714
45 1.476190 2.692308 2.111111 3.363636 2.538462 3.272727 2.571429 1.250000 1.727273 3.071429
#Chart of Metastatic results for review drugs
plt.style.use('seaborn-darkgrid')
plt.title("Metastatic Spread during Treatment")
plt.xlabel("Treatment Period (Days)")
plt.ylabel("Metastatic Sites")
plt.grid(alpha = 0.5)
for i in review_count:
    chart_data = stats.sem(metastatic_table[review_drugs[i]])
    plt.errorbar(x_axis, metastatic_table[review_drugs[i]], yerr = chart_data, marker= markers[i], color= colors[i], alpha = 0.5, label = review_drugs[i])

plt.legend(bbox_to_anchor=(1.05,1),loc= 2, borderaxespad = 0.)
plt.show() 
#Chart of Metastatic results for all drugs
plt.style.use('seaborn-darkgrid')
plt.title("Metastatic Spread during Treatment")
plt.xlabel("Treatment Period (Days)")
plt.ylabel("Metastatic Sites")
plt.grid(alpha = 0.5)
for i in count:
    chart_data = stats.sem(metastatic_table[drugs[i]])
    plt.errorbar(x_axis, metastatic_table[drugs[i]], yerr = chart_data, marker= markers[i], color= colors[i], alpha = 0.5, label = drugs[i])

plt.legend(bbox_to_anchor=(1.05,1),loc= 2, borderaxespad = 0.)
plt.show()    

png

png

#subject mouse survival during the study
mice=dataset[['Mouse ID', 'Drug', 'Timepoint']]
mouse_count = mice.groupby(["Drug", "Timepoint"]).count()["Mouse ID"].rename("Mouse Count")
rodent_df=pd.DataFrame(mouse_count)
rodent_df.head(10)
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Mouse Count
Drug Timepoint
Capomulin 0 25
5 25
10 25
15 24
20 23
25 22
30 22
35 22
40 21
45 21
mouse_table=rodent_df.pivot_table( index= "Timepoint", columns = "Drug", values = "Mouse Count")
mouse_table
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Drug Capomulin Ceftamin Infubinol Ketapril Naftisol Placebo Propriva Ramicane Stelasyn Zoniferol
Timepoint
0 25 25 25 25 25 25 24 25 24 25
5 25 21 25 23 23 24 23 25 23 24
10 25 20 21 22 21 24 21 24 21 22
15 24 19 21 19 21 20 15 24 21 21
20 23 18 20 19 20 19 15 23 19 17
25 22 18 18 19 18 17 13 23 18 16
30 22 16 17 18 15 15 12 23 17 15
35 22 14 12 17 15 14 9 21 15 14
40 21 14 10 15 15 12 9 20 12 14
45 21 13 9 11 13 11 7 20 11 14
#chart of mouse survival rates per review drug
plt.style.use('seaborn-darkgrid')
plt.title("Mouse Survival During Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Surviving Mice")
plt.grid(alpha = 0.5)
for i in review_count:
    chart_data = stats.sem(mouse_table[review_drugs[i]])
    plt.errorbar(x_axis, mouse_table[review_drugs[i]], yerr = chart_data, marker= markers[i], color= colors[i], alpha = 0.5, label = review_drugs[i])

plt.legend(bbox_to_anchor=(1.05,1),loc= 2, borderaxespad = 0.)
plt.show() 

#chart of mouse survival rates per all drugs
plt.style.use('seaborn-darkgrid')
plt.title("Mouse Survival During Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Surviving Mice")
plt.grid(alpha = 0.5)
for i in count:
    chart_data = stats.sem(mouse_table[drugs[i]])
    plt.errorbar(x_axis, mouse_table[drugs[i]], yerr = chart_data, marker= markers[i], color= colors[i], alpha = 0.5, label = drugs[i])

plt.legend(bbox_to_anchor=(1.05,1),loc= 2, borderaxespad = 0.)
plt.show() 

png

png

#rate of change for each drug
drugdelta=tumor_table.pct_change(periods=9).dropna()
percent_change = round(drugdelta * 100, 2)
percent_change
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Drug Capomulin Ceftamin Infubinol Ketapril Naftisol Placebo Propriva Ramicane Stelasyn Zoniferol
Timepoint
45 -19.48 42.52 46.12 57.03 53.92 51.3 47.24 -22.32 52.09 46.58
# create the bar chart of tumor shrinkage in reviewed drugs
fig, ax = plt.subplots()
plt.style.use('seaborn-darkgrid')
performance = {}
for x in review_count:
    performance[review_drugs[x]] = float(percent_change[review_drugs[x]])

x_axis = np.arange(0, len(review_drugs) )
tick_locations = []
for x in x_axis:
    tick_locations.append(x + 0.4)
plt.xlim(-0.25, len(review_drugs))
plt.ylim(min(performance.values()) - 5, max(performance.values()) + 5)
plt.title("Tumor Volume Change  Over 45 Day Treatment")
plt.ylabel("% Change in Tumor Size")
plt.axhline(y= 0, c='black')
bar_colors = pd.Series(list(performance.values()))
bar_colors = bar_colors > 0
bar_colors = bar_colors.map({True: "Red", False: "Green"})
plt.xticks(tick_locations, performance)
bar_chart= plt.bar(x_axis, performance.values(), color =bar_colors, alpha=0.75, align="edge")

def autolabels(rects):
    for rect in rects:
        h=rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2., .8*h, '-%.2f%%' % float(h), ha='center', va='bottom', fontsize=10)

        
    # label the bars 
autolabels(bar_chart)        
plt.show()

png

# create the bar chart of tumor shrinkage in all drugs
plt.style.use('seaborn-darkgrid')
fig, ax = plt.subplots()
performance = {}
for x in count:
    performance[drugs[x]] = float(percent_change[drugs[x]])

x_axis = np.arange(0, len(drugs))
tick_locations = []
for x in x_axis:
    tick_locations.append(x + 0.4)
plt.xlim(-0.25, len(drugs))
plt.ylim(min(performance.values()) - 5, max(performance.values()) + 5)
plt.title("Tumor Volume Change  Over 45 Day Treatment")
plt.ylabel("% Change in Tumor Size")
plt.axhline(y= 0, c='black')
bar_colors = pd.Series(list(performance.values()))
bar_colors = bar_colors > 0
bar_colors = bar_colors.map({True: "Red", False: "Green"})
plt.xticks(tick_locations, performance, rotation="vertical" )
all_bar= plt.bar(x_axis, performance.values(), color =bar_colors, alpha=0.75, align="edge")


def autolabels(rects):
    for rect in rects:
        h=rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2., .8*h, '%.0f%%' % float(h), ha='center', va='bottom', fontsize=10)

        

autolabels(all_bar) 
plt.show()

png

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published