In [126]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

mouse_metadata_path = ("data/Mouse_metadata.csv")
study_results_path = ("data/Study_results.csv")

mouse_metadata = pd.read_csv(mouse_metadata_path)
study_results = pd.read_csv(study_results_path)

mouse_study_raw = pd.merge(study_results, mouse_metadata, on = "Mouse ID")
mouse_study_raw

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.000000,0,Capomulin,Female,9,22
1,b128,5,45.651331,0,Capomulin,Female,9,22
2,b128,10,43.270852,0,Capomulin,Female,9,22
3,b128,15,43.784893,0,Capomulin,Female,9,22
4,b128,20,42.731552,0,Capomulin,Female,9,22
...,...,...,...,...,...,...,...,...
1888,m601,25,33.118756,1,Capomulin,Male,22,17
1889,m601,30,31.758275,1,Capomulin,Male,22,17
1890,m601,35,30.834357,1,Capomulin,Male,22,17
1891,m601,40,31.378045,1,Capomulin,Male,22,17


In [104]:
unique_mice_raw = mouse_study_raw["Mouse ID"].unique() #used to check number of mice ID not used in other code
print(f"There are {len(unique_mice_raw)} unique Mice IDs in the raw data.")

duplicated_mice = mouse_study_raw.loc[mouse_study_raw.duplicated(subset = ["Mouse ID", "Timepoint"])]
dup_mouse = duplicated_mice["Mouse ID"].unique()[0]

# Works but look into if there is a way to not hard code the slice here
dup_mouse_data = mouse_study_raw.loc[mouse_study_raw["Mouse ID"] == dup_mouse]
dup_mouse_data

There are 249 unique Mice IDs in the raw data.


Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
860,g989,0,45.0,0,Propriva,Female,21,26
861,g989,0,45.0,0,Propriva,Female,21,26
862,g989,5,48.786801,0,Propriva,Female,21,26
863,g989,5,47.570392,0,Propriva,Female,21,26
864,g989,10,51.745156,0,Propriva,Female,21,26
865,g989,10,49.880528,0,Propriva,Female,21,26
866,g989,15,51.325852,1,Propriva,Female,21,26
867,g989,15,53.44202,0,Propriva,Female,21,26
868,g989,20,55.326122,1,Propriva,Female,21,26
869,g989,20,54.65765,1,Propriva,Female,21,26


In [106]:
# Creates new dataframe without the duplicated mouse
clean_mouse_study = mouse_study_raw.loc[mouse_study_raw["Mouse ID"] != dup_mouse]

unique_mice_clean = clean_mouse_study["Mouse ID"].unique()
print(f"There are {len(unique_mice_clean)} unique mice in the cleaned data")
clean_mouse_study

There are 248 unique mice in the cleaned data


Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.000000,0,Capomulin,Female,9,22
1,b128,5,45.651331,0,Capomulin,Female,9,22
2,b128,10,43.270852,0,Capomulin,Female,9,22
3,b128,15,43.784893,0,Capomulin,Female,9,22
4,b128,20,42.731552,0,Capomulin,Female,9,22
...,...,...,...,...,...,...,...,...
1888,m601,25,33.118756,1,Capomulin,Male,22,17
1889,m601,30,31.758275,1,Capomulin,Male,22,17
1890,m601,35,30.834357,1,Capomulin,Male,22,17
1891,m601,40,31.378045,1,Capomulin,Male,22,17


In [130]:
mouse_study_by_drug = clean_mouse_study[["Tumor Volume (mm3)", "Drug Regimen"]]
mouse_study_by_drug = mouse_study_by_drug.groupby(["Drug Regimen"])

In [148]:
tumor_vol_mean = mouse_study_by_drug ["Tumor Volume (mm3)"].mean()
tumor_vol_median = mouse_study_by_drug ["Tumor Volume (mm3)"].median()
tumor_vol_var = mouse_study_by_drug ["Tumor Volume (mm3)"].var()
tumor_vol_stdD = mouse_study_by_drug ["Tumor Volume (mm3)"].std()
# working on getting this right, issue because the type is a bygroup, work with cleaned data.
# tumor_vol_stdE = st.sem(mouse_study_by_drug["Tumor Volume (mm3)"])

mouse_study_by_drug_summary = pd.DataFrame({
    "Mean Tumor Volume" : tumor_vol_mean,
    "Median Tumor Volume" : tumor_vol_median,
    "Tumor Volum Variance" : tumor_vol_var,
    "Tumor Volume Std. Dev." : tumor_vol_stdD
})

mouse_study_by_drug_summary

Unnamed: 0_level_0,Mean Tumor Volume,Median Tumor Volume,Tumor Volum Variance,Tumor Volume Std. Dev.
Drug Regimen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Capomulin,40.675741,41.557809,24.947764,4.994774
Ceftamin,52.591172,51.776157,39.290177,6.268188
Infubinol,52.884795,51.820584,43.128684,6.567243
Ketapril,55.235638,53.698743,68.553577,8.279709
Naftisol,54.331565,52.509285,66.173479,8.134708
Placebo,54.033581,52.288934,61.168083,7.821003
Propriva,52.32093,50.446266,43.852013,6.622085
Ramicane,40.216745,40.673236,23.486704,4.846308
Stelasyn,54.233149,52.431737,59.450562,7.710419
Zoniferol,53.236507,51.818479,48.533355,6.966589


In [147]:
tumor_vol_stdE = st.sem(clean_mouse_study["Tumor Volume (mm3)"])