In [27]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

# Locations of the two study CSV files (relative to the notebook)
mouse_metadata_path = "Resources/Mouse_metadata.csv"
study_results_path = "Resources/Study_results.csv"

# Load each CSV into its own frame
mouse_metadata, study_results = (
    pd.read_csv(csv_path)
    for csv_path in (mouse_metadata_path, study_results_path)
)

# Separate DataFrame objects built from the loaded frames
mouseData_df = pd.DataFrame(mouse_metadata)
studyResults_df = pd.DataFrame(study_results)

# Outer-join the two frames on "Mouse ID" so rows present in only one
# file are still kept in the combined frame.
fullData_df = mouseData_df.merge(studyResults_df, on="Mouse ID", how="outer")


# Number of distinct mice in the merged data.
# dropna=False counts a missing ID as its own value, matching len(unique()).
miceCount = fullData_df["Mouse ID"].nunique(dropna=False)


# Find rows whose (Mouse ID, Timepoint) pair repeats an earlier row.
# duplicated() defaults to keep="first", so the first occurrence of each
# pair is NOT flagged; only the later repeats are returned.
columns_to_check = ["Mouse ID", "Timepoint"]
is_repeat = fullData_df.duplicated(subset=columns_to_check)
duplicateData_df = fullData_df[is_repeat]


# Create a clean DataFrame by dropping every row that belongs to a mouse
# with at least one duplicated (Mouse ID, Timepoint) record.
#
# Series.unique() returns the distinct IDs in order of first appearance,
# which is exactly what a manual "append if not seen" loop produces, without
# the repeated list-membership scans. tolist() keeps IDS a plain list.
IDS = duplicateData_df["Mouse ID"].unique().tolist()

# Index by mouse so whole mice can be dropped by label, then restore
# "Mouse ID" as a regular column with a fresh 0..n-1 index.
indexedData_df = fullData_df.set_index("Mouse ID")
cleanData_df = indexedData_df.drop(IDS).reset_index()

# Invariant: no (Mouse ID, Timepoint) pair may repeat after cleaning.
assert not cleanData_df.duplicated(subset=["Mouse ID", "Timepoint"]).any()


# Number of distinct mice remaining after the duplicated mouse was removed.
# dropna=False counts a missing ID as its own value, matching len(unique()).
cleanCount = cleanData_df["Mouse ID"].nunique(dropna=False)



249
248


# Summary Statistics

In [None]:
# Preview the cleaned data; show only the first rows rather than the whole frame
cleanData_df.head()

In [37]:
# Summary statistics of tumor volume for each drug regimen.
# Build the grouped view once and reuse it for every statistic instead of
# repeating the same column selection and groupby five times.
tumor_by_regimen = cleanData_df[["Drug Regimen", "Tumor Volume (mm3)"]].groupby("Drug Regimen")

# Each result is a one-column DataFrame indexed by "Drug Regimen"
meanTV = tumor_by_regimen.mean()
medianTV = tumor_by_regimen.median()
varTV = tumor_by_regimen.var()
stdTV = tumor_by_regimen.std()
semTV = tumor_by_regimen.sem()

# Assemble the summary table in a single step. All five frames share the
# same "Drug Regimen" index, so a column-wise concat lines them up directly;
# the dict keys become the column names, avoiding the chain of pairwise
# merges and the implicit "_x"/"_y" suffix renames.
summaryTV_df = pd.concat(
    {
        "Mean Tumor Volume": meanTV["Tumor Volume (mm3)"],
        "Median Tumor Volume": medianTV["Tumor Volume (mm3)"],
        "Tumor Volume Variance": varTV["Tumor Volume (mm3)"],
        "Tumor Volume Std. Dev.": stdTV["Tumor Volume (mm3)"],
        "Tumor Volume Std. Err.": semTV["Tumor Volume (mm3)"],
    },
    axis=1,
)
summaryTV_df

Unnamed: 0_level_0,Mean Tumor Volume,Median Tumor Volume,Tumor Volume Variance,Tumor Volume Std. Dev.,Tumor Volume Std. Err.
Drug Regimen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Capomulin,40.675741,41.557809,24.947764,4.994774,0.329346
Ceftamin,52.591172,51.776157,39.290177,6.268188,0.469821
Infubinol,52.884795,51.820584,43.128684,6.567243,0.492236
Ketapril,55.235638,53.698743,68.553577,8.279709,0.60386
Naftisol,54.331565,52.509285,66.173479,8.134708,0.596466
Placebo,54.033581,52.288934,61.168083,7.821003,0.581331
Propriva,52.32093,50.446266,43.852013,6.622085,0.544332
Ramicane,40.216745,40.673236,23.486704,4.846308,0.320955
Stelasyn,54.233149,52.431737,59.450562,7.710419,0.573111
Zoniferol,53.236507,51.818479,48.533355,6.966589,0.516398


In [23]:
# Same summary table produced with a single aggregation call:
# the listed statistics are applied to the tumor-volume column per regimen.
tests = ['mean', 'median', 'var', 'std', 'sem']
regimen_and_volume = cleanData_df[["Drug Regimen", "Tumor Volume (mm3)"]]
regimen_and_volume.groupby("Drug Regimen").agg({'Tumor Volume (mm3)': tests})

Unnamed: 0_level_0,Tumor Volume (mm3),Tumor Volume (mm3),Tumor Volume (mm3),Tumor Volume (mm3),Tumor Volume (mm3)
Unnamed: 0_level_1,mean,median,var,std,sem
Drug Regimen,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Capomulin,40.675741,41.557809,24.947764,4.994774,0.329346
Ceftamin,52.591172,51.776157,39.290177,6.268188,0.469821
Infubinol,52.884795,51.820584,43.128684,6.567243,0.492236
Ketapril,55.235638,53.698743,68.553577,8.279709,0.60386
Naftisol,54.331565,52.509285,66.173479,8.134708,0.596466
Placebo,54.033581,52.288934,61.168083,7.821003,0.581331
Propriva,52.32093,50.446266,43.852013,6.622085,0.544332
Ramicane,40.216745,40.673236,23.486704,4.846308,0.320955
Stelasyn,54.233149,52.431737,59.450562,7.710419,0.573111
Zoniferol,53.236507,51.818479,48.533355,6.966589,0.516398


# Bar and Pie Charts