In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np

# Study data files. These are relative paths, so they resolve against the notebook's
# working directory.
# NOTE(review): the saved output of this cell is a FileNotFoundError for
# 'data/Mouse_metadata.csv' -- confirm the data/ folder sits next to the notebook.
mouse_metadata_path = "data/Mouse_metadata.csv"
study_results_path = "data/Study_results.csv"

# Read the mouse data and the study results
mouse_metadata = pd.read_csv(mouse_metadata_path)
study_results = pd.read_csv(study_results_path)

# Combine the data into a single dataset. "Mouse ID" is the join key and only needs to
# be named once. The left join keeps every row of study_results and attaches the
# matching mouse_metadata columns to it.
combo_df = pd.merge(study_results, mouse_metadata, how="left", on="Mouse ID")

# Display the data table for preview, full df has 1893 rows
combo_df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'data/Mouse_metadata.csv'

In [None]:
# Checking the number of mice.
# Count distinct values of the "Mouse ID" column only. Calling nunique() on the whole
# frame returns one count per column, which buries the number of mice among the
# distinct counts of every other column.
mouse_count = combo_df["Mouse ID"].nunique()
mouse_count
# 249 mice!

In [None]:
# Collect every row whose (Mouse ID, Timepoint) pair occurs more than once.
# keep=False marks all members of a repeated pair, not just the later copies.
duplicate_df = combo_df.loc[
    lambda frame: frame.duplicated(subset=["Mouse ID", "Timepoint"], keep=False)
]
duplicate_df
# g989 is the only one that shows up

In [None]:
# Optional: show every record that belongs to the duplicated mouse ID.
combo_df[combo_df["Mouse ID"].eq("g989")]
# total of 13 rows with the g989 duplicate

In [None]:
# Create a clean DataFrame by dropping the duplicate mouse by its ID.
# Filter into a new frame instead of dropping rows from combo_df in place: combo_df is
# left untouched, so the cells that inspect it give the same answers when re-run.
# .copy() makes clean_df an independent frame rather than a second name for combo_df.
# Row labels are kept as they were (no index reset), matching what drop() produced.
clean_df = combo_df.loc[combo_df["Mouse ID"] != "g989"].copy()
clean_df
# combo_df has 1893 rows, clean_df has 1880, all 13 g989 entries are removed!

In [None]:
# Checking the number of mice in the clean DataFrame.
# Count distinct "Mouse ID" values only; nunique() on the whole frame would report a
# separate distinct-count for every column instead of the number of mice.
second_mouse_count = clean_df["Mouse ID"].nunique()
second_mouse_count
# 248 mice!

In [None]:
# Generate a summary statistics table of mean, median, variance, standard deviation,
# and SEM of the tumor volume for each regimen

# Use groupby and summary statistical methods to calculate the following properties of each drug regimen:
# mean, median, variance, standard deviation, and SEM of the tumor volume.
#
# The tumor-volume column is selected BEFORE aggregating. Aggregating the whole frame
# first would also try to average the string columns (e.g. "Mouse ID"), which raises
# TypeError on pandas 2.x, and would compute statistics for columns that are then
# thrown away. The grouped column is built once and reused for all five statistics.
tumor_by_regimen = clean_df.groupby("Drug Regimen")["Tumor Volume (mm3)"]
tumor_mean = tumor_by_regimen.mean()
tumor_median = tumor_by_regimen.median()
tumor_variance = tumor_by_regimen.var()
tumor_std_dev = tumor_by_regimen.std()
tumor_std_sem = tumor_by_regimen.sem()

# Assemble the resulting series into a single summary DataFrame.
# All five series share the "Drug Regimen" index, so they align row by row.
drug_summary = pd.DataFrame({"Mean Tumor Volume": tumor_mean,
                             "Median Tumor Volume": tumor_median,
                             "Tumor Volume Variance": tumor_variance,
                             "Tumor Volume Std. Dev.": tumor_std_dev,
                             "Tumor Volume Std. Err.": tumor_std_sem})
drug_summary