In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np

In [None]:
# Relative paths of the two CSV inputs used by the study
study_results_path = "data/Study_results.csv"
mouse_metadata_path = "data/Mouse_metadata.csv"

In [None]:
# Load the mouse metadata CSV and the study results CSV (in that order) into DataFrames
mouse_metadata, study_results = (
    pd.read_csv(csv_path)
    for csv_path in (mouse_metadata_path, study_results_path)
)

In [None]:
# Combine the data into a single dataset.
# Left join: every row of mouse_metadata is kept and matched to its study
# results through the shared "Mouse ID" column. The key is named once; listing
# the same column twice in `on` is redundant.
mouse_data = pd.merge(mouse_metadata, study_results, how="left", on="Mouse ID")
# Display the data table for preview
mouse_data.head()

In [None]:
# Checking the number of mice.
# Count distinct Mouse IDs in the combined dataset rather than rows of the
# metadata table, so the figure is directly comparable with the unique-ID
# count taken on the cleaned DataFrame.
mouse_data["Mouse ID"].nunique()

In [None]:
# Find the mice whose (Mouse ID, Timepoint) pair appears on more than one row.
# keep=False flags every occurrence of a repeated pair, not just the later ones.
duplicate_rows = mouse_data.duplicated(subset=["Mouse ID", "Timepoint"], keep=False)
# Distinct IDs among the flagged rows
duplicate_mice = mouse_data.loc[duplicate_rows, "Mouse ID"].unique()
duplicate_mice

In [None]:
# Build the clean DataFrame: drop every row belonging to a mouse whose ID was
# flagged as duplicated.
has_duplicate_id = mouse_data["Mouse ID"].isin(duplicate_mice)
clean_mouse_data = mouse_data.loc[~has_duplicate_id]
clean_mouse_data

In [None]:
# Number of distinct mice left after the duplicated IDs were removed.
clean_mouse_data.loc[:, "Mouse ID"].nunique()

In [None]:
# Generate a summary statistics table of mean, median, variance, standard deviation,
# and SEM of the tumor volume for each regimen.
#
# Each statistic is computed as its own per-regimen Series via groupby, and the
# Series are then assembled into a single summary DataFrame indexed by drug regimen.

grouped_data = clean_mouse_data.groupby('Drug Regimen')
tumor_volume = grouped_data['Tumor Volume (mm3)']

mean = tumor_volume.mean()
median = tumor_volume.median()
# Use the pandas default (sample statistics, ddof=1) for variance and standard
# deviation. sem() already uses ddof=1, and .agg(['var', 'std']) does too, so
# forcing ddof=0 here would make this table disagree with the aggregation-based one.
variance = tumor_volume.var()
std_dev = tumor_volume.std()
sem = tumor_volume.sem()

summary_stats = pd.DataFrame({
    'Mean': mean,
    'Median': median,
    'Variance': variance,
    'Std. Dev.': std_dev,
    'SEM': sem
})
summary_stats

In [None]:
# Per-regimen tumor volume summary (mean, median, variance, standard deviation,
# SEM) built with a single groupby/agg call instead of one Series per statistic.
summary_stats = (
    clean_mouse_data
    .groupby('Drug Regimen')
    .agg({'Tumor Volume (mm3)': ['mean', 'median', 'var', 'std', 'sem']})
)
summary_stats

In [None]:
# Generate a bar plot showing the total number of timepoints for all mice tested for each drug regimen using Pandas.
# .count() tallies the recorded Timepoint rows per regimen; .sum() would add up
# the timepoint values themselves, which is not a number of observations.
mice_count = (
    clean_mouse_data
    .groupby('Drug Regimen')["Timepoint"]
    .count()
    .sort_values(ascending=False)  # tallest bar first
)
ax = mice_count.plot(kind='bar', color='blue')
ax.set_xlabel('Drug Regimen')
# The bars are row counts (one row per recorded timepoint), not a number of mice.
ax.set_ylabel('Number of Observed Timepoints')
plt.show()


In [None]:
# Generate a bar plot showing the total number of timepoints for all mice tested for each drug regimen using pyplot.
# .count() tallies the recorded Timepoint rows per regimen; .sum() would add up
# the timepoint values themselves, which is not a number of observations.
timepoints = (
    clean_mouse_data
    .groupby('Drug Regimen')['Timepoint']
    .count()
    .sort_values(ascending=False)  # tallest bar first
)
plt.bar(timepoints.index, timepoints.values)
plt.xlabel('Drug Regimen')
plt.ylabel('Number of Observed Timepoints')
# Rotate the regimen names so they do not overlap along the x-axis
plt.xticks(rotation=90)
plt.show()