In [16]:
%matplotlib notebook

 Observations and Insights

In [17]:
 # Dependencies and Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sts

# Study data files
mouse_metadata_path = "/Users/matthewvicario/MatPlotLib/MatPlotLib-Challenge/meta_data.csv"
study_results_path = "/Users/matthewvicario/MatPlotLib/MatPlotLib-Challenge/study_results.csv"

# Read the mouse data and the study results
mouse_metadata = pd.read_csv(mouse_metadata_path)
study_results = pd.read_csv(study_results_path)

# Combine the data into a single dataset
combined_mouse_data = pd.merge(mouse_metadata, study_results, on='Mouse ID', how='outer')

# Display the data table for preview
combined_mouse_data.head()

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.0,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1


In [18]:
# Checking the number of mice
mice_count = combined_mouse_data["Mouse ID"].nunique()
mice_count

249

In [19]:
# Getting the duplicate mice by ID number that shows up for Mouse ID and Timepoint. 
# Create a clean DataFrame by dropping the duplicate mouse by its ID.
new_mouse_data = pd.DataFrame(combined_mouse_data.drop_duplicates(["Mouse ID", "Timepoint"]))
new_mouse_data.head()

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.0,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1


In [20]:
# Checking the number of mice in the clean DataFrame.
mice_count

249

 Summary Statistics

In [29]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen
# This method is the most straighforward, creating multiple series and putting them all together at the end.
new_mouse_sorted = new_mouse_data.sort_values(["Tumor Volume (mm3)"], ascending=True)

regimens = new_mouse_data.groupby(["Drug Regimen"])
volume_mean = round(regimens["Tumor Volume (mm3)"].mean(),2)
volume_median = round(regimens["Tumor Volume (mm3)"].median(),2)
volume_std = round(regimens["Tumor Volume (mm3)"].std(),2)
volume_var = round(regimens["Tumor Volume (mm3)"].var(),2)
volume_sems = round(regimens["Tumor Volume (mm3)"].sem(),2)

regimen_stats_df = pd.DataFrame({"Drug Regimen": [regimens],
                                   "Volume Mean": [volume_mean],
                                   "Volume Median" : [volume_median],
                                   "Volume Std Dev": [volume_std],
                                   "Volume Variance": [volume_var],
                                   "Volume SEM": [volume_sems]})
regimens       


<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fcf7f9d4e50>

Bar and Pie Charts

In [None]:
# Generate a bar plot showing the total number of mice for each treatment throughout the course of the study using pandas or pyplot 


In [None]:
# Generate a pie plot showing the distribution of female versus male mice using pandas or pyplot