## Observations and Insights

## Dependencies and starter code

In [15]:
# Dependencies
import pandas as pd
import random
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import sem

# Study data files
# The paths get their own constants so that `mouse_metadata` and
# `study_results` are only ever DataFrames, never path strings.
MOUSE_METADATA_PATH = "data/Mouse_metadata.csv"
STUDY_RESULTS_PATH = "data/Study_results.csv"

# Read the mouse data and the study results
mouse_metadata = pd.read_csv(MOUSE_METADATA_PATH)
study_results = pd.read_csv(STUDY_RESULTS_PATH)

# mouse_metadata
# Col names: Mouse ID, Drug Regimen, Sex, Age_months, Weight (g)
# 249 non-null rows

# study_results
# Col names: Mouse ID, Timepoint, Tumor Volume (mm3), Metastatic Sites
# 1893 non-null rows
# 249 mice

# Combine the data into a single dataset
combined_data = pd.merge(mouse_metadata, study_results, on="Mouse ID")

# Turn the "did not lose any rows" observation into a check, so that a
# changed input file fails here instead of silently skewing later results.
assert len(combined_data) == len(study_results), (
    "merge changed the number of study rows"
)

combined_data.info()
# Did not lose any rows. No null values in columns. hooray.gif

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1893 entries, 0 to 1892
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Mouse ID            1893 non-null   object 
 1   Drug Regimen        1893 non-null   object 
 2   Sex                 1893 non-null   object 
 3   Age_months          1893 non-null   int64  
 4   Weight (g)          1893 non-null   int64  
 5   Timepoint           1893 non-null   int64  
 6   Tumor Volume (mm3)  1893 non-null   float64
 7   Metastatic Sites    1893 non-null   int64  
dtypes: float64(1), int64(4), object(3)
memory usage: 133.1+ KB


## Summary statistics

In [23]:
# Generate a summary statistics table of mean, median, variance, 
# standard deviation, and SEM of the tumor volume for each regimen

# One DataFrame per drug regimen, each holding every row of `combined_data`
# whose "Drug Regimen" value equals the given name.
ramicane_data = combined_data[combined_data["Drug Regimen"] == "Ramicane"]
capomulin_data = combined_data[combined_data["Drug Regimen"] == "Capomulin"]
infubinol_data = combined_data[combined_data["Drug Regimen"] == "Infubinol"]
placebo_data = combined_data[combined_data["Drug Regimen"] == "Placebo"]
ceftamin_data = combined_data[combined_data["Drug Regimen"] == "Ceftamin"]
stelasyn_data = combined_data[combined_data["Drug Regimen"] == "Stelasyn"]
zoniferol_data = combined_data[combined_data["Drug Regimen"] == "Zoniferol"]
ketapril_data = combined_data[combined_data["Drug Regimen"] == "Ketapril"]
# The regimen is spelled "Propriva" (see the regimen list in the draft cell
# further down); filtering on "Propiva" matches no rows and leaves every
# statistic derived from this frame as NaN. The variable name is kept as-is
# so the cells that read `propiva_data` keep working.
propiva_data = combined_data[combined_data["Drug Regimen"] == "Propriva"]
naftisol_data = combined_data[combined_data["Drug Regimen"] == "Naftisol"]

In [24]:
# Ramicane: summary statistics of tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
ramicane_data = ramicane_data[["Drug Regimen", "Tumor Volume (mm3)"]]
ramicane_volume = ramicane_data["Tumor Volume (mm3)"]

ramicane_mean = ramicane_volume.mean()
ramicane_median = ramicane_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
ramicane_var = ramicane_volume.var(ddof=1)
ramicane_std = ramicane_volume.std()
ramicane_sem = ramicane_volume.sem()

In [25]:
# Capomulin: summary statistics of tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
capomulin_data = capomulin_data[["Drug Regimen", "Tumor Volume (mm3)"]]
capomulin_volume = capomulin_data["Tumor Volume (mm3)"]

capomulin_mean = capomulin_volume.mean()
capomulin_median = capomulin_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
capomulin_var = capomulin_volume.var(ddof=1)
capomulin_std = capomulin_volume.std()
capomulin_sem = capomulin_volume.sem()

In [26]:
# Infubinol: summary statistics of tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
infubinol_data = infubinol_data[["Drug Regimen", "Tumor Volume (mm3)"]]
infubinol_volume = infubinol_data["Tumor Volume (mm3)"]

infubinol_mean = infubinol_volume.mean()
infubinol_median = infubinol_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
infubinol_var = infubinol_volume.var(ddof=1)
infubinol_std = infubinol_volume.std()
infubinol_sem = infubinol_volume.sem()

In [29]:
# Placebo: summary statistics of tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
placebo_data = placebo_data[["Drug Regimen", "Tumor Volume (mm3)"]]
placebo_volume = placebo_data["Tumor Volume (mm3)"]

placebo_mean = placebo_volume.mean()
placebo_median = placebo_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
placebo_var = placebo_volume.var(ddof=1)
placebo_std = placebo_volume.std()
placebo_sem = placebo_volume.sem()

In [30]:
# Ceftamin: summary statistics of tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
ceftamin_data = ceftamin_data[["Drug Regimen", "Tumor Volume (mm3)"]]
ceftamin_volume = ceftamin_data["Tumor Volume (mm3)"]

ceftamin_mean = ceftamin_volume.mean()
ceftamin_median = ceftamin_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
ceftamin_var = ceftamin_volume.var(ddof=1)
ceftamin_std = ceftamin_volume.std()
ceftamin_sem = ceftamin_volume.sem()

In [31]:
# Stelasyn: summary statistics of tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
stelasyn_data = stelasyn_data[["Drug Regimen", "Tumor Volume (mm3)"]]
stelasyn_volume = stelasyn_data["Tumor Volume (mm3)"]

stelasyn_mean = stelasyn_volume.mean()
stelasyn_median = stelasyn_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
stelasyn_var = stelasyn_volume.var(ddof=1)
stelasyn_std = stelasyn_volume.std()
stelasyn_sem = stelasyn_volume.sem()

In [32]:
# Zoniferol: summary statistics of tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
zoniferol_data = zoniferol_data[["Drug Regimen", "Tumor Volume (mm3)"]]
zoniferol_volume = zoniferol_data["Tumor Volume (mm3)"]

zoniferol_mean = zoniferol_volume.mean()
zoniferol_median = zoniferol_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
zoniferol_var = zoniferol_volume.var(ddof=1)
zoniferol_std = zoniferol_volume.std()
zoniferol_sem = zoniferol_volume.sem()

In [33]:
# Ketapril: summary statistics of tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
ketapril_data = ketapril_data[["Drug Regimen", "Tumor Volume (mm3)"]]
ketapril_volume = ketapril_data["Tumor Volume (mm3)"]

ketapril_mean = ketapril_volume.mean()
ketapril_median = ketapril_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
ketapril_var = ketapril_volume.var(ddof=1)
ketapril_std = ketapril_volume.std()
ketapril_sem = ketapril_volume.sem()

In [35]:
# Propriva (stored under the `propiva_*` names): summary statistics of
# tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
propiva_data = propiva_data[["Drug Regimen", "Tumor Volume (mm3)"]]
propiva_volume = propiva_data["Tumor Volume (mm3)"]

propiva_mean = propiva_volume.mean()
propiva_median = propiva_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
propiva_var = propiva_volume.var(ddof=1)
propiva_std = propiva_volume.std()
propiva_sem = propiva_volume.sem()

In [36]:
# Naftisol: summary statistics of tumor volume.
# Keep only the columns needed for the summary, then pull the volume
# column out once instead of re-selecting it for every statistic.
naftisol_data = naftisol_data[["Drug Regimen", "Tumor Volume (mm3)"]]
naftisol_volume = naftisol_data["Tumor Volume (mm3)"]

naftisol_mean = naftisol_volume.mean()
naftisol_median = naftisol_volume.median()
# Sample variance (ddof=1), matching the pandas defaults used by .std() and
# .sem() below, so that variance == std ** 2. The previous population
# variance (ddof=0) was inconsistent with the other two statistics.
naftisol_var = naftisol_volume.var(ddof=1)
naftisol_std = naftisol_volume.std()
naftisol_sem = naftisol_volume.sem()

In [3]:
# # Generate a summary statistics table of mean, median, variance, 
# # standard deviation, and SEM of the tumor volume for each regimen

# # 'Ramicane', 'Capomulin', 'Infubinol', 'Placebo', 'Ceftamin', 
# # 'Stelasyn', 'Zoniferol', 'Ketapril', 'Propriva', 'Naftisol'

# regimen_tumor_size = combined_data[["Drug Regimen", "Tumor Volume (mm3)"]]

# mean_tumor_volume = regimen_tumor_size.groupby(["Drug Regimen"])
# mean_tumor_volume = mean_tumor_volume.mean()

# median_tumor_volume = regimen_tumor_size.groupby(["Drug Regimen"])
# median_tumor_volume = median_tumor_volume.median()

# #display(mean_tumor_volume, median_tumor_volume)

# #ramicane
# ramicane_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Ramicane"]
# ramicane_var = ramicane_var["Tumor Volume (mm3)"]
# ramicane_var = np.var(ramicane_var, ddof = 0)

# #capomulin
# capomulin_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Capomulin"]
# capomulin_var = capomulin_var["Tumor Volume (mm3)"]
# capomulin_var = np.var(capomulin_var, ddof = 0)

# #infubinol
# infubinol_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Infubinol"]
# infubinol_var = infubinol_var["Tumor Volume (mm3)"]
# infubinol_var = np.var(infubinol_var, ddof = 0)

# #placebo
# placebo_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Placebo"]
# placebo_var = placebo_var["Tumor Volume (mm3)"]
# placebo_var = np.var(placebo_var, ddof = 0)

# #ceftamin
# ceftamin_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Ceftamin"]
# ceftamin_var = ceftamin_var["Tumor Volume (mm3)"]
# ceftamin_var = np.var(ceftamin_var, ddof = 0)

# #stelasyn
# stelasyn_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Stelasyn"]
# stelasyn_var = stelasyn_var["Tumor Volume (mm3)"]
# stelasyn_var = np.var(stelasyn_var, ddof = 0)

# #zoniferol
# zoniferol_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Zoniferol"]
# zoniferol_var = zoniferol_var["Tumor Volume (mm3)"]
# zoniferol_var = np.var(zoniferol_var, ddof = 0)

# #ketapril
# ketapril_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Ketapril"]
# ketapril_var = ketapril_var["Tumor Volume (mm3)"]
# ketapril_var = np.var(ketapril_var, ddof = 0)

# #propriva
# propriva_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Propriva"]
# propriva_var = propriva_var["Tumor Volume (mm3)"]
# propriva_var = np.var(propriva_var, ddof = 0)

# #naftisol
# naftisol_var = regimen_tumor_size[regimen_tumor_size["Drug Regimen"]  == "Naftisol"]
# naftisol_var = naftisol_var["Tumor Volume (mm3)"]
# naftisol_var = np.var(naftisol_var, ddof = 0)


## Bar plots

65.81770759495988

In [None]:
# Generate a bar plot showing number of data points for each 
# treatment regimen using pandas

In [None]:
# Generate a bar plot showing number of data points for each 
# treatment regimen using pyplot

## Pie plots

In [None]:
# Generate a pie plot showing the distribution of female versus male mice 
# using pandas

In [None]:
# Generate a pie plot showing the distribution of female versus male mice 
# using pyplot

## Quartiles, outliers and boxplots

In [None]:
# Calculate the final tumor volume of each mouse across four of the most 
# promising treatment regimens. Calculate the IQR and quantitatively determine 
# if there are any potential outliers. 

In [None]:
# Generate a box plot of the final tumor volume of each mouse across 
# four regimens of interest

## Line and scatter plots

In [None]:
# Generate a line plot of time point versus tumor volume for a mouse 
# treated with Capomulin

In [None]:
# Generate a scatter plot of mouse weight versus average tumor volume for 
# the Capomulin regimen

In [None]:
# Calculate the correlation coefficient and linear regression model 
# for mouse weight and average tumor volume for the Capomulin regimen