In [1]:
%matplotlib inline 

import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import math

In [None]:
# 1. ONE-SAMPLE T-TEST: 
# To check whether a sample mean differs from the population mean

In [2]:
# Generate data
#
# `pop` stands in for the population: 150,000 draws from a normal
# distribution with mean 48 and standard deviation 35.
# `sample` is 200 draws from a different normal distribution (mean 25,
# standard deviation 10), so its mean is expected to differ from pop's.
#
# A dedicated, seeded RandomState makes these draws (and any test computed
# from them) reproducible under Restart & Run All, without re-seeding
# NumPy's global random state. Re-run the cell to refresh the printed means.

data_rng = np.random.RandomState(11)

pop = data_rng.normal(48, 35, 150000)
sample = data_rng.normal(25, 10, 200)

# Print both means so they can be compared by eye.
print(pop.mean())
print(sample.mean())

48.140930602625325
25.052334460090407


In [3]:
# Conduct a one-sample t-test at a 95% confidence level (alpha = 0.05) - Using stats.ttest_1samp()
# Null hypothesis: the mean of `sample` equals the population mean.
# Reject the null hypothesis: the p-value shown in the output is far below
# 0.05, so the sample mean differs significantly from the population mean.

stats.ttest_1samp(sample, popmean=pop.mean())  # (sample data, population mean)


Ttest_1sampResult(statistic=-31.51295063616169, pvalue=2.723931244013571e-79)

In [None]:
# 2. TWO-SAMPLE T-TEST
# To check whether the means of two independent samples differ from one another

In [5]:
# Generate data
# Two samples drawn with the same mean (18) but different spreads and
# sizes: 30 draws with standard deviation 33, and 20 draws with standard
# deviation 13.
# NOTE(review): no seed is set in this cell, so the values presumably change
# from run to run - confirm whether that is intended.

sample_1 = np.random.normal(loc=18, scale=33, size=30)
sample_2 = np.random.normal(loc=18, scale=13, size=20)

# One mean per line, first sample first.
print(np.mean(sample_1), np.mean(sample_2), sep="\n")

18.504320351865278
13.959900038357992


In [6]:
# Two-sample t-test at a 95% confidence level (alpha = 0.05) via stats.ttest_ind().
# equal_var=False requests Welch's t-test, which does not assume that the
# two samples share a common variance.
# Outcome: fail to reject the null hypothesis (the recorded p-value is well
# above 0.05).

stats.ttest_ind(a=sample_1, b=sample_2, equal_var=False)

Ttest_indResult(statistic=0.7207080944497771, pvalue=0.47543173860330656)

In [None]:
# 3. Paired T-Test
# To check whether the means of samples from the same group differ

In [8]:
# Generate data
# Seed NumPy's global random state so both draws below are reproducible.
np.random.seed(11)

# Baseline weights: 100 draws centred on 250 with a spread of 30.
before = stats.norm.rvs(loc=250, scale=30, size=100)

# Follow-up weights: each baseline value plus a per-subject shift drawn
# around -1.25 with a spread of 5.
after = before + stats.norm.rvs(loc=-1.25, scale=5, size=100)

# Collect both measurements and their difference (after minus before).
weight_df = pd.DataFrame(
    {"weight_before": before, "weight_after": after}
).assign(weight_change=after - before)

weight_df.describe()  # Summary table of all three columns

Unnamed: 0,weight_before,weight_after,weight_change
count,100.0,100.0,100.0
mean,250.345546,249.115171,-1.230375
std,28.132539,28.422183,4.783696
min,170.400443,165.91393,-11.495286
25%,230.421042,229.148236,-4.046211
50%,250.830805,251.134089,-1.413463
75%,270.637145,268.927258,1.738673
max,314.700233,316.720357,9.759282


In [9]:
# Paired t-test at a 95% confidence level (alpha = 0.05) via stats.ttest_rel().
# Outcome: reject the null hypothesis - the p-value is below 0.05. Patients
# lost 1.23 pounds on average after the treatment, a statistically
# significant change.

stats.ttest_rel(a=before, b=after)

Ttest_relResult(statistic=2.5720175998568284, pvalue=0.011596444318439859)