### Imports

In [1]:
from scipy import stats
import numpy as np

### Generate Data

In [2]:
# Generate data
# A fixed seed makes "Restart Kernel -> Run All" reproduce the same two samples.
# NOTE: outputs recorded from earlier, unseeded runs will differ until the notebook is re-executed.
SEED = 42
np.random.seed(SEED)
N = 10 # Size of each group
a = np.random.randn(N) + 2 # Normal/Gaussian distributed data with mean 2
b = np.random.randn(N) # Normal/Gaussian distributed data with mean 0

In [3]:
# Display the first sample (standard normal draws shifted by +2)
a

array([ 3.05833752, -1.22903911,  2.9885035 ,  2.36158897,  2.67411   ,
        3.27384326,  1.08638842,  1.98293385,  1.25733   ,  1.75825336])

In [4]:
# Display the second sample (standard normal draws, no shift)
b

array([ 0.75157889,  0.87754102, -0.85788306,  0.19454317, -0.46037028,
        0.27022969,  0.68190496, -0.27027175,  1.51359477,  0.63500727])

### Method 0: Fully Manual Calcs

In [5]:
# Sample mean of a: total of the observations divided by how many there are
mean_a = sum(a) / a.size

In [6]:
# Sample mean of b: total of the observations divided by how many there are
mean_b = sum(b) / b.size

In [7]:
# Unbiased sample variance of a: squared deviations from the mean, scaled by 1/(n-1)
var_a_manual = sum((obs - mean_a)**2 for obs in a) * (1/(len(a)-1))

In [8]:
# Unbiased sample variance of b: squared deviations from the mean, scaled by 1/(n-1)
var_b_manual = sum((obs - mean_b)**2 for obs in b) * (1/(len(b)-1))

In [9]:
# Report the hand-computed mean and variance of each group
print(
    'Mean, var a: {}, {} | Mean, var b: {}, {}'.format(
        mean_a, var_a_manual,
        mean_b, var_b_manual,
    )
)

Mean, var a: 1.9212249775389147, 1.7905201237206094 | Mean, var b: 0.3335874669985309, 0.5014625453762026


### Method 1: NumPy Manual Calcs

In [10]:
# Calc variances
# ddof=1 divides by N-1 (unbiased estimate) instead of N (the MLE of the variance)
var_a = np.var(a, ddof=1)
var_b = np.var(b, ddof=1)

In [11]:
# Report the NumPy-computed mean and variance of each group
print(
    'Mean, var a: {}, {} | Mean, var b: {}, {}'.format(
        np.mean(a), var_a,
        np.mean(b), var_b,
    )
)

Mean, var a: 1.9212249775389154, 1.7905201237206099 | Mean, var b: 0.3335874669985309, 0.5014625453762026


In [12]:
# Calc pooled stdev
# Both groups have the same size N, so the pooled variance is the plain average of the two
s = np.sqrt(0.5 * (var_a + var_b))

In [13]:
# Calc t stat
# Difference of the sample means divided by its standard error, s * sqrt(2/N)
t = (np.mean(a) - np.mean(b)) / (np.sqrt(2.0/N) * s)

In [14]:
# Calc degrees of freedom
# Each of the two groups contributes N - 1
df = 2 * (N - 1)

In [15]:
# Get P-value
# p is the one-tailed probability of a t statistic at least as extreme as the one observed.
# Taking |t| keeps the doubled value reported afterwards a valid two-sided p-value even
# when t is negative (mean of a below mean of b); 1 - cdf(t) would then exceed 0.5 and
# the doubled value would exceed 1. The survival function sf also avoids the precision
# loss of computing 1 - cdf for large |t|.
p = stats.t.sf(np.abs(t), df=df)

In [16]:
# Report t and the two-sided p-value (double the one-tailed probability p)
print(
    "t:{}, p:{}".format(t, p * 2)
)

t:3.3162379626958884, p:0.0038410912076545234


### Method 2: Built-in SciPy Functions

In [17]:
# Independent two-sample t-test; equal_var=True is SciPy's default (pooled variance),
# spelled out here because it is the assumption the manual calculation makes too
t2, p2 = stats.ttest_ind(a, b, equal_var=True)

In [18]:
# Report SciPy's t statistic and p-value for comparison with the manual result
print(
    "t2:{}, p2:{}".format(t2, p2)
)

t2:3.3162379626958884, p2:0.0038410912076545125
