### Sample Sums of Squares
$SS$ = $\Sigma({X}-\overline{X})^2$ = $\Sigma{X}^2$ $-$ $\dfrac{(\Sigma{X})^2}{n}$

### THE POOLED VARIANCE

$S_P^2 = \dfrac{SS_1 + SS_2}{n_1 + n_2 - 2}$

$S_P^2 = \dfrac{({n_1 - 1})s_1^2 + ({n_2 - 1})s_2^2}{n_1 + n_2 - 2}$

### STANDARD ERROR

$S_{\overline{X}_1-\overline{X}_2} = \sqrt{\dfrac{S^2_P}{n_1} + \dfrac{S^2_P}{n_2}}$

In [1]:
import scipy.stats as stats
import math

# Given data
mean_treatment = 110
mean_control = 108
std_error = 1.80
sample_size = 35
significance_level = 0.01

# Calculate the t-statistic
t_statistic = (mean_treatment - mean_control) / (std_error * math.sqrt(2/sample_size))

# Calculate degrees of freedom
degrees_of_freedom = 2 * sample_size - 2

# Calculate critical t-value
alpha = significance_level / 2  # Two-tailed test
critical_t_value = stats.t.ppf(1 - alpha, degrees_of_freedom)

# Print results
print(f"T-Statistic: {t_statistic}")
print(f"Critical T-Value: {critical_t_value}")
print(f"Degrees of Freedom: {degrees_of_freedom}")

# Check if we reject the null hypothesis
if abs(t_statistic) > critical_t_value:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

# Print the standard error
print(f"Standard Error: {std_error}")

T-Statistic: 4.648111258522642
Critical T-Value: 2.6500812928169553
Degrees of Freedom: 68
Reject the null hypothesis
Standard Error: 1.8


In [2]:
import statistics
import math

homo = [39.1,11.0,33.4,19.5,35.7,8.7,38.0,20.7,13.7,11.4,41.5,23.0,14.9,26.4,46.1,24.1,18.4,14.3,20.7,35.7,13.7,17.2,36.8,5.3,19.5,26.4,23.0,38.0,54.1,6.3,32.2,28.8,20.7,10.3,11.4]
print(len(homo))
nonhomo = [24.0,10.1,20.0,30.9,26.9,17.0,16.1,14.1,22.0,5.2,35.8,-0.7,-1.7,6.2,13.1,18.0,14.1,19.0,27.9,19.0,-1.7,25.9,20.0,14.1,-15.5,11.1,23.0,30.9,33.8]
print(len(nonhomo))

homo_mean = statistics.mean(homo)
homo_variance = statistics.variance(homo)
print(f'homo_mean = {homo_mean}')
print(f'homo_variance = {homo_variance}')
nonhomo_mean = statistics.mean(nonhomo)
nonhomo_variance = statistics.variance(nonhomo)
print(f'nonhomo_mean = {nonhomo_mean}')
print(f'nonhomo_variance = {nonhomo_variance}')

35
29
homo_mean = 24.0
homo_variance = 148.87058823529412
nonhomo_mean = 16.50344827586207
nonhomo_variance = 139.15891625615762


In [3]:
X1 = [39.1,11.0,33.4,19.5,35.7,8.7,38.0,20.7,13.7,11.4,41.5,23.0,14.9,26.4,46.1,24.1,18.4,14.3,20.7,35.7,13.7,17.2,36.8,5.3,19.5,26.4,23.0,38.0,54.1,6.3,32.2,28.8,20.7,10.3,11.4]
X2 = [24.0,10.1,20.0,30.9,26.9,17.0,16.1,14.1,22.0,5.2,35.8,-0.7,-1.7,6.2,13.1,18.0,14.1,19.0,27.9,19.0,-1.7,25.9,20.0,14.1,-15.5,11.1,23.0,30.9,33.8]

def t_Test_two_samples(X1,X2):
    import statistics
    import math
    n1 = len(X1)
    n2 = len(X2)
    u_hyp = 0
    
    mean_X1 = statistics.mean(X1)
    mean_X2 = statistics.mean(X2)
    print(f'mean_X1 = {mean_X1}; mean_X2 = {mean_X2}')
    
    sum_of_X1_list = sum(X1)
    sum_of_X2_list = sum(X2)
    print(f'sum_of_X1_list = {sum_of_X1_list}; sum_of_X2_list = {sum_of_X2_list}')
    
    square_of_each_X1 = [num1**2 for num1 in X1]
    square_of_each_X2 = [num2**2 for num2 in X2]
    
    sum_of_square_of_each_X1 = sum(square_of_each_X1)
    sum_of_square_of_each_X2 = sum(square_of_each_X2)
    print(f'sum_of_square_of_each_X1 = {sum_of_square_of_each_X1}; sum_of_square_of_each_X2 = {sum_of_square_of_each_X2}')
    
    SS1 = sum_of_square_of_each_X1 - (sum_of_X1_list**2/n1)
    SS2 = sum_of_square_of_each_X2 - (sum_of_X2_list**2/n1)
    print(f'SS1 = {SS1}; SS2 = {SS2}')
    
    pooled_variance = (SS1+SS2)/(n1+n2-2)
    print(f'pooled_variance = {pooled_variance}')
    
    std_error = math.sqrt((pooled_variance/n1)+(pooled_variance/n2))
    print(f'std_error = {std_error}')
    
    t_ratio = ((mean_X1-mean_X2)-u_hyp)/std_error
    print(f't_ratio = {t_ratio}')
t_Test_two_samples(X1,X2)

mean_X1 = 24.0; mean_X2 = 16.50344827586207
sum_of_X1_list = 839.9999999999998; sum_of_X2_list = 478.6
sum_of_square_of_each_X1 = 25221.599999999995; sum_of_square_of_each_X2 = 11794.999999999998
SS1 = 5061.600000000006; SS2 = 5250.486857142855
pooled_variance = 166.32398156682035
std_error = 3.2384291667802017
t_ratio = 2.314872840523282


### THE POOLED VARIANCE

$S_P^2 = \dfrac{SS_1 + SS_2}{n_1 + n_2 - 2}$

$S_P^2 = \dfrac{({n_1 - 1})s_1^2 + ({n_2 - 1})s_2^2}{n_1 + n_2 - 2}$

In [4]:
import numpy as np
import scipy.stats as stats

# Given data
homo = [39.1, 11.0, 33.4, 19.5, 35.7, 8.7, 38.0, 20.7, 13.7, 11.4, 41.5, 23.0, 14.9, 26.4, 46.1, 24.1, 18.4, 14.3, 20.7, 35.7, 13.7, 17.2, 36.8, 5.3, 19.5, 26.4, 23.0, 38.0, 54.1, 6.3, 32.2, 28.8, 20.7, 10.3, 11.4]
nonhomo = [24.0, 10.1, 20.0, 30.9, 26.9, 17.0, 16.1, 14.1, 22.0, 5.2, 35.8, -0.7, -1.7, 6.2, 13.1, 18.0, 14.1, 19.0, 27.9, 19.0, -1.7, 25.9, 20.0, 14.1, -15.5, 11.1, 23.0, 30.9, 33.8]

# homo = [5,20,7,23,30,24,9,8,20,12]
# nonhomo = [13,6,6,5,3,6,10,20,9,12]

# homo = [12,5,11,11,9,18]
# nonhomo = [7,3,4,6,3,13]

# Calculate the pooled variance
var_homo = np.var(homo, ddof=1)
var_nonhomo = np.var(nonhomo, ddof=1)
pooled_var = ((len(homo) - 1) * var_homo + (len(nonhomo) - 1) * var_nonhomo) / (len(homo) + len(nonhomo) - 2)

# Calculate the sum of squares
ss_homo = (len(homo) - 1) * var_homo
ss_nonhomo = (len(nonhomo) - 1) * var_nonhomo

# Calculate the standard error
se = np.sqrt(pooled_var * (1/len(homo) + 1/len(nonhomo)))

# Calculate the t ratio
t_ratio = (np.mean(homo) - np.mean(nonhomo)) / se

# Calculate Cohen's d
cohen_d = (np.mean(homo) - np.mean(nonhomo)) / np.sqrt(pooled_var)

# Print results
print(f"Pooled Variance: {pooled_var}")
print(f"Sum of Squares (Homo): {ss_homo}")
print(f"Sum of Squares (Nonhomo): {ss_nonhomo}")
print(f"Standard Error: {se}")
print(f"T Ratio: {t_ratio}")
print(f"Cohen's d: {cohen_d}")

Pooled Variance: 144.4846718576196
Sum of Squares (Homo): 5061.6
Sum of Squares (Nonhomo): 3896.4496551724133
Standard Error: 3.0183378779888312
T Ratio: 2.483668836019449
Cohen's d: 0.6236639684839361


In [5]:
X1 = [39.1, 11.0, 33.4, 19.5, 35.7, 8.7, 38.0, 20.7, 13.7, 11.4, 41.5, 23.0, 14.9, 26.4, 46.1, 24.1, 18.4, 14.3, 20.7, 35.7, 13.7, 17.2, 36.8, 5.3, 19.5, 26.4, 23.0, 38.0, 54.1, 6.3, 32.2, 28.8, 20.7, 10.3, 11.4]
X2 = [24.0, 10.1, 20.0, 30.9, 26.9, 17.0, 16.1, 14.1, 22.0, 5.2, 35.8, -0.7, -1.7, 6.2, 13.1, 18.0, 14.1, 19.0, 27.9, 19.0, -1.7, 25.9, 20.0, 14.1, -15.5, 11.1, 23.0, 30.9, 33.8]

# X1 = [5,20,7,23,30,24,9,8,20,12]
# X2 = [13,6,6,5,3,6,10,20,9,12]

# X1 = [12,5,11,11,9,18]
# X2 = [7,3,4,6,3,13]
def t_Test_two_samples(X1,X2):
    import statistics
    import math
    n1 = len(X1)
    n2 = len(X2)
    u_hyp = 0
    
    mean_X1 = statistics.mean(X1)
    mean_X2 = statistics.mean(X2)
    print(f'mean_X1 = {mean_X1}; mean_X2 = {mean_X2}')
    
    sum_of_X1_list = sum(X1)
    sum_of_X2_list = sum(X2)
    print(f'sum_of_X1_list = {sum_of_X1_list}; sum_of_X2_list = {sum_of_X2_list}')
    
    square_of_each_X1 = [num1**2 for num1 in X1]
    square_of_each_X2 = [num2**2 for num2 in X2]
    
    sum_of_square_of_each_X1 = sum(square_of_each_X1)
    sum_of_square_of_each_X2 = sum(square_of_each_X2)
    print(f'sum_of_square_of_each_X1 = {sum_of_square_of_each_X1}; sum_of_square_of_each_X2 = {sum_of_square_of_each_X2}')
    
    SS1 = sum_of_square_of_each_X1 - (sum_of_X1_list**2/n1)
    SS2 = sum_of_square_of_each_X2 - (sum_of_X2_list**2/n1)
    print(f'SS1 = {SS1}; SS2 = {SS2}')
    
    pooled_variance = (SS1+SS2)/(n1+n2-2)
    print(f'pooled_variance = {pooled_variance}')
    
    std_error = math.sqrt((pooled_variance/n1)+(pooled_variance/n2))
    print(f'std_error = {std_error}')
    
    t_ratio = ((mean_X1-mean_X2)-u_hyp)/std_error
    print(f't_ratio = {t_ratio}')
t_Test_two_samples(X1,X2)

mean_X1 = 24.0; mean_X2 = 16.50344827586207
sum_of_X1_list = 839.9999999999998; sum_of_X2_list = 478.6
sum_of_square_of_each_X1 = 25221.599999999995; sum_of_square_of_each_X2 = 11794.999999999998
SS1 = 5061.600000000006; SS2 = 5250.486857142855
pooled_variance = 166.32398156682035
std_error = 3.2384291667802017
t_ratio = 2.314872840523282


In [6]:
import scipy.stats as stats
import math

# Given data
mean_treatment = 110
mean_control = 108
std_error = 1.80
sample_size = 35
significance_level = 0.01

# Calculate the standardized effect size (Cohen's d)
mean_difference = mean_treatment - mean_control
pooled_std_dev = math.sqrt(((sample_size - 1) * std_error ** 2) / (2 * sample_size - 2))
cohen_d = mean_difference / pooled_std_dev

# Calculate the critical t-value
alpha = significance_level / 2  # Two-tailed test
degrees_of_freedom = 2 * sample_size - 2
critical_t_value = stats.t.ppf(1 - alpha, degrees_of_freedom)

# Calculate the margin of error for the 99% confidence interval
margin_of_error = critical_t_value * std_error * math.sqrt(2/sample_size)

# Construct the confidence interval
confidence_interval_lower = mean_difference - margin_of_error
confidence_interval_upper = mean_difference + margin_of_error

# Print results
print(f"Cohen's d (Effect Size): {cohen_d}")
print(f"99% Confidence Interval: ({confidence_interval_lower}, {confidence_interval_upper})")
print(f"Critical T-Value: {critical_t_value}")
print(f"Degrees of Freedom: {degrees_of_freedom}")

Cohen's d (Effect Size): 1.5713484026367723
99% Confidence Interval: (0.8597169278347874, 3.1402830721652126)
Critical T-Value: 2.6500812928169553
Degrees of Freedom: 68


In [7]:
import numpy as np

# Given data
mean_treatment = 110
mean_control = 108
std_error = 1.80
sample_size = 35

# Calculate Cohen's d
cohen_d = (mean_treatment - mean_control) / std_error

# Calculate pooled variance
var_treatment = std_error**2 * (sample_size - 1)
var_control = std_error**2 * (sample_size - 1)
pooled_var = (var_treatment + var_control) / (sample_size + sample_size - 2)

# Calculate estimated standard error
estimated_se = np.sqrt(pooled_var * (1/sample_size + 1/sample_size))

# Print results
print(f"Cohen's d: {cohen_d}")
print(f"Pooled Variance: {pooled_var}")
print(f"Estimated Standard Error: {estimated_se}")

Cohen's d: 1.1111111111111112
Pooled Variance: 3.24
Estimated Standard Error: 0.4302822993603817


In [8]:
import scipy.stats as stats

# Given data
mean_letter_grade = 86.2
mean_pass_fail = 81.6
std_error = 1.50
sample_size = 20  # Equal numbers in both groups

# Calculate the t-statistic
t_statistic = (mean_letter_grade - mean_pass_fail) / (std_error * (2 / sample_size)**0.5)

# Calculate degrees of freedom
degrees_of_freedom = 2 * sample_size - 2

# Calculate the critical value of t for a two-tailed test at the 0.05 significance level
alpha = 0.05
critical_t_value = stats.t.ppf(1 - alpha / 2, degrees_of_freedom)

# Calculate pooled variance
pooled_var = ((sample_size - 1) * std_error ** 2 + (sample_size - 1) * std_error ** 2) / (2 * sample_size - 2)

# Calculate estimated standard error
estimated_se = (pooled_var * (1/sample_size + 1/sample_size))**0.5

# Print results
print(f"T-Statistic: {t_statistic}")
print(f"Critical T-Value: {critical_t_value}")
print(f"Pooled Variance: {pooled_var}")
print(f"Estimated Standard Error: {estimated_se}")

T-Statistic: 9.697651491183048
Critical T-Value: 2.024394164575136
Pooled Variance: 2.25
Estimated Standard Error: 0.4743416490252569
