In [12]:
import numpy as np
from scipy.stats import t

# One-sample t analysis of inter-keystroke times: builds a two-sided
# confidence interval for the mean and tests H0: mean == mu_0.

# Data: times between keystrokes (in seconds)
keystroke_times = [0.24, 0.22, 0.26, 0.34, 0.35, 0.32, 0.33, 0.29, 0.19, 
                   0.36, 0.30, 0.15, 0.17, 0.28, 0.38, 0.40, 0.37, 0.27]

# Significance level and the mean claimed under the null hypothesis.
# Every label and message below is derived from these two values so the
# printed text can never disagree with what is actually computed.
alpha = 0.05
mu_0 = 0.2
confidence_level = f"{100 * (1 - alpha):g}%"

# Calculate sample mean and standard deviation
mean_time = np.mean(keystroke_times)
std_dev = np.std(keystroke_times, ddof=1)  # Use ddof=1 for sample standard deviation
n = len(keystroke_times)

# Degrees of freedom
df = n - 1
print(f"Sample Mean: {mean_time:.4f},\nSample Standard Deviation: {std_dev:.4f},\nSample Size: {n}")

# Two-sided t critical value for the (1 - alpha) confidence interval
t_critical = t.ppf(1 - alpha/2, df)
print(f"t Critical Value ({confidence_level} CI): {t_critical:.4f}")

# Standard error of the mean, shared by the interval and the test statistic
std_error = std_dev / np.sqrt(n)

# Margin of error
margin_of_error = t_critical * std_error

# Confidence interval
lower_bound = mean_time - margin_of_error
upper_bound = mean_time + margin_of_error

print(f"{confidence_level} Confidence Interval: ({lower_bound:.4f}, {upper_bound:.4f})")

# Two-sided test of H0: mean == mu_0
t_statistic = (mean_time - mu_0) / std_error
# Survival function instead of 1 - cdf: avoids cancellation when |t| is large.
# The p-value is kept for inspection; the decision below uses the critical value.
p_value = 2 * t.sf(abs(t_statistic), df)
print(f"t Statistic: {t_statistic:.4f}")
if abs(t_statistic) > t_critical:
    print(f"Reject the null hypothesis: The mean time is significantly different from {mu_0} seconds.")
else:
    print(f"Fail to reject the null hypothesis: The mean time is not significantly different from {mu_0} seconds.")

Sample Mean: 0.2900,
Sample Standard Deviation: 0.0740,
Sample Size: 18
t Critical Value (95% CI): 2.1098
95% Confidence Interval: (0.2532, 0.3268)
t Statistic: 5.1625
Reject the null hypothesis: The mean time is significantly different from 0.2 seconds.


In [6]:
import numpy as np
from scipy.stats import t
from scipy.stats import f

# Compare groups A and B: first a two-sided F-test for equal variances, then
# (only if equal variances are plausible) a pooled, lower-tailed t-test.

# Significance level used by both tests
alpha = 0.05

# Observations for each group
A = [11.9, 12.1, 12.8, 12.2, 12.5, 11.9, 12.5, 11.8, 12.4, 12.9]
B = [12.1, 12.0, 12.9, 12.2, 12.7, 12.6, 12.6, 12.8, 12.0, 13.1]

# Per-group sizes
n_A, n_B = len(A), len(B)
print(f"Sample size of A: {n_A}")
print(f"Sample size of B: {n_B}")

# Per-group means
mean_A, mean_B = np.mean(A), np.mean(B)
print(f"Mean of A: {mean_A:.4f}")
print(f"Mean of B: {mean_B:.4f}")

# Per-group sample standard deviations (ddof=1) and the matching variances
std_A, std_B = np.std(A, ddof=1), np.std(B, ddof=1)
var_A, var_B = std_A**2, std_B**2
print(f"Standard deviation of A: {std_A:.4f}, variance: {var_A:.4f}")
print(f"Standard deviation of B: {std_B:.4f}, variance: {var_B:.4f}")

# F-test: the larger variance goes in the numerator so that F >= 1, and the
# numerator/denominator degrees of freedom follow the same ordering.
if std_A > std_B:
    F, df1, df2 = var_A / var_B, n_A - 1, n_B - 1
else:
    F, df1, df2 = var_B / var_A, n_B - 1, n_A - 1
F_critical = f.ppf(1 - alpha/2, df1, df2)
print(f"F-statistic: {F:.4f}")
print(f"Critical F-value: {F_critical:.4f}")

if not (F < F_critical):
    # Variances differ: the pooled test below is not applicable
    print("Reject the null hypothesis: Variances cannot be assumed equal.")
    print("Cannot proceed with pooled t-test as variances are not equal.")
else:
    print("Fail to reject the null hypothesis: Variances can be assumed equal.")

    # Pooled standard deviation: variances weighted by their degrees of freedom
    pooled_variance = ((n_A - 1) * var_A + (n_B - 1) * var_B) / (n_A + n_B - 2)
    sp = np.sqrt(pooled_variance)
    print(f"Pooled standard deviation: {sp:.4f}")

    # Test statistic for the difference of means (A minus B)
    pooled_se = sp * np.sqrt(1/n_A + 1/n_B)
    t_stat = (mean_A - mean_B) / pooled_se
    print(f"t-statistic: {t_stat:.4f}")

    # Degrees of freedom of the pooled test
    df = n_A + n_B - 2
    print(f"Degrees of freedom: {df}")

    # Lower-tail critical value: the rejection region is t_stat < t_critical
    t_critical = t.ppf(alpha, df)
    print(f"Critical t-value: {t_critical:.4f}")

    # Decision
    verdict = (
        "Reject the null hypothesis: A worked better than B."
        if t_stat < t_critical
        else "Fail to reject the null hypothesis: No evidence that A worked better than B."
    )
    print(verdict)

Sample size of A: 10
Sample size of B: 10
Mean of A: 12.3000
Mean of B: 12.5000
Standard deviation of A: 0.3830, variance: 0.1467
Standard deviation of B: 0.3972, variance: 0.1578
F-statistic: 1.0758
Critical F-value: 4.0260
Fail to reject the null hypothesis: Variances can be assumed equal.
Pooled standard deviation: 0.3902
t-statistic: -1.1462
Degrees of freedom: 18
Critical t-value: -1.7341
Fail to reject the null hypothesis: No evidence that A worked better than B.


In [7]:
# Two-sample z-test on summary statistics for two servers:
# is the difference between the two means significant at the 95% level?

# Summary statistics supplied with the problem
x1_mean = 6.7  # server 1: mean
x2_mean = 7.2  # server 2: mean
sigma1 = 0.5   # server 1: standard deviation
sigma2 = 0.5   # server 2: standard deviation
n1 = 30        # server 1: sample size
n2 = 20        # server 2: sample size
z_critical = 1.96  # two-sided critical z-value for 95% confidence

# Step 1: standard error of the difference = sqrt of the summed variance terms
variance_term_1 = sigma1**2 / n1
variance_term_2 = sigma2**2 / n2
se = (variance_term_1 + variance_term_2)**0.5
print(f"Standard Error (SE): {se:.4f}")

# Step 2: z-statistic for the difference of means (server 1 minus server 2)
mean_gap = x1_mean - x2_mean
z = mean_gap / se
print(f"z-statistic: {z:.4f}")

# Step 3: two-sided decision, comparing |z| with the critical value
is_significant = abs(z) > z_critical
if not is_significant:
    print("Fail to reject the null hypothesis: No significant difference between the two servers.")
else:
    print("Reject the null hypothesis: There is a significant difference between the two servers.")

Standard Error (SE): 0.1443
z-statistic: -3.4641
Reject the null hypothesis: There is a significant difference between the two servers.


In [1]:
import numpy as np

# Paired t-test on readings taken on November 15 and November 29:
# tests whether the mean of the pairwise differences is zero.

# Paired readings, one entry per pair, same order in both lists
nov_15 = [20.9, 17.1, 15.8, 18.8, 20.1, 15.6, 14.8, 24.1, 18.9, 12.5]
nov_29 = [21.4, 16.7, 16.4, 19.2, 19.9, 16.6, 15.0, 24.0, 19.2, 13.2]

# Step 1: element-wise differences (November 15 minus November 29)
nov_15_values = np.array(nov_15)
nov_29_values = np.array(nov_29)
differences = nov_15_values - nov_29_values
print(f"Differences: {differences}")

# Step 2: mean of the differences
mean_diff = np.mean(differences)
print(f"Mean of differences: {mean_diff:.4f}")

# Step 3: sample standard deviation of the differences (ddof=1)
std_diff = np.std(differences, ddof=1)
print(f"Standard deviation of differences: {std_diff:.4f}")

# Step 4: standard error of the mean difference
n = len(differences)
root_n = np.sqrt(n)
sem = std_diff / root_n
print(f"Standard error of the mean (SEM): {sem:.4f}")

# Step 5: t-statistic for H0: mean difference == 0
t_stat = mean_diff / sem
print(f"t-statistic: {t_stat:.4f}")

# Step 6: critical value, given as t(9, 0.975)
t_critical = 2.262
print(f"Critical t-value: {t_critical}")

# Step 7: two-sided decision on |t|
significant_change = abs(t_stat) > t_critical
if not significant_change:
    print("Fail to reject the null hypothesis: No significant change in air pollution levels.")
else:
    print("Reject the null hypothesis: There is a significant change in air pollution levels.")

Differences: [-0.5  0.4 -0.6 -0.4  0.2 -1.  -0.2  0.1 -0.3 -0.7]
Mean of differences: -0.3000
Standard deviation of differences: 0.4346
Standard error of the mean (SEM): 0.1374
t-statistic: -2.1828
Critical t-value: 2.262
Fail to reject the null hypothesis: No significant change in air pollution levels.
