In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import io

In [2]:
# Shared test settings: significance level and the benchmark salary (H0 mean)
alpha = 0.05
industry_standard = 50000

# Small-sample salary data for the t-test, embedded as CSV text
t_test_csv = """Employee,Salary
1,52000
2,48000
3,50500
4,53500
5,51000
6,49500
7,54000
8,50000
9,52500
10,51500"""

# Parse the embedded CSV and keep the Salary column as the sample
t_buffer = io.StringIO(t_test_csv)
t_data = pd.read_csv(t_buffer)
salaries_t = t_data.loc[:, 'Salary']

In [3]:
# Descriptive statistics of the t-test sample
n_t = salaries_t.shape[0]               # number of observations
sample_mean_t = salaries_t.mean()
# ddof=1 divides by (n - 1), i.e. the sample (not population) standard deviation
sample_std_t = salaries_t.std(ddof=1)

In [4]:
# Report the t-test sample summary, one statistic per line
print(
    "T-test Results:",
    f"Sample Mean: {sample_mean_t:.2f}",
    f"Sample Standard Deviation: {sample_std_t:.2f}",
    f"Sample Size: {n_t}",
    sep="\n",
)

T-test Results:
Sample Mean: 51250.00
Sample Standard Deviation: 1844.66
Sample Size: 10


In [5]:
# Two-sided one-sample t-test.
#   H0: μ = 50000   vs   H1: μ ≠ 50000
t_stat_two, p_value_two = stats.ttest_1samp(salaries_t, industry_standard)

print(
    "\nTwo-tailed t-test:",
    f"t-statistic: {t_stat_two:.4f}",
    f"p-value: {p_value_two:.4f}",
    sep="\n",
)

# Decision at significance level alpha
print(
    "Reject H0: The average salary differs from $50,000."
    if p_value_two < alpha
    else "Fail to reject H0: No evidence that the average salary differs from $50,000."
)


Two-tailed t-test:
t-statistic: 2.1429
p-value: 0.0607
Fail to reject H0: No evidence that the average salary differs from $50,000.


In [6]:
# One-tailed (lower-tail) t-test: H0: μ >= 50000 vs H1: μ < 50000
# Ask SciPy for the one-sided p-value directly instead of deriving it by hand
# from the two-sided result (the `alternative` argument needs SciPy >= 1.6).
p_value_one = stats.ttest_1samp(
    salaries_t, popmean=industry_standard, alternative='less'
).pvalue
print("\nOne-tailed t-test (less than):")
# The t-statistic does not depend on the alternative, so the value computed
# for the two-tailed test is reused for display.
print(f"t-statistic: {t_stat_two:.4f}")
print(f"p-value: {p_value_one:.4f}")
# A lower-tail p-value below alpha (< 0.5) can only occur for a negative
# t-statistic, so no separate sign check is required.
if p_value_one < alpha:
    print("Reject H0: The average salary is less than $50,000.")
else:
    print("Fail to reject H0: No evidence that the average salary is less than $50,000.")


One-tailed t-test (less than):
t-statistic: 2.1429
p-value: 0.9696
Fail to reject H0: No evidence that the average salary is less than $50,000.


In [7]:
# Z-test for the large-sample dataset

# Population standard deviation, treated as known for the Z-test
pop_std = 2500

# Large-sample salary data for the Z-test, embedded as CSV text
z_test_csv = """Employee,Salary
1,51000
2,52000
3,49500
4,50500
5,50000
6,51500
7,49000
8,52500
9,53000
10,50000
11,51000
12,49500
13,50500
14,50000
15,51500
16,49000
17,52500
18,53000
19,50000
20,51000
21,52000
22,49500
23,50500
24,50000
25,51500
26,49000
27,52500
28,53000
29,50000
30,51000"""

# Parse the embedded CSV, keep the Salary column, and record the sample size
z_buffer = io.StringIO(z_test_csv)
z_data = pd.read_csv(z_buffer)
salaries_z = z_data.loc[:, 'Salary']
n_z = salaries_z.shape[0]

In [8]:
# Mean of the Z-test sample
sample_mean_z = salaries_z.mean()

# Report the inputs to the Z-test, one per line (leading blank line kept)
print(
    "\nZ-test Results:",
    f"Sample Mean: {sample_mean_z:.2f}",
    f"Population Standard Deviation: {pop_std}",
    f"Sample Size: {n_z}",
    sep="\n",
)


Z-test Results:
Sample Mean: 50866.67
Population Standard Deviation: 2500
Sample Size: 30


In [9]:
# Two-tailed Z-test: H0: μ = 50000 vs H1: μ ≠ 50000
# z = (sample mean - hypothesised mean) / (sigma / sqrt(n)), sigma known
z_stat = (sample_mean_z - industry_standard) / (pop_std / np.sqrt(n_z))
# Use the survival function for the upper tail: norm.sf(x) equals 1 - cdf(x)
# but is evaluated directly, so it does not lose precision (or collapse to
# exactly 0.0) when |z| is large.
p_value_two_z = 2 * stats.norm.sf(abs(z_stat))
print("\nTwo-tailed Z-test:")
print(f"Z-statistic: {z_stat:.4f}")
print(f"p-value: {p_value_two_z:.4f}")
if p_value_two_z < alpha:
    print("Reject H0: The average salary differs from $50,000.")
else:
    print("Fail to reject H0: No evidence that the average salary differs from $50,000.")


Two-tailed Z-test:
Z-statistic: 1.8988
p-value: 0.0576
Fail to reject H0: No evidence that the average salary differs from $50,000.


In [10]:
# One-tailed (upper-tail) Z-test: H0: μ <= 50000 vs H1: μ > 50000
# Upper-tail probability via the survival function; equivalent to
# 1 - cdf(z) but accurate for large z where the subtraction would cancel.
p_value_one_z = stats.norm.sf(z_stat)
print("\nOne-tailed Z-test (greater than):")
print(f"Z-statistic: {z_stat:.4f}")
print(f"p-value: {p_value_one_z:.4f}")
# An upper-tail p-value below alpha (< 0.5) can only occur for a positive
# Z-statistic, so no separate sign check is required.
if p_value_one_z < alpha:
    print("Reject H0: The average salary is greater than $50,000.")
else:
    print("Fail to reject H0: No evidence that the average salary is greater than $50,000.")


One-tailed Z-test (greater than):
Z-statistic: 1.8988
p-value: 0.0288
Reject H0: The average salary is greater than $50,000.
