In [1]:
import pandas as pd
import numpy as np
import scipy.stats as st
import statsmodels.stats.weightstats as sm

In [3]:
def p_value_reader(p_value, alpha):
    """Print the hypothesis-test decision for a p-value at a given alpha.

    Parameters
    ----------
    p_value : float
        The p-value produced by a statistical test.
    alpha : float
        The significance level to compare against.

    Returns
    -------
    None
        The decision is printed, not returned. The null hypothesis is
        rejected when ``p_value <= alpha`` (the boundary counts as a
        rejection); otherwise the test fails to reject it.
    """
    decision = (
        "Reject Null Hypothesis"
        if p_value <= alpha
        else "Fail to reject Null Hypothesis"
    )
    print(decision)

In [4]:
# Load the production dataset; the path is relative, so the CSV must sit in
# the notebook's working directory.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which looks
# like a row index that was saved into the file — reading with index_col=0
# would likely absorb it; confirm before changing.
df_main = pd.read_csv("tesla_main.csv")
# Show the first rows as a quick check of the column names and values.
df_main.head()

Unnamed: 0,Production Date,Defects Found,Cars Produced,Weather Condition,Workers on Shift
0,2023-01-01,3,55,Rainy,20
1,2023-01-02,2,57,Rainy,19
2,2023-01-03,1,54,Rainy,21
3,2023-01-04,0,56,Rainy,22
4,2023-01-05,2,59,Rainy,20


In [5]:
# --- Test parameters ---------------------------------------------------------
mean_pop = 54             # population mean the sample is compared against
sd_pop = 2                # population standard deviation
confidence = 0.95         # confidence level
alpha = 1 - confidence    # significance level

# --- Sample statistics from the 'Cars Produced' column of df_main ------------
mean_sample = df_main['Cars Produced'].mean()
sample_size = df_main['Cars Produced'].count()   # count() skips missing values

# Report the statistics (sample mean first, then sample size)
print(f"The sample mean is {mean_sample}")
print(f"The sample size is {sample_size}")

The sample mean is 55.10909090909091
The sample size is 55


In [6]:
# Calculate the Z-score using the formula: (sample mean - population mean) / (population standard deviation / sqrt(sample size))
# The denominator, sd_pop / sqrt(sample_size), is the standard error of the sample mean.
z_score = (mean_sample - mean_pop) / (sd_pop / np.sqrt(sample_size))
print(f"The Z-score is {z_score}")

The Z-score is 4.112619161025777


In [7]:
# Calculate the p-value from the Z-score (two-tailed test)
# st.norm.sf is the standard normal survival function (1 - CDF); applying it to
# abs(z_score) gives the upper-tail area, which `tails` then doubles.
tails = 2
p_value = st.norm.sf(abs(z_score)) * tails
print(f"The p-value is {p_value}")

The p-value is 3.9119543361101206e-05


In [10]:
# Compare the z-test p-value with alpha and print the resulting decision.
p_value_reader(p_value, alpha)

Reject Null Hypothesis


In [11]:
# Build a function to compute the z-test
def ztest(mean_pop, mean_sample, sample_size, sd_pop, alpha, tails):
    """Run a one-sample z-test and print the z-score, p-value and decision.

    Parameters
    ----------
    mean_pop : float
        Population mean under the null hypothesis.
    mean_sample : float
        Observed sample mean.
    sample_size : int
        Number of observations in the sample; must be positive.
    sd_pop : float
        Population standard deviation; must be positive.
    alpha : float
        Significance level, passed on to ``p_value_reader``.
    tails : int
        Number of tails of the test: 1 (one-tailed) or 2 (two-tailed).

    Returns
    -------
    None
        The z-score, the p-value and the decision are printed.

    Raises
    ------
    ValueError
        If ``tails`` is not 1 or 2, or if ``sample_size`` or ``sd_pop`` is
        not strictly positive.

    Notes
    -----
    The p-value is computed from ``abs(z_score)``, so with ``tails=1`` it is
    the area of the tail on the side of the observed deviation.
    """
    # Any other multiplier does not yield a probability: 0 forces p = 0
    # (always "reject") and values above 2 can push the result past 1.
    if tails not in (1, 2):
        raise ValueError(f"tails must be 1 or 2, got {tails!r}")
    # `not x > 0` also rejects NaN, which would otherwise propagate silently.
    if not sample_size > 0:
        raise ValueError(f"sample_size must be positive, got {sample_size!r}")
    if not sd_pop > 0:
        raise ValueError(f"sd_pop must be positive, got {sd_pop!r}")

    # Calculate the Z-score using the formula: (sample mean - population mean) / (population standard deviation / sqrt(sample size))
    z_score = (mean_sample - mean_pop) / (sd_pop / np.sqrt(sample_size))
    print(f"The Z-score is {z_score}")

    # Calculate the p-value from the Z-score: upper-tail area of |z|,
    # multiplied by the number of tails (1 or 2)
    p_value = st.norm.sf(abs(z_score)) * tails
    print(f"The p-value is {p_value}")

    # Interpret the p-value against the alpha threshold
    p_value_reader(p_value, alpha)

In [12]:
# Apply the function to the current values of mean_pop, mean_sample,
# sample_size, sd_pop, alpha and tails. The recorded output matches the
# step-by-step z-test cells; note that mean_sample and sample_size are rebound
# later in the notebook, so this cell depends on execution order.
ztest(mean_pop, mean_sample, sample_size, sd_pop, alpha, tails)

The Z-score is 4.112619161025777
The p-value is 3.9119543361101206e-05
Reject Null Hypothesis


In [13]:
# --- Test parameters ---------------------------------------------------------
target_mean = 2.2         # target mean for the population
confidence = 0.95         # confidence level
alpha = 1 - confidence    # significance level

# --- Sample statistics from the 'Defects Found' column of df_main ------------
# mean_sample and sample_size are rebound here; they previously held the
# 'Cars Produced' statistics.
mean_sample = df_main['Defects Found'].mean()
sample_size = df_main['Defects Found'].count()   # count() skips missing values
sample_sd = df_main['Defects Found'].std()       # pandas default: ddof=1 (sample SD)

# Report the statistics (mean, then size, then standard deviation)
print(f"The sample mean is {mean_sample}")
print(f"The sample size is {sample_size}")
print(f"The SD is {sample_sd}")

The sample mean is 2.3636363636363638
The sample size is 55
The SD is 1.0777829844714388


In [14]:
# Calculate the T-score using the formula: (sample mean - target mean) / (sample standard deviation / sqrt(sample size))
# The sample standard deviation stands in for the population value here, which is why
# the p-value is later taken from the t-distribution rather than the normal.
t_score = (mean_sample - target_mean) / (sample_sd / np.sqrt(sample_size))
print(f"The T-score is {t_score}")

The T-score is 1.1259778359082033


In [15]:
# Calculate the p-value from the T-score (two-tailed test) using the Student's t-distribution
# Degrees of freedom are sample_size - 1; st.t.sf gives the upper-tail area of
# abs(t_score), which `tails` then doubles. This rebinds `tails` and `p_value`.
tails = 2
p_value = st.t.sf(abs(t_score), df=(sample_size - 1)) * tails
print(f"The p-value is {p_value}")

The p-value is 0.2651542493629725


In [16]:
# Built-in method: scipy's one-sample t-test returns the t-statistic and the
# p-value (two-sided by default) in one call. The recorded output matches the
# manual calculation. Note that this rebinds `p_value`.
t_stat, p_value = st.ttest_1samp(
    df_main['Defects Found'],
    popmean=target_mean
)

print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")

T-statistic: 1.1259778359082033
P-value: 0.2651542493629725
