In [3]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import plotly.graph_objects as go
import plotly.express as px

# Load data (using packing_machine.csv as it corresponds to davkovac.txt).
# header=None: the file has no header row, so the first line is read as data.
raw_frame = pd.read_csv("packing_machine.csv", header=None)
# Collapse the table into a flat 1-D array for the scipy.stats calls below.
x = raw_frame.to_numpy().flatten()

In [4]:
# Q-Q Plot: ordered sample values against theoretical normal quantiles.
# probplot returns ((theoretical quantiles, ordered sample), (slope, intercept, r)).
qq = stats.probplot(x, dist="norm")
theoretical_quantiles, sample_quantiles = qq[0]
slope, intercept = qq[1][:2]

# Reference line: the least-squares fit reported by probplot, drawn between
# the smallest and largest theoretical quantile.
line_x = np.array([theoretical_quantiles.min(), theoretical_quantiles.max()])
line_y = intercept + slope * line_x

points_trace = go.Scatter(
    x=theoretical_quantiles,
    y=sample_quantiles,
    mode="markers",
    name="Data",
)
reference_trace = go.Scatter(
    x=line_x,
    y=line_y,
    mode="lines",
    name="Normal Line",
    line={"color": "red"},
)

fig = go.Figure()
for trace in (points_trace, reference_trace):
    fig.add_trace(trace)

fig.update_layout(
    title="Q-Q Plot (Normal)",
    xaxis_title="Theoretical Quantiles",
    yaxis_title="Sample Quantiles",
)
fig.show()

In [5]:
# Shapiro-Wilk Normality Test
# Null hypothesis: the sample was drawn from a normal distribution, so a
# large p-value means normality is not rejected.
shapiro_result = stats.shapiro(x)
shapiro_stat = shapiro_result.statistic
shapiro_p = shapiro_result.pvalue
print(f"Shapiro-Wilk Test: W = {shapiro_stat:.5f}, p-value = {shapiro_p:.4f}")

Shapiro-Wilk Test: W = 0.98406, p-value = 0.7307


In [8]:
# T-test (Default, mu=25)
# One-sample t-test of H0: mean == mu_0 without passing `alternative`.
# In Python's scipy.stats, the default alternative is 'two-sided', matching R's default.
mu_0 = 25
t_stat_def, p_val_def = stats.ttest_1samp(x, mu_0)

# A fixed ".4f" rounds very small p-values to "0.0000", which reads as an
# exact zero. Below 1e-4 switch to scientific notation so the magnitude
# stays visible; ordinary p-values keep the four-decimal format.
p_text_def = f"{p_val_def:.4f}" if p_val_def >= 1e-4 else f"{p_val_def:.2e}"
print(f"T-test (Default): t = {t_stat_def:.4f}, p-value = {p_text_def}")

T-test (Default): t = -6.9875, p-value = 0.0000


In [6]:
# T-test (Two-sided, mu=25)
# One-sample t-test of H0: mean == mu_0 against H1: mean != mu_0.
mu_0 = 25
t_stat, p_val = stats.ttest_1samp(x, mu_0, alternative="two-sided")

# A fixed ".4f" rounds very small p-values to "0.0000", which reads as an
# exact zero. Below 1e-4 switch to scientific notation so the magnitude
# stays visible; ordinary p-values keep the four-decimal format.
p_text = f"{p_val:.4f}" if p_val >= 1e-4 else f"{p_val:.2e}"
print(f"T-test (Two-sided): t = {t_stat:.4f}, p-value = {p_text}")

# Visualization
# Draw the t density over its 0.001-0.999 quantile range, widened when needed
# so the observed statistic lies on the plotted curve instead of beyond it.
df = len(x) - 1
x_lo = min(stats.t.ppf(0.001, df), t_stat - 0.5)
x_hi = max(stats.t.ppf(0.999, df), t_stat + 0.5)
x_range = np.linspace(x_lo, x_hi, 1000)
y_pdf = stats.t.pdf(x_range, df)

fig = go.Figure()
fig.add_trace(go.Scatter(x=x_range, y=y_pdf, mode="lines", name="T-Distribution"))

# Critical Region (Two-sided, alpha=0.05): alpha is split evenly between the
# two tails, so the cut-offs are +/- the (1 - alpha/2) quantile.
alpha = 0.05
crit_val = stats.t.ppf(1 - alpha / 2, df)
x_crit_right = np.linspace(crit_val, x_range.max(), 100)
y_crit_right = stats.t.pdf(x_crit_right, df)
x_crit_left = np.linspace(x_range.min(), -crit_val, 100)
y_crit_left = stats.t.pdf(x_crit_left, df)

# Each tail is a closed polygon: drop to y=0 at both ends of the tail so that
# fill="toself" shades the area under the density. Only the first tail gets a
# legend entry; both carry the same name so hover labels agree.
tails = (
    (x_crit_right, y_crit_right, True),
    (x_crit_left, y_crit_left, False),
)
for tail_x, tail_y, in_legend in tails:
    fig.add_trace(
        go.Scatter(
            x=np.concatenate(([tail_x[0]], tail_x, [tail_x[-1]])),
            y=np.concatenate(([0], tail_y, [0])),
            fill="toself",
            fillcolor="rgba(255,0,0,0.3)",
            line=dict(width=0),
            name="Rejection Region",
            showlegend=in_legend,
        )
    )

# Test Statistic
fig.add_vline(x=t_stat, line_color="green", annotation_text=f"t-stat={t_stat:.2f}")

fig.update_layout(
    title=f"T-test (Two-sided, mu={mu_0})", xaxis_title="t-value", yaxis_title="Density"
)
fig.show()

T-test (Two-sided): t = -6.9875, p-value = 0.0000


In [7]:
# T-test (Greater, mu=25)
# One-sided test of H1: mean > mu_0.
# Reuses mu_0, alpha, df, x_range and y_pdf defined by the two-sided cell above.
t_stat_g, p_val_g = stats.ttest_1samp(x, mu_0, alternative="greater")

# A fixed ".4f" rounds very small p-values to "0.0000", which reads as an
# exact zero. Below 1e-4 switch to scientific notation so the magnitude
# stays visible; ordinary p-values keep the four-decimal format.
p_text_g = f"{p_val_g:.4f}" if p_val_g >= 1e-4 else f"{p_val_g:.2e}"
print(f"T-test (Greater): t = {t_stat_g:.4f}, p-value = {p_text_g}")

# Visualization
fig = go.Figure()
fig.add_trace(go.Scatter(x=x_range, y=y_pdf, mode="lines", name="T-Distribution"))

# Critical Region (Greater, alpha=0.05): only the upper tail rejects H0, so
# the whole alpha lies to the right of the (1 - alpha) quantile.
crit_val_g = stats.t.ppf(1 - alpha, df)
x_crit = np.linspace(crit_val_g, x_range.max(), 100)
y_crit = stats.t.pdf(x_crit, df)

# Closed polygon: drop to y=0 at both ends of the tail so fill="toself"
# shades the area under the density.
fig.add_trace(
    go.Scatter(
        x=np.concatenate(([x_crit[0]], x_crit, [x_crit[-1]])),
        y=np.concatenate(([0], y_crit, [0])),
        fill="toself",
        fillcolor="rgba(255,0,0,0.3)",
        line=dict(width=0),
        name="Rejection Region",
    )
)

# Test Statistic
fig.add_vline(x=t_stat_g, line_color="green", annotation_text=f"t-stat={t_stat_g:.2f}")

fig.update_layout(
    title=f"T-test (Greater, mu={mu_0})", xaxis_title="t-value", yaxis_title="Density"
)
fig.show()

T-test (Greater): t = -6.9875, p-value = 1.0000
