In [40]:
import pandas as pd
from scipy.stats import ttest_ind, ttest_rel, ttest_1samp, f_oneway, shapiro, levene

# Load datasets
# Each exercise cell below reads from its own frame (df1 -> Ex. 1, ..., df4 -> Ex. 4).
# The paths are relative, so the CSV files are resolved against the kernel's
# current working directory.
df1 = pd.read_csv("Ex1.csv")
df2 = pd.read_csv("Ex2.csv")
df3 = pd.read_csv("Ex3.csv")
df4 = pd.read_csv("Ex4.csv")

In [41]:
# Shapiro function

def normality_result(group_name, group, alpha=0.05):
    """Report the outcome of a Shapiro-Wilk normality test as a sentence.

    Missing values are dropped before testing. Otherwise a single NaN makes
    ``shapiro`` return a NaN p-value, ``p < alpha`` evaluates to False, and
    the sample would be reported as normal without any supporting evidence.

    Parameters
    ----------
    group_name : str
        Label inserted into the returned message.
    group : array-like
        Numeric observations; NaN entries are ignored.
    alpha : float, default 0.05
        Significance level. Normality is rejected when ``p < alpha``.

    Returns
    -------
    str
        "<group_name> does not have a normal distribution." when the test
        rejects normality, otherwise "<group_name> does have a normal
        distribution." (i.e. no evidence against normality was found).
    """
    # Same missing-value policy as outlier_ratio_check: ignore NaNs.
    sample = pd.Series(group).dropna().to_numpy(dtype=float)
    _, p = shapiro(sample)
    if p < alpha:
        return f"{group_name} does not have a normal distribution."
    return f"{group_name} does have a normal distribution."

In [42]:
# Outlier function

def outlier_ratio_check(group, group_name, alpha=0.05):
    """Report the share of observations outside the 1.5 * IQR fences.

    Parameters
    ----------
    group : array-like
        Numeric observations. Missing values are dropped first.
    group_name : str
        Label inserted into the returned message.
    alpha : float, default 0.05
        Unused; kept only so existing callers that pass it keep working.

    Returns
    -------
    str
        "Outlier ratio for <group_name>: <ratio as percent>." or, when no
        valid observation remains after dropping NaNs, a message stating
        that the ratio is not available (instead of a ZeroDivisionError).
    """
    values = pd.Series(group).dropna()
    if values.empty:
        # The ratio is undefined without data; say so rather than divide by zero.
        return f"Outlier ratio for {group_name}: n/a (no valid observations)."

    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Points exactly on a fence are not counted as outliers.
    outliers = values[(values < lower_bound) | (values > upper_bound)]
    ratio = len(outliers) / len(values)

    return f"Outlier ratio for {group_name}: {ratio:.2%}."

In [43]:
# Levene function

def variance_result(group1, group2, alpha=0.05):
    """Report the outcome of Levene's test for equal variances as a sentence.

    Missing values are dropped from each group before testing. Otherwise a
    NaN in either group yields a NaN p-value, ``p < alpha`` evaluates to
    False, and the groups would be reported as homoscedastic by accident.

    Parameters
    ----------
    group1, group2 : array-like
        Numeric observations of the two groups; NaN entries are ignored.
    alpha : float, default 0.05
        Significance level. Equal variances are rejected when ``p < alpha``.

    Returns
    -------
    str
        "The groups are not homoscedastic." when the test rejects equal
        variances, otherwise "The groups are homoscedastic."
    """
    # Groups are cleaned independently; Levene's test does not need equal sizes.
    sample1 = pd.Series(group1).dropna().to_numpy(dtype=float)
    sample2 = pd.Series(group2).dropna().to_numpy(dtype=float)
    _, p = levene(sample1, sample2)
    if p < alpha:
        return "The groups are not homoscedastic."
    return "The groups are homoscedastic."

In [44]:
# Ex. 1
# Compare "Nota" between the rows of df1 with Turma == "A" and Turma == "B".

group1 = df1[df1["Turma"] == "A"]["Nota"]
group2 = df1[df1["Turma"] == "B"]["Nota"]

# Checking normality:
print(normality_result("Group 1", group1))
print(normality_result("Group 2", group2))

# Checking outliers
print(outlier_ratio_check(group1, "Group 1"))
print(outlier_ratio_check(group2, "Group 2"))

# Checking variance:
# ttest_ind below uses its default equal_var=True, i.e. it assumes equal variances;
# if this check reports heteroscedastic groups, pass equal_var=False (Welch's test).
print(variance_result(group1, group2))

# Running independent-samples test (the two groups are disjoint rows of df1).
# One-sided alternative: mean(group1) < mean(group2). Letting scipy compute the
# one-sided p-value replaces the manual `p / 2 if t < 0 else 1 - p / 2` conversion.
# NOTE(review): the direction is kept from the original code; if the exercise asks
# whether Turma A scores HIGHER than Turma B, use alternative="greater" - confirm.
t, p = ttest_ind(group1, group2, alternative="less")

alpha = 0.05

if p < alpha:
    result = "Reject the null hypothesis."
else:
    result = "Fail to reject the null hypothesis."

print(f"Statistic: {t}, p-value: {p}")
print(result)

Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Outlier ratio for Group 2: 0.00%.
The groups are homoscedastic.
Statistic: 2.25444062957182, p-value: 0.9860197465538326
Fail to reject the null hypothesis.


In [45]:
# Ex. 2
# Compare the paired columns "Antes" and "Depois" of df2 (one pair per row).

group1 = df2["Antes"]
group2 = df2["Depois"]

# Checking normality:
print(normality_result("Group 1", group1))
print(normality_result("Group 2", group2))

# Checking outliers
print(outlier_ratio_check(group1, "Group 1"))
print(outlier_ratio_check(group2, "Group 2"))

# Running related (paired) test.
# One-sided alternative: mean(Antes - Depois) < 0, i.e. "Depois" is larger on
# average. Letting scipy compute the one-sided p-value replaces the manual
# `p / 2 if t < 0 else 1 - p / 2` conversion of the two-sided result.
# NOTE(review): direction kept from the original code - confirm it matches the
# hypothesis stated in the exercise.
t, p = ttest_rel(group1, group2, alternative="less")

alpha = 0.05

if p < alpha:
    result = "Reject the null hypothesis."
else:
    result = "Fail to reject the null hypothesis."

print(f"Statistic: {t}, p-value: {p}")
print(result)

Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 4.00%.
Outlier ratio for Group 2: 0.00%.
Statistic: -16.955329756648446, p-value: 3.661236668369023e-15
Reject the null hypothesis.


In [46]:
# Ex. 3
# Two-sided comparison of "Estresse" between the rows of df3 with Setor "RH" and "TI".

group1 = df3.loc[df3["Setor"] == "RH", "Estresse"]
group2 = df3.loc[df3["Setor"] == "TI", "Estresse"]

# Assumption check 1: normality of each group.
print(normality_result("Group 1", group1))
print(normality_result("Group 2", group2))

# Assumption check 2: share of IQR outliers in each group.
print(outlier_ratio_check(group1, "Group 1"))
print(outlier_ratio_check(group2, "Group 2"))

# Assumption check 3: equality of variances.
print(variance_result(group1, group2))

# Independent-samples t-test (the groups are disjoint rows of df3); the default
# two-sided p-value is used as returned.
alpha = 0.05
t, p = ttest_ind(group1, group2)

result = (
    "Reject the null hypothesis."
    if p < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t}, p-value: {p}")
print(result)

Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Outlier ratio for Group 2: 0.00%.
The groups are homoscedastic.
Statistic: -2.4687650841812756, p-value: 0.01607631180497714
Reject the null hypothesis.


In [47]:
# Ex. 4
# Two-sided one-sample test of the mean of df4["Tempo_Espera"] against 15.

group1 = df4.loc[:, "Tempo_Espera"]

# Assumption check 1: normality of the sample.
print(normality_result("Group 1", group1))

# Assumption check 2: share of IQR outliers.
print(outlier_ratio_check(group1, "Group 1"))

# One-sample t-test against the reference mean of 15; the default two-sided
# p-value is used as returned.
# NOTE(review): the recorded run reports this sample as non-normal; the t-test is
# then only approximately valid - confirm the sample size justifies it.
alpha = 0.05
t, p = ttest_1samp(group1, popmean=15)

result = (
    "Reject the null hypothesis."
    if p < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t}, p-value: {p}")
print(result)

Group 1 does not have a normal distribution.
Outlier ratio for Group 1: 2.50%.
Statistic: 3.8515611570103028, p-value: 0.00042567755539783337
Reject the null hypothesis.
