In [2]:
import pandas as pd
from scipy.stats import ttest_ind, ttest_rel, ttest_1samp, shapiro, levene

# Read the dataset from the working directory and preview its first five rows.
df = pd.read_csv("medical.csv")
df.head(5)

Unnamed: 0,id,sexo,idade,peso,altura,hdl_antes,hdl_depois,ldl_antes,ldl_depois
0,1,M,51,72.1,1.91,47.3,44.9,127.3,122.0
1,2,F,58,55.4,1.72,59.5,67.1,159.9,145.4
2,3,M,68,82.6,1.64,47.5,41.7,134.5,120.8
3,4,M,71,85.9,1.87,63.0,71.7,139.5,124.7
4,5,M,51,78.8,1.8,43.2,39.6,136.5,141.2


In [12]:
# Shapiro function

def normality_result(group_name, group, alpha=0.05):
    """Run a Shapiro-Wilk test on ``group`` and describe the outcome in a sentence.

    Missing values are removed before testing. Without that step a single NaN
    makes the p-value NaN, the ``p < alpha`` comparison is False, and the sample
    would be reported as normally distributed regardless of its shape.

    Parameters
    ----------
    group_name : str
        Label inserted at the start of the returned message.
    group : array-like
        One-dimensional numeric sample (a pandas Series in this notebook).
    alpha : float, optional
        Significance level the p-value is compared against. Defaults to 0.05.

    Returns
    -------
    str
        "<group_name> does not have a normal distribution." when the p-value is
        below ``alpha``; otherwise "<group_name> does have a normal
        distribution." (i.e. normality was not rejected).
    """
    # Wrapping in a Series lets plain lists/arrays use the same NaN handling.
    observed = pd.Series(group).dropna()
    _, p_value = shapiro(observed)
    if p_value < alpha:
        return f"{group_name} does not have a normal distribution."
    return f"{group_name} does have a normal distribution."

In [13]:
# Outlier function

def outlier_ratio_check(group, group_name, alpha=0.05):
    """Report the share of observations flagged as outliers by the 1.5 * IQR rule.

    Parameters
    ----------
    group : pandas.Series
        Sample to inspect. Missing values are dropped before anything is computed.
    group_name : str
        Label inserted into the returned message.
    alpha : float, optional
        Unused by the computation. Kept so existing calls that pass it keep working.

    Returns
    -------
    str
        A sentence giving the outlier ratio as a percentage with two decimals,
        or a notice that the ratio is undefined when no non-missing
        observations remain.
    """
    observed = group.dropna()
    if observed.empty:
        # Without this guard the ratio below would divide by zero.
        return f"Outlier ratio for {group_name}: undefined (no non-missing observations)."

    q1 = observed.quantile(0.25)
    q3 = observed.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Only values strictly outside the fences count as outliers.
    is_outlier = (observed < lower_bound) | (observed > upper_bound)
    ratio = int(is_outlier.sum()) / len(observed)

    return f"Outlier ratio for {group_name}: {ratio:.2%}."

In [14]:
# Levene function

def variance_result(group1, group2, alpha=0.05):
    """Run Levene's test on two samples and describe the outcome in a sentence.

    Missing values are removed from each sample independently before testing.
    Without that step a NaN in either sample makes the p-value NaN, the
    ``p < alpha`` comparison is False, and the groups would be reported as
    homoscedastic regardless of their spread.

    Parameters
    ----------
    group1, group2 : array-like
        One-dimensional numeric samples (pandas Series in this notebook).
    alpha : float, optional
        Significance level the p-value is compared against. Defaults to 0.05.

    Returns
    -------
    str
        "The groups are not homoscedastic." when the p-value is below ``alpha``;
        otherwise "The groups are homoscedastic." (equal variances not rejected).
    """
    first = pd.Series(group1).dropna()
    second = pd.Series(group2).dropna()
    _, p_value = levene(first, second)
    if p_value < alpha:
        return "The groups are not homoscedastic."
    return "The groups are homoscedastic."

In [15]:
# Ex. 1 - paired comparison of hdl_depois against hdl_antes
# for rows with sexo == 'F' and idade > 50.

# Build the row filter once so both columns come from the same rows.
female_over_50 = (df['sexo'] == 'F') & (df['idade'] > 50)
group1 = df.loc[female_over_50, 'hdl_antes']
group2 = df.loc[female_over_50, 'hdl_depois']

# Diagnostics: Shapiro-Wilk verdict per column, then the IQR outlier share.
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(normality_result(label, sample))
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(outlier_ratio_check(sample, label))

# Paired t-test on group2 - group1. The two-sided p-value is converted to the
# one-sided p-value for the alternative "mean difference > 0".
t_stat, p_bilateral = ttest_rel(group2, group1)
if t_stat > 0:
    p_side = p_bilateral / 2
else:
    p_side = 1 - p_bilateral / 2

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_side < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_side}")
print(result)


Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Outlier ratio for Group 2: 1.82%.
Statistic: 5.015816208199954, p-value: 3.0303046084999733e-06
Reject the null hypothesis.


In [20]:
# Ex. 2 - independent comparison of ldl_depois between
# rows with sexo == 'F' (group1) and rows with sexo == 'M' (group2).

sexo = df['sexo']
group1 = df.loc[sexo == 'F', 'ldl_depois']
group2 = df.loc[sexo == 'M', 'ldl_depois']

# Diagnostics: Shapiro-Wilk verdict per group, then the IQR outlier share.
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(normality_result(label, sample))
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(outlier_ratio_check(sample, label))

# Levene verdict is printed for the reader; it is not passed on to the t-test.
print(variance_result(group1, group2))

# Independent two-sample t-test (two-sided), called with scipy's defaults.
t_stat, p_bilateral = ttest_ind(group2, group1)

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_bilateral < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_bilateral}")
print(result)

Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Outlier ratio for Group 2: 0.00%.
The groups are homoscedastic.
Statistic: 0.7990910887047518, p-value: 0.4251951193372746
Fail to reject the null hypothesis.


In [21]:
# Ex. 3 - paired comparison of ldl_depois against ldl_antes
# for rows with sexo == 'M'.

# Build the row filter once so both columns come from the same rows.
is_male = df['sexo'] == 'M'
group1 = df.loc[is_male, 'ldl_antes']
group2 = df.loc[is_male, 'ldl_depois']

# Diagnostics: Shapiro-Wilk verdict per column, then the IQR outlier share.
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(normality_result(label, sample))
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(outlier_ratio_check(sample, label))

# Paired t-test on group2 - group1. The two-sided p-value is converted to the
# one-sided p-value for the alternative "mean difference < 0".
t_stat, p_bilateral = ttest_rel(group2, group1)
if t_stat < 0:
    p_side = p_bilateral / 2
else:
    p_side = 1 - p_bilateral / 2

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_side < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_side}")
print(result)

Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 2.00%.
Outlier ratio for Group 2: 0.00%.
Statistic: -6.240867055726904, p-value: 5.423082253275937e-09
Reject the null hypothesis.


In [23]:
# Ex. 4 - one-sample comparison of hdl_antes against a reference mean of 50
# for rows with idade > 60.

group1 = df.loc[df['idade'] > 60, 'hdl_antes']

# Diagnostics: Shapiro-Wilk verdict, then the IQR outlier share.
print(normality_result("Group 1", group1))
print(outlier_ratio_check(group1, "Group 1"))

# One-sample t-test (two-sided) against popmean=50.
t_stat, p_bilateral = ttest_1samp(group1, popmean=50)

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_bilateral < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_bilateral}")
print(result)

Group 1 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Statistic: -0.70042587612945, p-value: 0.4862401375305343
Fail to reject the null hypothesis.


In [24]:
# Ex. 5 - paired comparison of hdl_depois against hdl_antes
# for rows with peso > 90.

# Build the row filter once so both columns come from the same rows.
over_90 = df['peso'] > 90
group1 = df.loc[over_90, 'hdl_antes']
group2 = df.loc[over_90, 'hdl_depois']

# Diagnostics: Shapiro-Wilk verdict per column, then the IQR outlier share.
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(normality_result(label, sample))
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(outlier_ratio_check(sample, label))

# Paired t-test on group2 - group1. The two-sided p-value is converted to the
# one-sided p-value for the alternative "mean difference < 0".
# NOTE(review): confirm this direction matches the exercise's hypothesis.
t_stat, p_bilateral = ttest_rel(group2, group1)
if t_stat < 0:
    p_side = p_bilateral / 2
else:
    p_side = 1 - p_bilateral / 2

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_side < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_side}")
print(result)

Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Outlier ratio for Group 2: 0.00%.
Statistic: 2.813849772652335, p-value: 0.9898753281477018
Fail to reject the null hypothesis.


In [25]:
# Ex. 6 - paired comparison of hdl_depois against hdl_antes
# for rows with sexo == 'M' and idade < 40.

# Build the row filter once so both columns come from the same rows.
male_under_40 = (df['sexo'] == 'M') & (df['idade'] < 40)
group1 = df.loc[male_under_40, 'hdl_antes']
group2 = df.loc[male_under_40, 'hdl_depois']

# Diagnostics: Shapiro-Wilk verdict per column, then the IQR outlier share.
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(normality_result(label, sample))
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(outlier_ratio_check(sample, label))

# Paired t-test on group2 - group1. The two-sided p-value is converted to the
# one-sided p-value for the alternative "mean difference > 0".
t_stat, p_bilateral = ttest_rel(group2, group1)
if t_stat > 0:
    p_side = p_bilateral / 2
else:
    p_side = 1 - p_bilateral / 2

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_side < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_side}")
print(result)

Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Outlier ratio for Group 2: 0.00%.
Statistic: 4.9187168237899925, p-value: 1.7318079760548178e-05
Reject the null hypothesis.


In [26]:
# Ex. 7 - one-sample comparison of ldl_depois against a reference mean of 135
# for rows with idade > 65.

group1 = df.loc[df['idade'] > 65, 'ldl_depois']

# Diagnostics: Shapiro-Wilk verdict, then the IQR outlier share.
print(normality_result("Group 1", group1))
print(outlier_ratio_check(group1, "Group 1"))

# One-sample t-test (two-sided) against popmean=135.
t_stat, p_bilateral = ttest_1samp(group1, popmean=135)

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_bilateral < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_bilateral}")
print(result)

Group 1 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Statistic: -0.14978461698161274, p-value: 0.8815030982862501
Fail to reject the null hypothesis.


In [27]:
# Ex. 8 - paired comparison of hdl_depois against hdl_antes
# for rows with sexo == 'F' and peso < 60.

# Build the row filter once so both columns come from the same rows.
female_under_60 = (df['sexo'] == 'F') & (df['peso'] < 60)
group1 = df.loc[female_under_60, 'hdl_antes']
group2 = df.loc[female_under_60, 'hdl_depois']

# Diagnostics: Shapiro-Wilk verdict per column, then the IQR outlier share.
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(normality_result(label, sample))
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(outlier_ratio_check(sample, label))

# Paired t-test on group2 - group1. The two-sided p-value is converted to the
# one-sided p-value for the alternative "mean difference < 0".
# NOTE(review): confirm this direction matches the exercise's hypothesis.
t_stat, p_bilateral = ttest_rel(group2, group1)
if t_stat < 0:
    p_side = p_bilateral / 2
else:
    p_side = 1 - p_bilateral / 2

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_side < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_side}")
print(result)

Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Outlier ratio for Group 2: 0.00%.
Statistic: 1.619963410461149, p-value: 0.93954930861297
Fail to reject the null hypothesis.


In [28]:
# Ex. 9 - independent comparison of ldl_depois between
# rows with idade < 40 (group1) and rows with idade >= 40 (group2).

idade = df['idade']
group1 = df.loc[idade < 40, 'ldl_depois']
group2 = df.loc[idade >= 40, 'ldl_depois']

# Diagnostics: Shapiro-Wilk verdict per group, then the IQR outlier share.
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(normality_result(label, sample))
for label, sample in (("Group 1", group1), ("Group 2", group2)):
    print(outlier_ratio_check(sample, label))

# Levene verdict is printed for the reader; it is not passed on to the t-test.
print(variance_result(group1, group2))

# Independent two-sample t-test (two-sided), called with scipy's defaults.
t_stat, p_bilateral = ttest_ind(group2, group1)

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_bilateral < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_bilateral}")
print(result)

Group 1 does have a normal distribution.
Group 2 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Outlier ratio for Group 2: 0.71%.
The groups are homoscedastic.
Statistic: -0.8399650303100675, p-value: 0.401941090504198
Fail to reject the null hypothesis.


In [29]:
# Ex. 10 - one-sample comparison of hdl_antes against a reference mean of 50
# for rows with altura > 1.80.

group1 = df.loc[df['altura'] > 1.80, 'hdl_antes']

# Diagnostics: Shapiro-Wilk verdict, then the IQR outlier share.
print(normality_result("Group 1", group1))
print(outlier_ratio_check(group1, "Group 1"))

# One-sample t-test (two-sided) against popmean=50.
t_stat, p_bilateral = ttest_1samp(group1, popmean=50)

alpha = 0.05
result = (
    "Reject the null hypothesis."
    if p_bilateral < alpha
    else "Fail to reject the null hypothesis."
)

print(f"Statistic: {t_stat}, p-value: {p_bilateral}")
print(result)

Group 1 does have a normal distribution.
Outlier ratio for Group 1: 0.00%.
Statistic: -0.17895922555799065, p-value: 0.8596850575521344
Fail to reject the null hypothesis.
