In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
import seaborn as sns
from sklearn.model_selection import train_test_split

# Load the iris dataset through seaborn.
iris_data = sns.load_dataset('iris')

# Target is the species column; features are every column from
# 'sepal_length' through 'petal_width' (label-based slice, both ends inclusive).
y = iris_data['species']
x = iris_data.loc[:, 'sepal_length':'petal_width']

# The two candidate classifiers. The tree gets a fixed seed so that its
# tie-breaking between equally good splits is reproducible across runs.
lr = LogisticRegression(max_iter = 1000)
dt = DecisionTreeClassifier(max_depth = 5, random_state = 37)

# Hold out 30% of the rows for testing. A fixed random_state makes the split
# (and therefore the reported accuracies) reproducible under Restart & Run All;
# stratify=y keeps the class proportions equal in the train and test parts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=37, stratify=y
)
lr.fit(x_train, y_train)
dt.fit(x_train, y_train)

# Mean accuracy on the held-out rows (Model A = logistic regression,
# Model B = decision tree).
s1 = lr.score(x_test, y_test)
s2 = dt.score(x_test, y_test)

print('Model A accuracy: %.2f%%' % (s1*100))
print('Model B accuracy: %.2f%%' % (s2*100))

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the set of class labels first, then translate every label in `y`
# into its numeric class code with the fitted encoder.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)


In [None]:
# Inspect the numeric class codes produced by the label encoder.
y_encoded

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate

# Stratified 10-fold splitter; rows are shuffled before splitting and the
# seed is fixed so every use of this splitter yields the same folds.
k_fold = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=37,
)

In [None]:
# Show the configured cross-validation splitter.
k_fold

In [None]:
# Cross-validate both classifiers with identical settings: same features,
# same encoded target, same fold splitter, and the same pair of metrics
# (accuracy and macro-averaged F1), keeping the training scores as well.
# The results are unpacked in order: logistic regression first, then the tree.
cv_lr, cv_dt = (
    cross_validate(
        estimator,
        x,
        y_encoded,
        cv=k_fold,
        scoring=('accuracy', 'f1_macro'),
        return_train_score=True,
    )
    for estimator in (lr, dt)
)

In [None]:
# Per-fold macro-F1 test scores of the logistic regression model.
cv_lr['test_f1_macro']

In [None]:
# Per-fold macro-F1 test scores of the decision tree model.
cv_dt['test_f1_macro']

In [None]:
import pandas as pd

# Long-format table of the per-fold macro-F1 test scores: one row per
# (model, fold), with the logistic regression rows first and the tree rows after.
lr_f1 = list(cv_lr['test_f1_macro'])
dt_f1 = list(cv_dt['test_f1_macro'])

# The number of label rows is taken from the score lists themselves rather
# than a hard-coded fold count, so the frame still builds if the number of
# cross-validation folds is changed.
perf = pd.DataFrame({
  'Model': ['LR'] * len(lr_f1) + ['DT'] * len(dt_f1),
  'Performance': lr_f1 + dt_f1
})

print(perf)

In [None]:
from scipy.stats import wilcoxon

# Pull the two models' scores out of the long-format table.
is_lr = perf['Model'] == 'LR'
is_dt = perf['Model'] == 'DT'
lr_perf = perf.loc[is_lr, 'Performance']
dt_perf = perf.loc[is_dt, 'Performance']

# Two-sided Wilcoxon signed-rank test on the two score samples.
# Zero differences are handled with the 'zsplit' method, no continuity
# correction is applied, and the p-value comes from the 'approx' mode.
wilcoxon_stat, wilcoxon_pval = wilcoxon(
    lr_perf,
    dt_perf,
    zero_method='zsplit',
    correction=False,
    alternative='two-sided',
    mode='approx',
)


In [None]:
# p-value of the Wilcoxon signed-rank test comparing the two models.
wilcoxon_pval

In [None]:
# display test results
print("Wilcoxon Signed-Rank Test:")
print(f"Statistic: {wilcoxon_stat}")
print(f"p-value: {wilcoxon_pval}")

# calculate means
# The 'Model' column of `perf` holds the labels 'LR' and 'DT'. Filtering on
# 'M1'/'M2' matches no rows and yields NaN for both means, so the real labels
# are used here. The variable names are kept for any later cells that use them.
mean_m1 = perf.loc[perf['Model'] == 'LR', 'Performance'].mean()
mean_m2 = perf.loc[perf['Model'] == 'DT', 'Performance'].mean()

print(f"Mean Performance for LR: {mean_m1}")
print(f"Mean Performance for DT: {mean_m2}")

# plot with statistical results
# Both layers are drawn on the same Axes: the boxplot summarises each model's
# scores and the strip plot overlays the individual per-fold values.
ax = sns.boxplot(data=perf, x='Model', y='Performance')
sns.stripplot(data=perf, x='Model', y='Performance', color='black', size=5, ax=ax)
# Put the test result on the figure so it stands alone when skimmed.
ax.set_title(f"Performance per CV fold (Wilcoxon p = {wilcoxon_pval:.3f})");


In [None]:
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Three toy samples of five observations each.
sample1 = [1, 2, 3, 4, 5]
sample2 = [2, 4, 6, 8, 10]
sample3 = [3, 6, 9, 12, 15]

# Flatten the samples into one list of observations plus a parallel list
# holding the group label of each observation.
samples = {'Sample 1': sample1, 'Sample 2': sample2, 'Sample 3': sample3}
data = [value for values in samples.values() for value in values]
groups = [label for label, values in samples.items() for _ in values]

# One-way ANOVA: fit an OLS model with the group label as the only
# predictor, then build the ANOVA table from the fitted model.
model = ols('data ~ groups', data={'data': data, 'groups': groups}).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey's HSD for all pairwise group comparisons.
tukey_results = pairwise_tukeyhsd(data, groups)

# Report both results, each preceded by its heading.
print("ANOVA results:", anova_table, sep="\n")
print("\nTukey's test results:", tukey_results, sep="\n")


In [None]:
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Toy data: three samples with five observations each.
sample1 = [1, 2, 3, 4, 5]
sample2 = [2, 4, 6, 8, 10]
sample3 = [3, 6, 9, 12, 15]

# Pair each sample with its label, then flatten into a list of observations
# and a matching list of group labels.
labelled_samples = [
    ('Sample 1', sample1),
    ('Sample 2', sample2),
    ('Sample 3', sample3),
]
data = [value for _, values in labelled_samples for value in values]
groups = [label for label, values in labelled_samples for _ in values]

# One-way ANOVA via an OLS fit with the group label as the only predictor.
model = ols('data ~ groups', data={'data': data, 'groups': groups}).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey's HSD for all pairwise group comparisons.
tukey_results = pairwise_tukeyhsd(data, groups)

# Print the ANOVA table.
print("ANOVA results:")
print(anova_table)

# Print the Tukey results.
print("\nTukey's test results:")
print(tukey_results)

# Boxplot of the three samples, one box per group, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
ax.boxplot(
    [values for _, values in labelled_samples],
    labels=[label for label, _ in labelled_samples],
)
ax.set_xlabel('Groups')
ax.set_ylabel('Values')
ax.set_title('Boxplot of Sample Data')
plt.show()
