## Statistics

In [None]:
# Histogram of the values in `uncertainty` (labelled on the x-axis as the
# standard deviation of predictions), drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(uncertainty, bins=50, alpha=0.7, color='b', edgecolor='black')
ax.set_xlabel('Uncertainty (Standard Deviation of Predictions)')
ax.set_ylabel('Frequency')
ax.set_title('Uncertainty distribution for the MobileNetV2 – C model')
ax.grid(True)
plt.show()

In [None]:
# A bare expression is only rendered when it is the last statement of a cell,
# so the shape is printed explicitly instead of being silently discarded.
print("Uncertainty shape:", uncertainty.shape)

# Fixed binning for the histogram counts. `bin_edges` is reused by the
# multi-model bar plot further down, so the same edges apply to every model.
# NOTE: np.histogram does not count values that fall outside HIST_RANGE.
HIST_BINS = 50
HIST_RANGE = (0, 0.004)

hist_values, bin_edges = np.histogram(uncertainty, bins=HIST_BINS, range=HIST_RANGE)
print("Histogram Bins:", bin_edges)
print("Histogram Counts:", hist_values)

In [None]:
# Load the stored histogram counts: each non-empty CSV row is parsed as a list
# of integer counts and is plotted below as one model's histogram.
# newline='' is the mode the csv module documents for files given to csv.reader.
uncertainties = []
with open("../models/models_logs/uncertainties.csv", 'r', newline='') as file:
    for row in csv.reader(file):
        if not row:
            # csv.reader yields [] for blank lines; an empty count list cannot be plotted.
            continue
        uncertainties.append([int(value) for value in row])

plt.rc('xtick', labelsize=10)

model_names = ['MobileNetV2 – C', 'MobileNetV2 – NC', 'Xception - C', 'Xception - NC', 'InceptionResNetV2 – C', 'InceptionResNetV2 – NC']

# The bars are positioned with `bin_edges` (computed in an earlier cell), so
# every row needs exactly one count per bin. Fail early with a readable message
# instead of a shape error raised from inside matplotlib.
n_bins = len(bin_edges) - 1
for name, counts in zip(model_names, uncertainties):
    if len(counts) != n_bins:
        raise ValueError(
            f"{name}: expected {n_bins} histogram counts, found {len(counts)}"
        )

bin_widths = np.diff(bin_edges)

# 3x2 grid, filled row by row. Pairing names with rows via zip avoids the
# IndexError a hard-coded range(6) raises when the CSV holds fewer rows.
fig, axes = plt.subplots(3, 2, figsize=(14, 12))
panels = axes.ravel()
plotted = 0
for ax, name, counts in zip(panels, model_names, uncertainties):
    ax.bar(bin_edges[:-1], counts, width=bin_widths, align="edge", edgecolor="black")
    ax.set_xlabel("Uncertainty (Standard Deviation of Predictions)")
    ax.set_ylabel("Frequency")
    ax.set_title(name)
    ax.grid(True)
    plotted += 1

# Hide panels that received no data so the figure has no empty frames.
for ax in panels[plotted:]:
    ax.set_visible(False)

fig.subplots_adjust(hspace=0.4)
fig.savefig('myimage.svg', format='svg', dpi=600)
plt.show()


In [None]:
from scipy.stats import shapiro

# Load the accuracy samples: each non-empty CSV row is parsed as a list of
# floats and treated as one model's sample.
# newline='' is the mode the csv module documents for files given to csv.reader.
m_accs = []
with open("../models/models_logs/model_accuracies.csv", 'r', newline='') as file:
    for row in csv.reader(file):
        if not row:
            # csv.reader yields [] for blank lines; an empty sample cannot be tested.
            continue
        m_accs.append([float(value) for value in row])

# Perform Shapiro-Wilk normality test for each model.
# p > 0.05 means normality is not rejected at the 5% level.
for i, accs in enumerate(m_accs, start=1):
    stat, p = shapiro(accs)
    print(f"Model_{i}: Shapiro-Wilk Test Statistic={stat:.4f}, p-value={p:.4f}")
    if p > 0.05:
        print("   Data appears to be normally distributed.\n")
    else:
        print("   Data does NOT appear to be normally distributed.\n")

In [None]:
from scipy.stats import levene

# Levene's test for equal variances across the model accuracy samples.
# Unpacking m_accs passes every loaded sample, rather than hard-coding six
# indices that break (or silently ignore data) when the number of models changes.
stat, p = levene(*m_accs)
print(f"Levene’s Test Statistic={stat:.4f}, p-value={p:.4f}")

# p > 0.05 means equality of variances is not rejected at the 5% level.
if p > 0.05:
    print("Variances are equal (homogeneity assumption met).")
else:
    print("Variances are NOT equal (homogeneity assumption violated).")

In [None]:
import numpy as np
import scipy.stats as stats
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

model_names = ['MobileNetV2 – C', 'MobileNetV2 – NC', 'Xception - C', 'Xception - NC', 'InceptionResNetV2 – C', 'InceptionResNetV2 – NC']

# Combine results into a long-format DataFrame: one row per accuracy value,
# labelled Model_1..Model_N in the order of m_accs. Labels are generated from
# the actual sample sizes, so the frame stays consistent if the number of
# models or of runs per model differs from 6 x 5.
data = pd.DataFrame({
    'Accuracy': [acc for accs in m_accs for acc in accs],
    'Model': [f"Model_{i}" for i, accs in enumerate(m_accs, start=1) for _ in accs],
})

# Perform one-way ANOVA
model = ols('Accuracy ~ C(Model)', data=data).fit()
anova_results = sm.stats.anova_lm(model, typ=2)

# Display ANOVA table
print(anova_results)

# Tukey's HSD is a follow-up to a significant ANOVA. It is still computed
# unconditionally (as before), but the reader is told when the omnibus test
# does not support looking at pairwise differences.
anova_p = anova_results.loc['C(Model)', 'PR(>F)']
if anova_p > 0.05:
    print("Note: ANOVA is not significant at alpha=0.05; interpret the pairwise comparisons with caution.")

from statsmodels.stats.multicomp import pairwise_tukeyhsd

print("-------------")

tukey = pairwise_tukeyhsd(data['Accuracy'], data['Model'], alpha=0.05)
print(tukey)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Flatten the per-model accuracy samples and label every observation with its
# model (Model_1..Model_N in the order of m_accs). Labels follow the actual
# sample sizes instead of assuming exactly 6 models with 5 runs each.
data = np.concatenate(m_accs)
groups = [f"Model_{i}" for i, accs in enumerate(m_accs, start=1) for _ in accs]

# Perform Tukey HSD test
tukey = pairwise_tukeyhsd(data, groups, alpha=0.05)

# Convert results to DataFrame. The summary table is built once; its first
# row holds the column headers and the remaining rows hold one comparison each.
summary_rows = tukey.summary().data
results_df = pd.DataFrame(data=summary_rows[1:], columns=summary_rows[0])
results_df = results_df.rename(columns={"group1": "Model_1", "group2": "Model_2",
                                        "meandiff": "Mean Difference", "p-adj": "p-value",
                                        "lower": "Lower CI", "upper": "Upper CI", "reject": "Significant"})

# Apply the seaborn style before the figure exists so the whole figure uses it.
sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(8, 5))

# Extract data for plotting: the point is the mean difference, and the
# asymmetric error bars reach to the lower and upper confidence bounds.
x_diff = results_df["Mean Difference"]
x_err = [results_df["Mean Difference"] - results_df["Lower CI"], results_df["Upper CI"] - results_df["Mean Difference"]]
y_labels = [f"{a} vs {b}" for a, b in zip(results_df["Model_1"], results_df["Model_2"])]

# Create error bar plot (one row per pairwise comparison)
ax.errorbar(x_diff, y_labels, xerr=x_err, fmt="o", color="blue", capsize=5)

# Vertical reference line at zero: intervals crossing it include "no difference".
ax.axvline(0, linestyle="--", color="red", linewidth=1)

# Customize labels
ax.set_xlabel("Mean Difference")
ax.set_ylabel("Model Comparisons")
ax.set_title("Tukey HSD Test: Pairwise Model Comparisons")
plt.show()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Flatten the per-model accuracy samples and label every observation with its
# model (Model_1..Model_N in the order of m_accs). Labels follow the actual
# sample sizes instead of assuming exactly 6 models with 5 runs each.
data = np.concatenate(m_accs)
groups = [f"Model_{i}" for i, accs in enumerate(m_accs, start=1) for _ in accs]

# Perform Tukey HSD test
tukey = pairwise_tukeyhsd(data, groups, alpha=0.05)

# Convert results to DataFrame. The summary table is built once; its first
# row holds the column headers and the remaining rows hold one comparison each.
summary_rows = tukey.summary().data
results_df = pd.DataFrame(data=summary_rows[1:], columns=summary_rows[0])
results_df = results_df.rename(columns={"group1": "Model_1", "group2": "Model_2", "p-adj": "p-value"})

# Extract unique model names.
# NOTE: this rebinds `model_names` (display names in earlier cells) to the
# sorted Model_N group labels, exactly as the original cell did.
model_names = sorted(set(groups))

# Build the symmetric p-value matrix as a plain NumPy array first. Writing
# through `DataFrame.values` only works while pandas returns a writable view,
# which is not the case under pandas Copy-on-Write.
n_models = len(model_names)
position = {name: idx for idx, name in enumerate(model_names)}
p_values = np.ones((n_models, n_models))

# Fill the matrix with p-values from Tukey's test (both triangles)
for first, second, p_adj in zip(results_df["Model_1"], results_df["Model_2"], results_df["p-value"]):
    r, c = position[first], position[second]
    p_values[r, c] = p_adj
    p_values[c, r] = p_adj

# Set diagonal to NaN since models aren't compared with themselves
np.fill_diagonal(p_values, np.nan)

p_value_matrix = pd.DataFrame(p_values, index=model_names, columns=model_names)

# Mask upper triangle (diagonal included) to avoid repetition
mask = np.triu(np.ones(p_values.shape, dtype=bool))

# Plot heatmap
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(p_value_matrix, mask=mask, annot=True, cmap="coolwarm", vmin=0, vmax=1, linewidths=0.5, fmt=".3f", ax=ax)
ax.set_title("Tukey HSD Test - P-value Heatmap (Lower Triangle)")
plt.show()