## McNemar's Test

In [None]:
import numpy as np
from scipy.stats import chi2

In [None]:
# Per-model confusion-matrix cells. Key pattern:
#   '<class>_correct'   -> samples of <class> predicted correctly
#   '<class>_as_<other>' -> samples of <class> predicted as <other>
# NOTE(review): the cell totals differ between models (base 7500, vGAN 9375,
# cGAN 9375, SMOTE 10000), so these matrices do not appear to come from one
# shared test set -- confirm before treating the comparisons as paired.
base_data = dict(
    galaxy_correct=3864, galaxy_as_qso=30, galaxy_as_star=24,
    qso_correct=715, qso_as_galaxy=27, qso_as_star=24,
    star_correct=2710, star_as_galaxy=23, star_as_qso=83,
)

vgan_data = dict(
    galaxy_correct=3783, galaxy_as_qso=25, galaxy_as_star=44,
    qso_correct=2527, qso_as_galaxy=54, qso_as_star=128,
    star_correct=2612, star_as_galaxy=25, star_as_qso=177,
)

cgan_data = dict(
    galaxy_correct=3756, galaxy_as_qso=7, galaxy_as_star=89,
    qso_correct=2235, qso_as_galaxy=45, qso_as_star=429,
    star_correct=2702, star_as_galaxy=20, star_as_qso=92,
)

smote_data = dict(
    galaxy_correct=5135, galaxy_as_qso=41, galaxy_as_star=29,
    qso_correct=829, qso_as_galaxy=48, qso_as_star=114,
    star_correct=3732, star_as_galaxy=14, star_as_qso=58,
)

def calculate_b_c_corrected(model1, model2):
    """Estimate McNemar's b and c from two models' misclassification counts.

    For every off-diagonal confusion-matrix cell, the surplus of errors in
    one model over the other is credited to that model's side:

    * ``b`` accumulates ``model1[cell] - model2[cell]`` where model1 has
      more errors (intended as "model1 wrong, model2 right").
    * ``c`` accumulates ``model2[cell] - model1[cell]`` where model2 has
      more errors (intended as "model1 right, model2 wrong").

    NOTE(review): this works from aggregate counts only, so it yields the
    net difference per cell rather than true per-sample discordant pairs.

    Args:
        model1: Mapping with the six ``'<class>_as_<other>'`` error cells
            for galaxy, qso and star.
        model2: Mapping with the same keys for the second model.

    Returns:
        Tuple ``(b, c)``.
    """
    error_cells = (
        'galaxy_as_qso', 'galaxy_as_star',
        'qso_as_galaxy', 'qso_as_star',
        'star_as_galaxy', 'star_as_qso',
    )

    b = 0
    c = 0
    for cell in error_cells:
        surplus = model1[cell] - model2[cell]
        if surplus > 0:
            # model1 made more of this error than model2
            b += surplus
        else:
            # model2 made at least as many; a zero surplus adds nothing
            c -= surplus

    return b, c

# Derive (b, c) for every model pair. In each call model1 is the first
# argument, so b reflects surplus errors of the first-passed model and c
# those of the second.

# -- each augmented model against the baseline (baseline passed as model1)
b_vgan_base, c_vgan_base = calculate_b_c_corrected(model1=base_data, model2=vgan_data)
b_cgan_base, c_cgan_base = calculate_b_c_corrected(model1=base_data, model2=cgan_data)
b_smote_base, c_smote_base = calculate_b_c_corrected(model1=base_data, model2=smote_data)

# -- augmented models against each other
b_vgan_cgan, c_vgan_cgan = calculate_b_c_corrected(model1=vgan_data, model2=cgan_data)
b_vgan_smote, c_vgan_smote = calculate_b_c_corrected(model1=vgan_data, model2=smote_data)
b_cgan_smote, c_cgan_smote = calculate_b_c_corrected(model1=cgan_data, model2=smote_data)

# Report every pair; the pairwise group is set off by a leading newline.
_b_c_report = (
    ("vGAN-CNN vs. Base Model: b =", b_vgan_base, c_vgan_base),
    ("cGAN-CNN vs. Base Model: b =", b_cgan_base, c_cgan_base),
    ("SMOTE vs. Base Model: b =", b_smote_base, c_smote_base),
    ("\nvGAN-CNN vs. cGAN-CNN: b =", b_vgan_cgan, c_vgan_cgan),
    ("vGAN-CNN vs. SMOTE: b =", b_vgan_smote, c_vgan_smote),
    ("cGAN-CNN vs. SMOTE: b =", b_cgan_smote, c_cgan_smote),
)
for _label, _b_count, _c_count in _b_c_report:
    print(_label, _b_count, ", c =", _c_count)


In [None]:
def mcnemars_test(b, c):
    """Run McNemar's chi-squared test with continuity correction.

    The statistic is ``(|b - c| - 1)**2 / (b + c)`` with the corrected
    numerator clamped at zero, so that equal discordant counts give a
    statistic of exactly 0 instead of ``1 / (b + c)``.

    Args:
        b: Count of cases where only the first model is wrong.
        c: Count of cases where only the second model is wrong.

    Returns:
        Tuple ``(chi_squared, p_value)``; the p-value is the upper tail of
        a chi-squared distribution with 1 degree of freedom. When
        ``b + c == 0`` the statistic is 0 and the p-value is 1.
    """
    df = 1
    discordant = b + c
    if discordant == 0:
        # No discordant pairs: nothing to test, avoid dividing by zero.
        chi_squared = 0
    else:
        # The continuity correction must never increase the statistic:
        # without the clamp, b == c would give (0 - 1)**2 = 1 in the numerator.
        corrected_gap = max(np.abs(b - c) - 1, 0)
        chi_squared = corrected_gap ** 2 / discordant
    p_value = chi2.sf(chi_squared, df)
    return chi_squared, p_value

# Define the significance level
alpha = 0.05


def _report_comparison(first, second, chi_squared, p_value, alpha,
                       blank_line_before=True):
    """Print the McNemar result for one model pair.

    Args:
        first: Display name of the first model.
        second: Display name of the second model.
        chi_squared: Test statistic to print.
        p_value: P-value to print and compare with ``alpha``.
        alpha: Significance threshold.
        blank_line_before: Whether the heading starts with a newline.
    """
    prefix = "\n" if blank_line_before else ""
    print(f"{prefix}{first} vs. {second}:")
    print("Chi-squared:", chi_squared)
    print("P-value:", p_value)
    if p_value < alpha:
        print(f"Statistically significant difference between {first} and {second}.")
    else:
        print(f"No statistically significant difference between {first} and {second}.")


# Discordant counts for each model compared to the base model.
# These are the values calculate_b_c_corrected(base, <model>) yields for the
# confusion-matrix data: b is the surplus of base-model errors, c the surplus
# of the compared model's errors. Setting b == c (as an earlier revision did,
# using the difference in total error counts for both) makes the test
# degenerate: the statistic no longer depends on the data.
b_vGAN_Base = 5    # For vGAN-CNN in comparison to Base
c_vGAN_Base = 247  # For vGAN-CNN in comparison to Base

b_cGAN_Base = 26   # For cGAN-CNN in comparison to Base
c_cGAN_Base = 497  # For cGAN-CNN in comparison to Base

b_Smote_Base = 34   # For Smote in comparison to Base
c_Smote_Base = 127  # For Smote in comparison to Base


# Perform McNemar's test for each model compared to the base model
chi_squared_vGAN_Base, p_value_vGAN_Base = mcnemars_test(b_vGAN_Base, c_vGAN_Base)
chi_squared_cGAN_Base, p_value_cGAN_Base = mcnemars_test(b_cGAN_Base, c_cGAN_Base)
chi_squared_Smote_Base, p_value_Smote_Base = mcnemars_test(b_Smote_Base, c_Smote_Base)

# Print the results for comparisons with the base model
print("Comparisons with Base Model:")
_report_comparison("vGAN-CNN", "Base", chi_squared_vGAN_Base, p_value_vGAN_Base,
                   alpha, blank_line_before=False)
_report_comparison("cGAN-CNN", "Base", chi_squared_cGAN_Base, p_value_cGAN_Base, alpha)
_report_comparison("Smote CNN", "Base", chi_squared_Smote_Base, p_value_Smote_Base, alpha)

# Discordant counts for each pair of augmented models, oriented so that b
# belongs to the first-named model of the pair and c to the second.
b_vGAN_cGAN = 117  # For vGAN-CNN in comparison to cGAN-CNN
c_vGAN_cGAN = 346  # For vGAN-CNN in comparison to cGAN-CNN

b_cGAN_Smote = 415  # For cGAN-CNN in comparison to Smote
c_cGAN_Smote = 37   # For cGAN-CNN in comparison to Smote

b_Smote_vGAN = 16   # For Smote in comparison to vGAN-CNN
c_Smote_vGAN = 165  # For Smote in comparison to vGAN-CNN

# Perform McNemar's test for each pair of models
chi_squared_vGAN_cGAN, p_value_vGAN_cGAN = mcnemars_test(b_vGAN_cGAN, c_vGAN_cGAN)
chi_squared_cGAN_Smote, p_value_cGAN_Smote = mcnemars_test(b_cGAN_Smote, c_cGAN_Smote)
chi_squared_Smote_vGAN, p_value_Smote_vGAN = mcnemars_test(b_Smote_vGAN, c_Smote_vGAN)

# Print the results for comparisons among models
print("\nComparisons among Models:")
_report_comparison("vGAN-CNN", "cGAN-CNN", chi_squared_vGAN_cGAN, p_value_vGAN_cGAN,
                   alpha, blank_line_before=False)
_report_comparison("cGAN-CNN", "Smote CNN", chi_squared_cGAN_Smote, p_value_cGAN_Smote, alpha)
_report_comparison("Smote CNN", "vGAN-CNN", chi_squared_Smote_vGAN, p_value_Smote_vGAN, alpha)

In [None]:
import numpy as np
from scipy.stats import chi2

def mcnemars_test(b, c):
    """Run McNemar's chi-squared test with continuity correction.

    The statistic is ``(|b - c| - 1)**2 / (b + c)`` with the corrected
    numerator clamped at zero, so that equal discordant counts give a
    statistic of exactly 0 instead of ``1 / (b + c)``.

    Args:
        b: Count of cases where only the first model is wrong.
        c: Count of cases where only the second model is wrong.

    Returns:
        Tuple ``(chi_squared, p_value)``; the p-value is the upper tail of
        a chi-squared distribution with 1 degree of freedom. When
        ``b + c == 0`` the statistic is 0 and the p-value is 1.
    """
    df = 1
    discordant = b + c
    if discordant == 0:
        # No discordant pairs: nothing to test, avoid dividing by zero.
        chi_squared = 0
    else:
        # The continuity correction must never increase the statistic:
        # without the clamp, b == c would give (0 - 1)**2 = 1 in the numerator.
        corrected_gap = max(np.abs(b - c) - 1, 0)
        chi_squared = corrected_gap ** 2 / discordant
    p_value = chi2.sf(chi_squared, df)
    return chi_squared, p_value

# Significance threshold shared by every comparison below.
alpha = 0.05

# (b, c) discordant counts per model pair, keyed by the label that is printed.
# The numbers are entered by hand; they should match what the b/c calculation
# cell prints for the same pair.
comparisons = dict([
    ("vGAN-CNN vs. Base Model", (5, 247)),
    ("cGAN-CNN vs. Base Model", (26, 497)),
    ("SMOTE vs. Base Model", (34, 127)),
    ("vGAN-CNN vs. cGAN-CNN", (117, 346)),
    ("vGAN-CNN vs. SMOTE", (165, 16)),
    ("cGAN-CNN vs. SMOTE", (415, 37)),
])

# Run the test for each pair and print statistic, p-value and verdict.
for comp_name, (b, c) in comparisons.items():
    chi_squared, p_value = mcnemars_test(b, c)
    print(f"{comp_name}:")
    print("Chi-squared:", chi_squared)
    print("P-value:", p_value)
    print(
        "Statistically significant difference."
        if p_value < alpha
        else "No statistically significant difference."
    )
    print()  # blank line between comparisons
