# Classification Results

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pingouin as pg
import scikit_posthocs as sp
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
from statsmodels.stats.anova import AnovaRM
from tbparse import SummaryReader

In [None]:
# Load the scalar logs of the three TensorBoard run directories with tbparse.
# (The SummaryReader approach was originally suggested by ChatGPT.)
# Run directories are resolved relative to the current working directory.
relative_path = os.path.curdir

reader = SummaryReader(os.path.join(relative_path, "cnnweak_['drsbru']_20250507-164647"))
df_drsbru_acc = reader.scalars

reader = SummaryReader(os.path.join(relative_path, "cnnweak_['drsprg', 'drsbru']_20250507-205216"))
df_both_acc = reader.scalars

reader = SummaryReader(os.path.join(relative_path, "cnnweak_['drsprg']_20250507-155953"))
df_drsprg_acc = reader.scalars

In [None]:
# Peek at the first rows of the combined-run scalars before any processing.
df_both_acc.head(5)

In [None]:
# Process data: derive the 'radiomics' flag and a clean 'model' label from the tag.

# Drop the literal substring 'acc' from every tag.
drsprg_tag = df_drsprg_acc['tag'].str.replace('acc', '', regex=False)
df_drsprg_acc['tag'] = drsprg_tag

# Flag rows whose tag mentions radiomics; stored as the strings 'True'/'False'
# because later cells compare against the string "False".
df_drsprg_acc['radiomics'] = (
    drsprg_tag.str.contains(r'RADIOMICS|rad', regex=True)
    .map({True: 'True', False: 'False'})
)

# Model label = tag without the radiomics marker and the "3D CNN +" prefix.
# regex=False is explicit: as a regex, "+" would be a quantifier and the literal
# "3D CNN +" would never match. The final strip removes whitespace left behind
# by the prefix removal (same as the combined-data cell does).
df_drsprg_acc['model'] = (
    drsprg_tag.str.replace(r'RADIOMICS|rad', '', regex=True)
    .str.strip()
    .str.replace("3D CNN +", "", regex=False)
    .str.replace("BASELINE", "2D ResNet18", regex=False)
    .str.strip()
)

In [None]:
# Bar chart of the mean 'value' per model for the drsprg run, one bar per
# radiomics flag, with standard-deviation error bars.
fig, ax = plt.subplots(figsize=(8, 5))

sns.barplot(
    data=df_drsprg_acc,
    x='model',
    y='value',
    hue='radiomics',
    estimator=np.mean,
    errorbar="sd",
    capsize=.1,
    err_kws={"color": "0", "linewidth": 1.5},
    palette='Purples',
    width=0.7,
    edgecolor="0",
    linewidth=1.5,
    ax=ax,
)

ax.set_title("10-Fold CV Accuracies With drsprg Data")
ax.set_xlabel("Classification Models")
ax.set_ylabel("Accuracy")
ax.legend(title="Radiomic Features")
ax.grid()
ax.set_ylim(0, 0.9)
fig.tight_layout()
fig.savefig('drsprg_accuracy.pdf', bbox_inches='tight')
plt.show()

# DRSBRU

In [None]:

# Process the drsbru run: derive the 'radiomics' flag and a clean 'model' label.

# Drop the literal substring 'acc' from every tag.
drsbru_tag = df_drsbru_acc['tag'].str.replace('acc', '', regex=False)
df_drsbru_acc['tag'] = drsbru_tag

# Flag rows whose tag mentions radiomics; stored as the strings 'True'/'False'.
df_drsbru_acc['radiomics'] = (
    drsbru_tag.str.contains(r'RADIOMICS|rad', regex=True)
    .map({True: 'True', False: 'False'})
)

# Model label = tag without the radiomics marker and the "3D CNN +" prefix.
# regex=False is explicit: as a regex, "+" would be a quantifier and the literal
# "3D CNN +" would never match. The final strip removes whitespace left behind
# by the prefix removal (same as the combined-data cell does).
df_drsbru_acc['model'] = (
    drsbru_tag.str.replace(r'RADIOMICS|rad', '', regex=True)
    .str.strip()
    .str.replace("3D CNN +", "", regex=False)
    .str.replace("BASELINE", "2D ResNet18", regex=False)
    .str.strip()
)

In [None]:
# Bar chart of the mean 'value' per model for the drsbru run, one bar per
# radiomics flag, with standard-deviation error bars.
fig, ax = plt.subplots(figsize=(8, 5))

sns.barplot(
    data=df_drsbru_acc,
    x='model',
    y='value',
    hue='radiomics',
    estimator=np.mean,
    errorbar="sd",
    capsize=.1,
    err_kws={"color": "0", "linewidth": 1.5},
    palette='Greens',
    width=0.7,
    edgecolor="0",
    linewidth=1.5,
    ax=ax,
)

ax.set_title("10-Fold CV Accuracies With drsbru Data")
ax.set_xlabel("Classification Models")
ax.set_ylabel("Accuracy")
ax.legend(title="Radiomic Features")
ax.grid()
ax.set_ylim(0, 0.9)
fig.tight_layout()
fig.savefig('drsbru_accuracy.pdf', bbox_inches='tight')
plt.show()

# Both datasets (drsprg and drsbru)

In [None]:
# Process the combined run: derive the 'radiomics' flag and a clean 'model' label.

# Drop the literal substring 'acc' from every tag.
both_tag = df_both_acc['tag'].str.replace('acc', '', regex=False)
df_both_acc['tag'] = both_tag

# Flag rows whose tag mentions radiomics; stored as the strings 'True'/'False'
# because later cells compare against the string "False".
df_both_acc['radiomics'] = (
    both_tag.str.contains(r'RADIOMICS|rad', regex=True)
    .map({True: 'True', False: 'False'})
)

# Model label = tag without the radiomics marker and the "3D CNN +" prefix.
# regex=False is explicit: as a regex, "+" would be a quantifier and the literal
# "3D CNN +" would never match.
df_both_acc['model'] = (
    both_tag.str.replace(r'RADIOMICS|rad', '', regex=True)
    .str.strip()
    .str.replace("3D CNN +", "", regex=False)
    .str.replace("BASELINE", "2D ResNet18", regex=False)
    .str.strip()
)

# Show only a preview of the processed frame instead of dumping all rows.
df_both_acc.head()

In [None]:
# Bar chart of the mean 'value' per model for the combined run, one bar per
# radiomics flag, with standard-deviation error bars.
fig, ax = plt.subplots(figsize=(8, 5))

sns.barplot(
    data=df_both_acc,
    x='model',
    y='value',
    hue='radiomics',
    estimator=np.mean,
    errorbar="sd",
    capsize=.1,
    err_kws={"color": "0", "linewidth": 1.5},
    palette='Blues',
    width=0.7,
    edgecolor="0",
    linewidth=1.5,
    ax=ax,
)

ax.set_title("10-Fold CV Accuracies With drsbru and drsprg Data")
ax.set_xlabel("Classification Models")
ax.set_ylabel("Accuracy")
ax.legend(title="Radiomic Features")
ax.grid()
ax.set_ylim(0, 0.9)
fig.tight_layout()
fig.savefig('both_accuracy.pdf', bbox_inches='tight')
plt.show()

# Violin plot with both datasets

In [None]:
# Split violin plot of 'value' per model for the combined run; the two halves
# of each violin correspond to the two radiomics flags.
fig, ax = plt.subplots(figsize=(8, 5))
sns.violinplot(
    data=df_both_acc,
    x='model',
    y='value',
    hue='radiomics',
    split=True,
    palette='Blues',
    ax=ax,
)
ax.set_title("10-Fold CV Accuracies With drsbru and drsprg Data")
ax.set_ylabel("Accuracy")
ax.set_xlabel("Model")
ax.grid()
fig.savefig('violin_combined.pdf', bbox_inches='tight')
plt.show()

# Checking for normally distributed data

In [None]:
# Wide table for the normality checks: rows whose radiomics flag is the string
# "False", reshaped to one row per step and one column per model.
no_radiomics = df_both_acc["radiomics"].eq("False")
df_both_stat = df_both_acc.loc[no_radiomics].pivot(
    index="step",
    columns="model",
    values="value",
)
df_both_stat

In [None]:
# Shapiro-Wilk normality test for every model column of the wide table.
# 'step' is the pivot index, not a column, so no column needs to be excluded.
for model in df_both_stat.columns:
    # Drop missing cells so a gap in the pivot does not turn the result into nan.
    result = stats.shapiro(df_both_stat[model].dropna())
    print(model)
    print(result)

In [None]:
# Normal Q-Q plot for every model column of df_both_stat, three panels per row.
# The grid is sized from the number of columns instead of assuming exactly six;
# with six models this is the same 2x3, 15x10 inch figure as before.
qq_models = list(df_both_stat.columns)
n_cols = 3
n_rows = max(1, -(-len(qq_models) // n_cols))  # ceiling division, at least one row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
axes = axes.flatten()
for ax, model in zip(axes, qq_models):
    # Missing cells are dropped so they cannot distort the fitted line.
    stats.probplot(df_both_stat[model].dropna(), dist="norm", plot=ax)
    ax.set_title(f"Q-Q Plot: {model}")
# Hide panels that have no model to show.
for ax in axes[len(qq_models):]:
    ax.set_visible(False)
plt.tight_layout()
plt.savefig('QQ_plot_class.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Repeated-measures ANOVA on 'value' with 'model' as the within factor and
# 'step' as the subject identifier; results rounded to three decimals.
# df_both_long is only another name for df_both_acc (no copy is made).
# NOTE(review): this frame is not filtered on 'radiomics', unlike df_both_stat
# used for the normality checks - confirm that is intended.
df_both_long = df_both_acc
rm_anova = pg.rm_anova(
    data=df_both_long,
    dv='value',
    within='model',
    subject='step',
    correction=True,
).round(3)
rm_anova

In [None]:
# Pairwise comparisons between models (within factor 'model', subject 'step')
# with Bonferroni-adjusted p-values.
bonferroni = pg.pairwise_tests(
    data=df_both_long,
    dv='value',
    within='model',
    subject='step',
    padjust='bonf',
)
bonferroni