# Classification report comparison with final tests

In [7]:
import json
import pandas as pd
import numpy as np

In [8]:
def load_report(path):
    """Load a classification report stored as JSON.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded JSON content (whatever ``json.load`` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, so reports with non-ASCII labels load the same everywhere.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [9]:
def compare_classification_reports(report1, report2):
    """Print and return per-label metric deltas between two classification reports.

    Every delta is ``report2 - report1``: a positive value means ``report2``
    scores higher than ``report1`` on that metric.

    Args:
        report1: Baseline report, a mapping ``label -> {'precision', 'recall',
            'f1-score', ...}``.
        report2: Report compared against the baseline, same structure.

    Returns:
        pandas.DataFrame indexed by the labels present in both reports, with
        columns ``precision``, ``recall`` and ``f1-score`` holding the deltas,
        sorted by ``f1-score`` delta in descending order. Aggregate rows
        (e.g. ``'weighted avg'``) that exist in both reports are kept in this
        table. Entries that are not metric dicts (e.g. a scalar ``'accuracy'``)
        are skipped. The frame is empty (but has the three columns) when the
        reports share no usable label.

    Side effects:
        Prints the full delta table, its first and last five rows, and the
        change of the macro average of each metric.
    """
    metrics = ['precision', 'recall', 'f1-score']
    # Summary rows a report may contain next to the real labels; they must not
    # be averaged again when computing a macro average.
    aggregate_rows = {'micro avg', 'macro avg', 'weighted avg', 'samples avg'}

    def has_metrics(entry):
        # True for per-label dicts carrying all compared metrics.
        return isinstance(entry, dict) and all(metric in entry for metric in metrics)

    def macro_average(report):
        # Unweighted mean of each metric over the real labels of one report.
        rows = [
            [entry[metric] for metric in metrics]
            for label, entry in report.items()
            if label not in aggregate_rows and has_metrics(entry)
        ]
        if not rows:
            return np.full(len(metrics), np.nan)
        return np.mean(rows, axis=0)

    # Iterate report1 in its own order (instead of a set) so ties in the sort
    # below come out in a reproducible order.
    classes = [
        cls for cls in report1
        if cls in report2 and has_metrics(report1[cls]) and has_metrics(report2[cls])
    ]

    diffs = {
        cls: {
            metric: report2[cls][metric] - report1[cls][metric]
            for metric in metrics
        }
        for cls in classes
    }

    # Passing ``columns`` keeps the metric columns even when ``diffs`` is empty,
    # so sorting by 'f1-score' cannot raise KeyError.
    df = pd.DataFrame.from_dict(diffs, orient='index', columns=metrics)
    df_sorted = df.sort_values(by='f1-score', ascending=False)

    print("\nüìà Class-wise F1-score differences:")
    print(df_sorted)

    print("\nüî• Top 5 improvements:")
    print(df_sorted.head(5))

    print("\nüíÄ Top 5 declines:")
    print(df_sorted.tail(5))

    # Macro averages, computed per report over that report's own labels.
    macro_avg1 = macro_average(report1)
    macro_avg2 = macro_average(report2)
    macro_delta = dict(zip(metrics, (macro_avg2 - macro_avg1)))

    print("\nüìä Macro average changes:")
    for metric in metrics:
        delta = macro_delta[metric]
        if np.isnan(delta):
            status = "n/a"
        elif delta > 0:
            status = "‚¨ÜÔ∏è improved"
        elif delta < 0:
            status = "‚¨áÔ∏è worse"
        else:
            # A zero delta is neither an improvement nor a regression.
            status = "unchanged"
        print(f"{metric.capitalize()}: {delta:.4f} ({status})")

    return df_sorted

In [21]:
# How does modelA compare with modelB?
# NOTE: compare_classification_reports(report1, report2) computes
# report2 - report1, i.e. modelB - modelA, so positive deltas mean modelB
# scores higher than modelA. (The earlier note described this as
# "modelA - modelB, more positive is better for A", which is the opposite sign;
# swap the two arguments if that reading is the one wanted.)
modelA = "DeeperSEnd"
modelB = "DeeperCNN"
report1 = load_report(f'models/{modelA}/classification_report.json')
report2 = load_report(f'models/{modelB}/classification_report.json')

comparison_df = compare_classification_reports(report1, report2)
comparison_df


üìà Class-wise F1-score differences:
                                            precision    recall  f1-score
Dendrocopos major_Great Spotted Woodpecker   0.339367  0.083083  0.184211
Dryocopus martius_Black Woodpecker           0.272727  0.125000  0.171429
None                                        -0.058929 -0.010612 -0.019400
Pecking                                     -0.019477 -0.666667 -0.037871
Vegetation                                  -0.027451 -0.210824 -0.051553
samples avg                                 -0.133596 -0.133676 -0.140278
Loxia curvirostra_Common Crossbill          -0.081719 -0.521795 -0.141804
weighted avg                                -0.124177 -0.239621 -0.168094
Turdus philomelos_Song Thrush               -0.117925 -0.461236 -0.189282
Fringilla coelebs_Common Chaffinch          -0.144526 -0.367786 -0.230785
micro avg                                   -0.240814 -0.239621 -0.240219
Periparus ater_Coal Tit                     -0.267453 -0.262117 -0.264866

Unnamed: 0,precision,recall,f1-score
Dendrocopos major_Great Spotted Woodpecker,0.339367,0.083083,0.184211
Dryocopus martius_Black Woodpecker,0.272727,0.125,0.171429
,-0.058929,-0.010612,-0.0194
Pecking,-0.019477,-0.666667,-0.037871
Vegetation,-0.027451,-0.210824,-0.051553
samples avg,-0.133596,-0.133676,-0.140278
Loxia curvirostra_Common Crossbill,-0.081719,-0.521795,-0.141804
weighted avg,-0.124177,-0.239621,-0.168094
Turdus philomelos_Song Thrush,-0.117925,-0.461236,-0.189282
Fringilla coelebs_Common Chaffinch,-0.144526,-0.367786,-0.230785


In [25]:
# Full report of modelA (report1) as a table: one row per report entry,
# one column per field, displayed sorted by row label.
df_report1 = pd.DataFrame.from_dict(report1, orient='index')
print(modelA)
df_report1.sort_index()

DeeperSEnd


Unnamed: 0,precision,recall,f1-score,support
Aeroplane,0.818182,0.9,0.857143,10.0
Bat,0.0,0.0,0.0,0.0
Certhia familiaris_Eurasian Treecreeper,0.402062,0.672414,0.503226,58.0
Dendrocopos major_Great Spotted Woodpecker,0.307692,0.324324,0.315789,37.0
Dryocopus martius_Black Woodpecker,0.0,0.0,0.0,24.0
Erithacus rubecula_European Robin,0.250909,0.758242,0.377049,182.0
Fringilla coelebs_Common Chaffinch,0.525145,0.788316,0.630366,2636.0
Glaucidium passerinum_Eurasian Pygmy-Owl,0.589744,1.0,0.741935,23.0
Insect,0.0,0.0,0.0,0.0
Lophophanes cristatus_Crested Tit,0.689655,0.465116,0.555556,43.0


In [23]:
# Full report held in report2 as a table: one row per report entry,
# one column per field, displayed sorted by row label.
df_report2 = pd.DataFrame.from_dict(report2, orient='index')
df_report2.sort_index()

Unnamed: 0,precision,recall,f1-score,support
Certhia familiaris_Eurasian Treecreeper,0.3125,0.064103,0.106383,78.0
Dendrocopos major_Great Spotted Woodpecker,0.647059,0.407407,0.5,27.0
Dryocopus martius_Black Woodpecker,0.272727,0.125,0.171429,24.0
Erithacus rubecula_European Robin,0.0,0.0,0.0,194.0
Fringilla coelebs_Common Chaffinch,0.380619,0.42053,0.39958,2718.0
Glaucidium passerinum_Eurasian Pygmy-Owl,0.166667,0.111111,0.133333,27.0
Lophophanes cristatus_Crested Tit,0.0,0.0,0.0,43.0
Loxia curvirostra_Common Crossbill,0.0047,0.061538,0.008734,65.0
Muscicapa striata_Spotted Flycatcher,0.869565,0.125786,0.21978,159.0
,0.2625,0.01083,0.020802,5817.0
