In [1]:
import pandas as pd

In [16]:
def compute_precision_recall(df, threshold=0.5):
    """Compute per-antenna precision and recall from prediction scores.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'antenna' (grouping key), 'label' (ground
        truth, compared against 0 and 1) and 'pred' (score compared against
        ``threshold``). The frame is read only; no column is added to it.
    threshold : float, default 0.5
        A row counts as a predicted positive when ``pred >= threshold``.

    Returns
    -------
    pandas.DataFrame
        One row per antenna with the columns 'antenna', 'Precision',
        'Recall' and 'Total Count', sorted by 'Total Count' in descending
        order. 'Precision' is NaN for an antenna with no predicted positives
        and 'Recall' is NaN for an antenna with no positive labels (0 / 0).
    """
    predicted_positive = df['pred'] >= threshold
    actual_positive = df['label'] == 1
    actual_negative = df['label'] == 0

    # Build the per-row outcome flags on a fresh frame instead of writing a
    # 'pred_label' column into the caller's DataFrame.
    outcomes = df[['antenna']].assign(
        TP=(actual_positive & predicted_positive).astype(int),
        FP=(actual_negative & predicted_positive).astype(int),
        FN=(actual_positive & ~predicted_positive).astype(int),
    )

    # Built-in aggregations replace groupby().apply(lambda ...): they are
    # vectorized, do not emit the groupby-apply warning, and still yield the
    # expected columns when the input has no rows.
    grouped = outcomes.groupby('antenna')
    metrics = grouped[['TP', 'FP', 'FN']].sum()
    metrics['Total Count'] = grouped.size()  # number of samples per antenna
    metrics = metrics.reset_index()

    # Calculate Precision and Recall
    metrics['Precision'] = metrics['TP'] / (metrics['TP'] + metrics['FP'])
    metrics['Recall'] = metrics['TP'] / (metrics['TP'] + metrics['FN'])

    return metrics[['antenna', 'Precision', 'Recall', 'Total Count']].sort_values(by='Total Count', ascending=False)

In [17]:
# Load the per-image metadata table; the path is relative to the notebook's
# working directory. NOTE(review): downstream cells expect at least the
# 'antenna', 'label' and 'pred' columns — confirm against the CSV header.
df = pd.read_csv('test_images/metadata.csv')

In [18]:
# Show the loaded metadata (the notebook elides the middle rows when rendering).
df

Unnamed: 0,antenna,label,pred,datetime,image_path
0,SWISS-Landschlacht_62,1,0.951569,2022-07-05 04:00:00,test_images/1/951_SWISS-Landschlacht_62_2022-0...
1,ALMATY_58,1,0.971232,2022-06-17 02:10:00,test_images/1/971_ALMATY_58_2022-06-17_02_10_0...
2,SWISS-Landschlacht_62,1,0.804718,2022-05-03 07:04:00,test_images/1/804_SWISS-Landschlacht_62_2022-0...
3,ALASKA-HAARP_62,1,0.381185,2023-05-05 03:54:00,test_images/1/381_ALASKA-HAARP_62_2023-05-05_0...
4,ALASKA-COHOE_63,1,0.979505,2023-08-14 23:37:00,test_images/1/979_ALASKA-COHOE_63_2023-08-14_2...
...,...,...,...,...,...
5071,ALMATY_58,0,0.031195,2021-04-23 02:19:00,test_images/0/31_ALMATY_58_2021-04-23_02_19_00...
5072,TRIEST_57,0,0.570124,2023-07-09 13:23:00,test_images/0/570_TRIEST_57_2023-07-09_13_23_0...
5073,ALASKA-HAARP_62,0,0.033012,2022-09-12 04:39:00,test_images/0/33_ALASKA-HAARP_62_2022-09-12_04...
5074,ALASKA-HAARP_62,0,0.026951,2022-08-31 18:07:00,test_images/0/26_ALASKA-HAARP_62_2022-08-31_18...


In [19]:
# Compute per-antenna precision and recall at the default threshold (0.5);
# rows come back sorted by sample count, largest first.
precision_recall_table = compute_precision_recall(df)
precision_recall_table

  metrics = df.groupby('antenna').apply(lambda x: pd.Series({


Unnamed: 0,antenna,Precision,Recall,Total Count
11,HUMAIN_59,0.352632,0.943662,418
6,Australia-ASSA_62,0.866029,0.918782,413
1,ALASKA-HAARP_62,0.780347,0.957447,387
10,GLASGOW_01,0.794737,0.92638,343
23,SWISS-Landschlacht_62,0.77551,0.904762,342
0,ALASKA-COHOE_63,0.816901,0.95082,323
4,AUSTRIA-UNIGRAZ_01,0.684615,0.89899,303
13,INDIA-OOTY_02,0.488722,0.902778,276
7,BIR_01,0.627907,0.952941,270
9,GERMANY-DLR_63,0.816514,0.946809,231
