In [1]:
import pandas as pd
import json
import copy

In [2]:
def from_json_to_df(json_file_path):
    """
    Convert the classification json file into a simple and usable dataframe. 
    We remove the key "species count" because it is useless here.
    Then to extract specific information of classification you can do df['000001.jpg']['track_info'] and then select the image
    """
    with open(json_file_path) as f:
        d = json.load(f)
        del d['species_count'] 
    df = pd.DataFrame.from_dict(d,orient='index').reset_index(level=0).rename(columns={"index": "source_image_cropped"})
    return df

In [3]:
csv_file_path = "../../0_database/tracking/inat_validation_data_with_sources_and_scores-20230204.csv" # contains the ground truth result for each cropped image
json_file_path = '../../0_database/tracking/set2_maxim-kent.json' # contains the classification result for each cropped image
df_gt = pd.read_csv(csv_file_path)
df_classification = from_json_to_df(json_file_path)

# Extract ground truth data

In [4]:
def extract_ground_truth_label(df_gt,df_classification):
    df = df_classification.merge(df_gt[['source_image_cropped','taxon_rank','taxon_name']],how='left',on='source_image_cropped')
    df.rename(columns={'taxon_rank':'taxon_rank_ground_truth',
                       'taxon_name':'taxon_name_ground_truth'}, inplace = True)
    return df

In [5]:
df_with_ground_truth = extract_ground_truth_label(df_gt,df_classification)

In [6]:
df_with_ground_truth

Unnamed: 0,source_image_cropped,region,date,track_id,prediction,track_info,taxon_rank_ground_truth,taxon_name_ground_truth
0,000001.jpg,Quebec,2022_07_31,2.0,"[[Choristoneura parallela, 29], [Argyrotaenia ...","[[20220731233507-00-91.jpg, 2432, 1485, 2566, ...",genus,Pandemis
1,000002.jpg,Quebec,2022_07_31,3.0,"[[Grammia virguncula, 33], [Martyringa latipen...","[[20220731233507-00-91.jpg, 3389, 1064, 3522, ...",genus,Olethreutes
2,000003.jpg,Quebec,2022_07_31,4.0,"[[Idia denticulalis, 31], [Metalectra discalis...","[[20220731233507-00-91.jpg, 600, 287, 751, 449...",genus,Idia
3,000004.jpg,Quebec,2022_07_31,6.0,"[[Agonopterix robiniella, 23], [Crambidia pall...","[[20220731233507-00-91.jpg, 1689, 740, 1780, 8...",tribe,Sparganothini
4,000005.jpg,Quebec,2022_07_31,7.0,"[[Archips cerasivorana, 25], [Rhynchagrotis cu...","[[20220731233507-00-91.jpg, 3482, 2052, 3649, ...",genus,Apotomis
...,...,...,...,...,...,...,...,...
995,000996.jpg,Quebec,2022_08_02,132.0,"[[Dioryctria disclusa, 21], [Acleris albicoman...","[[20220802024729-00-52.jpg, 3737, 1790, 3859, ...",genus,Acrobasis
996,000997.jpg,Quebec,2022_08_02,131.0,"[[Phaeoura quernaria, 38], [Thyris maculata, 1...","[[20220802024801-00-52.jpg, 1896, 978, 2237, 1...",species,Blepharomastix ranalis
997,000998.jpg,Quebec,2022_08_02,134.0,"[[Macaria granitata, 26], [Harrisimemna trisig...","[[20220802024638-00-52.jpg, 1295, 1285, 1543, ...",genus,Macaria
998,000999.jpg,Quebec,2022_08_02,137.0,"[[Eufidonia discospilata, 15], [Epelis truncat...","[[20220802024638-00-52.jpg, 430, 247, 537, 365...",tribe,Sparganothini


# Create df for evaluation and apply smoothing strategies

In [7]:
def smoothing_max_value(df):
    # ex: [['20220731233507-00-91.jpg', 2432, 1485, 2566, 1642, 'Choristoneura parallela', 29], ['20220731232026-00-79.jpg', 1650, 1277, 1768, 1448, 'Archips argyrospila', 39]]
    #display(df)
    max_confidence_score = df["confidence"].max()
    max_confidence_index = df["confidence"].idxmax()
    new_label = df.iloc[max_confidence_index, df.columns.get_loc("label")]
    return max_confidence_score, new_label

def smoothing_most_frequent(df):
    display(df)
    if df.shape[0]>1:
        new_label = df.label.mode()[0]
        max_confidence_score = df.loc[df['label'] == new_label]['confidence']
    else:
        max_confidence_score, new_label = smoothing_max_value(df)
    print(max_confidence_score, new_label)
    return max_confidence_score, new_label

def smoothing_avg_max(df):
    return 1,'test'

def create_df_evaluation(df):
    # test = df_evaluation.loc['000001.jpg']['track_info']
    # smoothing_max_value(test)
    smoothing_labels_max = []
    scores_max = []
    smoothing_labels_most_frequent = []
    scores_most_frequent = []
    smoothing_labels_avg_max = []
    scores_avg_max = []
    for row in df['track_info'][:10]:
        df_row = pd.DataFrame(row, columns = ['img', 'x1', 'x2', 'y1', 'y2', 'label', 'confidence'])
        score_max, label_max = smoothing_max_value(df_row)
        scores_max.append(score_max)
        smoothing_labels_max.append(label_max)
        ##
        score_most_frequent, label_most_frequent = smoothing_most_frequent(df_row)
        scores_most_frequent.append(score_most_frequent)
        smoothing_labels_most_frequent.append(label_most_frequent)
        ##
        score_avg_max, label_avg_max = smoothing_avg_max(df_row)
        scores_avg_max.append(score_avg_max)
        smoothing_labels_avg_max.append(label_avg_max)
    df['smoothing_label_max'] = smoothing_labels_max
    df['score_max'] = scores_max
    df['smoothing_label_most_frequent'] = smoothing_labels_most_frequent
    df['score_most_frequent'] = scores_most_frequent
    df['smoothing_label_avg_max'] = smoothing_labels_avg_max
    df['score_avg_max'] = scores_avg_max
    return df

In [8]:
df_evaluation = create_df_evaluation(df_with_ground_truth)

Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,2432,1485,2566,1642,Choristoneura parallela,29
1,20220731232026-00-79.jpg,1650,1277,1768,1448,Archips argyrospila,39


1    39
Name: confidence, dtype: int64 Archips argyrospila


Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,3389,1064,3522,1230,Grammia virguncula,33
1,20220731232026-00-79.jpg,3254,1412,3443,1577,Holomelina laeta,36


0    33
Name: confidence, dtype: int64 Grammia virguncula


Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,600,287,751,449,Idia denticulalis,31


31 Idia denticulalis


Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,1689,740,1780,880,Agonopterix robiniella,23
1,20220731232026-00-79.jpg,1326,1602,1421,1735,Agonopterix robiniella,92
2,20220731230656-00-67.jpg,49,1874,165,2004,Pyrausta orphisalis,22
3,20220731235027-00-100.jpg,463,2009,581,2160,Aglossa cuprina,71
4,20220731225403-00-59.jpg,629,1534,723,1694,Choristoneura parallela,15
5,20220731234335-00-96.jpg,831,951,926,1113,Diedra cockerellana,21
6,20220731235615-00-102.jpg,194,1577,277,1717,Holomelina laeta,28
7,20220731225108-00-58.jpg,1028,1650,1120,1775,Platynota exasperatana,9
8,20220731231928-00-79.jpg,308,770,401,901,Cydia latiferreana,94
9,20220731235622-00-102.jpg,962,845,1057,952,Eucosma olivaceana,39


0    23
1    92
Name: confidence, dtype: int64 Agonopterix robiniella


Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,3482,2052,3649,2147,Archips cerasivorana,25
1,20220731232026-00-79.jpg,3538,1959,3644,2124,Archips cerasivorana,23
2,20220731230656-00-67.jpg,3968,1214,4096,1354,Sthenopis purpurascens,19
3,20220731235027-00-100.jpg,3899,1197,4096,1375,Synchlora aerata,43
4,20220731225403-00-59.jpg,3623,867,3758,1055,Agonopterix robiniella,61
5,20220731234335-00-96.jpg,138,490,294,638,Callosamia promethea,14
6,20220731235615-00-102.jpg,319,416,453,560,Achatodes zeae,34
7,20220731225108-00-58.jpg,626,1543,728,1696,Argyrotaenia quercifoliana,36
8,20220731231928-00-79.jpg,2012,528,2125,684,Archips argyrospila,33
9,20220731235622-00-102.jpg,2558,0,2686,135,Argyrotaenia quercifoliana,48


0    25
1    23
Name: confidence, dtype: int64 Archips cerasivorana


Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,3621,264,3719,391,Korscheltellus gracilis,14
1,20220731232026-00-79.jpg,3982,397,4096,520,Triphosa haesitata,29
2,20220731230656-00-67.jpg,3995,110,4096,240,Pandemis canadana,5
3,20220731235027-00-100.jpg,3373,0,3538,130,Aglossa cuprina,34
4,20220731225403-00-59.jpg,3989,10,4096,214,Argyrotaenia quadrifasciana,21
5,20220731234335-00-96.jpg,1240,481,1388,636,Cenopis reticulatana,45
6,20220731235615-00-102.jpg,1850,856,1983,1030,Argyrotaenia quadrifasciana,97
7,20220731225108-00-58.jpg,2039,1411,2152,1521,Utetheisa ornatrix,19
8,20220731231928-00-79.jpg,2497,180,2618,278,Archips packardiana,16


4    21
6    97
Name: confidence, dtype: int64 Argyrotaenia quadrifasciana


Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,3335,1394,3470,1488,Archips argyrospila,17
1,20220731232026-00-79.jpg,2831,1408,2928,1541,Tarache aprica,17
2,20220731230656-00-67.jpg,3173,1463,3273,1600,Polygrammate hebraeicum,5
3,20220731235027-00-100.jpg,3553,608,3678,739,Galleria mellonella,23
4,20220731225403-00-59.jpg,2400,1201,2502,1365,Utetheisa ornatrix,5
5,20220731234335-00-96.jpg,3064,1419,3174,1555,Archips argyrospila,33
6,20220731235615-00-102.jpg,3073,173,3183,310,Extrakunia comstocki,20
7,20220731225108-00-58.jpg,2712,1181,2828,1291,Haploa confusa,19
8,20220731231928-00-79.jpg,2829,1408,2929,1541,Callopistria cordata,12
9,20220731235622-00-102.jpg,3552,1077,3636,1197,Thyris sepulchralis,15


0     17
5     33
12    10
Name: confidence, dtype: int64 Archips argyrospila


Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,3239,607,3386,708,Cydia latiferreana,17


17 Cydia latiferreana


Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,467,2018,543,2132,Endothenia hebesana,16


16 Endothenia hebesana


Unnamed: 0,img,x1,x2,y1,y2,label,confidence
0,20220731233507-00-91.jpg,533,856,651,1001,Epelis truncataria,30
1,20220731232026-00-79.jpg,3,633,110,739,Apogeshna stenialis,37
2,20220731230656-00-67.jpg,586,456,721,574,Argyrotaenia quercifoliana,54
3,20220731235027-00-100.jpg,507,551,614,677,Elophila ekthlipsis,31
4,20220731225403-00-59.jpg,1685,164,1776,292,Polygrammate hebraeicum,57
5,20220731234335-00-96.jpg,3602,1331,3701,1453,Haploa confusa,43
6,20220731235615-00-102.jpg,3551,1076,3636,1197,Thyris sepulchralis,14
7,20220731225108-00-58.jpg,3814,1212,3900,1336,Haploa confusa,26
8,20220731231928-00-79.jpg,3092,1160,3164,1295,Phobetron pithecium,9
9,20220731235622-00-102.jpg,3486,1314,3555,1437,Psychomorpha epimenis,44


4     57
19    34
24    45
Name: confidence, dtype: int64 Polygrammate hebraeicum


ValueError: Length of values (10) does not match length of index (1000)

def create