In [1]:
import torch
import pandas as pd
import os.path as osp
import numpy as np
from ast import literal_eval
from PIL import Image

In [2]:
## Load the caption-set table for the chosen dataset and resolve image paths.
datasetname = 'ArtEmis' #ArtEmis, Flickr30K,  VizWiz, COCO
assert datasetname == 'ArtEmis' ## Only work with ArtEmis

# ArtEmis keeps its files one level deeper (<name>_IdC/) and ships a `split`
# column; the other datasets sit directly under the dataset root.
is_artemis = datasetname == 'ArtEmis'
dataset_root = f'../Dataset/{datasetname}'
data_root = f'{dataset_root}/{datasetname}_IdC' if is_artemis else dataset_root

datafile = f'{data_root}/{datasetname}_IdCII_3ErrType.csv'
img_dir = f"{data_root}/Images/rawImages"

df = pd.read_csv(datafile)
if is_artemis:
    # Only the rows marked as the test split are evaluated.
    df = df[df['split'] == 'test']
df.reset_index(drop=True, inplace=True)
print('Number of caption sets in the test set:', len(df))

# Turn the bare file names into paths under the image directory.
df['img_files'] = [osp.join(img_dir, file_name) for file_name in df['img_files']]

Number of caption sets in the test set: 15884


In [3]:
# The caption columns are read from the CSV as strings holding Python list
# literals; turn them back into real lists.
def _parse_list_literal(value):
    """Return `value` parsed with `literal_eval`, or unchanged if already a list.

    Passing lists through makes this cell safe to re-run: `literal_eval`
    raises ValueError when handed an already-parsed list. Any other
    non-string value (e.g. NaN from an empty CSV field) still raises.
    """
    if isinstance(value, list):
        return value
    return literal_eval(value)


for _list_col in ('captSet_text', 'refCaptSet'):
    df[_list_col] = df[_list_col].apply(_parse_list_literal)

In [4]:
from bert_score import BERTScorer
# One shared scorer instance, configured for English with baseline rescaling.
scorer = BERTScorer(
    lang="en",
    rescale_with_baseline=True,
)

In [5]:
no_imgs = len(df)

# Flatten the table into two parallel lists: one entry per candidate caption,
# paired with the reference captions of the set it came from.
cands, refs = [], []
# NOTE: `row` deliberately remains bound to the last record after this loop.
for _, row in df.iterrows():
    set_candidates = row['captSet_text']
    set_references = row['refCaptSet']
    cands.extend(set_candidates)
    # Every candidate in a set is scored against the same reference list.
    refs.extend([set_references] * len(set_candidates))

# Score all candidates in one call; the three results are unpacked as
# precision, recall and F1 (one value per candidate, in `cands` order).
scores_P, scores_R, scores_F1 = scorer.score(cands, refs)

In [6]:
# Reshape the flat per-candidate scores into one row per caption set and one
# column per caption position.
#
# The row width is derived from `df` itself rather than from a loop variable
# left over from another cell, and it is checked to be the same for every
# set: a ragged table would otherwise reshape into silently misaligned rows.
_capts_per_set = df['captSet_text'].map(len)
if _capts_per_set.nunique() != 1:
    raise ValueError(
        "Expected every caption set to contain the same number of captions, "
        f"got sizes {sorted(_capts_per_set.unique().tolist())}"
    )
_n_capts = int(_capts_per_set.iloc[0])

scores_P = np.array(scores_P).reshape([-1, _n_capts])
scores_R = np.array(scores_R).reshape([-1, _n_capts])
scores_F1 = np.array(scores_F1).reshape([-1, _n_capts])

In [7]:
no_errType = 3  # columns 1..no_errType of each score row are compared against column 0


def report_pairwise_accuracy(scores, metric_name, dataset, n_err_types):
    """Print how often column 0 scores strictly higher than each other column.

    For every error type k in 1..n_err_types, a caption set counts as correct
    when ``scores[i, 0] > scores[i, k]``; ties and NaN comparisons count as
    incorrect. One accuracy line is printed per error type, followed by the
    accuracy pooled over all error types.

    Args:
        scores: 2-D array-like with at least ``n_err_types + 1`` columns,
            one row per caption set.
        metric_name: Label printed above the results (e.g. "scores_P").
        dataset: Dataset name printed in the header line.
        n_err_types: Number of columns (after column 0) to compare.

    Returns:
        Tuple ``(n_correct, n_incorrect)`` pooled over all error types.

    Raises:
        ValueError: If ``scores`` is not 2-D or has too few columns.
    """
    scores = np.asarray(scores)
    if scores.ndim != 2 or scores.shape[1] <= n_err_types:
        raise ValueError(
            f"{metric_name}: expected a 2-D array with at least "
            f"{n_err_types + 1} columns, got shape {scores.shape}"
        )

    def _ratio(hits, total):
        # An empty score matrix yields nan instead of a ZeroDivisionError.
        return hits / total if total else float("nan")

    n_sets = len(scores)
    print("Dataset:", dataset, ", Number of caption sets:", n_sets)
    print(metric_name)

    corr_all = 0
    for err_type in range(1, n_err_types + 1):
        # int() keeps the printed counts and ratios as plain Python numbers.
        corr = int(np.count_nonzero(scores[:, 0] > scores[:, err_type]))
        corr_all += corr
        print(f"Accuracy at errType={err_type}:{corr}/{n_sets}=", _ratio(corr, n_sets))

    total_all = n_sets * n_err_types
    print(f"Accuracy for all types:{corr_all}/{total_all}=", _ratio(corr_all, total_all))
    return corr_all, total_all - corr_all


# Same report for precision, recall and F1; the pooled counters stay bound to
# the values from the last metric.
for _metric_name, _metric_scores in (
    ("scores_P", scores_P),
    ("scores_R", scores_R),
    ("scores_F1", scores_F1),
):
    cnt_corr_all, cnt_incorr_all = report_pairwise_accuracy(
        _metric_scores, _metric_name, datasetname, no_errType
    )


Dataset: ArtEmis ,Number of images: 15884
scores_P
Accuracy at errType=1:12666/15884= 0.7974061949131201
Accuracy at errType=2:10413/15884= 0.6555653487786451
Accuracy at errType=3:8851/15884= 0.557227398640141
Accuracy for all types:31930/47652= 0.6700663141106354
Dataset: ArtEmis ,Number of images: 15884
scores_R
Accuracy at errType=1:11137/15884= 0.7011458071014858
Accuracy at errType=2:11556/15884= 0.7275245530093175
Accuracy at errType=3:12644/15884= 0.7960211533618736
Accuracy for all types:35337/47652= 0.7415638378242256
Dataset: ArtEmis ,Number of images: 15884
scores_F1
Accuracy at errType=1:12348/15884= 0.7773860488541929
Accuracy at errType=2:11184/15884= 0.7041047595064216
Accuracy at errType=3:10509/15884= 0.6616091664568119
Accuracy for all types:34041/47652= 0.7143666582724755
