In [1]:
import numpy as np
import pandas as pd
import json
from evaluator import EvalAIAnswerProcessor, TextVQAAccuracyEvaluator
import pickle

In [2]:
# Single TextVQAAccuracyEvaluator instance, created once and reused when
# scoring predictions.
# NOTE(review): the name `eval` shadows Python's built-in `eval()`. It is left
# unchanged because the scoring cell calls `eval.eval_pred_list(...)`.
eval = TextVQAAccuracyEvaluator() # accuracy evaluator

In [4]:
# Load the TextVQA validation annotations.
# Assumes each entry of the "data" column is a dict-like record (one per question).
val_data = pd.read_json("../Data/textvqa/TextVQA_0.5.1_val.json")

# Expand the records into one column per key. Building the frame from the list
# of records in a single call avoids constructing one pandas Series per row,
# which is what `.apply(pd.Series)` does; the original index is preserved.
val_gt = pd.DataFrame(val_data["data"].tolist(), index=val_data.index)

# Keep only the fields needed for scoring: question id, question text,
# ground-truth answers and question tokens. `.copy()` makes `reference` an
# explicitly independent frame rather than a selection tied to `val_gt`.
reference = val_gt[["question_id", "question", "answers", "question_tokens"]].copy()

In [5]:
# Model zoo: model names grouped by baseline family.
models = {
    "multimodal-baselines": [
        "hotpot-lorra",
        "hotpot-without-mmt",
        "hotpot-without-object-label",
        "hotpot",
    ],
    "unimodal-baselines": ["object", "ocr", "question"],
    "competitive-baselines": ["lorra", "m4c", "tap"],
}

In [6]:
# Read each model's validation predictions into pred[family][model name].
pred = {}
paths = {}  # initialised here but not populated in this cell
for family, model_names in models.items():
    family_preds = {}
    for model_name in model_names:
        json_path = "./pred/" + family + "/textvqa-val-" + model_name + ".json"
        family_preds[model_name] = pd.read_json(json_path)
    # Store the family only once all of its prediction files were read.
    pred[family] = family_preds
    

In [7]:
# Sanity check that every model answered the same set of questions, then
# compute the accuracy of each model on each question.
#
# For every prediction frame this cell:
#   1. compares its question ids with those of multimodal-baselines/hotpot,
#   2. inner-joins the ground-truth `reference` columns on "question_id",
#   3. renames the "answer" column to "prediction",
#   4. adds an "accu_<model>" column holding the evaluator's score for each row.
question_list = set(pred["multimodal-baselines"]["hotpot"]["question_id"])
for k, v in pred.items():
    for i in v.keys():
        accu_col = "accu_" + i

        # Report (but do not abort on) a mismatch, naming the offending model
        # so the problem can actually be located.
        if set(pred[k][i]["question_id"]) != question_list:
            print("issue: question ids of", k, i, "differ from multimodal-baselines/hotpot")

        # Re-running this cell must be harmless: a frame that is already scored
        # has the reference columns merged in, and merging them a second time
        # would produce suffixed duplicates (answers_x / answers_y) and break
        # the `row.answers` lookup below.
        if accu_col in pred[k][i].columns:
            continue

        scored = (
            pred[k][i]
            .merge(reference, on="question_id", how="inner")
            .rename(columns={"answer": "prediction"})
        )
        # The evaluator takes a list of prediction dicts; each row is scored on
        # its own by passing a single-element list.
        scored[accu_col] = scored.apply(
            lambda row: eval.eval_pred_list(
                [{"pred_answer": row.prediction, "gt_answers": row.answers}]
            ),
            axis=1,
        )
        pred[k][i] = scored

In [8]:
# Grade book: one row per question, one "accu_<model>" column per model.
grade_book = reference.copy()
for family, family_preds in pred.items():
    for model_name, scored in family_preds.items():
        accu_col = "accu_" + model_name
        # Report the model's mean accuracy over all of its scored questions.
        print(family, model_name, np.mean(scored[accu_col]))
        per_question = scored[["question_id", accu_col]]
        grade_book = grade_book.merge(right=per_question, on="question_id", how="inner")


multimodal-baselines hotpot-lorra 0.291819999999999
multimodal-baselines hotpot-without-mmt 0.4137999999999987
multimodal-baselines hotpot-without-object-label 0.45415999999999984
multimodal-baselines hotpot 0.45321999999999935
unimodal-baselines object 0.1271999999999999
unimodal-baselines ocr 0.25255999999999895
unimodal-baselines question 0.18999999999999942
competitive-baselines lorra 0.27573999999999876
competitive-baselines m4c 0.39233999999999836
competitive-baselines tap 0.49846


In [9]:
# Persist the results: the grade book as CSV (without the index column) and
# the full dictionary of scored prediction frames as a pickle.
grade_book.to_csv("grade_book.csv", index=False)
with open("model_preds.pickle", "wb") as pickle_file:
    pickle.dump(pred, pickle_file)

In [10]:
# Write one grade book per question subset.
# Each subset file is read as a comma-separated list of integer question ids;
# the grade book rows whose "question_id" is in that list are saved to
# ./gradebooks/grade_book_<subset>.csv.
subsets = ["obj", "pos"]
for s in subsets:
    # Use a context manager so the file handle is closed after reading
    # (the bare open() call left it open).
    with open("./subsets/val_"+s+"_question.txt", "r") as f:
        data = f.read().split(",")
    # Skip blank tokens (e.g. after a trailing comma or final newline), which
    # int() would reject with a ValueError.
    subset_ids = [int(d) for d in data if d.strip()]
    df = grade_book[grade_book["question_id"].isin(subset_ids)]
    df.to_csv("./gradebooks/grade_book_"+s+".csv", index=False)