In [None]:
import sys

sys.path.append("..")
from scoring_pipeline import ScoringPipeline
import json
import os
import torch
import pandas as pd
from tqdm import tqdm

In [None]:
def compute_scores(results_dir, task):
    """Score every results file in ``results_dir`` and collect one row per file.

    Each file is read as JSON and must provide ``"answers"`` (a list of dicts
    with an ``"answer"`` string), ``"vision_bits"`` and ``"language_bits"``.
    Every answer is trimmed to the text following its last ``"ASSISTANT: "``
    marker before scoring.

    Args:
        results_dir: Directory containing the JSON result files. Entries that
            are not regular files (e.g. sub-directories) are skipped.
        task: ``"vqav2"`` or ``"gqa"``. For ``"vqav2"`` the annotation and
            question file paths (relative to the current working directory)
            are attached to the results before they are handed to the scorer.

    Returns:
        pandas.DataFrame with ``vision_bits`` and ``language_bits`` columns
        followed by the metric columns: ``acc`` for ``"gqa"``, or every key of
        the dict returned by the scorer for ``"vqav2"``. Rows follow the
        sorted file names.

    Raises:
        ValueError: If ``task`` is not one of the supported tasks.
    """
    if task not in ("vqav2", "gqa"):
        raise ValueError(f"Unsupported task {task!r}; expected 'vqav2' or 'gqa'.")

    scorer = ScoringPipeline()

    gather = []
    # os.listdir returns entries in arbitrary order; sort for reproducible rows.
    for results_file in tqdm(sorted(os.listdir(results_dir))):
        results_path = os.path.join(results_dir, results_file)
        if not os.path.isfile(results_path):
            continue

        with open(results_path, "r") as f:
            results = json.load(f)

        # post-processing llava output
        answers = results["answers"]
        for ans in answers:
            ans["answer"] = ans["answer"].split("ASSISTANT: ")[-1]

        if task == "vqav2":
            ann_root = "./data/vqav2/annotations"
            q_root = "./data/vqav2/questions"

            results["annotations"] = os.path.join(ann_root, "v2_mscoco_val2014_annotations.json")
            results["questions"] = os.path.join(q_root, "v2_OpenEnded_mscoco_val2014_questions.json")

            metrics = scorer.compute_scores(results, task)
        else:
            metrics = {"acc": scorer.compute_scores(answers, task)["acc"]}

        record = dict(
            vision_bits=results["vision_bits"],
            language_bits=results["language_bits"],
        )
        record.update(metrics)

        # Every file contributes a row (the earlier debug `break` dropped all
        # vqav2 rows before they were appended).
        gather.append(record)

    return pd.DataFrame(gather)

In [None]:
results_path = "./llava/gptq/gqa" + "/results_v4_l4.json"

# Read a single results file for manual inspection.
with open(results_path, "r") as f:
    results = json.load(f)

# Trim each answer down to the text after its last "ASSISTANT: " marker.
answers = results["answers"]
for ans in answers:
    ans["answer"] = ans["answer"].rpartition("ASSISTANT: ")[2]

answers

In [None]:
# TODO: the original note was left empty — fill in what is pending or remove it.
# Score the result files under the AWQ GQA directory with compute_scores.
results_dir = "./llava/awq/gqa"
df_gqa_awq = compute_scores(results_dir, "gqa")

In [None]:
# Display the AWQ GQA score table.
df_gqa_awq

In [None]:
# Write the AWQ GQA scores to CSV.
# NOTE(review): index=None is presumably meant as index=False (omit the row
# index) — confirm. This file name also carries a "llava_" prefix that the
# other CSVs written by this notebook do not.
df_gqa_awq.to_csv("./final_results/llava/llava_awq_gqa.csv", index=None)

In [None]:
# TODO: the original note was left empty — fill in what is pending or remove it.
# Score the AWQ VQAv2 results directory with compute_scores.
results_dir = "./llava/awq/vqav2"
df_vqav2_awq = compute_scores(results_dir, "vqav2")

In [None]:
# Preview the first five rows of the AWQ VQAv2 score table.
df_vqav2_awq.head(5)

In [None]:
# Write the AWQ VQAv2 scores to CSV.
# NOTE(review): index=None is presumably meant as index=False — confirm.
df_vqav2_awq.to_csv("./final_results/llava/awq_vqav2.csv", index=None)

In [None]:
# TODO: the original note was left empty — fill in what is pending or remove it.
# Score the GPTQ VQAv2 results directory with compute_scores.
results_dir = "./llava/gptq/vqav2"
df_vqav2_gptq = compute_scores(results_dir, "vqav2")

In [None]:
# Write the GPTQ VQAv2 scores to CSV.
# NOTE(review): index=None is presumably meant as index=False — confirm.
df_vqav2_gptq.to_csv("./final_results/llava/gptq_vqav2.csv", index=None)

In [None]:
# Score every results file under ./llava/gptq/gqa on GQA, one record per file.

results_dir = "./llava/gptq/gqa"
scorer = ScoringPipeline()

gather = []
for results_file in os.listdir(results_dir):
    results_path = os.path.join(results_dir, results_file)

    with open(results_path, "r") as f:
        results = json.load(f)

    # Trim each answer down to the text after its last "ASSISTANT: " marker.
    answers = results["answers"]
    for ans in answers:
        ans["answer"] = ans["answer"].rpartition("ASSISTANT: ")[2]

    # Only the accuracy entry of the scorer's output is kept.
    score = scorer.compute_scores(answers, "gqa")["acc"]

    record = {
        "vision_bits": results["vision_bits"],
        "language_bits": results["language_bits"],
        "acc": score,
    }
    gather.append(record)

In [None]:
# Inspect the raw per-file records before tabulating them.
gather

In [None]:
# Tabulate the per-file GQA records (one row per dict in `gather`).
df_gqa = pd.DataFrame(gather)

In [None]:
# Display the GPTQ GQA score table.
df_gqa

In [None]:
# Write the GPTQ GQA scores to CSV.
# NOTE(review): index=None is presumably meant as index=False — confirm.
df_gqa.to_csv("./final_results/llava/gptq_gqa.csv", index=None)

In [None]:
# NOTE(review): this repeats the value already assigned in the GPTQ GQA scoring
# cell, and no later cell visible in this notebook reads it — likely removable.
results_dir = "./llava/gptq/gqa"

In [None]:
# gqa ans: load the results JSON from the full_precision/gqa_test_do_pad run.
ans_path = "./llava/full_precision/gqa_test_do_pad/results_v8_l4.json"

with open(ans_path, "r") as f:
    results = json.load(f)

In [None]:
# List the top-level keys of the loaded GQA results.
results.keys()

In [None]:
# `results` is a dict here (its keys are listed in the previous cell), so
# iterating it directly would yield string keys and `res["answer"]` would raise
# TypeError. Iterate the answers list instead; a bare list is still accepted.
answers = results["answers"] if isinstance(results, dict) else results
for res in answers:
    # Trim each answer down to the text after its last "ASSISTANT: " marker.
    res["answer"] = res["answer"].split("ASSISTANT: ")[-1]


def compute_gqa_results(results, scorer, save_path=None):
    """Score GQA answers, print the metrics and optionally save them as JSON.

    Args:
        results: Passed unchanged to ``scorer.compute_scores`` as the data to
            score.
        scorer: Object exposing ``compute_scores(results, task)``.
        save_path: When truthy, the metrics are written to this path with
            ``json.dump``; when omitted nothing is written.

    Returns:
        None. The metrics are printed, not returned.
    """
    gqa_results = scorer.compute_scores(results, "gqa")
    print(gqa_results)
    # Honour save_path instead of silently ignoring it.
    if save_path:
        with open(save_path, "w") as f:
            json.dump(gqa_results, f)

In [None]:
def compute_gqa_results(results, scorer, save_path=None):
    """Score GQA answers, print the metrics and optionally save them as JSON.

    Args:
        results: Passed unchanged to ``scorer.compute_scores`` as the data to
            score.
        scorer: Object exposing ``compute_scores(results, task)``.
        save_path: When truthy, the metrics are written to this path with
            ``json.dump``; when omitted nothing is written.

    Returns:
        None. The metrics are printed, not returned.
    """
    gqa_results = scorer.compute_scores(results, "gqa")
    print(gqa_results)
    # Honour save_path instead of silently ignoring it.
    if save_path:
        with open(save_path, "w") as f:
            json.dump(gqa_results, f)

In [None]:
# vqav2 ans: load the results JSON from the full_precision/vqav2_test_do_pad run.
ans_path = "./llava/full_precision/vqav2_test_do_pad/results_v8_l4.json"

with open(ans_path, "r") as f:
    results = json.load(f)

In [None]:
# List the top-level keys of the loaded VQAv2 results.
results.keys()

In [None]:
# Trim each answer down to the text after its last "ASSISTANT: " marker.
# The dicts are edited in place, so results["answers"] sees the change too.
answers = results["answers"]
for ans in answers:
    ans["answer"] = ans["answer"].rpartition("ASSISTANT: ")[2]

In [None]:
# Spot-check the first answer entry.
results["answers"][0]

In [None]:
# Attach the VQAv2 annotation and question file locations to `results`.
# Only the path strings are stored here; the files are not opened in this cell.
ann_root = "./data/vqav2/annotations"
q_root = "./data/vqav2/questions"

# results["answers"] = answers
results["annotations"] = os.path.join(ann_root, "v2_mscoco_val2014_annotations.json")
results["questions"] = os.path.join(q_root, "v2_OpenEnded_mscoco_val2014_questions.json")

In [None]:
# Check that "annotations" and "questions" now appear among the keys.
results.keys()

In [None]:
def compute_vqa_results(results, scorer, save_path=None):
    """Score ``results`` on the ``"vqav2"`` task and print the metrics.

    When ``save_path`` is truthy the metrics are also written there as JSON.
    Nothing is returned.
    """
    scores = scorer.compute_scores(results, "vqav2")
    print(scores)

    if not save_path:
        return

    with open(save_path, "w") as out_file:
        json.dump(scores, out_file)


# Score the loaded VQAv2 results. No save_path is passed, so the metrics are
# only printed.
scorer = ScoringPipeline()
compute_vqa_results(results, scorer)

In [None]:
# NOTE(review): `results` is indexed by string keys in the cells above, so this
# counts its top-level keys, not the answers — len(results["answers"]) may have
# been intended.
len(results)