In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# !pip uninstall torchaudio -y
# !pip uninstall torchmetrics -y
# !pip install torchaudio
!pip install torchmetrics==0.11.4


In [None]:
import locale
def getpreferredencoding(do_setlocale = True):
    """Stand-in for ``locale.getpreferredencoding`` that always reports UTF-8.

    ``do_setlocale`` is accepted only for signature compatibility and is ignored.
    """
    forced_encoding = "UTF-8"
    return forced_encoding

# Monkeypatch the locale module so every later lookup of the preferred encoding yields "UTF-8".
setattr(locale, "getpreferredencoding", getpreferredencoding)

In [None]:
!pip install summ_eval

In [None]:
!pip install wmd

In [None]:
from collections import Counter
import math

class BleuMetric:
    """Corpus-level BLEU over sequences of tokens.

    Sentences are handled as plain sequences: pass lists of tokens to get
    word-level n-grams (a raw ``str`` is sliced item by item, i.e. it yields
    character n-grams).

    All references form one shared pool: each hypothesis is clipped against
    every reference and the best match per n-gram order is kept.
    NOTE(review): the brevity penalty compares the total hypothesis length with
    the summed length of *all* references, which matches parallel
    hypothesis/reference lists — confirm this is the intended usage.
    """

    def __init__(self, n=4):
        """Create a metric that uses n-gram orders 1..``n`` with uniform weights."""
        self.n = n
        self.weights = [1/self.n]*self.n
        # One entry per reference: a list of Counters, index i holds the (i+1)-grams.
        self.reference_counters = []
        # Summed length of every reference registered so far.
        self.reference_length = 0

    def compute_ngram_counts(self, sentence):
        """Return ``[Counter of 1-grams, ..., Counter of n-grams]`` for ``sentence``."""
        return [
            Counter(zip(*[sentence[i:] for i in range(order)]))
            for order in range(1, self.n+1)
        ]

    def compute_reference_counters(self, references):
        """Register ``references``; repeated calls append to the existing pool."""
        for reference in references:
            self.reference_counters.append(self.compute_ngram_counts(reference))
            self.reference_length += len(reference)

    def compute_modified_precision(self, hypothesis, reference_counters):
        """Return the clipped n-gram precision of ``hypothesis`` for each order.

        ``reference_counters`` is the per-order Counter list of one reference.
        Each precision is ``clipped matches / number of hypothesis n-grams`` of
        that order; an order with no hypothesis n-grams (e.g. an empty
        hypothesis) scores ``0.0``. The result is always a list of length ``n``.
        """
        hypothesis_counters = self.compute_ngram_counts(hypothesis)
        precisions = []
        for hyp_counter, ref_counter in zip(hypothesis_counters, reference_counters):
            total = sum(hyp_counter.values())
            # Counter intersection keeps the minimum count per n-gram (clipping).
            clipped = sum((hyp_counter & ref_counter).values())
            precisions.append(clipped / total if total else 0.0)
        return precisions

    def compute_brevity_penalty(self, hypothesis_length):
        """Return the brevity penalty for ``hypothesis_length`` against the stored reference length."""
        if hypothesis_length >= self.reference_length:
            return 1
        if hypothesis_length == 0:
            # Nothing was produced for a non-empty reference: maximal penalty
            # (also avoids dividing by zero below).
            return 0.0
        return math.exp(1 - self.reference_length / hypothesis_length)

    def compute_bleu(self, hypotheses, references):
        """Return the corpus-level BLEU score of ``hypotheses``.

        ``references`` is only registered when no references are stored yet;
        later calls on the same instance reuse the stored pool.

        Clipped matches and n-gram totals are pooled over all hypotheses before
        the precisions are formed, so the score does not shrink with the number
        of hypotheses. A small constant (1e-12) keeps ``log`` defined when a
        precision is zero.
        """
        if not self.reference_counters:
            self.compute_reference_counters(references)

        clipped_totals = [0]*self.n
        ngram_totals = [0]*self.n
        total_hypothesis_length = 0
        for hypothesis in hypotheses:
            total_hypothesis_length += len(hypothesis)
            hypothesis_counters = self.compute_ngram_counts(hypothesis)
            for i, hyp_counter in enumerate(hypothesis_counters):
                ngram_totals[i] += sum(hyp_counter.values())
                # Best clipped count for this order over the whole reference pool.
                clipped_totals[i] += max(
                    (sum((hyp_counter & ref_counters[i]).values())
                     for ref_counters in self.reference_counters),
                    default=0,
                )

        log_precision = 0.0
        for i in range(self.n):
            precision = clipped_totals[i] / ngram_totals[i] if ngram_totals[i] else 0.0
            log_precision += self.weights[i] * math.log(precision + 1e-12)

        brevity_penalty = self.compute_brevity_penalty(total_hypothesis_length)
        return brevity_penalty * math.exp(log_precision)


In [None]:
#from summ_eval.rouge_metric import RougeMetric
# rouge = RougeMetric()
# from sys import exit
import nltk
nltk.download('stopwords')


from summ_eval.bert_score_metric import BertScoreMetric
bert_score=BertScoreMetric()

from summ_eval.blanc_metric import BlancMetric
blanc_score=BlancMetric()

# from summ_eval.mover_score_metric import  MoverScoreMetric
# mover_score= MoverScoreMetric()

from summ_eval.sentence_movers_metric import SentenceMoversMetric
sentence_mover_score=SentenceMoversMetric()

from summ_eval.summa_qa_metric import  SummaQAMetric
summa_qa_score= SummaQAMetric()

# from summ_eval.supert_metric import SupertMetric
# supert_score =SupertMetric()

# NOTE(review): the module-level name `meteor_score` is rebound further down in this notebook by
# `from nltk.translate.meteor_score import meteor_score`; once that cell has run, `meteor_score`
# no longer refers to this MeteorMetric instance.
from summ_eval.meteor_metric import  MeteorMetric
meteor_score =  MeteorMetric()

# from summ_eval.s3_metric import S3Metric
# s3_metric =S3Metric()

# from summ_eval.syntactic_metric import  SyntacticMetric
# syntactic_score =SyntacticMetric()

from summ_eval.cider_metric import CiderMetric
cider_score = CiderMetric()

from summ_eval.chrfpp_metric import  ChrfppMetric
chrfpp_score= ChrfppMetric()

# from summ_eval.bleu_metric import BleuMetric
# blue_score_4 = BleuMetric() # BLEU 4
# blue_score_4 = BleuMetric(n=4) # BLEU 4
# blue_score_3 = BleuMetric(n=3) # BLEU 3
# blue_score_2 = BleuMetric(n=2) # BLEU 2
# blue_score_1 = BleuMetric(n=1) # BLEU 1

In [None]:
!pip install -U  git+https://github.com/bheinzerling/pyrouge.git

In [None]:
import os
import summ_eval

# `!export ...` runs in a throwaway subshell, so it never changes the environment of the
# kernel (and neither `os` nor `dirname` is defined in this notebook). Set the variable on
# the kernel process instead.
# NOTE(review): assumes ROUGE-1.5.5 lives inside the installed summ_eval package directory — confirm.
os.environ["ROUGE_HOME"] = os.path.join(os.path.dirname(summ_eval.__file__), "ROUGE-1.5.5/")

In [None]:
!pip install rouge

In [None]:
# !pip install torchmetrics

In [None]:
!pip install nltk

In [None]:
import nltk
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('omw-1.4')
from nltk.translate.meteor_score import meteor_score
from nltk.tokenize import word_tokenize

def compute_meteor_score(hypotheses, references):
    """Return the mean sentence-level METEOR score over paired texts.

    Args:
        hypotheses: iterable of hypothesis strings.
        references: iterable of reference strings, paired positionally with
            ``hypotheses`` (pairing stops at the shorter of the two).

    Returns:
        The average of the per-pair METEOR scores, or ``0.0`` when there are no
        pairs to score.
    """
    meteor_scores = []
    for ref, hyp in zip(references, hypotheses):
        # Both sides are tokenised with NLTK's word tokenizer; case is left untouched.
        hyps_tokenized = word_tokenize(hyp)
        refs_tokenized = word_tokenize(ref)

        # meteor_score takes a list of tokenised references and one tokenised hypothesis.
        meteor_scores.append(meteor_score([refs_tokenized], hyps_tokenized))

    # Guard the average: an empty batch would otherwise divide by zero.
    if not meteor_scores:
        return 0.0

    return sum(meteor_scores) / len(meteor_scores)


In [None]:
!pip install transformers

In [None]:
!pip install -U sentence-transformers

In [None]:
!pip install git+https://github.com/neural-dialogue-metrics/Distinct-N.git


In [None]:
!pip install distinct_n

In [None]:

def Evaluate_R(summaries, references,type:str="rouge"):
  """Score ``summaries`` against ``references`` with the metric named by ``type``.

  Args:
    summaries: list of generated (predicted) texts.
    references: list of gold texts, parallel to ``summaries``.
    type: metric name. Either one of the batch metrics listed below, which is
      resolved to the module-level metric object of the same name and scored
      via its ``evaluate_batch`` method, or ``"blue_score_1"`` ..
      ``"blue_score_4"``, which is scored with ``BleuMetric(n=N).compute_bleu``.
      (The parameter name shadows the builtin ``type``; it is kept for callers.)

  Returns:
    Whatever the selected metric returns (the ``evaluate_batch`` result, or the
    BLEU float).

  Raises:
    ValueError: ``type`` is not a known metric name.
    NameError: the metric object for ``type`` has not been created.
    AttributeError: the global called ``type`` exists but has no
      ``evaluate_batch`` (e.g. the name was rebound to something else).
  """
  batch_metrics = (
    "rouge", "bert_score", "blanc_score", "mover_score", "sentence_mover_score",
    "summa_qa_score", "supert_score", "meteor_score", "s3_metric",
    "syntactic_score", "cider_score", "chrfpp_score",
  )
  bleu_orders = {"blue_score_4": 4, "blue_score_3": 3, "blue_score_2": 2, "blue_score_1": 1}

  if type in bleu_orders:
    return BleuMetric(n=bleu_orders[type]).compute_bleu(summaries, references)

  if type not in batch_metrics:
    raise ValueError(
      "Unknown metric type {!r}; expected one of {}".format(
        type, sorted(batch_metrics + tuple(bleu_orders))))

  # Metric objects are created at module level under the same name as the metric type;
  # look the object up lazily so only the requested metric has to exist.
  metric = globals().get(type)
  if metric is None:
    raise NameError(
      "name {!r} is not defined; run (or uncomment) the cell that creates this metric".format(type))
  if not hasattr(metric, "evaluate_batch"):
    raise AttributeError(
      "global {!r} has no 'evaluate_batch'; the name may have been rebound after the metric was created".format(type))

  return metric.evaluate_batch(summaries, references)


In [None]:
!pip install -U torchtext==0.6.0

In [None]:
!pip list | grep torchtext

In [None]:
# #########IMP EXCEL TO JSON###############
# import pandas as pd

# # Path to your Excel file
# excel_file = "/content/convert.xlsx"

# # Read the Excel file into a DataFrame
# df = pd.read_excel(excel_file)

# # Convert the DataFrame to a JSON object
# json_data = df.to_json(orient="records")

# # Write the JSON object to a file
# json_file = "/content/convert.json"
# with open(json_file, "w") as file:
#     file.write(json_data)

# print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchtext
from torchtext.vocab import Vectors
# from torchtext.vocab import GloVe
# glove_vectors = Vectors(name="/content/glove.840B.300d.txt")  # Replace with the path to your GloVe vectors
glove_vectors = Vectors(name="/content/drive/MyDrive/Resume/glove.42B.300d.txt")


In [None]:
def tokenize(sentence):
    """Split ``sentence`` into tokens on runs of whitespace."""
    tokens = sentence.split()
    return tokens

# Embedding Average

In [None]:

# Step 3: Calculate the average word embeddings for each sentence
def sentence_embedding(sentence, word_vectors):
    """Return the mean embedding of the in-vocabulary tokens of ``sentence``.

    ``word_vectors`` must expose ``stoi`` (token -> row index) and ``vectors``
    (2-D tensor of embeddings). Tokens missing from ``stoi`` are skipped; when
    no token is known, a zero vector with the embedding width is returned.
    """
    vocab_index = word_vectors.stoi
    known_vectors = []
    for token in tokenize(sentence):
        if token in vocab_index:
            known_vectors.append(word_vectors.vectors[vocab_index[token]])

    if not known_vectors:
        # No usable token: fall back to an all-zero embedding.
        return torch.zeros(word_vectors.vectors.shape[1])
    return torch.mean(torch.stack(known_vectors), dim=0)

# Step 4: Compare sentence-level embeddings to compute the similarity score
def embedding_average_similarity(hypothesis, reference, word_vectors):
    """Return the cosine similarity between the averaged embeddings of two sentences.

    Both sentences are embedded with ``sentence_embedding``; the result is a
    Python float.
    """
    # Add a leading batch dimension so cosine_similarity compares the two vectors as one pair.
    hyp_batch = sentence_embedding(hypothesis, word_vectors).unsqueeze(0)
    ref_batch = sentence_embedding(reference, word_vectors).unsqueeze(0)
    return F.cosine_similarity(hyp_batch, ref_batch).item()

# # Example usage:
# hypothesis_sentence = "This is an example sentence."
# reference_sentence = "This is a sample sentence."

# similarity_score = embedding_average_similarity(hypothesis_sentence, reference_sentence, glove_vectors)
# print(f"Embedding Average Similarity Score: {similarity_score:.4f}")

# Embedding Greedy Matching

In [None]:
# Step 3: Calculate the cosine similarity between tokens in the reference and hypothesis
def greedy_matching_similarity(hypothesis, reference, word_vectors):
    """Return the symmetric greedy-matching similarity of two sentences.

    Every reference token is matched to its most similar hypothesis token (by
    cosine similarity of their embeddings) and vice versa; the two directional
    averages are then averaged. Tokens missing from ``word_vectors.stoi`` score
    0 against every token.

    Args:
        hypothesis: hypothesis sentence (string).
        reference: reference sentence (string).
        word_vectors: object exposing ``stoi`` (token -> row index) and
            ``vectors`` (2-D embedding tensor).

    Returns:
        The aggregate score as a Python float; ``0.0`` when either sentence has
        no tokens.
    """
    hyp_tokens = tokenize(hypothesis)
    ref_tokens = tokenize(reference)

    # With an empty side there is nothing to match, and max() over a zero-size
    # dimension would raise.
    if not hyp_tokens or not ref_tokens:
        return 0.0

    def _unit_rows(tokens):
        # One L2-normalised embedding row per token; out-of-vocabulary tokens
        # stay all-zero so their similarity to anything is 0.
        rows = [
            word_vectors.vectors[word_vectors.stoi[token]]
            if token in word_vectors.stoi
            else word_vectors.vectors.new_zeros(word_vectors.vectors.shape[1])
            for token in tokens
        ]
        return F.normalize(torch.stack(rows), dim=1)

    # Dot products of unit vectors are cosine similarities:
    # rows index reference tokens, columns index hypothesis tokens.
    similarity_matrix = _unit_rows(ref_tokens) @ _unit_rows(hyp_tokens).T

    # Forward direction: best hypothesis match for each reference token.
    forward_avg_score = similarity_matrix.max(dim=1)[0].mean()

    # Reverse direction: best reference match for each hypothesis token.
    reverse_avg_score = similarity_matrix.max(dim=0)[0].mean()

    # Average both directions so the score is symmetric.
    aggregate_score = (forward_avg_score + reverse_avg_score) / 2.0

    return aggregate_score.item()

# # Example usage:
# hypothesis_sentence = "This is an example sentence."
# reference_sentence = "This is a sample sentence."

# similarity_score = greedy_matching_similarity(hypothesis_sentence, reference_sentence, glove_vectors)
# print(f"Greedy Matching Similarity Score: {similarity_score:.4f}")

# Embedding Vector Extrema

In [None]:
# Step 3: Calculate the Vector Extrema sentence embedding
def vector_extrema_embedding(sentence, word_vectors):
    """Return a sentence embedding built from per-dimension extreme values.

    For every embedding dimension the result holds
    ``max(max over words, abs(min over words))`` taken over the in-vocabulary
    tokens of ``sentence``. A sentence with no known token yields a zero vector.
    NOTE(review): the magnitude is kept but the sign of a dominant negative
    value is dropped — confirm this is the intended definition of the extrema.

    Args:
        sentence: input sentence (string).
        word_vectors: object exposing ``stoi`` (token -> row index) and
            ``vectors`` (2-D embedding tensor).
    """
    known_vectors = [
        word_vectors.vectors[word_vectors.stoi[word]]
        for word in tokenize(sentence)
        if word in word_vectors.stoi
    ]
    if not known_vectors:
        return torch.zeros(word_vectors.vectors.shape[1])

    # Reduce over the word axis in one tensor operation instead of looping per dimension.
    stacked = torch.stack(known_vectors)
    max_values = stacked.max(dim=0)[0]
    min_values = stacked.min(dim=0)[0]
    extrema_embedding = torch.max(max_values, min_values.abs())

    # Match the dtype of the torch.zeros(...) vector returned for the empty case.
    return extrema_embedding.to(torch.get_default_dtype())

# Step 4: Calculate cosine similarity between reference and hypothesis sentence embeddings
def vector_extrema_similarity(hypothesis, reference, word_vectors):
    """Return the cosine similarity between the vector-extrema embeddings of two sentences.

    Both sentences are embedded with ``vector_extrema_embedding``; the result is
    a Python float.
    """
    # Add a leading batch dimension so cosine_similarity compares the two vectors as one pair.
    hyp_batch = vector_extrema_embedding(hypothesis, word_vectors).unsqueeze(0)
    ref_batch = vector_extrema_embedding(reference, word_vectors).unsqueeze(0)
    return F.cosine_similarity(hyp_batch, ref_batch).item()

# Example usage:
# hypothesis_sentence = "This is an example sentence."
# reference_sentence = "This is a sample sentence."

# similarity_score = vector_extrema_similarity(hypothesis_sentence, reference_sentence, glove_vectors)
# print(f"Vector Extrema Similarity Score: {similarity_score:.4f}")

# New Inference

In [None]:
path = "/content/BART_ep_30mfccs_clip_inTok421024_3e-5_batchS_8_report.json"

In [None]:
import json
with open(path,'r') as f:
  dic = json.load(f)

In [None]:
print(dic[0].keys())

In [None]:
print(len(dic))

In [None]:
summaries = []
references = []

# for key in dic.keys():
#   if('pred' not in dic[key].keys()):
#     continue
#   summaries.append(dic[key]['pred'])
#   references.append(dic[key]['golden'])

# for key in dic:
#   summaries.append(key['pred'])
#   references.append(key['golden'])
# `summaries` holds the model outputs (hypotheses) and `references` the gold reports, matching
# how both lists are used below (hypotheses first, `references` reported as "Gold").
# The lists are rebuilt from scratch so re-running this cell does not duplicate entries.
summaries = [entry.get('Generated_report') for entry in dic]
references = [entry.get('Gold_report') for entry in dic]

  # if('pred' not in dic[key].keys()):
  #   continue
  # summaries.append(dic[key]['pred'])
  # references.append(dic[key]['golden'])


In [None]:
print(summaries)

In [None]:
print(references)

In [None]:
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

In [None]:
# # Calculate BLEU-1 score
# bleu1 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(1.0, 0, 0, 0))
# bleu2 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(0.5, 0.5, 0, 0))
# bleu3 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(0.33, 0.33, 0.33, 0))
# bleu4 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(0.25, 0.25, 0.25, 0.25))
# # bleu1 = nltk.translate.bleu_score.corpus_bleu(references, references, weights=(1.0, 0, 0, 0))
# print("\nBLEU_1: ",bleu1)
# print("\nBLEU_2: ",bleu2)
# print("\nBLEU_3: ",bleu3)
# print("\nBLEU_4: ",bleu4)

In [None]:
# !pip install torchmetrics

In [None]:
# import torchmetrics
from torchmetrics.functional import bleu_score
# bleu_score(summaries,references)
print("torchmetrics bleu: ",bleu_score(summaries,references))

In [None]:
# score_lists = ["bert_score","cider_score", "blue_score_4","blue_score_3","blue_score_2","blue_score_1"]
score_lists = ["bert_score","cider_score"]
for value in score_lists:
  print("The result of :", value)
  print(Evaluate_R(summaries, references,value))

In [None]:
import json
from rouge import Rouge


scores_list=[]
total = len(dic)
print(total)
rouge = Rouge()
# Score every entry: the generated report is the hypothesis, the gold report the reference.
for entry in dic:
  generated_q = entry["Generated_report"]
  reference_q = entry["Gold_report"]

  # Rouge.get_scores takes (hypothesis, reference); entries it rejects with a
  # ValueError are reported and left out of scores_list.
  try:
    scores = rouge.get_scores(generated_q, reference_q)
  except ValueError as e:
    print("Error:", e)
    continue
  scores_list.append(scores)
  # Print the scores
  # print(scores)

In [None]:
# Average each ROUGE statistic over the entries that were actually scored.
# Entries skipped by the scoring loop are not in scores_list, so dividing by the
# dataset size would bias every average downwards.
scored_count = len(scores_list)
rouge_averages = {}
for variant in ("rouge-1", "rouge-2", "rouge-l"):
  for stat in ("r", "p", "f"):
    # Each item is the list returned by get_scores; element 0 holds the score dict.
    stat_sum = sum(item[0][variant][stat] for item in scores_list)
    rouge_averages[(variant, stat)] = stat_sum / scored_count if scored_count else 0.0

rouge1_r = rouge_averages[("rouge-1", "r")]
rouge1_p = rouge_averages[("rouge-1", "p")]
rouge1_f = rouge_averages[("rouge-1", "f")]

rouge2_r = rouge_averages[("rouge-2", "r")]
rouge2_p = rouge_averages[("rouge-2", "p")]
rouge2_f = rouge_averages[("rouge-2", "f")]

rougel_r = rouge_averages[("rouge-l", "r")]
rougel_p = rouge_averages[("rouge-l", "p")]
rougel_f = rouge_averages[("rouge-l", "f")]


print("\n Average scores:\n")
print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

In [None]:
# print("The result of : meteor_score")
# print(compute_meteor_score(summaries, references))

In [None]:
# from distinct_n import distinct_n
from distinct_n.metrics import *


# distinct_n_corpus_level works on sequences of tokens; a raw string would be
# read character by character, so split each text into words first.
summaries_tokenized = [(text or "").split() for text in summaries]

distinct_2_score = distinct_n_corpus_level(summaries_tokenized, 2)
print("Distinct-2 score:", distinct_2_score)
distinct_1_score = distinct_n_corpus_level(summaries_tokenized, 1 )
print("Distinct-1 score:", distinct_1_score)


In [None]:
# from distinct_n import distinct_n
from distinct_n.metrics import *


# distinct_n_corpus_level works on sequences of tokens; a raw string would be
# read character by character, so split each text into words first.
references_tokenized = [(text or "").split() for text in references]

distinct_2_score = distinct_n_corpus_level(references_tokenized, 2)
print("Distinct-2 score Gold Questions:", distinct_2_score)
distinct_1_score = distinct_n_corpus_level(references_tokenized, 1 )
print("Distinct-1 score Gold Questions:", distinct_1_score)


In [None]:
# import pandas as pd
# import glob
# import json
# import os

# # result_file = pd.Dataframe(column = [""])
# result_file = []

# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/New_Inference/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# # filename = "t5-basecontracross_ChapTOnly_inTok2600_ep_10_NEWInference.json" #1
# # filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
# file_list = glob.glob(path_add+"*.json")
# # path = path_add+filename
# for path in file_list:
#   with open(path,'r') as f:
#     dic = json.load(f)
#   result_dict = {}
#   name, _ = os.path.splitext(path)
#   name = name.split("/")
#   name = name[-1]
#   print(name)



#   summaries = []
#   references = []

#   # for key in dic.keys():
#   #   if('pred' not in dic[key].keys()):
#   #     continue
#   #   summaries.append(dic[key]['pred'])
#   #   references.append(dic[key]['golden'])

#   # for key in dic:
#   #   summaries.append(key['pred'])
#   #   references.append(key['golden'])
#   for i in range(len(dic)):
#     summaries.append(dic[i].get('Generated_Question'))
#     references.append(dic[i].get('Gold_Question'))

#   # Calculate BLEU-1 score
#   bleu1 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(1.0, 0, 0, 0))
#   # bleu2 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(0, 1.0, 0, 0))
#   # bleu3 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(0, 0, 1.0, 0))
#   # bleu4 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(0, 0, 0, 1.0))
#   # bleu1 = nltk.translate.bleu_score.corpus_bleu(references, references, weights=(1.0, 0, 0, 0))
#   print("\nBLEU_1: ",bleu1)

#   from torchmetrics.functional import bleu_score
#   tm_bleuscore = bleu_score(summaries,references)
#   print("torchmetrics bleu: ",tm_bleuscore)

#   bert_cider = []
#   # score_lists = ["bert_score","cider_score", "blue_score_4","blue_score_3","blue_score_2","blue_score_1"]
#   score_lists = ["bert_score","cider_score"]
#   for value in score_lists:
#     print("The result of :", value)
#     print(Evaluate_R(summaries, references,value))
#     bert_cider.append(Evaluate_R(summaries, references,value))




#   import json
#   from rouge import Rouge


#   scores_list=[]
#   total = len(dic)
#   print(total)
#   rouge = Rouge()
#   # Load the generated and reference summaries
#   for entry in dic:
#     # print(entry)
#     generated_q= entry["Generated_Question"]
#     # print(generated_q)
#     reference_q = entry["Gold_Question"]

#   # Compute the ROUGE scores

#     scores = rouge.get_scores(generated_q, reference_q)
#     scores_list.append(scores)
#     # Print the scores
#     # print(scores)


#   rouge1_r=0
#   rouge1_p=0
#   rouge1_f=0


#   rouge2_r=0
#   rouge2_p=0
#   rouge2_f=0


#   rougel_r=0
#   rougel_p=0
#   rougel_f=0
#   for item in scores_list:
#     rouge1_r=item[0]["rouge-1"]["r"] + rouge1_r
#     rouge1_p=item[0]["rouge-1"]["p"] + rouge1_p
#     rouge1_f=item[0]["rouge-1"]["f"] + rouge1_f


#     rouge2_r=item[0]["rouge-2"]["r"] + rouge2_r
#     rouge2_p=item[0]["rouge-2"]["p"] + rouge2_p
#     rouge2_f=item[0]["rouge-2"]["f"] + rouge2_f


#     rougel_r=item[0]["rouge-l"]["r"] + rougel_r
#     rougel_p=item[0]["rouge-l"]["p"] + rougel_p
#     rougel_f=item[0]["rouge-l"]["f"] + rougel_f


#   rouge1_r = rouge1_r/total
#   rouge1_p = rouge1_p/total
#   rouge1_f = rouge1_f/total


#   rouge2_r = rouge2_r/total
#   rouge2_p = rouge2_p/total
#   rouge2_f = rouge2_f/total


#   rougel_r = rougel_r/total
#   rougel_p = rougel_p/total
#   rougel_f = rougel_f/total


#   print("\n Average scores:\n")
#   print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
#   print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
#   print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))



#   print("The result of : meteor_score")
#   meteor = compute_meteor_score(summaries, references)
#   print(meteor)

#   # from distinct_n import distinct_n
#   from distinct_n.metrics import *


#   distinct_2_score = distinct_n_corpus_level(summaries, 2)
#   print("Distinct-2 score:", distinct_2_score)
#   distinct_1_score = distinct_n_corpus_level(summaries, 1 )
#   print("Distinct-1 score:", distinct_1_score)

#   result_dict = {"Model": name, "nltk_bleu1": bleu1, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
#                 "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
#                  "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
#                  "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
#                 "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
#                 "rougel_f": rougel_f}


#   result_file.append(result_dict)
#   # print(result_dict)
# results = pd.DataFrame(result_file).to_excel(path_add+"results.xlsx", index=False)
# results_tex = pd.DataFrame(result_file).to_latex(path_add+"results.tex", index=False)

In [None]:
# print(result_file)
# #

In [None]:
# pip install -U torchtext

In [None]:
# !pip install torchvision

In [None]:
# !pip install torchaudio

In [None]:
# import pandas as pd
# import glob
# import json
# import os

# # result_file = pd.Dataframe(column = [""])
# result_file = []

# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/New_Inference/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# # path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"

# # filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# # filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
# file_list = glob.glob(path_add+"*.json")
# # file_list = [path_add+filename]
# # path = path_add+filename
# for path in file_list:
#   with open(path,'r') as f:
#     dic = json.load(f)
#   result_dict = {}
#   name, _ = os.path.splitext(path)
#   name = name.split("/")
#   name = name[-1]
#   print(name)


#   summaries = []
#   references = []

#   # for key in dic.keys():
#   #   if('pred' not in dic[key].keys()):
#   #     continue
#   #   summaries.append(dic[key]['pred'])
#   #   references.append(dic[key]['golden'])

#   # for key in dic:
#   #   summaries.append(key['pred'])
#   #   references.append(key['golden'])
#   for i in range(len(dic)):
#     summaries.append(str(dic[i].get('Generated_Question')))
#     references.append(dic[i].get('Gold_Question'))

#   # Calculate BLEU-1 score
#   bleu1 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(1.0, 0, 0, 0))
#   # bleu2 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(0, 1.0, 0, 0))
#   # bleu3 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(0, 0, 1.0, 0))
#   # bleu4 = nltk.translate.bleu_score.corpus_bleu(references, summaries, weights=(0, 0, 0, 1.0))
#   # bleu1 = nltk.translate.bleu_score.corpus_bleu(references, references, weights=(1.0, 0, 0, 0))
#   print("\nBLEU_1: ",bleu1)

#   from torchmetrics.functional import bleu_score
#   tm_bleuscore = bleu_score(summaries,references)
#   print("torchmetrics bleu: ",tm_bleuscore)

#   bert_cider = []
#   # score_lists = ["bert_score","cider_score", "blue_score_4","blue_score_3","blue_score_2","blue_score_1"]
#   score_lists = ["bert_score","cider_score"]
#   for value in score_lists:
#     print("The result of :", value)
#     print(Evaluate_R(summaries, references,value))
#     bert_cider.append(Evaluate_R(summaries, references,value))




#   import json
#   from rouge import Rouge


#   scores_list=[]
#   total = len(dic)
#   print(total)
#   rouge = Rouge()
#   # Load the generated and reference summaries
#   for entry in dic:
#     # print(entry)

#     if not entry["Generated_Question"].strip() or entry["Generated_Question"]==".":
#       generated_q= "NA"
#     else:
#       generated_q= entry["Generated_Question"]


#     # print(generated_q)
#     reference_q = entry["Gold_Question"]

#   # Compute the ROUGE scores
#     print("generated_q",generated_q)
#     scores = rouge.get_scores(generated_q, reference_q)
#     scores_list.append(scores)
#     # Print the scores
#     # print(scores)


#   rouge1_r=0
#   rouge1_p=0
#   rouge1_f=0


#   rouge2_r=0
#   rouge2_p=0
#   rouge2_f=0


#   rougel_r=0
#   rougel_p=0
#   rougel_f=0
#   for item in scores_list:
#     rouge1_r=item[0]["rouge-1"]["r"] + rouge1_r
#     rouge1_p=item[0]["rouge-1"]["p"] + rouge1_p
#     rouge1_f=item[0]["rouge-1"]["f"] + rouge1_f


#     rouge2_r=item[0]["rouge-2"]["r"] + rouge2_r
#     rouge2_p=item[0]["rouge-2"]["p"] + rouge2_p
#     rouge2_f=item[0]["rouge-2"]["f"] + rouge2_f


#     rougel_r=item[0]["rouge-l"]["r"] + rougel_r
#     rougel_p=item[0]["rouge-l"]["p"] + rougel_p
#     rougel_f=item[0]["rouge-l"]["f"] + rougel_f


#   rouge1_r = rouge1_r/total
#   rouge1_p = rouge1_p/total
#   rouge1_f = rouge1_f/total


#   rouge2_r = rouge2_r/total
#   rouge2_p = rouge2_p/total
#   rouge2_f = rouge2_f/total


#   rougel_r = rougel_r/total
#   rougel_p = rougel_p/total
#   rougel_f = rougel_f/total


#   print("\n Average scores:\n")
#   print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
#   print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
#   print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))



#   print("The result of : meteor_score")
#   meteor = compute_meteor_score(summaries, references)
#   print(meteor)

#   # from distinct_n import distinct_n
#   from distinct_n.metrics import *


#   distinct_2_score = distinct_n_corpus_level(summaries, 2)
#   print("Distinct-2 score:", distinct_2_score)
#   distinct_1_score = distinct_n_corpus_level(summaries, 1 )
#   print("Distinct-1 score:", distinct_1_score)

#   # result_dict = {"Model": name, "nltk_bleu1": bleu1, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
#   #               "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
#   #                "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
#   #                "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
#   #               "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
#   #               "rougel_f": rougel_f}

#   result_dict = {"Model": name, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
#                 "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
#                  "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
#                  "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
#                 "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
#                 "rougel_f": rougel_f}

#   result_file.append(result_dict)
#   # print(result_dict)
# results = pd.DataFrame(result_file).to_excel(path_add+"results_temp.xlsx", index=False)
# results_tex = pd.DataFrame(result_file).to_latex(path_add+"results_temp.tex", index=False)


In [None]:
#######################################################################llava####################################################################################

In [None]:
#########IMP EXCEL TO JSON###############
import pandas as pd

# Input: path of the Excel workbook to convert
excel_file = "/content/50_qwenlm.xlsx"

# Load the workbook into a DataFrame
df = pd.read_excel(excel_file)

# Serialise the DataFrame to a JSON string using the "records" orientation;
# to_json is called without a path, so the text is returned and written out below
json_data = df.to_json(orient="records")

# Output: write the JSON text to this file (overwrites an existing file)
json_file = "/content/50_qwenlm.json"
with open(json_file, "w") as file:
    file.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
# Question Answering


import pandas as pd
import glob
import json
import os
# Per-entry embedding-similarity scores, filled by the evaluation loop below.
average_metric = []
greedy_metric = []
extrema_metric = []


def Average(lst):
    """Return the arithmetic mean of the values in ``lst``.

    Args:
        lst: A sized collection of numbers.

    Returns:
        ``sum(lst) / len(lst)``, or ``float("nan")`` when ``lst`` is empty, so
        that a run in which no entry could be scored still produces a result
        row instead of raising ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# One dict of metrics per evaluated file; converted to a DataFrame after the loop.
result_file = []

path_add = "/content/drive/MyDrive/only_report/Model Path"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
file_list = glob.glob("/content/drive/MyDrive/only_report/Model Path/BART_ep_50tag_crosstandiaclip_inTok421024_3e-6_batchS_4_report_3e-6.json")

# Metric back-ends are imported once, ahead of the loop, instead of on every
# iteration; only the one name used from distinct_n is imported.
from torchmetrics.functional import bleu_score
from rouge import Rouge
from distinct_n.metrics import distinct_n_corpus_level

# Score every matched prediction file and append one row of metrics per file
# to `result_file`.
for path in file_list:
  with open(path,'r') as f:
    print(path)
    dic = json.load(f)
  result_dict = {}
  # The model name is the file name without its directory and extension.
  name, _ = os.path.splitext(path)
  name = name.split("/")[-1]
  print(name)

  # The embedding accumulators are created outside the loop; empty them so this
  # file's averages are not mixed with entries from previously processed files.
  average_metric.clear()
  greedy_metric.clear()
  extrema_metric.clear()

  # NOTE(review): `summaries` is filled from the Gold column and `references`
  # from the Generated column, and they are passed in that order to the
  # corpus-level metrics below. Confirm that this matches the
  # (prediction, reference) order those metrics expect.
  summaries = [str(entry.get('Gold_report')) for entry in dic]
  references = [entry.get('Generated_report') for entry in dic]

  # Calculate BLEU-1 score. corpus_bleu works on tokens (one list of tokenised
  # references per hypothesis), so the texts are whitespace-tokenised first;
  # handing it raw strings would compare individual characters.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[ref.split()] for ref in references],
      [hyp.split() for hyp in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries,references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # Results are stored in the order of score_lists: index 0 is the BERTScore
  # result, index 1 the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references,value))

  scores_list=[]
  total = len(dic)
  print(total)
  rouge = Rouge()
  for entry in dic:
    # The generated text is the hypothesis and the gold text the reference,
    # matching the argument order of Rouge.get_scores(hyps, refs). An empty
    # or "." generation is replaced by the placeholder "NA".
    generated_q = entry["Generated_report"]
    if not generated_q.strip() or generated_q == ".":
      generated_q = "NA"

    try:
      reference_q = entry["Gold_report"]
    except KeyError as e:
      print("Error:", e)
      continue

    # Embedding-based similarities. All three are computed before any is
    # stored so the three lists always stay the same length.
    try:
      avg_sim = embedding_average_similarity(generated_q, reference_q, glove_vectors)
      greedy_sim = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
      extrema_sim = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(avg_sim)
      greedy_metric.append(greedy_sim)
      extrema_metric.append(extrema_sim)

    # ROUGE is attempted for every entry, including those whose embedding
    # metrics failed above.
    try:
      scores = rouge.get_scores(generated_q, reference_q)
    except Exception as e:
      print("Error:", e)
      continue
    scores_list.append(scores)

  # Average each ROUGE statistic over the entries that were actually scored,
  # so skipped entries do not silently pull the averages towards zero.
  scored = len(scores_list)
  if scored != total:
    print("ROUGE computed for {} of {} entries".format(scored, total))
  rouge_mean = {}
  for metric in ("rouge-1", "rouge-2", "rouge-l"):
    for stat in ("r", "p", "f"):
      stat_sum = sum(item[0][metric][stat] for item in scores_list)
      rouge_mean[metric, stat] = stat_sum / scored if scored else 0.0

  rouge1_r, rouge1_p, rouge1_f = (rouge_mean["rouge-1", s] for s in ("r", "p", "f"))
  rouge2_r, rouge2_p, rouge2_f = (rouge_mean["rouge-2", s] for s in ("r", "p", "f"))
  rougel_r, rougel_p, rougel_f = (rouge_mean["rouge-l", s] for s in ("r", "p", "f"))

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  # NOTE(review): the sentences are passed as raw strings; check whether
  # distinct_n_corpus_level expects tokenised sentences instead.
  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1 )
  print("Distinct-1 score:", distinct_1_score)

  # An empty accumulator (no entry could be scored) is reported as NaN rather
  # than aborting the row with a division by zero.
  nan = float("nan")
  result_dict = {"Model": name, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
                 "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
                 "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
                 "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
                 "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
                 "rougel_f": rougel_f,
                 "Embd_Avg": Average(average_metric) if average_metric else nan,
                 "Embd_Grdy": Average(greedy_metric) if greedy_metric else nan,
                 "Embd_Extrm": Average(extrema_metric) if extrema_metric else nan}

  result_file.append(result_dict)
  print(result_dict)
# Write the collected metric rows next to the model outputs, once as an Excel
# sheet and once as a LaTeX table; the frame is built a single time and reused.
results_table = pd.DataFrame(result_file)
results = results_table.to_excel(
    path_add+"/BART_ep_50tag_crosstandiaclip_inTok421024_3e-6_batchS_4_report_3e-6_score_results.xlsx",
    index=False,
)
results_tex = results_table.to_latex(
    path_add+"/BART_ep_50tag_crosstandiaclip_inTok421024_3e-6_batchS_4_report_3e-6_score_results.tex",
    index=False,
)




In [None]:
######################################################################################################################################################

In [None]:
#########IMP EXCEL TO JSON###############
import pandas as pd

# Source spreadsheet and the JSON file it is exported to.
excel_file = "/content/MeSum.xlsx"
json_file = "/content/MeSum.json"

# Load the sheet and serialise it with orient="records" (a JSON array of row objects).
df = pd.read_excel(excel_file)
json_data = df.to_json(orient="records")

# Persist the serialised rows.
with open(json_file, "w") as out_fh:
    out_fh.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
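# Evaluation: score the generated meme captions against the gold captions
# (BLEU, BERTScore, CIDEr, ROUGE, METEOR, Distinct-n and embedding-based metrics).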


import pandas as pd
import glob
import json
import os
# Per-entry embedding-similarity scores, filled by the evaluation loop below.
average_metric = []
greedy_metric = []
extrema_metric = []


def Average(lst):
    """Return the arithmetic mean of the values in ``lst``.

    Args:
        lst: A sized collection of numbers.

    Returns:
        ``sum(lst) / len(lst)``, or ``float("nan")`` when ``lst`` is empty, so
        that a run in which no entry could be scored still produces a result
        row instead of raising ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# One dict of metrics per evaluated file; converted to a DataFrame after the loop.
result_file = []

path_add = "/content/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
file_list = glob.glob("/content/MeSum.json")

# Metric back-ends are imported once, ahead of the loop, instead of on every
# iteration; only the one name used from distinct_n is imported.
from torchmetrics.functional import bleu_score
from rouge import Rouge
from distinct_n.metrics import distinct_n_corpus_level

# Score every matched prediction file and append one row of metrics per file
# to `result_file`.
for path in file_list:
  with open(path,'r') as f:
    print(path)
    dic = json.load(f)
  result_dict = {}
  # The model name is the file name without its directory and extension.
  name, _ = os.path.splitext(path)
  name = name.split("/")[-1]
  print(name)

  # The embedding accumulators are created outside the loop; empty them so this
  # file's averages are not mixed with entries from previously processed files.
  average_metric.clear()
  greedy_metric.clear()
  extrema_metric.clear()

  # NOTE(review): `summaries` is filled from the Gold column and `references`
  # from the Generated column, and they are passed in that order to the
  # corpus-level metrics below. Confirm that this matches the
  # (prediction, reference) order those metrics expect.
  summaries = [str(entry.get('Gold_meme_cap')) for entry in dic]
  references = [entry.get('Generated_meme_cap') for entry in dic]

  # Calculate BLEU-1 score. corpus_bleu works on tokens (one list of tokenised
  # references per hypothesis), so the texts are whitespace-tokenised first;
  # handing it raw strings would compare individual characters.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[ref.split()] for ref in references],
      [hyp.split() for hyp in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries,references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # Results are stored in the order of score_lists: index 0 is the BERTScore
  # result, index 1 the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references,value))

  scores_list=[]
  total = len(dic)
  print(total)
  rouge = Rouge()
  for entry in dic:
    # The generated text is the hypothesis and the gold text the reference,
    # matching the argument order of Rouge.get_scores(hyps, refs). An empty
    # or "." generation is replaced by the placeholder "NA".
    generated_q = entry["Generated_meme_cap"]
    if not generated_q.strip() or generated_q == ".":
      generated_q = "NA"

    try:
      reference_q = entry["Gold_meme_cap"]
    except KeyError as e:
      print("Error:", e)
      continue

    # Embedding-based similarities. All three are computed before any is
    # stored so the three lists always stay the same length.
    try:
      avg_sim = embedding_average_similarity(generated_q, reference_q, glove_vectors)
      greedy_sim = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
      extrema_sim = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(avg_sim)
      greedy_metric.append(greedy_sim)
      extrema_metric.append(extrema_sim)

    # ROUGE is attempted for every entry, including those whose embedding
    # metrics failed above.
    try:
      scores = rouge.get_scores(generated_q, reference_q)
    except Exception as e:
      print("Error:", e)
      continue
    scores_list.append(scores)

  # Average each ROUGE statistic over the entries that were actually scored,
  # so skipped entries do not silently pull the averages towards zero.
  scored = len(scores_list)
  if scored != total:
    print("ROUGE computed for {} of {} entries".format(scored, total))
  rouge_mean = {}
  for metric in ("rouge-1", "rouge-2", "rouge-l"):
    for stat in ("r", "p", "f"):
      stat_sum = sum(item[0][metric][stat] for item in scores_list)
      rouge_mean[metric, stat] = stat_sum / scored if scored else 0.0

  rouge1_r, rouge1_p, rouge1_f = (rouge_mean["rouge-1", s] for s in ("r", "p", "f"))
  rouge2_r, rouge2_p, rouge2_f = (rouge_mean["rouge-2", s] for s in ("r", "p", "f"))
  rougel_r, rougel_p, rougel_f = (rouge_mean["rouge-l", s] for s in ("r", "p", "f"))

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  # NOTE(review): the sentences are passed as raw strings; check whether
  # distinct_n_corpus_level expects tokenised sentences instead.
  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1 )
  print("Distinct-1 score:", distinct_1_score)

  # An empty accumulator (no entry could be scored) is reported as NaN rather
  # than aborting the row with a division by zero.
  nan = float("nan")
  result_dict = {"Model": name, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
                 "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
                 "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
                 "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
                 "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
                 "rougel_f": rougel_f,
                 "Embd_Avg": Average(average_metric) if average_metric else nan,
                 "Embd_Grdy": Average(greedy_metric) if greedy_metric else nan,
                 "Embd_Extrm": Average(extrema_metric) if extrema_metric else nan}

  result_file.append(result_dict)
  print(result_dict)
# Write the collected metric rows once as an Excel sheet and once as a LaTeX
# table; the frame is built a single time and reused for both writers.
results_table = pd.DataFrame(result_file)
results = results_table.to_excel(
    path_add+"results.xlsx",
    index=False,
)
results_tex = results_table.to_latex(
    path_add+"results.tex",
    index=False,
)




In [None]:
#########IMP EXCEL TO JSON###############
import pandas as pd

# Source spreadsheet and the JSON file it is exported to.
excel_file = "/content/MeSum_obs.xlsx"
json_file = "/content/MeSum_obs.json"

# Load the sheet and serialise it with orient="records" (a JSON array of row objects).
df = pd.read_excel(excel_file)
json_data = df.to_json(orient="records")

# Persist the serialised rows.
with open(json_file, "w") as out_fh:
    out_fh.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
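# Evaluation: score the generated meme captions against the gold captions
# (BLEU, BERTScore, CIDEr, ROUGE, METEOR, Distinct-n and embedding-based metrics).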


import pandas as pd
import glob
import json
import os
# Per-entry embedding-similarity scores, filled by the evaluation loop below.
average_metric = []
greedy_metric = []
extrema_metric = []


def Average(lst):
    """Return the arithmetic mean of the values in ``lst``.

    Args:
        lst: A sized collection of numbers.

    Returns:
        ``sum(lst) / len(lst)``, or ``float("nan")`` when ``lst`` is empty, so
        that a run in which no entry could be scored still produces a result
        row instead of raising ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# One dict of metrics per evaluated file; converted to a DataFrame after the loop.
result_file = []

path_add = "/content/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
file_list = glob.glob("/content/MeSum_obs.json")

# Metric back-ends are imported once, ahead of the loop, instead of on every
# iteration; only the one name used from distinct_n is imported.
from torchmetrics.functional import bleu_score
from rouge import Rouge
from distinct_n.metrics import distinct_n_corpus_level

# Score every matched prediction file and append one row of metrics per file
# to `result_file`.
for path in file_list:
  with open(path,'r') as f:
    print(path)
    dic = json.load(f)
  result_dict = {}
  # The model name is the file name without its directory and extension.
  name, _ = os.path.splitext(path)
  name = name.split("/")[-1]
  print(name)

  # The embedding accumulators are created outside the loop; empty them so this
  # file's averages are not mixed with entries from previously processed files.
  average_metric.clear()
  greedy_metric.clear()
  extrema_metric.clear()

  # NOTE(review): `summaries` is filled from the Gold column and `references`
  # from the Generated column, and they are passed in that order to the
  # corpus-level metrics below. Confirm that this matches the
  # (prediction, reference) order those metrics expect.
  summaries = [str(entry.get('Gold_meme_cap')) for entry in dic]
  references = [entry.get('Generated_meme_cap') for entry in dic]

  # Calculate BLEU-1 score. corpus_bleu works on tokens (one list of tokenised
  # references per hypothesis), so the texts are whitespace-tokenised first;
  # handing it raw strings would compare individual characters.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[ref.split()] for ref in references],
      [hyp.split() for hyp in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries,references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # Results are stored in the order of score_lists: index 0 is the BERTScore
  # result, index 1 the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references,value))

  scores_list=[]
  total = len(dic)
  print(total)
  rouge = Rouge()
  for entry in dic:
    # The generated text is the hypothesis and the gold text the reference.
    # An empty or "." generation is replaced by the placeholder "NA".
    generated_q = entry["Generated_meme_cap"]
    if not generated_q.strip() or generated_q == ".":
      generated_q = "NA"

    try:
      reference_q = entry["Gold_meme_cap"]
    except KeyError as e:
      print("Error:", e)
      continue

    # Embedding-based similarities. All three are computed before any is
    # stored so the three lists always stay the same length.
    try:
      avg_sim = embedding_average_similarity(generated_q, reference_q, glove_vectors)
      greedy_sim = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
      extrema_sim = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(avg_sim)
      greedy_metric.append(greedy_sim)
      extrema_metric.append(extrema_sim)

    # ROUGE is attempted for every entry, including those whose embedding
    # metrics failed above.
    try:
      scores = rouge.get_scores(generated_q, reference_q)
    except Exception as e:
      print("Error:", e)
      continue
    scores_list.append(scores)

  # Average each ROUGE statistic over the entries that were actually scored,
  # so skipped entries do not silently pull the averages towards zero.
  scored = len(scores_list)
  if scored != total:
    print("ROUGE computed for {} of {} entries".format(scored, total))
  rouge_mean = {}
  for metric in ("rouge-1", "rouge-2", "rouge-l"):
    for stat in ("r", "p", "f"):
      stat_sum = sum(item[0][metric][stat] for item in scores_list)
      rouge_mean[metric, stat] = stat_sum / scored if scored else 0.0

  rouge1_r, rouge1_p, rouge1_f = (rouge_mean["rouge-1", s] for s in ("r", "p", "f"))
  rouge2_r, rouge2_p, rouge2_f = (rouge_mean["rouge-2", s] for s in ("r", "p", "f"))
  rougel_r, rougel_p, rougel_f = (rouge_mean["rouge-l", s] for s in ("r", "p", "f"))

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  # NOTE(review): the sentences are passed as raw strings; check whether
  # distinct_n_corpus_level expects tokenised sentences instead.
  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1 )
  print("Distinct-1 score:", distinct_1_score)

  # An empty accumulator (no entry could be scored) is reported as NaN rather
  # than aborting the row with a division by zero.
  nan = float("nan")
  result_dict = {"Model": name, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
                 "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
                 "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
                 "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
                 "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
                 "rougel_f": rougel_f,
                 "Embd_Avg": Average(average_metric) if average_metric else nan,
                 "Embd_Grdy": Average(greedy_metric) if greedy_metric else nan,
                 "Embd_Extrm": Average(extrema_metric) if extrema_metric else nan}

  result_file.append(result_dict)
  print(result_dict)
# Write the collected metric rows once as an Excel sheet and once as a LaTeX
# table; the frame is built a single time and reused for both writers.
results_table = pd.DataFrame(result_file)
results = results_table.to_excel(
    path_add+"results_obs.xlsx",
    index=False,
)
results_tex = results_table.to_latex(
    path_add+"results_obs.tex",
    index=False,
)




In [None]:
#########IMP EXCEL TO JSON###############
import pandas as pd

# Source spreadsheet and the JSON file it is exported to.
excel_file = "/content/MeSum_dep.xlsx"
json_file = "/content/MeSum_dep.json"

# Load the sheet and serialise it with orient="records" (a JSON array of row objects).
df = pd.read_excel(excel_file)
json_data = df.to_json(orient="records")

# Persist the serialised rows.
with open(json_file, "w") as out_fh:
    out_fh.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
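# Evaluation: score the generated meme captions against the gold captions
# (BLEU, BERTScore, CIDEr, ROUGE, METEOR, Distinct-n and embedding-based metrics).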


import pandas as pd
import glob
import json
import os
# Per-entry embedding-similarity scores, filled by the evaluation loop below.
average_metric = []
greedy_metric = []
extrema_metric = []


def Average(lst):
    """Return the arithmetic mean of the values in ``lst``.

    Args:
        lst: A sized collection of numbers.

    Returns:
        ``sum(lst) / len(lst)``, or ``float("nan")`` when ``lst`` is empty, so
        that a run in which no entry could be scored still produces a result
        row instead of raising ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# One dict of metrics per evaluated file; converted to a DataFrame after the loop.
result_file = []

path_add = "/content/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
file_list = glob.glob("/content/MeSum_dep.json")

# Metric back-ends are imported once, ahead of the loop, instead of on every
# iteration; only the one name used from distinct_n is imported.
from torchmetrics.functional import bleu_score
from rouge import Rouge
from distinct_n.metrics import distinct_n_corpus_level

# Score every matched prediction file and append one row of metrics per file
# to `result_file`.
for path in file_list:
  with open(path,'r') as f:
    print(path)
    dic = json.load(f)
  result_dict = {}
  # The model name is the file name without its directory and extension.
  name, _ = os.path.splitext(path)
  name = name.split("/")[-1]
  print(name)

  # The embedding accumulators are created outside the loop; empty them so this
  # file's averages are not mixed with entries from previously processed files.
  average_metric.clear()
  greedy_metric.clear()
  extrema_metric.clear()

  # NOTE(review): `summaries` is filled from the Gold column and `references`
  # from the Generated column, and they are passed in that order to the
  # corpus-level metrics below. Confirm that this matches the
  # (prediction, reference) order those metrics expect.
  summaries = [str(entry.get('Gold_meme_cap')) for entry in dic]
  references = [entry.get('Generated_meme_cap') for entry in dic]

  # Calculate BLEU-1 score. corpus_bleu works on tokens (one list of tokenised
  # references per hypothesis), so the texts are whitespace-tokenised first;
  # handing it raw strings would compare individual characters.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[ref.split()] for ref in references],
      [hyp.split() for hyp in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries,references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # Results are stored in the order of score_lists: index 0 is the BERTScore
  # result, index 1 the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references,value))

  scores_list=[]
  total = len(dic)
  print(total)
  rouge = Rouge()
  for entry in dic:
    # The generated text is the hypothesis and the gold text the reference.
    # An empty or "." generation is replaced by the placeholder "NA".
    generated_q = entry["Generated_meme_cap"]
    if not generated_q.strip() or generated_q == ".":
      generated_q = "NA"

    try:
      reference_q = entry["Gold_meme_cap"]
    except KeyError as e:
      print("Error:", e)
      continue

    # Embedding-based similarities. All three are computed before any is
    # stored so the three lists always stay the same length.
    try:
      avg_sim = embedding_average_similarity(generated_q, reference_q, glove_vectors)
      greedy_sim = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
      extrema_sim = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(avg_sim)
      greedy_metric.append(greedy_sim)
      extrema_metric.append(extrema_sim)

    # ROUGE is attempted for every entry, including those whose embedding
    # metrics failed above.
    try:
      scores = rouge.get_scores(generated_q, reference_q)
    except Exception as e:
      print("Error:", e)
      continue
    scores_list.append(scores)

  # Average each ROUGE statistic over the entries that were actually scored,
  # so skipped entries do not silently pull the averages towards zero.
  scored = len(scores_list)
  if scored != total:
    print("ROUGE computed for {} of {} entries".format(scored, total))
  rouge_mean = {}
  for metric in ("rouge-1", "rouge-2", "rouge-l"):
    for stat in ("r", "p", "f"):
      stat_sum = sum(item[0][metric][stat] for item in scores_list)
      rouge_mean[metric, stat] = stat_sum / scored if scored else 0.0

  rouge1_r, rouge1_p, rouge1_f = (rouge_mean["rouge-1", s] for s in ("r", "p", "f"))
  rouge2_r, rouge2_p, rouge2_f = (rouge_mean["rouge-2", s] for s in ("r", "p", "f"))
  rougel_r, rougel_p, rougel_f = (rouge_mean["rouge-l", s] for s in ("r", "p", "f"))

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  # NOTE(review): the sentences are passed as raw strings; check whether
  # distinct_n_corpus_level expects tokenised sentences instead.
  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1 )
  print("Distinct-1 score:", distinct_1_score)

  # An empty accumulator (no entry could be scored) is reported as NaN rather
  # than aborting the row with a division by zero.
  nan = float("nan")
  result_dict = {"Model": name, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
                 "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
                 "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
                 "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
                 "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
                 "rougel_f": rougel_f,
                 "Embd_Avg": Average(average_metric) if average_metric else nan,
                 "Embd_Grdy": Average(greedy_metric) if greedy_metric else nan,
                 "Embd_Extrm": Average(extrema_metric) if extrema_metric else nan}

  result_file.append(result_dict)
  print(result_dict)
# Write the collected metric rows once as an Excel sheet and once as a LaTeX
# table; the frame is built a single time and reused for both writers.
results_table = pd.DataFrame(result_file)
results = results_table.to_excel(
    path_add+"results_dep.xlsx",
    index=False,
)
results_tex = results_table.to_latex(
    path_add+"results_dep.tex",
    index=False,
)




In [None]:
# ---- Excel -> JSON: dump the MeSum_slp workbook as a JSON list of row records ----
import pandas as pd

# Source workbook and destination JSON file
excel_file = "/content/MeSum_slp.xlsx"
json_file = "/content/MeSum_slp.json"

# Load the workbook into a DataFrame
df = pd.read_excel(excel_file)

# Serialise the rows as a JSON array holding one object per row
json_data = df.to_json(orient="records")

# Store the serialised records on disk
with open(json_file, "w") as out_handle:
    out_handle.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
# Caption evaluation (MeSum_slp): imports and per-sample embedding-score accumulators.

import glob
import json
import os

import pandas as pd

# The evaluation loop appends one similarity score per caption pair to each list.
average_metric, greedy_metric, extrema_metric = [], [], []
def Average(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Args:
        lst: A sized collection of numbers (this cell passes the lists of
            per-sample embedding similarity scores).

    Returns:
        ``sum(lst) / len(lst)``. For an empty collection ``float("nan")`` is
        returned, so a file for which no score could be computed shows up as
        a missing value instead of aborting with ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)
# One dict of metric values is appended here for every evaluated file.
result_file = list()

# Directory prefix prepended to the exported result-table file names.
path_add = "/content/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
# Score every prediction file in `file_list` (BLEU, BERTScore, CIDEr, ROUGE,
# METEOR, Distinct-n, embedding similarities) and export one row per file.

# Third-party scorers used by the loop below.
from rouge import Rouge
from torchmetrics.functional import bleu_score
from distinct_n.metrics import *

# Prediction files to evaluate; each is read as a JSON list of caption records.
file_list = glob.glob("/content/MeSum_slp.json")
# file_list = [path_add+filename]
# path = path_add+filename
if not file_list:
  # glob yields [] when the file is missing; report it instead of silently
  # exporting empty result tables.
  print("Warning: no input file matched; the exported result tables will be empty.")

for path in file_list:
  print(path)
  with open(path,'r') as f:
    dic = json.load(f)
  # Model name = file name without directory and extension.
  name = os.path.splitext(os.path.basename(path))[0]
  print(name)

  # The embedding averages are reported per file, so start from empty lists.
  average_metric, greedy_metric, extrema_metric = [], [], []

  # `summaries` are the generated captions (hypotheses) and `references` the
  # gold captions. Missing cells (JSON null) become "" and any other value is
  # coerced to str, so the text metrics below never receive None.
  summaries = []
  references = []
  for entry in dic:
    generated_cap = entry.get('Generated_meme_cap')
    gold_cap = entry.get('Gold_meme_cap')
    summaries.append("" if generated_cap is None else str(generated_cap))
    references.append("" if gold_cap is None else str(gold_cap))

  # Calculate BLEU-1 score. corpus_bleu works on tokens: one list of tokenised
  # references per hypothesis, and one token list per hypothesis.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[gold.split()] for gold in references],
      [generated.split() for generated in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries,references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references,value))

  # Per-sample metrics: embedding similarities and ROUGE.
  scores_list=[]
  total = len(dic)
  print(total)
  rouge = Rouge()
  for generated_q, reference_q in zip(summaries, references):
    # Blank or "."-only generations are replaced by the placeholder "NA"
    # before per-sample scoring.
    if not generated_q.strip() or generated_q == ".":
      generated_q = "NA"

    # Record the three embedding scores together or not at all, so the
    # accumulator lists always describe the same set of samples.
    try:
      embedding_scores = (
          embedding_average_similarity(generated_q, reference_q, glove_vectors),
          greedy_matching_similarity(generated_q, reference_q, glove_vectors),
          vector_extrema_similarity(generated_q, reference_q, glove_vectors))
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(embedding_scores[0])
      greedy_metric.append(embedding_scores[1])
      extrema_metric.append(embedding_scores[2])

    # ROUGE is scored independently: a failed embedding lookup must not
    # remove the sample from the ROUGE average.
    try:
      scores_list.append(rouge.get_scores(generated_q, reference_q))
    except Exception as e:
      print("Error:", e)

  # Mean ROUGE recall/precision/F1 over all `total` entries; an entry whose
  # ROUGE computation failed contributes 0. An empty file yields 0.0.
  rouge_mean = {
      (variant, stat): (sum(item[0][variant][stat] for item in scores_list) / total if total else 0.0)
      for variant in ("rouge-1", "rouge-2", "rouge-l")
      for stat in ("r", "p", "f")}
  rouge1_r, rouge1_p, rouge1_f = (rouge_mean["rouge-1", stat] for stat in ("r", "p", "f"))
  rouge2_r, rouge2_p, rouge2_f = (rouge_mean["rouge-2", stat] for stat in ("r", "p", "f"))
  rougel_r, rougel_p, rougel_f = (rouge_mean["rouge-l", stat] for stat in ("r", "p", "f"))

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  # Diversity of the generated captions.
  # NOTE(review): the captions are passed as raw strings, as before; confirm
  # whether distinct_n_corpus_level expects token lists instead.
  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1 )
  print("Distinct-1 score:", distinct_1_score)

  # One table row for this file.
  result_dict = {"Model": name, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
                "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
                 "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
                 "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
                "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
                "rougel_f": rougel_f, "Embd_Avg": Average(average_metric) , "Embd_Grdy": Average(greedy_metric), "Embd_Extrm": Average(extrema_metric)}

  result_file.append(result_dict)
  print(result_dict)

# Export the collected rows. `results` keeps the DataFrame itself, because
# to_excel/to_latex return None when given a path.
results = pd.DataFrame(result_file)
results.to_excel(path_add+"results_slp.xlsx", index=False)
results.to_latex(path_add+"results_slp.tex", index=False)




In [None]:
# ---- Excel -> JSON: dump the MeSum_wp workbook as a JSON list of row records ----
import pandas as pd

# Source workbook and destination JSON file
excel_file = "/content/MeSum_wp.xlsx"
json_file = "/content/MeSum_wp.json"

# Load the workbook into a DataFrame
df = pd.read_excel(excel_file)

# Serialise the rows as a JSON array holding one object per row
json_data = df.to_json(orient="records")

# Store the serialised records on disk
with open(json_file, "w") as out_handle:
    out_handle.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
# Caption evaluation (MeSum_wp): imports and per-sample embedding-score accumulators.

import glob
import json
import os

import pandas as pd

# The evaluation loop appends one similarity score per caption pair to each list.
average_metric, greedy_metric, extrema_metric = [], [], []
def Average(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Args:
        lst: A sized collection of numbers (this cell passes the lists of
            per-sample embedding similarity scores).

    Returns:
        ``sum(lst) / len(lst)``. For an empty collection ``float("nan")`` is
        returned, so a file for which no score could be computed shows up as
        a missing value instead of aborting with ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)
# One dict of metric values is appended here for every evaluated file.
result_file = list()

# Directory prefix prepended to the exported result-table file names.
path_add = "/content/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
# Score every prediction file in `file_list` (BLEU, BERTScore, CIDEr, ROUGE,
# METEOR, Distinct-n, embedding similarities) and export one row per file.

# Third-party scorers used by the loop below.
from rouge import Rouge
from torchmetrics.functional import bleu_score
from distinct_n.metrics import *

# Prediction files to evaluate; each is read as a JSON list of caption records.
file_list = glob.glob("/content/MeSum_wp.json")
# file_list = [path_add+filename]
# path = path_add+filename
if not file_list:
  # glob yields [] when the file is missing; report it instead of silently
  # exporting empty result tables.
  print("Warning: no input file matched; the exported result tables will be empty.")

for path in file_list:
  print(path)
  with open(path,'r') as f:
    dic = json.load(f)
  # Model name = file name without directory and extension.
  name = os.path.splitext(os.path.basename(path))[0]
  print(name)

  # The embedding averages are reported per file, so start from empty lists.
  average_metric, greedy_metric, extrema_metric = [], [], []

  # `summaries` are the generated captions (hypotheses) and `references` the
  # gold captions. Missing cells (JSON null) become "" and any other value is
  # coerced to str, so the text metrics below never receive None.
  summaries = []
  references = []
  for entry in dic:
    generated_cap = entry.get('Generated_meme_cap')
    gold_cap = entry.get('Gold_meme_cap')
    summaries.append("" if generated_cap is None else str(generated_cap))
    references.append("" if gold_cap is None else str(gold_cap))

  # Calculate BLEU-1 score. corpus_bleu works on tokens: one list of tokenised
  # references per hypothesis, and one token list per hypothesis.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[gold.split()] for gold in references],
      [generated.split() for generated in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries,references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references,value))

  # Per-sample metrics: embedding similarities and ROUGE.
  scores_list=[]
  total = len(dic)
  print(total)
  rouge = Rouge()
  for generated_q, reference_q in zip(summaries, references):
    # Blank or "."-only generations are replaced by the placeholder "NA"
    # before per-sample scoring.
    if not generated_q.strip() or generated_q == ".":
      generated_q = "NA"

    # Record the three embedding scores together or not at all, so the
    # accumulator lists always describe the same set of samples.
    try:
      embedding_scores = (
          embedding_average_similarity(generated_q, reference_q, glove_vectors),
          greedy_matching_similarity(generated_q, reference_q, glove_vectors),
          vector_extrema_similarity(generated_q, reference_q, glove_vectors))
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(embedding_scores[0])
      greedy_metric.append(embedding_scores[1])
      extrema_metric.append(embedding_scores[2])

    # ROUGE is scored independently: a failed embedding lookup must not
    # remove the sample from the ROUGE average.
    try:
      scores_list.append(rouge.get_scores(generated_q, reference_q))
    except Exception as e:
      print("Error:", e)

  # Mean ROUGE recall/precision/F1 over all `total` entries; an entry whose
  # ROUGE computation failed contributes 0. An empty file yields 0.0.
  rouge_mean = {
      (variant, stat): (sum(item[0][variant][stat] for item in scores_list) / total if total else 0.0)
      for variant in ("rouge-1", "rouge-2", "rouge-l")
      for stat in ("r", "p", "f")}
  rouge1_r, rouge1_p, rouge1_f = (rouge_mean["rouge-1", stat] for stat in ("r", "p", "f"))
  rouge2_r, rouge2_p, rouge2_f = (rouge_mean["rouge-2", stat] for stat in ("r", "p", "f"))
  rougel_r, rougel_p, rougel_f = (rouge_mean["rouge-l", stat] for stat in ("r", "p", "f"))

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  # Diversity of the generated captions.
  # NOTE(review): the captions are passed as raw strings, as before; confirm
  # whether distinct_n_corpus_level expects token lists instead.
  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1 )
  print("Distinct-1 score:", distinct_1_score)

  # One table row for this file.
  result_dict = {"Model": name, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
                "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
                 "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
                 "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
                "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
                "rougel_f": rougel_f, "Embd_Avg": Average(average_metric) , "Embd_Grdy": Average(greedy_metric), "Embd_Extrm": Average(extrema_metric)}

  result_file.append(result_dict)
  print(result_dict)

# Export the collected rows. `results` keeps the DataFrame itself, because
# to_excel/to_latex return None when given a path.
results = pd.DataFrame(result_file)
results.to_excel(path_add+"results_wp.xlsx", index=False)
results.to_latex(path_add+"results_wp.tex", index=False)




In [None]:
# ---- Excel -> JSON: dump the MeSum_old workbook as a JSON list of row records ----
import pandas as pd

# Source workbook and destination JSON file
excel_file = "/content/MeSum_old.xlsx"
json_file = "/content/MeSum_old.json"

# Load the workbook into a DataFrame
df = pd.read_excel(excel_file)

# Serialise the rows as a JSON array holding one object per row
json_data = df.to_json(orient="records")

# Store the serialised records on disk
with open(json_file, "w") as out_handle:
    out_handle.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
# Caption evaluation (MeSum_old): imports and per-sample embedding-score accumulators.

import glob
import json
import os

import pandas as pd

# The evaluation loop appends one similarity score per caption pair to each list.
average_metric, greedy_metric, extrema_metric = [], [], []
def Average(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Args:
        lst: A sized collection of numbers (this cell passes the lists of
            per-sample embedding similarity scores).

    Returns:
        ``sum(lst) / len(lst)``. For an empty collection ``float("nan")`` is
        returned, so a file for which no score could be computed shows up as
        a missing value instead of aborting with ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)
# One dict of metric values is appended here for every evaluated file.
result_file = list()

# Directory prefix prepended to the exported result-table file names.
path_add = "/content/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
# Score every prediction file in `file_list` (BLEU, BERTScore, CIDEr, ROUGE,
# METEOR, Distinct-n, embedding similarities) and export one row per file.

# Third-party scorers used by the loop below.
from rouge import Rouge
from torchmetrics.functional import bleu_score
from distinct_n.metrics import *

# Prediction files to evaluate; each is read as a JSON list of caption records.
file_list = glob.glob("/content/MeSum_old.json")
# file_list = [path_add+filename]
# path = path_add+filename
if not file_list:
  # glob yields [] when the file is missing; report it instead of silently
  # exporting empty result tables.
  print("Warning: no input file matched; the exported result tables will be empty.")

for path in file_list:
  print(path)
  with open(path,'r') as f:
    dic = json.load(f)
  # Model name = file name without directory and extension.
  name = os.path.splitext(os.path.basename(path))[0]
  print(name)

  # The embedding averages are reported per file, so start from empty lists.
  average_metric, greedy_metric, extrema_metric = [], [], []

  # `summaries` are the generated captions (hypotheses) and `references` the
  # gold captions. Missing cells (JSON null) become "" and any other value is
  # coerced to str, so the text metrics below never receive None.
  summaries = []
  references = []
  for entry in dic:
    generated_cap = entry.get('Generated_meme_cap')
    gold_cap = entry.get('Gold_meme_cap')
    summaries.append("" if generated_cap is None else str(generated_cap))
    references.append("" if gold_cap is None else str(gold_cap))

  # Calculate BLEU-1 score. corpus_bleu works on tokens: one list of tokenised
  # references per hypothesis, and one token list per hypothesis.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[gold.split()] for gold in references],
      [generated.split() for generated in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries,references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references,value))

  # Per-sample metrics: embedding similarities and ROUGE.
  scores_list=[]
  total = len(dic)
  print(total)
  rouge = Rouge()
  for generated_q, reference_q in zip(summaries, references):
    # Blank or "."-only generations are replaced by the placeholder "NA"
    # before per-sample scoring.
    if not generated_q.strip() or generated_q == ".":
      generated_q = "NA"

    # Record the three embedding scores together or not at all, so the
    # accumulator lists always describe the same set of samples.
    try:
      embedding_scores = (
          embedding_average_similarity(generated_q, reference_q, glove_vectors),
          greedy_matching_similarity(generated_q, reference_q, glove_vectors),
          vector_extrema_similarity(generated_q, reference_q, glove_vectors))
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(embedding_scores[0])
      greedy_metric.append(embedding_scores[1])
      extrema_metric.append(embedding_scores[2])

    # ROUGE is scored independently: a failed embedding lookup must not
    # remove the sample from the ROUGE average.
    try:
      scores_list.append(rouge.get_scores(generated_q, reference_q))
    except Exception as e:
      print("Error:", e)

  # Mean ROUGE recall/precision/F1 over all `total` entries; an entry whose
  # ROUGE computation failed contributes 0. An empty file yields 0.0.
  rouge_mean = {
      (variant, stat): (sum(item[0][variant][stat] for item in scores_list) / total if total else 0.0)
      for variant in ("rouge-1", "rouge-2", "rouge-l")
      for stat in ("r", "p", "f")}
  rouge1_r, rouge1_p, rouge1_f = (rouge_mean["rouge-1", stat] for stat in ("r", "p", "f"))
  rouge2_r, rouge2_p, rouge2_f = (rouge_mean["rouge-2", stat] for stat in ("r", "p", "f"))
  rougel_r, rougel_p, rougel_f = (rouge_mean["rouge-l", stat] for stat in ("r", "p", "f"))

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  # Diversity of the generated captions.
  # NOTE(review): the captions are passed as raw strings, as before; confirm
  # whether distinct_n_corpus_level expects token lists instead.
  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1 )
  print("Distinct-1 score:", distinct_1_score)

  # One table row for this file.
  result_dict = {"Model": name, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
                "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
                 "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
                 "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
                "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
                "rougel_f": rougel_f, "Embd_Avg": Average(average_metric) , "Embd_Grdy": Average(greedy_metric), "Embd_Extrm": Average(extrema_metric)}

  result_file.append(result_dict)
  print(result_dict)

# Export the collected rows. `results` keeps the DataFrame itself, because
# to_excel/to_latex return None when given a path.
results = pd.DataFrame(result_file)
results.to_excel(path_add+"results_old.xlsx", index=False)
results.to_latex(path_add+"results_old.tex", index=False)




In [None]:
# ---- Excel -> JSON: dump the MeSum_old_dep workbook as a JSON list of row records ----
import pandas as pd

# Source workbook and destination JSON file
excel_file = "/content/MeSum_old_dep.xlsx"
json_file = "/content/MeSum_old_dep.json"

# Load the workbook into a DataFrame
df = pd.read_excel(excel_file)

# Serialise the rows as a JSON array holding one object per row
json_data = df.to_json(orient="records")

# Store the serialised records on disk
with open(json_file, "w") as out_handle:
    out_handle.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
# Caption evaluation (MeSum_old_dep): imports and per-sample embedding-score accumulators.

import glob
import json
import os

import pandas as pd

# The evaluation loop appends one similarity score per caption pair to each list.
average_metric, greedy_metric, extrema_metric = [], [], []
def Average(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Args:
        lst: A sized collection of numbers (this cell passes the lists of
            per-sample embedding similarity scores).

    Returns:
        ``sum(lst) / len(lst)``. For an empty collection ``float("nan")`` is
        returned, so a file for which no score could be computed shows up as
        a missing value instead of aborting with ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)
# One dict of metric values is appended here for every evaluated file.
result_file = list()

# Directory prefix prepended to the exported result-table file names.
path_add = "/content/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
# Score every prediction file in `file_list` (BLEU, BERTScore, CIDEr, ROUGE,
# METEOR, Distinct-n, embedding similarities) and export one row per file.

# Third-party scorers used by the loop below.
from rouge import Rouge
from torchmetrics.functional import bleu_score
from distinct_n.metrics import *

# Prediction files to evaluate; each is read as a JSON list of caption records.
file_list = glob.glob("/content/MeSum_old_dep.json")
# file_list = [path_add+filename]
# path = path_add+filename
if not file_list:
  # glob yields [] when the file is missing; report it instead of silently
  # exporting empty result tables.
  print("Warning: no input file matched; the exported result tables will be empty.")

for path in file_list:
  print(path)
  with open(path,'r') as f:
    dic = json.load(f)
  # Model name = file name without directory and extension.
  name = os.path.splitext(os.path.basename(path))[0]
  print(name)

  # The embedding averages are reported per file, so start from empty lists.
  average_metric, greedy_metric, extrema_metric = [], [], []

  # `summaries` are the generated captions (hypotheses) and `references` the
  # gold captions. Missing cells (JSON null) become "" and any other value is
  # coerced to str, so the text metrics below never receive None.
  summaries = []
  references = []
  for entry in dic:
    generated_cap = entry.get('Generated_meme_cap')
    gold_cap = entry.get('Gold_meme_cap')
    summaries.append("" if generated_cap is None else str(generated_cap))
    references.append("" if gold_cap is None else str(gold_cap))

  # Calculate BLEU-1 score. corpus_bleu works on tokens: one list of tokenised
  # references per hypothesis, and one token list per hypothesis.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[gold.split()] for gold in references],
      [generated.split() for generated in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries,references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references,value))

  # Per-sample metrics: embedding similarities and ROUGE.
  scores_list=[]
  total = len(dic)
  print(total)
  rouge = Rouge()
  for generated_q, reference_q in zip(summaries, references):
    # Blank or "."-only generations are replaced by the placeholder "NA"
    # before per-sample scoring.
    if not generated_q.strip() or generated_q == ".":
      generated_q = "NA"

    # Record the three embedding scores together or not at all, so the
    # accumulator lists always describe the same set of samples.
    try:
      embedding_scores = (
          embedding_average_similarity(generated_q, reference_q, glove_vectors),
          greedy_matching_similarity(generated_q, reference_q, glove_vectors),
          vector_extrema_similarity(generated_q, reference_q, glove_vectors))
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(embedding_scores[0])
      greedy_metric.append(embedding_scores[1])
      extrema_metric.append(embedding_scores[2])

    # ROUGE is scored independently: a failed embedding lookup must not
    # remove the sample from the ROUGE average.
    try:
      scores_list.append(rouge.get_scores(generated_q, reference_q))
    except Exception as e:
      print("Error:", e)

  # Mean ROUGE recall/precision/F1 over all `total` entries; an entry whose
  # ROUGE computation failed contributes 0. An empty file yields 0.0.
  rouge_mean = {
      (variant, stat): (sum(item[0][variant][stat] for item in scores_list) / total if total else 0.0)
      for variant in ("rouge-1", "rouge-2", "rouge-l")
      for stat in ("r", "p", "f")}
  rouge1_r, rouge1_p, rouge1_f = (rouge_mean["rouge-1", stat] for stat in ("r", "p", "f"))
  rouge2_r, rouge2_p, rouge2_f = (rouge_mean["rouge-2", stat] for stat in ("r", "p", "f"))
  rougel_r, rougel_p, rougel_f = (rouge_mean["rouge-l", stat] for stat in ("r", "p", "f"))

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  # Diversity of the generated captions.
  # NOTE(review): the captions are passed as raw strings, as before; confirm
  # whether distinct_n_corpus_level expects token lists instead.
  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1 )
  print("Distinct-1 score:", distinct_1_score)

  # One table row for this file.
  result_dict = {"Model": name, "tm_bleu": tm_bleuscore.numpy().tolist(),"cider": bert_cider[1]['cider'],"Meteor": meteor,
                "Distinct1": distinct_1_score,"Distinct2": distinct_2_score,"bert_P": bert_cider[0]['bert_score_precision'],
                 "bert_R": bert_cider[0]['bert_score_recall'], "bert_F1": bert_cider[0]['bert_score_f1'],
                 "rouge1_r" :rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
                "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f, "rougel_r": rougel_r, "rougel_p": rougel_p,
                "rougel_f": rougel_f, "Embd_Avg": Average(average_metric) , "Embd_Grdy": Average(greedy_metric), "Embd_Extrm": Average(extrema_metric)}

  result_file.append(result_dict)
  print(result_dict)

# Export the collected rows. `results` keeps the DataFrame itself, because
# to_excel/to_latex return None when given a path.
results = pd.DataFrame(result_file)
results.to_excel(path_add+"results_old_dep.xlsx", index=False)
results.to_latex(path_add+"results_old_dep.tex", index=False)




In [None]:
# ---- Excel -> JSON: dump the MeSum_old_obs workbook as a JSON list of row records ----
import pandas as pd

# Source workbook and destination JSON file
excel_file = "/content/MeSum_old_obs.xlsx"
json_file = "/content/MeSum_old_obs.json"

# Load the workbook into a DataFrame
df = pd.read_excel(excel_file)

# Serialise the rows as a JSON array holding one object per row
json_data = df.to_json(orient="records")

# Store the serialised records on disk
with open(json_file, "w") as out_handle:
    out_handle.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
# Caption-generation evaluation for MeSum_old_obs.
#
# For every input JSON (a list of row records) this cell scores the generated
# captions against the gold captions with BLEU, BERTScore, CIDEr, ROUGE,
# METEOR, Distinct-n and three GloVe embedding metrics, then writes one row
# per input file to an Excel sheet and a LaTeX table.
#
# Names expected from earlier cells: nltk, Evaluate_R, compute_meteor_score,
# glove_vectors, embedding_average_similarity, greedy_matching_similarity and
# vector_extrema_similarity.

import glob
import json
import os

import pandas as pd
from rouge import Rouge
from torchmetrics.functional import bleu_score
# Star import kept from the original cell; presumably this is what provides
# distinct_n_corpus_level -- TODO confirm and switch to an explicit import.
from distinct_n.metrics import *


def Average(lst):
    """Return the arithmetic mean of ``lst``, or NaN when ``lst`` is empty."""
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# ---- Per-cell configuration -------------------------------------------------
path_add = "/content/"                      # directory the result tables go to
input_pattern = "/content/MeSum_old_obs.json"
generated_key = "Generated_meme_cap"        # column holding the model output
gold_key = "Gold_meme_cap"                  # column holding the gold caption
output_stem = "results_old_obs"

result_file = []
file_list = glob.glob(input_pattern)
if not file_list:
    print("No input files matched:", input_pattern)

for path in file_list:
    print(path)
    with open(path, 'r') as f:
        dic = json.load(f)
    name = os.path.splitext(os.path.basename(path))[0]
    print(name)

    # summaries = generated captions (hypotheses), references = gold captions.
    # The previous version filled these two lists the other way round, which
    # scored the gold text against the model output and measured Distinct-n
    # on the gold captions.
    summaries = []
    references = []
    for entry in dic:
        generated = entry.get(generated_key)
        # A blank spreadsheet cell arrives as JSON null; keep it a string.
        summaries.append("" if generated is None else str(generated))
        references.append(str(entry.get(gold_key)))

    # BLEU-1 (printed only). NLTK expects token lists: one list of reference
    # token lists per hypothesis, and one token list per hypothesis.
    bleu1 = nltk.translate.bleu_score.corpus_bleu(
        [[ref.split()] for ref in references],
        [hyp.split() for hyp in summaries],
        weights=(1.0, 0, 0, 0),
    )
    print("\nBLEU_1: ", bleu1)

    # torchmetrics takes (preds, target).
    tm_bleuscore = bleu_score(summaries, references)
    print("torchmetrics bleu: ", tm_bleuscore)

    # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
    bert_cider = []
    score_lists = ["bert_score", "cider_score"]
    for value in score_lists:
        print("The result of :", value)
        bert_cider.append(Evaluate_R(summaries, references, value))

    # Per-pair metrics. The accumulators are reset for every file so that
    # scores from one file never leak into the next file's averages.
    average_metric = []
    greedy_metric = []
    extrema_metric = []
    scores_list = []
    total = len(dic)
    print(total)
    rouge = Rouge()
    for entry in dic:
        generated_q = entry.get(generated_key)
        generated_q = "" if generated_q is None else str(generated_q)
        # Blank or "." outputs are replaced by the placeholder "NA".
        if not generated_q.strip() or generated_q == ".":
            generated_q = "NA"
        reference_q = entry.get(gold_key)

        # Embedding metrics: compute all three before appending so the three
        # lists always stay the same length.
        try:
            emb_avg = embedding_average_similarity(generated_q, reference_q, glove_vectors)
            emb_grd = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
            emb_ext = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
        except Exception as e:
            print("Error:", e)
        else:
            average_metric.append(emb_avg)
            greedy_metric.append(emb_grd)
            extrema_metric.append(emb_ext)

        # ROUGE does not depend on the embeddings, so it is attempted even
        # when the embedding metrics failed for this pair.
        try:
            scores_list.append(rouge.get_scores(generated_q, reference_q))
        except Exception as e:
            print("Error:", e)

    # Average ROUGE over ALL entries: pairs whose ROUGE computation failed
    # contribute zero (the denominator is the number of entries, as before).
    rouge_avg = {}
    for variant in ("rouge-1", "rouge-2", "rouge-l"):
        for stat in ("r", "p", "f"):
            summed = sum(item[0][variant][stat] for item in scores_list)
            key = variant.replace("-", "") + "_" + stat   # e.g. "rouge1_r"
            rouge_avg[key] = summed / total if total else float("nan")

    print("\n Average scores:\n")
    print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rouge1_r"], rouge_avg["rouge1_p"], rouge_avg["rouge1_f"]))
    print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rouge2_r"], rouge_avg["rouge2_p"], rouge_avg["rouge2_f"]))
    print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rougel_r"], rouge_avg["rougel_p"], rouge_avg["rougel_f"]))

    print("The result of : meteor_score")
    meteor = compute_meteor_score(summaries, references)
    print(meteor)

    # NOTE(review): the captions are passed as raw strings; confirm whether
    # distinct_n_corpus_level expects tokenised sentences instead.
    distinct_2_score = distinct_n_corpus_level(summaries, 2)
    print("Distinct-2 score:", distinct_2_score)
    distinct_1_score = distinct_n_corpus_level(summaries, 1)
    print("Distinct-1 score:", distinct_1_score)

    # Key order defines the column order of the exported tables.
    result_dict = {
        "Model": name,
        "tm_bleu": tm_bleuscore.numpy().tolist(),
        "cider": bert_cider[1]['cider'],
        "Meteor": meteor,
        "Distinct1": distinct_1_score,
        "Distinct2": distinct_2_score,
        "bert_P": bert_cider[0]['bert_score_precision'],
        "bert_R": bert_cider[0]['bert_score_recall'],
        "bert_F1": bert_cider[0]['bert_score_f1'],
        **rouge_avg,
        "Embd_Avg": Average(average_metric),
        "Embd_Grdy": Average(greedy_metric),
        "Embd_Extrm": Average(extrema_metric),
    }

    result_file.append(result_dict)
    print(result_dict)

# One row per evaluated file.
results_df = pd.DataFrame(result_file)
results_df.to_excel(path_add + output_stem + ".xlsx", index=False)
results_df.to_latex(path_add + output_stem + ".tex", index=False)




In [None]:
# Convert the MeSum_old_slp spreadsheet into a JSON file of row records.
import pandas as pd

# Source spreadsheet and destination JSON file.
excel_file = "/content/MeSum_old_slp.xlsx"
json_file = "/content/MeSum_old_slp.json"

# Load the sheet and serialise it as a JSON array with one object per row.
df = pd.read_excel(excel_file)
json_data = df.to_json(orient="records")

# Persist the serialised records.
with open(json_file, "w") as out_handle:
    out_handle.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
# Caption-generation evaluation for MeSum_old_slp.
#
# For every input JSON (a list of row records) this cell scores the generated
# captions against the gold captions with BLEU, BERTScore, CIDEr, ROUGE,
# METEOR, Distinct-n and three GloVe embedding metrics, then writes one row
# per input file to an Excel sheet and a LaTeX table.
#
# Names expected from earlier cells: nltk, Evaluate_R, compute_meteor_score,
# glove_vectors, embedding_average_similarity, greedy_matching_similarity and
# vector_extrema_similarity.

import glob
import json
import os

import pandas as pd
from rouge import Rouge
from torchmetrics.functional import bleu_score
# Star import kept from the original cell; presumably this is what provides
# distinct_n_corpus_level -- TODO confirm and switch to an explicit import.
from distinct_n.metrics import *


def Average(lst):
    """Return the arithmetic mean of ``lst``, or NaN when ``lst`` is empty."""
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# ---- Per-cell configuration -------------------------------------------------
path_add = "/content/"                      # directory the result tables go to
input_pattern = "/content/MeSum_old_slp.json"
generated_key = "Generated_meme_cap"        # column holding the model output
gold_key = "Gold_meme_cap"                  # column holding the gold caption
output_stem = "results_old_slp"

result_file = []
file_list = glob.glob(input_pattern)
if not file_list:
    print("No input files matched:", input_pattern)

for path in file_list:
    print(path)
    with open(path, 'r') as f:
        dic = json.load(f)
    name = os.path.splitext(os.path.basename(path))[0]
    print(name)

    # summaries = generated captions (hypotheses), references = gold captions.
    # The previous version filled these two lists the other way round, which
    # scored the gold text against the model output and measured Distinct-n
    # on the gold captions.
    summaries = []
    references = []
    for entry in dic:
        generated = entry.get(generated_key)
        # A blank spreadsheet cell arrives as JSON null; keep it a string.
        summaries.append("" if generated is None else str(generated))
        references.append(str(entry.get(gold_key)))

    # BLEU-1 (printed only). NLTK expects token lists: one list of reference
    # token lists per hypothesis, and one token list per hypothesis.
    bleu1 = nltk.translate.bleu_score.corpus_bleu(
        [[ref.split()] for ref in references],
        [hyp.split() for hyp in summaries],
        weights=(1.0, 0, 0, 0),
    )
    print("\nBLEU_1: ", bleu1)

    # torchmetrics takes (preds, target).
    tm_bleuscore = bleu_score(summaries, references)
    print("torchmetrics bleu: ", tm_bleuscore)

    # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
    bert_cider = []
    score_lists = ["bert_score", "cider_score"]
    for value in score_lists:
        print("The result of :", value)
        bert_cider.append(Evaluate_R(summaries, references, value))

    # Per-pair metrics. The accumulators are reset for every file so that
    # scores from one file never leak into the next file's averages.
    average_metric = []
    greedy_metric = []
    extrema_metric = []
    scores_list = []
    total = len(dic)
    print(total)
    rouge = Rouge()
    for entry in dic:
        generated_q = entry.get(generated_key)
        generated_q = "" if generated_q is None else str(generated_q)
        # Blank or "." outputs are replaced by the placeholder "NA".
        if not generated_q.strip() or generated_q == ".":
            generated_q = "NA"
        reference_q = entry.get(gold_key)

        # Embedding metrics: compute all three before appending so the three
        # lists always stay the same length.
        try:
            emb_avg = embedding_average_similarity(generated_q, reference_q, glove_vectors)
            emb_grd = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
            emb_ext = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
        except Exception as e:
            print("Error:", e)
        else:
            average_metric.append(emb_avg)
            greedy_metric.append(emb_grd)
            extrema_metric.append(emb_ext)

        # ROUGE does not depend on the embeddings, so it is attempted even
        # when the embedding metrics failed for this pair.
        try:
            scores_list.append(rouge.get_scores(generated_q, reference_q))
        except Exception as e:
            print("Error:", e)

    # Average ROUGE over ALL entries: pairs whose ROUGE computation failed
    # contribute zero (the denominator is the number of entries, as before).
    rouge_avg = {}
    for variant in ("rouge-1", "rouge-2", "rouge-l"):
        for stat in ("r", "p", "f"):
            summed = sum(item[0][variant][stat] for item in scores_list)
            key = variant.replace("-", "") + "_" + stat   # e.g. "rouge1_r"
            rouge_avg[key] = summed / total if total else float("nan")

    print("\n Average scores:\n")
    print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rouge1_r"], rouge_avg["rouge1_p"], rouge_avg["rouge1_f"]))
    print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rouge2_r"], rouge_avg["rouge2_p"], rouge_avg["rouge2_f"]))
    print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rougel_r"], rouge_avg["rougel_p"], rouge_avg["rougel_f"]))

    print("The result of : meteor_score")
    meteor = compute_meteor_score(summaries, references)
    print(meteor)

    # NOTE(review): the captions are passed as raw strings; confirm whether
    # distinct_n_corpus_level expects tokenised sentences instead.
    distinct_2_score = distinct_n_corpus_level(summaries, 2)
    print("Distinct-2 score:", distinct_2_score)
    distinct_1_score = distinct_n_corpus_level(summaries, 1)
    print("Distinct-1 score:", distinct_1_score)

    # Key order defines the column order of the exported tables.
    result_dict = {
        "Model": name,
        "tm_bleu": tm_bleuscore.numpy().tolist(),
        "cider": bert_cider[1]['cider'],
        "Meteor": meteor,
        "Distinct1": distinct_1_score,
        "Distinct2": distinct_2_score,
        "bert_P": bert_cider[0]['bert_score_precision'],
        "bert_R": bert_cider[0]['bert_score_recall'],
        "bert_F1": bert_cider[0]['bert_score_f1'],
        **rouge_avg,
        "Embd_Avg": Average(average_metric),
        "Embd_Grdy": Average(greedy_metric),
        "Embd_Extrm": Average(extrema_metric),
    }

    result_file.append(result_dict)
    print(result_dict)

# One row per evaluated file.
results_df = pd.DataFrame(result_file)
results_df.to_excel(path_add + output_stem + ".xlsx", index=False)
results_df.to_latex(path_add + output_stem + ".tex", index=False)




In [None]:
# Convert the MeSum_old_wp spreadsheet into a JSON file of row records.
import pandas as pd

# Source spreadsheet and destination JSON file.
excel_file = "/content/MeSum_old_wp.xlsx"
json_file = "/content/MeSum_old_wp.json"

# Load the sheet and serialise it as a JSON array with one object per row.
df = pd.read_excel(excel_file)
json_data = df.to_json(orient="records")

# Persist the serialised records.
with open(json_file, "w") as out_handle:
    out_handle.write(json_data)

print("Excel file converted to JSON. JSON file saved as:", json_file)


In [None]:
# Caption-generation evaluation for MeSum_old_wp.
#
# For every input JSON (a list of row records) this cell scores the generated
# captions against the gold captions with BLEU, BERTScore, CIDEr, ROUGE,
# METEOR, Distinct-n and three GloVe embedding metrics, then writes one row
# per input file to an Excel sheet and a LaTeX table.
#
# Names expected from earlier cells: nltk, Evaluate_R, compute_meteor_score,
# glove_vectors, embedding_average_similarity, greedy_matching_similarity and
# vector_extrema_similarity.

import glob
import json
import os

import pandas as pd
from rouge import Rouge
from torchmetrics.functional import bleu_score
# Star import kept from the original cell; presumably this is what provides
# distinct_n_corpus_level -- TODO confirm and switch to an explicit import.
from distinct_n.metrics import *


def Average(lst):
    """Return the arithmetic mean of ``lst``, or NaN when ``lst`` is empty."""
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# ---- Per-cell configuration -------------------------------------------------
path_add = "/content/"                      # directory the result tables go to
input_pattern = "/content/MeSum_old_wp.json"
generated_key = "Generated_meme_cap"        # column holding the model output
gold_key = "Gold_meme_cap"                  # column holding the gold caption
output_stem = "results_old_wp"

result_file = []
file_list = glob.glob(input_pattern)
if not file_list:
    print("No input files matched:", input_pattern)

for path in file_list:
    print(path)
    with open(path, 'r') as f:
        dic = json.load(f)
    name = os.path.splitext(os.path.basename(path))[0]
    print(name)

    # summaries = generated captions (hypotheses), references = gold captions.
    # The previous version filled these two lists the other way round, which
    # scored the gold text against the model output and measured Distinct-n
    # on the gold captions.
    summaries = []
    references = []
    for entry in dic:
        generated = entry.get(generated_key)
        # A blank spreadsheet cell arrives as JSON null; keep it a string.
        summaries.append("" if generated is None else str(generated))
        references.append(str(entry.get(gold_key)))

    # BLEU-1 (printed only). NLTK expects token lists: one list of reference
    # token lists per hypothesis, and one token list per hypothesis.
    bleu1 = nltk.translate.bleu_score.corpus_bleu(
        [[ref.split()] for ref in references],
        [hyp.split() for hyp in summaries],
        weights=(1.0, 0, 0, 0),
    )
    print("\nBLEU_1: ", bleu1)

    # torchmetrics takes (preds, target).
    tm_bleuscore = bleu_score(summaries, references)
    print("torchmetrics bleu: ", tm_bleuscore)

    # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
    bert_cider = []
    score_lists = ["bert_score", "cider_score"]
    for value in score_lists:
        print("The result of :", value)
        bert_cider.append(Evaluate_R(summaries, references, value))

    # Per-pair metrics. The accumulators are reset for every file so that
    # scores from one file never leak into the next file's averages.
    average_metric = []
    greedy_metric = []
    extrema_metric = []
    scores_list = []
    total = len(dic)
    print(total)
    rouge = Rouge()
    for entry in dic:
        generated_q = entry.get(generated_key)
        generated_q = "" if generated_q is None else str(generated_q)
        # Blank or "." outputs are replaced by the placeholder "NA".
        if not generated_q.strip() or generated_q == ".":
            generated_q = "NA"
        reference_q = entry.get(gold_key)

        # Embedding metrics: compute all three before appending so the three
        # lists always stay the same length.
        try:
            emb_avg = embedding_average_similarity(generated_q, reference_q, glove_vectors)
            emb_grd = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
            emb_ext = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
        except Exception as e:
            print("Error:", e)
        else:
            average_metric.append(emb_avg)
            greedy_metric.append(emb_grd)
            extrema_metric.append(emb_ext)

        # ROUGE does not depend on the embeddings, so it is attempted even
        # when the embedding metrics failed for this pair.
        try:
            scores_list.append(rouge.get_scores(generated_q, reference_q))
        except Exception as e:
            print("Error:", e)

    # Average ROUGE over ALL entries: pairs whose ROUGE computation failed
    # contribute zero (the denominator is the number of entries, as before).
    rouge_avg = {}
    for variant in ("rouge-1", "rouge-2", "rouge-l"):
        for stat in ("r", "p", "f"):
            summed = sum(item[0][variant][stat] for item in scores_list)
            key = variant.replace("-", "") + "_" + stat   # e.g. "rouge1_r"
            rouge_avg[key] = summed / total if total else float("nan")

    print("\n Average scores:\n")
    print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rouge1_r"], rouge_avg["rouge1_p"], rouge_avg["rouge1_f"]))
    print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rouge2_r"], rouge_avg["rouge2_p"], rouge_avg["rouge2_f"]))
    print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rougel_r"], rouge_avg["rougel_p"], rouge_avg["rougel_f"]))

    print("The result of : meteor_score")
    meteor = compute_meteor_score(summaries, references)
    print(meteor)

    # NOTE(review): the captions are passed as raw strings; confirm whether
    # distinct_n_corpus_level expects tokenised sentences instead.
    distinct_2_score = distinct_n_corpus_level(summaries, 2)
    print("Distinct-2 score:", distinct_2_score)
    distinct_1_score = distinct_n_corpus_level(summaries, 1)
    print("Distinct-1 score:", distinct_1_score)

    # Key order defines the column order of the exported tables.
    result_dict = {
        "Model": name,
        "tm_bleu": tm_bleuscore.numpy().tolist(),
        "cider": bert_cider[1]['cider'],
        "Meteor": meteor,
        "Distinct1": distinct_1_score,
        "Distinct2": distinct_2_score,
        "bert_P": bert_cider[0]['bert_score_precision'],
        "bert_R": bert_cider[0]['bert_score_recall'],
        "bert_F1": bert_cider[0]['bert_score_f1'],
        **rouge_avg,
        "Embd_Avg": Average(average_metric),
        "Embd_Grdy": Average(greedy_metric),
        "Embd_Extrm": Average(extrema_metric),
    }

    result_file.append(result_dict)
    print(result_dict)

# One row per evaluated file.
results_df = pd.DataFrame(result_file)
results_df.to_excel(path_add + output_stem + ".xlsx", index=False)
results_df.to_latex(path_add + output_stem + ".tex", index=False)




In [None]:
# Caption-generation evaluation for meme_gemini_gptv4_wp (GPTV4 outputs).
#
# For every input JSON (a list of row records) this cell scores the generated
# captions against the gold captions with BLEU, BERTScore, CIDEr, ROUGE,
# METEOR, Distinct-n and three GloVe embedding metrics, then writes one row
# per input file to an Excel sheet and a LaTeX table.
#
# Names expected from earlier cells: nltk, Evaluate_R, compute_meteor_score,
# glove_vectors, embedding_average_similarity, greedy_matching_similarity and
# vector_extrema_similarity.

import glob
import json
import os

import pandas as pd
from rouge import Rouge
from torchmetrics.functional import bleu_score
# Star import kept from the original cell; presumably this is what provides
# distinct_n_corpus_level -- TODO confirm and switch to an explicit import.
from distinct_n.metrics import *


def Average(lst):
    """Return the arithmetic mean of ``lst``, or NaN when ``lst`` is empty."""
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# ---- Per-cell configuration -------------------------------------------------
path_add = "/content/"                      # directory the result tables go to
input_pattern = "/content/meme_gemini_gptv4_wp.json"
generated_key = "GPTV4"                     # column holding the model output
gold_key = "meme_caption"                   # column holding the gold caption
output_stem = "results_GPTV4_WP_temp"

result_file = []
file_list = glob.glob(input_pattern)
if not file_list:
    print("No input files matched:", input_pattern)

for path in file_list:
    print(path)
    with open(path, 'r') as f:
        dic = json.load(f)
    name = os.path.splitext(os.path.basename(path))[0]
    print(name)

    # summaries = generated captions (hypotheses), references = gold captions.
    # The previous version filled these two lists the other way round, which
    # scored the gold text against the model output and measured Distinct-n
    # on the gold captions.
    summaries = []
    references = []
    for entry in dic:
        generated = entry.get(generated_key)
        # A missing value arrives as JSON null; keep it a string.
        summaries.append("" if generated is None else str(generated))
        references.append(str(entry.get(gold_key)))

    # BLEU-1 (printed only). NLTK expects token lists: one list of reference
    # token lists per hypothesis, and one token list per hypothesis.
    bleu1 = nltk.translate.bleu_score.corpus_bleu(
        [[ref.split()] for ref in references],
        [hyp.split() for hyp in summaries],
        weights=(1.0, 0, 0, 0),
    )
    print("\nBLEU_1: ", bleu1)

    # torchmetrics takes (preds, target).
    tm_bleuscore = bleu_score(summaries, references)
    print("torchmetrics bleu: ", tm_bleuscore)

    # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
    bert_cider = []
    score_lists = ["bert_score", "cider_score"]
    for value in score_lists:
        print("The result of :", value)
        bert_cider.append(Evaluate_R(summaries, references, value))

    # Per-pair metrics. The accumulators are reset for every file so that
    # scores from one file never leak into the next file's averages.
    average_metric = []
    greedy_metric = []
    extrema_metric = []
    scores_list = []
    total = len(dic)
    print(total)
    rouge = Rouge()
    for entry in dic:
        generated_q = entry.get(generated_key)
        generated_q = "" if generated_q is None else str(generated_q)
        # Blank or "." outputs are replaced by the placeholder "NA".
        if not generated_q.strip() or generated_q == ".":
            generated_q = "NA"
        reference_q = entry.get(gold_key)

        # Embedding metrics: compute all three before appending so the three
        # lists always stay the same length.
        try:
            emb_avg = embedding_average_similarity(generated_q, reference_q, glove_vectors)
            emb_grd = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
            emb_ext = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
        except Exception as e:
            print("Error:", e)
        else:
            average_metric.append(emb_avg)
            greedy_metric.append(emb_grd)
            extrema_metric.append(emb_ext)

        # ROUGE does not depend on the embeddings, so it is attempted even
        # when the embedding metrics failed for this pair.
        try:
            scores_list.append(rouge.get_scores(generated_q, reference_q))
        except Exception as e:
            print("Error:", e)

    # Average ROUGE over ALL entries: pairs whose ROUGE computation failed
    # contribute zero (the denominator is the number of entries, as before).
    rouge_avg = {}
    for variant in ("rouge-1", "rouge-2", "rouge-l"):
        for stat in ("r", "p", "f"):
            summed = sum(item[0][variant][stat] for item in scores_list)
            key = variant.replace("-", "") + "_" + stat   # e.g. "rouge1_r"
            rouge_avg[key] = summed / total if total else float("nan")

    print("\n Average scores:\n")
    print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rouge1_r"], rouge_avg["rouge1_p"], rouge_avg["rouge1_f"]))
    print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rouge2_r"], rouge_avg["rouge2_p"], rouge_avg["rouge2_f"]))
    print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(
        rouge_avg["rougel_r"], rouge_avg["rougel_p"], rouge_avg["rougel_f"]))

    print("The result of : meteor_score")
    meteor = compute_meteor_score(summaries, references)
    print(meteor)

    # NOTE(review): the captions are passed as raw strings; confirm whether
    # distinct_n_corpus_level expects tokenised sentences instead.
    distinct_2_score = distinct_n_corpus_level(summaries, 2)
    print("Distinct-2 score:", distinct_2_score)
    distinct_1_score = distinct_n_corpus_level(summaries, 1)
    print("Distinct-1 score:", distinct_1_score)

    # Key order defines the column order of the exported tables.
    result_dict = {
        "Model": name,
        "tm_bleu": tm_bleuscore.numpy().tolist(),
        "cider": bert_cider[1]['cider'],
        "Meteor": meteor,
        "Distinct1": distinct_1_score,
        "Distinct2": distinct_2_score,
        "bert_P": bert_cider[0]['bert_score_precision'],
        "bert_R": bert_cider[0]['bert_score_recall'],
        "bert_F1": bert_cider[0]['bert_score_f1'],
        **rouge_avg,
        "Embd_Avg": Average(average_metric),
        "Embd_Grdy": Average(greedy_metric),
        "Embd_Extrm": Average(extrema_metric),
    }

    result_file.append(result_dict)
    print(result_dict)

# One row per evaluated file.
results_df = pd.DataFrame(result_file)
results_df.to_excel(path_add + output_stem + ".xlsx", index=False)
results_df.to_latex(path_add + output_stem + ".tex", index=False)




In [None]:
# Question Answering


import pandas as pd
import glob
import json
import os
# Per-example embedding-similarity scores; the evaluation loop further down this
# cell appends one value per successfully scored example.
average_metric = []
greedy_metric = []
extrema_metric = []


def Average(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Args:
        lst: A sized iterable of numbers (the cell passes plain lists of floats).

    Returns:
        ``sum(lst) / len(lst)``, or NaN when ``lst`` is empty so that a metric with
        no scored examples shows up as a missing value instead of aborting the
        whole evaluation with ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# One dict of metric values per evaluated file; turned into a DataFrame at the
# end of the cell.
result_file = []

# Output directory prefix: the result spreadsheet (.xlsx) and LaTeX table (.tex)
# written at the end of this cell are saved as path_add + <file name>.
# The commented-out assignments below are presumably alternative experiment
# folders kept for quick switching.
path_add = "/content/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
# Prediction files to score. glob.glob returns an empty list when nothing matches
# the pattern, in which case the evaluation loop below simply does not run.
file_list = glob.glob("/content/meme_gemini_gptv4_slp.json")
# file_list = [path_add+filename]
# path = path_add+filename
# Third-party metric helpers, imported once instead of on every loop iteration.
from torchmetrics.functional import bleu_score
from rouge import Rouge
from distinct_n.metrics import distinct_n_corpus_level

# Score every prediction file and collect one row of metrics per file.
# Each file is read as a JSON list of records; the model output is taken from the
# 'GPTV4' field and the gold text from the 'meme_caption' field.
for path in file_list:
  with open(path, 'r') as f:
    print(path)
    dic = json.load(f)

  # Model name = file name without directory and extension.
  name = os.path.splitext(os.path.basename(path))[0]
  print(name)

  total = len(dic)
  print(total)
  if total == 0:
    # Nothing to score; every average below would divide by zero.
    print("Skipping empty file:", path)
    continue

  # Fresh accumulators for every file so that embedding averages of one file do
  # not leak into the next one when the glob matches several files.
  average_metric = []
  greedy_metric = []
  extrema_metric = []

  # summaries = generated text (hypotheses), references = gold text.
  # The ROUGE / embedding section below treats 'GPTV4' as the generated text and
  # 'meme_caption' as the reference; the corpus-level metrics now use the same
  # roles (they were previously fed the two fields the other way round, which
  # swapped BERTScore precision/recall and measured Distinct-n on the gold text).
  summaries = [str(entry.get('GPTV4')) for entry in dic]
  references = [str(entry.get('meme_caption')) for entry in dic]

  # BLEU-1 with NLTK. corpus_bleu expects tokenised input: one list of reference
  # token lists per hypothesis, and one token list per hypothesis. Passing raw
  # strings makes it score individual characters. Only printed, not exported.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[ref.split()] for ref in references],
      [hyp.split() for hyp in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries, references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references, value))

  # Per-example metrics: embedding similarities and ROUGE.
  scores_list = []
  rouge = Rouge()
  for entry in dic:
    generated_text = entry["GPTV4"]
    # Blank or "."-only outputs are replaced by a placeholder token, presumably
    # because the scorers cannot handle an empty hypothesis.
    if not generated_text.strip() or generated_text == ".":
      generated_q = "NA"
    else:
      generated_q = generated_text

    reference_q = entry.get("meme_caption")
    if reference_q is None:
      print("Error: missing 'meme_caption'")
      continue

    # Compute all three similarities before appending so the three lists always
    # stay the same length even if one of the calls fails.
    try:
      emb_avg = embedding_average_similarity(generated_q, reference_q, glove_vectors)
      emb_greedy = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
      emb_extrema = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(emb_avg)
      greedy_metric.append(emb_greedy)
      extrema_metric.append(emb_extrema)

    # ROUGE is attempted independently of the embedding metrics: a failure above
    # no longer drops this example from the ROUGE average.
    try:
      scores_list.append(rouge.get_scores(generated_q, reference_q))
    except Exception as e:
      print("Error:", e)

  # ROUGE averages are taken over ALL examples in the file (`total`), so examples
  # that could not be scored contribute 0.
  rouge1_r, rouge1_p, rouge1_f = [
      sum(item[0]["rouge-1"][stat] for item in scores_list) / total for stat in ("r", "p", "f")]
  rouge2_r, rouge2_p, rouge2_f = [
      sum(item[0]["rouge-2"][stat] for item in scores_list) / total for stat in ("r", "p", "f")]
  rougel_r, rougel_p, rougel_f = [
      sum(item[0]["rouge-l"][stat] for item in scores_list) / total for stat in ("r", "p", "f")]

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1)
  print("Distinct-1 score:", distinct_1_score)

  # Embedding averages are taken over the successfully scored examples only; NaN
  # marks a metric for which no example could be scored.
  embd_avg = Average(average_metric) if average_metric else float("nan")
  embd_greedy = Average(greedy_metric) if greedy_metric else float("nan")
  embd_extrema = Average(extrema_metric) if extrema_metric else float("nan")

  result_dict = {
      "Model": name,
      "tm_bleu": tm_bleuscore.numpy().tolist(),
      "cider": bert_cider[1]['cider'],
      "Meteor": meteor,
      "Distinct1": distinct_1_score,
      "Distinct2": distinct_2_score,
      "bert_P": bert_cider[0]['bert_score_precision'],
      "bert_R": bert_cider[0]['bert_score_recall'],
      "bert_F1": bert_cider[0]['bert_score_f1'],
      "rouge1_r": rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
      "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f,
      "rougel_r": rougel_r, "rougel_p": rougel_p, "rougel_f": rougel_f,
      "Embd_Avg": embd_avg,
      "Embd_Grdy": embd_greedy,
      "Embd_Extrm": embd_extrema,
  }

  result_file.append(result_dict)
  print(result_dict)

# Export one row per evaluated file. The DataFrame is built once; the return value
# of to_excel is None, so it is not bound to a name.
results_frame = pd.DataFrame(result_file)
results_frame.to_excel(path_add+"results_GPTV4_SLP_temp.xlsx", index=False)
results_frame.to_latex(path_add+"results_GPTV4_SLP_temp.tex", index=False)




In [None]:
# Question Answering


import pandas as pd
import glob
import json
import os
# Per-example embedding-similarity scores; the evaluation loop further down this
# cell appends one value per successfully scored example.
average_metric = []
greedy_metric = []
extrema_metric = []


def Average(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Args:
        lst: A sized iterable of numbers (the cell passes plain lists of floats).

    Returns:
        ``sum(lst) / len(lst)``, or NaN when ``lst`` is empty so that a metric with
        no scored examples shows up as a missing value instead of aborting the
        whole evaluation with ``ZeroDivisionError``.
    """
    if len(lst) == 0:
        return float("nan")
    return sum(lst) / len(lst)


# One dict of metric values per evaluated file; turned into a DataFrame at the
# end of the cell.
result_file = []

# Output directory prefix: the result spreadsheet (.xlsx) and LaTeX table (.tex)
# written at the end of this cell are saved as path_add + <file name>.
# NOTE(review): the input pattern assigned to `file_list` below reads from
# ".../MyDrive/anas/only_report/Model Path/" whereas this prefix has no "anas/"
# component -- confirm this directory exists and is the intended destination.
path_add = "/content/drive/MyDrive/only_report/Model Path/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/final_jsons/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlyt5/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/temp/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/onlybart/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/bs2/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/check/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/falcon/"

# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_embedd/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/ve_embmtrcs/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/video_dependent/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/oldsumm/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt_turbo/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/t5_final/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/categories/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/gpt4/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/vqg_baseline_finetuned/"
# path_add = "/content/drive/MyDrive/VideoQG/Model Path/Haadia/Model Path/flant5/"

# filename = "t5-baseTrans_inTok2600_ep_50_NEWInference.json" #1
# filename = "BART_ep_50_OUT_1024_Inference_NER_FIL_Img_cap_VTitle_NoTrans_inTok2600_Haad_Inference.json" #2
# Prediction files to score. glob.glob returns an empty list when nothing matches
# the pattern, in which case the evaluation loop below simply does not run.
file_list = glob.glob("/content/drive/MyDrive/anas/only_report/Model Path/BART_ep_20tag_crosstandiaclip_inTok421024_3e-5_batchS_12_report.json")
# file_list = [path_add+filename]
# path = path_add+filename
# Third-party metric helpers, imported once instead of on every loop iteration.
from torchmetrics.functional import bleu_score
from rouge import Rouge
from distinct_n.metrics import distinct_n_corpus_level

# Score every prediction file and collect one row of metrics per file.
# Each file is read as a JSON list of records; the model output is taken from the
# 'Generated_report' field and the gold text from the 'Gold_report' field.
for path in file_list:
  with open(path, 'r') as f:
    print(path)
    dic = json.load(f)

  # Model name = file name without directory and extension.
  name = os.path.splitext(os.path.basename(path))[0]
  print(name)

  total = len(dic)
  print(total)
  if total == 0:
    # Nothing to score; every average below would divide by zero.
    print("Skipping empty file:", path)
    continue

  # Fresh accumulators for every file so that embedding averages of one file do
  # not leak into the next one when the glob matches several files.
  average_metric = []
  greedy_metric = []
  extrema_metric = []

  # summaries = generated text (hypotheses), references = gold text.
  # The ROUGE / embedding section below treats 'Generated_report' as the generated
  # text and 'Gold_report' as the reference; the corpus-level metrics now use the
  # same roles (they were previously fed the two fields the other way round, which
  # swapped BERTScore precision/recall and measured Distinct-n on the gold text).
  summaries = [str(entry.get('Generated_report')) for entry in dic]
  references = [str(entry.get('Gold_report')) for entry in dic]

  # BLEU-1 with NLTK. corpus_bleu expects tokenised input: one list of reference
  # token lists per hypothesis, and one token list per hypothesis. Passing raw
  # strings makes it score individual characters. Only printed, not exported.
  bleu1 = nltk.translate.bleu_score.corpus_bleu(
      [[ref.split()] for ref in references],
      [hyp.split() for hyp in summaries],
      weights=(1.0, 0, 0, 0))
  print("\nBLEU_1: ",bleu1)

  tm_bleuscore = bleu_score(summaries, references)
  print("torchmetrics bleu: ",tm_bleuscore)

  # bert_cider[0] holds the BERTScore result, bert_cider[1] the CIDEr result.
  bert_cider = []
  score_lists = ["bert_score","cider_score"]
  for value in score_lists:
    print("The result of :", value)
    bert_cider.append(Evaluate_R(summaries, references, value))

  # Per-example metrics: embedding similarities and ROUGE.
  scores_list = []
  rouge = Rouge()
  for entry in dic:
    generated_text = entry["Generated_report"]
    # Blank or "."-only outputs are replaced by a placeholder token, presumably
    # because the scorers cannot handle an empty hypothesis.
    if not generated_text.strip() or generated_text == ".":
      generated_q = "NA"
    else:
      generated_q = generated_text

    reference_q = entry.get("Gold_report")
    if reference_q is None:
      print("Error: missing 'Gold_report'")
      continue

    # Compute all three similarities before appending so the three lists always
    # stay the same length even if one of the calls fails.
    try:
      emb_avg = embedding_average_similarity(generated_q, reference_q, glove_vectors)
      emb_greedy = greedy_matching_similarity(generated_q, reference_q, glove_vectors)
      emb_extrema = vector_extrema_similarity(generated_q, reference_q, glove_vectors)
    except Exception as e:
      print("Error:", e)
    else:
      average_metric.append(emb_avg)
      greedy_metric.append(emb_greedy)
      extrema_metric.append(emb_extrema)

    # ROUGE is attempted independently of the embedding metrics: a failure above
    # no longer drops this example from the ROUGE average.
    try:
      scores_list.append(rouge.get_scores(generated_q, reference_q))
    except Exception as e:
      print("Error:", e)

  # ROUGE averages are taken over ALL examples in the file (`total`), so examples
  # that could not be scored contribute 0.
  rouge1_r, rouge1_p, rouge1_f = [
      sum(item[0]["rouge-1"][stat] for item in scores_list) / total for stat in ("r", "p", "f")]
  rouge2_r, rouge2_p, rouge2_f = [
      sum(item[0]["rouge-2"][stat] for item in scores_list) / total for stat in ("r", "p", "f")]
  rougel_r, rougel_p, rougel_f = [
      sum(item[0]["rouge-l"][stat] for item in scores_list) / total for stat in ("r", "p", "f")]

  print("\n Average scores:\n")
  print("rouge-1 : \t recal: {}, precision: {}, fscore: {}".format(rouge1_r,rouge1_p,rouge1_f))
  print("\nrouge-2 : \t recal: {}, precision: {}, fscore: {}".format(rouge2_r,rouge2_p,rouge2_f))
  print("\nrouge-l : \t recal: {}, precision: {}, fscore: {}".format(rougel_r,rougel_p,rougel_f))

  print("The result of : meteor_score")
  meteor = compute_meteor_score(summaries, references)
  print(meteor)

  distinct_2_score = distinct_n_corpus_level(summaries, 2)
  print("Distinct-2 score:", distinct_2_score)
  distinct_1_score = distinct_n_corpus_level(summaries, 1)
  print("Distinct-1 score:", distinct_1_score)

  # Embedding averages are taken over the successfully scored examples only; NaN
  # marks a metric for which no example could be scored.
  embd_avg = Average(average_metric) if average_metric else float("nan")
  embd_greedy = Average(greedy_metric) if greedy_metric else float("nan")
  embd_extrema = Average(extrema_metric) if extrema_metric else float("nan")

  result_dict = {
      "Model": name,
      "tm_bleu": tm_bleuscore.numpy().tolist(),
      "cider": bert_cider[1]['cider'],
      "Meteor": meteor,
      "Distinct1": distinct_1_score,
      "Distinct2": distinct_2_score,
      "bert_P": bert_cider[0]['bert_score_precision'],
      "bert_R": bert_cider[0]['bert_score_recall'],
      "bert_F1": bert_cider[0]['bert_score_f1'],
      "rouge1_r": rouge1_r, "rouge1_p": rouge1_p, "rouge1_f": rouge1_f,
      "rouge2_r": rouge2_r, "rouge2_p": rouge2_p, "rouge2_f": rouge2_f,
      "rougel_r": rougel_r, "rougel_p": rougel_p, "rougel_f": rougel_f,
      "Embd_Avg": embd_avg,
      "Embd_Grdy": embd_greedy,
      "Embd_Extrm": embd_extrema,
  }

  result_file.append(result_dict)
  print(result_dict)

# Export one row per evaluated file. The DataFrame is built once; the return value
# of to_excel is None, so it is not bound to a name.
# NOTE(review): the input pattern in `file_list` names a "BART_ep_20 ... 3e-5_batchS_12"
# run, but the files written here are labelled "BART_ep_50 ... 3e-6_batchS_4" --
# confirm the output name is not left over from another run (it would overwrite
# that run's scores). Paths are left unchanged.
results_frame = pd.DataFrame(result_file)
results_frame.to_excel(path_add+"BART_ep_50tag_crosstandiaclip_inTok421024_3e-6_batchS_4_report_3e-6_scores.xlsx", index=False)
results_frame.to_latex(path_add+"BART_ep_50tag_crosstandiaclip_inTok421024_3e-6_batchS_4_report_3e-6_scores.tex", index=False)


