In [1]:
# Mount Google Drive at /content/drive; the input files used by this notebook
# are referenced by paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
# Input locations on the mounted Drive: the predictions CSV, the RSICD
# annotation JSON, and the RSICD image folder.
path_csv = "/content/drive/MyDrive/Classroom/MSV/predictions.csv"
path_json = "/content/drive/MyDrive/Classroom/MSV/dataset RSICD/dataset_rsicd.json"
path_img_folder = "/content/drive/MyDrive/Classroom/MSV/dataset RSICD/images"

import json

# Load the JSON file. The encoding is passed explicitly so that decoding does
# not depend on the locale default of the machine running the notebook.
with open(path_json, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Build the dataset: map each image filename to the list of its raw captions.
# If a filename appears more than once, the last entry wins.
dataset_map = {}
for item in data["images"]:
    filename = item['filename']
    captions = [sent['raw'] for sent in item['sentences']]
    dataset_map[filename] = captions

In [18]:
import pandas as pd
import re

# Load the raw predictions CSV into a DataFrame
df = pd.read_csv(path_csv)

def extract_response(text):
    """Return the text that follows the first "### Response:" marker.

    Args:
        text: Raw model output. Non-string values (for example the NaN that
            pandas uses for an empty CSV cell, or None) are treated as
            containing no response.

    Returns:
        The text after the marker with surrounding whitespace stripped, or
        '' when the marker is absent or ``text`` is not a string.
    """
    # re.search raises TypeError on non-string input, so reject it up front.
    if not isinstance(text, str):
        return ''
    # DOTALL lets '.' match newlines, so a multi-line response is kept whole.
    match = re.search(r'### Response:(.*)', text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return ''

# Apply extract_response to the 'pred_output' column, overwriting the column
# with the extracted text
df['pred_output'] = df['pred_output'].apply(extract_response)

# Save the modified frame to a new CSV file (the index is not written)
df.to_csv("/content/drive/MyDrive/Classroom/MSV/predictions_2.csv", index=False)

In [14]:
# Reload the predictions whose 'pred_output' column holds the extracted text.
df = pd.read_csv("/content/drive/MyDrive/Classroom/MSV/predictions_2.csv")

# An empty prediction is stored in the CSV as an empty field, which read_csv
# parses back as NaN (a float). Normalise every prediction to str so each
# hypothesis is text rather than a float.
predicted_captions = ['' if pd.isna(value) else str(value) for value in df['pred_output']]

# Both dictionaries use the same "ref_<row index + 1>" keys so that each
# hypothesis lines up with its references; at most five reference captions
# are kept per image.
hypothesis = {f"ref_{i+1}": [caption] for i, caption in zip(df.index, predicted_captions)}
references = {f"ref_{i+1}": dataset_map[filename][:5] for i, filename in df['filename'].items()}

# Display the dictionaries and their sizes
print("Hypothesis:", hypothesis)
print(len(hypothesis))
print("References:", references)
print(len(references))

Hypothesis: {'ref_1': ['many green trees are around a piece of land .'], 'ref_2': ['a road is next to a railway station and a parking lot .'], 'ref_3': ['a pond is surrounded by a green meadow .'], 'ref_4': ['some green trees and buildings are in a piece of land .'], 'ref_5': ['a road is between two buildings and two parking lots .'], 'ref_6': ['some buildings and green trees are in a dense residential area .'], 'ref_7': ['a large parking lot is near a residential area .'], 'ref_8': ['many buildings and green trees are in a densely populated residential area .'], 'ref_9': ['many green trees are in a piece of meadow .'], 'ref_10': ['a green meadow is near a piece of water .'], 'ref_11': ['a green field with a few buildings are in a residential area .'], 'ref_12': ['the road is in a dense residential area with many buildings and green trees .'], 'ref_13': ['the red roofs of many buildings are scattered in a densely populated area .'], 'ref_14': ['the white building is surrounded by many 

In [6]:
!pip install tabulate
!pip install pycocoevalcap

Collecting pycocoevalcap
  Downloading pycocoevalcap-1.2-py3-none-any.whl (104.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.3/104.3 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pycocoevalcap
Successfully installed pycocoevalcap-1.2


In [15]:
import pandas as pd
import re
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.meteor.meteor import Meteor
from tabulate import tabulate

def show_metrics(metrics):
    """Print the given metrics as a two-column 'fancy_grid' table.

    Args:
        metrics: Mapping of metric name to value. Rows are printed in the
            mapping's iteration order.
    """
    rows = [[name, score] for name, score in metrics.items()]
    print(tabulate(rows, headers=['Metric', 'Value'], tablefmt='fancy_grid'))

class MetricsCalculator:
    """Run BLEU, ROUGE-L, CIDEr and METEOR scorers and collect the results.

    Every ``compute_*`` method passes ``references`` and ``hypotheses`` to the
    matching scorer's ``compute_score`` and records the outcome in
    ``self.scores`` under the metric's name.
    """

    def __init__(self, references, hypotheses, n_gram=4):
        """Keep the inputs and create one scorer object per metric.

        Args:
            references: First argument handed to each scorer's compute_score.
            hypotheses: Second argument handed to each scorer's compute_score.
            n_gram: Passed to Bleu and used as the number of BLEU-k entries
                written by compute_bleu.
        """
        self.references = references
        self.hypotheses = hypotheses
        self.n_gram = n_gram
        # Scorers are created in the same order as before: Bleu, Rouge, Cider, Meteor.
        self.bleu = Bleu(self.n_gram)
        self.rouge = Rouge()
        self.cider = Cider()
        self.meteor = Meteor()
        # Filled in by the compute_* methods.
        self.scores = {}

    def compute_bleu(self):
        """Store BLEU-1 .. BLEU-n_gram as percentage strings with two decimals."""
        result = self.bleu.compute_score(self.references, self.hypotheses)
        for order in range(1, self.n_gram + 1):
            # Element 0 of the result is indexed by n-gram order (0-based).
            percent = result[0][order - 1] * 100
            self.scores[f"BLEU-{order}"] = format(percent, ".2f")

    def compute_rouge_l(self):
        """Store ROUGE-L as a percentage string with two decimals."""
        overall, _per_item = self.rouge.compute_score(self.references, self.hypotheses)
        self.scores["ROUGE-L"] = format(overall * 100, ".2f")

    def compute_cider(self):
        """Store the CIDEr score exactly as returned by the scorer (unscaled, unformatted)."""
        overall, _per_item = self.cider.compute_score(self.references, self.hypotheses)
        self.scores["CIDEr"] = overall

    def compute_meteor(self):
        """Store METEOR as a percentage string with two decimals."""
        overall, _per_item = self.meteor.compute_score(self.references, self.hypotheses)
        self.scores["METEOR"] = format(overall * 100, ".2f")

# Create an instance of MetricsCalculator from the reference and hypothesis dictionaries
metrics_calculator = MetricsCalculator(references, hypothesis)
# Each call below stores its result in metrics_calculator.scores; because that
# is a dict, the call order is also the row order of the printed table.
metrics_calculator.compute_bleu()
metrics_calculator.compute_rouge_l()
metrics_calculator.compute_cider()
metrics_calculator.compute_meteor()

# Print the collected scores as a table
scores = metrics_calculator.scores
show_metrics(scores)


{'testlen': 11979, 'reflen': 10972, 'guess': [11979, 10887, 9795, 8703], 'correct': [5466, 1269, 489, 167]}
ratio: 1.0917790740064626
╒══════════╤═══════════╕
│ Metric   │     Value │
╞══════════╪═══════════╡
│ BLEU-1   │ 45.63     │
├──────────┼───────────┤
│ BLEU-2   │ 23.06     │
├──────────┼───────────┤
│ BLEU-3   │ 13.85     │
├──────────┼───────────┤
│ BLEU-4   │  8.45     │
├──────────┼───────────┤
│ ROUGE-L  │ 37.2      │
├──────────┼───────────┤
│ CIDEr    │  0.205775 │
├──────────┼───────────┤
│ METEOR   │ 15.76     │
╘══════════╧═══════════╛
