In [7]:
# Mount Google Drive at /content/drive so the files read below are reachable (needs the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [64]:
path_json = "/content/drive/MyDrive/Classroom/MSV/dataset RSICD/dataset_rsicd.json"
path_img_folder = "/content/drive/MyDrive/Classroom/MSV/dataset RSICD/images"

import json

# Load the JSON annotation file.
# The encoding is pinned to UTF-8 instead of relying on the platform's default
# encoding, so the captions are decoded the same way on every machine.
# NOTE(review): assumes the annotation file is UTF-8 encoded — confirm.
with open(path_json, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Build the dataset: map every image filename to the list of its raw captions.
# If the same filename appears more than once, the last entry wins.
dataset_map = {}
for item in data["images"]:
    filename = item['filename']
    captions = [sent['raw'] for sent in item['sentences']]
    dataset_map[filename] = captions

In [56]:
# Show one sample entry (image name, then its captions) when it exists in the map.
ex = "park_90.jpg"
if ex in dataset_map:
    print("Image path :", ex)
    # One call, newline-separated: prints the header line and then the caption list.
    print("Labels :", dataset_map[ex], sep="\n")

Image path : park_90.jpg
Labels :
['there are all sorts of play equipment in the park .', 'park is located in a hub, surrounded by the road .', 'some buildings and many green trees are in a park with several ponds .', 'some buildings and many green trees are in a park with several ponds .', 'there are all sorts of play equipment in the park .']


In [65]:
import pandas as pd
import json

# Load the CSV file holding the predictions (the 'filename' and 'pred_output' columns are used below).
df = pd.read_csv('/content/drive/MyDrive/Classroom/MSV/predictions_2.csv')

# Keep only the rows whose 'filename' contains 'park_' (case-insensitive; missing values are excluded).
# regex=False makes this a literal substring match instead of a regular-expression match.
filtered_df = df[df['filename'].str.contains('park_', case=False, na=False, regex=False)]

# Build both dictionaries in a single pass so they are guaranteed to share the
# same keys: "ref_<row index + 1>" maps to the predicted caption (wrapped in a
# one-element list) and to at most the first five reference captions.
# A filename missing from dataset_map raises KeyError.
hypothesis = {}
references = {}
for i, row in filtered_df.iterrows():
    key = f"ref_{i+1}"
    hypothesis[key] = [row['pred_output']]
    references[key] = dataset_map[row['filename']][:5]

# Display the dictionaries and their sizes.
print("Hypothesis:", hypothesis)
print(len(hypothesis))
print("References:", references)
print(len(references))


Hypothesis: {'ref_448': ['some buildings and green trees are in a piece of farmland in a park .'], 'ref_449': ['many buildings and some green trees are in a park .'], 'ref_450': ['a small pond is surrounded by many green trees in a park .'], 'ref_451': ['many green trees and buildings are in a park .'], 'ref_452': ['many green trees and buildings are in a park .'], 'ref_453': ['many buildings and green trees are in a parking lot .'], 'ref_454': ['the two sides of a river are green meadows with a few trees .'], 'ref_455': ['many green trees and buildings are in a park .'], 'ref_456': ['a piece of water is surrounded by green trees and several buildings .'], 'ref_457': ['many green trees and some buildings are in a piece of farmland .'], 'ref_458': ['a long river is near a piece of green forest .'], 'ref_459': ['the parking lot is full of cars .'], 'ref_460': ['the road is curved and surrounded by a green field .'], 'ref_461': ['some green trees are in a piece of farmland .'], 'ref_462':

In [42]:
!pip install tabulate
!pip install pycocoevalcap



In [66]:
import pandas as pd
import re
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.meteor.meteor import Meteor
from tabulate import tabulate

def show_metrics(metrics):
    """Print the given metrics as a two-column table.

    Args:
        metrics: Mapping of metric name to metric value. Each item becomes
            one ``[name, value]`` row, in the mapping's iteration order.

    Returns:
        None. The table is written to standard output.
    """
    rows = [[name, score] for name, score in metrics.items()]
    rendered = tabulate(rows, headers=['Metric', 'Value'], tablefmt='fancy_grid')
    print(rendered)

class MetricsCalculator:
    """Run the BLEU, ROUGE-L, CIDEr and METEOR scorers on one references/hypotheses pair.

    Every ``compute_*`` method writes its result into ``self.scores`` under the
    metric's display name. BLEU, ROUGE-L and METEOR are stored as strings
    holding the score multiplied by 100 with two decimals; CIDEr is stored as
    the raw value returned by its scorer.
    """

    def __init__(self, references, hypotheses, n_gram=4):
        """Keep the inputs and create one scorer object per metric.

        Args:
            references: Passed as the first argument to each scorer's
                ``compute_score``.
            hypotheses: Passed as the second argument to each scorer's
                ``compute_score``.
            n_gram: Value handed to ``Bleu``; also the number of ``BLEU-k``
                entries written by ``compute_bleu``.
        """
        self.references, self.hypotheses = references, hypotheses
        self.n_gram = n_gram
        # All scorers are created eagerly, in this order.
        self.bleu = Bleu(self.n_gram)
        self.rouge = Rouge()
        self.cider = Cider()
        self.meteor = Meteor()
        # Filled in by the compute_* methods.
        self.scores = {}

    def compute_bleu(self):
        """Store ``BLEU-1`` .. ``BLEU-<n_gram>`` as two-decimal strings scaled by 100."""
        outcome = self.bleu.compute_score(self.references, self.hypotheses)
        # The first element of the scorer result holds one value per n-gram order.
        per_order = outcome[0]
        for order in range(1, self.n_gram + 1):
            scaled = per_order[order - 1] * 100
            self.scores[f"BLEU-{order}"] = f"{scaled:.2f}"

    def compute_rouge_l(self):
        """Store ``ROUGE-L`` as a two-decimal string scaled by 100."""
        overall, _ = self.rouge.compute_score(self.references, self.hypotheses)
        self.scores["ROUGE-L"] = format(overall * 100, ".2f")

    def compute_cider(self):
        """Store ``CIDEr`` exactly as returned by the scorer (no scaling, no formatting)."""
        overall, _ = self.cider.compute_score(self.references, self.hypotheses)
        self.scores["CIDEr"] = overall

    def compute_meteor(self):
        """Store ``METEOR`` as a two-decimal string scaled by 100."""
        overall, _ = self.meteor.compute_score(self.references, self.hypotheses)
        self.scores["METEOR"] = format(overall * 100, ".2f")

# Score the predictions against the references: BLEU, ROUGE-L, CIDEr, then METEOR.
metrics_calculator = MetricsCalculator(references, hypothesis)
for _run_metric in (
    metrics_calculator.compute_bleu,
    metrics_calculator.compute_rouge_l,
    metrics_calculator.compute_cider,
    metrics_calculator.compute_meteor,
):
    _run_metric()

# Render everything the calculator collected as a table.
scores = metrics_calculator.scores
show_metrics(scores)


{'testlen': 421, 'reflen': 394, 'guess': [421, 386, 351, 316], 'correct': [271, 139, 83, 47]}
ratio: 1.068527918779014
╒══════════╤═══════════╕
│ Metric   │     Value │
╞══════════╪═══════════╡
│ BLEU-1   │ 64.37     │
├──────────┼───────────┤
│ BLEU-2   │ 48.15     │
├──────────┼───────────┤
│ BLEU-3   │ 37.99     │
├──────────┼───────────┤
│ BLEU-4   │ 30.05     │
├──────────┼───────────┤
│ ROUGE-L  │ 53.95     │
├──────────┼───────────┤
│ CIDEr    │  0.539267 │
├──────────┼───────────┤
│ METEOR   │ 28.44     │
╘══════════╧═══════════╛
