In [8]:
import sys
sys.path.append('..')

In [11]:
import pandas as pd
import numpy as np

from sentence_transformers import SentenceTransformer, util
import evaluate
import torch.nn.functional as F
from pathlib import Path

In [12]:
def get_filename(input_csv):
    """Return the final path component of ``input_csv`` without its last suffix.

    Directory parts are dropped and only the trailing extension is removed,
    so ``'out/run.model.csv'`` yields ``'run.model'``.
    """
    return Path(input_csv).stem

def measure(output_file, time_prompt):
    """Score generated texts against references and print a summary line.

    The CSV at ``output_file`` is read with pandas; its ``results`` column is
    used as the predictions and its ``references`` column as the ground truth.
    The following are computed:

    * mean row-wise cosine similarity between prediction and reference
      embeddings, once with ``paraphrase-MiniLM-L6-v2`` and once with
      ``paraphrase-distilroberta-base-v1``;
    * corpus-level BLEU with the metric's default ``max_order`` (reported as
      BLEU-4) and with ``max_order=2`` (BLEU-2);
    * per-example BLEU-4 / BLEU-2, of which the five lowest-scoring examples
      are printed as ``(indexes) (scores)`` for inspection;
    * ROUGE (``rouge1``, ``rouge2``, ``rougeL``) and METEOR.

    Args:
        output_file: Path to a CSV file with ``results`` and ``references``
            columns.
        time_prompt: Value echoed as the last field of the summary line; it
            is not otherwise used here.

    Returns:
        None. Everything is reported through ``print``.
    """
    st_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

    df = pd.read_csv(output_file)
    res = df['results'].tolist()
    references = df['references'].tolist()

    emb_res = st_model.encode(res, convert_to_tensor=True)
    emb_ref = st_model.encode(references, convert_to_tensor=True)
    score1 = F.cosine_similarity(emb_res, emb_ref, dim=1).mean().item()

    sbert_model = SentenceTransformer('paraphrase-distilroberta-base-v1')
    emb_res = sbert_model.encode(res, convert_to_tensor=True)
    emb_ref = sbert_model.encode(references, convert_to_tensor=True)
    score2 = F.cosine_similarity(emb_res, emb_ref, dim=1).mean().item()

    # Corpus-level BLEU: these are the values reported in the summary line.
    bleu_metric = evaluate.load("bleu")
    bleu4 = bleu_metric.compute(predictions=res, references=references)
    bleu2 = bleu_metric.compute(predictions=res, references=references, max_order=2)

    # Per-example BLEU scores, used only to surface the worst predictions.
    bleu4_scores = []
    bleu2_scores = []

    for pred, ref in zip(res, references):
        # Each ref should be a list of reference texts, as BLEU expects multiple references for each prediction
        if not isinstance(ref, list):
            ref = [ref]

        # Keep per-example results under their own names so the corpus-level
        # `bleu4` / `bleu2` above are not overwritten by the last example.
        example_bleu4 = bleu_metric.compute(predictions=[pred], references=[ref])
        bleu4_scores.append(example_bleu4['bleu'])

        example_bleu2 = bleu_metric.compute(predictions=[pred], references=[ref], max_order=2)
        bleu2_scores.append(example_bleu2['bleu'])

    # Print the five lowest-scoring examples, first for BLEU-4 and then BLEU-2.
    for scores in (bleu4_scores, bleu2_scores):
        lowest_values = sorted(enumerate(scores), key=lambda x: x[1])[:5]
        lowest_indexes, lowest_scores = zip(*lowest_values)
        print(lowest_indexes, lowest_scores)

    rouge_metric = evaluate.load('rouge')
    rouge = rouge_metric.compute(predictions=res,
                      references=references)
    meteor_metric = evaluate.load('meteor')
    meteor = meteor_metric.compute(predictions=res, references=references)

    # Summary fields: name, n, BLEU-2, BLEU-4, ROUGE-1/2/L, METEOR, two embedding similarities, time_prompt.
    print(f"{get_filename(output_file)}, {len(res)}, {bleu2['bleu']}, {bleu4['bleu']}, {rouge['rouge1']}, {rouge['rouge2']}, {rouge['rougeL']}, {meteor['meteor']}, {score1}, {score2}, {time_prompt}" + '\n')
 

In [7]:
# Score the generations stored in this CSV; 0 is passed through as the time_prompt field.
measure(
    output_file='/home/ali.lawati/mol-incontext/output/mmcl-chebi-3-epochs100-ensure-diff.mistral-7B.csv',
    time_prompt=0,
)



(5, 7, 8, 91, 124) (0.0, 0.0, 0.0, 0.0, 0.0)
(504, 648, 837, 793, 172) (0.00027969294491888777, 0.001107932148943576, 0.0011803083339673264, 0.0029548413131870507, 0.007464021168457002)


[nltk_data] Downloading package wordnet to
[nltk_data]     /home/ali.lawati/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/ali.lawati/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /home/ali.lawati/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


NameError: name 'get_filename' is not defined

In [14]:
# Sanity check: BLEU for a single prediction against a single reference.
bleu_metric = evaluate.load("bleu")
bleu_metric.compute(
    predictions=[
        "The molecule is a glucitol that is D-glucitol in which the carbon at position 1 is the (13)C isotope. It is a glucitol and a (13)C-modified compound."
    ],
    references=[
        [
            "The molecule is the D-enantiomer of glucitol (also known as D-sorbitol). It has a role as a sweetening agent, a laxative, a metabolite, a cathartic, a human metabolite, a food humectant, a Saccharomyces cerevisiae metabolite, an Escherichia coli metabolite and a mouse metabolite. It is an enantiomer of a L-glucitol."
        ]
    ],
)

{'bleu': 0.0,
 'precisions': [0.42857142857142855,
  0.17647058823529413,
  0.06060606060606061,
  0.0],
 'brevity_penalty': 0.4623520933081964,
 'length_ratio': 0.5645161290322581,
 'translation_length': 35,
 'reference_length': 62}