Working with the compound descriptions generated by ChatGPT

In [None]:
import csv
import sys
import pandas as pd
import ast

from sentence_transformers import SentenceTransformer, SimilarityFunction
# Shared sentence-embedding model: the helper functions defined further down
# (sim_scores, literal_idiomatic) read this module-level name directly, and a
# later cell reassigns model.similarity_fn_name, so it acts as global state.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

from evaluation_a import evaluation_single

# Code by Wiebke: load the Subtask A training split (tab-separated) and turn
# the stringified 'expected_order' column into real Python objects.
fileNameA = "subtask_a_train.tsv"
fileDirectoryA = "AdMIRe Subtask A Train/train"
dataA = pd.read_csv(f"{fileDirectoryA}/{fileNameA}", sep='\t')
dataA['expected_order'] = dataA['expected_order'].map(ast.literal_eval)

In [2]:
# descriptions given by ChatGPT for idiomatic / literal use of compound
# read data from Julio -> csv file
# Later cells look rows up by the "compound" and "sentence_type" columns and
# read the text from the "Meaning" column.

filename_desc = "gpt-desc.csv"
data_desc = pd.read_csv(filename_desc)

In [3]:
def sim_scores(current, sentences):
    """Score every image caption against the compound description.

    Parameters
    ----------
    current : mapping (e.g. one row of the training data)
        Must provide the keys ``image1_name`` ... ``imageN_name``, one per
        caption contained in ``sentences``.
    sentences : list of str
        ``sentences[0]`` is the description of the idiomatic / literal use of
        the compound; ``sentences[1:]`` are the image captions in image order.

    Returns
    -------
    dict
        Maps each image name to the similarity (a Python float) between the
        description and that image's caption, as computed by the module-level
        ``model`` with its currently configured similarity function.
    """
    embeddings = model.encode(sentences)

    # Compare the description embedding with every embedding, including
    # itself; column 0 is therefore the self-comparison and is skipped below.
    similarities = model.similarity(embeddings[0], embeddings)

    scores = {}
    # Caption k sits at position k in `sentences` and belongs to image k, so
    # one loop replaces the per-image copy-pasted stanzas and adapts to the
    # number of captions actually passed in.
    for position in range(1, len(sentences)):
        # [0][position]: the similarity tensor carries an extra leading axis
        scores[current[f"image{position}_name"]] = similarities[0][position].item()

    return scores

def rank_images(scores, top_k=5):
    """Return image names ordered from highest to lowest similarity score.

    Parameters
    ----------
    scores : dict
        Maps image name -> similarity score between the sentence/description
        and that image's caption. The dictionary is left untouched.
    top_k : int or None, optional
        Maximum number of names to return (default 5, the number of images
        per example). ``None`` returns the complete ranking.

    Returns
    -------
    list
        Image names, best match first. If ``scores`` holds fewer than
        ``top_k`` entries, all of them are returned instead of raising.
    """
    # sorted() is stable even with reverse=True, so images with equal scores
    # keep their insertion order -- the same tie-breaking that repeatedly
    # taking max() and deleting the winner would produce.
    ordered = sorted(scores, key=scores.get, reverse=True)

    if top_k is None:
        return ordered
    return ordered[:top_k]

In [None]:
# Rank the five image captions of each example by their similarity to the
# ChatGPT description of the compound (idiomatic / literal), then average
# the two per-example evaluation values over the selected examples.

data = dataA
# Uncomment one of the following lines to evaluate a single sentence type:
#data = dataA[dataA["sentence_type"]=="literal"]
#data = dataA[dataA["sentence_type"]=="idiomatic"]

total_acc = 0
total_spearman = 0

for i in range(len(data)):

    current = data.iloc[i]

    # Pick the description whose compound and sentence type match this example;
    # .item() raises unless exactly one row matches.
    same_compound = data_desc["compound"] == current["compound"]
    same_type = data_desc["sentence_type"] == current["sentence_type"]
    description = data_desc.loc[same_compound & same_type, "Meaning"].item()

    # description first, followed by the captions of image 1..5
    sentences = [description] + [current[f"image{k}_caption"] for k in range(1, 6)]

    scores = sim_scores(current, sentences)
    ranking = rank_images(scores)

    exp_order = current["expected_order"]
    evaluation = evaluation_single(ranking, exp_order)
    # evaluation[0] feeds the accuracy average, evaluation[1] the Spearman average
    total_acc = total_acc + evaluation[0]
    total_spearman = total_spearman + evaluation[1]

final_acc = total_acc / len(data)
final_spearman = total_spearman / len(data)
print(final_acc)
print(final_spearman)

In [5]:
# comparing captions to literal / idiomatic description
# literal / idiomatic taken from training data

# entire subtask A training data (70):
# final_acc = 0.6142857142857143 
# final_spearman = 0.17428571428571427

# only literal (31):
# final_acc = 0.7096774193548387 -> slight improvement
# final_spearman = 0.31612903225806455

# only idiomatic (39):
# final_acc = 0.5384615384615384 -> great improvement
# final_spearman = 0.061538461538461535

In [6]:
def literal_idiomatic(sentences):
    """Guess whether a sentence uses its compound literally or idiomatically.

    Parameters
    ----------
    sentences : list of str
        Exactly three texts in this order: the sentence containing the
        compound, the literal description, the idiomatic description.

    Returns
    -------
    str
        ``"literal"`` if the sentence is at least as similar to the literal
        description as to the idiomatic one, otherwise ``"idiomatic"``.
    """
    embeddings = model.encode(sentences)
    # Compare the sentence only with the two descriptions (not with itself):
    # column 0 -> literal description, column 1 -> idiomatic description.
    similarities = model.similarity(embeddings[0], embeddings[1:])

    literal_score = similarities[0][0].item()
    idiomatic_score = similarities[0][1].item()

    # '>=' so that a tie resolves to "literal"; a strict '>' would send equal
    # scores to "idiomatic".
    if literal_score >= idiomatic_score:
        return "literal"

    return "idiomatic"

In [None]:
# Predict literal vs. idiomatic for every training sentence by comparing it
# with both ChatGPT descriptions of its compound, and report the share of
# predictions that agree with the gold sentence type.
matches = 0

for i in range(len(dataA)):

    current = dataA.iloc[i]

    # Both descriptions (literal and idiomatic) of the current compound;
    # .item() raises unless exactly one row matches each sentence type.
    compound_rows = data_desc[data_desc["compound"] == current["compound"]]
    literal_desc = compound_rows[compound_rows["sentence_type"] == "literal"]["Meaning"].item()
    idiomatic_desc = compound_rows[compound_rows["sentence_type"] == "idiomatic"]["Meaning"].item()

    candidates = [current["sentence"], literal_desc, idiomatic_desc]
    prediction = literal_idiomatic(candidates)

    # count one match when the prediction equals the gold label
    matches += int(prediction == current["sentence_type"])

accuracy = matches / len(dataA)
print(accuracy)

# 0.5857142857142857 
# -> determining literal vs idiomatic via similarity of descriptions to sentence
#    is not very effective

Different similarity functions available for comparing embeddings:
- SimilarityFunction.COSINE
- SimilarityFunction.DOT_PRODUCT
- SimilarityFunction.EUCLIDEAN
- SimilarityFunction.MANHATTAN

https://www.sbert.net/docs/sentence_transformer/usage/semantic_textual_similarity.html

In [None]:
# Same caption-ranking evaluation as before, but with a selectable
# similarity function on the shared model.

data = dataA
# Uncomment one of the following lines to evaluate a single sentence type:
#data = dataA[dataA["sentence_type"]=="literal"]
#data = dataA[dataA["sentence_type"]=="idiomatic"]

# Keep exactly one of these assignments active.
# NOTE(review): this changes the module-level `model` in place, so every cell
# executed afterwards also uses the similarity function chosen here.
#model.similarity_fn_name = SimilarityFunction.COSINE
#model.similarity_fn_name = SimilarityFunction.DOT_PRODUCT
#model.similarity_fn_name = SimilarityFunction.EUCLIDEAN
model.similarity_fn_name = SimilarityFunction.MANHATTAN

total_acc = 0
total_spearman = 0

for i in range(len(data)):

    current = data.iloc[i]

    # Pick the description whose compound and sentence type match this example;
    # .item() raises unless exactly one row matches.
    same_compound = data_desc["compound"] == current["compound"]
    same_type = data_desc["sentence_type"] == current["sentence_type"]
    description = data_desc.loc[same_compound & same_type, "Meaning"].item()

    # description first, followed by the captions of image 1..5
    sentences = [description] + [current[f"image{k}_caption"] for k in range(1, 6)]

    scores = sim_scores(current, sentences)
    ranking = rank_images(scores)

    exp_order = current["expected_order"]
    evaluation = evaluation_single(ranking, exp_order)
    # evaluation[0] feeds the accuracy average, evaluation[1] the Spearman average
    total_acc = total_acc + evaluation[0]
    total_spearman = total_spearman + evaluation[1]

final_acc = total_acc / len(data)
final_spearman = total_spearman / len(data)
print(final_acc)
print(final_spearman)

In [9]:
# cos

# literal:
# final_acc = 0.7096774193548387
# final_spearman = 0.31612903225806455

# idiomatic:
# final_acc = 0.5384615384615384
# final_spearman = 0.061538461538461535


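# dot product -> same as cos (expected if the model L2-normalises its embeddings,
# since dot product and cosine then coincide -- TODO confirm for this model)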

# literal:
# final_acc = 0.7096774193548387
# final_spearman = 0.31612903225806455

# idiomatic:
# final_acc = 0.5384615384615384
# final_spearman = 0.061538461538461535


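# negative euclidean distance -> same as cos (for unit-length embeddings the
# euclidean distance is a monotone function of cosine, so the ranking is unchanged
# -- TODO confirm the model normalises its embeddings)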

# literal:
# final_acc = 0.7096774193548387
# final_spearman = 0.31612903225806455

# idiomatic
# final_acc = 0.5384615384615384
# final_spearman = 0.061538461538461535


# negative manhattan distance

# literal
# final_acc = 0.7096774193548387
# final_spearman = 0.29677419354838713

# idiomatic
# final_acc = 0.5897435897435898
# final_spearman = 0.11025641025641028

# all -> slightly better than cos because better for idiomatic
# final_acc = 0.6428571428571429
# final_spearman = 0.19285714285714278