In [5]:
from PIL import Image
import requests
import torch
from datasets import load_dataset
import pandas as pd

def levenshteinDistance(A, B):
    """Return the Levenshtein (edit) distance between sequences ``A`` and ``B``.

    The distance is the minimum number of single-element deletions,
    insertions and substitutions needed to turn ``A`` into ``B``.

    Args:
        A: Source sequence (for example a ``str`` or ``list``).
        B: Target sequence whose elements are comparable with those of ``A``.

    Returns:
        int: The edit distance. It equals ``len(B)`` when ``A`` is empty and
        ``len(A)`` when ``B`` is empty.
    """
    M = len(B)

    # Row 0 of the DP table: turning an empty prefix of A into B[:j] takes
    # j insertions.
    previous = list(range(M + 1))

    # Each row depends only on the row above it, so two rows of length M + 1
    # are enough instead of the full (N + 1) x (M + 1) table.
    for i, a in enumerate(A, start=1):
        # Column 0: turning A[:i] into an empty sequence takes i deletions.
        current = [i]
        for j, b in enumerate(B, start=1):
            if a == b:
                # Matching elements cost nothing extra.
                current.append(previous[j - 1])
            else:
                current.append(1 + min(
                    previous[j],      # Deletion of A[i-1]
                    current[j - 1],   # Insertion of B[j-1]
                    previous[j - 1],  # Replacement of A[i-1] by B[j-1]
                ))
        previous = current

    return previous[M]


# Tag used to build the names of the predictions CSV and the metrics report.
filetag = 'pix-desc'

# Test split of the chart-description dataset.
# NOTE(review): trust_remote_code=True allows the dataset repository's own
# loading script to run locally - confirm the source is trusted.
dataset = load_dataset(
    'eduvedras/Img_Desc',
    split='test',
    trust_remote_code=True,
)
print(dataset)

Dataset({
    features: ['Chart', 'Description', 'Chart_name'],
    num_rows: 15
})


In [6]:
# Load the model predictions; row k of the CSV is paired with dataset entry k.
df = pd.read_csv(f'./{filetag}.csv', sep=',')
predictions = df['Prediction'].tolist()

# Read the 'Description' column directly instead of indexing whole rows, so
# the other features of each example (e.g. 'Chart') are never materialised.
descriptions = list(dataset['Description'])
if len(predictions) > len(descriptions):
    raise ValueError(
        f"{len(predictions)} predictions but only {len(descriptions)} "
        "reference descriptions in the dataset"
    )

# Keep exactly one reference per prediction, in the same order.
references = descriptions[:len(predictions)]

print(references)


['An image showing a decision tree with depth = 2 where the first decision is made with the condition Pclass <= 2.5 and the second with the condition Parch <= 0.5.', 'A multi-line chart showing the overfitting of a mlp where the y-axis represents the accuracy and the x-axis represents the number of iterations ranging from 100 to 1000.', 'A multi-line chart showing the overfitting of gradient boosting where the y-axis represents the accuracy and the x-axis represents the number of estimators ranging from 2 to 2002.', 'A multi-line chart showing the overfitting of random forest where the y-axis represents the accuracy and the x-axis represents the number of estimators ranging from 2 to 2002.', 'A multi-line chart showing the overfitting of k-nearest neighbors where the y-axis represents the accuracy and the x-axis represents the number of neighbors ranging from 1 to 23.', 'A multi-line chart showing the overfitting of a decision tree where the y-axis represents the accuracy and the x-axi

In [7]:
import evaluate

# Edit distance between every prediction and the reference at the same index.
distances = [
    levenshteinDistance(prediction, reference)
    for prediction, reference in zip(predictions, references)
]

print(distances)

# Append one line per metric to the report file. The context manager closes
# the handle even if loading or computing a metric raises.
with open(f"{filetag}.txt", "a") as file:
    for metric_name in ("bleu", "meteor", "rouge"):
        metric = evaluate.load(metric_name)
        results = metric.compute(predictions=predictions, references=references)
        print(results)
        # Label is the upper-cased metric name: "BLEU", "METEOR", "ROUGE".
        file.write(f"{metric_name.upper()}: {results}\n")

[1, 13, 12, 0, 0, 0, 0, 1, 10, 16, 11, 9, 0, 0, 10]


'file = open(f"{filetag}.txt", "a")\n\nbleu = evaluate.load("bleu")\nresults = bleu.compute(predictions=predictions, references=references)\nprint(results)\nfile.write(f"BLEU: {results}\n")\n\nmeteor = evaluate.load("meteor")\nresults = meteor.compute(predictions=predictions, references=references)\nprint(results)\nfile.write(f"METEOR: {results}\n")\n\nrouge = evaluate.load("rouge")\nresults = rouge.compute(predictions=predictions, references=references)\nprint(results)\nfile.write(f"ROUGE: {results}\n")\nfile.close()'