<a href="https://colab.research.google.com/github/dml2611/Chinese-Idioms/blob/main/meteor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install nltk
!pip install nltk



In [3]:
# Import libraries
from csv import DictWriter

import nltk
import pandas as pd
from nltk import word_tokenize
from nltk.translate.meteor_score import single_meteor_score

# Download the NLTK data needed for tokenisation and METEOR synonym matching.
# 'punkt' is used by older NLTK releases; NLTK >= 3.8.2 loads the tokenizer
# from 'punkt_tab' instead, so both are fetched to keep word_tokenize working
# regardless of which version the install cell resolved to.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
# Attach Google Drive to the Colab runtime; its contents become available
# under /content/gdrive once the mount succeeds.
from google.colab import drive

drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [22]:
# Locations used for reading the input workbook and writing the results.
# Each value is a string prefix that is concatenated directly with a file name,
# so fill them in before running the cells below.
outfile_path = ''   # prefix for the output file
data_path = ''      # prefix for the input file

In [23]:
# Load the workbook that holds the human reference translation and the output
# of each of the 9 machine translation systems, one column per system.
translations = pd.read_excel(data_path + "----")  # excel sheet containing reference and machine translations

# Human (reference) translations
reference = translations["HT"]

# Candidate translations, one Series per system
candidate_googleTrans = translations["Google"]
candidate_micro = translations["Microsoft"]
candidate_deepL = translations["Deepl"]
candidate_gpt4 = translations["GPT4"]
candidate_llama2 = translations["Llama2"]
candidate_glm4 = translations["GLM4"]
candidate_volcTrans = translations["Volc"]
candidate_niuTrans = translations["Niu"]
candidate_baidu = translations["Baidu"]

In [24]:
# Define the functions for calculating the meteor scores
def _tokenize_cell(text):
  """Tokenize one spreadsheet cell; a missing value becomes an empty token list."""
  # Blank Excel cells are loaded by pandas as NaN (a float), which
  # word_tokenize cannot process, so they are treated as empty sentences.
  if text is None or (not isinstance(text, str) and pd.isna(text)):
    return []
  # str() also covers cells that were parsed as numbers rather than text.
  return word_tokenize(str(text))


def calc_meteor(translations, name, references=None):
  """Compute sentence-level METEOR scores for one translation system.

  Args:
    translations: Sequence (e.g. a pandas Series) of candidate translations.
    name: Label printed in front of the score list.
    references: Sequence of reference translations, aligned with
      `translations` by position. Defaults to the notebook-level `reference`.

  Returns:
    A list with one score per sentence pair, rounded to 4 decimal places.

  Raises:
    ValueError: If the two sequences do not have the same length.
  """
  if references is None:
    references = reference  # fall back to the notebook-level reference column

  if len(translations) != len(references):
    raise ValueError(
      f"{name}: got {len(translations)} translations for {len(references)} references"
    )

  # zip pairs the rows by position, so the result does not depend on the
  # Series index being exactly 0..n-1 (e.g. after filtering rows).
  scores = [
    round(single_meteor_score(_tokenize_cell(ref), _tokenize_cell(hyp)), 4)
    for ref, hyp in zip(references, translations)
  ]
  print(name, scores)
  return scores

In [25]:
# Score the candidates of every machine translation system against the
# references. Systems are processed, and their scores printed, in the order
# listed below.
(
  googleTrans_meteor,
  ms_meteor,
  deepL_meteor,
  gpt4_meteor,
  lm2_meteor,
  glm4_meteor,
  niutrans_meteor,
  volcTrans_meteor,
  baidu_meteor,
) = [
  calc_meteor(candidates, label)
  for candidates, label in (
    (candidate_googleTrans, "Google Translation"),
    (candidate_micro, "Microsoft"),
    (candidate_deepL, "DeepL"),
    (candidate_gpt4, "GPT4"),
    (candidate_llama2, "Llama2"),
    (candidate_glm4, "GLM4"),
    (candidate_niuTrans, "Niu Trans"),
    (candidate_volcTrans, "Volcano Trans"),
    (candidate_baidu, "Baidu"),
  )
]

Google Translation [0.5439, 0.3046, 0.5188, 0.2657, 0.3085, 0.5268, 0.4114, 0.4167, 0.3394, 0.1639, 0.5229, 0.1487, 0.3341, 0.5271, 0.4956, 0.4864, 0.2119, 0.4118, 0.2775, 0.432, 0.3012, 0.2983, 0.1692, 0.2294, 0.4484, 0.4509, 0.3883, 0.5388, 0.447, 0.2977, 0.0862, 0.5049, 0.1511, 0.4069, 0.4093, 0.2894, 0.4288, 0.528, 0.2951, 0.2514, 0.2952, 0.4115, 0.2273, 0.0735, 0.5472, 0.3262, 0.2696, 0.2363, 0.1667, 0.6409, 0.5176, 0.21, 0.4432, 0.2326, 0.4436, 0.2244, 0.2308, 0.0495, 0.3847, 0.2126, 0.1274, 0.3243, 0.3599, 0.3853, 0.1404, 0.1691, 0.5197, 0.25, 0.1355, 0.1571, 0.3294, 0.4496, 0.4127, 0.3808, 0.2473, 0.3743, 0.1884, 0.3573, 0.1721, 0.651, 0.4668, 0.2174, 0.38, 0.2188, 0.1515, 0.1471, 0.2552, 0.4006, 0.4907, 0.3145, 0.284, 0.3978, 0.3894, 0.1232, 0.2784, 0.3156, 0.2282, 0.4438, 0.3787, 0.3194]
Microsoft [0.5957, 0.2489, 0.3736, 0.3182, 0.1951, 0.2459, 0.3846, 0.351, 0.3507, 0.15, 0.5953, 0.1136, 0.313, 0.4762, 0.4167, 0.5272, 0.2812, 0.4477, 0.2236, 0.4576, 0.2781, 0.3124, 0.2103, 

In [26]:
# Store the results in a python dictionary
dict = {'GoogleTrans': googleTrans_meteor, 'Microsoft': ms_meteor,  'DeepL': deepL_meteor, 'GPT4': gpt4_meteor, 'Llama2': lm2_meteor, 'GLM4': glm4_meteor, 'NiuTrans': niutrans_meteor, 'VolcTrans': volcTrans_meteor, 'Baidu': baidu_meteor}

In [27]:
# Display the results as a python dataframe
df = pd.DataFrame(dict)
df

Unnamed: 0,GoogleTrans,Microsoft,DeepL,GPT4,Llama2,GLM4,NiuTrans,VolcTrans,Baidu
0,0.5439,0.5957,0.3782,0.3234,0.3849,0.3372,0.5279,0.4481,0.3589
1,0.3046,0.2489,0.2843,0.2762,0.2735,0.3591,0.2030,0.2262,0.2741
2,0.5188,0.3736,0.3899,0.4848,0.4651,0.4727,0.4515,0.4491,0.4848
3,0.2657,0.3182,0.3470,0.2103,0.3079,0.3699,0.3728,0.2154,0.2154
4,0.3085,0.1951,0.3286,0.4805,0.2729,0.5188,0.3594,0.1942,0.3786
...,...,...,...,...,...,...,...,...,...
95,0.3156,0.3141,0.3374,0.3795,0.2647,0.3814,0.2386,0.5242,0.3396
96,0.2282,0.3512,0.1568,0.2907,0.2889,0.2770,0.3136,0.2753,0.1235
97,0.4438,0.4438,0.4416,0.4711,0.4688,0.4416,0.3529,0.3833,0.4416
98,0.3787,0.2278,0.2093,0.2598,0.3125,0.1937,0.2616,0.2571,0.3774


In [28]:
# Write the score table to an Excel workbook, leaving out the row index
df.to_excel(excel_writer=outfile_path + 'meteor_scores.xlsx', index=False)