In [1]:
# Mount Google Drive so files stored there are reachable from this Colab runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install sentencepiece
!pip install torch
!pip install transformers




In [3]:
# Load the pretrained Helsinki-NLP English -> Twi checkpoint and its tokenizer.
from transformers import AutoTokenizer, AutoModelWithLMHead, AutoModelForSeq2SeqLM

# Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
MODEL_NAME = "Helsinki-NLP/opus-mt-en-tw"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# The seq2seq auto class is the one used here; AutoModelWithLMHead stays imported but unused.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

In [4]:
#import further libraries that will be needed
import nltk
from nltk.translate.bleu_score import SmoothingFunction
import pandas as pd

In [5]:
def translate(input_sentence, model=model, **kwargs):
    """
    Translate text to the target language using the provided model.

    input_sentence: a single sentence (str), or a list/tuple/iterable of sentences
    model: model to use for the translation (defaults to the module-level ``model``,
           bound when this cell is executed).
    **kwargs: forwarded unchanged to ``model.generate`` (e.g. beam-search options).

    Returns:
    - str input  -> the translated sentence as a str
    - iterable of sentences -> list of translations, in the same order
      (an empty iterable returns an empty list without calling the model)
    """
    if isinstance(input_sentence, str):
        # Single sentence: encode, generate, and decode the first returned sequence.
        input_id = tokenizer.encode(input_sentence, return_tensors="pt")
        beam_output = model.generate(input_id, **kwargs)
        return tokenizer.decode(beam_output[0], skip_special_tokens=True)

    # Batch of sentences. `tokenizer.encode` would treat a list of strings as an
    # already-tokenized single input, so the batch goes through the tokenizer's
    # __call__ with padding, which also yields the attention mask generate() needs
    # to ignore the pad positions.
    sentences = list(input_sentence)
    if not sentences:
        return []
    batch = tokenizer(sentences, return_tensors="pt", padding=True)
    beam_output = model.generate(**batch, **kwargs)
    return tokenizer.batch_decode(beam_output, skip_special_tokens=True)

In [6]:
def bleuscore_dataframe(dataframe, weights=(0.85,0.15,0,0), smoothFn=None,auto_reweigh=False):
    """
    Compute the corpus-level BLEU score of an entire dataframe.

    dataframe: pd.DataFrame - must have "predictions" and "references" columns.
        Every cell is coerced with str() and split on whitespace into tokens, so a
        missing value (NaN) becomes the single token "nan" instead of raising.
        Each row supplies exactly one reference for its prediction.
    weights: tuple of n-gram weights, unigrams first (bigrams, trigrams, ... follow).
        Passed unchanged to nltk's corpus_bleu.
    smoothFn: None (no smoothing function), an integer 0..7 selecting
        SmoothingFunction().method<N>, or a callable that is passed through as-is.
    auto_reweigh: forwarded unchanged to corpus_bleu.

    Returns:
    The value returned by nltk.translate.bleu_score.corpus_bleu.

    Raises:
    ValueError if smoothFn is not None, a callable, or an integer in 0..7.
    """
    # Tokenize both columns the same way; str() keeps non-string cells from crashing.
    preds = [str(line).strip().split()
             for line in dataframe["predictions"].values.tolist()]
    # corpus_bleu expects a *list of references* per hypothesis, hence the extra nesting.
    refs = [[str(line).strip().split()]
            for line in dataframe["references"].values.tolist()]

    if smoothFn is None or callable(smoothFn):
        smoothFunc = smoothFn
    elif not isinstance(smoothFn, bool) and smoothFn in range(8):
        # Plain attribute lookup instead of eval(); int() normalizes values such as
        # 7.0 or numpy integers to the "method7" spelling.
        smoothFunc = getattr(SmoothingFunction(), f"method{int(smoothFn)}")
    else:
        raise ValueError(
            f"smoothFn must be None, a callable, or an integer from 0 to 7; got {smoothFn!r}"
        )

    return nltk.translate.bleu_score.corpus_bleu(refs, preds, weights=weights,
                                                 smoothing_function=smoothFunc,
                                                 auto_reweigh=auto_reweigh)

In [7]:
# Read the parallel dataset that the translations will be scored against.
PAIRS_CSV_PATH = "/content/drive/My Drive/GhanaNLP/DATA/GhanaNLP_data_001.csv"
pairs = pd.read_csv(PAIRS_CSV_PATH)

In [8]:
def predict_sample_sentences(parallel_sentences, sample_size=100, random_state=None):
    """
    Take a dataframe of parallel sentences, draw a random sample of rows and
    translate the source-language sentence of each sampled row with ``translate``.

    parallel_sentences: pd.DataFrame - source (English) sentences in column index 0
        and reference translations in column index 1. Only these first two columns
        are used; they are renamed to "English" and "references" in the result.
        The input frame itself is not modified.
    sample_size: number of rows to translate, default is 100. If the frame has
        fewer rows, every row is used instead of raising.
    random_state: optional seed forwarded to DataFrame.sample so the same rows can
        be drawn again; None (default) keeps the sample random.

    Return:
    A new dataframe with columns "English", "references" and a third column
    "predictions" holding the translated sentences.
    """
    # Clamp so a small dataset does not make DataFrame.sample raise.
    n_rows = min(sample_size, len(parallel_sentences))

    # Columns are taken by position, so extra columns in the input are ignored.
    test_pairs = parallel_sentences.iloc[:, :2].sample(n_rows, random_state=random_state)
    test_pairs.columns = ["English", "references"]

    # assign() returns a new frame rather than mutating the sampled one in place.
    return test_pairs.assign(predictions=test_pairs["English"].apply(translate))

In [9]:
# Translate a random sample of the sentence pairs (predict_sample_sentences draws
# 100 rows by default) and keep the result for the BLEU checks that follow.
# NOTE(review): the sample is unseeded, so the rows and the scores change on every run.
sample_test_pairs = predict_sample_sentences(pairs)

In [10]:
# Preview 6 randomly chosen rows of the translated sample (drawn without replacement).
sample_test_pairs.sample(n=6)

Unnamed: 0,English,references,predictions
139,How are you doing?,Ɛte sɛn?,Ɔkwan bɛn so na woreyɛ saa?
659,Yaw is an evil man.,Yaw yɛ nipa bɔne ni/ Yaw tirim wɔ sum.,Yaw yɛ ɔbɔnefo.
102,I am serious,m'ani abre,M'ani abere paa
8,Think about yourself,Dwene wo ho,Susuw wo ho hwɛ
313,What items are you travelling with?,Ɛdeɛn nnoɔma na wo de retu kwan no?,Nneɛma bɛn na wode retu kwan?
645,I'm not taking any chances.,Mɛyɛ birbiara/ Memma m'ani mpa biribiara a ehi...,Merentumi nyɛ ho hwee.


In [11]:
# Check some sample BLEU scores with different parameters.
# 1. Most of the weight on unigrams (0.8), a little on bigrams (0.1), smoothing method 7,
#    and auto_reweigh (the option to re-normalize the weights uniformly) left at its default of False.
#    NOTE(review): these weights sum to 0.9, not 1 - presumably this score is not directly
#    comparable with runs that use other weight totals; confirm against the NLTK definition.
bleuscore_dataframe(sample_test_pairs[["references","predictions"]], weights=(0.8,0.1,0,0),smoothFn=7)

0.5372258789011285

In [12]:
# 2. Unigrams only (weight 0.8), smoothing method 7, with auto_reweigh enabled.
#    NOTE(review): the weights sum to 0.8, not 1 - confirm the score is comparable with the other runs.
bleuscore_dataframe(sample_test_pairs[["references","predictions"]], weights=(0.8,0,0,0),smoothFn=7, auto_reweigh=True)

0.6187342840449511

In [13]:
# 3. Unigrams only with a lower weight (0.58), smoothing method 7, with auto_reweigh enabled.
#    NOTE(review): the weights sum to 0.58, not 1 - confirm the score is comparable with the other runs.
bleuscore_dataframe(sample_test_pairs[["references","predictions"]], weights=(0.58,0,0,0),smoothFn=7, auto_reweigh=True)

0.7060582487467826

In [14]:
# 4. Unigrams only (weight 0.61), no smoothing function, with auto_reweigh enabled.
#    NOTE(review): the weights sum to 0.61, not 1 - confirm the score is comparable with the other runs.
bleuscore_dataframe(sample_test_pairs[["references","predictions"]], weights=(0.61,0,0,0),smoothFn=None, auto_reweigh=True)

0.44204042731236304

In [15]:
# 5. Spread the weight evenly (1/5 each) across 1-grams to 5-grams, smoothing method 7, with auto_reweigh enabled.
bleuscore_dataframe(sample_test_pairs[["references","predictions"]], weights=(1./5., 1./5., 1./5., 1./5., 1./5.),smoothFn=7,  auto_reweigh=True) 

0.1113540603137754