# RESULTS

THIS IS A JUPYTER NOTEBOOK TO COMPARE OUR MODELS WITH OUR TEST DATA

LOADING TEST DATA

In [31]:
import pandas as pd

# Load data
def load_data(source_file, target_file, encoding='utf-8'):
    """Read two parallel text files into lists of lines.

    Args:
        source_file: Path of the file holding one source string per line.
        target_file: Path of the file holding one target string per line.
        encoding: Text encoding used to decode both files. Defaults to
            UTF-8 so the result does not depend on the platform locale.

    Returns:
        A ``(sources, targets)`` tuple of lists of strings with the line
        terminators removed (``str.splitlines`` semantics).
    """
    def _read_lines(path):
        # Read the whole file and split on line boundaries.
        with open(path, 'r', encoding=encoding) as handle:
            return handle.read().splitlines()

    sources = _read_lines(source_file)
    targets = _read_lines(target_file)
    return sources, targets


# Load the test split: one source string and one target string per line,
# read from the two files below (paths are relative to the working directory).
test_sources, test_targets = load_data('dataset/filtered_test.source', 'dataset/filtered_test.target')


#### Prediction with LESK

In [32]:
def modified_lesk(sent, k, top_n=100):
    """Rank lemma names of length ``k`` by definition overlap with ``sent``.

    Each candidate lemma is scored, per synset, as
    ``0.75 * text_overlap + 0.25 * context_overlap`` where

    * ``text_overlap`` is the fraction of the sentence words that occur in
      the synset definition, and
    * ``context_overlap`` is the fraction of the "context" words (all
      non-stop-word tokens from the definitions of the sentence words'
      synsets) that occur in the synset definition.

    The best score over a lemma's synsets is kept, and only lemmas with a
    strictly positive score are ranked.

    NOTE(review): relies on ``word_tokenize``, ``stopwords`` and ``wn`` already
    being defined in the kernel (presumably the NLTK tokenizer, stop-word
    corpus and WordNet reader); no visible cell imports them -- confirm.

    Args:
        sent: Input sentence to match against.
        k: Required length (in characters) of the candidate lemmas.
        top_n: Maximum number of candidates returned. Defaults to 100;
            ``None`` returns every positively scored candidate.

    Returns:
        A list of lemma names sorted by descending score, at most ``top_n``
        long.
    """
    stop_words = set(stopwords.words("english"))

    # Prefer the de-duplicated non-stop-word tokens; if that leaves nothing,
    # fall back to the raw token list.
    tokens = word_tokenize(sent)
    content_tokens = list(set(tokens) - stop_words)
    text_words = content_tokens if content_tokens else tokens

    # Context vocabulary: every token from every definition of every synset
    # of the sentence words, minus stop words.
    context_vocab = set()
    for token in text_words:
        for synset in wn.synsets(token):
            context_vocab.update(word_tokenize(synset.definition()))
    context_vocab -= stop_words

    # Loop-invariant values, computed once instead of once per synset.
    # A denominator of 1 for an empty collection keeps the ratio at 0 without
    # dividing by zero.
    text_vocab = set(text_words)
    text_total = len(text_words) or 1
    context_total = len(context_vocab) or 1

    scores = {}
    for word in wn.all_lemma_names(lang='eng'):
        if len(word) != k or not word.isalpha():
            continue
        best = 0
        for synset in wn.synsets(word):
            gloss_words = set(word_tokenize(synset.definition())) - stop_words
            text_overlap = len(text_vocab & gloss_words) / text_total
            context_overlap = len(context_vocab & gloss_words) / context_total
            best = max(0.75 * text_overlap + 0.25 * context_overlap, best)
        if best > 0:
            scores[word] = best

    # sorted() is stable, so ties keep the lemma enumeration order.
    ranked = sorted(scores, key=scores.__getitem__, reverse=True)
    return ranked[:top_n]


In [33]:
def evaluate_lesk(test_sources, test_targets):
    """Score ``modified_lesk`` on parallel lists of sources and targets.

    For each (source, target) pair, ``modified_lesk`` is asked for
    candidates whose length equals ``len(target)``; the pair counts as a hit
    when the target is among the returned candidates.

    Args:
        test_sources: Sequence of source strings.
        test_targets: Sequence of target strings, aligned with
            ``test_sources``.

    Returns:
        A ``(results, probability_yes)`` tuple: ``results`` is a list of
        ``"yes"``/``"no"`` strings (one per evaluated pair) and
        ``probability_yes`` is the hit count divided by
        ``len(test_sources)``, or ``0.0`` when there are no sources.
    """
    total = len(test_sources)
    results = []

    for source, target in zip(test_sources, test_targets):
        predictions = modified_lesk(source, len(target))
        results.append("yes" if target in predictions else "no")

    correct_count = results.count("yes")
    # Guard the empty test set instead of raising ZeroDivisionError.
    probability_yes = correct_count / total if total else 0.0

    return results, probability_yes


In [34]:
# Run the Lesk-based evaluation on the loaded test data; the cell displays the
# returned (results, probability_yes) tuple.
evaluate_lesk(test_sources, test_targets)

(['no',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'yes',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'yes',
  'no',
  'yes',
  'no',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'yes',
  'no',
  'no',
  'no',
  'yes',
  'yes',
  'no',
  'no',
  'yes',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'yes',
  'yes',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'yes',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'no',
  'yes

# Transformers

### T5

In [35]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments

def evaluate(test_sources, test_targets, tokenizer, model, num_predictions):
    """Score ``predict_words3`` on parallel lists of sources and targets.

    For each (source, target) pair, ``predict_words3`` is asked for
    predictions of length ``len(target)``; the pair counts as a hit when the
    target is among them. The running index and hit count are printed every
    1000 items as a progress indicator.

    NOTE: a later cell of this notebook defines another function that is
    also named ``evaluate`` (the BERT variant); whichever definition was
    executed last is the one that gets called.

    Args:
        test_sources: Sequence of source strings.
        test_targets: Sequence of target strings, aligned with
            ``test_sources``.
        tokenizer: Tokenizer forwarded to ``predict_words3``.
        model: Model forwarded to ``predict_words3``.
        num_predictions: Number of predictions requested per source.

    Returns:
        A ``(results, probability_yes)`` tuple: ``results`` is a list of
        ``"yes"``/``"no"`` strings (one per evaluated pair) and
        ``probability_yes`` is the hit count divided by
        ``len(test_sources)``, or ``0.0`` when there are no sources.
    """
    correct_count = 0
    total = len(test_sources)
    results = []

    for index, (source, target) in enumerate(zip(test_sources, test_targets)):
        if index % 1000 == 0:
            print(index)
            print(correct_count)

        # The target's length is used as the required prediction length.
        predictions = predict_words3(source, tokenizer, model, len(target), num_predictions)

        if target in predictions:
            results.append("yes")
            correct_count += 1
        else:
            results.append("no")

    # Guard the empty test set instead of raising ZeroDivisionError.
    probability_yes = correct_count / total if total else 0.0

    return results, probability_yes

In [None]:
def predict_words3(clue, tokenizer, model, k, num_predictions=500):
    """Generate candidate answers for ``clue`` and keep those of length ``k``.

    Beam search is run with ``num_predictions`` beams and the same number of
    returned sequences; the decoded strings are then filtered so that only
    those with exactly ``k`` characters remain.

    Args:
        clue: Input text given to the model.
        tokenizer: Tokenizer providing ``encode`` and ``decode``.
        model: Sequence-to-sequence model providing ``eval`` and
            ``generate``.
        k: Required length, in characters, of the returned predictions.
        num_predictions: Beam width and number of sequences to return.

    Returns:
        A list of decoded predictions whose length is exactly ``k``; it can
        hold fewer than ``num_predictions`` entries (possibly none).
    """
    # Local import: in notebook order this cell runs before the cell that
    # imports torch at module level.
    import torch

    input_ids = tokenizer.encode(clue, return_tensors="pt")

    # Keep the inputs on the same device as the model when it exposes one.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    # Inference only: evaluation mode and no gradient tracking.
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            num_return_sequences=num_predictions,
            # NOTE(review): max_length is a token budget while k is a
            # character count; k + 1 leaves one extra position for a
            # special token.
            max_length=k + 1,
            num_beams=num_predictions,
            # Do not stop as soon as num_beams finished candidates exist.
            early_stopping=False,
        )

    # Decode every returned sequence, then keep only exact-length matches.
    predictions = [tokenizer.decode(output_id, skip_special_tokens=True) for output_id in outputs]
    return [word for word in predictions if len(word) == k]


### Pre-trained (before fine-tuning)

In [None]:
# Baseline setup: tokenizer and model are both loaded from the 't5-small'
# identifier, i.e. without the fine-tuning checkpoint used further below.
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Evaluate the currently loaded tokenizer/model, requesting 100 predictions per item.
# NOTE(review): `evaluate` is defined twice in this notebook (T5 and BERT
# variants); this call assumes the T5 variant is the one currently defined.
evaluate(test_sources,test_targets,tokenizer,model,100)

### Fine-tuned

In [None]:
# The tokenizer is still loaded from 't5-small'; only the model weights change,
# coming from 'results/checkpoint-162000' (presumably a fine-tuning checkpoint
# saved by an earlier training run -- confirm).
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('results/checkpoint-162000')


In [None]:
# Same evaluation as before (100 predictions per item), now with the model
# loaded from the checkpoint in the previous cell.
evaluate(test_sources,test_targets,tokenizer,model,100)

## BERT

In [None]:
import torch
from transformers import BertTokenizer, BertForMaskedLM

def predict_multiple_answers_bert(model, tokenizer, text, num_answers=3, wanted_size=5):
    """Predict fillers of a given length for the masked token in ``text``.

    The text is encoded (padded/truncated to 128 tokens), the model is run
    once, and the highest-probability tokens at the first mask position are
    scanned in order. Tokens whose decoded, stripped form has exactly
    ``wanted_size`` characters are collected until ``num_answers`` are found.

    Args:
        model: Masked-language model; called with ``input_ids`` and
            ``attention_mask`` and expected to return an object with
            ``logits``. It is moved to CUDA when available, else CPU.
        tokenizer: Tokenizer providing ``encode_plus``, ``decode`` and
            ``mask_token_id``.
        text: Input text, expected to contain the tokenizer's mask token.
        num_answers: Number of answers to return.
        wanted_size: Required length, in characters, of each answer.

    Returns:
        A list of ``num_answers`` strings. Missing answers are filled with
        the placeholder ``"No more {wanted_size}-char words"``; this is also
        what is returned when ``text`` contains no mask token.
    """
    placeholder = f"No more {wanted_size}-char words"

    model.eval()  # Inference mode
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Tokenize the input text and move the tensors next to the model.
    encoded_input = tokenizer.encode_plus(text, return_tensors="pt", padding='max_length', truncation=True, max_length=128)
    input_ids = encoded_input['input_ids'].to(device)
    attention_mask = encoded_input['attention_mask'].to(device)

    # Column indices of every mask token in the (single) input row.
    mask_position = torch.where(input_ids == tokenizer.mask_token_id)[1]
    if mask_position.numel() == 0:
        # No mask token to predict: fall back to placeholders instead of
        # failing with an IndexError further down.
        return [placeholder] * num_answers

    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits

    # Probabilities over the vocabulary at each mask position.
    mask_word_probs = torch.softmax(logits[0, mask_position], dim=-1)

    # Look at 10x more candidates than requested so that enough of them
    # survive the length filter, but never more than the vocabulary holds
    # (topk raises when asked for more).
    candidate_count = min(num_answers * 10, mask_word_probs.size(-1))
    _, top_tokens_ids = torch.topk(mask_word_probs, candidate_count, dim=1)

    answers = []
    # Only the first mask position is used.
    for token_id in top_tokens_ids[0].tolist():
        token = tokenizer.decode([token_id]).strip()
        if len(token) == wanted_size:
            answers.append(token)
        if len(answers) >= num_answers:
            break

    # Pad with placeholders when too few candidates had the wanted length.
    answers.extend([placeholder] * (num_answers - len(answers)))

    return answers



In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments

def evaluate(test_sources, test_targets, tokenizer, model, num_predictions):
    """Score ``predict_multiple_answers_bert`` on parallel sources/targets.

    For each (source, target) pair, ``predict_multiple_answers_bert`` is
    asked for ``num_predictions`` answers of length ``len(target)``; the pair
    counts as a hit when the target is among them. The running index and hit
    count are printed every 1000 items as a progress indicator.

    NOTE: this definition reuses the name ``evaluate`` that an earlier cell
    of this notebook gives to the T5 evaluation; after this cell runs, calls
    to ``evaluate`` use this BERT variant.

    Args:
        test_sources: Sequence of source strings.
        test_targets: Sequence of target strings, aligned with
            ``test_sources``.
        tokenizer: Tokenizer forwarded to ``predict_multiple_answers_bert``.
        model: Model forwarded to ``predict_multiple_answers_bert``.
        num_predictions: Number of answers requested per source.

    Returns:
        A ``(results, probability_yes)`` tuple: ``results`` is a list of
        ``"yes"``/``"no"`` strings (one per evaluated pair) and
        ``probability_yes`` is the hit count divided by
        ``len(test_sources)``, or ``0.0`` when there are no sources.
    """
    correct_count = 0
    total = len(test_sources)
    results = []

    for index, (source, target) in enumerate(zip(test_sources, test_targets)):
        if index % 1000 == 0:
            print(index)
            print(correct_count)

        # The target's length is used as the required answer length.
        predictions = predict_multiple_answers_bert(model, tokenizer, source, num_answers=num_predictions, wanted_size=len(target))

        if target in predictions:
            results.append("yes")
            correct_count += 1
        else:
            results.append("no")

    # Guard the empty test set instead of raising ZeroDivisionError.
    probability_yes = correct_count / total if total else 0.0

    return results, probability_yes

### Pre-trained (before fine-tuning)

In [None]:
from transformers import BertTokenizer, BertForMaskedLM
import torch

# Baseline setup: tokenizer and masked-LM model are both loaded from the
# 'bert-base-uncased' identifier. This rebinds the `tokenizer` and `model`
# names used by the T5 cells above.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')

In [None]:
# Evaluate the currently loaded tokenizer/model, requesting 100 answers per item.
# NOTE(review): assumes the BERT variant of `evaluate` (defined above in this
# section) is the one currently bound to the name.
evaluate(test_sources,test_targets,tokenizer,model,100)

### Fine-tuned

In [None]:
# The tokenizer is still loaded from 'bert-base-uncased'; only the model
# weights change, coming from 'results/checkpoint-81000' (presumably a
# fine-tuning checkpoint saved by an earlier training run -- confirm).
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

model = BertForMaskedLM.from_pretrained('results/checkpoint-81000')


In [None]:
# Same evaluation as before (100 answers per item), now with the model
# loaded from the checkpoint in the previous cell.
evaluate(test_sources,test_targets,tokenizer,model,100)