In [1]:
import torch

from flair.data import Sentence
from flair.embeddings import DocumentPoolEmbeddings, FlairEmbeddings, BertEmbeddings

import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [20]:
# Gold-standard TSV files for Basque (eu) and English (en).
# The paths are relative, so they resolve against the notebook's working directory.
test_eu = "goldstandard_eu_lexicover.tsv"
test_en = "goldstandard_en_lexicover.tsv"

def get_gold_sentences(filename, encoding="utf-8"):
    """Read a gold-standard TSV file into a list of single-entry dicts.

    Each data row is expected to hold the gold sentence in the first column
    followed by candidate sentences in the next columns. All double quotes are
    removed from the row before it is split on tabs.

    Args:
        filename: Path of the TSV file to read.
        encoding: Text encoding used to decode the file. It is passed
            explicitly so that decoding does not depend on the platform's
            default locale encoding.

    Returns:
        A list with one ``{gold_sentence: [candidates...]}`` dict per data row,
        in file order. At most the first 10 candidate columns are kept. Rows
        whose first column is empty, blank rows, and the header row (the one
        starting with ``"origin"``) are skipped.
    """
    gold_sentences = []
    with open(filename, 'rt', encoding=encoding) as f_p:
        for line in f_p:
            if line.startswith('"origin"'):  # header
                continue

            # Strip before testing for emptiness: a raw line read from the file
            # still carries its newline, so it is never falsy on its own.
            line = line.rstrip().replace('"', '')
            if not line:
                continue

            gold, *candidates = line.split('\t')
            sim_sentences = candidates[:10]

            if gold:
                gold_sentences.append({gold: sim_sentences})

    return gold_sentences

In [21]:
# Parse both gold-standard files. Each result is a list of
# {gold sentence: [candidate sentences]} dicts, one per data row.
sent_eu = get_gold_sentences(test_eu)
sent_en = get_gold_sentences(test_en)

In [22]:
# Display the parsed English entries as a sanity check of the TSV parsing.
sent_en

[{'Jokin invited Amaia for lunch': ['Jokin offered an invitation to Amaia for lunch',
   'Jokin asked Amaia, if she wanted to have lunch with him',
   'Amaia received the question from Jokin, if she would like to go for lunch',
   'Jokin offered an invitation to Amaia for a meal',
   'Amaia received an invitation from Jokin',
   'Jokin and Amaia met for lunch',
   'Jokin and Amaia went for lunch together',
   'Jokin and Amaia enjoyed a meal together',
   'Amaia and Jokin decided to go for lunch',
   'Amaia invited Jokin for lunch']},
 {'Jokin invited Amaia for lunch': ['Jokin offered an invitation to Amaia for lunch',
   'Amaia received the question from Jokin, if she would like to go for lunch',
   'Jokin offered an invitation to Amaia for a meal',
   'Jokin and Amaia met for lunch',
   'Amaia invited Jokin for lunch',
   'Amaia and Jokin are friends',
   'I went for lunch yesterday',
   'I invited my mom to a trip',
   'Jokin enjoys having lunch',
   'I saw some ducks at the park']},

In [28]:
# Load Flair embeddings: a forward and a backward model per language, wrapped in
# DocumentPoolEmbeddings (pooling options left at the library defaults).
# Basque uses the 'eu-*' models, English the 'mix-*' models.
flair_embeddings_eu = DocumentPoolEmbeddings([FlairEmbeddings('eu-forward'), FlairEmbeddings('eu-backward')])
flair_embeddings_en = DocumentPoolEmbeddings([FlairEmbeddings('mix-forward'), FlairEmbeddings('mix-backward')])

In [29]:
# Load BERT embeddings (cased and uncased variants).
# The last four layers are requested; see BERT paper, section 5.3 and table 7.
bert_layers = '-1,-2,-3,-4'

# Basque uses the multilingual checkpoints, English the English-only ones.
bert_cased_embeddings_eu = DocumentPoolEmbeddings([BertEmbeddings('bert-base-multilingual-cased', layers=bert_layers)])
bert_cased_embeddings_en = DocumentPoolEmbeddings([BertEmbeddings('bert-base-cased', layers=bert_layers)])

bert_uncased_embeddings_eu = DocumentPoolEmbeddings([BertEmbeddings('bert-base-multilingual-uncased', layers=bert_layers)])
bert_uncased_embeddings_en = DocumentPoolEmbeddings([BertEmbeddings('bert-base-uncased', layers=bert_layers)])


2019-07-04 21:05:03,309 The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.
2019-07-04 21:05:23,262 The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.


In [7]:
# TODO: load ELMo and multilingual ELMo embeddings (not implemented yet; this cell loads nothing)

In [23]:
def initialize_vectors(sent):
    """Create empty per-entry accumulators for similarities and scores.

    Args:
        sent: Sized collection of gold entries; only its length is used.

    Returns:
        A ``(similarities_all, scores_all)`` tuple. Each element is a list of
        ``len(sent)`` independent empty lists.
    """
    entry_count = len(sent)
    # Build each inner list separately so that no two slots share one object.
    similarities_all = [[] for _ in range(entry_count)]
    scores_all = [[] for _ in range(entry_count)]
    return similarities_all, scores_all

In [24]:
# One empty list per gold entry for each language. calculate() appends one
# similarity list / one score to each slot every time it is run.
similarities_all_eu, scores_all_eu = initialize_vectors(sent_eu)
similarities_all_en, scores_all_en = initialize_vectors(sent_en)

In [33]:
def calculate_similarities(gold, sim_sentences, embeddings):
    """Compare a gold sentence with each candidate using cosine similarity.

    Args:
        gold: Text of the reference sentence.
        sim_sentences: Iterable of candidate sentence texts, in the order in
            which they should be compared.
        embeddings: Object with an ``embed(sentence)`` method that sets an
            ``embedding`` tensor on the sentence it is given.

    Returns:
        A ``(similarities, score)`` tuple. ``similarities`` holds one cosine
        similarity per candidate, rounded to 4 decimals. ``score`` counts the
        adjacent candidate pairs whose similarity does not increase, i.e.
        where the later value is less than or equal to the one before it.

    Raises:
        AssertionError: If a candidate embedding's shape differs from the
            reference embedding's shape.
    """
    reference = Sentence(gold)
    embeddings.embed(reference)

    # The similarity module has no per-sentence state, so a single instance is reused.
    cosine = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

    similarities = []
    for text in sim_sentences:
        candidate = Sentence(text)
        embeddings.embed(candidate)

        assert reference.embedding.shape == candidate.embedding.shape

        proximity = cosine(reference.embedding, candidate.embedding)
        similarities.append(round(proximity.item(), 4))

    # Compare each rounded similarity with its predecessor.
    score = sum(
        1
        for previous, current in zip(similarities, similarities[1:])
        if current <= previous
    )

    return similarities, score

def print_similarities(gold, sim_sentences, similarities, score):
    """Print a gold sentence, its candidates with similarities, and the score.

    Args:
        gold: Text of the reference sentence.
        sim_sentences: Candidate sentence texts.
        similarities: Similarity values, paired positionally with
            ``sim_sentences`` (pairing stops at the shorter of the two).
        score: Score to print after the candidates.
    """
    report = [f"Example: {gold}"]
    report.extend(
        f"{sentence} - {similarity}"
        for sentence, similarity in zip(sim_sentences, similarities)
    )
    # The trailing "\n" leaves one blank line after each example.
    report.append(f"Score: {score}\n")
    print("\n".join(report))
    

In [31]:
def calculate(gold_sentences, embeddings, similarities_all, scores_all):
    """Score every gold entry with one embedding model and record the results.

    For each entry the gold sentence is compared against its candidates, the
    result is printed, and the score and similarity list are appended to the
    slot at the entry's position in the accumulators.

    Args:
        gold_sentences: List of single-entry dicts mapping a gold sentence to
            its list of candidate sentences.
        embeddings: Embedding object passed on to ``calculate_similarities``.
        similarities_all: List of lists, mutated in place; slot ``i`` receives
            the similarity list of entry ``i``.
        scores_all: List of lists, mutated in place; slot ``i`` receives the
            score of entry ``i``.

    Returns:
        The same ``similarities_all`` and ``scores_all`` objects that were
        passed in.
    """
    for position, entry in enumerate(gold_sentences):
        # Each entry holds exactly one key: the gold sentence itself.
        gold = list(entry.keys())[0]
        candidates = entry[gold]

        similarities, score = calculate_similarities(gold, candidates, embeddings)
        scores_all[position].append(score)
        print_similarities(gold, candidates, similarities, score)
        similarities_all[position].append(similarities)

    return similarities_all, scores_all

In [34]:
# Calculate similarities based on Flair embeddings.
# NOTE: calculate() appends to the accumulators, so this cell must run first and
# only once: later cells read index 0 of each slot as the Flair result.
print("BASQUE")
similarities_all_eu, scores_all_eu = calculate(sent_eu, flair_embeddings_eu, similarities_all_eu, scores_all_eu)
print("\n\nENGLISH")
similarities_all_en, scores_all_en = calculate(sent_en, flair_embeddings_en, similarities_all_en, scores_all_en)

BASQUE
Example: Jokinek Amaia bazkari batera gonbidatu zuen
Jokinek gonbidapen bat eskeini zion Amaiari bazkari baterako - 0.7616
Jokinek Amaiari galdetu zion, ea berarekin bazkaldu nahi zuen - 0.708
Jokinek Amaiari galdetu zion: Nahiko zenuke nirekin bazkaltzera joan? - 0.5921
Amaiak galdera jaso zuen Jokinengandik, ea bazkaltzea joan nahiko lukeen - 0.5961
Jokinek Amaiari gonbidapen bat eskeini zion otordu baterako - 0.7694
Amaiak gonbidapen bat jaso zuen Jokinengandik - 0.7276
Bazkaltzeko elkartu ziren Jokin eta Amaia - 0.5851
Jokinek eta Amaiak otordu bat gozatu zuten elkarrekin - 0.8053
Amaiak eta Jokinek erabaki zuten, elkarrekin bazkaltzera joatea - 0.6851
Amaiak Jokin gonbidatu zuen bazkari batera - 0.8692
Score: 5

Example: Jokinek Amaia bazkari batera gonbidatu zuen
Jokinek gonbidapen bat eskeini zion Amaiari bazkari baterako - 0.7616
Jokinek Amaiari galdetu zion: Nahiko zenuke nirekin bazkaltzera joan? - 0.5921
Jokinek Amaiari gonbidapen bat eskeini zion otordu baterako - 0.

In [35]:
# Calculate similarities based on BERT uncased embeddings.
# NOTE: calculate() appends to the accumulators, so this cell must run second and
# only once: later cells read index 1 of each slot as the BERT uncased result.
print("BASQUE")
similarities_all_eu, scores_all_eu = calculate(sent_eu, bert_uncased_embeddings_eu, similarities_all_eu, scores_all_eu)
print("\n\nENGLISH")
similarities_all_en, scores_all_en = calculate(sent_en, bert_uncased_embeddings_en, similarities_all_en, scores_all_en)

BASQUE
Example: Jokinek Amaia bazkari batera gonbidatu zuen
Jokinek gonbidapen bat eskeini zion Amaiari bazkari baterako - 0.9573
Jokinek Amaiari galdetu zion, ea berarekin bazkaldu nahi zuen - 0.9379
Jokinek Amaiari galdetu zion: Nahiko zenuke nirekin bazkaltzera joan? - 0.9066
Amaiak galdera jaso zuen Jokinengandik, ea bazkaltzea joan nahiko lukeen - 0.9212
Jokinek Amaiari gonbidapen bat eskeini zion otordu baterako - 0.948
Amaiak gonbidapen bat jaso zuen Jokinengandik - 0.9407
Bazkaltzeko elkartu ziren Jokin eta Amaia - 0.9377
Jokinek eta Amaiak otordu bat gozatu zuten elkarrekin - 0.9364
Amaiak eta Jokinek erabaki zuten, elkarrekin bazkaltzera joatea - 0.9138
Amaiak Jokin gonbidatu zuen bazkari batera - 0.9745
Score: 6

Example: Jokinek Amaia bazkari batera gonbidatu zuen
Jokinek gonbidapen bat eskeini zion Amaiari bazkari baterako - 0.9573
Jokinek Amaiari galdetu zion: Nahiko zenuke nirekin bazkaltzera joan? - 0.9066
Jokinek Amaiari gonbidapen bat eskeini zion otordu baterako - 0.

In [36]:
# Calculate similarities based on BERT cased embeddings.
# NOTE: calculate() appends to the accumulators, so this cell must run third and
# only once: later cells read index 2 of each slot as the BERT cased result.
print("BASQUE")
similarities_all_eu, scores_all_eu = calculate(sent_eu, bert_cased_embeddings_eu, similarities_all_eu, scores_all_eu)
print("\n\nENGLISH")
similarities_all_en, scores_all_en = calculate(sent_en, bert_cased_embeddings_en, similarities_all_en, scores_all_en)

BASQUE
Example: Jokinek Amaia bazkari batera gonbidatu zuen
Jokinek gonbidapen bat eskeini zion Amaiari bazkari baterako - 0.9656
Jokinek Amaiari galdetu zion, ea berarekin bazkaldu nahi zuen - 0.918
Jokinek Amaiari galdetu zion: Nahiko zenuke nirekin bazkaltzera joan? - 0.8871
Amaiak galdera jaso zuen Jokinengandik, ea bazkaltzea joan nahiko lukeen - 0.9011
Jokinek Amaiari gonbidapen bat eskeini zion otordu baterako - 0.9612
Amaiak gonbidapen bat jaso zuen Jokinengandik - 0.927
Bazkaltzeko elkartu ziren Jokin eta Amaia - 0.9237
Jokinek eta Amaiak otordu bat gozatu zuten elkarrekin - 0.9315
Amaiak eta Jokinek erabaki zuten, elkarrekin bazkaltzera joatea - 0.9056
Amaiak Jokin gonbidatu zuen bazkari batera - 0.9716
Score: 5

Example: Jokinek Amaia bazkari batera gonbidatu zuen
Jokinek gonbidapen bat eskeini zion Amaiari bazkari baterako - 0.9656
Jokinek Amaiari galdetu zion: Nahiko zenuke nirekin bazkaltzera joan? - 0.8871
Jokinek Amaiari gonbidapen bat eskeini zion otordu baterako - 0.9

In [48]:
# Per-sentence report: print the gold sentence, its numbered candidates and the
# three model scores for Basque and then English, each followed by a heatmap of
# the similarities (one row per model).
# NOTE(review): `fig` only collects traces and is never displayed in this cell,
# and the Basque and English traces of one sentence go to the same subplot cell.
# NOTE(review): the grid has 10 columns, so more than 10 gold entries would
# presumably make append_trace fail - confirm.
fig = plotly.tools.make_subplots(rows=1, cols=10)

model_labels = ['Flair', 'BERT uncased', 'BERT cased']
languages = (
    ("\nBASQUE", sent_eu, scores_all_eu, similarities_all_eu),
    ("\nENGLISH", sent_en, scores_all_en, similarities_all_en),
)

for i in range(len(sent_eu)):
    print(f"Sentence #{i}")

    for heading, sentences, scores, similarities in languages:
        print(heading)
        origin = list(sentences[i].keys())[0]
        print(origin + '\n')
        for rank, candidate in enumerate(sentences[i][origin], start=1):
            print(f"{rank}. " + candidate)

        # Index order follows the order in which the models were run.
        flair_score, uncased_score, cased_score = scores[i][0], scores[i][1], scores[i][2]
        print(f"Scores: Flair: {flair_score}, BERT uncased: {uncased_score}, BERT cased: {cased_score}")

        trace = go.Heatmap(z=similarities[i], y=list(model_labels))
        data = [trace]
        fig.append_trace(trace, 1, i + 1)
        iplot(data, filename='basic-heatmap' + str(i))

This is the format of your plot grid:
[ (1,1) x1,y1 ]     [ (1,2) x2,y2 ]     [ (1,3) x3,y3 ]     [ (1,4) x4,y4 ]     [ (1,5) x5,y5 ]     [ (1,6) x6,y6 ]     [ (1,7) x7,y7 ]     [ (1,8) x8,y8 ]     [ (1,9) x9,y9 ]     [ (1,10) x10,y10 ]

Sentence #0

BASQUE
Jokinek Amaia bazkari batera gonbidatu zuen

1. Jokinek gonbidapen bat eskeini zion Amaiari bazkari baterako
2. Jokinek Amaiari galdetu zion, ea berarekin bazkaldu nahi zuen
3. Jokinek Amaiari galdetu zion: Nahiko zenuke nirekin bazkaltzera joan?
4. Amaiak galdera jaso zuen Jokinengandik, ea bazkaltzea joan nahiko lukeen
5. Jokinek Amaiari gonbidapen bat eskeini zion otordu baterako
6. Amaiak gonbidapen bat jaso zuen Jokinengandik
7. Bazkaltzeko elkartu ziren Jokin eta Amaia
8. Jokinek eta Amaiak otordu bat gozatu zuten elkarrekin
9. Amaiak eta Jokinek erabaki zuten, elkarrekin bazkaltzera joatea
10. Amaiak Jokin gonbidatu zuen bazkari batera
Scores: Flair: 5, BERT uncased: 6, BERT cased: 5



ENGLISH
Jokin invited Amaia for lunch

1. Jokin offered an invitation to Amaia for lunch
2. Jokin asked Amaia, if she wanted to have lunch with him
3. Amaia received the question from Jokin, if she would like to go for lunch
4. Jokin offered an invitation to Amaia for a meal
5. Amaia received an invitation from Jokin
6. Jokin and Amaia met for lunch
7. Jokin and Amaia went for lunch together
8. Jokin and Amaia enjoyed a meal together
9. Amaia and Jokin decided to go for lunch
10. Amaia invited Jokin for lunch
Scores: Flair: 4, BERT uncased: 5, BERT cased: 4


Sentence #1

BASQUE
Jokinek Amaia bazkari batera gonbidatu zuen

1. Jokinek gonbidapen bat eskeini zion Amaiari bazkari baterako
2. Jokinek Amaiari galdetu zion: Nahiko zenuke nirekin bazkaltzera joan?
3. Jokinek Amaiari gonbidapen bat eskeini zion otordu baterako
4. Bazkaltzeko elkartu ziren Jokin eta Amaia
5. Amaiak Jokin gonbidatu zuen bazkari batera
6. Amaia eta Jokin lagunak dira
7. Bazkaltzera joan nintzen atzo
8. Nire ama gonbidatu dut bidai batera
9. Jokinek oso gustora bazkaltzen du
10. Ahateak ikusi ditut parkean
Scores: Flair: 5, BERT uncased: 5, BERT cased: 5



ENGLISH
Jokin invited Amaia for lunch

1. Jokin offered an invitation to Amaia for lunch
2. Amaia received the question from Jokin, if she would like to go for lunch
3. Jokin offered an invitation to Amaia for a meal
4. Jokin and Amaia met for lunch
5. Amaia invited Jokin for lunch
6. Amaia and Jokin are friends
7. I went for lunch yesterday
8. I invited my mom to a trip
9. Jokin enjoys having lunch
10. I saw some ducks at the park
Scores: Flair: 5, BERT uncased: 5, BERT cased: 4


Sentence #2

BASQUE
Medikuak gaixoa bazkari batera gonbidatu zuen

1. Medikuak gonbidapen bat eskeini zion gaixoari bazkari baterako
2. Medikuak gaixoari galdetu zion: Nahiko zenuke nirekin bazkaltzera joan?
3. Medikuak gaixoari gonbidapen bat eskeini zion otordu baterako
4. Bazkaltzeko elkartu ziren medikua eta gaixoa
5. gaixoak medikua gonbidatu zuen bazkari batera
6. gaixoa eta medikua lagunak dira
7. Bazkaltzera joan nintzen atzo
8. Nire ama gonbidatu dut bidai batera
9. Medikuak oso gustora bazkaltzen du
10. Ahateak ikusi ditut parkean
Scores: Flair: 6, BERT uncased: 5, BERT cased: 5



ENGLISH
the doctor invited the patient for lunch

1. the doctor offered an invitation to the patient for lunch
2. the doctor asked the patient, if she wanted to have lunch with him
3. the patient received the question from the doctor, if she would like to go for lunch
4. the doctor offered an invitation to the patient for a meal
5. the patient received an invitation from the doctor
6. the doctor and the patient met for lunch
7. the doctor and the patient went for lunch together
8. the doctor and the patient enjoyed a meal together
9. the patient and the doctor decided to go for lunch
10. the patient invited the doctor for lunch
Scores: Flair: 4, BERT uncased: 5, BERT cased: 5


Sentence #3

BASQUE
Medikuak gaixoa bazkari batera gonbidatu zuen

1. Medikuak gaixoa afari batera gonbidatu zuen
2. Medikuak gaixoa bazkari batera gonbidatu du
3. Medikuak gaixoari esan zion, sendatuko zela
4. Irakasleak ikaslea bazkari batera gonbidatu zuen
5. Suhiltzaileak zientzialaria bazkari batera gonbidatu zuen
6. Medikuak ez zuen gaixoa bazkari batera gonbidatu
7. Gaixoak medikua bazkari batera gonbidatu zuen
8. Zuen gaixoa batera bazkari medikuak gonbidatu
9. Midekauk goaxia bakrazi betraa gibondtau zeun
10. Utzi iezadazu laguntzen maletarekin
Scores: Flair: 6, BERT uncased: 4, BERT cased: 5



ENGLISH
the doctor invited the patient for lunch

1. the doctor invited the patient for dinner
2. the doctor has invited the patient for lunch
3. the doctor told the patient, they would get better
4. the teacher invited the student for lunch
5. the firefighter invited the scientist for lunch
6. the patient didn't invite the doctor for lunch
7. the patient invited the doctor for lunch
8. patient invited the lunch the for doctor
9. eth roctod ivtedni hte pietant fro chunl
10. let me help you with that suitcase
Scores: Flair: 5, BERT uncased: 5, BERT cased: 4


In [44]:
def calculate_total_score(scores_all, model_names=("Flair", "BERT uncased", "BERT cased")):
    """Print the per-model sum of the scores over all gold entries.

    The accumulator is sized by the number of models rather than by the number
    of gold entries, so the totals are correct for any number of entries,
    including fewer than three and none at all.

    Args:
        scores_all: List with one score list per gold entry; position ``k`` of
            each inner list is the score of the ``k``-th model. Entries beyond
            ``len(model_names)`` are ignored.
        model_names: Display names of the models, in the order their scores
            were recorded.

    Raises:
        IndexError: If an inner list holds fewer scores than there are models.
    """
    totals = [0] * len(model_names)
    for sentence_scores in scores_all:
        for model_index in range(len(model_names)):
            totals[model_index] += sentence_scores[model_index]

    summary = ", ".join(f"{name}: {total}" for name, total in zip(model_names, totals))
    print("Total scores: " + summary)

In [45]:
# Print the per-model score totals: Basque first, then English.
calculate_total_score(scores_all_eu)
calculate_total_score(scores_all_en)

Total scores: Flair: 22, BERT uncased: 20, BERT cased: 20
Total scores: Flair: 18, BERT uncased: 20, BERT cased: 17
