In [37]:
import torch

from flair.data import Sentence
from flair.embeddings import DocumentPoolEmbeddings, FlairEmbeddings, BertEmbeddings

import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Set up plotly's offline notebook mode so that iplot() output renders inline.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly documentation.
init_notebook_mode(connected=True)

In [2]:
# Language code of the gold standard to evaluate; later cells also use it to
# pick which pre-trained embedding models are loaded.
language = 'en'
# Tab-separated gold-standard file for the selected language.
gold_filename = f"goldstandard_{language}.tsv"

def get_gold_sentences(filename):
    """Read the gold-standard TSV file into a dictionary.

    Each data row holds a reference ("gold") sentence in its first column,
    followed by the sentences it is compared against. Double quotes are
    removed from the whole row before it is split on tabs.

    Args:
        filename: Path of the tab-separated gold-standard file (read as UTF-8).

    Returns:
        dict mapping each gold sentence to the list of non-empty sentences
        found in the (up to) ten columns that follow it. The header row (the
        one starting with ``"origin"``), blank lines and rows whose first
        column is empty are skipped. If a gold sentence occurs more than
        once, the last row wins.
    """
    gold_sentences = {}
    # Explicit encoding: the platform default is not guaranteed to be UTF-8,
    # which would garble non-ASCII characters in the sentences.
    with open(filename, 'rt', encoding='utf-8') as f_p:
        for raw_line in f_p:
            if raw_line.startswith('"origin"'):  # header
                continue

            # Strip before testing for emptiness: a line read from a file
            # still carries its newline, so it is never empty beforehand.
            line = raw_line.rstrip()
            if not line:
                continue

            fields = line.replace('"', '').split('\t')
            gold = fields[0]
            if not gold:
                continue

            # Empty cells carry no sentence to compare against, so drop them.
            gold_sentences[gold] = [cell for cell in fields[1:11] if cell]

    return gold_sentences

In [3]:
# Parse the gold standard once; the resulting dict (gold sentence -> list of
# sentences to compare) is read as a global by the cells below.
gold_sentences = get_gold_sentences(gold_filename)

In [4]:
def calculate_similarities(gold, sim_sentences, embeddings):
    """Score how similar each candidate sentence is to the gold sentence.

    Args:
        gold: Text of the reference sentence.
        sim_sentences: Iterable of sentence texts to compare with ``gold``.
        embeddings: Object whose ``embed()`` method populates the
            ``embedding`` attribute of a ``Sentence``.

    Returns:
        Tuple ``(similarities, score)``. ``similarities`` holds the cosine
        similarity (rounded to 4 decimals) between ``gold`` and each
        sentence, in input order. ``score`` counts the positions whose
        rounded similarity is less than or equal to that of the sentence
        right before it.

    Raises:
        AssertionError: If a sentence embedding does not have the same shape
            as the gold embedding.
    """
    q = Sentence(gold)
    embeddings.embed(q)

    # Built once up front: its arguments never change between iterations.
    cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

    similarities = []
    score = 0
    previous = None  # rounded similarity of the preceding sentence, if any

    for sentence in sim_sentences:
        s = Sentence(sentence)
        embeddings.embed(s)

        assert q.embedding.shape == s.embedding.shape

        current = round(cos(q.embedding, s.embedding).item(), 4)
        # The first sentence has no predecessor and can never add to the score.
        if previous is not None and current <= previous:
            score += 1

        similarities.append(current)
        previous = current

    return similarities, score

def print_similarities(gold, sim_sentences, similarities, score):
    """Print one gold sentence with its compared sentences and its score.

    The report consists of an ``Example:`` line, one ``sentence - similarity``
    line per pair (pairing stops at the shorter of the two sequences), and a
    ``Score:`` line followed by a blank line.
    """
    report = [f"Example: {gold}"]
    report.extend(
        f"{sentence} - {similarity}"
        for sentence, similarity in zip(sim_sentences, similarities)
    )
    # The trailing "\n" plus print's own newline yields the blank separator line.
    report.append(f"Score: {score}\n")
    print("\n".join(report))
    

In [5]:
# load flair embeddings
# Prefix of the pre-trained Flair language-model pair for each supported
# language; the models are addressed as '<prefix>-forward' / '<prefix>-backward'.
flair_model_prefixes = {'eu': 'eu', 'en': 'news'}
if language not in flair_model_prefixes:
    # Fail with a clear message instead of a NameError on flair_embeds below.
    raise ValueError(f"No Flair embeddings configured for language {language!r}")
flair_embeds = flair_model_prefixes[language]

# Pool the forward and backward models into one document embedding.
flair_embeddings = DocumentPoolEmbeddings([
    FlairEmbeddings(flair_embeds + '-forward'),
    FlairEmbeddings(flair_embeds + '-backward'),
])

In [6]:
# load BERT embeddings
# Layer selection handed to BertEmbeddings.
# See BERT paper, section 5.3 and table 7
bert_layers = '-1,-2,-3,-4'

# English gets its own model name prefix; every other language falls back to
# the multilingual one.
bert_embed = 'bert-base-' if language == 'en' else 'bert-base-multilingual-'

# Build the cased variant first, then the uncased one, each wrapped in a
# document-level pooling embedding.
bert_cased_embeddings, bert_uncased_embeddings = (
    DocumentPoolEmbeddings([BertEmbeddings(bert_embed + casing, layers=bert_layers)])
    for casing in ('cased', 'uncased')
)


2019-07-01 16:21:37,826 The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.


In [7]:
# TODO: load ELMo and ELMo multilingual embeddings (this cell contains no code yet)

In [31]:
def calculate_and_print(embeddings, similarities_all, scores_all):
    """Evaluate one embedding model on every gold sentence and print the results.

    Iterates over the global ``gold_sentences`` dict. For the example at
    position ``n`` the score is appended to ``scores_all[n]`` and the list of
    similarities to ``similarities_all[n]``, so both arguments must already
    contain one list per gold sentence.

    Args:
        embeddings: Embedding model passed on to ``calculate_similarities``.
        similarities_all: List of per-example lists, extended in place.
        scores_all: List of per-example lists, extended in place.

    Returns:
        The same ``similarities_all`` and ``scores_all`` objects, as a tuple.
    """
    for position, (gold, sim_sentences) in enumerate(gold_sentences.items()):
        similarities, score = calculate_similarities(gold, sim_sentences, embeddings)
        scores_all[position].append(score)
        print_similarities(gold, sim_sentences, similarities, score)
        similarities_all[position].append(similarities)
    return similarities_all, scores_all

In [32]:
# One empty list per gold sentence; each evaluated model later appends its
# list of similarities to the matching slot.
similarities_all = [[] for _ in range(len(gold_sentences))]

In [33]:
# One empty list per gold sentence; each evaluated model later appends its
# score to the matching slot.
scores_all = [[] for _ in range(len(gold_sentences))]

In [34]:
# calculate similarities based on flair embeddings
# NOTE(review): results are appended to the per-example lists, and the plotting
# cell reads the Flair entry at index 0 — so this must be the first model run
# on freshly initialised lists (re-running the cell appends a duplicate entry).
similarities_all, scores_all = calculate_and_print(flair_embeddings, similarities_all, scores_all)

Example: Jokin offered an invitation to Amaia for lunch
Jokin asked Amaia, if she wanted to have lunch with him - 0.6723
Amaia received the question from Jokin, if she would like to go for lunch - 0.6958
Jokin offered an invitation to Amaia for a meal - 0.9333
Amaia received an invitation from Jokin - 0.7782
Jokin and Amaia met for lunch - 0.7229
Jokin and Amaia went for lunch together - 0.6636
Jokin and Amaia enjoyed a meal together - 0.6665
Amaia and Jokin decided to go for lunch - 0.6988
Amaia invited Jokin for lunch - 0.7967
Score: 3

Example: If you have an extra apple, I would appreciate it a lot
Would you give me an apple you don't need? - 0.737
I would appreciate it if you would give me that extra apple - 0.8606
If you don't need that apple, I would appreciate it a lot - 0.9444
I would be very happy if you gave me an apple - 0.7937
Will you give me an apple? - 0.682
I would like that apple - 0.7183
I have asked you an apple - 0.7371
You have one extra apple - 0.7589
You have of

In [35]:
# calculate similarities based on bert uncased embeddings
# NOTE(review): the plotting cell reads the BERT uncased entry at index 1, so
# this must run second, after the Flair evaluation.
similarities_all, scores_all = calculate_and_print(bert_uncased_embeddings, similarities_all, scores_all)

Example: Jokin offered an invitation to Amaia for lunch
Jokin asked Amaia, if she wanted to have lunch with him - 0.8367
Amaia received the question from Jokin, if she would like to go for lunch - 0.8333
Jokin offered an invitation to Amaia for a meal - 0.9623
Amaia received an invitation from Jokin - 0.816
Jokin and Amaia met for lunch - 0.8494
Jokin and Amaia went for lunch together - 0.8251
Jokin and Amaia enjoyed a meal together - 0.7923
Amaia and Jokin decided to go for lunch - 0.8006
Amaia invited Jokin for lunch - 0.9116
Score: 4

Example: If you have an extra apple, I would appreciate it a lot
Would you give me an apple you don't need? - 0.7793
I would appreciate it if you would give me that extra apple - 0.9013
If you don't need that apple, I would appreciate it a lot - 0.948
I would be very happy if you gave me an apple - 0.8307
Will you give me an apple? - 0.7213
I would like that apple - 0.7107
I have asked you an apple - 0.6639
You have one extra apple - 0.6753
You have of

In [36]:
# calculate similarities based on bert cased embeddings
# NOTE(review): the plotting cell reads the BERT cased entry at index 2, so
# this must run third, after the Flair and BERT uncased evaluations.
similarities_all, scores_all = calculate_and_print(bert_cased_embeddings, similarities_all, scores_all)

Example: Jokin offered an invitation to Amaia for lunch
Jokin asked Amaia, if she wanted to have lunch with him - 0.9202
Amaia received the question from Jokin, if she would like to go for lunch - 0.9301
Jokin offered an invitation to Amaia for a meal - 0.9912
Amaia received an invitation from Jokin - 0.9501
Jokin and Amaia met for lunch - 0.9466
Jokin and Amaia went for lunch together - 0.9298
Jokin and Amaia enjoyed a meal together - 0.9192
Amaia and Jokin decided to go for lunch - 0.9362
Amaia invited Jokin for lunch - 0.9653
Score: 4

Example: If you have an extra apple, I would appreciate it a lot
Would you give me an apple you don't need? - 0.9149
I would appreciate it if you would give me that extra apple - 0.9491
If you don't need that apple, I would appreciate it a lot - 0.9807
I would be very happy if you gave me an apple - 0.9356
Will you give me an apple? - 0.9033
I would like that apple - 0.9031
I have asked you an apple - 0.8816
You have one extra apple - 0.9068
You have 

In [43]:
# One subplot column per gold sentence instead of a fixed 10, so the grid
# always matches the data (at least one column, so an empty gold standard does
# not request a zero-column grid).
fig = plotly.tools.make_subplots(rows=1, cols=max(len(gold_sentences), 1))

# Heatmap row labels; assumes the models were evaluated in exactly this order,
# since each evaluation appends one entry per example.
model_labels = ['Flair', 'BERT uncased', 'BERT cased']

# Iterate over the dict items directly rather than rebuilding the key list on
# every access.
for i, (gold, sim_sentences) in enumerate(gold_sentences.items()):
    print(gold + '\n')
    print("\n".join(sim_sentences))
    print("Scores: Flair: " + str(scores_all[i][0]) + ", BERT uncased: " + str(scores_all[i][1]) + ", BERT cased: " + str(scores_all[i][2]))

    # One row per model, one column per compared sentence.
    trace = go.Heatmap(z=similarities_all[i], y=model_labels)
    data = [trace]
    # NOTE(review): fig collects every heatmap but is never displayed in this
    # cell; only the individual heatmaps below are rendered.
    fig.append_trace(trace, 1, i+1)
    iplot(data, filename='basic-heatmap' + str(i))

This is the format of your plot grid:
[ (1,1) x1,y1 ]     [ (1,2) x2,y2 ]     [ (1,3) x3,y3 ]     [ (1,4) x4,y4 ]     [ (1,5) x5,y5 ]     [ (1,6) x6,y6 ]     [ (1,7) x7,y7 ]     [ (1,8) x8,y8 ]     [ (1,9) x9,y9 ]     [ (1,10) x10,y10 ]

Jokin offered an invitation to Amaia for lunch

Jokin asked Amaia, if she wanted to have lunch with him
Amaia received the question from Jokin, if she would like to go for lunch
Jokin offered an invitation to Amaia for a meal
Amaia received an invitation from Jokin
Jokin and Amaia met for lunch
Jokin and Amaia went for lunch together
Jokin and Amaia enjoyed a meal together
Amaia and Jokin decided to go for lunch
Amaia invited Jokin for lunch
Scores: Flair: 3, BERT uncased:4, BERT cased:4


If you have an extra apple, I would appreciate it a lot

Would you give me an apple you don't need?
I would appreciate it if you would give me that extra apple
If you don't need that apple, I would appreciate it a lot
I would be very happy if you gave me an apple
Will you give me an apple?
I would like that apple
I have asked you an apple
You have one extra apple
You have offered me an apple
You have given me an apple as a gift
Scores: Flair: 3, BERT uncased:5, BERT cased:5


Ane congratulated Amaia

Ane said happy birthday to Amaia
Ane told Amaia happy birthday
Ane praised Amaia
Amaia was congratulated by Ane
Ane told Amaia she wished her happiness
Due to Amaia's birthday, she received many good wishes
Ane gave a gift to Amaia
Amaia and Ane congratulated each other
Ane and Amaia were congratulated
Amaia told Ane happy birthday
Scores: Flair: 5, BERT uncased:4, BERT cased:3


Mikel went to the mountain

To the mountain went Mikel
Up the mountain went Mikel
Mikel was in the mountain
Mikel decided to go to the mountain
Mikel made the plan to go to the mountain
Mikel said he's going to the mountain
Mikel made the comment that maybe he'd go to the mountain
Mikel likes hiking
Mikel said he's not going to the mountain
Mikel didn't go to the mountain
Scores: Flair: 4, BERT uncased:5, BERT cased:5


Yesterday you came to our house for dinner

You came for dinner to our house yesterday
Yesterday you had dinner with us in our house
You were in our house having dinner yesterday
Yesterday you accepted the invitation to have dinner at our house
We all had dinner together yesterday
We could've had lunch together yesterday
Yesterday you came for dinner
Tomorrow he'll come for lunch
I went to your house for dinner
Yesterday they will come to your house for dinner
Scores: Flair: 5, BERT uncased:6, BERT cased:5


Happines isn't doing what you like, but liking what you do

Liking what you do makes you happy, not doing what you want
Like what you do and you will be happy
The source of happines is liking what you do
Happines is liking what you do
What is done gladly is happiness
Doing what you enjoy won't make you happy
What you do, not what you like, is happiness
Do what you like in order to be happy
Do the things you like and you will be happy
You can't always do what you like
Scores: Flair: 5, BERT uncased:4, BERT cased:4


Give some color to the gray world

Give some color to the gray world
To the gray world give it color
The world is gray so let's give it some color
Give some color to the dark world
Because the world is gray, we will give it some color
Give color to the world
Give gray to the colorful world
Give gray to the world
Give the world to the gray color
Scores: Flair: 4, BERT uncased:5, BERT cased:4


Because I love, I create

I create because I love
Due to love I create
I love: therefore, I create
The reason for creating is love
The origin of my creating is love
Because I loved I created
After creating I loved
Because you love you create
I don't love, so I don't create
Because I create I love
Scores: Flair: 3, BERT uncased:3, BERT cased:4


If you knew how pretty it is to hear you in basque

It is very pretty to hear you in basque
Hearing you in basque pleases me
It can't be explained how beautiful it is when you speak basque
You can't know how pretty it is to hear you
Knowing how pretty it is that you speak basque is impossible
Hearing you is beautiful
I would like you to know how pretty it is to hear you in basque
If you know how awful it is to hear you in basque
If they knew how pretty it is that I speak basque
Scores: Flair: 4, BERT uncased:4, BERT cased:3
