In [3]:
from flair.embeddings import WordEmbeddings
from flair.data import Sentence
import pandas as pd
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn.modules.distance import PairwiseDistance

# Polish word embeddings

In [4]:
epl = WordEmbeddings('pl')

In [5]:
epl

WordEmbeddings('pl')

In [6]:
epl = WordEmbeddings('pl')

In [8]:
s = Sentence("To koło mam od kogoś")

In [9]:
epl.embed(s)

[Sentence: "To koło mam od kogoś" - 5 Tokens]

In [13]:
s[0].embedding.shape

torch.Size([300])

In [12]:
for token in s:
    print(token)
    print(token.embedding)

Token: 1 To
tensor([ 0.0418, -0.0413, -0.0374, -0.1981,  0.2041,  0.2391, -0.1462, -0.1043,
        -0.0191, -0.1028, -0.0799,  0.0986, -0.0434, -0.0448,  0.0196,  0.2728,
         0.3163,  0.0610, -0.0049, -0.0601, -0.1671,  0.2504, -0.0872, -0.2340,
        -0.2380,  0.1996,  0.0621, -0.1594,  0.0618, -0.1846,  0.0281,  0.0605,
         0.0356, -0.1178,  0.0694,  0.0390,  0.0708,  0.1732,  0.1513, -0.1419,
         0.1438,  0.2432,  0.0163,  0.0297,  0.1232,  0.1187,  0.0604,  0.1181,
         0.0087, -0.1827, -0.0659, -0.2015, -0.2148, -0.0192, -0.0921, -0.0585,
        -0.1186, -0.1189,  0.6087,  0.2210, -0.1680, -0.1459, -0.0339, -0.1243,
        -0.1485,  0.1045,  0.0243,  0.1766, -0.0404,  0.0435,  0.0607, -0.1181,
        -0.2206, -0.2496,  0.1793,  0.0816,  0.0021, -0.1924, -0.0179,  0.0887,
        -0.0606, -0.1847,  0.1331,  0.0290, -0.0723,  0.0686,  0.0051,  0.1974,
         0.2231,  0.2776, -0.1649,  0.1572, -0.0343,  0.0894,  0.0809,  0.1925,
        -0.1422,  0.1927, -0

# Loading Dataset

In [13]:
data = pd.read_excel("LingFeatured NLI_PL_20.03.2020.xlsx")

In [14]:
clean = np.array([x[:-5] for x in data['verb'].unique()])

In [15]:
embeddings = []

In [16]:
for c in clean:
    s = Sentence(c)
    epl.embed(s)
    embeddings.append(torch.cat([w.embedding.view(1, w.embedding.shape[0]) for w in s]))

In [17]:
len(embeddings)

367

In [18]:
embeddings[0].shape

torch.Size([1, 300])

# Distribution of lengths

In [19]:
np.unique([e.shape[0] for e in embeddings ], return_counts=True)

(array([1, 2, 3, 4]), array([224, 111,  25,   7], dtype=int64))

# Last Embeddings

In [20]:
EMB = torch.cat([F.pad(embeddings[i], (0,0,4-embeddings[i].shape[0],0)).view(1,4,300) for i in range(len(embeddings))], 0)

# LSTM test

In [21]:
lstm = nn.LSTM(300, 5, batch_first=True)

In [22]:
e = embeddings[6]

In [23]:
e = e.to(torch.device('cpu'))

In [24]:
out, hidden= lstm(EMB)

In [25]:
torch.cat(hidden,2)[0].shape

torch.Size([367, 10])

# Model LSTM-end-to-end

In [26]:
class LSTMend2end(nn.Module):
    def __init__(self, LSTM_hidden, output_size):
        super(LSTMend2end, self).__init__()
        
        self.lstm = nn.LSTM(300, LSTM_hidden, batch_first=True)
        self.L1 = nn.Linear(2*LSTM_hidden, 2*LSTM_hidden)
        self.L2 = nn.Linear(2*LSTM_hidden, LSTM_hidden)
        self.L3 = nn.Linear(LSTM_hidden, output_size)
        
    def forward(self, X):
        """
        Forward implementation for batch training.
        Params:
            - X: Batch
        
        Output:
            - Y: shape: n-obs, output_size
            
        forward(seq) = seq -> lstm -> linear -> relu -> linear -> relu -> linear -> sigmoid
        """
        _, hidden = self.lstm(X)
        X = torch.cat(hidden, 2)[0]
        X = torch.relu(self.L1(X))
        X = torch.relu(self.L2(X))
        X = torch.sigmoid(self.L3(X))
        
        return X

In [27]:
def MSE(y, y_pred):
    d = y - y_pred
    return d.T @ d / y.shape[0]

In [28]:
l = LSTMend2end(300, 3)

In [29]:
l.forward(EMB)

tensor([[0.4900, 0.5114, 0.5114],
        [0.4892, 0.5103, 0.5130],
        [0.4903, 0.5113, 0.5121],
        ...,
        [0.4902, 0.5103, 0.5116],
        [0.4918, 0.5113, 0.5115],
        [0.4907, 0.5116, 0.5107]], grad_fn=<SigmoidBackward>)

# Model LSTM-SVM

In [30]:
"""
Distance matrix
"""
def dist(M):
    W = M @ M.T
    D = torch.diag(W).reshape(M.shape[0],1)
    return D + D.T - 2*W

In [31]:
"""
Kernel Function

K()
"""
def K(M, gamma=.5):
    D = dist(M)
    return torch.exp(-gamma * D)

In [32]:
"""
LSTM_SVM Model

It is a LSTM model with radial basis function kernel SVM on top (trained at once).

Forward(seq) = seq -> lstm -> K -> ouput 

"""
class LSTM_SVM(nn.Module):
    def __init__(self, LSTM_hidden, data_size, output_size, gamma=1):
        super(LSTM_SVM, self).__init__()
        
        self.lstm = nn.LSTM(300, LSTM_hidden, batch_first=True)
        self.L1 = nn.Linear(data_size, output_size)
        self.gamma = gamma
        
    def forward(self, X):
        """
        Forward implementation for batch training.
        Params:
            - X: Batch
        
        Output:
            - Y: shape: n-obs, output_size
            
        forward(seq) = Forward(seq) = seq -> lstm -> K -> ouput 
        """
        _, hidden = self.lstm(X)
        X = torch.cat(hidden, 2)[0]
        X = K(X, self.gamma)
        X = self.L1(X)
        return X

In [33]:
def SVM_L(y, y_pred):
    return torch.sum(torch.clamp(y_pred[y] + 1, min=0)) + torch.sum(torch.clamp(-y_pred[~y] + 1, min=0))

In [34]:
l = LSTM_SVM(300, 367, 3)

In [35]:
SVM_L(l.forward(EMB) > 0, l.forward(EMB))

tensor(1154.3909, grad_fn=<AddBackward0>)