# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import spacy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

print ("PyTorch version:[%s]."%(torch.__version__))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print ("device:[%s]."%(device))

PyTorch version:[1.13.1+cu117].
device:[cuda:0].


# Make test data

In [2]:
nlp = spacy.load("en_core_web_md")

test_data = [
    "This is a positive sentence with good vibes.",
    "I'm not sure about this, it could be either positive or negative.",
    "Negative sentiment is evident in this statement.",
    "The weather is great today!",
]

labels = [1, 0, -1, 1]

def get_sentence_embedding(sentence):
    doc = nlp(sentence)
    return np.mean([token.vector for token in doc if token.has_vector], axis=0)

In [3]:
X_train = np.array([get_sentence_embedding(sentence) for sentence in test_data])
X_train = torch.tensor(X_train)

## check data' shape

In [4]:
print(f"X_train's shape: {X_train.shape}")

X_train's shape: torch.Size([4, 300])


# Encoder

## MHA

In [5]:
class MultiHeadedAttention(nn.Module):
    def __init__(self,d_feat=512,n_head=8,actv=F.relu,device=None):
        super(MultiHeadedAttention, self).__init__()
        self.d_feat = d_feat
        self.n_head = n_head    #equal with d_k
        self.d_head = d_feat // n_head
        if self.d_head % self.n_head !=0:
            raise ValueError
        self.actv = actv

        self.lin_k = nn.Linear(self.d_feat, self.d_feat)
        self.lin_q = nn.Linear(self.d_feat, self.d_feat)
        self.lin_v = nn.Linear(self.d_feat, self.d_feat)

    def forward(self, Q, K, V):

        Query = self.lin_q(Q)
        Key = self.lin_k(K)
        Value = self.lin_v(V)

        #입력 쿼리는 n * s * d: batch * sequence_length * embedding length
        batch_size = Query.shape[0]
        Queries = Query.view(batch_size, self.n_head, -1, self.d_head)
        Keys = Key.view(batch_size, self.n_head, -1, self.d_head)
        Values = Value.view(batch_size, self.n_head, -1, self.d_head)

        attention_score = torch.matmul(Queries, Keys.permute(0,1,3,2))
        attention_score = attention_score / np.sqrt(Keys.shape[-1])
        attention_score = F.softmax(attention_score, dim = -1)
        # attention_score = batch * sequence_length * embedding length

        attention_score = torch.matmul(attention_score, Values)
        
        x = attention_score.permute(0, 2, 1, 3)
        x = attention_score.view(batch_size, -1, self.d_feat)

        return x

        

        

## FFN

In [6]:
class FeedForwardNetwork(nn.Module):
    def __init__(self,d_feat=512,n_head=8,actv=F.relu,device=None):
        super(FeedForwardNetwork, self).__init__()
        self.d_feat = d_feat
        self.actv = actv
        self.device = device
        self.n_head = n_head

        self.linear_1 = nn.Linear(self.d_feat, self.d_feat * 4)  
        self.linear_2 = nn.Linear(self.d_feat * 4, self.d_feat)       


    
    def forward(self, x):
        x = self.linear_1(x)
        x = self.actv(x)
        x = self.linear_2(x)
        x = self.actv(x)
        
        return x

## Encoder

In [7]:
class Encoder(nn.Module):
    def __init__(self, d_feat=512, n_head=8, actv = F.relu, device=None):
        super(Encoder, self).__init__()
        self.d_feat = d_feat
        self.actv = actv
        self.device = device
        self.n_head = n_head

        self.MHA = MultiHeadedAttention(d_feat=self.d_feat, n_head = self.n_head, actv = self.actv, device = self.device)
        self.FFN = FeedForwardNetwork(d_feat=self.d_feat, n_head = self.n_head, actv = self.actv, device = self.device)

        self.layernorm_1 = nn.LayerNorm(d_feat)
        self.layernorm_2 = nn.LayerNorm(d_feat)

    def forward(self, x):
        output = self.MHA(x, x, x)

        x = x + output
        x = self.layernorm_1(x)
        output = self.FFN(x)
        x = x + output
        x = self.layernorm_2(x)

        return x

## Positional encoding

In [17]:
def positional_encoding(seq_len, dim):
    encoding = np.zeros((seq_len, dim))
    for s in range(seq_len):
        for d in range(dim):
            if d%2==0:
                encoding.append(np.sin(s/(10000**(2*d/512))))
            else:
                encoding.append(np.cos(s/(10000**(2*d/512))))
    return encoding

In [21]:
class SinPositionEmbeddings(nn.Module):
    def __init__(self,dim,T=1000):
        super().__init__()
        self.dim = dim
        self.T = T
    @torch.no_grad()
    def forward(self,steps):
        device = steps.device
        half_dim = self.dim // 2
        embeddings = math.log(self.T) / (half_dim - 1)
        embeddings = torch.exp(torch.arange(half_dim, device=device) * -embeddings)
        embeddings = steps[:, None] * embeddings[None, :]
        embeddings = torch.cat((embeddings.sin(), embeddings.cos()), dim=-1)
        return embeddings
print ("Ready.")

Ready.


In [23]:
emb = SinPositionEmbeddings(512,1000)

In [26]:
import math

In [28]:
positional_encoding(512, 1000)

[0.07951849401287635,
 -0.7796417565933499,
 -0.8768153499257367,
 0.5871325138454507,
 -0.39784743386354393,
 0.900871566926601,
 -0.8636985906486296,
 -0.5641676495642884,
 0.6221856344232485,
 0.9460028672863923,
 -0.7527983928020255,
 0.6122002290769667,
 -0.5015815123488423,
 0.9578080919712857,
 0.9988898426058764,
 -0.9998910188214717,
 -0.8947029135589434,
 0.28285932088140703,
 -0.7794398308682489,
 0.659707307126921,
 -0.9092810289933198,
 -0.18366089997585924,
 -0.444641626467563,
 -0.7214543768204948,
 0.7586792391704953,
 0.5949168719446937,
 -0.1429230856070844,
 0.5732219585792373,
 -0.9990149139222946,
 -0.2759821865448989,
 -0.9322818352965586,
 -0.23323587756940398,
 -0.9932105532388112,
 0.6273858566709366,
 -0.12845999355349746,
 0.6748614953496935,
 0.9186240965772532,
 -0.986596820784401,
 -0.9953283542886341,
 0.98038512862462,
 0.8947092236040856,
 -0.6334290485878976,
 -0.11231371378256404,
 -0.5705197239769877,
 -0.9947132849941112,
 0.6262814470756085,
 -0.43

In [27]:
emb(torch.ones(1))

tensor([[0.8415, 0.8267, 0.8118, 0.7968, 0.7817, 0.7665, 0.7513, 0.7361, 0.7209,
         0.7059, 0.6909, 0.6760, 0.6612, 0.6466, 0.6322, 0.6179, 0.6038, 0.5899,
         0.5762, 0.5627, 0.5495, 0.5364, 0.5236, 0.5110, 0.4986, 0.4864, 0.4745,
         0.4629, 0.4514, 0.4402, 0.4293, 0.4185, 0.4080, 0.3977, 0.3877, 0.3778,
         0.3682, 0.3588, 0.3497, 0.3407, 0.3320, 0.3234, 0.3151, 0.3069, 0.2990,
         0.2912, 0.2837, 0.2763, 0.2691, 0.2621, 0.2552, 0.2486, 0.2420, 0.2357,
         0.2295, 0.2235, 0.2176, 0.2119, 0.2063, 0.2009, 0.1956, 0.1904, 0.1854,
         0.1805, 0.1757, 0.1711, 0.1665, 0.1621, 0.1578, 0.1536, 0.1496, 0.1456,
         0.1417, 0.1380, 0.1343, 0.1307, 0.1273, 0.1239, 0.1206, 0.1174, 0.1143,
         0.1112, 0.1083, 0.1054, 0.1026, 0.0998, 0.0972, 0.0946, 0.0921, 0.0896,
         0.0872, 0.0849, 0.0826, 0.0804, 0.0783, 0.0762, 0.0742, 0.0722, 0.0703,
         0.0684, 0.0666, 0.0648, 0.0631, 0.0614, 0.0597, 0.0581, 0.0566, 0.0551,
         0.0536, 0.0522, 0.0

In [20]:
positional_encoding(2, 512)

[0.9092974268256817,
 -0.35089519414026626,
 0.9581443762382829,
 -0.22269491881909587,
 0.9870462513484951,
 -0.0997625393820518,
 0.9991642001884493,
 0.016065575142331992,
 0.9974799976053368,
 0.12369041342821066,
 0.9847029979641517,
 0.22257561410487225,
 0.9632266229537682,
 0.3126002616542231,
 0.9351182995391969,
 0.39393903352722565,
 0.9021307149638974,
 0.4669673621203432,
 0.8657255873531919,
 0.5321882841120573,
 0.8271038031920692,
 0.5901773905578025,
 0.7872377965789533,
 0.6415424482774963,
 0.7469035352242571,
 0.6868946298623996,
 0.7067105407567295,
 0.7268287375061372,
 0.667129104905188,
 0.761910259564637,
 0.6285143527478001,
 0.7926675179712365,
 0.5911271172152932,
 0.8195875310460997,
 0.5551517777483455,
 0.843114524635237,
 0.5207113196456871,
 0.86365027699697,
 0.487879917794723,
 0.8815556851611701,
 0.4566933595230835,
 0.8971530997654681,
 0.4271576104311921,
 0.910729098901344,
 0.3992558039029435,
 0.9225374660209928,
 0.37295390591897515,
 0.932802

## Data test

In [8]:
encoder = Encoder(d_feat=512,n_head=8, device=device)
output = encoder(torch.randn(16, 512, 512))

In [9]:
output.shape

torch.Size([16, 512, 512])

In [10]:
torch.randn(2, 512).shape

torch.Size([2, 512])