**3.4.2**

In [2]:
import sys
import ast
import numpy as np


def write_array(arr):
    """Print `arr` (anything exposing `.tolist()`) as a nested Python list literal."""
    nested = arr.tolist()
    print(f"{nested!r}")


def generate_w2v_sgns_samples(text, window_size, vocab_size, ns_rate):
    """
    Build skip-gram-with-negative-sampling training triples for a token sequence.

    text - list of integer numbers - ids of tokens in text
    window_size - odd integer - width of window
    vocab_size - positive integer - number of tokens in vocabulary
    ns_rate - positive integer - number of negative tokens to sample per one positive sample

    returns list of training samples [CenterWord, CtxWord, Label]; for every
    center position the positives (right neighbours, then left neighbours)
    come first, followed by the negatives drawn for that position
    """
    half_window = window_size // 2
    n_tokens = len(text)
    samples = []

    for center_pos in range(n_tokens):
        center = text[center_pos]
        right = range(center_pos + 1, min(center_pos + half_window + 1, n_tokens))
        left = range(max(center_pos - half_window, 0), center_pos)

        positives = [[center, text[ctx_pos], 1] for ctx_pos in (*right, *left)]
        # One draw per center position, ns_rate noise ids per positive pair.
        noise_ids = np.random.choice(vocab_size, ns_rate * len(positives))

        samples.extend(positives)
        samples.extend([center, noise_id, 0] for noise_id in noise_ids)

    return samples


# Example input for generate_w2v_sgns_samples: token ids of a short text.
text = np.array([1, 0, 1, 0, 0, 5, 0, 3, 5, 5, 3, 0, 5, 0, 5, 2, 0, 1, 3])
window_size = 3  # window_size // 2 == 1 neighbour on each side of the center
vocab_size = 6   # negative ids are drawn from range(vocab_size)
ns_rate = 1      # one negative sample per positive pair

# Negatives come from np.random without a seed, so the label-0 rows differ between runs.
result = generate_w2v_sgns_samples(text, window_size, vocab_size, ns_rate)

write_array(np.array(result))

[[1, 0, 1], [1, 5, 0], [0, 1, 1], [0, 1, 1], [0, 3, 0], [0, 3, 0], [1, 0, 1], [1, 0, 1], [1, 3, 0], [1, 5, 0], [0, 0, 1], [0, 1, 1], [0, 2, 0], [0, 5, 0], [0, 5, 1], [0, 0, 1], [0, 0, 0], [0, 5, 0], [5, 0, 1], [5, 0, 1], [5, 0, 0], [5, 5, 0], [0, 3, 1], [0, 5, 1], [0, 2, 0], [0, 3, 0], [3, 5, 1], [3, 0, 1], [3, 5, 0], [3, 5, 0], [5, 5, 1], [5, 3, 1], [5, 2, 0], [5, 5, 0], [5, 3, 1], [5, 5, 1], [5, 1, 0], [5, 4, 0], [3, 0, 1], [3, 5, 1], [3, 1, 0], [3, 1, 0], [0, 5, 1], [0, 3, 1], [0, 3, 0], [0, 4, 0], [5, 0, 1], [5, 0, 1], [5, 5, 0], [5, 4, 0], [0, 5, 1], [0, 5, 1], [0, 2, 0], [0, 2, 0], [5, 2, 1], [5, 0, 1], [5, 1, 0], [5, 2, 0], [2, 0, 1], [2, 5, 1], [2, 5, 0], [2, 5, 0], [0, 1, 1], [0, 2, 1], [0, 2, 0], [0, 3, 0], [1, 3, 1], [1, 0, 1], [1, 5, 0], [1, 2, 0], [3, 1, 1], [3, 1, 0]]


**3.4.3**

In [6]:
import sys
import ast
import numpy as np
    
    
def update_w2v_weights(center_embeddings, context_embeddings, center_word, context_word, label, learning_rate):
    """
    Perform one SGD step of skip-gram with negative sampling for a single
    (center_word, context_word, label) sample.

    center_embeddings - VocabSize x EmbSize - float ndarray or list of lists
    context_embeddings - VocabSize x EmbSize - float ndarray or list of lists
    center_word - int - identifier of center word
    context_word - int - identifier of context word
    label - 1 if context_word is real, 0 if it is negative
    learning_rate - float > 0 - size of gradient step

    update center_embeddings and context_embeddings inplace (only the rows
    `center_word` and `context_word` change), then print both matrices.
    returns None
    """

    # np.asarray hands back the caller's ndarray itself when no conversion is
    # needed, so the row assignments below modify the caller's data.
    # (np.array would always copy, and the update would be silently lost.)
    center = np.asarray(center_embeddings)
    context = np.asarray(context_embeddings)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    center_vec = center[center_word]
    context_vec = context[context_word]

    # Gradient of the logistic loss w.r.t. the dot product, scaled by the step size.
    grad_scale = learning_rate * (sigmoid(center_vec @ context_vec) - label)

    # Both new rows are computed from the OLD vectors before either is stored.
    new_center_vec = center_vec - grad_scale * context_vec
    new_context_vec = context_vec - grad_scale * center_vec
    center[center_word] = new_center_vec
    context[context_word] = new_context_vec

    # List inputs were converted into fresh arrays above; copy the updated
    # rows back so that the in-place contract also holds for nested lists.
    if center is not center_embeddings and isinstance(center_embeddings, list):
        center_embeddings[center_word] = center[center_word].tolist()
    if context is not context_embeddings and isinstance(context_embeddings, list):
        context_embeddings[context_word] = context[context_word].tolist()

    print(center)
    print(context)

    
center_embeddings = [[0.3449417709491044, 0.6762047256081501, 0.9583446027893963],
                     [0.6247126159157468, 0.22038323197740317, 0.29717611444948355],
                     [0.9836099232994968, 0.3847689688960674, 0.033312247867206435],
                     [0.4217704869846559, 0.0023859008971685025, 0.009686915033163657],
                     [0.6933070658521228, 0.9705089533296152, 0.9189360293193337],
                     [0.024858486425111903, 0.11331113152689753, 0.6492144300167894],
                     [0.7861289466352543, 0.227319130535791, 0.8165251907260063],
                     [0.7672181161105678, 0.04865001026002924, 0.07514404284170773]]

context_embeddings = [[0.4628817426583818, 0.7747296319956671, 0.1374808935513827],
                      [0.17026823169513283, 0.4094733988461122, 0.3175531656197459],
                      [0.2910876746161247, 0.6340566555548147, 0.23158010794029804],
                      [0.8449042648180852, 0.4796593509107806, 0.11278090182290745],
                      [0.049097778744511156, 0.6254116250148337, 0.13038703647472905],
                      [0.882545488649187, 0.6223076699449618, 0.1633041302523962],
                      [0.6704032810194875, 0.941803340812521, 0.7358646489592193],
                      [0.9875878745059805, 0.17935677165390562, 0.6798846454394736]]

# One training sample for update_w2v_weights: center id 2, context id 5,
# label 0 (the context word is a negative sample).
center_word = 2
context_word = 5
label = 0
learning_rate = 0.342405260598321

# The embeddings defined above are plain nested lists, not ndarrays.
update_w2v_weights(center_embeddings, context_embeddings,
                   center_word, context_word, label, learning_rate)    

[[ 0.34494177  0.67620473  0.9583446 ]
 [ 0.62471262  0.22038323  0.29717611]
 [ 0.75615844  0.22438653 -0.00877484]
 [ 0.42177049  0.0023859   0.00968692]
 [ 0.69330707  0.97050895  0.91893603]
 [ 0.02485849  0.11331113  0.64921443]
 [ 0.78612895  0.22731913  0.81652519]
 [ 0.76721812  0.04865001  0.07514404]]
[[0.46288174 0.77472963 0.13748089]
 [0.17026823 0.4094734  0.31755317]
 [0.29108767 0.63405666 0.23158011]
 [0.84490426 0.47965935 0.1127809 ]
 [0.04909778 0.62541163 0.13038704]
 [0.62904747 0.5231442  0.15471883]
 [0.67040328 0.94180334 0.73586465]
 [0.98758787 0.17935677 0.67988465]]


**3.4.4**

In [8]:
import sys
import ast
import numpy as np
       

def generate_ft_sgns_samples(text, window_size, vocab_size, ns_rate, token2subwords):
    """
    Build FastText-style skip-gram-with-negative-sampling training triples.

    text - list of integer numbers - ids of tokens in text
    window_size - odd integer - width of window
    vocab_size - positive integer - number of tokens in vocabulary
    ns_rate - positive integer - number of negative tokens to sample per one positive sample
    token2subwords - list of lists of int - i-th sublist contains list of identifiers of n-grams for token #i (list of subword units)

    returns list of training samples (CenterSubwords, CtxWord, Label), where
    CenterSubwords is [center_token, *its n-gram ids]. For every center token
    and every distance 1..window_size//2 the right neighbour is emitted first,
    then the left one; each positive is immediately followed by its `ns_rate`
    negatives, whose ids are drawn uniformly from range(vocab_size).
    """
    n_tokens = len(text)
    half_window = window_size // 2
    samples = []

    for pos, center in enumerate(text):
        # The token itself always leads its list of subword units. The same
        # list object is shared by all samples of this center position.
        center_subwords = [center, *token2subwords[center]]

        for offset in range(1, half_window + 1):
            for ctx_pos in (pos + offset, pos - offset):
                if not 0 <= ctx_pos < n_tokens:
                    continue
                samples.append((center_subwords, text[ctx_pos], 1))
                for _ in range(ns_rate):
                    # Sample from the real vocabulary size instead of a hard-coded 6.
                    samples.append((center_subwords, np.random.randint(vocab_size), 0))

    return samples


# Example input for generate_ft_sgns_samples.
text = [1, 2, 0, 1, 4, 0, 4, 1, 5, 4, 5, 4, 5, 1]
window_size = 3
vocab_size = 6
ns_rate = 2  # two negatives after every positive pair
# token2subwords[i] holds the n-gram ids of token i; token 4 has none.
token2subwords = [[17], [10, 12], [20, 20], [7, 13], [], [7, 11]]

result = generate_ft_sgns_samples(text, window_size, vocab_size, ns_rate, token2subwords)

print(repr(result))

[([1, 10, 12], 2, 1), ([1, 10, 12], 0, 0), ([1, 10, 12], 3, 0), ([2, 20, 20], 0, 1), ([2, 20, 20], 4, 0), ([2, 20, 20], 5, 0), ([2, 20, 20], 1, 1), ([2, 20, 20], 0, 0), ([2, 20, 20], 2, 0), ([0, 17], 1, 1), ([0, 17], 3, 0), ([0, 17], 4, 0), ([0, 17], 2, 1), ([0, 17], 0, 0), ([0, 17], 4, 0), ([1, 10, 12], 4, 1), ([1, 10, 12], 5, 0), ([1, 10, 12], 5, 0), ([1, 10, 12], 0, 1), ([1, 10, 12], 5, 0), ([1, 10, 12], 0, 0), ([4], 0, 1), ([4], 4, 0), ([4], 1, 0), ([4], 1, 1), ([4], 0, 0), ([4], 4, 0), ([0, 17], 4, 1), ([0, 17], 5, 0), ([0, 17], 4, 0), ([0, 17], 4, 1), ([0, 17], 2, 0), ([0, 17], 5, 0), ([4], 1, 1), ([4], 3, 0), ([4], 3, 0), ([4], 0, 1), ([4], 0, 0), ([4], 0, 0), ([1, 10, 12], 5, 1), ([1, 10, 12], 5, 0), ([1, 10, 12], 0, 0), ([1, 10, 12], 4, 1), ([1, 10, 12], 2, 0), ([1, 10, 12], 5, 0), ([5, 7, 11], 4, 1), ([5, 7, 11], 3, 0), ([5, 7, 11], 5, 0), ([5, 7, 11], 1, 1), ([5, 7, 11], 0, 0), ([5, 7, 11], 4, 0), ([4], 5, 1), ([4], 2, 0), ([4], 4, 0), ([4], 5, 1), ([4], 4, 0), ([4], 0, 0), 

**3.4.5**

In [12]:
import sys
import ast
import numpy as np


def update_ft_weights(center_embeddings, context_embeddings, center_subwords, context_word, label, learning_rate):
    """
    Perform one SGD step of FastText-style skip-gram with negative sampling.
    The center representation is the mean of the embeddings of its subword units.

    center_embeddings - VocabSize x EmbSize - float ndarray, updated in place
    context_embeddings - VocabSize x EmbSize - float ndarray, updated in place
    center_subwords - list of ints - list of identifiers of n-grams contained in center word
    context_word - int - identifier of context word
    label - 1 if context_word is real, 0 if it is negative
    learning_rate - float > 0 - size of gradient step

    Prints both matrices after the update; returns None.
    """
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    subword_ids = np.asarray(center_subwords, dtype=np.intp)

    # Fancy indexing + mean yields a fresh array, so it stays valid after the
    # in-place update of center_embeddings below.
    center_vec = center_embeddings[subword_ids].mean(axis=0)
    # Copy: a plain row index would be a view into context_embeddings.
    context_vec = context_embeddings[context_word].copy()

    grad_scale = learning_rate * (sigmoid(center_vec @ context_vec) - label)

    # Each occurrence of a subword id contributes 1/len to the mean, so it must
    # receive its own share of the step. `a[ids] = ...` would apply the step
    # only once for a repeated id; np.subtract.at accumulates unbuffered.
    np.subtract.at(center_embeddings, subword_ids,
                   grad_scale * context_vec / len(center_subwords))
    context_embeddings[context_word] = context_vec - grad_scale * center_vec

    print(center_embeddings)
    print(context_embeddings)

    
center_embeddings = np.array([[0.07217140995735816, 0.9807495045952024, 0.5888650678318127, 0.9419020475323008, 0.9698687137771355],
                     [0.17481764801167854, 0.9598681333667267, 0.8615416075076997, 0.6649845254089604, 0.14272822189820067],
                     [0.695257160390079, 0.6252124583357915, 0.788572884360212, 0.5407620598434707, 0.4742760619803522],
                     [0.3720755825170682, 0.8734430653555122, 0.29388553936147677, 0.7833976055802006, 0.11647446813597206],
                     [0.4793503066165381, 0.7731679392102295, 0.6466062364447424, 0.5834632727525674, 0.16975097768580916],
                     [0.46855676928071344, 0.7440440871653314, 0.5968916205486556, 0.6949993371605877, 0.9995564750677164],
                     [0.3995517204225809, 0.30217048674177027, 0.6934836340605662, 0.5025452046745376, 0.43990420866402447],
                     [0.6233285824044058, 0.7510765715859197, 0.8764982899024905, 0.42892241183749247, 0.9241569354174014],
                     [0.21063022873083803, 0.979366603599722, 0.07879437255385402, 0.7103116511451802, 0.298121842692622],
                     [0.7991181799927396, 0.8700912396205017, 0.4936455488806514, 0.9306352063022928, 0.671689987782089],
                     [0.11245515636577097, 0.2591385008756272, 0.38393130144123977, 0.5927928993875077, 0.3343301767582757],
                     [0.027340724019638274, 0.15461071231349877, 0.7955192467457007, 0.050624838697975516, 0.26136570172628426],
                     [0.7825083895933859, 0.9046538942978853, 0.4559636175207443, 0.733829258685726, 0.022174763292638677],
                     [0.6968176063951074, 0.47974647096747125, 0.8885207189970179, 0.016167994434510558, 0.13260182909882334],
                     [0.5947903955259933, 0.07459974351651177, 0.11391699485528617, 0.823474357110585, 0.4918622459339238],
                     [0.6272760016913231, 0.2711994820963495, 0.24338892914238242, 0.7731707300677505, 0.03720128542002399],
                     [0.8640858092433228, 0.027663971153230382, 0.9271422334467209, 0.37457369227183035, 0.17413436429736662],
                     [0.4878584763813121, 0.5022845803948351, 0.13899660663745628, 0.8353408935052742, 0.48314336609381436],
                     [0.8197910105251979, 0.5371430936015362, 0.12965724315376936, 0.06244349080403733, 0.9558816248633216],
                     [0.5929477505385994, 0.36687167726065173, 0.42925321480480627, 0.8435274356179648, 0.8550018469714032],
                     [0.45785273815309, 0.008764229829187009, 0.6840407156586629, 0.04831125277736026, 0.14609911971743395],
                     [0.1579479219010974, 0.1298470924838635, 0.8283362978065627, 0.9140741421274726, 0.7516395431217443],
                     [0.01139316661353773, 0.6980229640742956, 0.45528806869472405, 0.7653990849713008, 0.24848012670857944],
                     [0.8750941097872984, 0.6964598870452183, 0.6675389863133752, 0.391939718013135, 0.30592620271209714],
                     [0.024161748164975072, 0.6512328549928654, 0.27784751504029503, 0.32588414662648524, 0.4073676483413957],
                     [0.7372935688667617, 0.9743689028772393, 0.26179932035274445, 0.3556999822154028, 0.8234406534181563],
                     [0.9358431512408416, 0.0030942521035778325, 0.7052198210371732, 0.3494249594704901, 0.06494462197366668],
                     [0.027642224051125597, 0.45820907093457997, 0.6172763215932299, 0.03520578036716404, 0.05004091043245007]])

context_embeddings = np.array([[0.3619192935809462, 0.7910582560833153, 0.173840770588212, 0.8486217599360419, 0.09895998679198104],
                      [0.9524670374363299, 0.577316446205222, 0.3348594666828074, 0.7987547183235284, 0.710457681490417],
                      [0.8400820704952479, 0.9414962586451427, 0.08399082278691339, 0.425927381574433, 0.6304514720560764],
                      [0.5331686510681622, 0.2751366715811131, 0.8329999135745643, 0.2770290564458684, 0.020564166091874392],
                      [0.9852792048968001, 0.922320208232837, 0.7297936992308128, 0.20212997935663524, 0.5277458149323955],
                      [0.43383566311415755, 0.14151987203148808, 0.3267585826852797, 0.8796734627573763, 0.14253685112772174],
                      [0.24559727482999572, 0.3015598034026842, 0.12351719983998721, 0.6141130319406622, 0.9210871618079258],
                      [0.21915908704207665, 0.9809645232509783, 0.8685879466971278, 0.9956335594634693, 0.0441562419906687],
                      [0.24988758739587902, 0.42298807118368675, 0.01922872769211703, 0.02806386746602596, 0.2821901214584819],
                      [0.43997555452635384, 0.5078839449569567, 0.812607950040521, 0.9998014106280365, 0.1559607489614684],
                      [0.9092151190046189, 0.5930002929595868, 0.315159378929991, 0.4052299042409616, 0.984475831988958],
                      [0.7836990450026143, 0.002466529016497798, 0.8465916260137056, 0.7227126698344118, 0.5087557482398855],
                      [0.4125921074144525, 0.5582795115000383, 0.889307828978137, 0.928416977596577, 0.8437462138575066],
                      [0.11810981794872477, 0.07787452990697508, 0.3907338451314212, 0.6841828899516664, 0.4547615738832046],
                      [0.4977766315279062, 0.09878866849137813, 0.0622140049250518, 0.9008881823827194, 0.3694055807903669],
                      [0.12415427540834822, 0.01064247175537103, 0.1439469061372417, 0.43996173718103593, 0.3846553735294024],
                      [0.36544315427420426, 0.6651402425072226, 0.3837201693785094, 0.54713466624535, 0.6925194086063208],
                      [0.8217730539154436, 0.7380601103419114, 0.4790971996703556, 0.935248458815274, 0.6385239169547122],
                      [0.4884363834477089, 0.783319748626155, 0.018212966919229467, 0.03662832627793777, 0.03532160993715294],
                      [0.6820505211290306, 0.25769913167047753, 0.9677388106523852, 0.4471332422618759, 0.7731319006564568],
                      [0.3695513424667971, 0.5118113495291988, 0.1721439269100805, 0.09451631327113852, 0.8369170475041434],
                      [0.7918542552021289, 0.0245240901264403, 0.6658133706965796, 0.9740885323982209, 0.02660284500887522],
                      [0.5604137104962275, 0.5643917632639455, 0.6756476068355826, 0.9466913679034125, 0.21062462975598062],
                      [0.7306868573812846, 0.7573083135261555, 0.9450278665003865, 0.9649869335038909, 0.1262321882978371],
                      [0.6830284536315845, 0.7383035166437748, 0.7985226892860073, 0.005247820534787007, 0.6886083391552933],
                      [0.6905561126225058, 0.3220803445510755, 0.8885006766287556, 0.32709316933290455, 0.9126547743770385],
                      [0.26866358146648694, 0.9355232286537734, 0.5254946965960933, 0.6487428023364232, 0.9405298594379049],
                      [0.33881123962516546, 0.6820622877451537, 0.3053828831926755, 0.9229486901650673, 0.5450270097149575]])

# One training sample for update_ft_weights: the center word is represented
# by rows 6 and 7 of center_embeddings; context id 1 is a real (label 1) context.
center_subwords = np.array([6,7])
context_word = 1
label = 1
learning_rate = 0.8562235244377375

update_ft_weights(center_embeddings, context_embeddings,
                  center_subwords, context_word, label, learning_rate)

[[0.07217141 0.9807495  0.58886507 0.94190205 0.96986871]
 [0.17481765 0.95986813 0.86154161 0.66498453 0.14272822]
 [0.69525716 0.62521246 0.78857288 0.54076206 0.47427606]
 [0.37207558 0.87344307 0.29388554 0.78339761 0.11647447]
 [0.47935031 0.77316794 0.64660624 0.58346327 0.16975098]
 [0.46855677 0.74404409 0.59689162 0.69499934 0.99955648]
 [0.45211865 0.33403275 0.71196462 0.54662871 0.47911457]
 [0.67589551 0.78293883 0.89497928 0.47300591 0.9633673 ]
 [0.21063023 0.9793666  0.07879437 0.71031165 0.29812184]
 [0.79911818 0.87009124 0.49364555 0.93063521 0.67168999]
 [0.11245516 0.2591385  0.3839313  0.5927929  0.33433018]
 [0.02734072 0.15461071 0.79551925 0.05062484 0.2613657 ]
 [0.78250839 0.90465389 0.45596362 0.73382926 0.02217476]
 [0.69681761 0.47974647 0.88852072 0.01616799 0.13260183]
 [0.5947904  0.07459974 0.11391699 0.82347436 0.49186225]
 [0.627276   0.27119948 0.24338893 0.77317073 0.03720129]
 [0.86408581 0.02766397 0.92714223 0.37457369 0.17413436]
 [0.48785848 0

**3.4.6**

In [13]:
import sys
import ast
import numpy as np
import scipy.sparse


def write_array(arr):
    """Print `arr` (anything exposing `.tolist()`) as a nested Python list literal."""
    nested = arr.tolist()
    print(f"{nested!r}")


def generate_coocurrence_matrix(texts, vocab_size):
    """
    Count, for every ordered pair of distinct tokens (i, j), the number of
    documents that contain both of them.

    texts - list of lists of ints - i-th sublist contains identifiers of tokens in i-th document
    vocab_size - int - size of vocabulary
    returns scipy.sparse.dok_matrix of shape (vocab_size, vocab_size); it is
    symmetric with a zero diagonal. Token ids outside range(vocab_size) are ignored.
    """
    # Work per document on its set of distinct tokens instead of testing every
    # (i, j) vocabulary pair against every document, and never build a dense
    # vocab_size x vocab_size intermediate.
    pair_counts = {}
    for doc in texts:
        present = sorted({int(tok) for tok in doc if 0 <= tok < vocab_size})
        for first in present:
            for second in present:
                if first != second:
                    pair_counts[first, second] = pair_counts.get((first, second), 0) + 1

    matrix = scipy.sparse.dok_matrix((vocab_size, vocab_size), dtype=np.int_)
    for (row, col), count in pair_counts.items():
        matrix[row, col] = count
    return matrix


# Documents have different lengths, so keep them as a plain list of lists:
# np.array on ragged nested sequences raises ValueError on NumPy >= 1.24,
# and the function only iterates over documents and tests membership.
text = [[0, 2, 2, 2, 0, 0], [1, 1, 2, 1, 1], [2, 2, 1, 1]]
vocab_size = 3

result = generate_coocurrence_matrix(text, vocab_size)

write_array(result.toarray())

[[0, 0, 1], [0, 0, 2], [1, 2, 0]]




**3.4.7**

In [14]:
import sys
import ast
import numpy as np


def update_glove_weights(x, w, d, alpha, max_x, learning_rate):
    """
    Perform one full-batch gradient step of the GloVe objective
    sum_ij f(x_ij) * (w_i . d_j - log(1 + x_ij))^2.

    x - square integer matrix VocabSize x VocabSize - coocurrence matrix
    w - VocabSize x EmbSize - first word vectors (float ndarray, updated in place)
    d - VocabSize x EmbSize - second word vectors (float ndarray, updated in place)
    alpha - float - power in weight smoothing function f
    max_x - int - maximum coocurrence count in weight smoothing function f
    learning_rate - positive float - size of gradient step

    Prints w, an empty line and d after the update; returns None.
    """
    # Work in floating point regardless of the dtype of x: storing the
    # fractional weights back into an integer copy of x would truncate them to 0/1.
    counts = np.asarray(x, dtype=float)

    # Weight smoothing f(x) = (x / max_x) ** alpha for x <= max_x, else 1.
    weights = np.where(counts <= max_x, (counts / max_x) ** alpha, 1.0)

    # Shared factor of both gradients: 2 * lr * f(x_ij) * (w_i . d_j - log(1 + x_ij)).
    scaled_err = 2 * learning_rate * (w @ d.T - np.log1p(counts)) * weights

    # Both updates are computed from the OLD w and d before anything is stored.
    w_new = w - scaled_err @ d
    d_new = d - scaled_err.T @ w

    w[:] = w_new
    d[:] = d_new

    print(w)
    print()
    print(d)


x = np.array([[72, 67, 24, 81, 52, 43, 49, 12, 84, 77, 22, 66, 66, 0, 59, 4, 71, 78, 37, 69, 39, 63, 68, 36, 97, 17],
              [72, 38, 34, 43, 11, 46, 91, 96, 43, 4, 80, 77, 19, 18, 39, 8, 43, 58, 59, 43, 40, 55, 14, 96, 90, 43],
              [83, 82, 22, 93, 5, 17, 68, 30, 2, 67, 7, 8, 34, 2, 88, 66, 31, 52, 96, 13, 9, 83, 3, 9, 91, 15],
              [73, 25, 40, 82, 42, 30, 79, 77, 15, 76, 65, 6, 7, 44, 98, 88, 65, 74, 33, 48, 61, 54, 64, 28, 49, 38],
              [47, 41, 2, 54, 5, 13, 36, 0, 97, 15, 80, 90, 38, 27, 24, 31, 32, 20, 77, 20, 8, 11, 24, 19, 77, 23],
              [55, 26, 42, 5, 98, 87, 36, 1, 11, 19, 57, 68, 92, 49, 98, 9, 98, 24, 0, 13, 14, 90, 10, 51, 30, 30],
              [98, 12, 2, 66, 27, 12, 12, 60, 46, 71, 89, 82, 75, 49, 5, 77, 52, 96, 29, 32, 51, 71, 45, 16, 74, 12],
              [18, 92, 95, 62, 51, 65, 1, 49, 51, 62, 16, 64, 97, 48, 78, 14, 90, 50, 43, 49, 59, 11, 75, 50, 60, 2],
              [47, 45, 88, 78, 93, 30, 79, 20, 69, 68, 6, 76, 41, 3, 57, 98, 62, 6, 65, 53, 7, 9, 76, 96, 19, 88],
              [45, 73, 39, 70, 21, 62, 82, 13, 14, 72, 8, 23, 99, 49, 33, 80, 21, 67, 37, 31, 38, 48, 40, 61, 61, 67],
              [61, 86, 91, 61, 13, 88, 79, 56, 78, 87, 91, 94, 37, 14, 15, 44, 91, 3, 6, 23, 15, 85, 18, 58, 11, 4],
              [50, 28, 55, 44, 21, 62, 98, 64, 85, 84, 4, 31, 59, 16, 51, 11, 37, 44, 6, 60, 47, 54, 70, 29, 32, 74],
              [39, 6, 17, 54, 15, 71, 24, 94, 5, 16, 15, 74, 43, 98, 75, 10, 79, 78, 99, 47, 99, 4, 22, 90, 12, 19],
              [74, 51, 67, 72, 21, 9, 57, 50, 0, 43, 80, 91, 58, 46, 92, 98, 11, 4, 36, 31, 90, 90, 91, 52, 68, 63],
              [95, 76, 24, 52, 3, 71, 19, 75, 34, 92, 83, 15, 77, 12, 96, 58, 63, 68, 75, 9, 28, 44, 30, 94, 67, 49],
              [22, 93, 33, 77, 2, 9, 3, 3, 47, 56, 84, 70, 15, 81, 16, 49, 20, 95, 18, 22, 98, 3, 77, 27, 1, 13],
              [45, 63, 34, 0, 75, 45, 30, 23, 7, 7, 80, 62, 34, 11, 41, 16, 45, 6, 11, 21, 18, 55, 7, 24, 18, 70],
              [13, 7, 21, 85, 29, 53, 56, 83, 63, 89, 18, 67, 93, 73, 37, 3, 55, 65, 16, 72, 6, 80, 0, 39, 51, 24],
              [72, 23, 9, 56, 60, 88, 69, 6, 8, 92, 3, 44, 29, 5, 58, 58, 55, 24, 48, 57, 28, 69, 64, 72, 58, 98],
              [20, 56, 52, 74, 27, 95, 85, 20, 7, 52, 8, 93, 76, 53, 62, 54, 34, 25, 89, 38, 85, 29, 38, 18, 1, 28],
              [30, 97, 74, 11, 36, 92, 55, 74, 34, 29, 12, 40, 61, 69, 54, 72, 14, 64, 73, 75, 75, 4, 37, 47, 17, 29],
              [7, 18, 25, 6, 51, 63, 63, 53, 38, 96, 19, 56, 36, 35, 75, 99, 32, 28, 68, 14, 55, 9, 3, 19, 9, 59],
              [0, 98, 57, 98, 13, 25, 55, 56, 58, 37, 30, 90, 51, 71, 10, 36, 58, 94, 32, 80, 95, 44, 40, 82, 99, 6],
              [74, 28, 93, 37, 81, 54, 92, 89, 52, 96, 93, 8, 65, 82, 7, 14, 75, 0, 45, 59, 15, 17, 85, 87, 10, 52],
              [10, 74, 13, 23, 56, 25, 66, 59, 86, 39, 47, 72, 92, 28, 23, 75, 23, 18, 5, 20, 36, 52, 42, 56, 20, 7],
              [32, 37, 58, 20, 3, 33, 76, 92, 36, 73, 90, 53, 82, 78, 6, 66, 11, 33, 64, 68, 51, 76, 94, 94, 74, 88]], dtype = 'float64')

w = np.array([[0.7236403458959406, 0.0956019387576047, 0.0025299248050427714, 0.8219024304497274, 0.43253754513562515, 0.8013795226500925],
              [0.1645225418615962, 0.17254764305062675, 0.915834884927677, 0.15659274788174238, 0.4408801726853846, 0.6712507398638423],
              [0.7220314060070252, 0.1109087497279424, 0.8673890374761482, 0.6019681601593759, 0.21136092547712715, 0.46410460250177055],
              [0.2051472970020488, 0.7021578939163269, 0.4920315519905448, 0.8786530949689468, 0.8406582658875078, 0.7656322995670249],
              [0.5314722945128192, 0.20582039242966288, 0.6649783801689887, 0.9122470167268962, 0.06046820688028054, 0.7640361944809368],
              [0.8531299103217095, 0.8837919293919477, 0.5584731093192602, 0.5488851769744959, 0.5426259488733682, 0.8101919492091457],
              [0.014691936047236509, 0.8299297933323541, 0.04420642840864686, 0.19514486051010316, 0.5605834763387445, 0.021425480951998255],
              [0.6251450063221531, 0.916013278510962, 0.9266733043623226, 0.4314909906070713, 0.5861250222822415, 0.6933275681854775],
              [0.613436662402963, 0.25971014970117345, 0.8516017571376222, 0.3946078968050868, 0.5030576607821642, 0.4947379037657953],
              [0.1831704150579163, 0.7027400367924451, 0.9687380255252486, 0.05161874874595729, 0.5662001008903554, 0.1163342848387866],
              [0.7871817922619593, 0.2744881375377821, 0.47927673745333677, 0.29916960674176196, 0.36165825794500894, 0.4473132902029585],
              [0.9460136327515588, 0.9784510345542305, 0.7292583652396575, 0.9967710630754123, 0.5222338378761943, 0.15774056366446398],
              [0.5663154377790205, 0.31992559317458047, 0.895903341426127, 0.10800834113175917, 0.7025174488794499, 0.09983260287294515],
              [0.2870859802344229, 0.6124244361792336, 0.03043370710190285, 0.4177754705816856, 0.41076530192454186, 0.059229404317664214],
              [0.453421549409623, 0.10006035499361832, 0.4729640823042591, 0.4187735846604017, 0.19252902582118436, 0.4571615927038022],
              [0.4717366823003555, 0.311470963714212, 0.7563429074261462, 0.9450429903711869, 0.23851560864324461, 0.4264206092799121],
              [0.14209483434392234, 0.9545183136517666, 0.02853067102355411, 0.8397788414889452, 0.28747164060068653, 0.5890799959267197],
              [0.7137144457967627, 0.7108041311984524, 0.3391605131543025, 0.18466650700703768, 0.07037926283668172, 0.1691030355977058],
              [0.4181167385409663, 0.5733773938988352, 0.9308794863064511, 0.955104551017489, 0.7472618752255964, 0.9106883383537705],
              [0.29209827546854006, 0.7950653331872178, 0.9314779081831699, 0.2137419943082265, 0.9590688802321072, 0.21779623076769017],
              [0.6414528631722118, 0.7772400748403205, 0.7240597746441493, 0.4846785371953165, 0.20903895145878393, 0.9928711008461597],
              [0.4987552039133927, 0.966261456826001, 0.6392910461562884, 0.3891694028095307, 0.14376415691424704, 0.5654942409405452],
              [0.39062876410463865, 0.4372793328535669, 0.9066881332880398, 0.928194141998039, 0.26891611788606773, 0.970014111003586],
              [0.05753018657343756, 0.5987554892139141, 0.6695393400712614, 0.4342378657370556, 0.5068004463455815, 0.28913437767829675],
              [0.31284712702847906, 0.6696586256781413, 0.6349611781499843, 0.11008282689008553, 0.9000387199581723, 0.5893732652223279],
              [0.38771861901614457, 0.9275236976874062, 0.1507893346167909, 0.2649576462980838, 0.8917999241041804, 0.7060665522096253]])

d = np.array([[0.7146175764456325, 0.31161087332596693, 0.799868898982844, 0.3303984762074823, 0.15755367025489198, 0.8822561515814714],
              [0.48324415449065805, 0.32294633607735035, 0.273076894762348, 0.46575965932905583, 0.35173647464295466, 0.0698782343365999],
              [0.05951092454514029, 0.9631544906381114, 0.14919875559361273, 0.9071033838543416, 0.9235221236014998, 0.15343960980130578],
              [0.37667471994346735, 0.3832592710693109, 0.1372971042292026, 0.5063394603470396, 0.3657347277059969, 0.21520394748123772],
              [0.5589413502705171, 0.9228726685280682, 0.9028006349689756, 0.7902921185261006, 0.09337560160131464, 0.8806823905125992],
              [0.19078196327854235, 0.9862705503057667, 0.00800242331367751, 0.7036641885324555, 0.7452071471082209, 0.85314563397203],
              [0.42059808696733225, 0.3678976649279547, 0.15153142787888962, 0.9831212856723789, 0.7218055186807681, 0.8943329971799489],
              [0.058672278269596756, 0.6364681756816949, 0.24610924719747507, 0.8429515887557353, 0.23639035927773622, 0.9193123017124043],
              [0.3667295853360063, 0.46010540148263646, 0.818107188288508, 0.027140526241385965, 0.4420026102807323, 0.3050634480740779],
              [0.9602073143407148, 0.5408373879572825, 0.4027285042008486, 0.854769594232319, 0.8977332204421882, 0.7804511190784789],
              [0.9554030213710992, 0.6286064807931032, 0.7899715293283952, 0.20778805629585584, 0.34452317136784105, 0.8373278109724016],
              [0.9511367017094053, 0.8108673965379353, 0.5917802839407773, 0.08638924272725734, 0.5614389008614823, 0.10285577516634681],
              [0.15293610366355237, 0.4726630546010566, 0.7151593451811216, 0.6787398883364194, 0.05726564336395312, 0.11175850750236216],
              [0.5284613368117816, 0.2171952870224766, 0.14730305464381344, 0.16327154211081985, 0.22473713798444594, 0.8780618686814565],
              [0.6229749344457463, 0.2450307938022901, 0.856716114606889, 0.5130970556519491, 0.09638792050417233, 0.5580480219996736],
              [0.02557257524793899, 0.16614696997999867, 0.9057474930205891, 0.9639638373151679, 0.8305505098688646, 0.13212730388642224],
              [0.9945224728362285, 0.601015567237635, 0.627777689771871, 0.062014306890884385, 0.5482657713187832, 0.050645865034282034],
              [0.222530564647055, 0.16270913407631815, 0.3463743065572499, 0.3642479732760492, 0.6787809827842912, 0.6698646733332234],
              [0.0514618465561959, 0.09146560753484756, 0.5663782403169225, 0.09809277695721963, 0.7435268283749681, 0.6941669527997202],
              [0.7220783710745816, 0.242189075941737, 0.19197963437514165, 0.2789768605860322, 0.1257100212184865, 0.25803379668907667],
              [0.5231678066470311, 0.4611093289035184, 0.8420569136872692, 0.9566490894261072, 0.07691192438283945, 0.37613366065780873],
              [0.010481514678729265, 0.5145103851754453, 0.9425491945781952, 0.24440293940943314, 0.2766636476384883, 0.9944680222564074],
              [0.7081598239606982, 0.3291415847107684, 0.7986116830970068, 0.32005951294163504, 0.988878016430301, 0.16702718654180948],
              [0.3281899603978248, 0.8371583970360936, 0.914781121298489, 0.9898376984561366, 0.2605393835200668, 0.7046307961318979],
              [0.6669241697435273, 0.7506837943872975, 0.3223310011040579, 0.8024412673323509, 0.47139557621217376, 0.34991596973647043],
              [0.6981065171211724, 0.7907802796637423, 0.05700463849852977, 0.29301210116680565, 0.3246921756526622, 0.9147896908728982]])


# Hyper-parameters for the GloVe step: exponent and saturation point of the
# weight smoothing function, and the gradient step size.
alpha = 0.5878728651551414
max_x = 97
learning_rate = 0.8066056621137515

# x, w and d are the ndarrays defined above; the call prints the updated w and d.
update_glove_weights(x, w, d, alpha, max_x, learning_rate)

[[37.89494962 35.32608454 39.58755104 33.56742861 31.88970757 33.00715087]
 [33.84381119 38.03694579 34.69008748 36.55796979 30.50706391 40.41866134]
 [22.29263074 22.26123384 27.2813589  29.76211635 26.0937861  28.8736166 ]
 [28.62724476 25.01024078 30.24910191 29.9425152  25.79335086 31.50083097]
 [24.8691495  21.18902354 25.17742133 16.7342702  21.32882188 20.13368429]
 [20.6074351  23.38444652 24.17060409 19.190986   14.7326267  22.08578662]
 [41.83248644 39.23972478 48.25206831 39.1844749  36.22355564 43.83898758]
 [25.95766254 27.78414465 26.83185233 27.67048128 24.68166625 21.15158108]
 [32.85189288 36.44077534 35.75352507 37.3032339  34.53270453 31.56366553]
 [30.83503864 34.52475729 33.1649362  41.27828442 33.62578239 37.29511586]
 [33.8370317  40.43002002 36.18444333 37.12624122 35.0877118  35.13925564]
 [21.2549779  23.06045429 21.30309581 23.59129884 20.61199019 24.6602706 ]
 [29.8416268  31.83928484 32.2510323  33.58647929 26.77111149 37.72191515]
 [44.55346816 45.05766659

**3.4.8**

In [15]:
import sys
import ast
import numpy as np


def get_nearest(embeddings, query_word_id, get_n):
    """
    Find the words whose L2-normalised embeddings lie closest to the query word.
    Similarity is the negated Euclidean distance between unit-length vectors,
    so the query word itself scores -0.0 and is always ranked first.

    embeddings - VocabSize x EmbSize - word embeddings (rows must be non-zero,
                 otherwise normalisation yields NaN similarities)
    query_word_id - integer - id of query word to find most similar to
    get_n - integer - number of most similar words to retrieve

    returns list of at most `get_n` tuples (word_id, similarity) sorted by descending
    order of similarity value (fewer when the vocabulary is smaller than get_n)
    """
    vectors = np.asarray(embeddings, dtype=float)
    unit = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)

    diffs = unit - unit[query_word_id]
    similarities = -np.sqrt(np.sum(diffs ** 2, axis=1))

    # sorted() is stable, so words with equal similarity keep ascending id order.
    ranked = sorted(enumerate(similarities), key=lambda item: item[1], reverse=True)
    # Honour get_n: previously every word in the vocabulary was returned.
    return ranked[:max(get_n, 0)]


embeddings = np.array([[0.7299015792584768, 0.2915364327741303, 0.5307571134639943, 0.3101345732086396, 0.8327085262119636, 0.39018382511314353, 0.678094726221033, 0.12372148102696612, 0.5966533433209616],
                       [0.5411155947267721, 0.046791742239819856, 0.5358832195593092, 0.09894162419462038, 0.6350557173679914, 0.15126161842015717, 0.11375720216711405, 0.46954553941325416, 0.8281402097264261],
                       [0.5323869209381028, 0.2005012376766715, 0.5925043884236925, 0.4621530177251649, 0.3886830034303448, 0.6403738184472031, 0.23320289120963578, 0.43574647265888766, 0.5305633832484254]])
# Query the neighbours of word 0. get_n (8) is larger than the 3 embedding
# rows defined above.
query_word_id = 0
get_n = 8

result = get_nearest(embeddings, query_word_id, get_n)

print(result)

[(0, -0.0), (2, -0.5028921892757982), (1, -0.542231021984391)]


**3.6.3**

In [16]:
# Slide the 3-tap kernel `k` over the signal `s` (no padding) and print the
# dot product of the kernel with every full window.
k = np.array([-0.5, 0, 0.5])
s = np.array([1, 1, 2, 3, 3, 3, 2, 1, 1])
out = [np.dot(k, s[left:left + 3]) for left in range(len(s) - 2)]
print(*out)

0.5 1.0 0.5 0.0 -0.5 -1.0 -0.5


**3.6.5**

In [17]:
def receptive_field(size_kernel, count_layers):
    """
    size_kernel - integer - kernel width shared by every layer
    count_layers - integer - number of stacked layers

    returns size_kernel + (size_kernel - 1) * (count_layers - 1):
    the first layer contributes `size_kernel`, every further layer adds
    `size_kernel - 1`.
    """
    widening_per_layer = size_kernel - 1
    layers_after_first = count_layers - 1
    return size_kernel + widening_per_layer * layers_after_first

receptive_field(5, 4)

17

**3.6.6**

In [19]:
import numpy as np
# given:
x = np.array([[1, 0], [1, 1], [0, 0], [0, 1], [1, 0]])
kernel = np.array([[1, 1, 0], [0, 1, 1]])
bias = np.zeros(3)

# solution:
# Each output is the sum of an element-wise product between a window of
# kernel.shape[1] consecutive rows of x and the transposed kernel.
# The number of windows is the number of valid start rows,
# x.shape[0] - kernel.shape[1] + 1 (not the kernel width, which only happens
# to be equal for this data: 5 - 3 + 1 == 3).
y = bias + [(x[pos:pos + kernel.shape[1]] * kernel.T).sum()
            for pos in range(x.shape[0] - kernel.shape[1] + 1)]
y

array([3., 2., 1.])

**3.6.7**

In [20]:
import numpy as np

X = np.array([1, 0, 1, 1, 0, 0, 0, 1, 1, 0]).reshape((5, 2))
kernel = np.array([1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1]).reshape((2, 2, 3))


# X (5 x 2) is broadcast against each of the two 2 x 3 matrices in kernel,
# so the product has shape (2, 5, 3):
#   x_kernel_product[o, t, k] = sum_c X[t, c] * kernel[o, c, k]
x_kernel_product = X @ kernel
# The trace over axes (2, 1) with offset `pos` sums x_kernel_product[o, pos + k, k]
# over k, leaving one value per leading index o. There is one such diagonal per
# valid start row, i.e. X.shape[0] - kernel.shape[2] + 1 of them (equal to the
# kernel width only by coincidence for this data: 5 - 3 + 1 == 3).
y = [np.trace(x_kernel_product, offset=pos, axis1=2, axis2=1)
     for pos in range(X.shape[0] - kernel.shape[2] + 1)]

y

[array([3, 1]), array([2, 2]), array([1, 0])]

**3.8.2**

In [36]:
import sys
import ast
import numpy as np


def write_array(arr):
    """Print `arr` (anything with a `.tolist()` method) as the repr of its nested-list form."""
    as_lists = arr.tolist()
    print(repr(as_lists))


def apply_convolution(data, kernel, bias):
    """
    data - InLen x InChannels
    kernel - OutChannels x InChannels x KernelSize
    bias - OutChannels

    returns OutLen x OutChannels, where OutLen = InLen - KernelSize + 1

    Every output element is the sum of the element-wise product between a
    KernelSize-row window of `data` and one output channel's transposed
    kernel slice, plus that channel's bias.
    """
    out_channels = kernel.shape[0]
    kernel_size = kernel.shape[2]
    out_len = data.shape[0] - kernel_size + 1

    output = np.zeros(shape=(out_len, out_channels))
    for pos in range(out_len):
        window = data[pos:pos + kernel_size]
        for channel in range(out_channels):
            # kernel[channel] is InChannels x KernelSize; transpose to line up with the window.
            output[pos, channel] = np.sum(window * kernel[channel].T)

    return output + bias



# Sample input: 16 rows of 2 values each.
data = np.array([[0.6685863697825855, 0.9865099796400376],
                 [0.32297881307708, 0.9908870158650515],
                 [0.8359169063921157, 0.5443776713017927],
                 [0.5363888029267118, 0.34755850459471804],
                 [0.5966372342560426, 0.9834742307894673],
                 [0.7371274295314912, 0.03279590013405109],
                 [0.08580648148402137, 0.2850655085982621],
                 [0.584942134340805, 0.7981720699451806],
                 [0.19604496972304086, 0.991819073359733],
                 [0.5622511488322055, 0.07928952553499002],
                 [0.24152151330089744, 0.2865696384305756],
                 [0.882594506643994, 0.5949729821472712],
                 [0.10820233432771786, 0.8549971123651271],
                 [0.18754460128195194, 0.6303661925298489],
                 [0.3551051497971416, 0.9452980688158904],
                 [0.6525044770663634, 0.8054232618991838]])

# Kernel literal of shape 3 x 2 x 5; bias has one entry per leading kernel index.
kernel = np.array([[[0.8638059436915633, 0.4002290439648929, 0.8174398057982054, 0.34082478315585973, 0.5565832130592809],
                    [0.08497737591188492, 0.7853885140384725, 0.1645895575029136, 0.6294907704137637, 0.8169862258229014]],
                   [[0.21527072709338957, 0.9185427760457524, 0.5167378860756242, 0.12177789993763499, 0.4201289643444214],
                    [0.8389450071463863, 0.6238637143288427, 0.5098771768082815, 0.1436853463091461, 0.12036561608743845]],
                   [[0.8347050184618267, 0.12339875692133984, 0.13629086964943626, 0.623950910768403, 0.7092189295761365],
                    [0.9578703072530402, 0.31612669923975534, 0.44018179806384916, 0.26615330385390035, 0.2745979551030847]]])
bias = np.array([0.6574440445518804, 0.3253787051567585, 0.3119672663686287])

# With 16 input rows and a kernel width of 5 the result has 16 - 5 + 1 = 12 rows.
result = apply_convolution(data, kernel, bias)

print(result)

[[4.53637401 3.40559783 3.64203285]
 [3.53791755 3.31563793 3.19785548]
 [3.11553433 2.6461514  2.82923446]
 [3.95582621 2.68487692 2.35547493]
 [3.31545625 2.55379287 2.97932928]
 [3.23359714 1.88961059 2.29726403]
 [2.55036376 2.44108517 2.06635398]
 [3.80066148 2.76372775 3.03033055]
 [2.87705358 2.37783992 2.69971685]
 [3.48545989 1.96369042 1.96099252]
 [3.37079171 2.66791052 2.27237898]
 [4.17954021 2.6157867  3.36235457]]


**3.8.3**

In [37]:
import sys
import ast
import numpy as np


def write_array(arr):
    """Print the nested-list form of `arr` (via its `.tolist()` method) using repr()."""
    nested = arr.tolist()
    print(repr(nested))


def calculate_kernel_grad(x, y, kernel, bias):
    """
    x - InLen x InChannels
    y - OutLen x OutChannels
    kernel - OutChannels x InChannels x KernelSize
    bias - OutChannels

    returns OutChannels x InChannels x KernelSize

    Entry [o, c, k] is the sum of x[k : k + OutLen, c], with
    OutLen = InLen - KernelSize + 1; it is the same for every o.
    `y` and `bias` are accepted but not used, and only the shape of
    `kernel` is read.
    """
    x = np.asarray(x)
    in_len, _ = x.shape
    out_channels, in_channels, kernel_size = np.asarray(kernel).shape
    out_len = in_len - kernel_size + 1

    grad = np.empty((out_channels, in_channels, kernel_size))
    for offset in range(kernel_size):
        # Rows of x that kernel tap `offset` touches across all window positions.
        touched_sum = x[offset:offset + out_len, :].sum(axis=0)
        # Broadcast the per-input-channel sums over every output channel.
        grad[..., offset] = touched_sum

    return grad


# Sample input: x has 9 rows of 3 values each.
x = np.array([[0.1559846921787793, 0.31890243279158936, 0.4294841981370352],
              [0.6287087193276831, 0.041166388481120975, 0.0670771539905104],
              [0.15852860049868056, 0.5535509628878403, 0.7578893631796608],
              [0.505354018460584, 0.39104507392557886, 0.267830936523598],
              [0.35352084058390487, 0.09557605719492113, 0.17762289879326898],
              [0.17895124947325458, 0.2038143268404633, 0.45431038892117714],
              [0.44910520386366004, 0.28874952266426823, 0.48880576852948343],
              [0.978917156152191, 0.11927306276379035, 0.6831784491543058],
              [0.7665547409895508, 0.02661305420346527, 0.662020788170643]])

# y has 7 rows of 2 values each (9 - 3 + 1 = 7 for a kernel width of 3).
y = np.array([[0.9423069699375323, 2.235723993887441],
              [1.051430354504024, 2.5441990558270864],
              [1.3060781439427196, 2.77617352972661],
              [1.1097986223660692, 2.019161891632857],
              [0.8312656308401454, 2.1968468090151005],
              [1.0883546513743934, 3.182136137325698],
              [1.4545203460970286, 3.3294233853738975]])

# Kernel literal of shape 2 x 3 x 3.
kernel = np.array([[[0.7285150274194675, 0.13990616439149894, 0.08385531710791316],
                    [0.8119118106104425, 0.19272155988045991, 0.010762309371285528],
                    [0.5242309485683324, 0.33106798748722055, 0.2219888201243384]],
                   [[0.31261137319823096, 0.3951341806652614, 0.954412244770657],
                    [0.5475239344122861, 0.6407966544293683, 0.2840031545245296],
                    [0.9267337670407934, 0.626334029479077, 0.4315268897320006]]])

bias = np.array([0.03900532471824536, 0.6619593919342232])

result = calculate_kernel_grad(x, y, kernel, bias)

print(result)

[[[2.43015332 3.25308579 3.39093181]
  [1.89280476 1.69317539 1.67862206]
  [2.64302071 2.89671496 3.49165859]]

 [[2.43015332 3.25308579 3.39093181]
  [1.89280476 1.69317539 1.67862206]
  [2.64302071 2.89671496 3.49165859]]]


**3.8.4**

In [38]:
import ast
import numpy as np


def calculate_kernel_grad(x, y, kernel, bias):
    """
    x - InLen x InChannels
    y - OutLen x OutChannels
    kernel - OutChannels x InChannels x KernelSize
    bias - OutChannels

    returns InLen x InChannels (same shape as `x`)

    Despite its name, this function does not return a kernel-shaped array:
    entry [i, c] is the sum of kernel[o, c, k] over every output channel o and
    every kernel tap k through which input row i reaches some output row,
    i.e. every k with 0 <= i - k <= OutLen - 1, where
    OutLen = InLen - KernelSize + 1.
    `y` and `bias` are accepted but not used.
    """
    in_len, in_channels = x.shape
    kernel_size = kernel.shape[2]
    out_len = in_len - kernel_size + 1

    res = np.zeros(x.shape)
    for i in range(in_len):
        # Tap k pairs input row i with output row i - k, which must exist.
        # Both bounds are needed at once for short inputs (InLen < 2*KernelSize - 2);
        # when the input is shorter than the kernel the range is empty and the
        # entry stays 0.
        first_tap = max(0, i - (out_len - 1))
        last_tap = min(kernel_size - 1, i)
        for j in range(in_channels):
            res[i, j] = np.sum(kernel[:, j, first_tap:last_tap + 1])
    return res


# Sample input: x has 8 rows of 2 values each.
x = np.array([[0.5031766517322117, 0.30744410216949514],
              [0.04690208449415345, 0.322727131626243],
              [0.1388690574185909, 0.48576543724022325],
              [0.5260018011862109, 0.5859221562109312],
              [0.9194272143904142, 0.3887293155713266],
              [0.26873714217871125, 0.9546207791313607],
              [0.8974007607375208, 0.5713329992292489],
              [0.378989716528242, 0.49787928388753266]])

# y has 6 rows of 3 values each (8 - 3 + 1 = 6 for a kernel width of 3).
y = np.array([[1.5157583762374225, 0.9460413662192456, 0.9802340338281511],
              [1.5728362445918327, 0.996409724139607, 1.2530013664472253],
              [1.9068174476481374, 1.430592927945995, 1.6704630594015581],
              [2.189768979209843, 2.3149543871163503, 2.1601629609824995],
              [2.8353457102707083, 1.7422359297539565, 1.816707087141475],
              [2.0532913525958474, 1.9924093441385802, 2.3069493556139014]])

# Kernel literal of shape 3 x 2 x 3.
kernel = np.array([[[0.8077620147648772, 0.006392942850116379, 0.6080212915877307],
                    [0.6288229869798402, 0.6410664904844843, 0.75419330562945]],
                   [[0.5355186530459589, 0.9211024178840701, 0.27725553497982014],
                    [0.4507098181629161, 0.081570594016668, 0.8234980185346139]],
                   [[0.0325944131753374, 0.7744753133142763, 0.05946983249285043],
                    [0.7059580971549311, 0.7969953841197822, 0.5257810951530107]]])

bias = np.array([0.2579976950685653, 0.029957050945287222, 0.18958928880952108])

# This calls the calculate_kernel_grad defined in this cell, whose result is shaped like x.
result = calculate_kernel_grad(x, y, kernel, bias)

print(result)

[[1.37587508 1.7854909 ]
 [3.07784576 3.30512337]
 [4.02259241 5.40859579]
 [4.02259241 5.40859579]
 [4.02259241 5.40859579]
 [4.02259241 5.40859579]
 [2.64671733 3.62310489]
 [0.94474666 2.10347242]]


**3.8.5**

In [39]:
import ast
import sys
import collections
import numpy as np


# Record describing one layer by two fields, in this order: kernel_size, dilation.
LayerInfo = collections.namedtuple('LayerInfo', ('kernel_size', 'dilation'))


def parse_array(s):
    """Evaluate `s` as a Python literal (e.g. "[1, 2, 3]") and wrap the value in a numpy array."""
    literal = ast.literal_eval(s)
    return np.array(literal)

def read_array():
    """Read one line from standard input and parse it with parse_array."""
    line = sys.stdin.readline()
    return parse_array(line)


def calculate_receptive_field(layers):
    """
    layers - list of LayerInfo (or any (kernel_size, dilation) pairs)

    returns int - receptive field size

    Each layer adds (kernel_size - 1) * dilation to the receptive field;
    the final + 1 accounts for the starting position, so an empty list gives 1.
    """
    growth = sum((kernel_size - 1) * dilation for kernel_size, dilation in layers)
    return growth + 1


# Sample stack of three layers: kernel sizes 9, 9, 3 with dilations 2, 3, 4 respectively.
kernels = np.array([9, 9, 3])
dilations = np.array([2, 3, 4])

# Pair each kernel size with the dilation at the same index, one LayerInfo per layer.
layers = [LayerInfo(k, d) for k, d in zip(kernels, dilations)]

result = calculate_receptive_field(layers)
print(result)

49
