In [1]:
corpus = """
I drink and I know things.
When you play the game of thrones, your win or you die.
The true enemy won't wait out the storm, He brings the storm.
"""

print(corpus)


I drink and I know things.
When you play the game of thrones, your win or you die.
The true enemy won't wait out the storm, He brings the storm.



In [2]:
import unicodedata

text = ""
for c in unicodedata.normalize("NFD", "Día NIÑO Álvaro").lower():
    # print(unicodedata.category(c))
    if unicodedata.category(c) != "Mn":
        text += c
text

'dia nino alvaro'

In [3]:
import re

vocab = {}

for token in re.sub(r"\s+", " ", re.sub(r"[.,\n]", " ", corpus)).strip().split(" "):
    if token in vocab:
        vocab[token] += 1
    else:
        vocab[token] = 1

vocab

{'I': 2,
 'drink': 1,
 'and': 1,
 'know': 1,
 'things': 1,
 'When': 1,
 'you': 2,
 'play': 1,
 'the': 3,
 'game': 1,
 'of': 1,
 'thrones': 1,
 'your': 1,
 'win': 1,
 'or': 1,
 'die': 1,
 'The': 1,
 'true': 1,
 'enemy': 1,
 "won't": 1,
 'wait': 1,
 'out': 1,
 'storm': 2,
 'He': 1,
 'brings': 1}

In [4]:
import re

vocab = []

for token in re.sub(r"\s+", " ", re.sub(r"[.,\n]", " ", corpus)).strip().split(" "):
    if not token in vocab:
        vocab.append(token)

vocab

['I',
 'drink',
 'and',
 'know',
 'things',
 'When',
 'you',
 'play',
 'the',
 'game',
 'of',
 'thrones',
 'your',
 'win',
 'or',
 'die',
 'The',
 'true',
 'enemy',
 "won't",
 'wait',
 'out',
 'storm',
 'He',
 'brings']

In [5]:
import pandas
import numpy

report1 = pandas.DataFrame(vocab, columns=["Token"])
report1.index = numpy.array(report1.index) + 1
report1.T

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,16,17,18,19,20,21,22,23,24,25
Token,I,drink,and,know,things,When,you,play,the,game,...,die,The,True,enemy,won't,wait,out,storm,He,brings


In [6]:
def text2tokens(text):
    return re.sub(r"\s+", " ", re.sub(r"[.,\n]", " ", text)).strip().split(" ")

In [7]:
def tokens2vec(tokens):
    return [vocab.index(token) for token in tokens]

In [8]:
text = "When you play game of thrones know"
inputs_tokens = text2tokens(text)
inputs = tokens2vec(inputs_tokens)
inputs

[5, 6, 7, 9, 10, 11, 3]

In [9]:
numpy.random.seed(123)
embedding = numpy.random.rand(6, len(inputs))
embedding.round(2)

array([[0.7 , 0.29, 0.23, 0.55, 0.72, 0.42, 0.98],
       [0.68, 0.48, 0.39, 0.34, 0.73, 0.44, 0.06],
       [0.4 , 0.74, 0.18, 0.18, 0.53, 0.53, 0.63],
       [0.85, 0.72, 0.61, 0.72, 0.32, 0.36, 0.23],
       [0.29, 0.63, 0.09, 0.43, 0.43, 0.49, 0.43],
       [0.31, 0.43, 0.89, 0.94, 0.5 , 0.62, 0.12]])

In [10]:
pandas.DataFrame([inputs_tokens, inputs])

Unnamed: 0,0,1,2,3,4,5,6
0,When,you,play,game,of,thrones,know
1,5,6,7,9,10,11,3


In [11]:
pandas.DataFrame(embedding, columns=[f"e{i + 1}" for i in range(len(inputs))])

Unnamed: 0,e1,e2,e3,e4,e5,e6,e7
0,0.696469,0.286139,0.226851,0.551315,0.719469,0.423106,0.980764
1,0.68483,0.480932,0.392118,0.343178,0.72905,0.438572,0.059678
2,0.398044,0.737995,0.182492,0.175452,0.531551,0.531828,0.634401
3,0.849432,0.724455,0.611024,0.722443,0.322959,0.361789,0.228263
4,0.293714,0.630976,0.092105,0.433701,0.430863,0.493685,0.42583
5,0.312261,0.426351,0.893389,0.94416,0.501837,0.623953,0.115618


In [12]:
positional = numpy.zeros_like(embedding)

dim = 6
n = len(inputs)

for pos in range(n):
    for i in range(dim):
        if i % 2 == 0: # even
            # print(i, pos, numpy.sin(pos / (10_000 ** ((2 * i) / dim))))
            positional[i, pos] = numpy.sin(pos / (10_000 ** ((2 * i) / dim)))
        else: # odd
            # print(i, pos, numpy.cos(pos / (10_000 ** ((2 * i) / dim))))
            positional[i, pos] = numpy.cos(pos / (10_000 ** ((2 * i) / dim)))

positional.round(2)

array([[ 0.  ,  0.84,  0.91,  0.14, -0.76, -0.96, -0.28],
       [ 1.  ,  1.  ,  1.  ,  0.99,  0.98,  0.97,  0.96],
       [ 0.  ,  0.  ,  0.  ,  0.01,  0.01,  0.01,  0.01],
       [ 1.  ,  1.  ,  1.  ,  1.  ,  1.  ,  1.  ,  1.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 1.  ,  1.  ,  1.  ,  1.  ,  1.  ,  1.  ,  1.  ]])

In [13]:
pandas.DataFrame(positional, columns=[f"p{i + 1}" for i in range(len(inputs))])

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7
0,0.0,0.841471,0.909297,0.14112,-0.756802,-0.958924,-0.279415
1,1.0,0.998923,0.995694,0.990321,0.982814,0.97319,0.96147
2,0.0,0.002154,0.004309,0.006463,0.008618,0.010772,0.012926
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,0.0,5e-06,9e-06,1.4e-05,1.9e-05,2.3e-05,2.8e-05
5,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [14]:
embedding_positional = embedding + positional

pandas.DataFrame(embedding_positional, columns=[f"ep{i + 1}" for i in range(len(inputs))])

Unnamed: 0,ep1,ep2,ep3,ep4,ep5,ep6,ep7
0,0.696469,1.12761,1.136149,0.692435,-0.037334,-0.535818,0.701349
1,1.68483,1.479855,1.387812,1.333499,1.711864,1.411762,1.021148
2,0.398044,0.74015,0.186801,0.181915,0.540169,0.5426,0.647327
3,1.849432,1.724455,1.611023,1.722443,1.322959,1.361789,1.228263
4,0.293714,0.630981,0.092114,0.433715,0.430881,0.493708,0.425858
5,1.312261,1.426351,1.893389,1.94416,1.501837,1.623953,1.115618


In [15]:
report2 = pandas.DataFrame(embedding_positional.T)
report2.index = inputs_tokens
report2

Unnamed: 0,0,1,2,3,4,5
When,0.696469,1.68483,0.398044,1.849432,0.293714,1.312261
you,1.12761,1.479855,0.74015,1.724455,0.630981,1.426351
play,1.136149,1.387812,0.186801,1.611023,0.092114,1.893389
game,0.692435,1.333499,0.181915,1.722443,0.433715,1.94416
of,-0.037334,1.711864,0.540169,1.322959,0.430881,1.501837
thrones,-0.535818,1.411762,0.5426,1.361789,0.493708,1.623953
know,0.701349,1.021148,0.647327,1.228263,0.425858,1.115618


In [16]:
report2.shape

(7, 6)

In [17]:
W = embedding_positional.T

pandas.DataFrame(W)

Unnamed: 0,0,1,2,3,4,5
0,0.696469,1.68483,0.398044,1.849432,0.293714,1.312261
1,1.12761,1.479855,0.74015,1.724455,0.630981,1.426351
2,1.136149,1.387812,0.186801,1.611023,0.092114,1.893389
3,0.692435,1.333499,0.181915,1.722443,0.433715,1.94416
4,-0.037334,1.711864,0.540169,1.322959,0.430881,1.501837
5,-0.535818,1.411762,0.5426,1.361789,0.493708,1.623953
6,0.701349,1.021148,0.647327,1.228263,0.425858,1.115618


In [18]:
dim_k = 4

Q_w = numpy.random.rand(dim, dim_k)

pandas.DataFrame(Q_w)

Unnamed: 0,0,1,2,3
0,0.317285,0.414826,0.866309,0.250455
1,0.483034,0.98556,0.519485,0.612895
2,0.120629,0.826341,0.60306,0.545068
3,0.342764,0.304121,0.417022,0.681301
4,0.875457,0.510422,0.669314,0.585937
5,0.624904,0.674689,0.842342,0.083195


In [19]:
Q = W.dot(Q_w)

pandas.DataFrame(Q)

Unnamed: 0,0,1,2,3
0,2.793915,3.876071,3.791861,2.965309
1,3.196688,4.346717,4.534913,3.256096
2,2.869406,3.807849,4.146224,2.546041
3,3.070772,3.808719,4.048538,2.67925
4,2.649387,3.753564,3.287859,2.613018
5,2.491182,3.379288,2.862692,2.378991
6,2.284842,3.175857,3.265412,2.333506


In [20]:
K_w = numpy.random.rand(dim, dim_k)

pandas.DataFrame(K_w)

Unnamed: 0,0,1,2,3
0,0.763683,0.243666,0.194223,0.572457
1,0.095713,0.885327,0.627249,0.723416
2,0.016129,0.594432,0.556785,0.15896
3,0.153071,0.69553,0.318766,0.69197
4,0.554383,0.388951,0.925132,0.84167
5,0.357398,0.043591,0.304768,0.398186


In [21]:
K = W.dot(K_w)

pandas.DataFrame(K)

Unnamed: 0,0,1,2,3
0,1.614484,3.355719,2.6749,3.730291
1,2.138258,3.531891,3.127495,4.126016
2,1.977861,2.855426,2.37098,3.630286
3,1.858305,2.908894,2.615029,3.721042
4,1.12218,2.980771,2.645321,3.179001
5,0.79723,2.651832,2.469337,2.805302
6,1.466604,2.528299,2.262663,2.895683


In [22]:
dim_v = 4

V_w = numpy.random.rand(dim, dim_v)

pandas.DataFrame(V_w)

Unnamed: 0,0,1,2,3
0,0.704959,0.995358,0.355915,0.762548
1,0.593177,0.691702,0.151127,0.398876
2,0.240856,0.343456,0.513128,0.666625
3,0.105908,0.130895,0.321981,0.661564
4,0.846506,0.553257,0.854452,0.384838
5,0.316788,0.354265,0.171082,0.829113


In [23]:
V = W.dot(V_w)

pandas.DataFrame(V)

Unnamed: 0,0,1,2,3
0,2.446465,2.864815,1.777705,3.893039
1,3.019618,3.480327,2.343179,4.509807
2,2.517547,3.087584,1.631311,4.215537
3,2.488404,2.828247,1.799115,4.099529
4,2.099842,2.276068,1.57367,3.300677
5,1.666984,1.656255,1.439222,2.95359
6,2.100045,2.418359,1.68632,3.275083


$$
Q \cdot K^{\top}
$$

In [24]:
pandas.DataFrame(Q.dot(K.T))

Unnamed: 0,0,1,2,3,4,5,6
0,38.722049,43.757912,36.349158,37.416893,34.146369,30.188052,31.063735
1,44.023983,49.805096,41.307082,42.559558,38.891262,34.407818,35.367679
2,37.998885,43.056738,35.621788,36.72528,33.632277,29.766188,30.589699
3,38.562517,43.734495,36.274523,37.342237,34.025902,30.061504,31.051925
4,35.415307,39.986368,33.239594,34.163084,31.165834,27.515127,28.381523
5,31.893657,36.030878,30.000324,30.797715,28.003996,24.690089,25.563542
6,31.785428,35.943005,29.801046,30.706385,28.086788,24.852974,25.526096


$$
\frac{Q \cdot K^{\top}}{\sqrt{d_{model}}}
$$

In [25]:
pandas.DataFrame(Q.dot(K.T)) / (dim ** 0.5)

Unnamed: 0,0,1,2,3,4,5,6
0,15.80821,17.864093,14.839482,15.275382,13.940197,12.324221,12.681717
1,17.972716,20.332845,16.863545,17.374867,15.877291,14.046933,14.438794
2,15.51298,17.57784,14.542534,14.993033,13.730319,12.151995,12.488192
3,15.743082,17.854533,14.809012,15.244904,13.891016,12.272558,12.676895
4,14.458238,16.324366,13.570007,13.947021,12.723399,11.233003,11.586708
5,13.020531,14.709544,12.247581,12.573115,11.432583,10.079687,10.436272
6,12.976347,14.67367,12.166226,12.535829,11.466383,10.146184,10.420985


In [26]:
def sofmax(A):
    B = A.copy()
    n, m = A.shape
    for i in range(n):
        d = sum([numpy.exp(A[i, j]) for j in range(m)])
        for j in range(m):
            sij = numpy.exp(A[i, j]) / d
            B[i, j] = sij
    return B

In [27]:
QK_softmax = sofmax(Q.dot(K.T) / (dim ** 0.5))

pandas.DataFrame(QK_softmax).round(1)

Unnamed: 0,0,1,2,3,4,5,6
0,0.1,0.8,0.0,0.1,0.0,0.0,0.0
1,0.1,0.8,0.0,0.0,0.0,0.0,0.0
2,0.1,0.8,0.0,0.1,0.0,0.0,0.0
3,0.1,0.8,0.0,0.1,0.0,0.0,0.0
4,0.1,0.7,0.0,0.1,0.0,0.0,0.0
5,0.1,0.7,0.1,0.1,0.0,0.0,0.0
6,0.1,0.7,0.1,0.1,0.0,0.0,0.0


In [28]:
A = QK_softmax.dot(V)

pandas.DataFrame(A)

Unnamed: 0,0,1,2,3
0,2.889798,3.336874,2.210262,4.384131
1,2.924903,3.376024,2.245807,4.418463
2,2.888497,3.334969,2.2095,4.382245
3,2.893614,3.3411,2.21405,4.3881
4,2.863408,3.307327,2.183868,4.357875
5,2.83188,3.271827,2.152489,4.326652
6,2.83078,3.269573,2.15259,4.324289


$$
N_{d_v \times d_{model}}
$$

In [29]:
N = numpy.random.rand(dim_v, dim)

pandas.DataFrame(N)

Unnamed: 0,0,1,2,3,4,5
0,0.338671,0.55237,0.578551,0.521533,0.002688,0.988345
1,0.905342,0.207636,0.292489,0.52001,0.901911,0.983631
2,0.257542,0.564359,0.806969,0.39437,0.731073,0.161069
3,0.600699,0.865864,0.983522,0.079366,0.428347,0.204543


In [30]:
Nx = A.dot(N)

pandas.DataFrame(Nx)

Unnamed: 0,0,1,2,3,4,5
0,7.202478,7.332537,8.743397,4.461944,6.511126,7.391118
1,7.279588,7.409844,8.837608,4.517354,6.587222,7.477071
2,7.198983,7.32936,8.739617,4.459825,6.508039,7.387449
3,7.210955,7.341097,8.753801,4.467941,6.519417,7.400468
4,7.14422,7.274195,8.672364,4.420324,6.453864,7.326351
5,7.074566,7.204666,8.587711,4.370568,6.385447,7.248831
6,7.07076,7.201601,8.584173,4.368674,6.382473,7.24506


In [31]:
pandas.DataFrame(W)

Unnamed: 0,0,1,2,3,4,5
0,0.696469,1.68483,0.398044,1.849432,0.293714,1.312261
1,1.12761,1.479855,0.74015,1.724455,0.630981,1.426351
2,1.136149,1.387812,0.186801,1.611023,0.092114,1.893389
3,0.692435,1.333499,0.181915,1.722443,0.433715,1.94416
4,-0.037334,1.711864,0.540169,1.322959,0.430881,1.501837
5,-0.535818,1.411762,0.5426,1.361789,0.493708,1.623953
6,0.701349,1.021148,0.647327,1.228263,0.425858,1.115618


In [32]:
pandas.DataFrame(W + Nx)

Unnamed: 0,0,1,2,3,4,5
0,7.898947,9.017367,9.141441,6.311376,6.80484,8.703379
1,8.407198,8.889699,9.577758,6.241809,7.218203,8.903422
2,8.335132,8.717171,8.926417,6.070848,6.600153,9.280839
3,7.90339,8.674595,8.935716,6.190384,6.953132,9.344628
4,7.106887,8.986059,9.212533,5.743283,6.884745,8.828187
5,6.538749,8.616428,9.13031,5.732356,6.879155,8.872784
6,7.772109,8.222749,9.2315,5.596937,6.808331,8.360678


In [33]:
def norm(A):
    B = A.copy()

    n, m = A.shape

    means = [A[i, :].mean() for i in range(n)]
    deviations = [A[i, :].std() for i in range(n)]

    error = 0.0001 # numpy.array(deviations).std()

    for i in range(n):
        B[i, :] = (A[i, :] - means[i]) / (deviations[i] + error)

    return B

In [34]:
pandas.DataFrame(norm(Nx))

Unnamed: 0,0,1,2,3,4,5
0,0.202928,0.303647,1.396223,-1.919353,-0.332458,0.349012
1,0.200698,0.300678,1.39658,-1.919497,-0.330738,0.352279
2,0.202799,0.303804,1.396356,-1.919277,-0.332488,0.348807
3,0.202696,0.303376,1.396273,-1.919358,-0.332293,0.349307
4,0.204536,0.305874,1.395994,-1.919222,-0.333719,0.346538
5,0.206413,0.308675,1.395792,-1.919015,-0.335257,0.343391
6,0.206007,0.308897,1.396117,-1.918846,-0.335245,0.343072


In [45]:
def relu(G, x):
    z = G.dot(x)
    n, m = z.shape
    for i in range(n):
        for j in range(m):
            if z[i, j] < 0:
                z[i, j] = 0
    return z

In [47]:
n, m = Nx.shape
G = numpy.random.rand(m, n)

pandas.DataFrame(relu(G, norm(Nx)))


Unnamed: 0,0,1,2,3,4,5
0,0.843627,1.262922,5.806447,0.0,0.0,1.451078
1,0.461594,0.69078,3.157346,0.0,0.0,0.784723
2,0.68281,1.022127,4.67683,0.0,0.0,1.163479
3,0.843377,1.262376,5.786372,0.0,0.0,1.44196
4,0.866495,1.297209,5.929269,0.0,0.0,1.473538
5,0.737873,1.104195,5.062904,0.0,0.0,1.262127
