<a href="https://colab.research.google.com/github/eriksali/DNN_2023_NLP/blob/main/NLP13_14.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NLP Class Lecture 13-14 Code Examples

Oakland University, W 23, Prof. Wilson

## Simple NN with PyTorch

In [None]:
import torch
import torch.nn as nn

In [None]:
# Tiny bias-free network: 2 inputs -> 2 hidden units -> 1 output, squashed by a sigmoid.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=2, bias=False),
    nn.Linear(in_features=2, out_features=1, bias=False),
    nn.Sigmoid(),
)

In [None]:
# Replace the random initial weights with fixed values so the worked example
# produces the same numbers on every run. no_grad keeps these in-place writes
# out of the autograd graph.
with torch.no_grad():
    # Weight matrices are indexed [output unit, input feature].
    model[0].weight.copy_(torch.tensor([[0.3, 0.1],
                                        [-0.1, 0.2]]))
    model[1].weight.copy_(torch.tensor([[0.1, -0.05]]))

In [None]:
# Plain SGD over the toy network's weights, scored with binary cross-entropy
# on the sigmoid output.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
loss_fn = nn.BCELoss()

In [None]:
# Run the network on one sample input. `x` is created here so the cell also
# works on a fresh kernel (it used to rely on a later cell having been run),
# and the module is called directly instead of through .forward() so that any
# registered hooks run.
x = torch.Tensor([2, 1])
model(x)

In [None]:
# Repeat the forward pass one layer at a time so every intermediate value can
# be inspected.
x = torch.Tensor([2, 1])
h = model[0](x)  # output of the first linear layer
z = model[1](h)  # output of the second linear layer (input to the sigmoid)
o = model[2](z)  # sigmoid of z
out = o
print(x, model[0].weight, h, z, o, sep='\n')

tensor([2., 1.])
Parameter containing:
tensor([[ 0.3000,  0.1000],
        [-0.1000,  0.2000]], requires_grad=True)
tensor([0.7000, 0.0000], grad_fn=<SqueezeBackward3>)
tensor([0.0700], grad_fn=<SqueezeBackward3>)
tensor([0.5175], grad_fn=<SigmoidBackward0>)


In [None]:
# Score the network output against a target label of 1.
target = torch.ones(1)
loss = loss_fn(out, target)
loss

tensor(0.6588, grad_fn=<BinaryCrossEntropyBackward0>)

In [None]:
# backward() adds into each parameter's .grad instead of overwriting it, so
# clear anything left over from an earlier pass through these cells first.
optimizer.zero_grad()
loss.backward()

In [None]:
# Show the gradient stored on each weight matrix, output layer first.
for layer_idx in (1, 0):
    print(f'layer {layer_idx + 1} gradient:', model[layer_idx].weight.grad)

layer 2 gradient: tensor([[-0.3378,  0.0000]])
layer 1 gradient: tensor([[-0.0965, -0.0483],
        [ 0.0483,  0.0241]])


In [None]:
# Apply one SGD update, then display the adjusted weights of both layers.
optimizer.step()
for layer_idx in (0, 1):
    print(f'new layer {layer_idx + 1}:', model[layer_idx].weight)

new layer 1: Parameter containing:
tensor([[ 0.3001,  0.1000],
        [-0.1000,  0.2000]], requires_grad=True)
new layer 2: Parameter containing:
tensor([[ 0.1003, -0.0500]], requires_grad=True)


### Scratch space

In [None]:
import numpy as np
def logistic(z, derivative=False):
    """Evaluate the logistic (sigmoid) function at ``z``.

    Args:
        z: Input value; anything ``np.exp`` accepts.
        derivative: When truthy, return the derivative of the logistic
            function at ``z`` instead of its value.

    Returns:
        ``1 / (1 + exp(-z))``, or ``s * (1 - s)`` with ``s`` being that value
        when ``derivative`` is truthy.
    """
    sigma = 1 / (1 + np.exp(-z))
    if derivative:
        return sigma * (1 - sigma)
    return sigma


logistic(-.3153, derivative=True)

0.2438881376106578

In [None]:
# Scratch check: softmax the values [0.6, 0], weight the two results by
# 0.1 and -0.05, and pass the sum through a sigmoid.
t3 = torch.tensor([0.6, 0.0])
sm = t3.softmax(dim=0)
# Hand calculation with the softmax values rounded to four places; the result
# (0.046855) is not displayed because it is not the last expression in the cell.
.6457*.1 + .3543*-0.05
torch.tensor([.046855]).sigmoid()

In [None]:
def getBack(var_grad_fn):
    """Print the autograd graph reachable from ``var_grad_fn``, depth first.

    Every visited node is printed. When a node reached through
    ``next_functions`` exposes a ``variable`` attribute, the node, that tensor
    and the tensor's current ``.grad`` are printed; otherwise the walk
    recurses into the node. Empty (falsy) entries are skipped.

    Args:
        var_grad_fn: The ``grad_fn`` object to start from, e.g. ``loss.grad_fn``.
    """
    print(var_grad_fn)
    for parent, _ in var_grad_fn.next_functions:
        if not parent:
            continue
        if hasattr(parent, 'variable'):
            leaf = parent.variable
            print(parent)
            print('Tensor with grad found:', leaf)
            print(' - gradient:', leaf.grad)
            print()
        else:
            getBack(parent)

getBack(loss.grad_fn)

## Word embeddings

In [None]:
! pip install --upgrade gensim

In [None]:
import gensim
gensim.__version__

'4.3.0'

In [None]:
# Fetch the pretrained 'word2vec-google-news-300' embeddings through gensim's
# downloader module. `wv` is the object the cells below query for vocabulary
# entries, word vectors and similarities.
# NOTE(review): this is presumably a large download -- confirm before running.

import gensim.downloader as api
wv = api.load('word2vec-google-news-300')

In [None]:
# Peek at the first ten vocabulary entries.
vocab = wv.index_to_key
vocab_total = len(vocab)
for index, word in enumerate(vocab[:10]):
    print(f"word #{index}/{vocab_total} is {word}")

word #0/3000000 is </s>
word #1/3000000 is in
word #2/3000000 is for
word #3/3000000 is that
word #4/3000000 is is
word #5/3000000 is on
word #6/3000000 is ##
word #7/3000000 is The
word #8/3000000 is with
word #9/3000000 is said


In [None]:
# Look up the embedding of a single word. Printing every component floods the
# output, so show the vector's shape and only its leading values.
vec_king = wv['king']
print(vec_king.shape)
print(vec_king[:10])

[ 1.25976562e-01  2.97851562e-02  8.60595703e-03  1.39648438e-01
 -2.56347656e-02 -3.61328125e-02  1.11816406e-01 -1.98242188e-01
  5.12695312e-02  3.63281250e-01 -2.42187500e-01 -3.02734375e-01
 -1.77734375e-01 -2.49023438e-02 -1.67968750e-01 -1.69921875e-01
  3.46679688e-02  5.21850586e-03  4.63867188e-02  1.28906250e-01
  1.36718750e-01  1.12792969e-01  5.95703125e-02  1.36718750e-01
  1.01074219e-01 -1.76757812e-01 -2.51953125e-01  5.98144531e-02
  3.41796875e-01 -3.11279297e-02  1.04492188e-01  6.17675781e-02
  1.24511719e-01  4.00390625e-01 -3.22265625e-01  8.39843750e-02
  3.90625000e-02  5.85937500e-03  7.03125000e-02  1.72851562e-01
  1.38671875e-01 -2.31445312e-01  2.83203125e-01  1.42578125e-01
  3.41796875e-01 -2.39257812e-02 -1.09863281e-01  3.32031250e-02
 -5.46875000e-02  1.53198242e-02 -1.62109375e-01  1.58203125e-01
 -2.59765625e-01  2.01416016e-02 -1.63085938e-01  1.35803223e-03
 -1.44531250e-01 -5.68847656e-02  4.29687500e-02 -2.46582031e-02
  1.85546875e-01  4.47265

In [None]:
# Compare 'car' against words that are progressively less related to it.
pairs = [
    ('car', other)
    for other in (
        'minivan',    # a minivan is a kind of car
        'bicycle',    # still a wheeled vehicle
        'airplane',   # ok, no wheels, but still a vehicle
        'cereal',     # ... and so on
        'communism',
    )
]
for w1, w2 in pairs:
    print(f'{w1!r}\t{w2!r}\t{wv.similarity(w1, w2):.2f}')

'car'	'minivan'	0.69
'car'	'bicycle'	0.54
'car'	'airplane'	0.42
'car'	'cereal'	0.14
'car'	'communism'	0.06


In [None]:
# Five vocabulary entries most similar to 'car' and 'minivan' taken together.
neighbors = wv.most_similar(positive=['car', 'minivan'], topn=5)
print(neighbors)

[('SUV', 0.8532192707061768), ('vehicle', 0.8175783753395081), ('pickup_truck', 0.7763688564300537), ('Jeep', 0.7567334175109863), ('Ford_Explorer', 0.7565720081329346)]


In [None]:
# Ask which word in the list fits least well with the others.
candidates = ['fire', 'water', 'land', 'sea', 'air', 'car']
print(wv.doesnt_match(candidates))

car


In [None]:
# Analogy query: 'king' and 'woman' count positively, 'man' negatively.
analogy = wv.most_similar(positive=['king', 'woman'], negative=['man'], topn=5)
print(analogy)

[('queen', 0.7118193507194519), ('monarch', 0.6189674139022827), ('princess', 0.5902431011199951), ('crown_prince', 0.5499460697174072), ('prince', 0.5377321839332581)]


In [None]:
# Compare one reference sentence with two candidates, each treated as a list
# of whitespace-separated words.
reference = "I was at the store".split()
for candidate in ("You did some shopping", "She ate an apple"):
    print(wv.n_similarity(reference, candidate.split()))

0.61323637
0.46933332


## Building a Neural Language Model

In [None]:
# if not ready we can just random init this layer and train it with the LM
# embedding_layer = nn.Embedding(vocab_size, emb_dim)

class my_LM(torch.nn.Module):
    """Fixed-window feed-forward neural language model.

    The embeddings of the ``context_size`` input word ids are concatenated,
    passed through one sigmoid hidden layer, and projected to a probability
    distribution over the vocabulary.

    Args:
        vocab_size: Number of words in the vocabulary (size of the output).
        emb_dim: Dimensionality of each word embedding.
        hidden_size: Number of units in the hidden layer.
        context_size: Number of preceding words fed to the model.
        embs: Optional tensor of pretrained embedding weights. When given, it
            is loaded with ``nn.Embedding.from_pretrained`` (which freezes the
            weights by default); it is expected to have shape
            ``(vocab_size, emb_dim)`` so that it matches the other layers.
            When ``None``, a randomly initialised embedding table is trained
            together with the rest of the model.
    """

    def __init__(self, vocab_size, emb_dim, hidden_size, context_size=3, embs=None):
        super().__init__()
        # Compare against None explicitly: truth-testing a multi-element
        # tensor (`if embs:`) raises a RuntimeError.
        if embs is not None:
            self.embedding_layer = nn.Embedding.from_pretrained(embs)
        else:
            self.embedding_layer = nn.Embedding(vocab_size, emb_dim)
        self.linear1 = nn.Linear(emb_dim * context_size, hidden_size)
        self.sigmoid = nn.Sigmoid()
        self.linear2 = nn.Linear(hidden_size, vocab_size)
        # forward() always produces a 1-D vector, so normalise over its only
        # axis; naming the axis avoids the implicit-dim deprecation warning.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return next-word probabilities for one context window.

        Args:
            x: Integer tensor holding ``context_size`` word ids. Because the
                embeddings are flattened completely, a single (unbatched)
                context is expected.

        Returns:
            1-D tensor of length ``vocab_size`` that sums to 1.
        """
        # flatten into a 1d output, concatenating vectors
        # from each embedding in the input
        x = torch.flatten(self.embedding_layer(x))
        x = self.linear1(x)
        x = self.sigmoid(x)
        x = self.linear2(x)
        x = self.softmax(x)
        return x

In [None]:
# Smoke-test the language model on three word ids (one per context position).
vocab_size = 10000
emb_dim = 300
hidden_size = 200

LM = my_LM(vocab_size, emb_dim, hidden_size)
# Call the module itself rather than .forward() so that registered hooks run.
out = LM(torch.LongTensor([0, 1, 2]))
print(out)

tensor([1.1169e-04, 1.1879e-04, 7.2119e-05,  ..., 6.3994e-05, 7.5007e-05,
        6.8660e-05], grad_fn=<SoftmaxBackward0>)


  x = self.softmax(x)


In [None]:
# torch.nn has no `CELoss`; the cross-entropy criterion is nn.CrossEntropyLoss.
# NOTE(review): CrossEntropyLoss applies log-softmax itself and expects raw
# scores, while my_LM.forward ends in a softmax -- drop that softmax (or use
# nn.NLLLoss on log-probabilities) before training with this loss.
loss_fn = nn.CrossEntropyLoss()
# Optimise the language model's parameters, not those of the earlier toy `model`.
optimizer = torch.optim.SGD(LM.parameters(), lr=0.001)

In [None]:
# TODO:
# - load some training data
# - build a mapping from each word to its index in the vocabulary (0 .. vocab_size - 1)
# - make the training sequences
# - y is a one-hot vector of the actual next word
# - then write functions for the forward pass and for inference time