In [2]:
# create a word-embedding model using PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1234)

<torch._C.Generator at 0x7816b072d9b0>

In [6]:
word_to_ix={"data":0,"science":1}
word_to_ix

{'data': 0, 'science': 1}

In [8]:
embeds=nn.Embedding(2,5)#2 words in vocab, 5 dimensional embeddings

In [9]:
lookup_tensor=torch.tensor([word_to_ix["data"]],dtype=torch.long)

In [10]:
lookup_tensor

tensor([0])

Now, we set up an embedding layer

In [11]:
hello_embed=embeds(lookup_tensor)

In [13]:
print(hello_embed)

tensor([[ 0.1021, -0.2590, -0.1549, -1.3706, -0.1319]],
       grad_fn=<EmbeddingBackward0>)


In [27]:
CONTEXT_SIZE=2
EMBEDDING_DIM=10

The following test sentence is the wiki article of PyTorch available at https://en.wikipedia.org/wiki/PyTorch

In [18]:
test_sentence="""PyTorch is a machine learning library based on the Torch library,[4][5][6] used for applications such as computer vision and natural language processing,[7] originally developed by Meta AI and now part of the Linux Foundation umbrella.[8][9][10][11] It is recognized as one of the two most popular machine learning libraries alongside TensorFlow, offering free and open-source software released under the modified BSD license. Although the Python interface is more polished and the primary focus of development, PyTorch also has a C++ interface.[12]""".split()

Introducing tokenization of the words to split sentences into small chunks.

In [20]:
trigrams=[(test_sentence[i], test_sentence[i+1], test_sentence[i+2]) for i in range(len(test_sentence)-2)]

In [21]:
print(trigrams[:3])

[('PyTorch', 'is', 'a'), ('is', 'a', 'machine'), ('a', 'machine', 'learning')]


In [22]:
vocab=set(test_sentence)

In [24]:
word_to_ix={word: i for i, word in enumerate(vocab)}
word_to_ix

{'originally': 0,
 'and': 1,
 'umbrella.[8][9][10][11]': 2,
 'BSD': 3,
 'libraries': 4,
 'license.': 5,
 'interface': 6,
 'free': 7,
 'natural': 8,
 'polished': 9,
 'by': 10,
 'has': 11,
 'learning': 12,
 'processing,[7]': 13,
 'machine': 14,
 'now': 15,
 'It': 16,
 'popular': 17,
 'Torch': 18,
 'development,': 19,
 'under': 20,
 'Although': 21,
 'such': 22,
 'used': 23,
 'open-source': 24,
 'applications': 25,
 'software': 26,
 'based': 27,
 'more': 28,
 'a': 29,
 'for': 30,
 'the': 31,
 'alongside': 32,
 'offering': 33,
 'developed': 34,
 'TensorFlow,': 35,
 'library,[4][5][6]': 36,
 'AI': 37,
 'Foundation': 38,
 'most': 39,
 'released': 40,
 'vision': 41,
 'one': 42,
 'language': 43,
 'primary': 44,
 'focus': 45,
 'computer': 46,
 'C++': 47,
 'library': 48,
 'Meta': 49,
 'modified': 50,
 'Python': 51,
 'as': 52,
 'two': 53,
 'recognized': 54,
 'also': 55,
 'on': 56,
 'interface.[12]': 57,
 'of': 58,
 'Linux': 59,
 'part': 60,
 'is': 61,
 'PyTorch': 62}

Extracting key-word via PyTorch n-grams.

In [28]:
class NGramLanguageModeler(nn.Module):
  def __init__(self, vocab_size, embedding_dim,context_size):
    super(NGramLanguageModeler,self).__init__()
    self.embeddings=nn.Embedding(vocab_size,embedding_dim)
    self.linear1=nn.Linear(context_size*embedding_dim,128)
    self.linear2=nn.Linear(128,vocab_size)

  def forward(self,inputs):
    embeds=self.embeddings(input).view((1,-1))
    out=F.relu(self.linear1(embeds))
    out=self.linear2(out)
    log_probs=F.log_softmax(out,dim=1)
    return log_probs

losses=[]
loss_function=nn.NLLLoss()
model=NGramLanguageModeler(len(vocab),EMBEDDING_DIM,CONTEXT_SIZE)
optimizer=optim.SGD(model.parameters(),lr=0.001)

In [29]:
losses

[]

In [30]:
loss_function

NLLLoss()

In [31]:
model

NGramLanguageModeler(
  (embeddings): Embedding(63, 10)
  (linear1): Linear(in_features=20, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=63, bias=True)
)

In [32]:
optimizer

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [54]:
#Context extraction from sentences
for epoch in range(10):
  total_loss=0
  for context, target in zip(trigrams,trigrams):
    #Step 1: Pass the inputs into model(words into numbers then wrap into tensores)
    context_idxs=torch.tensor([word_to_ix[w] for w in context],dtype=torch.long)
    #Step 2: Zero-ing out the gradients a torch accumulats gradients
    model.zero_grad()
    #Step 3: Forward pass, log-probabilities over the next word
    log_probs=model(context_idxs)
    #Step 4: Compute the loss function.
    loss=loss_function(log_probs, torch.tensor([word_to_ix[target]],dtype=torch.long))
    #Step 5: Backward pass and update the gradient
    loss.backward()
    optimizer.step()

    total_loss+=loss.item()
  loss.append(total_loss)
print(losses)

TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not method

In [55]:
#continuous bag of words (CBOW) and skip gram
CONTEXT_SIZE=2;
raw_text="""PyTorch is a machine learning library based on the Torch library,[4][5][6] used for applications such as computer vision and natural language processing,[7] originally developed by Meta AI and now part of the Linux Foundation umbrella.[8][9][10][11] It is recognized as one of the two most popular machine learning libraries alongside TensorFlow, offering free and open-source software released under the modified BSD license. Although the Python interface is more polished and the primary focus of development, PyTorch also has a C++ interface.[12]

A number of pieces of deep learning software are built on top of PyTorch, including Tesla Autopilot,[13] Uber's Pyro,[14] Hugging Face's Transformers,[15] PyTorch Lightning,[16][17] and Catalyst.[18][19]""".split()

In [56]:
#de-duplicate the array
vocab=set(raw_text)
vocab_size=len(vocab)

word_to_ix={word: i for i, word in enumerate(vocab)}
data=[]
for i in range(2, len(raw_text)-2):
  context=[raw_text[i-2],raw_text[i-1],raw_text[i+1],raw_text[i+2]]
  target=raw_text[i]
  data.append((context,target))
print(data[:5])

[(['PyTorch', 'is', 'machine', 'learning'], 'a'), (['is', 'a', 'learning', 'library'], 'machine'), (['a', 'machine', 'library', 'based'], 'learning'), (['machine', 'learning', 'based', 'on'], 'library'), (['learning', 'library', 'on', 'the'], 'based')]


In [59]:
class CBOW(nn.Module):

  def __init__(self):
      pass
      def forward(self,inputs):
        pass

def  make_context_vector(context, word_to_ix):
  idxs=[word_to_ix[w] for w in context]
  return torch.tensor(idxs,dtype=torch.long)

make_context_vector(data[0][0], word_to_ix)

tensor([19, 42, 47,  4])

In [60]:
lin=nn.Linear(5,3)
data=torch.randn(2,5)
print(lin(data))

tensor([[-0.6651,  0.0700, -0.2061],
        [ 0.2934,  0.5486, -0.4480]], grad_fn=<AddmmBackward0>)


In [61]:
data=torch.randn(2,2)
print(data)
print(F.relu(data))

tensor([[ 1.4497, -0.0026],
        [-1.7472, -0.6526]])
tensor([[1.4497, 0.0000],
        [0.0000, 0.0000]])


In [62]:
data=torch.randn(5)
print(data)
print(F.softmax(data,dim=0))
print(F.softmax(data,dim=0).sum())
print(F.log_softmax(data,dim=0))

tensor([ 0.3233,  0.8929, -0.5331, -0.2413, -0.9804])
tensor([0.2480, 0.4383, 0.1053, 0.1410, 0.0673])
tensor(1.0000)
tensor([-1.3944, -0.8247, -2.2507, -1.9590, -2.6980])


Now, we shall prepare a specific form of recurrent neural network, called as Long-Short Term Memory Model (LSTMM).

In [64]:
lstm=nn.LSTM(3,3)
inputs=[torch.randn(1,3) for _ in range(5)]
hidden=(torch.randn(1,1,3),
        torch.randn(1,1,3))

for i in inputs:
  out, hidden=lstm(i.view(1,1,-1),hidden)

inputs=torch.cat(inputs).view(len(inputs),1,-1)
hidden=(torch.randn(1,1,3),torch.randn(1,1,3))
out,hidden=lstm(inputs,hidden)
print(out)
print(hidden)

tensor([[[-0.0124, -0.0072,  0.3236]],

        [[-0.1823,  0.2966,  0.2160]],

        [[-0.2137,  0.2821,  0.2430]],

        [[-0.2842,  0.4189,  0.1871]],

        [[-0.1355,  0.1876,  0.0799]]], grad_fn=<MkldnnRnnLayerBackward0>)
(tensor([[[-0.1355,  0.1876,  0.0799]]], grad_fn=<StackBackward0>), tensor([[[-0.3803,  0.6267,  0.1372]]], grad_fn=<StackBackward0>))


In [66]:
#Prepare a sequence of words as training data to form the LSTM network
def prepare_sequence(seq, to_ix):
  idxs=[to_ix[w] for w in seq]
  return torch.tensor(idxs,dtype=torch.long)

training_data=[("Probability and random variables are integral part of computation".split(),["DET","NN","V","DET","NN"]),
    ("I am learning LLM,LAM,G-AI".split(),["NN","V","DET","NN"])
               ]

In [67]:
training_data

[(['Probability',
   'and',
   'random',
   'variables',
   'are',
   'integral',
   'part',
   'of',
   'computation'],
  ['DET', 'NN', 'V', 'DET', 'NN']),
 (['I', 'am', 'learning', 'LLM,LAM,G-AI'], ['NN', 'V', 'DET', 'NN'])]

In [68]:
word_t_ix={}
for sent, tags in training_data:
  for word in sent:
    if word not in word_to_ix:
      word_to_ix[word]=len(word_t_ix)
print(word_to_ix)
tag_to_ix={"DET":0,"V":1,"NN":2}

EMBEDDING_DIM=6
HIDDEEN_DIM=6

{'Pyro,[14]': 0, 'BSD': 1, 'natural': 2, 'by': 3, 'learning': 4, 'now': 5, 'development,': 6, 'based': 7, 'developed': 8, 'AI': 9, 'Foundation': 10, 'one': 11, 'pieces': 12, 'language': 13, 'are': 14, 'C++': 15, 'A': 16, 'as': 17, 'Linux': 18, 'PyTorch': 19, 'Lightning,[16][17]': 20, 'Autopilot,[13]': 21, 'free': 22, 'top': 23, 'polished': 24, 'Transformers,[15]': 25, 'processing,[7]': 26, 'It': 27, 'Torch': 28, 'under': 29, 'for': 30, 'the': 31, 'Hugging': 32, 'vision': 33, 'primary': 34, 'modified': 35, 'Meta': 36, "Face's": 37, 'two': 38, 'also': 39, 'on': 40, 'part': 41, 'is': 42, 'originally': 43, 'umbrella.[8][9][10][11]': 44, 'interface': 45, 'has': 46, 'machine': 47, 'popular': 48, 'such': 49, 'used': 50, 'open-source': 51, 'applications': 52, 'more': 53, 'Tesla': 54, 'offering': 55, 'TensorFlow,': 56, 'library': 57, 'Python': 58, 'PyTorch,': 59, 'interface.[12]': 60, 'of': 61, 'and': 62, 'Catalyst.[18][19]': 63, 'libraries': 64, 'license.': 65, 'Although': 66, 'software': 67, 

In [69]:
class LSTMTagger(nn.Module):

  def __init__(self,embedding_dim,hidden_dim,vocab_size,target_size):
      super(LSTMTagger,self).__init__()
      self.hidden_dim=hidden_dim

      self.word_embeddings=nn.Embedding(vocab_size,embedding_dim)
      self.lstm=nn.LSTM(embedding_dim,hidden_dim)

      self.hidden2tag=nn.Linear(hidden_dim,target_size)
      self.hidden=self.init_hidden()

  def init_hidden(self):
    return (torch.zeros(1,1,self.hidden_dim),
            torch.zeros(1,1,self.hidden_dim))

  def forward(self, sentence):
    embdes=self.word_embeddings(sentence)
    lstm_out, self.hidden=self.lstm(
        embdes.view(len(sentence),1,-1),self.hidden)
    tag_space=self.hidden2tag(lstm_out.view(len(sentence),-1))
    tag_scores=F.log_softmax(tag_space,dim=1)
    return tag_scores

In [74]:
model=LSTMTagger(EMBEDDING_DIM,HIDDEEN_DIM,len(word_to_ix),len(tag_to_ix))
loss_function=nn.NLLLoss()
optimizer=optim.SGD(model.parameters(),lr=.1)
model
loss_function
optimizer

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.1
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [75]:
with torch.no_grad():
  inputs=prepare_sequence(training_data[0][0],word_to_ix)
  tag_scores=model(inputs)
  print(tag_scores)

tensor([[-1.0243, -1.2688, -1.0222],
        [-1.0162, -1.2989, -1.0073],
        [-1.0630, -1.2351, -1.0113],
        [-1.0429, -1.2376, -1.0287],
        [-0.9808, -1.3155, -1.0310],
        [-1.0002, -1.2729, -1.0436],
        [-0.9511, -1.3455, -1.0405],
        [-0.9889, -1.3068, -1.0291],
        [-1.0214, -1.2596, -1.0325]])


In [77]:
for epoch in range(300):
  for sentence, tags in training_data:
    model.zero_grad()
    model.hidden=model.init_hidden()
    sentence_in=prepare_sequence(sentence,word_to_ix)
    targets=prepare_sequence(tags,tag_to_ix)
    tag_scores=model(sentence_in)
    loss=loss_function(tag_scores, targets)
    loss.backward()
    optimizer.step()

ValueError: Expected input batch_size (9) to match target batch_size (5).