#### SEQUENCE MODELS AND LONG-SHORT TERM MEMORY NETWORKS

https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
torch.manual_seed(1)

<torch._C.Generator at 0x7fdc8a6bdaf0>

In [3]:
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

print(inputs)
print(torch.cat(inputs))

[tensor([[0.6614, 0.2669, 0.0617]]), tensor([[ 0.6213, -0.4519, -0.1661]]), tensor([[-1.5228,  0.3817, -1.0276]]), tensor([[-0.5631, -0.8923, -0.0583]]), tensor([[-0.1955, -0.9656,  0.4224]])]
tensor([[ 0.6614,  0.2669,  0.0617],
        [ 0.6213, -0.4519, -0.1661],
        [-1.5228,  0.3817, -1.0276],
        [-0.5631, -0.8923, -0.0583],
        [-0.1955, -0.9656,  0.4224]])


In [4]:
lstm = nn.LSTM(3, 4)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state.
hidden = (torch.randn(1, 1, 4),
          torch.randn(1, 1, 4))

for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    
    # Pytorch’s LSTM expects all of its inputs to be 3D tensors. 
    # The first axis is the sequence itself, the second indexes instances in the mini-batch, 
    # and the third indexes elements of the input.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

print(out)
print(hidden)

tensor([[[-0.2293, -0.3008, -0.0047, -0.1296]]], grad_fn=<StackBackward>)
(tensor([[[-0.2293, -0.3008, -0.0047, -0.1296]]], grad_fn=<StackBackward>), tensor([[[-0.5694, -0.6107, -0.0071, -0.3985]]], grad_fn=<StackBackward>))


In [5]:
lstm = nn.LSTM(3, 4)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# alternatively, we can do the entire sequence all at once.
# the first value returned by LSTM is all of the hidden states throughout
# the sequence. the second is just the most recent hidden state
# (compare the last slice of "out" with "hidden" below, they are the same)
# The reason for this is that:
# "out" will give you access to all hidden states in the sequence
# "hidden" will allow you to continue the sequence and backpropagate,
# by passing it as an argument to the lstm at a later time
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 4), torch.randn(1, 1, 4))  # clean out hidden state
out, hidden = lstm(inputs, hidden)

print(out)
print(hidden)

tensor([[[ 0.0320,  0.1599,  0.2533,  0.2040]],

        [[-0.0795,  0.0909,  0.2159,  0.1160]],

        [[ 0.0252, -0.0008,  0.0478,  0.1728]],

        [[-0.0090,  0.0138,  0.0587,  0.1013]],

        [[ 0.3384, -0.1325, -0.0193,  0.4874]]], grad_fn=<StackBackward>)
(tensor([[[ 0.3384, -0.1325, -0.0193,  0.4874]]], grad_fn=<StackBackward>), tensor([[[ 0.6776, -0.5650, -0.0321,  0.8473]]], grad_fn=<StackBackward>))


In [6]:
t = torch.randn(1, 3)
print(t)
t.view(1, 1, -1) # add a new dim

tensor([[-0.8562, -0.7870, -0.8161]])


tensor([[[-0.8562, -0.7870, -0.8161]]])

In [8]:
import nltk

nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [12]:
from nltk import word_tokenize

text = "hello Sunday. :-) I love work!"

word_tokenize(text)

['hello', 'Sunday', '.', ':', '-', ')', 'I', 'love', 'work', '!']

In [15]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegressionCV

In [27]:
X, y = load_iris(return_X_y=True)

print(X, y)

X.shape, y.shape

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.2]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.6 1.4 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [4.5 2.3 1.3 0.3]
 [4.4 3.2 1.3 0.2]
 [5.  3.5 1.6 0.6]
 [5.1 3.8 1.9 0.4]
 [4.8 3.  1.4 0.3]
 [5.1 3.8 1.6 0.2]
 [4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]
 [7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.

((150, 4), (150,))

In [31]:
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(random_state=0).fit(X, y)

clf.predict(X)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [29]:
clf.predict_proba(X[:2, :])

array([[9.81802911e-01, 1.81970751e-02, 1.43580537e-08],
       [9.71729527e-01, 2.82704429e-02, 3.00353141e-08]])

In [30]:
clf.score(X, y)

0.9733333333333334