In [1]:
import torch
import torch.nn as nn

In [6]:
data = torch.randint(0, 100, (64, 100))

In [7]:
data.size()

torch.Size([64, 100])

In [8]:
embedding = nn.Embedding(num_embeddings=100, embedding_dim=512)

In [9]:
embedding(data).size()

torch.Size([64, 100, 512])

In [11]:
data = embedding(data)

In [12]:
rnn = nn.RNN(input_size=512, hidden_size=32, num_layers=1, batch_first=True)

In [15]:
len(rnn(data))

2

In [16]:
hidden, final = rnn(data)

In [17]:
hidden.size()

torch.Size([64, 100, 32])

In [18]:
final.size()

torch.Size([1, 64, 32])

In [19]:
hidden[:, -1, :].size()

torch.Size([64, 32])

In [21]:
check = hidden[:, -1, :] == final.squeeze()

In [22]:
if check.all() == True:
  print("Yes")
else:
  print("False")

Yes


In [32]:
class SimpleRNN(nn.Module):
  def __init__(self, vocabulary_size: int = 100, num_dimension: int = 512):
    super(SimpleRNN, self).__init__()
    self.vocabulary_size = vocabulary_size
    self.num_dimension = num_dimension
    self.hidden_size = 64
    self.out_features = 10

    self.embedding = nn.Embedding(num_embeddings=vocabulary_size, embedding_dim=num_dimension)
    self.model = nn.RNN(input_size=num_dimension, hidden_size=self.hidden_size, num_layers=1, batch_first=True)
    self.output = nn.Linear(in_features=self.hidden_size, out_features=self.out_features)

  def forward(self, x: torch.Tensor):
    if isinstance(x, torch.Tensor):
      x = self.embedding(x)
      _, final = self.model(x)
      return self.output(final.squeeze(0))

    else:
      raise Exception("INput should be the torch tensor format".capitalize())

In [None]:
class SimpleRNN(nn.Module):
  def __init__(self, vocabulary_size: int = 100, num_dimension: int = 512):
    super(SimpleRNN, self).__init__()
    self.vocabulary_size = vocabulary_size
    self.num_dimension = num_dimension
    self.hidden_size = 64 # Hidden state that is also known hidden layers
    self.out_features = 10 # Final neurons that is for the last layer

    self.embedding = nn.Embedding(num_embeddings=vocabulary_size, embedding_dim=num_dimension) # 64, 100, 512
    self.model = nn.RNN(input_size=num_dimension, hidden_size=self.hidden_size, num_layers=1, batch_first=True) # 512(dimension) would be passed to the RNN in the timestamps
    self.output = nn.Linear(in_features=self.hidden_size, out_features=self.out_features)

  def forward(self, x: torch.Tensor):
    if isinstance(x, torch.Tensor):
      x = self.embedding(x)
      _, final = self.model(x)
      return self.output(final.squeeze(0))

    else:
      raise Exception("INput should be the torch tensor format".capitalize())

In [33]:
classifier = SimpleRNN()

In [34]:
print("Total parameters of this simple model = ", sum(params.numel() for params in classifier.parameters()))

Total parameters of this simple model =  88842


In [35]:
data = torch.randint(0, 100, (64, 100))

In [36]:
data.size()

torch.Size([64, 100])

In [38]:
classifier(data).size()

torch.Size([64, 10])

In [41]:
A = torch.randn((1, 64, 512))

In [45]:
A.view(A.size(0) * A.size(1), A.size(2)).size()

torch.Size([64, 512])

In [262]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torchsummary import summary
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [263]:
batch_size = 32
vocabulary_size = 100
split_size = 0.30
num_dimension = 128
lr = 1e-4
EPOCHS = 2

In [264]:
X = torch.randint(0, 100, (400, 100))
y = torch.randint(0, 2, (400, ))

In [265]:
X.size(), y.size()

(torch.Size([400, 100]), torch.Size([400]))

In [266]:
X - torch.tensor(data = X, dtype=torch.float)
y = torch.tensor(data = y, dtype=torch.float)

X.size(), y.size()

  X - torch.tensor(data = X, dtype=torch.float)
  y = torch.tensor(data = y, dtype=torch.float)


(torch.Size([400, 100]), torch.Size([400]))

In [267]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split_size, random_state=42)

In [268]:
X_train.size(), y_train.size(), X_test.size(), y_test.size()

(torch.Size([280, 100]),
 torch.Size([280]),
 torch.Size([120, 100]),
 torch.Size([120]))

In [269]:
train_dataloader = DataLoader(dataset=list(zip(X_train, y_train)), batch_size=batch_size, shuffle=True)
valid_dataloader = DataLoader(dataset=list(zip(X_test, y_test)), batch_size=batch_size, shuffle=True)

In [270]:
data, labels = next(iter(train_dataloader))

data.size(), labels.size()

(torch.Size([32, 100]), torch.Size([32]))

In [271]:
class SimpleClassifier(nn.Module):
  def __init__(self, vocabulary_size: int = 100, num_dimension: int = 128):
    super(SimpleClassifier, self).__init__()

    self.vocabulary_size = vocabulary_size
    self.num_dimension = num_dimension
    self.hidden_layers = 32
    self.out_features = 1

    self.embedding = nn.Embedding(num_embeddings=self.vocabulary_size, embedding_dim=self.num_dimension)
    self.rnn_model = nn.RNN(input_size=self.num_dimension, hidden_size=self.hidden_layers, num_layers=1, batch_first=True)
    self.output_layer = nn.Linear(in_features=self.hidden_layers, out_features=self.out_features)

  def forward(self, x: torch.Tensor):
    if isinstance(x, torch.Tensor):
      embedding = self.embedding(x)
      _, final = self.rnn_model(embedding)
      return self.output_layer(final.squeeze(0))

    else:
      raise Exception("Input should be in the format of tensor".capitalize())

In [272]:
classifier = SimpleClassifier(vocabulary_size=vocabulary_size, num_dimension=num_dimension)

In [273]:
print("Total number of parameters = ", sum(params.numel() for params in classifier.parameters()))

Total number of parameters =  18017


In [274]:
classifier(data).size()

torch.Size([32, 1])

In [275]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=classifier.parameters(), lr = lr, betas=(0.9, 0.999))

In [276]:
labels

tensor([1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0.,
        1., 1., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0.])

In [277]:
total_train_loss = []
total_valid_loss = []

for epoch in range(EPOCHS):
  train_loss = []
  valid_loss = []

  for data, labels in train_dataloader:
    predicted = classifier(data)
    predicted = predicted.view(-1)

    loss = criterion(predicted, labels)

    train_loss.append(loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  for data, labels in valid_dataloader:
    predicted = classifier(data)
    predicted = predicted.view(-1)

    loss = criterion(predicted, labels)

    valid_loss.append(loss.item())

    total_train_loss.append(np.array(train_loss).mean)
    total_valid_loss.append(np.array(valid_loss).mean())

  print("Epochs: [{}/{}] - train_loss: {} - valid_loss: {}".format(epoch, EPOCHS, np.array(train_loss).mean(), np.array(valid_loss).mean()))

Epochs: [0/2] - train_loss: 0.7320349944962395 - valid_loss: 0.7510333508253098
Epochs: [1/2] - train_loss: 0.7253956662284003 - valid_loss: 0.7490456700325012


In [278]:
import numpy as np
from sklearn.metrics import accuracy_score

predicted_labels = []
actual_labels = []

for data, labels in valid_dataloader:
    predicted = classifier(data)
    predicted = predicted.view(-1)
    predicted = torch.sigmoid(predicted)
    predicted = (predicted > 0.5).int()

    predicted_labels.extend(predicted.detach().cpu().numpy().flatten())
    actual_labels.extend(labels.detach().cpu().numpy().flatten())

predicted_labels = np.array(predicted_labels)
actual_labels = np.array(actual_labels)

In [279]:
print("accuarcy = ", accuracy_score(predicted_labels, actual_labels))
print("precision = ", precision_score(predicted_labels, actual_labels))
print("recall = ", recall_score(predicted_labels, actual_labels))
print("f1 = ", f1_score(predicted_labels, actual_labels))

accuarcy =  0.4583333333333333
precision =  0.4262295081967213
recall =  0.4642857142857143
f1 =  0.4444444444444444


In [280]:
predicted_labels

array([0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 0, 1, 1, 1, 0, 0, 1, 0], dtype=int32)

In [281]:
actual_labels

array([0., 0., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 0.,
       1., 1., 1., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1., 0., 0.,
       0., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1.,
       1., 1., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0., 0.,
       0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 1., 1., 1.,
       0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 1., 1., 1.,
       0.], dtype=float32)