<a href="https://www.kaggle.com/code/dsaichand3/lstm-in-pytorch?scriptVersionId=95451440" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import numpy as np
import pandas as pd

import torch
from torch import nn

from sklearn.model_selection import train_test_split

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [3]:
vocab_size = 5000

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
dataset = pd.read_csv("../input/disaster-tweets/tweets.csv")

In [6]:
dataset = dataset[["text", "target"]]

In [7]:
dataset.head(5)

Unnamed: 0,text,target
0,"Communal violence in Bhainsa, Telangana. ""Ston...",1
1,Telangana: Section 144 has been imposed in Bha...,1
2,Arsonist sets cars ablaze at dealership https:...,1
3,Arsonist sets cars ablaze at dealership https:...,1
4,"""Lord Jesus, your love brings freedom and pard...",0


In [8]:
tokenizer = Tokenizer(
    num_words=25000,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=' ',
    char_level=False,
    oov_token=None,
    document_count=0
)

In [9]:
tokenizer.fit_on_texts(dataset["text"])

In [10]:
sequences = tokenizer.texts_to_sequences(dataset["text"])
labels = dataset["target"]

In [11]:
sequences = pad_sequences(sequences, maxlen=10, padding="post")

In [12]:
total_vocab_size = len(tokenizer.word_counts)
total_vocab_size

30729

In [13]:
x_train, x_val, y_train, y_val = train_test_split(sequences, labels, test_size=0.3, shuffle=True)

In [14]:
y_train = y_train.values.reshape(-1, 1)
y_val = y_val.values.reshape(-1, 1)

In [15]:
x_train.max()

24994

In [16]:
class NeuralNet(nn.Module):
    
    def __init__(self):
        super(NeuralNet, self).__init__()
        
        self.embedding = nn.Embedding(25000, 50)
        self.lstm = nn.LSTM(50, 64, batch_first=True)
        self.dropout = nn.Dropout(0.3)
        self.fc_1 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):        
        embeddings = self.embedding(x)
        
        lstm_0, (h_0, c_0) = self.lstm(embeddings)
        lstm_0 = lstm_0[:, -1, :] # because we only want the last sequence part of the sequence. first : tells to take all batches, last : tells to take all dimensions, -1 in the second category represents last sequence output
        
        linear = self.fc_1(lstm_0)
        linear = self.dropout(linear)
        out = self.sigmoid(linear)
    
        return out

model = NeuralNet()
model.to(device)

NeuralNet(
  (embedding): Embedding(25000, 50)
  (lstm): LSTM(50, 64, batch_first=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc_1): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)

In [17]:
# we first convert the data into tensor form
train_in_tensor = torch.utils.data.TensorDataset(torch.tensor(x_train),torch.tensor(y_train))
val_in_tensor = torch.utils.data.TensorDataset(torch.tensor(x_val), torch.tensor(y_val))

# then we use dataloaders to perform batch operations
train_loader = torch.utils.data.DataLoader(train_in_tensor, batch_size=15)
val_loader = torch.utils.data.DataLoader(val_in_tensor, batch_size=15)

In [18]:
# define a loss function
loss_fn = torch.nn.BCELoss(reduction = "mean").cuda()

# define a optimizer
optim = torch.optim.Adam(model.parameters(), lr = 0.05)

# define LR scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=5, gamma=0.1)

In [19]:
loss_list = []
def train_model(model, epochs, optim, loss_fn, validate):
    
    for epoch in range(epochs):
        avg_train_loss = 0
        for x_batch, y_batch in train_loader:
            
            x_batch = x_batch.to(device) # data loading into cuda/gpu is not in-place operation
            y_batch = y_batch.to(device)
            
            optim.zero_grad()
            
            y_pred = model(x_batch)
            
            loss = loss_fn(y_pred, y_batch.float())

            loss.backward()
        
            torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=1)
            
            optim.step()
            
            avg_train_loss += loss.item()/len(train_loader)
               
        
        if validate:
            with torch.no_grad():
                model.eval()
                avg_val_loss = 0
                for x_batch, y_batch in val_loader:
                    
                    x_batch = x_batch.to(device)
                    y_batch = y_batch.to(device)
            
                    y_pred = model(x_batch)
                    loss = loss_fn(y_pred, y_batch.float())
                    avg_val_loss += loss.item()/len(val_loader)
            
            model.train()
        
        scheduler.step()
        
        print('Epoch {} \t train_loss={:.4f} \t validation_loss={:.4f}'.format(epoch + 1, avg_train_loss, avg_val_loss))
        
    return model

In [20]:
train_model(model, 10, optim, loss_fn, validate=True)



Epoch 1 	 train_loss=0.5613 	 validation_loss=0.5004
Epoch 2 	 train_loss=0.5414 	 validation_loss=0.5199
Epoch 3 	 train_loss=0.5315 	 validation_loss=0.4986
Epoch 4 	 train_loss=0.5229 	 validation_loss=0.5209
Epoch 5 	 train_loss=0.4985 	 validation_loss=0.5957
Epoch 6 	 train_loss=0.4808 	 validation_loss=0.5089
Epoch 7 	 train_loss=0.4261 	 validation_loss=0.5124
Epoch 8 	 train_loss=0.4028 	 validation_loss=0.5243
Epoch 9 	 train_loss=0.3805 	 validation_loss=0.5291
Epoch 10 	 train_loss=0.3643 	 validation_loss=0.5452


NeuralNet(
  (embedding): Embedding(25000, 50)
  (lstm): LSTM(50, 64, batch_first=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc_1): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)