# Sentiment Analysis

## 0. Environment Setup

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# !pip install pyvi
# !pip install datasets
# !pip install transformers
# !pip install lightning
# !pip install wandb

In [None]:
import os
import pandas as pd
import numpy as np
import shutil
import zipfile
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from datasets import Dataset
from transformers import AutoModel, AutoTokenizer
from tqdm import tqdm
import glob
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from pyvi.ViTokenizer import tokenize
from string import digits
import lightning as L
import torch.nn.functional as F
import wandb

In [None]:
# Set environment parameters for debugging
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ["SPCONV_DEBUG_SAVE_PATH"] = "spconv_log/error.log"
if not os.path.exists('spconv_log'):
    os.mkdir('spconv_log')

torch.autograd.set_detect_anomaly(True)
torch.set_float32_matmul_precision('high')

In [None]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [None]:
DATA_PATH = 'dataset'
SAVE_PATH = 'result'
DRIVE_PATH = 'drive/MyDrive/HCMUT/NLP/asm'

# if not os.path.exists(DATA_PATH):
#     os.makedirs(DATA_PATH, exist_ok=True)
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH, exist_ok=True)

nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# # Copy dataset to instance memory for stable data processing
# shutil.copytree(os.path.join(DRIVE_PATH, 'dataset'), DATA_PATH)
# glob.glob(os.path.join(DATA_PATH, '*'))

## 1. Data Preprocessing

In [None]:
# Load data
train_df = pd.read_csv(os.path.join(DATA_PATH, 'vlsp_sentiment_train.csv'), sep = '\t')
test_df = pd.read_csv(os.path.join(DATA_PATH, 'vlsp_sentiment_test.csv'), sep = '\t')

### 1.1 Data overview

In [None]:
# data overview
train_df.head()

Unnamed: 0,Class,Data
0,-1,"Mình đã dùng anywhere thế hệ đầu, quả là đầy t..."
1,-1,"Quan tâm nhất là độ trễ có cao không, dùng thi..."
2,-1,"dag xài con cùi bắp 98k....pin trâu, mỗi tội đ..."
3,-1,logitech chắc hàng phải tiền triệu trở lên dùn...
4,-1,"Đang xài con m175 cùi mía , nhà xài nhiều chuộ..."


In [None]:
test_df.head()

Unnamed: 0,Class,Data
0,-1,Nói thiệt là mình thì thì chuột nào mình cũng ...
1,-1,Đang dùng mx1. Cũng ngon nhưng chưa đầy năm mà...
2,-1,"Chưa thấy đc điểm thuyết phục để mua, nhất là ..."
3,-1,"Những phần xem báo tra cứu bản đồ, dịch vụ.. d..."
4,-1,ĐÚNG LÀ MUA Ở VIỆT NAM KHÔNG ỨNG DỤNG ĐƯỢC GÌ ...


In [None]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5100 entries, 0 to 5099
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Class   5100 non-null   int64 
 1   Data    5100 non-null   object
dtypes: int64(1), object(1)
memory usage: 79.8+ KB


In [None]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1050 entries, 0 to 1049
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Class   1050 non-null   int64 
 1   Data    1050 non-null   object
dtypes: int64(1), object(1)
memory usage: 16.5+ KB


### 1.2 Data prepairation

In [None]:
def custom_tokenize(sent):
    sent = tokenize(sent.lower())
    sents = sent_tokenize(sent)
    sent = [a.replace('-', '') for a in sent]
    sent = ''.join(sent)
    return sent

In [None]:
test_df.loc[:, 'Data'] = [custom_tokenize(t) for t in test_df['Data']]
train_df.loc[:, 'Data'] = [custom_tokenize(t) for t in train_df['Data']]

In [None]:
train_df.head()

Unnamed: 0,Class,Data
0,-1,"mình đã dùng anywhere thế_hệ đầu , quả là đầy ..."
1,-1,"quan_tâm nhất là độ trễ có cao không , dùng th..."
2,-1,"dag xài con cùi bắp 98k ... . pin trâu , mỗi t..."
3,-1,logitech chắc hàng phải tiền triệu trở lên dùn...
4,-1,"đang xài con m175 cùi mía , nhà xài nhiều chuộ..."


In [None]:
# map the label to non-negative int
train_df["Class"] += 1
test_df["Class"] += 1

In [None]:
print(train_df.Class.unique(), '\n', test_df.Class.unique())

[0 2 1] 
 [0 2 1]


In [None]:
test_set = Dataset.from_pandas(test_df)
train_set = Dataset.from_pandas(train_df)

### 1.3 Word Embeddings

In [None]:
phobert = AutoModel.from_pretrained("vinai/phobert-base-v2")
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base-v2")
phobert.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of RobertaModel were not initialized from the model checkpoint at vinai/phobert-base-v2 and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaModel(
  (embeddings): RobertaEmbeddings(
    (word_embeddings): Embedding(64001, 768, padding_idx=1)
    (position_embeddings): Embedding(258, 768, padding_idx=1)
    (token_type_embeddings): Embedding(1, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): RobertaEncoder(
    (layer): ModuleList(
      (0-11): 12 x RobertaLayer(
        (attention): RobertaAttention(
          (self): RobertaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): RobertaSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dropou

In [None]:
def tokenize_word(dts):
    max_len = 256
    sentences = dts['Data']
    input_ids = [tokenizer.encode(sentence , max_length = 256, truncation = True) for sentence in sentences]
    with torch.no_grad():
        features = [phobert(torch.tensor([input_id]).to(device)) for input_id in input_ids]  # Models outputs are now tuples

    seqs = [torch.squeeze(feature.last_hidden_state) for feature in features]

    seqs[0] = torch.permute(seqs[0], (1, 0))
    seqs[0] = nn.ConstantPad1d((0, max_len - seqs[0].shape[1]), 0)(seqs[0])
    seqs[0] = torch.permute(seqs[0], (1, 0))

    embeddings = torch.nn.utils.rnn.pad_sequence(seqs, batch_first=True)
    embeddings = embeddings.to('cpu')
    embeddings = embeddings.numpy()

    list_emb = list(embeddings)
    dts['Data'] = list_emb

    return dts

In [None]:
tokenized_test_datasets = test_set.map(tokenize_word, batched=True, batch_size = 64)
tokenized_train_datasets = train_set.map(tokenize_word, batched=True, batch_size = 64)

tokenized_train_datasets.set_format("torch")
tokenized_test_datasets.set_format("torch")

Map:   0%|          | 0/1050 [00:00<?, ? examples/s]

Map:   0%|          | 0/5100 [00:00<?, ? examples/s]

### 1.4 Dataloader

In [None]:
train_dataloader = DataLoader(
    tokenized_train_datasets,
    batch_size=512,
    shuffle=True,
    drop_last=False,
    pin_memory=True,
    num_workers=12,
)
test_dataloader = DataLoader(
    tokenized_test_datasets,
    batch_size=512,
    shuffle=False,
    drop_last=False,
    pin_memory=True,
    num_workers=12,
)

## 2. Model prepairation

In [None]:
class BaseNetwork(L.LightningModule):
    def __init__(self):
        super(BaseNetwork, self).__init__()
        self.set_criterion()
        self.set_scheduler()
        self.reset_val_metrics()
        self.reset_train_metrics()

    def set_optimizer(self, optim=None):
        if optim == None:
            optim = torch.optim.Adam(self.parameters(), lr=1e-3)
        self.optim = optim

    def set_scheduler(self, scheduler=None):
        self.scheduler = scheduler

    def set_criterion(self, crit=None):
      if crit == None:
          crit = nn.CrossEntropyLoss()
      self.criterion = crit

    def configure_optimizers(self):
        return {
            "optimizer": self.optim,
            "lr_scheduler": self.scheduler,
        } if self.scheduler is not None else self.optim

    def training_step(self, train_batch, batch_idx):
        x, y = train_batch['Data'].permute(0,2,1), train_batch['Class']
        pred = self.forward(x)
        loss = self.loss_cal(pred, y)
        self.train_metrics['train_loss'] += loss
        pred = torch.argmax(pred, dim=1)
        self.train_metrics['train_acc'] += ((pred == y).sum().item()/len(pred))
        self.train_count += 1
        return loss

    def validation_step(self, val_batch, batch_idx):
        x, y = val_batch['Data'].permute(0,2,1), val_batch['Class']
        pred = self.forward(x)
        self.val_metrics['val_loss'] += self.loss_cal(pred, y)
        pred = torch.argmax(pred, dim=1)
        self.val_metrics['val_acc'] += ((pred == y).sum().item()/len(pred))
        self.val_count += 1

    def loss_cal(self, pred, label):
        return self.criterion(pred, label)

    def reset_train_metrics(self):
        self.train_metrics = {
            'train_loss': 0,
            'train_acc': 0,
        }
        self.train_count = 0

    def log_train_metrics(self):
        train_metrics = {k: v/self.train_count for k, v in self.train_metrics.items()}
        self.log_dict(train_metrics)
        self.reset_train_metrics()

    def reset_val_metrics(self):
        self.val_metrics = {
            'val_loss': 0,
            'val_acc': 0,
        }
        self.val_count = 0

    def log_val_metrics(self):
        val_metrics = {k: v/self.val_count for k, v in self.val_metrics.items()}
        self.log_dict(val_metrics)
        self.reset_val_metrics()

In [None]:
class CNN(BaseNetwork):
    def __init__(
        self,
        in_features = 768,
        num_filters = 128,
        kernel_sizes = [3, 4, 5],
        seq_len = 256,
        drop = 0.2,
    ):
        super(CNN, self).__init__()
        self.conv_0 = nn.Conv1d(in_features, num_filters, kernel_sizes[0])
        self.pool_0 = nn.MaxPool1d(seq_len - kernel_sizes[0] + 1, stride = 1)

        self.conv_1 = nn.Conv1d(in_features, num_filters, kernel_sizes[1])
        self.pool_1 = nn.MaxPool1d(seq_len - kernel_sizes[1] + 1, stride = 1)

        self.conv_2 = nn.Conv1d(in_features, num_filters, kernel_sizes[2])
        self.pool_2 = nn.MaxPool1d(seq_len - kernel_sizes[2] + 1 , stride = 1)

        self.flatten = nn.Flatten()

        self.dropout = nn.Dropout(drop)

        self.dense_0 = nn.Linear(num_filters * 3, 3)

        self.set_optimizer()

    def forward(self, x):
        x_0 = F.relu(self.pool_0(self.conv_0(x)))
        x_1 = F.relu(self.pool_1(self.conv_1(x)))
        x_2 = F.relu(self.pool_2(self.conv_2(x)))
        merged_tensor = self.flatten(torch.cat([x_0, x_1, x_2], dim = 1))
        merged_tensor = self.dropout(merged_tensor)
        x = F.softmax(self.dense_0(merged_tensor), dim=1)
        return x

In [None]:
class LSTMMean(BaseNetwork):
    def __init__(
        self,
        in_features = 768,
        hidden_size = 512,
        hidden_size_2 = 128,
        drop = 0.2,
    ):
        super(LSTMMean, self).__init__()
        self.lstm_0 = nn.LSTMCell(input_size = in_features, hidden_size = hidden_size)
        self.lstm_1 = nn.LSTMCell(input_size = hidden_size, hidden_size = hidden_size_2)
        self.dropout = nn.Dropout(drop)
        self.dense_0 = nn.Linear(hidden_size_2, 3)

        self.set_optimizer()

    def forward(self, x):
        x = torch.permute(x, (2,0,1))
        output = []
        for i in range(x.size()[0]):
            hx, cx = self.lstm_0(x[i])
            output.append(hx)
        x = torch.stack(output, dim=0) #256, 4, 512
        output = []
        for i in range(x.size()[0]):
            hx, cx = self.lstm_1(x[i])
            output.append(hx)
        x = torch.stack(output, dim=0).mean(dim=0) #256, 4, 128
        x =  F.softmax(self.dense_0(x), dim=1)
        return x

In [None]:
class LSTMLast(BaseNetwork):
    def __init__(
        self,
        in_features = 768,
        hidden_size = 512,
        hidden_size_2 = 128,
        drop = 0.2,
    ):
        super(LSTMLast, self).__init__()
        self.lstm_0 = nn.LSTMCell(input_size = in_features, hidden_size = hidden_size)
        self.lstm_1 = nn.LSTMCell(input_size = hidden_size, hidden_size = hidden_size_2)
        self.dropout = nn.Dropout(drop)
        self.dense_0 = nn.Linear(hidden_size_2, 3)

        self.set_optimizer()

    def forward(self, x):
        x = torch.permute(x, (2,0,1))
        output = []
        for i in range(x.size()[0]):
            hx, cx = self.lstm_0(x[i])
            output.append(hx)
        x = torch.stack(output, dim=0) #256, 4, 512
        output = []
        for i in range(x.size()[0]):
            hx, cx = self.lstm_1(x[i])
            output.append(hx)
        x = torch.tensor(output[-1])
        x =  F.softmax(self.dense_0(x), dim=1)
        return x

In [None]:
class CNN2LSTM(BaseNetwork):
    def __init__(
        self,
        in_features = 768,
        hidden_size = 512,
        hidden_size_2 = 128,
        num_filters = 128,
        kernel_sizes = [3, 4, 5],
        seq_len = 256,
        drop = 0.2,
    ):
        super(CNN2LSTM, self).__init__()

        self.conv_0 = nn.Conv1d(in_features, num_filters, kernel_sizes[0], padding=kernel_sizes[0]//2)
        self.pad_1 = torch.nn.ZeroPad1d((2, 1))
        self.conv_1 = nn.Conv1d(in_features, num_filters, kernel_sizes[1])
        self.conv_2 = nn.Conv1d(in_features, num_filters, kernel_sizes[2], padding=kernel_sizes[2]//2)

        self.lstm_0 = nn.LSTMCell(input_size = num_filters*3, hidden_size = hidden_size)
        self.lstm_1 = nn.LSTMCell(input_size = hidden_size, hidden_size = hidden_size_2)

        self.dropout = nn.Dropout(drop)
        self.dense_0 = nn.Linear(hidden_size_2, 3)


    def forward(self, x):
        x_0 = F.relu(self.conv_0(x))
        x_1 = F.relu(self.conv_1(self.pad_1(x)))
        x_2 = F.relu(self.conv_2(x))

        x = torch.cat([x_0, x_1, x_2], dim = 1)

        x = torch.permute(x, (2,0,1))
        output = []
        for i in range(x.size()[0]):
            hx, cx = self.lstm_0(x[i])
            output.append(hx)
        x = torch.stack(output, dim=0) #256, 4, 512
        output = []
        for i in range(x.size()[0]):
            hx, cx = self.lstm_1(x[i])
            output.append(hx)
        x = torch.stack(output, dim=0).mean(dim=0) #256, 4, 128
        x =  F.softmax(self.dense_0(x), dim=1)
        return x

In [None]:
class LSTM2CNN(BaseNetwork):
    def __init__(
        self,
        in_features = 768,
        hidden_size = 512,
        hidden_size_2 = 128,
        num_filters = 128,
        kernel_sizes = [3, 4, 5],
        seq_len = 256,
        drop = 0.2,
    ):
        super(LSTM2CNN, self).__init__()

        self.lstm_0 = nn.LSTMCell(input_size = in_features, hidden_size = hidden_size)
        self.lstm_1 = nn.LSTMCell(input_size = hidden_size, hidden_size = hidden_size_2)

        self.conv_0 = nn.Sequential(
            nn.Conv1d(hidden_size_2, num_filters, kernel_sizes[0]),
            nn.MaxPool1d(seq_len - kernel_sizes[0] + 1, stride = 1),
            nn.ReLU(),
        )

        self.conv_1 = nn.Sequential(
            nn.Conv1d(hidden_size_2, num_filters, kernel_sizes[1]),
            nn.MaxPool1d(seq_len - kernel_sizes[1] + 1, stride = 1),
            nn.ReLU(),
        )

        self.conv_2 = nn.Sequential(
            nn.Conv1d(hidden_size_2, num_filters, kernel_sizes[2]),
            nn.MaxPool1d(seq_len - kernel_sizes[2] + 1 , stride = 1),
            nn.ReLU(),
        )

        self.flatten = nn.Flatten()

        self.dropout = nn.Dropout(drop)

        self.dense_0 = nn.Linear(num_filters * 3, 3)

        self.set_optimizer()


    def forward(self, x):
        x = torch.permute(x, (2,0,1))
        output = []
        for i in range(x.size()[0]):
            hx, cx = self.lstm_0(x[i])
            output.append(hx)
        x = torch.stack(output, dim=0) #256, 4, 512
        output = []
        for i in range(x.size()[0]):
            hx, cx = self.lstm_1(x[i])
            output.append(hx)
        x = torch.stack(output, dim=0).permute(1,2,0) #256, 4, 128

        x_0 = self.conv_0(x)
        x_1 = self.conv_1(x)
        x_2 = self.conv_2(x)
        merged_tensor = self.flatten(torch.cat([x_0, x_1, x_2], dim = 1))
        merged_tensor = self.dropout(merged_tensor)
        x = F.softmax(self.dense_0(merged_tensor), dim=1)

        return x

In [None]:
class LSTM_CNN_dual(BaseNetwork):
    def __init__(
        self,
        in_features = 768,
        hidden_size = 512,
        hidden_size_2 = 128,
        num_filters = 128,
        kernel_sizes = [3, 4, 5],
        seq_len = 256,
        drop = 0.2,
    ):
        super(LSTM_CNN_dual, self).__init__()

        self.lstm_0 = nn.LSTMCell(input_size = in_features, hidden_size = hidden_size)
        self.lstm_1 = nn.LSTMCell(input_size = hidden_size, hidden_size = hidden_size_2)

        self.conv_0 = nn.Sequential(
            nn.Conv1d(in_features, num_filters, kernel_sizes[0]),
            nn.MaxPool1d(seq_len - kernel_sizes[0] + 1, stride = 1),
            nn.ReLU(),
        )

        self.conv_1 = nn.Sequential(
            nn.Conv1d(in_features, num_filters, kernel_sizes[1]),
            nn.MaxPool1d(seq_len - kernel_sizes[1] + 1, stride = 1),
            nn.ReLU(),
        )

        self.conv_2 = nn.Sequential(
            nn.Conv1d(in_features, num_filters, kernel_sizes[2]),
            nn.MaxPool1d(seq_len - kernel_sizes[2] + 1 , stride = 1),
            nn.ReLU(),
        )

        self.flatten = nn.Flatten()

        self.dropout = nn.Dropout(drop)

        self.dense_0 = nn.Linear(num_filters * 3 + hidden_size_2, 3)

        self.set_optimizer()


    def forward(self, x):
        lstm_x = torch.permute(x, (2,0,1))
        output = []
        for i in range(lstm_x.size()[0]):
            hx, cx = self.lstm_0(lstm_x[i])
            output.append(hx)
        lstm_x = torch.stack(output, dim=0) #256, 4, 512
        output = []
        for i in range(lstm_x.size()[0]):
            hx, cx = self.lstm_1(lstm_x[i])
            output.append(hx)
        lstm_out = torch.stack(output, dim=0).mean(dim=0) #256, 4, 128

        x_0 = self.conv_0(x).squeeze(-1)
        x_1 = self.conv_1(x).squeeze(-1)
        x_2 = self.conv_2(x).squeeze(-1)
        merged_tensor = torch.cat([x_0, x_1, x_2, lstm_out], dim = 1)
        merged_tensor = self.dropout(merged_tensor)
        x = F.softmax(self.dense_0(merged_tensor), dim=1)

        return x

## 3. Training

In [None]:
lr=1e-3

optim_targets = [
    'val_loss',
    'train_loss',
    'val_acc',
    'train_acc',
]

log_path = os.path.join(SAVE_PATH, 'logs')
if not os.path.exists(log_path):
    os.makedirs(log_path, exist_ok=True)

### 3.1 CNN

In [None]:
cnn_model = CNN()
cnn_model.set_optimizer(torch.optim.Adam(cnn_model.parameters(), lr=lr))

In [None]:
# import wandb
# wandb.util.generate_id()

In [None]:
# Define model callbacks

# Checkpoints
from lightning.pytorch.callbacks import ModelCheckpoint

cnn_checkpoint_path = os.path.join(SAVE_PATH, 'cnn_checkpoints')
if not os.path.exists(cnn_checkpoint_path):
    os.makedirs(cnn_checkpoint_path, exist_ok=True)

cnn_checkpoint_callback = [
    ModelCheckpoint(
        monitor=None,
        dirpath=cnn_checkpoint_path,
        filename=f'Sentiment_last',
        every_n_epochs=1,
        save_on_train_epoch_end=True,
    ) # save the last checkpoint for continuing training
]

for target in optim_targets:
    cnn_checkpoint_callback.append(ModelCheckpoint(
        monitor=target,
        dirpath=cnn_checkpoint_path,
        filename=f'Sentiment-{{epoch:02d}}-{{{target}:.2f}}',
        save_top_k=3,
        save_last=False,
        mode='min' if 'loss' in target else 'max',
        every_n_epochs=2,
        save_on_train_epoch_end=False,
        # every_n_train_steps=250,
    ))


# Log result
class LogCallback(L.Callback):
    def on_validation_epoch_end(self, trainer, pl_module):
        pl_module.log_val_metrics()

    def on_train_epoch_end(self, trainer, pl_module):
        pl_module.log_train_metrics()

log_callback = LogCallback()

# wandb logger
from lightning.pytorch.loggers import WandbLogger
wandb_logger = WandbLogger(
    project="NLP_Sentiment",
    log_model=True,
    save_dir=log_path,
    name='Sentiment_cnn',
    id="j447gtm8",
    resume=True,
)

In [None]:
#define trainer and train model
trainer = L.Trainer(
    max_epochs=100,
    callbacks=cnn_checkpoint_callback +  [log_callback],
    logger=wandb_logger,
    log_every_n_steps=len(train_dataloader),
    check_val_every_n_epoch=1,
)
last_checkpoint = 'result/cnn_checkpoints/Sentiment_last.ckpt'
if not os.path.exists(last_checkpoint):
    last_checkpoint = None
# last_checkpoint=None
trainer.fit(cnn_model, train_dataloader, test_dataloader, ckpt_path=last_checkpoint)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | conv_0    | Conv1d           | 295 K 
2 | pool_0    | MaxPool1d        | 0     
3 | conv_1    | Conv1d           | 393 K 
4 | pool_1    | MaxPool1d        | 0     
5 | conv_2    | Conv1d           | 491 K 
6 | pool_2    | MaxPool1d        | 0     
7 | flatten   | Flatten          | 0     
8 | dropout   | Dropout          | 0     
9 | dense_0   | Linear           | 1.2 K 
-----------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.725     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=100` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=100` reached.


In [None]:
wandb.finish()

VBox(children=(Label(value='13.536 MB of 13.536 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▄▆▆▇▇▇▇████████████████████████████████
train_loss,█▅▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▅▂▅▃█▄▆▃▅▇▆▄▆▆▃▅▅▁▂▅▅▁▅▃▃▂▃▅▃▅▂▄▅▅▄▄▅▅▄▃
val_loss,█▆▄▄▁▄▂▄▂▁▂▃▃▂▃▃▃▄▃▃▃▄▃▃▃▄▃▃▃▂▄▃▂▂▃▃▂▂▃▃

0,1
epoch,99.0
train_acc,0.99098
train_loss,0.56063
trainer/global_step,999.0
val_acc,0.69186
val_loss,0.83628


### 3.2 LSTM Mean

In [None]:
# wandb.util.generate_id()

In [None]:
lstm_mean_model = LSTMMean()
lstm_mean_model.set_optimizer(torch.optim.Adam(lstm_mean_model.parameters(), lr=lr))

In [None]:
# Define model callbacks

# Checkpoints
from lightning.pytorch.callbacks import ModelCheckpoint

lstm_mean_checkpoint_path = os.path.join(SAVE_PATH, 'lstm_mean_checkpoints')
if not os.path.exists(lstm_mean_checkpoint_path):
    os.makedirs(lstm_mean_checkpoint_path, exist_ok=True)

lstm_mean_checkpoint_callback = [
    ModelCheckpoint(
        monitor=None,
        dirpath=lstm_mean_checkpoint_path,
        filename=f'Sentiment_last',
        every_n_epochs=1,
        save_on_train_epoch_end=True,
    ) # save the last checkpoint for continuing training
]

for target in optim_targets:
    lstm_mean_checkpoint_callback.append(ModelCheckpoint(
        monitor=target,
        dirpath=lstm_mean_checkpoint_path,
        filename=f'Sentiment-{{epoch:02d}}-{{{target}:.2f}}',
        save_top_k=3,
        save_last=False,
        mode='min' if 'loss' in target else 'max',
        every_n_epochs=2,
        save_on_train_epoch_end=False,
        # every_n_train_steps=250,
    ))


# Log result
class LogCallback(L.Callback):
    def on_validation_epoch_end(self, trainer, pl_module):
        pl_module.log_val_metrics()

    def on_train_epoch_end(self, trainer, pl_module):
        pl_module.log_train_metrics()

log_callback = LogCallback()

# wandb logger
from lightning.pytorch.loggers import WandbLogger
wandb_logger = WandbLogger(
    project="NLP_Sentiment",
    log_model=True,
    save_dir=log_path,
    name='Sentiment_lstm_mean',
    id="lqlhv3sv",
    resume=True,
)

In [None]:
#define trainer and train model
trainer = L.Trainer(
    max_epochs=100,
    callbacks=lstm_mean_checkpoint_callback +  [log_callback],
    logger=wandb_logger,
    log_every_n_steps=len(train_dataloader),
    check_val_every_n_epoch=1,
)
last_checkpoint = 'result/lstm_mean_checkpoints/Sentiment_last.ckpt'
if not os.path.exists(last_checkpoint):
    last_checkpoint = None
# last_checkpoint=None
trainer.fit(lstm_mean_model, train_dataloader, test_dataloader, ckpt_path=last_checkpoint)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
[34m[1mwandb[0m: Currently logged in as: [33mhontrn9122[0m. Use [1m`wandb login --relogin`[0m to force relogin


INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | lstm_0    | LSTMCell         | 2.6 M 
2 | lstm_1    | LSTMCell         | 328 K 
3 | dropout   | Dropout          | 0     
4 | dense_0   | Linear           | 387   
-----------------------------------------------
3.0 M     Trainable params
0         Non-trainable params
3.0 M     Total params
11.819    Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | lstm_0    | LSTMCell         | 2.6 M 
2 | lstm_1    | LSTMCell         | 328 K 
3 | dropout   | Dropout          | 0     
4 | dense_0   | Linear           | 387   
------------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=100` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=100` reached.


In [None]:
wandb.finish()

VBox(children=(Label(value='33.833 MB of 33.845 MB uploaded\r'), FloatProgress(value=0.9996654718858312, max=1…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▂▃▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇██████████
train_loss,██▇▆▆▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▃▁▁▃▅▄▅▆▆▆▆▆▆▆▆▇▆▅▇▇▆▇▇▇▇▇██▇▇█████▇██▇▅
val_loss,██▇▆▅▅▄▄▄▄▃▄▃▃▃▂▃▃▂▂▂▂▂▂▂▂▁▁▂▂▂▁▁▁▁▁▁▁▁▂

0,1
epoch,99.0
train_acc,0.92926
train_loss,0.66713
trainer/global_step,999.0
val_acc,0.61538
val_loss,0.89605


### 3.3 LSTM Last

In [None]:
# wandb.util.generate_id()

In [None]:
lstm_last_model = LSTMLast()
lstm_last_model.set_optimizer(torch.optim.Adam(lstm_last_model.parameters(), lr=lr))

In [None]:
# Define model callbacks

# Checkpoints
from lightning.pytorch.callbacks import ModelCheckpoint

lstm_last_checkpoint_path = os.path.join(SAVE_PATH, 'lstm_last_checkpoints')
if not os.path.exists(lstm_last_checkpoint_path):
    os.makedirs(lstm_last_checkpoint_path, exist_ok=True)

lstm_last_checkpoint_callback = [
    ModelCheckpoint(
        monitor=None,
        dirpath=lstm_last_checkpoint_path,
        filename=f'Sentiment_last',
        every_n_epochs=1,
        save_on_train_epoch_end=True,
    ) # save the last checkpoint for continuing training
]

for target in optim_targets:
    lstm_last_checkpoint_callback.append(ModelCheckpoint(
        monitor=target,
        dirpath=lstm_last_checkpoint_path,
        filename=f'Sentiment-{{epoch:02d}}-{{{target}:.2f}}',
        save_top_k=3,
        save_last=False,
        mode='min' if 'loss' in target else 'max',
        every_n_epochs=2,
        save_on_train_epoch_end=False,
        # every_n_train_steps=250,
    ))


# Log result
class LogCallback(L.Callback):
    def on_validation_epoch_end(self, trainer, pl_module):
        pl_module.log_val_metrics()

    def on_train_epoch_end(self, trainer, pl_module):
        pl_module.log_train_metrics()

log_callback = LogCallback()

# wandb logger
from lightning.pytorch.loggers import WandbLogger
wandb_logger = WandbLogger(
    project="NLP_Sentiment",
    log_model=True,
    save_dir=log_path,
    name='Sentiment_lstm_last',
    id="bkjk9w07",
    resume=True,
)

In [None]:
#define trainer and train model
trainer = L.Trainer(
    max_epochs=100,
    callbacks=lstm_last_checkpoint_callback +  [log_callback],
    logger=wandb_logger,
    log_every_n_steps=len(train_dataloader),
    check_val_every_n_epoch=1,
)
last_checkpoint = 'result/lstm_last_checkpoints/Sentiment_last.ckpt'
if not os.path.exists(last_checkpoint):
    last_checkpoint = None
# last_checkpoint=None
trainer.fit(lstm_last_model, train_dataloader, test_dataloader, ckpt_path=last_checkpoint)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | lstm_0    | LSTMCell         | 2.6 M 
2 | lstm_1    | LSTMCell         | 328 K 
3 | dropout   | Dropout          | 0     
4 | dense_0   | Linear           | 387   
-----------------------------------------------
3.0 M     Trainable params
0         Non-trainable params
3.0 M     Total params
11.819    Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | lstm_0    | LSTMCell         | 2.6 M 
2 | lstm_1    | LSTMCell         | 328 K 
3 | dropout   | Dropout          | 0     
4 | dense_0   | Linear           | 387   
------------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=100` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=100` reached.


In [None]:
wandb.finish()

VBox(children=(Label(value='33.833 MB of 33.833 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▇▃▇▇██▇██▅▇█▄▁██▃███▇███▇██▄███▅██████▅█
train_loss,█▆▅▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▇▁▅▇▂▄▄▄▅▆▄▅▅▅▄▆▅▄▆▅▃▆▂▅▆█▄▅▃▇▅▅▄▅▄▅▅▄▆▅

0,1
epoch,99.0
train_acc,0.33729
train_loss,1.09628
trainer/global_step,999.0
val_acc,0.22786
val_loss,1.10064


### 3.4 CNN2LSTM

In [None]:
# wandb.util.generate_id()

In [None]:
cnn2lstm_model = CNN2LSTM()
cnn2lstm_model.set_optimizer(torch.optim.Adam(cnn2lstm_model.parameters(), lr=lr))

In [None]:
# Define model callbacks

# Checkpoints
from lightning.pytorch.callbacks import ModelCheckpoint

cnn2lstm_checkpoint_path = os.path.join(SAVE_PATH, 'cnn2lstm_checkpoints')
if not os.path.exists(cnn2lstm_checkpoint_path):
    os.makedirs(cnn2lstm_checkpoint_path, exist_ok=True)

cnn2lstm_checkpoint_callback = [
    ModelCheckpoint(
        monitor=None,
        dirpath=cnn2lstm_checkpoint_path,
        filename=f'Sentiment_last',
        every_n_epochs=1,
        save_on_train_epoch_end=True,
    ) # save the last checkpoint for continuing training
]

for target in optim_targets:
    cnn2lstm_checkpoint_callback.append(ModelCheckpoint(
        monitor=target,
        dirpath=cnn2lstm_checkpoint_path,
        filename=f'Sentiment-{{epoch:02d}}-{{{target}:.2f}}',
        save_top_k=3,
        save_last=False,
        mode='min' if 'loss' in target else 'max',
        every_n_epochs=2,
        save_on_train_epoch_end=False,
        # every_n_train_steps=250,
    ))


# Log result
class LogCallback(L.Callback):
    def on_validation_epoch_end(self, trainer, pl_module):
        pl_module.log_val_metrics()

    def on_train_epoch_end(self, trainer, pl_module):
        pl_module.log_train_metrics()

log_callback = LogCallback()

# wandb logger
from lightning.pytorch.loggers import WandbLogger
wandb_logger = WandbLogger(
    project="NLP_Sentiment",
    log_model=True,
    save_dir=log_path,
    name='Sentiment_cnn2lstm',
    id="qc8ia5fr",
    resume=True,
)

In [None]:
#define trainer and train model
trainer = L.Trainer(
    max_epochs=100,
    callbacks=cnn2lstm_checkpoint_callback +  [log_callback],
    logger=wandb_logger,
    log_every_n_steps=len(train_dataloader),
    check_val_every_n_epoch=1,
)
last_checkpoint = 'result/cnn2lstm_checkpoints/Sentiment_last.ckpt'
if not os.path.exists(last_checkpoint):
    last_checkpoint = None
# last_checkpoint=None
trainer.fit(cnn2lstm_model, train_dataloader, test_dataloader, ckpt_path=last_checkpoint)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type             | Params
-----------------------------------------------
0 |

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=100` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=100` reached.


In [None]:
wandb.finish()

VBox(children=(Label(value='38.343 MB of 38.343 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▃▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████▇█████████
train_loss,██▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▄▁▆▆▆▇▆▅▆▇█▆▆█▇▇▇▇▇▇▇██████▇██▆█▇███████
val_loss,██▆▅▅▄▅▅▄▄▃▃▄▂▃▃▃▃▂▂▂▂▂▁▂▁▂▂▂▁▃▁▂▂▂▂▂▂▂▂

0,1
epoch,99.0
train_acc,0.96416
train_loss,0.60786
trainer/global_step,999.0
val_acc,0.69296
val_loss,0.86833


### 3.5 LSTM2CNN

In [None]:
# wandb.util.generate_id()

In [None]:
lstm2cnn_model = LSTM2CNN()
lstm2cnn_model.set_optimizer(torch.optim.Adam(lstm2cnn_model.parameters(), lr=lr))

In [None]:
# Define model callbacks

# Checkpoints
from lightning.pytorch.callbacks import ModelCheckpoint

lstm2cnn_checkpoint_path = os.path.join(SAVE_PATH, 'lstm2cnn_checkpoints')
if not os.path.exists(lstm2cnn_checkpoint_path):
    os.makedirs(lstm2cnn_checkpoint_path, exist_ok=True)

lstm2cnn_checkpoint_callback = [
    ModelCheckpoint(
        monitor=None,
        dirpath=lstm2cnn_checkpoint_path,
        filename=f'Sentiment_last',
        every_n_epochs=1,
        save_on_train_epoch_end=True,
    ) # save the last checkpoint for continuing training
]

for target in optim_targets:
    lstm2cnn_checkpoint_callback.append(ModelCheckpoint(
        monitor=target,
        dirpath=lstm2cnn_checkpoint_path,
        filename=f'Sentiment-{{epoch:02d}}-{{{target}:.2f}}',
        save_top_k=3,
        save_last=False,
        mode='min' if 'loss' in target else 'max',
        every_n_epochs=2,
        save_on_train_epoch_end=False,
        # every_n_train_steps=250,
    ))


# Log result
class LogCallback(L.Callback):
    def on_validation_epoch_end(self, trainer, pl_module):
        pl_module.log_val_metrics()

    def on_train_epoch_end(self, trainer, pl_module):
        pl_module.log_train_metrics()

log_callback = LogCallback()

# wandb logger
from lightning.pytorch.loggers import WandbLogger
wandb_logger = WandbLogger(
    project="NLP_Sentiment",
    log_model=True,
    save_dir=log_path,
    name='Sentiment_lstm2cnn',
    id="12qhdg4l",
    resume=True,
)

In [None]:
#define trainer and train model
trainer = L.Trainer(
    max_epochs=100,
    callbacks=lstm2cnn_checkpoint_callback +  [log_callback],
    logger=wandb_logger,
    log_every_n_steps=len(train_dataloader),
    check_val_every_n_epoch=1,
)
last_checkpoint = 'result/lstm2cnn_checkpoints/Sentiment_last.ckpt'
if not os.path.exists(last_checkpoint):
    last_checkpoint = None
# last_checkpoint=None
trainer.fit(lstm2cnn_model, train_dataloader, test_dataloader, ckpt_path=last_checkpoint)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | lstm_0    | LSTMCell         | 2.6 M 
2 | lstm_1    | LSTMCell         | 328 K 
3 | conv_0    | Sequential       | 49.3 K
4 | conv_1    | Sequential       | 65.7 K
5 | conv_2    | Sequential       | 82.0 K
6 | flatten   | Flatten          | 0     
7 | dropout   | Dropout          | 0     
8 | dense_0   | Linear           | 1.2 K 
-----------------------------------------------
3.2 M     Trainable params
0         Non-trainable params
3.2 M     Total params
12.610    Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | lstm_0    | LSTMCell         | 2

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  return F.conv1d(input, weight, bias, self.stride,


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=100` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=100` reached.


In [None]:
wandb.finish()

VBox(children=(Label(value='36.104 MB of 36.104 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▅▆▆▆▇▇▇▇▇▇▇████████████████████████████
train_loss,█▅▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▂▂▃▅▆▃▃▁▅▅▂▅▄▄▄▅▄█▅▄▄▃▅▃▃▆▄▃▄▄▂▄▄▂▄▄▃▄▃▃
val_loss,█▃▃▁▁▂▃▃▂▂▃▂▂▂▃▂▂▁▂▂▃▂▂▃▃▁▂▃▂▃▃▂▃▃▂▃▃▂▃▃

0,1
epoch,99.0
train_acc,0.91632
train_loss,0.63454
trainer/global_step,999.0
val_acc,0.66602
val_loss,0.87727


### 3.6 LSTM_CNN_dual

In [None]:
# wandb.util.generate_id()

In [None]:
lstm_cnn_dual_model = LSTM_CNN_dual()
lstm_cnn_dual_model.set_optimizer(torch.optim.Adam(lstm_cnn_dual_model.parameters(), lr=lr))

In [None]:
# Define model callbacks

# Checkpoints
from lightning.pytorch.callbacks import ModelCheckpoint

lstm_cnn_dual_checkpoint_path = os.path.join(SAVE_PATH, 'lstm_cnn_dual_checkpoints')
if not os.path.exists(lstm_cnn_dual_checkpoint_path):
    os.makedirs(lstm_cnn_dual_checkpoint_path, exist_ok=True)

lstm_cnn_dual_checkpoint_callback = [
    ModelCheckpoint(
        monitor=None,
        dirpath=lstm_cnn_dual_checkpoint_path,
        filename=f'Sentiment_last',
        every_n_epochs=1,
        save_on_train_epoch_end=True,
    ) # save the last checkpoint for continuing training
]

for target in optim_targets:
    lstm_cnn_dual_checkpoint_callback.append(ModelCheckpoint(
        monitor=target,
        dirpath=lstm_cnn_dual_checkpoint_path,
        filename=f'Sentiment-{{epoch:02d}}-{{{target}:.2f}}',
        save_top_k=3,
        save_last=False,
        mode='min' if 'loss' in target else 'max',
        every_n_epochs=2,
        save_on_train_epoch_end=False,
        # every_n_train_steps=250,
    ))


# Log result
class LogCallback(L.Callback):
    def on_validation_epoch_end(self, trainer, pl_module):
        pl_module.log_val_metrics()

    def on_train_epoch_end(self, trainer, pl_module):
        pl_module.log_train_metrics()

log_callback = LogCallback()

# wandb logger
from lightning.pytorch.loggers import WandbLogger
wandb_logger = WandbLogger(
    project="NLP_Sentiment",
    log_model=True,
    save_dir=log_path,
    name='Sentiment_lstm_cnn_dual',
    id="2vwus6kq",
    resume=True,
)

In [None]:
#define trainer and train model
trainer = L.Trainer(
    max_epochs=100,
    callbacks=lstm_cnn_dual_checkpoint_callback +  [log_callback],
    logger=wandb_logger,
    log_every_n_steps=len(train_dataloader),
    check_val_every_n_epoch=1,
)
last_checkpoint = 'result/lstm_cnn_dual_checkpoints/Sentiment_last.ckpt'
if not os.path.exists(last_checkpoint):
    last_checkpoint = None
# last_checkpoint=None
trainer.fit(lstm_cnn_dual_model, train_dataloader, test_dataloader, ckpt_path=last_checkpoint)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | lstm_0    | LSTMCell         | 2.6 M 
2 | lstm_1    | LSTMCell         | 328 K 
3 | conv_0    | Sequential       | 295 K 
4 | conv_1    | Sequential       | 393 K 
5 | conv_2    | Sequential       | 491 K 
6 | flatten   | Flatten          | 0     
7 | dropout   | Dropout          | 0     
8 | dense_0   | Linear           | 1.5 K 
-----------------------------------------------
4.1 M     Trainable params
0         Non-trainable params
4.1 M     Total params
16.543    Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | lstm_0    | LSTMCell         | 2

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=100` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=100` reached.


In [None]:
wandb.finish()

VBox(children=(Label(value='47.358 MB of 47.358 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▄▆▆▇▇▇█████████████████████████████████
train_loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▃█▅▄▇▆▆▅▆▆▅▅▅▅▅▅▄▄▆▄▄▄▅▃▆▄▆▄▄▃▅▃▄▄▄▄▃▃▅
val_loss,█▃▁▂▃▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▃▁▂▂▃▂▂▂▃▂▂▂▃▂

0,1
epoch,99.0
train_acc,0.99099
train_loss,0.56055
trainer/global_step,999.0
val_acc,0.7175
val_loss,0.82485


## 4 Backup data

In [None]:
shutil.make_archive('result', 'zip', 'result')
shutil.move('result.zip', os.path.join(DRIVE_PATH, 'result.zip'))

'drive/MyDrive/HCMUT/NLP/asm/result.zip'