In [1]:
from google.colab import drive
import zipfile
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import nltk
from collections import Counter
import time
from torch.cuda.amp import autocast, GradScaler

In [2]:
drive.mount('/content/drive')
df = pd.read_csv('/content/drive/My Drive/AIL721_A3/Datasets/TrainData.csv')
sentences = df['Text'].tolist()

Mounted at /content/drive


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
torch.backends.cudnn.benchmark = True

cuda


In [4]:
EMBEDDING_DIM = 300
BATCH_SIZE = 1026
EPOCHS = 30
LEARNING_RATE = 0.001
WINDOW_SIZE = 2

In [10]:
nltk.download('punkt_tab')
nltk.download('punkt')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [11]:
tokenized = [nltk.word_tokenize(sentence.lower()) for sentence in sentences]

words = [word for sentence in tokenized for word in sentence]
word_counts = Counter(words)
vocab = {'<PAD>': 0, '<UNK>': 1}
vocab.update({word: i+2 for i, (word, _) in enumerate(word_counts.items())})
vocab_size = len(vocab)


In [12]:
def compute_pairs(tokenized, WINDOW_SIZE, vocab):
    contexts = []
    targets = []
    for sentence in tokenized:
        for i in range(len(sentence)):
            start = max(0, i - WINDOW_SIZE)
            end = min(len(sentence), i + WINDOW_SIZE + 1)
            context = sentence[start:i] + sentence[i+1:end]
            context = context + ['<PAD>'] * (2 * WINDOW_SIZE - len(context))
            contexts.append([vocab.get(w, 1) for w in context])
            targets.append(vocab.get(sentence[i], 1))
    return torch.LongTensor(contexts), torch.LongTensor(targets)

contexts, targets = compute_pairs(tokenized, WINDOW_SIZE, vocab)



In [13]:
dataset = TensorDataset(contexts, targets)
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    persistent_workers=True
)

class CBOW(nn.Module):
    def __init__(self, v, e):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(v, e)
        self.linear = nn.Linear(e, v)

    def forward(self, contexts):
        embeds = self.embeddings(contexts)
        embedcontexts = torch.mean(embeds, dim=1)
        out = self.linear(embedcontexts)
        # print(contexts.shape, embeds.shape, embedcontexts.shape, out.shape)
        return out

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
model = CBOW(vocab_size, EMBEDDING_DIM).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)




cuda


In [None]:
for epoch in range(EPOCHS):
    s = time.time()
    total_loss = 0

    for contexts, targets in dataloader:
        contexts = contexts.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        optimizer.zero_grad()
        outputs = model(contexts)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"epoch {epoch} loss: {total_loss:.4f} time: {time.time() - s:.2f}s")
embeddings = model.embeddings.weight.data.cpu().numpy()



epoch 0 loss: 4383.8292 time: 18.55s
epoch 1 loss: 3546.0027 time: 17.14s
epoch 2 loss: 3210.1357 time: 17.91s
epoch 3 loss: 2960.9658 time: 17.85s
epoch 4 loss: 2759.2934 time: 17.16s
epoch 5 loss: 2591.5428 time: 18.47s
epoch 6 loss: 2451.6125 time: 19.79s
epoch 7 loss: 2334.5250 time: 18.42s
epoch 8 loss: 2234.1646 time: 17.57s
epoch 9 loss: 2145.9135 time: 17.73s
epoch 10 loss: 2066.9322 time: 18.13s
epoch 11 loss: 1995.5025 time: 18.25s
epoch 12 loss: 1930.3008 time: 18.36s
epoch 13 loss: 1870.0006 time: 17.95s
epoch 14 loss: 1814.4887 time: 17.74s
epoch 15 loss: 1762.7629 time: 20.14s
epoch 16 loss: 1714.5636 time: 17.92s
epoch 17 loss: 1669.3513 time: 19.31s
epoch 18 loss: 1627.0044 time: 17.46s
epoch 19 loss: 1587.2834 time: 19.78s
epoch 20 loss: 1549.9673 time: 18.06s
epoch 21 loss: 1514.8117 time: 18.54s
epoch 22 loss: 1481.8163 time: 17.41s
epoch 23 loss: 1450.6090 time: 17.50s
epoch 24 loss: 1421.1054 time: 17.12s
epoch 25 loss: 1393.3914 time: 17.73s
epoch 26 loss: 1367.08

In [14]:
os.makedirs('/content/drive/My Drive/AIL721_A3/saved/', exist_ok=True)
torch.save(model.state_dict(), '/content/drive/My Drive/AIL721_A3/saved/cbow_model.pth')
torch.save(vocab, '/content/drive/My Drive/AIL721_A3/saved/vocab.pth')
embeddings = model.embeddings.weight.data.cpu().numpy()
np.save('/content/drive/My Drive/AIL721_A3/saved/embeddings.npy', embeddings)


In [16]:
vocab = torch.load('/content/drive/My Drive/AIL721_A3/saved/vocab.pth')

loaded_model = CBOW(len(vocab), EMBEDDING_DIM).to(device)

loaded_model.load_state_dict(torch.load('/content/drive/My Drive/AIL721_A3/saved/cbow_model.pth'))
loaded_model.eval()

len(vocab)
embedding_matrix = np.load('/content/drive/My Drive/AIL721_A3/saved/embeddings.npy')


In [17]:
EMBEDDING_DIM = embedding_matrix.shape[1]
NUM_CLASSES = len(df['Category'].unique())
BATCH_SIZE = 64
EPOCHS = 10
LSTM_HIDDEN_DIM = 128
DENSE_EMBED_DIM = 128
CONV_OUT_DIM = 100
FUSION_DIM = 256


In [18]:
tokenized = [nltk.word_tokenize(text.lower()) for text in df['Text']]
sequences = [[vocab.get(word, 1) for word in seq] for seq in tokenized]

len(sequences)


1490

In [19]:
MAX_LEN = max(len(sequence) for sequence in sequences)
MAX_LEN


3496

In [20]:
padded = torch.zeros((len(sequences), MAX_LEN), dtype=torch.long)
for i, seq in enumerate(sequences):
    length = min(len(seq), MAX_LEN)
    padded[i,:length] = torch.tensor(seq[:length])


In [21]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
labels = le.fit_transform(df['Category'])
labels = torch.tensor(labels, dtype=torch.long)


In [22]:
class NewsDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return self.sequences[idx], self.labels[idx]


In [23]:
from sklearn.model_selection import train_test_split

train_sequences, test_sequences, train_labels, test_labels = train_test_split(
    padded, labels, test_size=0.2, random_state=42, stratify=labels
)



In [24]:
dataset_train = NewsDataset(train_sequences, train_labels)
dataset_test = NewsDataset(test_sequences, test_labels)

train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)


# NOTE: I had to run the following model again while I was cleaning this file, and the new test set accuracy is 97 instead of 94, I think its due to randomness so wont be utilizing this fact in the report

In [None]:
class CLSTM_WE(nn.Module):
    def __init__(self, embedding_matrix, num_classes):
        super().__init__()

        self.v, self.e = embedding_matrix.shape

        self.embedding = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_matrix),
            freeze=False
        )
        self.conv3 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=3)
        self.conv4 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=4)
        self.conv5 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=5)
        self.cnn_dropout = nn.Dropout(0.5)

        self.lstm = nn.LSTM(self.e, LSTM_HIDDEN_DIM, bidirectional=False, batch_first=True)
        self.lstm_dropout = nn.Dropout(0.5)

        self.embed_dense = nn.Linear(self.e, DENSE_EMBED_DIM)

        self.fc1 = nn.Linear(CONV_OUT_DIM*3 + LSTM_HIDDEN_DIM + DENSE_EMBED_DIM, FUSION_DIM)
        self.fc2 = nn.Linear(FUSION_DIM, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):


        x_embed = self.embedding(x)

        x_cnn = x_embed.permute(0, 2, 1)
        c3 = torch.relu(self.conv3(x_cnn)).max(dim=2)[0]
        c4 = torch.relu(self.conv4(x_cnn)).max(dim=2)[0]
        c5 = torch.relu(self.conv5(x_cnn)).max(dim=2)[0]
        cnn_out = torch.cat([c3, c4, c5], dim=1)
        cnn_out = self.cnn_dropout(cnn_out)

        lstm_out, _ = self.lstm(x_embed)
        lstm_out = lstm_out.mean(dim=1)
        lstm_out = self.lstm_dropout(lstm_out)

        embed_out = x_embed.mean(dim=1)
        embed_out = self.embed_dense(embed_out)

        combined = torch.cat([cnn_out, lstm_out, embed_out], dim=1)
        out = self.dropout(torch.relu(self.fc1(combined)))
        out = self.fc2(out)

        return out


In [None]:
model = CLSTM_WE(embedding_matrix, NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
import time
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    s = time.time()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'epoch {epoch}, loss: {total_loss/len(train_loader):.4f} time: {time.time()-s:.2f} acc: {correct/total:.4f}')

model.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

torch.save(model.state_dict(), '/content/drive/My Drive/AIL721_A3/CLSTM_WE.pth')


epoch 0, loss: 1.6896 time: 7.10 acc: 0.2307
epoch 1, loss: 1.3923 time: 7.23 acc: 0.3859
epoch 2, loss: 1.0026 time: 7.28 acc: 0.6166
epoch 3, loss: 0.6660 time: 7.10 acc: 0.7592
epoch 4, loss: 0.4679 time: 6.91 acc: 0.8272
epoch 5, loss: 0.2694 time: 6.80 acc: 0.9102
epoch 6, loss: 0.2267 time: 6.74 acc: 0.9270
epoch 7, loss: 0.1430 time: 6.69 acc: 0.9505
epoch 8, loss: 0.1033 time: 6.67 acc: 0.9698
epoch 9, loss: 0.0839 time: 6.70 acc: 0.9757
Test Accuracy: 0.9765


In [25]:
ATTN_DIM = 100

class CBiLSTM_WE_SA(nn.Module):
    def __init__(self, embedding_matrix, num_classes):
        super().__init__()
        self.v, self.e = embedding_matrix.shape

        self.embedding = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_matrix),
            freeze=False
        )

        self.conv3 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=3)
        self.conv4 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=4)
        self.conv5 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=5)
        self.cnn_dropout = nn.Dropout(0.5)

        self.lstm = nn.LSTM(self.e, LSTM_HIDDEN_DIM, bidirectional=True, batch_first=True)
        self.lstm_dropout = nn.Dropout(0.5)

        self.embed_dense = nn.Linear(self.e, DENSE_EMBED_DIM)

        self.attn_dim = ATTN_DIM
        self.proj_cnn = nn.Linear(CONV_OUT_DIM * 3, self.attn_dim)
        self.proj_lstm = nn.Linear(2 * LSTM_HIDDEN_DIM, self.attn_dim)
        self.proj_embed = nn.Linear(DENSE_EMBED_DIM, self.attn_dim)

        self.self_attn = nn.MultiheadAttention(embed_dim=self.attn_dim, num_heads=4)

        self.fc2 = nn.Linear(self.attn_dim, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x_embed = self.embedding(x)

        x_cnn = x_embed.permute(0, 2, 1)
        c3 = torch.relu(self.conv3(x_cnn)).max(dim=2)[0]
        c4 = torch.relu(self.conv4(x_cnn)).max(dim=2)[0]
        c5 = torch.relu(self.conv5(x_cnn)).max(dim=2)[0]
        cnn_out = torch.cat([c3, c4, c5], dim=1)
        cnn_out = self.cnn_dropout(cnn_out)

        lstm_out, _ = self.lstm(x_embed)
        lstm_out = lstm_out.mean(dim=1)
        lstm_out = self.lstm_dropout(lstm_out)

        embed_out = x_embed.mean(dim=1)
        embed_out = self.embed_dense(embed_out)

        cnn_token = self.proj_cnn(cnn_out)
        lstm_token = self.proj_lstm(lstm_out)
        embed_token = self.proj_embed(embed_out)

        tokens = torch.stack([cnn_token, lstm_token, embed_token], dim=1)

        tokens = tokens.permute(1, 0, 2)
        attn_output, _ = self.self_attn(tokens, tokens, tokens)

        aggregated = attn_output.mean(dim=0)
        out = self.dropout(aggregated)
        out = self.fc2(out)

        return out

In [None]:
dataset_train = NewsDataset(train_sequences, train_labels)
dataset_test = NewsDataset(test_sequences, test_labels)

train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

model = CBiLSTM_WE_SA(embedding_matrix, NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

import time
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    s = time.time()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'epoch {epoch}, loss: {total_loss/len(train_loader):.4f} time: {time.time()-s:.2f} acc: {correct/total:.4f}')

model.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

torch.save(model.state_dict(), '/content/drive/My Drive/AIL721_A3/CBiLSTM_WE_SA.pth')


epoch 0, loss: 1.5852 time: 18.56 acc: 0.2693
epoch 1, loss: 1.1371 time: 8.32 acc: 0.5487
epoch 2, loss: 0.6258 time: 8.34 acc: 0.7718
epoch 3, loss: 0.3503 time: 8.42 acc: 0.8867
epoch 4, loss: 0.1969 time: 8.51 acc: 0.9295
epoch 5, loss: 0.1552 time: 8.49 acc: 0.9446
epoch 6, loss: 0.1226 time: 8.43 acc: 0.9622
epoch 7, loss: 0.1016 time: 8.36 acc: 0.9622
epoch 8, loss: 0.0770 time: 8.31 acc: 0.9757
epoch 9, loss: 0.0438 time: 8.37 acc: 0.9815
Test Accuracy: 0.9698


In [None]:
!pip install gensim



In [None]:
import gensim

In [None]:
import gensim.downloader as api
import numpy as np

model = api.load("word2vec-google-news-300")

embedding_dim = model.vector_size

embedding_matrix2 = np.zeros((len(vocab) + 1, embedding_dim))

for word, idx in vocab.items():
    if word in model.key_to_index:
        embedding_matrix2[idx] = model[word]
    else:
        embedding_matrix2[idx] = np.random.normal(scale=0.6, size=(embedding_dim, ))

print(embedding_matrix2.shape)

Embedding matrix shape: (28181, 300)


In [None]:
class CBiLSTM_WE_ME(nn.Module):
    def __init__(self, embedding_matrix1, embedding_matrix2, num_classes):
        super().__init__()
        self.v1, self.e = embedding_matrix1.shape
        self.v2, _ = embedding_matrix2.shape

        self.embedding1 = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_matrix1),
            freeze=False
        )
        self.embedding2 = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_matrix2),
            freeze=False
        )

        self.gate1 = nn.Linear(self.e, 1)
        self.gate2 = nn.Linear(self.e, 1)

        self.conv3 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=3)
        self.conv4 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=4)
        self.conv5 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=5)
        self.cnn_dropout = nn.Dropout(0.5)

        self.lstm = nn.LSTM(self.e, LSTM_HIDDEN_DIM, bidirectional=True, batch_first=True)
        self.lstm_dropout = nn.Dropout(0.5)

        self.embed_dense = nn.Linear(self.e, DENSE_EMBED_DIM)

        self.fc1 = nn.Linear(CONV_OUT_DIM * 3 + 2 * LSTM_HIDDEN_DIM + DENSE_EMBED_DIM, FUSION_DIM)
        self.fc2 = nn.Linear(FUSION_DIM, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):

        emb1 = self.embedding1(x)
        emb2 = self.embedding2(x)

        gate1 = self.gate1(emb1)
        gate2 = self.gate2(emb2)
        gates = torch.cat([gate1, gate2], dim=2)
        weights = torch.softmax(gates, dim=2)

        meta_emb = weights[:, :, 0:1] * emb1 + weights[:, :, 1:2] * emb2

        x_cnn = meta_emb.permute(0, 2, 1)
        c3 = torch.relu(self.conv3(x_cnn)).max(dim=2)[0]
        c4 = torch.relu(self.conv4(x_cnn)).max(dim=2)[0]
        c5 = torch.relu(self.conv5(x_cnn)).max(dim=2)[0]
        cnn_out = torch.cat([c3, c4, c5], dim=1)
        cnn_out = self.cnn_dropout(cnn_out)

        lstm_out, _ = self.lstm(meta_emb)
        lstm_out = lstm_out.mean(dim=1)
        lstm_out = self.lstm_dropout(lstm_out)

        embed_out = meta_emb.mean(dim=1)
        embed_out = self.embed_dense(embed_out)

        combined = torch.cat([cnn_out, lstm_out, embed_out], dim=1)
        out = self.dropout(torch.relu(self.fc1(combined)))
        out = self.fc2(out)
        return out

In [None]:
dataset_train = NewsDataset(train_sequences, train_labels)
dataset_test = NewsDataset(test_sequences, test_labels)

train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

model = CBiLSTM_WE_ME(embedding_matrix, embedding_matrix2, NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

import time
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    s = time.time()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'epoch {epoch}, loss: {total_loss/len(train_loader):.4f} time: {time.time()-s:.2f} acc: {correct/total:.4f}')

model.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

torch.save(model.state_dict(), '/content/drive/My Drive/AIL721_A3/CBiLSTM_WE_ME.pth')


epoch 0, loss: 1.6233 time: 20.19 acc: 0.2483
epoch 1, loss: 1.3379 time: 9.27 acc: 0.4530
epoch 2, loss: 0.8365 time: 9.37 acc: 0.7039
epoch 3, loss: 0.5183 time: 9.43 acc: 0.8129
epoch 4, loss: 0.2948 time: 9.43 acc: 0.9086
epoch 5, loss: 0.1840 time: 9.41 acc: 0.9471
epoch 6, loss: 0.1272 time: 9.37 acc: 0.9513
epoch 7, loss: 0.0956 time: 9.31 acc: 0.9706
epoch 8, loss: 0.0681 time: 9.28 acc: 0.9782
epoch 9, loss: 0.0503 time: 9.29 acc: 0.9883
Test Accuracy: 0.9631


In [None]:
class CLSTM_WE_ME_SA(nn.Module):
    def __init__(self, embedding_matrix1, embedding_matrix2, num_classes):
        super().__init__()

        self.v1, self.e = embedding_matrix1.shape
        self.v2, _ = embedding_matrix2.shape

        self.embedding1 = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_matrix1), freeze=False
        )
        self.embedding2 = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_matrix2), freeze=False
        )
        self.gate1 = nn.Linear(self.e, 1)
        self.gate2 = nn.Linear(self.e, 1)

        self.conv3 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=3)
        self.conv4 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=4)
        self.conv5 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=5)
        self.cnn_dropout = nn.Dropout(0.5)

        self.lstm = nn.LSTM(self.e, LSTM_HIDDEN_DIM, bidirectional=True, batch_first=True)
        self.lstm_dropout = nn.Dropout(0.5)

        self.embed_dense = nn.Linear(self.e, DENSE_EMBED_DIM)

        self.attn_dim = FUSION_DIM
        self.proj_cnn = nn.Linear(CONV_OUT_DIM * 3, self.attn_dim)
        self.proj_lstm = nn.Linear(2 * LSTM_HIDDEN_DIM, self.attn_dim)
        self.proj_embed = nn.Linear(DENSE_EMBED_DIM, self.attn_dim)

        self.self_attn = nn.MultiheadAttention(embed_dim=self.attn_dim, num_heads=4)

        self.fc2 = nn.Linear(self.attn_dim, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):

        emb1 = self.embedding1(x)
        emb2 = self.embedding2(x)

        gate1 = self.gate1(emb1)
        gate2 = self.gate2(emb2)
        gates = torch.cat([gate1, gate2], dim=2)
        weights = torch.softmax(gates, dim=2)
        meta_emb = weights[:, :, 0:1] * emb1 + weights[:, :, 1:2] * emb2

        x_cnn = meta_emb.permute(0, 2, 1)
        c3 = torch.relu(self.conv3(x_cnn)).max(dim=2)[0]
        c4 = torch.relu(self.conv4(x_cnn)).max(dim=2)[0]
        c5 = torch.relu(self.conv5(x_cnn)).max(dim=2)[0]
        cnn_out = torch.cat([c3, c4, c5], dim=1)
        cnn_out = self.cnn_dropout(cnn_out)

        lstm_out, _ = self.lstm(meta_emb)
        lstm_out = lstm_out.mean(dim=1)
        lstm_out = self.lstm_dropout(lstm_out)

        embed_out = meta_emb.mean(dim=1)
        embed_out = self.embed_dense(embed_out)

        cnn_token = self.proj_cnn(cnn_out)
        lstm_token = self.proj_lstm(lstm_out)
        embed_token = self.proj_embed(embed_out)

        tokens = torch.stack([cnn_token, lstm_token, embed_token], dim=1)
        tokens = tokens.permute(1, 0, 2)

        attn_output, _ = self.self_attn(tokens, tokens, tokens)
        aggregated = attn_output.mean(dim=0)

        out = self.dropout(aggregated)
        out = self.fc2(out)
        return out

In [None]:
dataset_train = NewsDataset(train_sequences, train_labels)
dataset_test = NewsDataset(test_sequences, test_labels)

train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

model = CLSTM_WE_ME_SA(embedding_matrix, embedding_matrix2, NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

import time
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    s = time.time()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'epoch {epoch}, loss: {total_loss/len(train_loader):.4f} time: {time.time()-s:.2f} acc: {correct/total:.4f}')

model.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

torch.save(model.state_dict(), '/content/drive/My Drive/AIL721_A3/CLSTM_WE_ME_SA.pth')


epoch 0, loss: 1.5235 time: 20.74 acc: 0.2978
epoch 1, loss: 0.7981 time: 9.77 acc: 0.6988
epoch 2, loss: 0.3563 time: 9.80 acc: 0.8876
epoch 3, loss: 0.2082 time: 10.00 acc: 0.9413
epoch 4, loss: 0.1252 time: 10.10 acc: 0.9488
epoch 5, loss: 0.1137 time: 10.10 acc: 0.9555
epoch 6, loss: 0.1002 time: 9.98 acc: 0.9715
epoch 7, loss: 0.0872 time: 9.90 acc: 0.9757
epoch 8, loss: 0.0540 time: 9.82 acc: 0.9849
epoch 9, loss: 0.0586 time: 9.85 acc: 0.9773
Test Accuracy: 0.9564


In [None]:
import time
for epoch in range(1):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    s = time.time()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'epoch {epoch}, loss: {total_loss/len(train_loader):.4f} time: {time.time()-s:.2f} acc: {correct/total:.4f}')

model.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

torch.save(model.state_dict(), '/content/drive/My Drive/AIL721_A3/CLSTM_WE_ME_SA.pth')


epoch 0, loss: 0.0481 time: 9.85 acc: 0.9891
Test Accuracy: 0.9631


In [None]:
ATTN_DIM = 100

class CBiLSTM_WE_SA_T(nn.Module):
    def __init__(self, embedding_matrix, num_classes):
        super().__init__()
        self.v, self.e = embedding_matrix.shape

        self.embedding = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_matrix),
            freeze=False
        )

        T_enc = nn.TransformerEncoderLayer(
            d_model=self.e,
            nhead=4,
            dim_feedforward=2048
        )
        self.trans_enc = nn.TransformerEncoder(T_enc, num_layers=1)

        self.conv3 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=3)
        self.conv4 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=4)
        self.conv5 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=5)
        self.cnn_dropout = nn.Dropout(0.5)

        self.lstm = nn.LSTM(self.e, LSTM_HIDDEN_DIM, bidirectional=True, batch_first=True)
        self.lstm_dropout = nn.Dropout(0.5)

        self.embed_dense = nn.Linear(self.e, DENSE_EMBED_DIM)

        self.attn_dim = ATTN_DIM
        self.proj_cnn = nn.Linear(CONV_OUT_DIM * 3, self.attn_dim)
        self.proj_lstm = nn.Linear(2 * LSTM_HIDDEN_DIM, self.attn_dim)
        self.proj_embed = nn.Linear(DENSE_EMBED_DIM, self.attn_dim)

        self.self_attn = nn.MultiheadAttention(embed_dim=self.attn_dim, num_heads=4)

        self.fc2 = nn.Linear(self.attn_dim, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x_embed = self.embedding(x)
        x_enc = self.trans_enc(x_embed.permute(1, 0, 2)).permute(1, 0, 2)

        x_cnn = x_enc.permute(0, 2, 1)
        c3 = torch.relu(self.conv3(x_cnn)).max(dim=2)[0]
        c4 = torch.relu(self.conv4(x_cnn)).max(dim=2)[0]
        c5 = torch.relu(self.conv5(x_cnn)).max(dim=2)[0]
        cnn_out = torch.cat([c3, c4, c5], dim=1)
        cnn_out = self.cnn_dropout(cnn_out)

        lstm_out, _ = self.lstm(x_enc)
        lstm_out = lstm_out.mean(dim=1)
        lstm_out = self.lstm_dropout(lstm_out)

        embed_out = x_enc.mean(dim=1)
        embed_out = self.embed_dense(embed_out)

        cnn_token = self.proj_cnn(cnn_out)
        lstm_token = self.proj_lstm(lstm_out)
        embed_token = self.proj_embed(embed_out)

        tokens = torch.stack([cnn_token, lstm_token, embed_token], dim=1)

        tokens = tokens.permute(1, 0, 2)
        attn_output, _ = self.self_attn(tokens, tokens, tokens)

        aggregated = attn_output.mean(dim=0)
        out = self.dropout(aggregated)
        out = self.fc2(out)

        return out

In [None]:
BATCH_SIZE = 32
dataset_train = NewsDataset(train_sequences, train_labels)
dataset_test = NewsDataset(test_sequences, test_labels)

train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

model = CBiLSTM_WE_SA_T(embedding_matrix, NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

import time
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    s = time.time()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'epoch {epoch}, loss: {total_loss/len(train_loader):.4f} time: {time.time()-s:.2f} acc: {correct/total:.4f}')

model.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

torch.save(model.state_dict(), '/content/drive/My Drive/AIL721_A3/CBiLSTM_WE_SA_T.pth')




epoch 0, loss: 1.6248 time: 74.92 acc: 0.2156
epoch 1, loss: 1.4452 time: 71.81 acc: 0.3238
epoch 2, loss: 1.0107 time: 71.99 acc: 0.5789
epoch 3, loss: 0.5512 time: 72.18 acc: 0.8037
epoch 4, loss: 0.3083 time: 72.09 acc: 0.9111
epoch 5, loss: 0.1879 time: 72.02 acc: 0.9438
epoch 6, loss: 0.1299 time: 72.13 acc: 0.9681
epoch 7, loss: 0.1397 time: 72.12 acc: 0.9648
epoch 8, loss: 0.1218 time: 71.96 acc: 0.9639
epoch 9, loss: 0.1880 time: 71.90 acc: 0.9614
Test Accuracy: 0.9060


In [None]:
ATTN_DIM = 100

import torch
import torch.nn as nn
import math

class CBiLSTM_WE_SA_T_PE(nn.Module):
    def __init__(self, embedding_matrix, num_classes):
        super().__init__()
        self.v, self.e = embedding_matrix.shape

        self.embedding = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_matrix),
            freeze=False
        )

        position = torch.arange(MAX_LEN).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.e, 2) * (-math.log(10000.0) / self.e))
        pe = torch.zeros(MAX_LEN, 1, self.e)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

        T_enc = nn.TransformerEncoderLayer(
            d_model=self.e,
            nhead=4,
            dim_feedforward=2048
        )
        self.trans_enc = nn.TransformerEncoder(T_enc, num_layers=1)

        self.conv3 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=3)
        self.conv4 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=4)
        self.conv5 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=5)
        self.cnn_dropout = nn.Dropout(0.5)

        self.lstm = nn.LSTM(self.e, LSTM_HIDDEN_DIM, bidirectional=True, batch_first=True)
        self.lstm_dropout = nn.Dropout(0.5)

        self.embed_dense = nn.Linear(self.e, DENSE_EMBED_DIM)

        self.attn_dim = ATTN_DIM
        self.proj_cnn = nn.Linear(CONV_OUT_DIM * 3, self.attn_dim)
        self.proj_lstm = nn.Linear(2 * LSTM_HIDDEN_DIM, self.attn_dim)
        self.proj_embed = nn.Linear(DENSE_EMBED_DIM, self.attn_dim)
        self.self_attn = nn.MultiheadAttention(embed_dim=self.attn_dim, num_heads=4)

        self.fc2 = nn.Linear(self.attn_dim, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x_embed = self.embedding(x)
        x_embed_permuted = x_embed.permute(1, 0, 2)
        seq_len = x_embed_permuted.size(0)

        x_embedpos = x_embed_permuted + self.pe[:seq_len]

        # x_enc = self.trans_enc(x_embedpos.permute(1, 0, 2)).permute(1, 0, 2)
        x_enc = self.trans_enc(x_embedpos)
        x_enc = x_enc.permute(1, 0, 2)

        x_cnn = x_enc.permute(0, 2, 1)
        c3 = torch.relu(self.conv3(x_cnn)).max(dim=2)[0]
        c4 = torch.relu(self.conv4(x_cnn)).max(dim=2)[0]
        c5 = torch.relu(self.conv5(x_cnn)).max(dim=2)[0]
        cnn_out = torch.cat([c3, c4, c5], dim=1)
        cnn_out = self.cnn_dropout(cnn_out)

        lstm_out, _ = self.lstm(x_enc)
        lstm_out = lstm_out.mean(dim=1)
        lstm_out = self.lstm_dropout(lstm_out)

        embed_out = x_enc.mean(dim=1)
        embed_out = self.embed_dense(embed_out)

        cnn_token = self.proj_cnn(cnn_out)
        lstm_token = self.proj_lstm(lstm_out)
        embed_token = self.proj_embed(embed_out)

        tokens = torch.stack([cnn_token, lstm_token, embed_token], dim=1)
        tokens = tokens.permute(1, 0, 2)
        attn_output, _ = self.self_attn(tokens, tokens, tokens)

        aggregated = attn_output.mean(dim=0)
        out = self.dropout(aggregated)
        out = self.fc2(out)

        return out

In [None]:
BATCH_SIZE = 32
dataset_train = NewsDataset(train_sequences, train_labels)
dataset_test = NewsDataset(test_sequences, test_labels)

train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

model = CBiLSTM_WE_SA_T_PE(embedding_matrix, NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

import time
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    s = time.time()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'epoch {epoch}, loss: {total_loss/len(train_loader):.4f} time: {time.time()-s:.2f} acc: {correct/total:.4f}')

model.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

torch.save(model.state_dict(), '/content/drive/My Drive/AIL721_A3/CBiLSTM_WE_SA_T_PE.pth')




epoch 0, loss: 1.6185 time: 76.17 acc: 0.2391
epoch 1, loss: 1.3244 time: 74.76 acc: 0.4312
epoch 2, loss: 0.9060 time: 76.02 acc: 0.6602
epoch 3, loss: 0.6128 time: 76.88 acc: 0.7945
epoch 4, loss: 0.4445 time: 77.02 acc: 0.8624
epoch 5, loss: 0.3372 time: 76.93 acc: 0.8935
epoch 6, loss: 0.3436 time: 76.91 acc: 0.9018
epoch 7, loss: 0.2683 time: 76.69 acc: 0.9253
epoch 8, loss: 0.2393 time: 76.63 acc: 0.9329
epoch 9, loss: 0.2159 time: 76.73 acc: 0.9379
Test Accuracy: 0.8758


In [None]:
ATTN_DIM = 100

class CBiLSTM_WE_SA_5T(nn.Module):
    def __init__(self, embedding_matrix, num_classes):
        super().__init__()
        self.v, self.e = embedding_matrix.shape

        self.embedding = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_matrix),
            freeze=False
        )

        T_enc = nn.TransformerEncoderLayer(
            d_model=self.e,
            nhead=4,
            dim_feedforward=2048
        )

        self.trans_enc = nn.TransformerEncoder(T_enc, num_layers=5)

        self.conv3 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=3)
        self.conv4 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=4)
        self.conv5 = nn.Conv1d(self.e, CONV_OUT_DIM, kernel_size=5)
        self.cnn_dropout = nn.Dropout(0.5)

        self.lstm = nn.LSTM(self.e, LSTM_HIDDEN_DIM, bidirectional=True, batch_first=True)
        self.lstm_dropout = nn.Dropout(0.5)

        self.embed_dense = nn.Linear(self.e, DENSE_EMBED_DIM)

        self.attn_dim = ATTN_DIM
        self.proj_cnn = nn.Linear(CONV_OUT_DIM * 3, self.attn_dim)
        self.proj_lstm = nn.Linear(2 * LSTM_HIDDEN_DIM, self.attn_dim)
        self.proj_embed = nn.Linear(DENSE_EMBED_DIM, self.attn_dim)

        self.self_attn = nn.MultiheadAttention(embed_dim=self.attn_dim, num_heads=4)

        self.fc2 = nn.Linear(self.attn_dim, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x_embed = self.embedding(x)
        x_enc = self.trans_enc(x_embed.permute(1, 0, 2)).permute(1, 0, 2)

        x_cnn = x_enc.permute(0, 2, 1)
        c3 = torch.relu(self.conv3(x_cnn)).max(dim=2)[0]
        c4 = torch.relu(self.conv4(x_cnn)).max(dim=2)[0]
        c5 = torch.relu(self.conv5(x_cnn)).max(dim=2)[0]
        cnn_out = torch.cat([c3, c4, c5], dim=1)
        cnn_out = self.cnn_dropout(cnn_out)

        lstm_out, _ = self.lstm(x_enc)
        lstm_out = lstm_out.mean(dim=1)
        lstm_out = self.lstm_dropout(lstm_out)

        embed_out = x_enc.mean(dim=1)
        embed_out = self.embed_dense(embed_out)

        cnn_token = self.proj_cnn(cnn_out)
        lstm_token = self.proj_lstm(lstm_out)
        embed_token = self.proj_embed(embed_out)

        tokens = torch.stack([cnn_token, lstm_token, embed_token], dim=1)

        tokens = tokens.permute(1, 0, 2)
        attn_output, _ = self.self_attn(tokens, tokens, tokens)

        aggregated = attn_output.mean(dim=0)
        out = self.dropout(aggregated)
        out = self.fc2(out)

        return out


In [None]:
BATCH_SIZE = 16
dataset_train = NewsDataset(train_sequences, train_labels)
dataset_test = NewsDataset(test_sequences, test_labels)

train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

model = CBiLSTM_WE_SA_5T(embedding_matrix, NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

import time
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    s = time.time()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'epoch {epoch}, loss: {total_loss/len(train_loader):.4f} time: {time.time()-s:.2f} acc: {correct/total:.4f}')

model.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy:.4f}')

torch.save(model.state_dict(), '/content/drive/My Drive/AIL721_A3/CBiLSTM_WE_SA_5T.pth')




epoch 0, loss: 1.6309 time: 341.04 acc: 0.2156
epoch 1, loss: 1.6203 time: 345.31 acc: 0.2122
epoch 2, loss: 1.6135 time: 345.80 acc: 0.2139
epoch 3, loss: 1.6105 time: 345.22 acc: 0.2500
epoch 4, loss: 1.6137 time: 345.45 acc: 0.2164
epoch 5, loss: 1.6067 time: 345.34 acc: 0.2248
epoch 6, loss: 1.6136 time: 345.60 acc: 0.2072
epoch 7, loss: 1.6102 time: 345.10 acc: 0.2282
epoch 8, loss: 1.6095 time: 344.77 acc: 0.2198
epoch 9, loss: 1.6065 time: 344.60 acc: 0.2232
Test Accuracy: 0.2248


# Final architecture 

In [73]:
df = pd.read_csv('/content/drive/My Drive/AIL721_A3/Datasets/TestLabels.csv')

In [74]:
df

Unnamed: 0,ArticleId,Text,"Label - (business, tech, politics, sport, entertainment)",Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29
0,1018,qpr keeper day heads for preston queens park r...,sport,,,,,,,,...,,,,,,,,,,sport
1,1319,software watching while you work software that...,tech,,,,,,,,...,,,,,,,,,,tech
2,1138,d arcy injury adds to ireland woe gordon d arc...,sport,,,,,,,,...,,,,,,,,,,sport
3,459,india s reliance family feud heats up the ongo...,business,,,,,,,,...,,,,,,,,,,business
4,1020,boro suffer morrison injury blow middlesbrough...,sport,,,,,,,,...,,,,,,,,,,sport
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
730,1923,eu to probe alitalia state aid the european ...,business,,,,,,,,...,,,,,,,,,,
731,373,u2 to play at grammy awards show irish rock ba...,entertainment,,,,,,,,...,,,,,,,,,,
732,1704,sport betting rules in spotlight a group of mp...,sport,,,,,,,,...,,,,,,,,,,
733,206,alfa romeos to get gm engines fiat is to sto...,business,,,,,,,,...,,,,,,,,,,


In [75]:
df.columns

Index(['ArticleId', 'Text',
       'Label - (business, tech, politics, sport, entertainment)',
       'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7',
       'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12',
       'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16',
       'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20',
       'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24',
       'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28',
       'Unnamed: 29'],
      dtype='object')

In [76]:
df = df[['Text', 'Label - (business, tech, politics, sport, entertainment)']]

In [77]:
tokenized_test = [nltk.word_tokenize(text.lower()) for text in df['Text']]
sequences_test = [[vocab.get(word, 1) for word in seq] for seq in tokenized_test]

In [78]:
MAX_LEN_test = max(len(seq) for seq in sequences_test)
padded_test = torch.zeros((len(sequences_test), MAX_LEN_test), dtype=torch.long)
for i, seq in enumerate(sequences_test):
    length = min(len(seq), MAX_LEN_test)
    padded_test[i, :length] = torch.tensor(seq[:length])

In [79]:
le = LabelEncoder()
true_labels = le.fit_transform(df['Label - (business, tech, politics, sport, entertainment)'])

In [80]:
class NewsDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return self.sequences[idx], self.labels[idx]

In [81]:
test_dataset = NewsDataset(padded_test, torch.tensor(true_labels, dtype=torch.long))
BATCH_SIZE = 64
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [82]:
model = CBiLSTM_WE_SA(embedding_matrix, NUM_CLASSES).to(device)
model.load_state_dict(torch.load('/content/drive/My Drive/AIL721_A3/CBiLSTM_WE_SA.pth'))
model.eval()


CBiLSTM_WE_SA(
  (embedding): Embedding(28180, 300)
  (conv3): Conv1d(300, 100, kernel_size=(3,), stride=(1,))
  (conv4): Conv1d(300, 100, kernel_size=(4,), stride=(1,))
  (conv5): Conv1d(300, 100, kernel_size=(5,), stride=(1,))
  (cnn_dropout): Dropout(p=0.5, inplace=False)
  (lstm): LSTM(300, 128, batch_first=True, bidirectional=True)
  (lstm_dropout): Dropout(p=0.5, inplace=False)
  (embed_dense): Linear(in_features=300, out_features=128, bias=True)
  (proj_cnn): Linear(in_features=300, out_features=100, bias=True)
  (proj_lstm): Linear(in_features=256, out_features=100, bias=True)
  (proj_embed): Linear(in_features=128, out_features=100, bias=True)
  (self_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=100, out_features=100, bias=True)
  )
  (fc2): Linear(in_features=100, out_features=5, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [83]:
all_preds = []
all_true = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_true.extend(labels.cpu().numpy())

In [85]:
from sklearn.metrics import f1_score
f1 = f1_score(all_true, all_preds, average='weighted')
f1

0.20594249055856598