### Load Data

In [None]:
import pandas as pd
import torchtext
from torch.utils.data import TensorDataset, DataLoader

def _read_split(path, fields):
    """Read one CSV split, keeping only the columns named in *fields*."""
    return pd.read_csv(path, header=0, encoding='ISO-8859-1', usecols=fields, skip_blank_lines=True)


def _tokenize(articles):
    """Whitespace-split every article; non-string cells (e.g. NaN) yield no tokens."""
    return [text.split() if isinstance(text, str) else [] for text in articles]


def _load_stopwords(path):
    """Return the stopwords listed one per line in *path* as a set."""
    with open(path, encoding="utf-8") as handle:
        return {line.strip() for line in handle}


def _embed_articles(token_lists, glove, stopwords, max_len, embedding_dim):
    """Build a (num_articles, max_len, embedding_dim) float32 tensor of word vectors.

    Stopword positions are left as zero rows (they are not removed, so word
    positions are preserved), and tokens beyond ``max_len`` are dropped.
    """
    import torch

    embedded = torch.zeros((len(token_lists), max_len, embedding_dim), dtype=torch.float32)
    for row, tokens in enumerate(token_lists):
        # Truncate instead of indexing past the fixed sequence length.
        for pos, token in enumerate(tokens[:max_len]):
            word = token.lower()
            if word not in stopwords:
                embedded[row, pos] = torch.as_tensor(glove[word], dtype=torch.float32)
    return embedded


def _one_hot(labels, class_index):
    """One-hot encode *labels* using the column assignment in *class_index*.

    Labels missing from *class_index* (e.g. NaN) produce an all-zero row.
    """
    import torch

    encoded = torch.zeros((len(labels), len(class_index)), dtype=torch.float32)
    for row, label in enumerate(labels):
        column = class_index.get(label)
        if column is not None:
            encoded[row, column] = 1.0
    return encoded


def data_loader(batch_size=128, data_dir='./content', max_len=61, embedding_dim=50, glove=None):
    """Load the news CSV splits and wrap them in DataLoaders of word embeddings.

    Each article is whitespace-tokenized, lower-cased and mapped token by token
    to a word vector; the categories are one-hot encoded.

    Args:
        batch_size: Number of articles per batch.
        data_dir: Directory holding ``inshort_news_data-{train,val,test}.csv``
            and ``stopwords.txt``.
        max_len: Fixed number of token positions per article; longer articles
            are truncated, shorter ones are zero-padded.
        embedding_dim: Size of each word vector.
        glove: Optional object supporting ``glove[token] -> vector``. When
            omitted, the GloVe 6B vectors are loaded through torchtext.

    Returns:
        (train_dataloader, val_dataloader, test_dataloader); every batch is a
        pair ``(features, one_hot_labels)`` of float32 tensors.
    """
    # Imported locally so the function does not depend on imports made by
    # cells that run later in the notebook.
    import os

    fields = ['news_article', 'news_category']
    split_names = ('train', 'val', 'test')
    frames = {
        name: _read_split(os.path.join(data_dir, 'inshort_news_data-{0}.csv'.format(name)), fields)
        for name in split_names
    }

    # One label vocabulary shared by all splits, so that column k means the
    # same category everywhere even when a split lacks some category.
    all_labels = pd.concat([frames[name]['news_category'] for name in split_names])
    categories = sorted(all_labels.dropna().unique())
    class_index = {category: column for column, category in enumerate(categories)}

    # stopwords to eliminate useless words
    stopwords = _load_stopwords(os.path.join(data_dir, 'stopwords.txt'))

    # utilize Glove6B for embedding
    if glove is None:
        glove = torchtext.vocab.GloVe(name='6B', dim=embedding_dim)

    datasets = {}
    for name in split_names:
        frame = frames[name]
        features = _embed_articles(_tokenize(frame['news_article']), glove, stopwords, max_len, embedding_dim)
        targets = _one_hot(frame['news_category'], class_index)
        datasets[name] = TensorDataset(features, targets)

    print('Num training articles: ', len(datasets['train']))
    print('Num validation articles: ', len(datasets['val']))
    print('Num test articles: ', len(datasets['test']))

    train_dataloader = DataLoader(datasets['train'], batch_size=batch_size)
    val_dataloader = DataLoader(datasets['val'], batch_size=batch_size)
    test_dataloader = DataLoader(datasets['test'], batch_size=batch_size)

    return train_dataloader, val_dataloader, test_dataloader

In [None]:
# Build the train / validation / test DataLoaders with the default batch size.
train_loader, val_loader, test_loader = data_loader()

### Architecture

In [None]:
import torch.nn as nn


# LSTM model
class LSTM_news_classifier(nn.Module):
    """Single-layer LSTM followed by a linear layer that emits one score per class.

    Args:
        input_size: Size of each input vector (per time step).
        hidden_size: Number of LSTM hidden units.
        num_class: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_class):
        super(LSTM_news_classifier, self).__init__()
        self.name = "LSTM_1"
        self.hidden_size = hidden_size
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_class)

    def forward(self, x):
        """Return raw class scores computed from the LSTM output at the last time step.

        ``x`` is batch-first (the LSTM is built with ``batch_first=True``).
        """
        # With no explicit state, nn.LSTM starts from zero hidden/cell states
        # created on the input's device and dtype, so the model keeps working
        # after .cuda() or .double().
        out, _ = self.rnn(x)
        return self.fc(out[:, -1, :])

# LSTM model number 2: same as the model above, with a sigmoid applied to the output layer
class LSTM_news_classifier_2(nn.Module):
    """Single-layer LSTM, linear layer and sigmoid, giving one value in (0, 1) per class.

    Args:
        input_size: Size of each input vector (per time step).
        hidden_size: Number of LSTM hidden units.
        num_class: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_class):
        super(LSTM_news_classifier_2, self).__init__()
        self.name = "LSTM_2"
        self.hidden_size = hidden_size
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_class)
        self.af = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid-activated class scores from the LSTM output at the last time step.

        ``x`` is batch-first (the LSTM is built with ``batch_first=True``).
        """
        # With no explicit state, nn.LSTM starts from zero hidden/cell states
        # created on the input's device and dtype, so the model keeps working
        # after .cuda() or .double().
        out, _ = self.rnn(x)
        return self.af(self.fc(out[:, -1, :]))

In [None]:
# Instantiate the first classifier: input_size=50, hidden_size=64, num_class=7.
News_LSTM = LSTM_news_classifier(50, 64, 7)

### Training and Testing Code

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import time
import csv


def get_model_path(name, batch_size, learning_rate, epoch, exercise_code):
    """Build the checkpoint path for one epoch of a training run.

    Args:
        name: Model name.
        batch_size: Batch size used for the run.
        learning_rate: Learning rate used for the run.
        epoch: Epoch index of the checkpoint.
        exercise_code: Free-form tag identifying the run.
    Returns:
        path: A string under "./model/" with all of the values above embedded
    """
    file_name = (
        f"model_{name}_bs{batch_size}_lr{learning_rate}"
        f"_epoch{epoch}_exercise_{exercise_code}"
    )
    return f"./model/{file_name}"


def get_csv_path(name, batch_size, learning_rate, exercise_code):
    """Build the path of the csv file holding a run's training and validation metrics.

    Args:
        name: Model name.
        batch_size: Batch size used for the run.
        learning_rate: Learning rate used for the run.
        exercise_code: Free-form tag identifying the run.
    Returns:
        path: A string under "./model/" with all of the values above embedded
    """
    file_name = f"data_{name}_bs{batch_size}_lr{learning_rate}_exercise_{exercise_code}.csv"
    return f"./model/{file_name}"


def get_fig_path(name1, name2, batch_size, learning_rate, exercise_code):
    """Build the path of a figure file under "./model/".

    ``name1`` is placed right after the "fig_" prefix and ``name2`` is the
    final component before the ".png" extension.
    """
    file_name = f"fig_{name1}_bs{batch_size}_lr{learning_rate}_exercise_{exercise_code}_{name2}.png"
    return f"./model/{file_name}"

def find_the_best_model(val_acc):
    """ Locate the epoch whose validation accuracy is the highest

    Args:
        val_acc: validation accuracies, one entry per epoch
    Returns:
        (epoch, accuracy) of the first strictly largest entry; (-1, -1) when
        no entry is greater than -1 (e.g. an empty list)
    """
    best_epoch, best_acc = -1, -1
    for epoch, acc in enumerate(val_acc):
        # Strict comparison: on ties the earliest epoch is kept.
        if acc > best_acc:
            best_epoch, best_acc = epoch, acc
    return best_epoch, best_acc


def save_to_csv(path, epochs, train_losses, train_acc, val_losses, val_acc, header):
    """Write per-epoch training and validation metrics to a csv file.

    Args:
        path: Destination file; an existing file is overwritten.
        epochs: Epoch indices, one per row.
        train_losses: Training loss for each epoch.
        train_acc: Training accuracy for each epoch.
        val_losses: Validation loss for each epoch.
        val_acc: Validation accuracy for each epoch.
        header: Column names written as the first row.
    """
    # newline='' lets the csv module control line endings (avoids blank rows
    # on Windows); the context manager guarantees the file is flushed and closed.
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(zip(epochs, train_losses, train_acc, val_losses, val_acc))


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it does not exist yet."""
    import os

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def _validate(net, loader, criterion):
    """Return (correct, total, summed_loss) for *net* over *loader* without tracking gradients."""
    correct, total, summed_loss = 0, 0, 0
    with torch.no_grad():
        for articles, labels in loader:
            out = net(articles)
            pred = out.max(1)[1]
            correct = correct + pred.eq(torch.argmax(labels, dim=1)).sum().item()
            total = total + articles.shape[0]
            # criterion returns a batch mean; weight it by the batch size.
            summed_loss = summed_loss + criterion(out, labels).item() * articles.shape[0]
    return correct, total, summed_loss


def _plot_curve(title, epochs, values, label, ylabel, fig_path):
    """Draw one metric curve on its own figure, save it to *fig_path* and show it."""
    plt.figure()  # fresh figure so successive curves never overlap
    plt.title(title)
    plt.plot(epochs, values, label=label)
    plt.xlabel("Epochs")
    plt.ylabel(ylabel)
    plt.savefig(fig_path)
    plt.show()
    plt.close()


def train_net(net, batch_size, learning_rate, num_epochs, train_loader, val_loader, exercise_code):
    """Train *net* with Adam and MSE loss, checkpointing and logging every epoch.

    After each epoch the model's state_dict is saved to the path given by
    ``get_model_path``. When training ends, the per-epoch metrics are written
    to the csv file given by ``get_csv_path`` and four curves (training and
    validation loss and accuracy) are saved and shown.

    Args:
        net: Model to train; must expose a ``name`` attribute.
        batch_size: Batch size, used only to name the output files.
        learning_rate: Adam learning rate; must be > 0.
        num_epochs: Number of passes over ``train_loader``; must be > 0.
        train_loader: Iterable of ``(articles, one_hot_labels)`` training batches.
        val_loader: Iterable of ``(articles, one_hot_labels)`` validation batches.
        exercise_code: Free-form tag embedded in the output file names.
    """
    assert num_epochs > 0, "num_epochs must be an integer that is greater than 0"
    assert learning_rate > 0, "learning_rate must be greater than 0"
    torch.manual_seed(1000)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=learning_rate,
                                 weight_decay=1e-5)
    epochs, train_losses, train_acc, val_losses, val_acc = [], [], [], [], []
    was_training = net.training
    start_time = time.time()
    for epoch in range(num_epochs):
        epochs.append(epoch)
        total, correct = 0, 0
        total_loss = 0
        net.train()
        for articles, labels in train_loader:
            optimizer.zero_grad()
            out = net(articles)
            loss = criterion(out, labels)
            total_loss = total_loss + loss.item() * articles.shape[0]
            loss.backward()
            optimizer.step()
            # Predicted class = index of the largest output; true class =
            # index of the 1 in the one-hot label.
            pred = out.max(1)[1]
            correct = correct + pred.eq(torch.argmax(labels, dim=1)).sum().item()
            total = total + articles.shape[0]
        train_acc.append(correct/total)
        train_losses.append(total_loss/total)

        # Validation: evaluation mode and no autograd bookkeeping.
        net.eval()
        val_correct, val_total, val_total_loss = _validate(net, val_loader, criterion)
        val_losses.append(val_total_loss/val_total) # Append the average loss
        val_acc.append(val_correct/val_total)

        print("Epoch {0}:\ntraining accuracy: {1}\ttraining loss: {2}\tvalidation accuracy: {3}\tvalidation loss:{4}".format(epoch, train_acc[epoch], train_losses[epoch], val_acc[epoch], val_losses[epoch]))
        print("Correct number of outputs in validation: {0}\tTotal number of outputs in validation: {1}\tTotal validation loss {2}".format(val_correct, val_total, val_total_loss))
        model_path = get_model_path(net.name, batch_size, learning_rate, epoch, exercise_code)
        _ensure_parent_dir(model_path)
        torch.save(net.state_dict(), model_path)
    end_time = time.time()
    net.train(was_training)  # hand the model back in the mode it arrived in
    print("Total time:  % 6.2f s  Time per Epoch: % 6.2f s " % (
    (end_time - start_time), ((end_time - start_time) / num_epochs)))

    best_epoch, best_epoch_acc = find_the_best_model(val_acc)
    print("The best epoch: {0}\tAccuracy:{1}".format(best_epoch, best_epoch_acc))

    csv_path = get_csv_path(net.name, batch_size, learning_rate, exercise_code)
    header = ["Epoch", "Train Loss", "Train Accuracy", "Validation Loss", "Validation Accuracy"]
    _ensure_parent_dir(csv_path)
    save_to_csv(csv_path, epochs, train_losses, train_acc, val_losses, val_acc, header)

    # plotting: (title, values, legend label, y-axis label, file-name tag)
    curves = [
        ("Training Loss Curve", train_losses, "Train", "Train Loss", "Training_Loss"),
        ("Training Accuracy Curve", train_acc, "Training", "Training Accuracy", "Training_Acc"),
        ("Validation Loss Curve", val_losses, "Validation", "Validation Loss", "Val_Loss"),
        ("Validation Accuracy Curve", val_acc, "Validation", "Validation Accuracy", "Val_Acc"),
    ]
    for title, values, label, ylabel, tag in curves:
        fig_path = get_fig_path(net.name, tag, batch_size, learning_rate, exercise_code)
        _ensure_parent_dir(fig_path)
        _plot_curve(title, epochs, values, label, ylabel, fig_path)


def test_model(net_type, parameters, use_cuda, model_path, data_loader, criterion):
    state = torch.load(model_path)
    net = net_type(parameters[0], parameters[1], parameters[2])
    net.load_state_dict(state)
    if use_cuda and torch.cuda.is_available():
        net.cuda()
        print('CUDA is available!  Training on GPU ...')
    else:
        print('CUDA is not available.  Training on CPU ...')
    correct = 0
    total_loss = 0
    total = 0
    for articles, labels in data_loader:
        if use_cuda and torch.cuda.is_available():
            articles = articles.cuda()
            labels = labels.cuda()
        out = net(articles)
        pred = torch.squeeze(out.max(1, keepdim=True)[1], 1)
        correct = correct + pred.eq(torch.argmax(labels, dim=1)).sum().item()
        total = total + articles.shape[0]
        total_loss = total_loss + (criterion(out, labels)).item() * articles.shape[0]
    return correct, total, correct / total, total_loss / total

Train and test your model here

In [None]:
# Constructor arguments for LSTM_news_classifier: (input_size, hidden_size, num_class).
parameters = (50, 64, 7)
# Path of the checkpoint saved for model "LSTM_1", batch size 128, learning
# rate 0.01, epoch 16 of the run tagged "July_8_8_33".
model_path = get_model_path("LSTM_1", 128, 0.01, 16, "July_8_8_33")
# Evaluate on the test split on CPU (use_cuda=False) with MSE loss.
test_result = test_model(LSTM_news_classifier, parameters, False, model_path, test_loader, nn.MSELoss())
# test_result is (correct, total, accuracy, average loss).
print("Correct: {0}\tTotal: {1}\tAccuracy: {2}\tLoss: {3}".format(test_result[0], test_result[1], test_result[2], test_result[3]))