In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so files stored
# on Drive can be reached through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Change the working directory to the project folder on Drive (presumably so the
# local `dataset` and `lstm` modules imported later can be found — confirm).
# NOTE(review): the recorded output of this cell shows a different directory
# (.../Colab Notebooks/NNDL/Project/ARDL) than the one requested here, so the
# stored output looks stale; re-run and confirm which path is the right one.
%cd /content/drive/MyDrive/Github/ARDL

/content/drive/MyDrive/Colab Notebooks/NNDL/Project/ARDL


In [None]:
# Enable the autoreload extension in mode 2 so edited modules are re-imported
# automatically before code is executed.
%load_ext autoreload
%autoreload 2

# NOTE(review): the star import hides the origin of names used in later cells
# (`categories`, `create_inp_data_and_weights`, `get_batch`, and presumably
# `torch`, `nn`, `np`) — confirm they come from `dataset` and prefer explicit imports.
from dataset import *
# TensorDataset / DataLoader are not referenced by any cell visible in this notebook.
from torch.utils.data import TensorDataset, DataLoader

In [None]:
from sklearn.datasets import fetch_20newsgroups

# Fetch the train split first, then the test split, both restricted to the same
# `categories` selection (a name defined outside this cell).
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=split, categories=categories)
    for split in ('train', 'test')
)


In [None]:
# Build the embedding matrix and the model inputs from both newsgroup splits,
# requesting 50-dimensional GloVe vectors (glove_dim=50). The helper is defined
# outside this notebook (presumably in `dataset` — confirm).
# Later cells index `data` with 'train' / 'test' and unpack `emb_mat.shape`
# into two values, so `emb_mat` is expected to be 2-D.
emb_mat, data = create_inp_data_and_weights(newsgroups_train, newsgroups_test, glove_dim=50)

Vocabulary size [Bag-of-words]:  151353
Vocabulary size [GloVe]:  400000


In [None]:
from lstm import *
# The two dimensions of the embedding matrix are forwarded to the model builder
# (presumably vocabulary size and embedding width — confirm against `lstm`).
input_size, emb_size = emb_mat.shape

# NOTE(review): the meaning of the positional arguments is not visible from this
# notebook. The recorded repr (num_layers=6, hidden size 45, out_features=7)
# suggests (3, 8) and (32, 128) are sampling ranges for layer count and hidden
# size and that 7 is the number of output classes — confirm in `lstm`.
# No random seed is set in the visible cells, so the sampled architecture may
# differ between runs.
model = build_random_lstm(3, 8, 32, 128, input_size, emb_size, 7, emb_mat)
# Bare expression: lets the notebook display the resulting module structure.
model

LSTM(
  (embedding): Embedding(151353, 50)
  (lstm): LSTM(50, 45, num_layers=6, batch_first=True, dropout=0.5)
  (fc): Linear(in_features=22500, out_features=7, bias=True)
  (att): Linear(in_features=22500, out_features=7, bias=True)
)

In [None]:
# Hyper-parameters read by the training and evaluation cells.
learning_rate = 0.001  # learning rate handed to Adam below
num_epochs = 15        # number of iterations of the outer training loop
batch_size = 512       # batch size passed to get_batch and used to count batches
display_step = 4       # the training loss is printed every `display_step` batches

# NOTE(review): a fragment in the recorded training output
# (`return a, F.log_softmax(y)`) suggests the model already returns
# log-probabilities, while CrossEntropyLoss applies log-softmax itself —
# confirm whether nn.NLLLoss was intended.
criterion = nn.CrossEntropyLoss()  
# Adam over all model parameters with the learning rate defined above.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

In [None]:
from torch.autograd import Variable

In [None]:
# Train for `num_epochs` epochs. After every epoch the mean training loss and
# the mean validation loss (over the test split) are appended to
# `training_loss` and `validation_loss` as plain Python floats.
training_loss = []
validation_loss = []

# Integer division: only full batches are requested from get_batch, a trailing
# partial batch is skipped. Both counts are loop-invariant, so compute them once.
total_batch = len(newsgroups_train.data) // batch_size
total_batch_val = len(newsgroups_test.data) // batch_size

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    tr_loss = []
    for i in range(total_batch):
        batch_x, batch_y = get_batch(data['train'], i, batch_size)

        # Plain tensors are enough; torch.autograd.Variable is a deprecated
        # wrapper that simply returns a tensor.
        articles = torch.LongTensor(batch_x)
        labels = torch.LongTensor(batch_y)

        # Forward + backward + optimize
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        _, outputs = model(articles)  # first return value is not used here
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The batch loss has to be recorded here; with an empty list the epoch
        # mean computed below would always be 0. `.item()` stores a float
        # rather than a tensor.
        tr_loss.append(loss.item())

        if (i+1) % display_step == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                   %(epoch+1, num_epochs, i+1, total_batch, loss.item()))

    training_loss.append(sum(tr_loss)/total_batch)

    # ---- validation pass ----
    model.eval()
    batch_val_loss = []
    # No gradients are needed for evaluation; disabling autograd avoids building
    # a graph for every validation batch.
    with torch.no_grad():
        for i in range(total_batch_val):
            batch_x, batch_y = get_batch(data['test'], i, batch_size)

            articles = torch.LongTensor(batch_x)
            labels = torch.LongTensor(batch_y)

            _, y_test_pred = model(articles)

            test_loss = criterion(y_test_pred, labels)
            batch_val_loss.append(test_loss.item())

    validation_loss.append(sum(batch_val_loss)/total_batch_val)

  return a, F.log_softmax(y)


Epoch [1/15], Step [4/22], Loss: 1.7907
Epoch [1/15], Step [8/22], Loss: 1.7704
Epoch [1/15], Step [12/22], Loss: 1.7846
Epoch [1/15], Step [16/22], Loss: 1.8060
Epoch [1/15], Step [20/22], Loss: 1.7627
Epoch [2/15], Step [4/22], Loss: 1.7487
Epoch [2/15], Step [8/22], Loss: 1.7506
Epoch [2/15], Step [12/22], Loss: 1.7714
Epoch [2/15], Step [16/22], Loss: 1.7782
Epoch [2/15], Step [20/22], Loss: 1.7629
Epoch [3/15], Step [4/22], Loss: 1.7338
Epoch [3/15], Step [8/22], Loss: 1.6231
Epoch [3/15], Step [12/22], Loss: 1.5430
Epoch [3/15], Step [16/22], Loss: 1.4624
Epoch [3/15], Step [20/22], Loss: 1.3953
Epoch [4/15], Step [4/22], Loss: 1.4455
Epoch [4/15], Step [8/22], Loss: 1.3294
Epoch [4/15], Step [12/22], Loss: 1.3126
Epoch [4/15], Step [16/22], Loss: 1.2976
Epoch [4/15], Step [20/22], Loss: 1.2771
Epoch [5/15], Step [4/22], Loss: 1.2900
Epoch [5/15], Step [8/22], Loss: 1.2243
Epoch [5/15], Step [12/22], Loss: 1.2478
Epoch [5/15], Step [16/22], Loss: 1.1986
Epoch [5/15], Step [20/22]

In [None]:
from matplotlib import pyplot as plt

In [None]:
# Output folder for this run's artefacts (figure here, checkpoint in a later cell).
folder_name = 'models/lstm/model_2_09_03/'

import os
from matplotlib import pyplot as plt

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(folder_name, exist_ok=True)

# Convert the per-epoch loss histories to arrays for plotting.
training_loss = np.array(training_loss)
validation_loss = np.array(validation_loss)

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, ax = plt.subplots()
ax.plot(training_loss)
ax.plot(validation_loss)
ax.legend(["Training Loss", "Validation Loss"])
ax.set_ylabel('Loss')
ax.set_xlabel('Epochs')

# Write the file before plt.show() so the image is saved regardless of how the
# active backend treats the figure once it has been displayed.
fig.savefig(folder_name + 'loss_vs_epoch.png')
plt.show()

In [None]:
# Final accuracy over every full batch of the test split.
total_batch = len(newsgroups_test.data) // batch_size  # trailing partial batch is skipped
correct = 0
total = 0

# Evaluation mode only needs to be set once, before the loop.
model.eval()
# Gradients are not needed for scoring.
with torch.no_grad():
    for i in range(total_batch):
        batch_x, batch_y = get_batch(data['test'], i, batch_size)

        articles = torch.LongTensor(batch_x)
        labels = torch.LongTensor(batch_y)

        _, y_test_pred = model(articles)
        # Predicted class = index of the largest score along dimension 1.
        predicted = torch.argmax(y_test_pred, 1)

        # Accumulate over all batches. The counters must not be reset inside
        # the loop, otherwise only the last batch contributes to the result.
        total += y_test_pred.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model is: {100*correct/total:.2f}%')

In [None]:
# Checkpoint destination and the metrics stored next to the weights.
# LOSS and ACCURACY are literal values typed into this cell; they are not read
# from the variables produced by the training or evaluation cells.
PATH = folder_name + "model.pt"
LOSS = 0.8401
ACCURACY = 78.1

# Persist hyper-parameters, metrics and the model/optimizer state dicts in one file.
torch.save(
    dict(
        accuracy=ACCURACY,
        epoch=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        loss=LOSS,
    ),
    PATH,
)

# Additionally pickle the complete model object so the architecture is stored too.
import pickle
file_name = folder_name + 'model_architecture.pickle'
with open(file_name, mode='wb') as arch_file:
    pickle.dump(model, arch_file)