In [1]:
import numpy as np
from preprocess import load_training_set, load_validation_set, load_dict, process_scores
import pandas as pd

train_path = './essay_dataset/training_set_rel3.tsv'
training_data = load_training_set(train_path)

glove_dict = load_dict('glove.840B.300d.pkl')

In [2]:
valid_path = './essay_dataset/valid_set.tsv'
valid_label_path = './essay_dataset/valid_sample_submission_2_column.csv'
valid_data = load_validation_set(valid_path, valid_label_path)

In [3]:
training_data = process_scores(training_data, 'domain1_score')
valid_data = process_scores(valid_data, 'domain1_score')

In [4]:
from configs import cfg

cfg['input_dim'] = 301 # input dimension to LSTM
cfg['hidden_dim'] = 256 # hidden dimension for LSTM
cfg['output_dim'] = 1 # output dimension of the model
cfg['layers'] = 2 # number of layers of LSTM

cfg['dropout'] = 0.005 # dropout rate between two layers of LSTM; useful only when layers > 1; between 0 and 1
cfg['bidirectional'] = True # True or False; True means using a bidirectional LSTM
cfg['batch_size'] = 50 # batch size of input
cfg['learning_rate'] = 2e-4 # learning rate to be used
cfg['L2_penalty'] = 1e-5 # weighting constant for L2 regularization term; this is a parameter when you define optimizer
cfg['epochs'] = 20 # number of epochs for which the model is trained
cfg['embed'] = False

In [5]:
from dataloader import *
from models import *
import time

model = GRU_Score(cfg, True)
model = model.to(torch.device(cfg['device']))
criterion = nn.MSELoss()

#optimizer = torch.optim.RMSprop(model.parameters(), lr=cfg['learning_rate'], weight_decay=cfg['L2_penalty'])
optimizer = torch.optim.Adam(model.parameters(), lr=cfg['learning_rate'], weight_decay=cfg['L2_penalty'])

train_indices = list(range(len(training_data)))
valid_indices = list(range(len(valid_data)))

print('ready')

ready


In [6]:
model.train()

GRU_Score(
  (embed_linear): Linear(in_features=301, out_features=301, bias=True)
  (gru): GRU(301, 256, num_layers=2, batch_first=True, dropout=0.005, bidirectional=True)
  (fc1): Linear(in_features=1024, out_features=64, bias=True)
  (fc1_normed): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc2_normed): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
)

In [7]:
 for epoch in range(cfg['epochs']):
    tloader = DataLoader(training_data, train_indices, cfg, glove_dict)
    vloader = DataLoader(valid_data, valid_indices, cfg, glove_dict)
    
    count = 0
    avg_loss = 0
    while tloader.has_next():
        train, label = tloader.get_next()
        
        model.zero_grad()
        
        y = model(train)
        
        
        loss = criterion(y, label)
        loss.backward()
        optimizer.step()

        count += 1
        avg_loss += loss.item()
        if count % 20 == 0:
            print("count = %d, loss = %.5f" %(count, avg_loss / 20))
            save_model(model, 'models_GRU/e' + str(epoch + 1) + 'b' + str(count) + '.pt')
            avg_loss = 0
        del train, label, y, loss
    
    count = 0
    avg_loss = 0
    with torch.no_grad():
        while vloader.has_next():
            train, label = vloader.get_next()
            y = model(train)
            loss = criterion(y, label)
            count += 1
            avg_loss += loss.item()
            del train, label, y, loss
    print('validation loss:', avg_loss / count)
    print('epoch finished:', epoch + 1)
    

count = 20, loss = 38.83587
count = 40, loss = 37.36517
count = 60, loss = 35.46296
count = 80, loss = 35.82201
count = 100, loss = 35.42758
count = 120, loss = 34.15661
count = 140, loss = 34.00531
count = 160, loss = 32.45257
count = 180, loss = 32.32402
count = 200, loss = 31.59592
count = 220, loss = 32.04087
count = 240, loss = 31.39356
validation loss: 30.113539673033216
epoch finished: 1
count = 20, loss = 28.69732
count = 40, loss = 28.51657
count = 60, loss = 27.97312
count = 80, loss = 27.27143
count = 100, loss = 27.28612
count = 120, loss = 26.23125
count = 140, loss = 25.64420
count = 160, loss = 25.17755
count = 180, loss = 26.02645
count = 200, loss = 24.44790
count = 220, loss = 23.27822
count = 240, loss = 24.04417
validation loss: 22.816298235030402
epoch finished: 2
count = 20, loss = 22.55088
count = 40, loss = 21.54279
count = 60, loss = 22.07767
count = 80, loss = 21.22541
count = 100, loss = 21.87920
count = 120, loss = 19.07689
count = 140, loss = 19.95761
count

In [8]:
cfg['batch_size'] = 5
vloader = DataLoader(valid_data, valid_indices, cfg, glove_dict)
count = 0
avg_loss = 0
with torch.no_grad():
    while vloader.has_next():
        train, label = vloader.get_next()
        y = model(train)
        #y = y.permute(0, 2, 1)
        print(label - y)
        loss = criterion(y, label)
        count += 1
        avg_loss += loss.item()
        del train, label, y, loss
        break
print('validation loss:', avg_loss / count)

tensor([[-3.1493],
        [-0.8064],
        [ 1.7591],
        [-1.7096],
        [-2.6727]], device='cuda:0')
validation loss: 4.745800495147705
