In [None]:
import numpy as np
from preprocess import load_training_set, load_validation_set, load_dict, process_scores
import pandas as pd

# Load the training essays from the TSV file.
# The second argument (1) is forwarded unchanged to load_training_set;
# presumably it selects the essay set / prompt id -- TODO confirm in preprocess.py.
train_path = './essay_dataset/training_set_rel3.tsv'
training_data = load_training_set(train_path, 1)

# Lookup object handed to every DataLoader built later in this notebook.
# Judging by the file name these are presumably GloVe 840B/300d word vectors -- confirm.
# NOTE(review): the '.pkl' suffix suggests a pickle; only load it from a trusted source.
glove_dict = load_dict('glove.840B.300d.pkl')

In [None]:
# Load the validation essays; their labels come from a separate CSV file.
# The trailing 1 matches the value passed to load_training_set for the training
# split -- presumably the same essay set / prompt id, TODO confirm.
valid_path = './essay_dataset/valid_set.tsv'
valid_label_path = './essay_dataset/valid_sample_submission_2_column.csv'
valid_data = load_validation_set(valid_path, valid_label_path, 1)

In [None]:
# Run process_scores on both splits, naming 'domain1_score' as the score field
# (what the processing does is defined in preprocess.py and not visible here).
# NOTE(review): each name is rebound to its own processed result, so re-running
# this cell feeds already-processed data back into process_scores -- confirm that
# is harmless, or restart the kernel before re-running.
training_data = process_scores(training_data, 'domain1_score')
valid_data = process_scores(valid_data, 'domain1_score')

In [None]:
from configs import cfg

# Hyper-parameter overrides for this run. They are written into the shared cfg
# one key at a time, in the order listed, via plain item assignment.
_cfg_overrides = {
    # --- network shape ---
    'input_dim': 301,        # input dimension of the recurrent network
    'hidden_dim': 256,       # hidden dimension of the recurrent network
    'output_dim': 1,         # output dimension of the model
    'layers': 2,             # number of stacked recurrent layers

    # --- regularisation / optimisation ---
    'dropout': 0.005,        # dropout rate between stacked layers (0..1); only matters when layers > 1
    'bidirectional': True,   # True -> bidirectional recurrent network
    'batch_size': 50,        # number of examples per input batch
    'learning_rate': 2e-4,   # optimizer learning rate
    'L2_penalty': 1e-5,      # L2 weight; passed to the optimizer as weight_decay
    'epochs': 20,            # number of training epochs
    'embed': False,          # meaning not visible in this notebook -- confirm in models/dataloader
}
for _name, _setting in _cfg_overrides.items():
    cfg[_name] = _setting
# Drop the temporaries so the kernel namespace only gains the cfg changes.
del _cfg_overrides, _name, _setting

In [None]:
from dataloader import *
from models import *
import time

# Build the scoring network from cfg and place it on the configured device.
# (The second positional argument, True, is defined by GRU_Score in models.py.)
model = GRU_Score(cfg, True).to(torch.device(cfg['device']))

# Mean-squared-error objective.
criterion = nn.MSELoss()

# Adam with the configured learning rate and L2 penalty (as weight decay).
# An RMSprop optimizer with the same two settings was the earlier alternative.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=cfg['learning_rate'],
    weight_decay=cfg['L2_penalty'],
)

# Every example of each split, addressed by position: 0 .. len(split) - 1.
train_indices, valid_indices = (
    list(range(len(split))) for split in (training_data, valid_data)
)

print('ready')

In [None]:
# Switch the model to training mode ahead of the training loop in the next cell.
# Being the last expression of the cell, the call's return value is echoed as
# cell output (for a torch nn.Module that is the module itself -- assumes
# GRU_Score is one).
model.train()

In [None]:
# Train for cfg['epochs'] epochs.
#   * every LOG_EVERY training batches: print the mean loss of those batches,
#     write a checkpoint, and append that mean to train_losses;
#   * after each epoch: compute the mean validation loss and append it to val_losses.
# Both losses are MSE on predictions/labels multiplied by LOSS_SCALE.
LOG_EVERY = 2     # batches between progress print / checkpoint / train_losses entry
LOSS_SCALE = 10   # factor applied to both prediction and label before the MSE

train_losses = []  # one entry per LOG_EVERY training batches
val_losses = []    # one entry per epoch

for epoch in range(cfg['epochs']):
    # Loaders are consumed through has_next()/get_next(), so build fresh ones per epoch.
    tloader = DataLoader(training_data, train_indices, cfg, glove_dict)
    vloader = DataLoader(valid_data, valid_indices, cfg, glove_dict)

    # ---------------- training pass ----------------
    # Make sure dropout etc. are active even if an earlier cell left the model in eval mode.
    model.train()
    count = 0
    avg_loss = 0
    while tloader.has_next():
        train, label = tloader.get_next()

        model.zero_grad()

        y = model(train)

        loss = criterion(y * LOSS_SCALE, label * LOSS_SCALE)
        loss.backward()
        optimizer.step()

        count += 1
        avg_loss += loss.item()
        if count % LOG_EVERY == 0:
            print("count = %d, loss = %.5f" % (count, avg_loss / LOG_EVERY))
            # Checkpoint name encodes the 1-based epoch and the batch count within it.
            save_model(model, 'models_GRU/e' + str(epoch + 1) + 'b' + str(count) + '.pt')
            train_losses.append(avg_loss / LOG_EVERY)
            avg_loss = 0
        # Release batch tensors before fetching the next batch.
        del train, label, y, loss

    # ---------------- validation pass ----------------
    # torch.no_grad() only disables gradient tracking; eval() is what turns off
    # dropout, so both are needed for a deterministic validation loss.
    model.eval()
    count = 0
    avg_loss = 0
    with torch.no_grad():
        while vloader.has_next():
            train, label = vloader.get_next()
            y = model(train)
            loss = criterion(y * LOSS_SCALE, label * LOSS_SCALE)
            count += 1
            avg_loss += loss.item()
            del train, label, y, loss
    # Leave the model in training mode, as it was before validation.
    model.train()

    # An empty validation loader yields NaN instead of a ZeroDivisionError.
    val_loss = avg_loss / count if count else float('nan')
    print('validation loss:', val_loss)
    print('epoch finished:', epoch + 1)
    val_losses.append(val_loss)
    

In [8]:
# Spot check on a single validation batch of 5 examples: print the per-example
# residual (label - prediction) and the unscaled MSE of that one batch.
# cfg['batch_size'] and the model's train/eval mode are changed only for the
# duration of the check and restored afterwards, so re-running the training
# cell later still uses the configured batch size.
SPOT_CHECK_BATCH_SIZE = 5

_saved_batch_size = cfg['batch_size']
_was_training = model.training  # assumes model is a torch nn.Module
cfg['batch_size'] = SPOT_CHECK_BATCH_SIZE
model.eval()  # no_grad() alone would leave dropout active
count = 0
avg_loss = 0
try:
    vloader = DataLoader(valid_data, valid_indices, cfg, glove_dict)
    with torch.no_grad():
        while vloader.has_next():
            train, label = vloader.get_next()
            y = model(train)
            print(label - y)
            loss = criterion(y, label)
            count += 1
            avg_loss += loss.item()
            del train, label, y, loss
            # Deliberately inspect only the first batch.
            break
finally:
    # Restore the shared state even if the check is interrupted.
    cfg['batch_size'] = _saved_batch_size
    model.train(_was_training)
    del _saved_batch_size, _was_training

# NaN (not ZeroDivisionError) when the loader produced no batch.
print('validation loss:', avg_loss / count if count else float('nan'))

tensor([[-3.1493],
        [-0.8064],
        [ 1.7591],
        [-1.7096],
        [-2.6727]], device='cuda:0')
validation loss: 4.745800495147705


In [None]:
import matplotlib.pyplot as plt
import numpy as np


# Plot training and validation loss on a shared epoch axis.
# train_losses holds several entries per epoch, val_losses one per finished epoch.
n_train, n_val = len(train_losses), len(val_losses)

# Training entries logged per epoch. When no epoch has finished yet (n_val == 0)
# all logged points are treated as belonging to the first epoch, instead of
# raising ZeroDivisionError; the outer max() covers the empty-history case.
steps_per_epoch = max(n_train // max(n_val, 1), 1)

fig, ax = plt.subplots(figsize=(10, 5))
# Each value is recorded *after* its batches ran, so the i-th training entry sits
# at (i + 1) / steps_per_epoch; this lines the last entry of epoch k up with the
# validation point at x = k.
ax.plot(np.arange(1, n_train + 1) / steps_per_epoch, train_losses, label='training loss')
ax.plot(np.arange(1, n_val + 1), val_losses, label='validation loss')

ax.set_title('Training and validation loss per epoch')
ax.set_xlabel('epoch')
ax.set_ylabel('loss value')
ax.legend();