In [35]:
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from model import RNN

### Preparation ####

# Reproducibility: seed the NumPy and PyTorch RNGs with the same value.
seed = 3
torch.manual_seed(seed)
np.random.seed(seed)

# Work in single precision and prefer the GPU whenever one is available.
torch.set_default_dtype(torch.float32)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

### load dataset ###

def load_data(mode='train'):
    """Load ``./data_<mode>.txt`` and convert it to float arrays.

    The file is read as whitespace-separated strings. Columns 0 and 1 are
    the two input strings and column 2 is the label string; every string is
    split into single characters and each character is cast to float.
    All strings are assumed to have the same length (otherwise the arrays
    are not rectangular).

    Args:
        mode: Suffix selecting the file, e.g. ``'train'`` or ``'test'``.

    Returns:
        A tuple ``(inputs_, labels_)`` where ``inputs_`` has shape
        ``(num_rows, string_length, 2)`` and ``labels_`` has shape
        ``(num_rows, string_length, 1)``.
    """
    # ndmin=2 keeps a single-row file two-dimensional, so the column
    # indexing below works for any number of rows >= 1.
    data = np.loadtxt('./data_{}.txt'.format(mode), dtype='str', ndmin=2)

    def strs2mat(strings):
        # One row per string, one column per character.
        return [list(s) for s in strings]

    first = strs2mat(data[:, 0])
    second = strs2mat(data[:, 1])

    # Stack the two input strings along a trailing "channel" axis.
    inputs_ = np.stack([first, second], axis=-1).astype('float')
    labels_ = np.array(strs2mat(data[:, 2]))[:, :, np.newaxis].astype('float')

    return inputs_, labels_

# Read both splits as NumPy arrays.
inputs_train, labels_train = load_data(mode='train')
inputs_test, labels_test = load_data(mode='test')

# Convert to float tensors created directly on the target device.
# The dataset is constant: only the model parameters are optimised, so these
# tensors do not track gradients (requires_grad is left at its default False).
inputs_train = torch.tensor(inputs_train, dtype=torch.float, device=device)
labels_train = torch.tensor(labels_train, dtype=torch.float, device=device)
inputs_test = torch.tensor(inputs_test, dtype=torch.float, device=device)
labels_test = torch.tensor(labels_test, dtype=torch.float, device=device)

def l1(model):
    """Return the L1 norm of all parameters of ``model`` as a scalar tensor.

    The result is the sum of absolute values over every tensor yielded by
    ``model.parameters()``. It lives on the same device as the parameters
    and stays attached to the autograd graph, so it can be added to a loss.

    Args:
        model: Any object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).

    Returns:
        A 0-dim tensor. For a model without parameters, a zero scalar on the
        CPU is returned.
    """
    terms = [torch.sum(torch.abs(param)) for param in model.parameters()]
    if not terms:
        return torch.tensor(0.)

    # Accumulate out of place, starting from the first term, so the result
    # inherits the parameters' device instead of relying on a global.
    l1_reg = terms[0]
    for term in terms[1:]:
        l1_reg = l1_reg + term
    return l1_reg
    

# Build the network and place it on the selected device.
model = RNN(hidden_dim=2, device=device).to(device)


### Training ###

optimizer = torch.optim.AdamW(model.parameters(), lr=0.01, weight_decay=0.0)
steps = 3000  # number of full-batch optimisation steps
log = 200     # print metrics every `log` steps
lamb = 0e-4   # weight of the L1 penalty; 0 disables it (reg is still logged)

for step in range(steps):

    optimizer.zero_grad()

    # Full-batch forward pass on the training set with an MSE objective.
    pred_train = model(inputs_train)
    loss_train = torch.mean((pred_train - labels_train) ** 2)
    # NOTE: "acc" is defined as 1 - MSE, not a classification accuracy,
    # so it can be negative while the loss is above 1.
    acc_train = 1 - loss_train

    # The test metrics are only monitored, never back-propagated, so skip
    # building an autograd graph for them. They are still computed every
    # step so loss_test/acc_test hold the final-step values after the loop.
    with torch.no_grad():
        pred_test = model(inputs_test)
        loss_test = torch.mean((pred_test - labels_test) ** 2)
        acc_test = 1 - loss_test

    reg = l1(model)
    loss = loss_train + lamb * reg

    loss.backward()
    optimizer.step()

    if step % log == 0:
        print("step = %d | train loss: %.2e | test loss %.2e | train acc: %.2e | test acc: %.2e | reg: %.2e " % (
            step,
            loss_train.item(),
            loss_test.item(),
            acc_train.item(),
            acc_test.item(),
            reg.item(),
        ))

# Persist the trained weights only (state dict, not the whole module).
torch.save(model.state_dict(), './model')

cuda
step = 0 | train loss: 2.45e+00 | test loss 2.40e+00 | train acc: -1.45e+00 | test acc: -1.40e+00 | reg: 6.06e+00 
step = 200 | train loss: 2.36e-01 | test loss 2.38e-01 | train acc: 7.64e-01 | test acc: 7.62e-01 | reg: 7.36e+00 
step = 400 | train loss: 2.09e-01 | test loss 2.13e-01 | train acc: 7.91e-01 | test acc: 7.87e-01 | reg: 9.69e+00 
step = 600 | train loss: 1.83e-01 | test loss 1.88e-01 | train acc: 8.17e-01 | test acc: 8.12e-01 | reg: 1.23e+01 
step = 800 | train loss: 1.72e-01 | test loss 1.77e-01 | train acc: 8.28e-01 | test acc: 8.23e-01 | reg: 1.42e+01 
step = 1000 | train loss: 1.67e-01 | test loss 1.72e-01 | train acc: 8.33e-01 | test acc: 8.28e-01 | reg: 1.58e+01 
step = 1200 | train loss: 1.29e-01 | test loss 1.33e-01 | train acc: 8.71e-01 | test acc: 8.67e-01 | reg: 1.93e+01 
step = 1400 | train loss: 1.51e-02 | test loss 1.55e-02 | train acc: 9.85e-01 | test acc: 9.85e-01 | reg: 2.65e+01 
step = 1600 | train loss: 7.66e-05 | test loss 7.81e-05 | train acc: 1.0