In [1]:
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from neural_verification import RNN

### Preparation ####

# Seed the global generators of both PyTorch and NumPy with the same value.
seed = 3
torch.manual_seed(seed)
np.random.seed(seed)

# Use float64 as the default floating-point dtype, then pick the compute
# device: CUDA when PyTorch reports it as available, otherwise the CPU.
torch.set_default_dtype(torch.float64)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

### load dataset ###

def load_data(mode='train'):
    """Load ``./data_<mode>.txt`` and split it into input and label arrays.

    The file is read with ``np.loadtxt`` as whitespace-separated string
    columns.  Columns 0 and 1 are the two input strings and column 2 is the
    label string; every string is split into single characters and each
    character is converted to a float.

    NOTE(review): assumes all strings in the file have the same length and
    consist of float-convertible characters (e.g. binary digits) -- confirm
    against the data generator.

    Args:
        mode: Suffix selecting the file to read (``'train'`` -> ``./data_train.txt``).

    Returns:
        A tuple ``(inputs, labels)`` of float NumPy arrays with shapes
        ``(num_rows, string_length, 2)`` and ``(num_rows, string_length, 1)``.
    """
    # ndmin=2 keeps the (rows, columns) layout even when the file holds a
    # single row; without it loadtxt returns a 1-D array and the column
    # slicing below would fail.
    data = np.loadtxt('./data_{}.txt'.format(mode), dtype='str', ndmin=2)

    def split_chars(strings):
        # One row per string, one column per character.
        return np.array([list(s) for s in strings])

    first, second, target = (split_chars(data[:, col]) for col in range(3))

    # Put the two input strings side by side on the last axis.
    inputs_ = np.stack([first, second], axis=-1).astype('float')
    labels_ = target[:, :, np.newaxis].astype('float')

    return inputs_, labels_

# Read both splits as NumPy arrays first.
inputs_train, labels_train = load_data(mode='train')
inputs_test, labels_test = load_data(mode='test')

# Convert to float64 tensors created directly on the target device.
# The data are constants of the optimisation, so they do not track gradients:
# only the model parameters need them, and tracking the data as well would
# make every backward pass compute unused input/label gradients.
inputs_train = torch.tensor(inputs_train, dtype=torch.float64, device=device)
labels_train = torch.tensor(labels_train, dtype=torch.float64, device=device)
inputs_test = torch.tensor(inputs_test, dtype=torch.float64, device=device)
labels_test = torch.tensor(labels_test, dtype=torch.float64, device=device)


def l1(model):
    """Return the sum of absolute values of all parameters of ``model``.

    The result is a 0-dim tensor built from the parameters themselves, so it
    lives on their device, uses their dtype and stays differentiable with
    respect to them (usable as an L1 regularisation term).

    Args:
        model: Any object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).

    Returns:
        A scalar tensor; a zero scalar when the model has no parameters.
    """
    total = None
    for param in model.parameters():
        term = param.abs().sum()
        # Out-of-place accumulation: no scratch tensor has to be allocated
        # on the host and copied to the device on every call.
        total = term if total is None else total + term

    if total is None:
        # No parameters at all: nothing to regularise.
        return torch.zeros(())
    return total
    

def _binary_accuracy(pred, target):
    """Fraction of entries where ``pred`` and ``target`` lie on the same side of 0.5."""
    return torch.mean(((pred - 0.5) * (target - 0.5) > 0).float())


model = RNN(hidden_dim=2, device=device).to(device)


### Training ###

optimizer = torch.optim.AdamW(model.parameters(), lr=0.01, weight_decay=0.0)
steps = 1001   # number of full-batch optimisation steps
log = 200      # print metrics every `log` steps
lamb = 0e-4    # weight of the L1 penalty; evaluates to 0, i.e. the penalty is off

for step in range(steps):

    optimizer.zero_grad()

    # Full-batch mean-squared-error objective plus the (weighted) L1 penalty.
    pred_train = model(inputs_train)
    loss_train = torch.mean((pred_train - labels_train) ** 2)

    reg = l1(model)
    loss = loss_train + lamb * reg

    if step % log == 0:
        # Metrics are only needed for the log line, so they are computed on
        # logging steps alone and without building an autograd graph.  This
        # happens before the optimizer update, so the reported numbers describe
        # the same parameters as the training loss of this step.
        # NOTE(review): assumes RNN.forward has no side effects / RNG use, so
        # skipping the test pass on other steps leaves training unchanged.
        with torch.no_grad():
            acc_train = _binary_accuracy(pred_train, labels_train)
            pred_test = model(inputs_test)
            loss_test = torch.mean((pred_test - labels_test) ** 2)
            acc_test = _binary_accuracy(pred_test, labels_test)
        print("step = %d | train loss: %.2e | test loss %.2e | train acc: %.2e | test acc: %.2e | reg: %.2e "%(step, loss_train.item(), loss_test.item(), acc_train.item(), acc_test.item(), reg.item()))

    loss.backward()
    optimizer.step()

# Persist the trained weights next to the notebook.
torch.save(model.state_dict(), './model')

cuda
step = 0 | train loss: 2.50e-01 | test loss 2.50e-01 | train acc: 4.63e-01 | test acc: 4.41e-01 | reg: 4.91e+00 
step = 200 | train loss: 1.33e-01 | test loss 1.18e-01 | train acc: 8.03e-01 | test acc: 8.41e-01 | reg: 2.64e+01 
step = 400 | train loss: 6.26e-02 | test loss 5.23e-02 | train acc: 9.34e-01 | test acc: 9.46e-01 | reg: 4.07e+01 
step = 600 | train loss: 3.66e-03 | test loss 3.38e-03 | train acc: 1.00e+00 | test acc: 1.00e+00 | reg: 4.53e+01 
step = 800 | train loss: 8.54e-04 | test loss 8.55e-04 | train acc: 1.00e+00 | test acc: 1.00e+00 | reg: 4.64e+01 
step = 1000 | train loss: 6.05e-04 | test loss 6.04e-04 | train acc: 1.00e+00 | test acc: 1.00e+00 | reg: 4.60e+01 
