In [12]:
import os
import torch
import math
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from dataloader import dataset
from model import FFD, TRT

In [13]:
# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

In [14]:
def loss_fn(truth, ReP):
    """Mean absolute error (L1) between the targets and a model prediction.

    Args:
        truth: Target tensor.
        ReP: Indexable model output; only its first element ``ReP[0]`` (a
            tensor) is used as the prediction.

    Returns:
        A 0-dim tensor holding ``mean(|truth - ReP[0]|)``, located on the
        prediction's device.
    """
    prediction = ReP[0]
    # Bring the target to wherever the prediction already lives instead of
    # relying on a notebook-global `device`; this keeps the function
    # self-contained and avoids moving the model output around.
    target = truth.to(prediction.device)
    return torch.mean(torch.abs(target - prediction))

In [15]:
def eval(FFD, TRT, valloader):
    """Compute the mean validation loss of the TRT model.

    Only the TRT branch is evaluated; the FFD branch is currently disabled,
    so the first returned value is always ``0.0``.

    Args:
        FFD: FFD model. Accepted to keep the signature stable; not used.
        TRT: TRT model. Called as ``TRT(sentence)``; its output is handed to
            ``loss_fn``.
        valloader: Sized iterable yielding
            ``(sentence, FFDAvg, FFDStd, TRTAvg, TRTStd)`` tuples.

    Returns:
        tuple: ``(lf, lt)`` where ``lf`` is ``0.0`` and ``lt`` is the mean of
        the per-batch TRT losses returned by ``loss_fn``.

    Raises:
        ValueError: If ``valloader`` yields no batches (the mean would be
            undefined).
    """
    n_batches = len(valloader)
    if n_batches == 0:
        raise ValueError("eval() needs a non-empty valloader")

    # Validate in eval mode (disables dropout etc.). Remember every
    # sub-module's own flag so that parts which were deliberately left in
    # eval mode are restored exactly, not blanket-switched back to training.
    is_module = isinstance(TRT, torch.nn.Module)
    saved_modes = [(m, m.training) for m in TRT.modules()] if is_module else []
    if is_module:
        TRT.eval()

    lt = 0
    try:
        # No gradients are needed here; without this every batch's autograd
        # graph would stay alive through the running sum.
        with torch.no_grad():
            loop = tqdm(valloader, total=n_batches, leave=False)
            for sentence, FFDAvg, FFDStd, TRTAvg, TRTStd in loop:
                sentence = [w[0] for w in sentence]
                # torch.cat requires both inputs on one device.
                True_TRT = torch.cat((TRTAvg, TRTStd.to(TRTAvg.device))).T

                TRT_I = TRT(sentence)
                lt = lt + loss_fn(True_TRT, TRT_I)
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

    lf = 0.0
    lt = lt / n_batches

    return lf, lt

In [16]:
def train(FFD, TRT, trainloader, valloader, epochs, optimizerF, optimizerT, lr=0.000001):
    """Train the TRT model and checkpoint it on validation improvements.

    Only the TRT branch is optimised; the FFD model and ``optimizerF`` are
    untouched during training and are only serialised in the final checkpoint.

    Per epoch: one pass over ``trainloader`` (one optimiser step per batch),
    then ``eval`` on ``valloader``. Whenever the validation loss improves, the
    TRT weights are saved to
    ``./checkpoints/best_checkpointT<val_loss>.pth``. After the last epoch,
    ``final_checkpointF.pth`` and ``final_checkpointT.pth`` are written with
    the epoch count, model state and optimiser state. Mean train and
    validation losses are logged to TensorBoard under ``./log/``.

    Args:
        FFD: FFD model (only saved, not trained).
        TRT: TRT model to train; called as ``TRT(sentence)``.
        trainloader: Sized iterable yielding
            ``(sentence, FFDAvg, FFDStd, TRTAvg, TRTStd)`` tuples.
        valloader: Loader passed to ``eval`` after every epoch.
        epochs: Number of epochs to run.
        optimizerF: Optimiser of the FFD model (only its state is saved).
        optimizerT: Optimiser of the TRT model.
        lr: Learning rate written into ``optimizerT.param_groups[0]`` before
            training. Defaults to the previously hard-coded ``0.000001``.

    Returns:
        None.
    """
    checkpoint_dir = os.path.join('.', 'checkpoints')
    # torch.save does not create missing directories.
    os.makedirs(checkpoint_dir, exist_ok=True)

    # The rate is constant, so set it once instead of on every step.
    optimizerT.param_groups[0]['lr'] = lr

    n_train = len(trainloader)
    best_lt = float('inf')
    writer = SummaryWriter("./log/")
    try:
        for epoch in range(epochs):
            loop = tqdm(trainloader, total=n_train, leave=False)
            loop.set_description(f'Epoch [{epoch}/{epochs}]')
            train_lt = 0.0

            for sentence, FFDAvg, FFDStd, TRTAvg, TRTStd in loop:
                sentence = [w[0] for w in sentence]
                # torch.cat requires both inputs on one device.
                True_TRT = torch.cat((TRTAvg, TRTStd.to(TRTAvg.device))).T

                optimizerT.zero_grad()
                TRT_I = TRT(sentence)
                lossT = loss_fn(True_TRT, TRT_I)
                lossT.backward()
                optimizerT.step()

                # Accumulate a plain float: summing the tensors themselves
                # would keep every step's autograd graph alive for the epoch.
                step_loss = lossT.item()
                train_lt += step_loss
                loop.set_postfix(loss=step_loss)

            if n_train > 0:
                writer.add_scalar("LossT/train", train_lt / n_train, epoch)

            _, val_lt = eval(FFD, TRT, valloader)
            val_loss = float(val_lt)
            writer.add_scalar("LossT/val", val_loss, epoch)

            if best_lt > val_loss:
                best_lt = val_loss
                torch.save(
                    TRT.state_dict(),
                    os.path.join(checkpoint_dir, 'best_checkpointT' + str(val_loss) + '.pth'),
                )

        print('End of the Training. Saving final checkpoints.')

        state = dict(epoch=epochs, model=FFD.state_dict(),
                     optimizer=optimizerF.state_dict())
        torch.save(state, os.path.join(checkpoint_dir, 'final_checkpointF.pth'))

        state = dict(epoch=epochs, model=TRT.state_dict(),
                     optimizer=optimizerT.state_dict())
        torch.save(state, os.path.join(checkpoint_dir, 'final_checkpointT.pth'))
    finally:
        # Close the event file even when training aborts with an exception.
        writer.flush()
        writer.close()
                
                

In [17]:
# Root folder of the dataset. Set the CCS3_DATA_DIR environment variable to
# point elsewhere instead of editing this cell; the fallback is the location
# that was previously hard-coded.
DATA_DIR = os.environ.get('CCS3_DATA_DIR', 'c:\\Users\\ludandan\\Desktop\\CCS3\\dataset')

Training_set = dataset(file_path=os.path.join(DATA_DIR, 'Training', 'train.csv'), language='en')
trainingloader = DataLoader(dataset=Training_set, batch_size=1, shuffle=True)

Val_set = dataset(file_path=os.path.join(DATA_DIR, 'EvaluationSubtask1.txt'), language='en')
# eval() only averages the per-batch losses, so the validation order is
# irrelevant; keep it fixed for reproducible runs.
valloader = DataLoader(dataset=Val_set, batch_size=1, shuffle=False)

In [18]:
def _trainable_parameters(model):
    """Return an iterator over the parameters of `model` that require gradients."""
    return (param for param in model.parameters() if param.requires_grad)


# Both optimisers share the same Adam configuration.
# NOTE: train() overrides the learning rate of optimizerT's first parameter
# group, so the lr given here is not the rate TRT is actually trained with.
adam_settings = dict(
    lr=0.1,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)

ffd = FFD(padding=False).to(device)
optimizerF = torch.optim.Adam(_trainable_parameters(ffd), **adam_settings)

trt = TRT(padding=False).to(device)
optimizerT = torch.optim.Adam(_trainable_parameters(trt), **adam_settings)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transfo

In [19]:
# Run configuration. Only `epochs` is handed to train(); `lr` and `batch_size`
# are defined here but not passed to the call below.
epochs, lr, batch_size = 400, 0.1, 1

train(
    FFD=ffd,
    TRT=trt,
    trainloader=trainingloader,
    valloader=valloader,
    epochs=epochs,
    optimizerF=optimizerF,
    optimizerT=optimizerT,
)

                                       

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument tensors in method wrapper_cat)