In [5]:
import pandas as pd
import torch 
import torch.nn as nn
from torch.utils.data import DataLoader,Dataset
import numpy as np
from tqdm import tqdm

!pip install --upgrade --force-reinstall --no-deps kaggle==1.5.8
!mkdir /root/.kaggle

with open("/root/.kaggle/kaggle.json", "w+") as f:
    f.write('{"username":"yimingxiao","key":"a7d197fbcf8334a668ce1ba47f1dc75d"}') # Put your kaggle username & key here

!chmod 600 /root/.kaggle/kaggle.json
!kaggle competitions download -c ubiquant-market-prediction



!unzip -q ubiquant-market-prediction.zip

!ls

In [6]:
path="/root/Ubiquant/"
originaldatapath = "train.csv"
targetpath ="mytrain.csv"
valpath = "myval.csv"
testpath ="mytest.csv"

df= pd.read_csv(path+originaldatapath)


offset1 = int(df.shape[0]*0.8)
offset2 = int(df.shape[0]*0.90)
train =df.iloc[:offset1,:]
val = df.iloc[offset1:offset2,:]
test = df.iloc[offset2:,:]

train.to_csv(path+targetpath)
val.to_csv(path+valpath)
test.to_csv(path+testpath)

df= pd.read_csv(path+valpath)

df

In [3]:
#两方面去考虑attention,首先,可以忽视事件数据,将所有Id一样的人聚集在一起,将他们视作一个sequence,将他们的target作为另一个,观察两个sequence的关系
#其次,可以以同一个时域做Attention,将这个时域中的数据作为一个切面sequence,然后跟他们的结果做相关性

In [7]:
def GetDataFromKaggle():
    print("successful get date")

In [8]:
def GetMaxFeature(x):
    id_size = x["investment_id"].nunique()
    time_id_size = x["time_id"].nunique()
    return id_size,time_id_size

In [9]:
class Ubiquant(Dataset):
    def __init__(self,datapath,context = 100,train_data = True,get_time_data = True):
        super(Ubiquant,self).__init__()
        self.get_time = get_time_data
        if(train_data):
            self.data = pd.read_csv(datapath)
            self.id_size,self.time_size = GetMaxFeature(self.data)
            if(get_time_data):#get an x,y base on time frame
                self.y = self.data.loc[:,"target"]
                self.x = self.data.drop(columns=["Unnamed: 0","row_id","time_id","target"])
                #self.x.loc[:,"time_id"] -=self.x.loc[0,"time_id"]
                #self.id = self.x[["time_id"]]
                self.id = self.x[["investment_id"]]
                self.x = self.x.drop(columns=["investment_id"])
                
                #self.x = self.x.drop(columns=["time_id"])
            else:#get (x,y) base on investment_id
                self.data = self.data.sort_values(by=["investment_id"])
                self.y = self.data.loc[:,"target"]
                self.x = self.data.drop(columns=["Unnamed: 0","row_id","time_id","target"])
                self.id = self.x[["investment_id"]]
                self.x = self.x.drop(columns=["investment_id"])
        
        #pad context before the data
        self.context = context
        self.pad_id =np.pad(self.id.to_numpy(),((context-1,0),(0,0)),constant_values = 0)
        self.pad_x =np.pad(self.x.to_numpy(),((context-1,0),(0,0)),constant_values = 0)
        self.y =self.y.to_numpy()
        
        
        self.pad_id =torch.LongTensor(self.pad_id)
        self.pad_x = torch.FloatTensor(self.pad_x)
        self.y = torch.FloatTensor(self.y)
        
        print(f"succesfully load data with shape x : {self.x.shape} | shape y :{self.y.shape} | get time data is: {get_time_data}")
        
                
            
    def __getitem__(self,index):
        context = self.context
        return self.pad_id[index:index+context],self.pad_x[index:index+context],self.y[index]
        
    def __len__(self):
        return self.x.shape[0]
    
    def GetSize(self):
        if(get_time_data):
            return self.time_size
        else:
            return self.id_size

            
            

In [10]:
def LoadData(config, get_time_data = True):
    trainSet =Ubiquant(path+targetpath,context = config["context"], get_time_data = get_time_data)
    valSet =Ubiquant(path+valpath, context = config["context"], get_time_data = get_time_data)
    testSet =Ubiquant(path+testpath, context = config["context"], get_time_data = get_time_data)
    trainLoader = DataLoader(trainSet,batch_size = config["batch_size"],shuffle =True,drop_last=True)#,num_workers=2)
    valLoader = DataLoader(valSet,batch_size = config["batch_size"],shuffle =True,drop_last=True)#,num_workers=2)
    testLoader = DataLoader(testSet,batch_size = config["batch_size"],shuffle =False,drop_last=False)
    return trainLoader,valLoader,testLoader

In [11]:
class SimpleAttention(nn.Module):
    def __init__(self,seq_len = 32, input_feature =311):
        super(SimpleAttention,self).__init__() 
        
        self.id_embedding = nn.Embedding(10000,12)
        self.cnn = nn.Conv1d(1,64,3,1,padding=1)
        self.att = nn.MultiheadAttention(input_feature,24)
       
       
        
        self.flat = nn.Flatten()
        self.MLP =nn.Sequential(
            nn.Linear(input_feature*64,512),
            nn.BatchNorm1d(512),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(512,256),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(256,128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(128,8),
     
        )
        self.FinalOutPut = nn.Sequential(
            nn.BatchNorm1d(8),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(8,1)
        )
        self.criterion = nn.MSELoss()
        
    def forward(self,_id, f_features , Get_fea = False):
        invest_embedding = self.id_embedding(_id).squeeze(dim=2)
        
        _input =torch.cat((invest_embedding,f_features),axis =-1)
        _input = self.cnn(_input)
        #print(_input.shape)
        _input =_input.permute(1,0,2)
        feature, _ = self.att(_input,_input,_input)
        feature = feature.permute(1,0,2)
        #print(feature.shape)
        feature = self.flat(feature)
        #print(output.shape)
        feature = self.MLP(feature)
        output =  self.FinalOutPut(feature)
        if(Get_fea):
            return feature
        else:
            return output
    
    def cal_Loss(self,y_hat,y):
        return self.criterion(y_hat,y)

In [12]:
class DNN(nn.Module):
    def __init__(self,seq_len = 32,input_feature = 312):
        super(DNN,self).__init__()
        
        self.id_embedding = nn.Embedding(10000,12)
        self.dnn = nn.Sequential(
            nn.Conv1d(1,64,13,1,padding=6),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Dropout(0.4),
            nn.Conv1d(64,128,13,1,padding=6),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Dropout(0.4),
            nn.Conv1d(128,256,13,1,padding=6),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Dropout(0.4),
            nn.Conv1d(256,64,13,1,padding=6),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Dropout(0.4),
        )
        
        self.flat = nn.Flatten()
        self.MLP =nn.Sequential(
            nn.Linear(input_feature*64,64),
            nn.BatchNorm1d(64),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(64,128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(128,256),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(256,512),
            nn.BatchNorm1d(512),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(512,256),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(256,128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(128,8),
            nn.BatchNorm1d(8),
            nn.SiLU(),
           
        )
        self.FinalOutPut = nn.Sequential(
         
            nn.Linear(8,1)
        )
        self.criterion = nn.MSELoss()
        
    def forward(self,_id,f_features ,Get_fea = False):
        invest_embedding = self.id_embedding(_id).squeeze(dim=2)
        
        
        _input =torch.cat((invest_embedding,f_features),axis =-1)
        _input = self.dnn(_input)
        _input = self.flat(_input)
        feature = self.MLP(_input)
        output= self.FinalOutPut(feature)
        if(Get_fea):
            return feature
        else:
            return output

    def cal_Loss(self,y_hat,y):
        return self.criterion(y_hat,y)

In [13]:
class SimpleMLP(nn.Module):
    def __init__(self,seq_len = 32,input_feature = 312):
        super(SimpleMLP,self).__init__()
        
        self.id_embedding = nn.Embedding(10000,64)
        self.feature_embed = nn.Sequential(
            nn.Linear(300,256),
            nn.SiLU(),
            nn.Dropout(0.1),
            nn.Linear(256,256),
            nn.SiLU(),
            nn.Dropout(0.1),
            nn.Linear(256,256),
            nn.SiLU(),
            nn.Dropout(0.1),
            
        )
        self.flat = nn.Flatten()
        self.MLP =nn.Sequential(
            nn.Linear(320,512),
            nn.BatchNorm1d(512),
            nn.SiLU(),
            nn.Dropout(0.1),
            nn.Linear(512,128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(128,32),
            nn.BatchNorm1d(32),
            nn.SiLU(),
            nn.Dropout(0.4),
          
           
            
        )
        self.FinalOutPut = nn.Sequential(
           
            nn.Linear(32,1)
        )
        self.criterion = nn.MSELoss()
        
    def forward(self,_id,f_features, Get_fea=False ):
        invest_embedding = self.id_embedding(_id).squeeze(dim=2)
        invest_embedding = self.flat(invest_embedding)
        f_features =self.flat(f_features)
        f_features = self.feature_embed(f_features)
        _input =torch.cat((invest_embedding,f_features),axis =-1)

        
        #print(_input.shape)
        feature = self.MLP(_input)
        output = self.FinalOutPut(feature)
        if(Get_fea):
            return feature
        else:
            return output

    def cal_Loss(self,y_hat,y):
        return self.criterion(y_hat,y)

In [14]:
def get_corr(y,target):
    y,target = y.reshape(-1),target.reshape(-1)
    ymean,targetmean = torch.mean(y),torch.mean(target)
    
    vy = y-ymean
    vt = target-targetmean
    
    corr = torch.sum(vy*vt)/(torch.sqrt(torch.sum(vy**2))* torch.sqrt(torch.sum(vt**2)))
    return corr

In [15]:
def train(model,trainLoader,valLoader, optimizer, scheduler, config, model_ind):
    best_loss =1000
    best_corr = 0
    epochs =config["epoch"]
    for epoch in range(epochs):
        batch_bar = tqdm(total = len(trainLoader),dynamic_ncols = True,leave =False,position = 0,desc = "train")
        model.train()
        train_total_loss = 0
        for i,(x1,x2,y) in enumerate(trainLoader):
            optimizer.zero_grad()
            x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
            y_hat = model(x1,x2)
            y_hat = y_hat.reshape(-1,1)
            y =y.reshape(-1,1)
            loss = model.cal_Loss(y_hat,y)
            train_total_loss+=float(loss.cpu())
            batch_bar.set_postfix(
                loss="{:.04f}".format(float(train_total_loss / (i + 1))),
                lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
            loss.backward()
            optimizer.step()
            scheduler.step()
            batch_bar.update()
        batch_bar.close()
        
        val_total_loss = 0
        val_total_corr = 0
        batch_bar = tqdm(total = len(valLoader),dynamic_ncols = True,leave =False,position = 0,desc = "val")
        model.eval()
        for i,(x1,x2,y) in enumerate(valLoader):
            with torch.no_grad():
                x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
                y_hat = model(x1,x2)
                y_hat = y_hat.reshape(-1,1)
                y =y.reshape(-1,1)
                loss = model.cal_Loss(y_hat,y)
                

                corr = get_corr(y_hat,y)
                val_total_corr+=corr
               
                val_total_loss+=loss
            batch_bar.set_postfix(
                loss="{:.04f}".format(float(val_total_loss / (i + 1))),
                
                corr="{:.04f}".format(float(val_total_corr / (i + 1))),
             )
            batch_bar.update()
        batch_bar.close()
        val_loss = float(val_total_loss/len(valLoader))
        corr = float(val_total_corr / len(valLoader))
        if(corr>best_corr):
            best_corr = corr
            torch.save(model.state_dict(),config["store_path"]+str(model_ind)+".pth")
            print("successfully save model")
        
        print(f"Epoch {epoch+1}/{epochs}: train loss {float(train_total_loss / len(trainLoader)):0.04f}, Learning Rate {optimizer.param_groups[0]['lr']:0.04f}, val loss{float(val_total_loss/len(valLoader)):0.04f}, corr {float(val_total_corr / (i + 1)):0.04f}")

In [16]:
def test(model,testLoader,config,model_ind):
    model.load_state_dict(torch.load(config["store_path"]+str(model_ind)+".pth"))
    model.eval()
    test_total_loss = 0
    test_total_corr = 0
    batch_bar = tqdm(total = len(testLoader),dynamic_ncols = True,leave =False,position = 0,desc = "Test")
    for i,(x1,x2,y) in enumerate(testLoader):
        with torch.no_grad():
            x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
            y_hat = model(x1,x2)
            y_hat = y_hat.reshape(-1,1)
            y =y.reshape(-1,1)
            loss = model.cal_Loss(y_hat,y)
                

            corr = get_corr(y_hat,y)
            test_total_corr+=corr
            test_total_loss+=loss
        batch_bar.set_postfix(
                test_loss="{:.04f}".format(float(test_total_loss / (i + 1))),
                
                test_corr="{:.04f}".format(float(test_total_corr / (i + 1))),
             )
        batch_bar.update()
    batch_bar.close()
    print(f"test loss{float(test_total_loss/len(testLoader)):0.04f},test corr {float(test_total_corr / len(testLoader)):0.04f}")

In [17]:
trainLoader,valLoader,testLoader = LoadData(DNN_config,get_time_data = True)

succesfully load data with shape x : (2513128, 300) | shape y :torch.Size([2513128]) | get time data is: True
succesfully load data with shape x : (314141, 300) | shape y :torch.Size([314141]) | get time data is: True
succesfully load data with shape x : (314141, 300) | shape y :torch.Size([314141]) | get time data is: True


In [18]:
def Main(trainLoader,valLoader,testLoader):
    #set all parameter
   
    model = SimpleAttention(Att_config["context"],312).cuda()
    model.train()
    MLP_model = SimpleMLP(MLP_config["context"],312).cuda()
    MLP_model.train()
    DNN_model = DNN(DNN_config["context"],312).cuda()
    DNN_model.train()
    
    optimizer = getattr(torch.optim,Att_config["optimz"])(model.parameters(),lr =Att_config["learning_rate"])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(trainLoader) * Att_config["epoch"]))
    
    
    MLP_optimizer = getattr(torch.optim,MLP_config["optimz"])(MLP_model.parameters(),lr =MLP_config["learning_rate"])
    MLP_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(MLP_optimizer, T_max=(len(trainLoader) * MLP_config["epoch"]))
    
    DNN_optimizer = getattr(torch.optim,DNN_config["optimz"])(DNN_model.parameters(),lr =DNN_config["learning_rate"])
    DNN_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(DNN_optimizer, T_max=(len(trainLoader) * DNN_config["epoch"]))
    #params
    num_para =0
    for p in DNN_model.parameters():
        num_para+=p.numel()
    print(f'Number of params : {num_para}')
    print("///////////Start Train ATT Model//////////")
    for i in range(5):
        print("_____Att" + str(i)+"_____")
        train(model,trainLoader,valLoader,optimizer,scheduler,Att_config,i)
    print("///////////Start Train MLP Model//////////")
    for i in range(5):
        print("_____MLP" + str(i)+"_____")
        train(MLP_model,trainLoader,valLoader,MLP_optimizer,MLP_scheduler,MLP_config,i)
    print("///////////Start Train DNN Model//////////")
    for i in range(5):
        print("_____DNN" + str(i)+"_____")
        train(DNN_model,trainLoader,valLoader,DNN_optimizer,DNN_scheduler,DNN_config,i)
    
    print("//////////Start test///////////")
    for i in range(3):
        print("----Attention-----")
        test(model,testLoader,Att_config,i)
        print("---- MLP Model-----")
        test(MLP_model,testLoader,MLP_config,i)
        print("----DNN Model-----")
        test(DNN_model,testLoader,DNN_config,i)


In [19]:
Att_config={"epoch" : 20,"batch_size":32768,"learning_rate":0.02,"optimz":"Adam","store_path":"/root/Ubiquant/Att_","context":1}

In [20]:
MLP_config={"epoch" :20,"batch_size":32768,"learning_rate":0.02,"optimz":"Adam","store_path":"/root/Ubiquant/Mlp_","context":1}

In [21]:
DNN_config={"epoch" :20,"batch_size":10000,"learning_rate":0.02,"optimz":"Adam","store_path":"/root/Ubiquant/Dnn_","context":1}

In [None]:
Main(trainLoader,valLoader,testLoader)

train:   0%|          | 0/251 [00:00<?, ?it/s]

Number of params : 2486753
///////////Start Train ATT Model//////////
_____Att0_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8431, Learning Rate 0.0199, val loss0.8059, corr 0.1266


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 2/20: train loss 0.8361, Learning Rate 0.0195, val loss0.8058, corr 0.1247


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 3/20: train loss 0.8352, Learning Rate 0.0189, val loss0.8054, corr 0.1276


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 4/20: train loss 0.8342, Learning Rate 0.0181, val loss0.8052, corr 0.1302


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 5/20: train loss 0.8327, Learning Rate 0.0171, val loss0.8046, corr 0.1289


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.8312, Learning Rate 0.0159, val loss0.8083, corr 0.1274


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.8300, Learning Rate 0.0145, val loss0.8062, corr 0.1279


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.8300, Learning Rate 0.0131, val loss0.8079, corr 0.1219


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8291, Learning Rate 0.0116, val loss0.8068, corr 0.1246


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.8279, Learning Rate 0.0100, val loss0.8095, corr 0.1281


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 11/20: train loss 0.8266, Learning Rate 0.0084, val loss0.8070, corr 0.1361


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.8251, Learning Rate 0.0069, val loss0.8075, corr 0.1261


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.8240, Learning Rate 0.0055, val loss0.8076, corr 0.1301


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.8224, Learning Rate 0.0041, val loss0.8097, corr 0.1238


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.8209, Learning Rate 0.0029, val loss0.8124, corr 0.1175


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.8194, Learning Rate 0.0019, val loss0.8113, corr 0.1229


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.8173, Learning Rate 0.0011, val loss0.8130, corr 0.1243


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.8157, Learning Rate 0.0005, val loss0.8138, corr 0.1219


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.8144, Learning Rate 0.0001, val loss0.8150, corr 0.1205


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.8135, Learning Rate 0.0000, val loss0.8162, corr 0.1207
_____Att1_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8136, Learning Rate 0.0001, val loss0.8160, corr 0.1210


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 2/20: train loss 0.8136, Learning Rate 0.0005, val loss0.8166, corr 0.1192


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 3/20: train loss 0.8140, Learning Rate 0.0011, val loss0.8176, corr 0.1177


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.8147, Learning Rate 0.0019, val loss0.8168, corr 0.1175


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 5/20: train loss 0.8149, Learning Rate 0.0029, val loss0.8138, corr 0.1226


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.8156, Learning Rate 0.0041, val loss0.8166, corr 0.1188


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 7/20: train loss 0.8163, Learning Rate 0.0055, val loss0.8154, corr 0.1242


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.8168, Learning Rate 0.0069, val loss0.8182, corr 0.1165


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8171, Learning Rate 0.0084, val loss0.8121, corr 0.1229


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 10/20: train loss 0.8177, Learning Rate 0.0100, val loss0.8124, corr 0.1273


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.8182, Learning Rate 0.0116, val loss0.8175, corr 0.1107


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.8180, Learning Rate 0.0131, val loss0.8136, corr 0.1262


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.8176, Learning Rate 0.0145, val loss0.8145, corr 0.1232


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.8179, Learning Rate 0.0159, val loss0.8252, corr 0.1024


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.8187, Learning Rate 0.0171, val loss0.8107, corr 0.1245


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.8190, Learning Rate 0.0181, val loss0.8164, corr 0.1190


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.8190, Learning Rate 0.0189, val loss0.8114, corr 0.1212


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.8190, Learning Rate 0.0195, val loss0.8152, corr 0.1221


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.8188, Learning Rate 0.0199, val loss0.8142, corr 0.1223


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.8186, Learning Rate 0.0200, val loss0.8181, corr 0.1196
_____Att2_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8183, Learning Rate 0.0199, val loss0.8205, corr 0.1103


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 2/20: train loss 0.8173, Learning Rate 0.0195, val loss0.8148, corr 0.1225


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 3/20: train loss 0.8161, Learning Rate 0.0189, val loss0.8111, corr 0.1227


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.8156, Learning Rate 0.0181, val loss0.8157, corr 0.1169


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 5/20: train loss 0.8140, Learning Rate 0.0171, val loss0.8175, corr 0.1166


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.8128, Learning Rate 0.0159, val loss0.8160, corr 0.1140


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.8106, Learning Rate 0.0145, val loss0.8150, corr 0.1184


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.8094, Learning Rate 0.0131, val loss0.8201, corr 0.1185


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8080, Learning Rate 0.0116, val loss0.8227, corr 0.1106


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.8053, Learning Rate 0.0100, val loss0.8255, corr 0.1147


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.8022, Learning Rate 0.0084, val loss0.8245, corr 0.1174


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.7992, Learning Rate 0.0069, val loss0.8220, corr 0.1176


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.7952, Learning Rate 0.0055, val loss0.8334, corr 0.1103


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.7903, Learning Rate 0.0041, val loss0.8335, corr 0.1062


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.7858, Learning Rate 0.0029, val loss0.8322, corr 0.1082


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.7818, Learning Rate 0.0019, val loss0.8345, corr 0.1051


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.7776, Learning Rate 0.0011, val loss0.8383, corr 0.1031


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.7738, Learning Rate 0.0005, val loss0.8383, corr 0.1047


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.7716, Learning Rate 0.0001, val loss0.8451, corr 0.1023


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.7710, Learning Rate 0.0000, val loss0.8442, corr 0.1014
_____Att3_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.7706, Learning Rate 0.0001, val loss0.8453, corr 0.1012


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 2/20: train loss 0.7710, Learning Rate 0.0005, val loss0.8424, corr 0.1028


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 3/20: train loss 0.7710, Learning Rate 0.0011, val loss0.8412, corr 0.1021


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.7708, Learning Rate 0.0019, val loss0.8426, corr 0.1010


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 5/20: train loss 0.7711, Learning Rate 0.0029, val loss0.8386, corr 0.1033


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.7720, Learning Rate 0.0041, val loss0.8385, corr 0.1032


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.7737, Learning Rate 0.0055, val loss0.8393, corr 0.0961


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.7757, Learning Rate 0.0069, val loss0.8438, corr 0.0969


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.7780, Learning Rate 0.0084, val loss0.8386, corr 0.1026


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 10/20: train loss 0.7800, Learning Rate 0.0100, val loss0.8269, corr 0.1036


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.7867, Learning Rate 0.0116, val loss0.8288, corr 0.1027


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 12/20: train loss 0.7892, Learning Rate 0.0131, val loss0.8282, corr 0.1050


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 13/20: train loss 0.7947, Learning Rate 0.0145, val loss0.8223, corr 0.1088


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.7977, Learning Rate 0.0159, val loss0.8232, corr 0.1028


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.7980, Learning Rate 0.0171, val loss0.8290, corr 0.1035


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.7962, Learning Rate 0.0181, val loss0.8328, corr 0.1027


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.7960, Learning Rate 0.0189, val loss0.8270, corr 0.1078


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.7950, Learning Rate 0.0195, val loss0.8276, corr 0.0928


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 19/20: train loss 0.7983, Learning Rate 0.0199, val loss0.8133, corr 0.1144


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.8016, Learning Rate 0.0200, val loss0.8203, corr 0.1136
_____Att4_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.7991, Learning Rate 0.0199, val loss0.8182, corr 0.1135


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 2/20: train loss 0.7984, Learning Rate 0.0195, val loss0.8304, corr 0.1092


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 3/20: train loss 0.7963, Learning Rate 0.0189, val loss0.8334, corr 0.1030


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.7942, Learning Rate 0.0181, val loss0.8220, corr 0.1068


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 5/20: train loss 0.7943, Learning Rate 0.0171, val loss0.8279, corr 0.1089


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.7918, Learning Rate 0.0159, val loss0.8329, corr 0.1054


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.7905, Learning Rate 0.0145, val loss0.8280, corr 0.1102


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.7873, Learning Rate 0.0131, val loss0.8412, corr 0.1010


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.7836, Learning Rate 0.0116, val loss0.8298, corr 0.1002


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.7799, Learning Rate 0.0100, val loss0.8400, corr 0.1025


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.7765, Learning Rate 0.0084, val loss0.8435, corr 0.1044


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.7728, Learning Rate 0.0069, val loss0.8416, corr 0.0961


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.7654, Learning Rate 0.0055, val loss0.8461, corr 0.0970


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.7600, Learning Rate 0.0041, val loss0.8426, corr 0.0974


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.7557, Learning Rate 0.0029, val loss0.8474, corr 0.0973


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.7515, Learning Rate 0.0019, val loss0.8449, corr 0.0981


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.7472, Learning Rate 0.0011, val loss0.8489, corr 0.0993


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.7437, Learning Rate 0.0005, val loss0.8500, corr 0.0966


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.7429, Learning Rate 0.0001, val loss0.8525, corr 0.0960


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.7415, Learning Rate 0.0000, val loss0.8522, corr 0.0961
///////////Start Train MLP Model//////////
_____MLP0_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8421, Learning Rate 0.0199, val loss0.8047, corr 0.1337


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 2/20: train loss 0.8327, Learning Rate 0.0195, val loss0.8031, corr 0.1368


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 3/20: train loss 0.8300, Learning Rate 0.0189, val loss0.8083, corr 0.1249


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.8276, Learning Rate 0.0181, val loss0.8083, corr 0.1269


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 5/20: train loss 0.8246, Learning Rate 0.0171, val loss0.8057, corr 0.1333


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.8219, Learning Rate 0.0159, val loss0.8071, corr 0.1327


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.8189, Learning Rate 0.0145, val loss0.8139, corr 0.1190


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.8155, Learning Rate 0.0131, val loss0.8064, corr 0.1331


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8121, Learning Rate 0.0116, val loss0.8127, corr 0.1236


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.8085, Learning Rate 0.0100, val loss0.8141, corr 0.1227


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.8047, Learning Rate 0.0084, val loss0.8172, corr 0.1232


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.8003, Learning Rate 0.0069, val loss0.8237, corr 0.1144


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.7959, Learning Rate 0.0055, val loss0.8243, corr 0.1116


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.7918, Learning Rate 0.0041, val loss0.8278, corr 0.1095


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.7875, Learning Rate 0.0029, val loss0.8291, corr 0.1054


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.7833, Learning Rate 0.0019, val loss0.8292, corr 0.1062


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.7802, Learning Rate 0.0011, val loss0.8315, corr 0.1069


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.7778, Learning Rate 0.0005, val loss0.8326, corr 0.1056


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.7762, Learning Rate 0.0001, val loss0.8393, corr 0.1025


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.7761, Learning Rate 0.0000, val loss0.8361, corr 0.1029
_____MLP1_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.7761, Learning Rate 0.0001, val loss0.8364, corr 0.1031


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 2/20: train loss 0.7759, Learning Rate 0.0005, val loss0.8356, corr 0.1039


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 3/20: train loss 0.7762, Learning Rate 0.0011, val loss0.8333, corr 0.1029


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.7766, Learning Rate 0.0019, val loss0.8353, corr 0.0998


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 5/20: train loss 0.7768, Learning Rate 0.0029, val loss0.8345, corr 0.1036


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.7773, Learning Rate 0.0041, val loss0.8376, corr 0.0968


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.7776, Learning Rate 0.0055, val loss0.8332, corr 0.1022


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.7785, Learning Rate 0.0069, val loss0.8442, corr 0.0965


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.7793, Learning Rate 0.0084, val loss0.8371, corr 0.0995


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.7800, Learning Rate 0.0100, val loss0.8314, corr 0.0985


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 11/20: train loss 0.7807, Learning Rate 0.0116, val loss0.8407, corr 0.1065


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.7815, Learning Rate 0.0131, val loss0.8349, corr 0.0968


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.7821, Learning Rate 0.0145, val loss0.8276, corr 0.1060


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.7822, Learning Rate 0.0159, val loss0.8271, corr 0.0976


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.7826, Learning Rate 0.0171, val loss0.8344, corr 0.1034


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.7822, Learning Rate 0.0181, val loss0.8223, corr 0.1050


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.7819, Learning Rate 0.0189, val loss0.8371, corr 0.0930


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.7811, Learning Rate 0.0195, val loss0.8387, corr 0.0909


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 19/20: train loss 0.7801, Learning Rate 0.0199, val loss0.8313, corr 0.1071


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.7790, Learning Rate 0.0200, val loss0.8334, corr 0.1011
_____MLP2_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.7770, Learning Rate 0.0199, val loss0.8376, corr 0.0995


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 2/20: train loss 0.7750, Learning Rate 0.0195, val loss0.8287, corr 0.0946


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 3/20: train loss 0.7735, Learning Rate 0.0189, val loss0.8353, corr 0.0957


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.7707, Learning Rate 0.0181, val loss0.8420, corr 0.0960


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 5/20: train loss 0.7685, Learning Rate 0.0171, val loss0.8380, corr 0.1029


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.7652, Learning Rate 0.0159, val loss0.8435, corr 0.0954


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.7620, Learning Rate 0.0145, val loss0.8361, corr 0.0983


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.7598, Learning Rate 0.0131, val loss0.8400, corr 0.0939


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.7556, Learning Rate 0.0116, val loss0.8433, corr 0.0869


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.7526, Learning Rate 0.0100, val loss0.8305, corr 0.0912


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.7477, Learning Rate 0.0084, val loss0.8482, corr 0.0907


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.7447, Learning Rate 0.0069, val loss0.8428, corr 0.0899


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.7411, Learning Rate 0.0055, val loss0.8501, corr 0.0962


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.7374, Learning Rate 0.0041, val loss0.8558, corr 0.0917


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.7347, Learning Rate 0.0029, val loss0.8593, corr 0.0910


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.7319, Learning Rate 0.0019, val loss0.8560, corr 0.0885


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.7300, Learning Rate 0.0011, val loss0.8611, corr 0.0863


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.7284, Learning Rate 0.0005, val loss0.8630, corr 0.0872


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.7279, Learning Rate 0.0001, val loss0.8617, corr 0.0883


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.7268, Learning Rate 0.0000, val loss0.8605, corr 0.0882
_____MLP3_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.7271, Learning Rate 0.0001, val loss0.8633, corr 0.0886


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 2/20: train loss 0.7276, Learning Rate 0.0005, val loss0.8606, corr 0.0877


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 3/20: train loss 0.7281, Learning Rate 0.0011, val loss0.8604, corr 0.0872


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 4/20: train loss 0.7281, Learning Rate 0.0019, val loss0.8577, corr 0.0888


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 5/20: train loss 0.7292, Learning Rate 0.0029, val loss0.8600, corr 0.0881


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.7299, Learning Rate 0.0041, val loss0.8540, corr 0.0883


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.7315, Learning Rate 0.0055, val loss0.8519, corr 0.0880


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 8/20: train loss 0.7337, Learning Rate 0.0069, val loss0.8443, corr 0.0919


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.7355, Learning Rate 0.0084, val loss0.8681, corr 0.0826


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.7375, Learning Rate 0.0100, val loss0.8747, corr 0.0847


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.7398, Learning Rate 0.0116, val loss0.8512, corr 0.0879


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.7416, Learning Rate 0.0131, val loss0.8722, corr 0.0825


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.7443, Learning Rate 0.0145, val loss0.8485, corr 0.0851


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.7452, Learning Rate 0.0159, val loss0.8661, corr 0.0886


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.7471, Learning Rate 0.0171, val loss0.8541, corr 0.0911


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 16/20: train loss 0.7480, Learning Rate 0.0181, val loss0.8393, corr 0.0927


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.7490, Learning Rate 0.0189, val loss0.8396, corr 0.0880


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 18/20: train loss 0.7488, Learning Rate 0.0195, val loss0.8504, corr 0.0935


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.7499, Learning Rate 0.0199, val loss0.8509, corr 0.0895


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.7489, Learning Rate 0.0200, val loss0.8453, corr 0.0902
_____MLP4_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.7483, Learning Rate 0.0199, val loss0.8475, corr 0.0832


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 2/20: train loss 0.7472, Learning Rate 0.0195, val loss0.8498, corr 0.0891


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 3/20: train loss 0.7463, Learning Rate 0.0189, val loss0.8490, corr 0.0898


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 4/20: train loss 0.7443, Learning Rate 0.0181, val loss0.8480, corr 0.0904


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 5/20: train loss 0.7424, Learning Rate 0.0171, val loss0.8600, corr 0.0830


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.7400, Learning Rate 0.0159, val loss0.8573, corr 0.0876


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.7371, Learning Rate 0.0145, val loss0.8517, corr 0.0836


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 8/20: train loss 0.7349, Learning Rate 0.0131, val loss0.8491, corr 0.0936


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.7318, Learning Rate 0.0116, val loss0.8710, corr 0.0886


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.7287, Learning Rate 0.0100, val loss0.8717, corr 0.0814


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.7252, Learning Rate 0.0084, val loss0.8678, corr 0.0846


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.7224, Learning Rate 0.0069, val loss0.8658, corr 0.0865


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.7193, Learning Rate 0.0055, val loss0.8589, corr 0.0859


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.7158, Learning Rate 0.0041, val loss0.8717, corr 0.0845


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.7138, Learning Rate 0.0029, val loss0.8632, corr 0.0843


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.7117, Learning Rate 0.0019, val loss0.8703, corr 0.0851


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.7107, Learning Rate 0.0011, val loss0.8670, corr 0.0845


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.7093, Learning Rate 0.0005, val loss0.8668, corr 0.0833


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.7082, Learning Rate 0.0001, val loss0.8660, corr 0.0843


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.7079, Learning Rate 0.0000, val loss0.8680, corr 0.0840
///////////Start Train DNN Model//////////
_____DNN0_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8508, Learning Rate 0.0199, val loss0.8067, corr 0.1175


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 2/20: train loss 0.8395, Learning Rate 0.0195, val loss0.8048, corr 0.1299


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 3/20: train loss 0.8359, Learning Rate 0.0189, val loss0.8043, corr 0.1325


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 4/20: train loss 0.8334, Learning Rate 0.0181, val loss0.8041, corr 0.1326


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 5/20: train loss 0.8311, Learning Rate 0.0171, val loss0.8030, corr 0.1389


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.8295, Learning Rate 0.0159, val loss0.8043, corr 0.1386


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.8275, Learning Rate 0.0145, val loss0.8044, corr 0.1358


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.8255, Learning Rate 0.0131, val loss0.8048, corr 0.1363


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8235, Learning Rate 0.0116, val loss0.8050, corr 0.1345


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.8217, Learning Rate 0.0100, val loss0.8061, corr 0.1365


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.8199, Learning Rate 0.0084, val loss0.8059, corr 0.1366


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.8179, Learning Rate 0.0069, val loss0.8060, corr 0.1358


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.8159, Learning Rate 0.0055, val loss0.8061, corr 0.1340


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.8138, Learning Rate 0.0041, val loss0.8071, corr 0.1333


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.8121, Learning Rate 0.0029, val loss0.8074, corr 0.1364


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.8102, Learning Rate 0.0019, val loss0.8078, corr 0.1344


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.8085, Learning Rate 0.0011, val loss0.8100, corr 0.1315


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.8073, Learning Rate 0.0005, val loss0.8101, corr 0.1334


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.8072, Learning Rate 0.0001, val loss0.8100, corr 0.1329


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.8066, Learning Rate 0.0000, val loss0.8101, corr 0.1325
_____DNN1_____


train:   0%|          | 0/251 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8065, Learning Rate 0.0001, val loss0.8102, corr 0.1326


train:  88%|████████▊ | 221/251 [07:49<01:02,  2.10s/it, loss=0.8064, lr=0.0004]

In [20]:
class MixModel(nn.Module):
    def __init__(self):
        super(MixModel,self).__init__()
        self.MLP =nn.Sequential(
            nn.Linear(24,64),
            nn.BatchNorm1d(64),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(64,8),
            nn.BatchNorm1d(8),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(8,1),
        )
 
        self.criterion = nn.MSELoss()
        
    def forward(self,x):
        output = self.MLP(x)
        return output

    def cal_Loss(self,y_hat,y):
        return self.criterion(y_hat,y)

In [21]:
def mixtrain(model,trainLoader,valLoader, optimizer, scheduler, config):
    #load model
    att =SimpleAttention(Att_config["context"],312).cuda()
    att.load_state_dict(torch.load(Att_config['store_path']),strict=False)
    att.eval()
    
    mlp =SimpleMLP(MLP_config["context"],312).cuda()
    mlp.load_state_dict(torch.load(MLP_config['store_path']),strict=False)
    mlp.eval()
    
    dnn =DNN(DNN_config["context"],312).cuda()
    dnn.load_state_dict(torch.load(DNN_config['store_path']),strict=False)
    dnn.eval()
    
    best_corr =0
    best_loss =1000
    epochs =config["epoch"]
    for epoch in range(epochs):
        batch_bar = tqdm(total = len(trainLoader),dynamic_ncols = True,leave =False,position = 0,desc = "train")
        model.train()
        train_total_loss = 0
        for i,(x1,x2,y) in enumerate(trainLoader):
            optimizer.zero_grad()
            x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
            f1 = att(x1,x2,Get_fea = True)
            f2 = mlp(x1,x2,Get_fea = True)
            f3 = dnn(x1,x2,Get_fea = True)
            #print(f'f1 shape: {f1.shape}')
            x = torch.cat((f1,f2),axis =-1)
            x = torch.cat((x,f3),axis =-1)
            #print(f'x shape: {x.shape}')
            y_hat = model(x)
            y_hat = y_hat.reshape(-1,1)
            y =y.reshape(-1,1)
            loss = model.cal_Loss(y_hat,y)
            train_total_loss+=float(loss.cpu())
            batch_bar.set_postfix(
                loss="{:.04f}".format(float(train_total_loss / (i + 1))),
                lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
            loss.backward()
            optimizer.step()
            scheduler.step()
            batch_bar.update()
        batch_bar.close()
        
        val_total_loss = 0
        val_total_corr = 0
        batch_bar = tqdm(total = len(valLoader),dynamic_ncols = True,leave =False,position = 0,desc = "val")
        model.eval()
        for i,(x1,x2,y) in enumerate(valLoader):
            with torch.no_grad():
                x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
                f1 = att(x1,x2,Get_fea = True)
                f2 = mlp(x1,x2,Get_fea = True)
                f3 = dnn(x1,x2,Get_fea = True)
                
                x = torch.cat((f1,f2),axis =-1)
                x = torch.cat((x,f3),axis =-1)
                y_hat = model(x)
                y_hat = y_hat.reshape(-1,1)
                y =y.reshape(-1,1)
                loss = model.cal_Loss(y_hat,y)
                

                corr = get_corr(y_hat,y)
                val_total_corr+=corr
               
                val_total_loss+=loss
            batch_bar.set_postfix(
                loss="{:.04f}".format(float(val_total_loss / (i + 1))),
                
                corr="{:.04f}".format(float(val_total_corr / (i + 1))),
             )
            batch_bar.update()
        batch_bar.close()
        val_loss = float(val_total_loss/len(valLoader))
        corr = float(val_total_corr/len(valLoader))
        if(corr>best_corr):
            best_corr = corr
            torch.save(model.state_dict(),config["store_path"])
            print("successfully save model")
        
        print(f"Epoch {epoch+1}/{epochs}: train loss {float(train_total_loss / len(trainLoader)):0.04f}, Learning Rate {optimizer.param_groups[0]['lr']:0.04f}, val loss{float(val_total_loss/len(valLoader)):0.04f}, corr {float(val_total_corr / (i + 1)):0.04f}")

In [22]:
def Mixtest(model,testLoader,config):
    #load model
    att =SimpleAttention(Att_config["context"],312).cuda()
    att.load_state_dict(torch.load(Att_config['store_path']),strict=False)
    att.eval()
    
    mlp =SimpleMLP(MLP_config["context"],312).cuda()
    mlp.load_state_dict(torch.load(MLP_config['store_path']),strict=False)
    mlp.eval()
    
    dnn =DNN(DNN_config["context"],312).cuda()
    dnn.load_state_dict(torch.load(DNN_config['store_path']),strict=False)
    dnn.eval()
    
    
    model.load_state_dict(torch.load(config["store_path"]))
    model.eval()
    test_total_loss = 0
    test_total_corr = 0
    batch_bar = tqdm(total = len(testLoader),dynamic_ncols = True,leave =False,position = 0,desc = "Test")
    for i,(x1,x2,y) in enumerate(testLoader):
        with torch.no_grad():
            x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
            f1 = att(x1,x2,Get_fea = True)
            f2 = mlp(x1,x2,Get_fea = True)
            f3 = dnn(x1,x2,Get_fea = True)   
            x = torch.cat((f1,f2),axis =-1)
            x = torch.cat((x,f3),axis =-1)
            y_hat = model(x)
            y_hat = y_hat.reshape(-1,1)
            y =y.reshape(-1,1)
            loss = model.cal_Loss(y_hat,y)
                

            corr = get_corr(y_hat,y)
            test_total_corr+=corr
            test_total_loss+=loss
        batch_bar.set_postfix(
                test_loss="{:.04f}".format(float(test_total_loss / (i + 1))),
                
                test_corr="{:.04f}".format(float(test_total_corr / (i + 1))),
             )
        batch_bar.update()
    batch_bar.close()
    print(f"test loss{float(test_total_loss/len(testLoader)):0.04f},test corr {float(test_total_corr / len(testLoader)):0.04f}")

In [23]:
def mixMain():
    model = MixModel().cuda()
    model.train()
    optimizer = getattr(torch.optim,mix_config["optimz"])(model.parameters(),lr =mix_config["learning_rate"])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(trainLoader) * mix_config["epoch"]))
    print("///////////Start Train mix Model//////////")
    mixtrain(model,trainLoader,valLoader,optimizer,scheduler,mix_config)
    print("//////////Start test///////////")
    print("----Attention-----")
    Mixtest(model,testLoader,mix_config)

In [24]:
mix_config={"epoch" : 20,"batch_size":10000,"learning_rate":0.02,"optimz":"Adam","store_path":"/root/Ubiquant/Mix.pth","context":1}

In [25]:
mixMain()

///////////Start Train mix Model//////////


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8175, Learning Rate 0.0199, val loss0.8028, corr 0.1137


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 2/20: train loss 0.8114, Learning Rate 0.0195, val loss0.8056, corr 0.1153


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 3/20: train loss 0.8110, Learning Rate 0.0189, val loss0.8024, corr 0.1150


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.8107, Learning Rate 0.0181, val loss0.8109, corr 0.1141


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 5/20: train loss 0.8105, Learning Rate 0.0171, val loss0.8071, corr 0.1193


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.8104, Learning Rate 0.0159, val loss0.8075, corr 0.1165


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.8102, Learning Rate 0.0145, val loss0.8073, corr 0.1155


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.8102, Learning Rate 0.0131, val loss0.8102, corr 0.1200


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8104, Learning Rate 0.0116, val loss0.8078, corr 0.1171


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.8099, Learning Rate 0.0100, val loss0.8073, corr 0.1147


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.8099, Learning Rate 0.0084, val loss0.8060, corr 0.1149


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.8096, Learning Rate 0.0069, val loss0.8075, corr 0.1136


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.8095, Learning Rate 0.0055, val loss0.8091, corr 0.1135


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.8095, Learning Rate 0.0041, val loss0.8100, corr 0.1195


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.8094, Learning Rate 0.0029, val loss0.8091, corr 0.1175


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.8091, Learning Rate 0.0019, val loss0.8068, corr 0.1152


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.8092, Learning Rate 0.0011, val loss0.8082, corr 0.1170


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.8092, Learning Rate 0.0005, val loss0.8058, corr 0.1179


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.8090, Learning Rate 0.0001, val loss0.8090, corr 0.1164


                                                                                

Epoch 20/20: train loss 0.8091, Learning Rate 0.0000, val loss0.8062, corr 0.1174
//////////Start test///////////
----Attention-----


RuntimeError: Error(s) in loading state_dict for MixModel:
	Unexpected key(s) in state_dict: "id_embedding.weight", "cnn.weight", "cnn.bias", "att.in_proj_weight", "att.in_proj_bias", "att.out_proj.weight", "att.out_proj.bias", "FinalOutPut.0.weight", "FinalOutPut.0.bias", "FinalOutPut.0.running_mean", "FinalOutPut.0.running_var", "FinalOutPut.0.num_batches_tracked", "FinalOutPut.3.weight", "FinalOutPut.3.bias", "MLP.9.weight", "MLP.9.bias", "MLP.9.running_mean", "MLP.9.running_var", "MLP.9.num_batches_tracked", "MLP.12.weight", "MLP.12.bias". 
	size mismatch for MLP.0.weight: copying a param with shape torch.Size([512, 19968]) from checkpoint, the shape in current model is torch.Size([64, 24]).
	size mismatch for MLP.0.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.4.weight: copying a param with shape torch.Size([256, 512]) from checkpoint, the shape in current model is torch.Size([8, 64]).
	size mismatch for MLP.4.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.5.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.5.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.5.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.5.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.8.weight: copying a param with shape torch.Size([128, 256]) from checkpoint, the shape in current model is torch.Size([1, 8]).
	size mismatch for MLP.8.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([1]).