In [1]:
import pandas as pd
import torch 
import torch.nn as nn
from torch.utils.data import DataLoader,Dataset
import numpy as np
from tqdm import tqdm

!pip install --upgrade --force-reinstall --no-deps kaggle==1.5.8
!mkdir /root/.kaggle

with open("/root/.kaggle/kaggle.json", "w+") as f:
    f.write('{"username":"yimingxiao","key":"a7d197fbcf8334a668ce1ba47f1dc75d"}') # Put your kaggle username & key here

!chmod 600 /root/.kaggle/kaggle.json
!kaggle competitions download -c ubiquant-market-prediction



!unzip -q ubiquant-market-prediction.zip

!ls

In [2]:
path="/root/Ubiquant/"
originaldatapath = "train.csv"
targetpath ="mytrain.csv"
valpath = "myval.csv"
testpath ="mytest.csv"

df= pd.read_csv(path+originaldatapath)


offset1 = int(df.shape[0]*0.9)
offset2 = int(df.shape[0]*0.95)
train =df.iloc[:offset1,:]
val = df.iloc[offset1:offset2,:]
test = df.iloc[offset2:,:]

train.to_csv(path+targetpath)
val.to_csv(path+valpath)
test.to_csv(path+testpath)

df= pd.read_csv(path+valpath)

df

In [3]:
#两方面去考虑attention,首先,可以忽视事件数据,将所有Id一样的人聚集在一起,将他们视作一个sequence,将他们的target作为另一个,观察两个sequence的关系
#其次,可以以同一个时域做Attention,将这个时域中的数据作为一个切面sequence,然后跟他们的结果做相关性

In [3]:
def GetDataFromKaggle():
    print("successful get date")

In [4]:
def GetMaxFeature(x):
    id_size = x["investment_id"].nunique()
    time_id_size = x["time_id"].nunique()
    return id_size,time_id_size

In [5]:
class Ubiquant(Dataset):
    def __init__(self,datapath,context = 100,train_data = True,get_time_data = True):
        super(Ubiquant,self).__init__()
        self.get_time = get_time_data
        if(train_data):
            self.data = pd.read_csv(datapath)
            self.id_size,self.time_size = GetMaxFeature(self.data)
            if(get_time_data):#get an x,y base on time frame
                self.y = self.data.loc[:,"target"]
                self.x = self.data.drop(columns=["Unnamed: 0","row_id","time_id","target"])
                #self.x.loc[:,"time_id"] -=self.x.loc[0,"time_id"]
                #self.id = self.x[["time_id"]]
                self.id = self.x[["investment_id"]]
                self.x = self.x.drop(columns=["investment_id"])
                
                #self.x = self.x.drop(columns=["time_id"])
            else:#get (x,y) base on investment_id
                self.data = self.data.sort_values(by=["investment_id"])
                self.y = self.data.loc[:,"target"]
                self.x = self.data.drop(columns=["Unnamed: 0","row_id","time_id","target"])
                self.id = self.x[["investment_id"]]
                self.x = self.x.drop(columns=["investment_id"])
        
        #pad context before the data
        self.context = context
        self.pad_id =np.pad(self.id.to_numpy(),((context-1,0),(0,0)),constant_values = 0)
        self.pad_x =np.pad(self.x.to_numpy(),((context-1,0),(0,0)),constant_values = 0)
        self.y =self.y.to_numpy()
        
        
        self.pad_id =torch.LongTensor(self.pad_id)
        self.pad_x = torch.FloatTensor(self.pad_x)
        self.y = torch.FloatTensor(self.y)
        
        print(f"succesfully load data with shape x : {self.x.shape} | shape y :{self.y.shape} | get time data is: {get_time_data}")
        
                
            
    def __getitem__(self,index):
        context = self.context
        return self.pad_id[index:index+context],self.pad_x[index:index+context],self.y[index]
        
    def __len__(self):
        return self.x.shape[0]
    
    def GetSize(self):
        if(get_time_data):
            return self.time_size
        else:
            return self.id_size

            
            

In [6]:
def LoadData(config, get_time_data = True):
    trainSet =Ubiquant(path+targetpath,context = config["context"], get_time_data = get_time_data)
    valSet =Ubiquant(path+valpath, context = config["context"], get_time_data = get_time_data)
    testSet =Ubiquant(path+testpath, context = config["context"], get_time_data = get_time_data)
    trainLoader = DataLoader(trainSet,batch_size = config["batch_size"],shuffle =True,drop_last=True)#,num_workers=2)
    valLoader = DataLoader(valSet,batch_size = config["batch_size"],shuffle =True,drop_last=True)#,num_workers=2)
    testLoader = DataLoader(testSet,batch_size = config["batch_size"],shuffle =False,drop_last=False)
    return trainLoader,valLoader,testLoader

In [7]:
class SimpleAttention(nn.Module):
    def __init__(self,seq_len = 32, input_feature =311):
        super(SimpleAttention,self).__init__() 
        
        self.id_embedding = nn.Embedding(10000,12)
        self.cnn = nn.Conv1d(1,64,3,1,padding=1)
        self.att = nn.MultiheadAttention(input_feature,8)
        
       
        
        self.flat = nn.Flatten()
        self.MLP =nn.Sequential(
            nn.Linear(input_feature*64,512),
            nn.BatchNorm1d(512),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(512,256),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(256,128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(128,8),
     
        )
        self.FinalOutPut = nn.Sequential(
            nn.BatchNorm1d(8),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(8,1)
        )
        self.criterion = nn.MSELoss()
        
    def forward(self,_id, f_features , Get_fea = False):
        invest_embedding = self.id_embedding(_id).squeeze(dim=2)
        
        _input =torch.cat((invest_embedding,f_features),axis =-1)
        _input = self.cnn(_input)
        #print(_input.shape)
        _input =_input.permute(1,0,2)
        feature, _ = self.att(_input,_input,_input)
        feature = feature.permute(1,0,2)
        #print(feature.shape)
        feature = self.flat(feature)
        #print(output.shape)
        feature = self.MLP(feature)
        output =  self.FinalOutPut(feature)
        if(Get_fea):
            return feature
        else:
            return output
    
    def cal_Loss(self,y_hat,y):
        return self.criterion(y_hat,y)

In [8]:
class DNN(nn.Module):
    def __init__(self,seq_len = 32,input_feature = 312):
        super(DNN,self).__init__()
        
        self.id_embedding = nn.Embedding(10000,12)
        self.dnn = nn.Sequential(
            nn.Conv1d(1,256,15,1,padding=7),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Dropout(0.4),
            nn.Conv1d(256,64,3,1,padding=1),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Dropout(0.4),
            nn.Conv1d(64,64,3,1,padding=1),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Dropout(0.4),
            nn.Conv1d(64,64,3,1,padding=1),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Dropout(0.4),
        )
        
        self.flat = nn.Flatten()
        self.MLP =nn.Sequential(
            nn.Linear(input_feature*64,64),
            nn.BatchNorm1d(64),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(64,128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(128,256),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(256,512),
            nn.BatchNorm1d(512),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(512,256),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(256,128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(128,8),
            nn.BatchNorm1d(8),
            nn.SiLU(),
           
        )
        self.FinalOutPut = nn.Sequential(
         
            nn.Linear(8,1)
        )
        self.criterion = nn.MSELoss()
        
    def forward(self,_id,f_features ,Get_fea = False):
        invest_embedding = self.id_embedding(_id).squeeze(dim=2)
        
        
        _input =torch.cat((invest_embedding,f_features),axis =-1)
        _input = self.dnn(_input)
        _input = self.flat(_input)
        feature = self.MLP(_input)
        output= self.FinalOutPut(feature)
        if(Get_fea):
            return feature
        else:
            return output

    def cal_Loss(self,y_hat,y):
        return self.criterion(y_hat,y)

In [9]:
class SimpleMLP(nn.Module):
    def __init__(self,seq_len = 32,input_feature = 311):
        super(SimpleMLP,self).__init__()
        
        self.id_embedding = nn.Embedding(10000,12)
        self.flat = nn.Flatten()
        self.MLP =nn.Sequential(
            nn.Linear(input_feature*seq_len,64),
            nn.BatchNorm1d(64),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(64,128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(128,256),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(256,512),
            nn.BatchNorm1d(512),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(512,256),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(256,128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(128,8),
            nn.BatchNorm1d(8),
            nn.SiLU(),
            
        )
        self.FinalOutPut = nn.Sequential(
           
            nn.Linear(8,1)
        )
        self.criterion = nn.MSELoss()
        
    def forward(self,_id,f_features, Get_fea=False ):
        invest_embedding = self.id_embedding(_id).squeeze(dim=2)
     
        _input =torch.cat((invest_embedding,f_features),axis =-1)
  
        _input = self.flat(_input)
        #print(_input.shape)
        feature = self.MLP(_input)
        output = self.FinalOutPut(feature)
        if(Get_fea):
            return feature
        else:
            return output

    def cal_Loss(self,y_hat,y):
        return self.criterion(y_hat,y)

In [10]:
def get_corr(y,target):
    y,target = y.reshape(-1),target.reshape(-1)
    ymean,targetmean = torch.mean(y),torch.mean(target)
    
    vy = y-ymean
    vt = target-targetmean
    
    corr = torch.sum(vy*vt)/(torch.sqrt(torch.sum(vy**2))* torch.sqrt(torch.sum(vt**2)))
    return corr

In [11]:
def train(model,trainLoader,valLoader, optimizer, scheduler, config):
    best_loss =1000
    best_corr = 0
    epochs =config["epoch"]
    for epoch in range(epochs):
        batch_bar = tqdm(total = len(trainLoader),dynamic_ncols = True,leave =False,position = 0,desc = "train")
        model.train()
        train_total_loss = 0
        for i,(x1,x2,y) in enumerate(trainLoader):
            optimizer.zero_grad()
            x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
            y_hat = model(x1,x2)
            y_hat = y_hat.reshape(-1,1)
            y =y.reshape(-1,1)
            loss = model.cal_Loss(y_hat,y)
            train_total_loss+=float(loss.cpu())
            batch_bar.set_postfix(
                loss="{:.04f}".format(float(train_total_loss / (i + 1))),
                lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
            loss.backward()
            optimizer.step()
            scheduler.step()
            batch_bar.update()
        batch_bar.close()
        
        val_total_loss = 0
        val_total_corr = 0
        batch_bar = tqdm(total = len(valLoader),dynamic_ncols = True,leave =False,position = 0,desc = "val")
        model.eval()
        for i,(x1,x2,y) in enumerate(valLoader):
            with torch.no_grad():
                x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
                y_hat = model(x1,x2)
                y_hat = y_hat.reshape(-1,1)
                y =y.reshape(-1,1)
                loss = model.cal_Loss(y_hat,y)
                

                corr = get_corr(y_hat,y)
                val_total_corr+=corr
               
                val_total_loss+=loss
            batch_bar.set_postfix(
                loss="{:.04f}".format(float(val_total_loss / (i + 1))),
                
                corr="{:.04f}".format(float(val_total_corr / (i + 1))),
             )
            batch_bar.update()
        batch_bar.close()
        val_loss = float(val_total_loss/len(valLoader))
        corr = float(val_total_corr / len(valLoader))
        if(corr>best_corr):
            best_corr = corr
            torch.save(model.state_dict(),config["store_path"])
            print("successfully save model")
        
        print(f"Epoch {epoch+1}/{epochs}: train loss {float(train_total_loss / len(trainLoader)):0.04f}, Learning Rate {optimizer.param_groups[0]['lr']:0.04f}, val loss{float(val_total_loss/len(valLoader)):0.04f}, corr {float(val_total_corr / (i + 1)):0.04f}")

In [12]:
def test(model,testLoader,config):
    model.load_state_dict(torch.load(config["store_path"]))
    model.eval()
    test_total_loss = 0
    test_total_corr = 0
    batch_bar = tqdm(total = len(testLoader),dynamic_ncols = True,leave =False,position = 0,desc = "Test")
    for i,(x1,x2,y) in enumerate(testLoader):
        with torch.no_grad():
            x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
            y_hat = model(x1,x2)
            y_hat = y_hat.reshape(-1,1)
            y =y.reshape(-1,1)
            loss = model.cal_Loss(y_hat,y)
                

            corr = get_corr(y_hat,y)
            test_total_corr+=corr
            test_total_loss+=loss
        batch_bar.set_postfix(
                test_loss="{:.04f}".format(float(test_total_loss / (i + 1))),
                
                test_corr="{:.04f}".format(float(test_total_corr / (i + 1))),
             )
        batch_bar.update()
    batch_bar.close()
    print(f"test loss{float(test_total_loss/len(testLoader)):0.04f},test corr {float(test_total_corr / len(testLoader)):0.04f}")

In [18]:
trainLoader,valLoader,testLoader = LoadData(DNN_config,get_time_data = True)

succesfully load data with shape x : (2827269, 300) | shape y :torch.Size([2827269]) | get time data is: True
succesfully load data with shape x : (157070, 300) | shape y :torch.Size([157070]) | get time data is: True
succesfully load data with shape x : (157071, 300) | shape y :torch.Size([157071]) | get time data is: True


In [14]:
def Main(trainLoader,valLoader,testLoader):
    #set all parameter
   
    model = SimpleAttention(Att_config["context"],312).cuda()
    model.train()
    MLP_model = SimpleMLP(MLP_config["context"],312).cuda()
    MLP_model.train()
    DNN_model = DNN(DNN_config["context"],312).cuda()
    DNN_model.train()
    
    optimizer = getattr(torch.optim,Att_config["optimz"])(model.parameters(),lr =Att_config["learning_rate"])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(trainLoader) * Att_config["epoch"]))
    
    
    MLP_optimizer = getattr(torch.optim,MLP_config["optimz"])(MLP_model.parameters(),lr =MLP_config["learning_rate"])
    MLP_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(MLP_optimizer, T_max=(len(trainLoader) * MLP_config["epoch"]))
    
    DNN_optimizer = getattr(torch.optim,DNN_config["optimz"])(DNN_model.parameters(),lr =DNN_config["learning_rate"])
    DNN_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(DNN_optimizer, T_max=(len(trainLoader) * DNN_config["epoch"]))
    #params
    num_para =0
    for p in DNN_model.parameters():
        num_para+=p.numel()
    print(f'Number of params : {num_para}')
    print("///////////Start Train ATT Model//////////")
    train(model,trainLoader,valLoader,optimizer,scheduler,Att_config)
    print("///////////Start Train MLP Model//////////")
    train(MLP_model,trainLoader,valLoader,MLP_optimizer,MLP_scheduler,MLP_config)
    print("///////////Start Train DNN Model//////////")
    train(DNN_model,trainLoader,valLoader,DNN_optimizer,DNN_scheduler,DNN_config)
    
    print("//////////Start test///////////")
    print("----Attention-----")
    test(model,testLoader,Att_config)
    print("---- MLP Model-----")
    test(MLP_model,testLoader,MLP_config)
    print("----DNN Model-----")
    test(DNN_model,testLoader,DNN_config)
    

In [15]:
Att_config={"epoch" : 20,"batch_size":32768,"learning_rate":0.02,"optimz":"Adam","store_path":"/root/Ubiquant/Att.pth","context":1}

In [16]:
MLP_config={"epoch" :20,"batch_size":32768,"learning_rate":0.02,"optimz":"Adam","store_path":"/root/Ubiquant/MLP.pth","context":1}

In [17]:
DNN_config={"epoch" :20,"batch_size":10000,"learning_rate":0.02,"optimz":"Adam","store_path":"/root/Ubiquant/DNN.pth","context":1}

In [19]:
Main(trainLoader,valLoader,testLoader)

train:   0%|          | 0/282 [00:00<?, ?it/s]

Number of params : 1817825
///////////Start Train ATT Model//////////


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8418, Learning Rate 0.0199, val loss0.8044, corr 0.0902


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 2/20: train loss 0.8343, Learning Rate 0.0195, val loss0.8052, corr 0.0858


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 3/20: train loss 0.8336, Learning Rate 0.0189, val loss0.8076, corr 0.1015


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.8322, Learning Rate 0.0181, val loss0.8052, corr 0.0980


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 5/20: train loss 0.8309, Learning Rate 0.0171, val loss0.8031, corr 0.1069


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.8291, Learning Rate 0.0159, val loss0.8024, corr 0.1015


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.8288, Learning Rate 0.0145, val loss0.8050, corr 0.0996


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 8/20: train loss 0.8273, Learning Rate 0.0131, val loss0.8040, corr 0.1101


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8272, Learning Rate 0.0116, val loss0.8103, corr 0.0952


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.8263, Learning Rate 0.0100, val loss0.8050, corr 0.1055


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.8246, Learning Rate 0.0084, val loss0.8041, corr 0.1091


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 12/20: train loss 0.8235, Learning Rate 0.0069, val loss0.8019, corr 0.1118


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.8222, Learning Rate 0.0055, val loss0.8046, corr 0.1064


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 14/20: train loss 0.8206, Learning Rate 0.0041, val loss0.8051, corr 0.1140


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 15/20: train loss 0.8190, Learning Rate 0.0029, val loss0.8031, corr 0.1207


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.8176, Learning Rate 0.0019, val loss0.8057, corr 0.1132


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.8157, Learning Rate 0.0011, val loss0.8071, corr 0.1102


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.8138, Learning Rate 0.0005, val loss0.8092, corr 0.1130


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.8122, Learning Rate 0.0001, val loss0.8074, corr 0.1098


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.8117, Learning Rate 0.0000, val loss0.8089, corr 0.1094
///////////Start Train MLP Model//////////


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8412, Learning Rate 0.0199, val loss0.8076, corr 0.0903


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 2/20: train loss 0.8325, Learning Rate 0.0195, val loss0.8047, corr 0.0928


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 3/20: train loss 0.8299, Learning Rate 0.0189, val loss0.8075, corr 0.0977


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 4/20: train loss 0.8279, Learning Rate 0.0181, val loss0.8077, corr 0.1019


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 5/20: train loss 0.8260, Learning Rate 0.0171, val loss0.8031, corr 0.1090


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.8242, Learning Rate 0.0159, val loss0.8046, corr 0.1077


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.8226, Learning Rate 0.0145, val loss0.8082, corr 0.1083


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.8210, Learning Rate 0.0131, val loss0.8056, corr 0.1045


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8196, Learning Rate 0.0116, val loss0.8088, corr 0.1059


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 10/20: train loss 0.8185, Learning Rate 0.0100, val loss0.8064, corr 0.1094


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.8172, Learning Rate 0.0084, val loss0.8062, corr 0.1038


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.8159, Learning Rate 0.0069, val loss0.8085, corr 0.1061


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.8148, Learning Rate 0.0055, val loss0.8083, corr 0.1040


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.8136, Learning Rate 0.0041, val loss0.8086, corr 0.1057


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.8128, Learning Rate 0.0029, val loss0.8076, corr 0.1074


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.8117, Learning Rate 0.0019, val loss0.8090, corr 0.1072


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.8112, Learning Rate 0.0011, val loss0.8118, corr 0.1046


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.8106, Learning Rate 0.0005, val loss0.8095, corr 0.1053


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.8104, Learning Rate 0.0001, val loss0.8124, corr 0.1052


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 20/20: train loss 0.8100, Learning Rate 0.0000, val loss0.8133, corr 0.1058
///////////Start Train DNN Model//////////


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8420, Learning Rate 0.0199, val loss0.8060, corr 0.0957


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 2/20: train loss 0.8343, Learning Rate 0.0195, val loss0.8058, corr 0.0978


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 3/20: train loss 0.8316, Learning Rate 0.0189, val loss0.8029, corr 0.1028


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 4/20: train loss 0.8295, Learning Rate 0.0181, val loss0.8033, corr 0.1087


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 5/20: train loss 0.8277, Learning Rate 0.0171, val loss0.8038, corr 0.1156


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 6/20: train loss 0.8259, Learning Rate 0.0159, val loss0.8015, corr 0.1160


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 7/20: train loss 0.8243, Learning Rate 0.0145, val loss0.8022, corr 0.1194


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.8229, Learning Rate 0.0131, val loss0.8013, corr 0.1175


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8213, Learning Rate 0.0116, val loss0.8025, corr 0.1188


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 10/20: train loss 0.8196, Learning Rate 0.0100, val loss0.8001, corr 0.1221


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.8180, Learning Rate 0.0084, val loss0.8017, corr 0.1212


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.8165, Learning Rate 0.0069, val loss0.8051, corr 0.1149


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.8154, Learning Rate 0.0055, val loss0.8033, corr 0.1180


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.8136, Learning Rate 0.0041, val loss0.8053, corr 0.1155


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.8121, Learning Rate 0.0029, val loss0.8043, corr 0.1197


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.8110, Learning Rate 0.0019, val loss0.8077, corr 0.1175


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.8098, Learning Rate 0.0011, val loss0.8025, corr 0.1149


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.8090, Learning Rate 0.0005, val loss0.8042, corr 0.1189


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.8084, Learning Rate 0.0001, val loss0.8058, corr 0.1182


Test:   0%|          | 0/16 [00:00<?, ?it/s]                                    

Epoch 20/20: train loss 0.8079, Learning Rate 0.0000, val loss0.8036, corr 0.1190
//////////Start test///////////
----Attention-----


Test:   6%|▋         | 1/16 [00:00<00:02,  6.98it/s, test_corr=0.1468, test_loss=0.8170] 

test loss0.8068,test corr 0.1443
---- MLP Model-----


Test:   0%|          | 0/16 [00:00<?, ?it/s]                                             

test loss0.8077,test corr 0.1426
----DNN Model-----


                                                                                         

test loss0.8067,test corr 0.1462




In [20]:
class MixModel(nn.Module):
    def __init__(self):
        super(MixModel,self).__init__()
        self.MLP =nn.Sequential(
            nn.Linear(24,64),
            nn.BatchNorm1d(64),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(64,8),
            nn.BatchNorm1d(8),
            nn.SiLU(),
            nn.Dropout(0.4),
            nn.Linear(8,1),
        )
 
        self.criterion = nn.MSELoss()
        
    def forward(self,x):
        output = self.MLP(x)
        return output

    def cal_Loss(self,y_hat,y):
        return self.criterion(y_hat,y)

In [21]:
def mixtrain(model,trainLoader,valLoader, optimizer, scheduler, config):
    #load model
    att =SimpleAttention(Att_config["context"],312).cuda()
    att.load_state_dict(torch.load(Att_config['store_path']),strict=False)
    att.eval()
    
    mlp =SimpleMLP(MLP_config["context"],312).cuda()
    mlp.load_state_dict(torch.load(MLP_config['store_path']),strict=False)
    mlp.eval()
    
    dnn =DNN(DNN_config["context"],312).cuda()
    dnn.load_state_dict(torch.load(DNN_config['store_path']),strict=False)
    dnn.eval()
    
    best_corr =0
    best_loss =1000
    epochs =config["epoch"]
    for epoch in range(epochs):
        batch_bar = tqdm(total = len(trainLoader),dynamic_ncols = True,leave =False,position = 0,desc = "train")
        model.train()
        train_total_loss = 0
        for i,(x1,x2,y) in enumerate(trainLoader):
            optimizer.zero_grad()
            x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
            f1 = att(x1,x2,Get_fea = True)
            f2 = mlp(x1,x2,Get_fea = True)
            f3 = dnn(x1,x2,Get_fea = True)
            #print(f'f1 shape: {f1.shape}')
            x = torch.cat((f1,f2),axis =-1)
            x = torch.cat((x,f3),axis =-1)
            #print(f'x shape: {x.shape}')
            y_hat = model(x)
            y_hat = y_hat.reshape(-1,1)
            y =y.reshape(-1,1)
            loss = model.cal_Loss(y_hat,y)
            train_total_loss+=float(loss.cpu())
            batch_bar.set_postfix(
                loss="{:.04f}".format(float(train_total_loss / (i + 1))),
                lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
            loss.backward()
            optimizer.step()
            scheduler.step()
            batch_bar.update()
        batch_bar.close()
        
        val_total_loss = 0
        val_total_corr = 0
        batch_bar = tqdm(total = len(valLoader),dynamic_ncols = True,leave =False,position = 0,desc = "val")
        model.eval()
        for i,(x1,x2,y) in enumerate(valLoader):
            with torch.no_grad():
                x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
                f1 = att(x1,x2,Get_fea = True)
                f2 = mlp(x1,x2,Get_fea = True)
                f3 = dnn(x1,x2,Get_fea = True)
                
                x = torch.cat((f1,f2),axis =-1)
                x = torch.cat((x,f3),axis =-1)
                y_hat = model(x)
                y_hat = y_hat.reshape(-1,1)
                y =y.reshape(-1,1)
                loss = model.cal_Loss(y_hat,y)
                

                corr = get_corr(y_hat,y)
                val_total_corr+=corr
               
                val_total_loss+=loss
            batch_bar.set_postfix(
                loss="{:.04f}".format(float(val_total_loss / (i + 1))),
                
                corr="{:.04f}".format(float(val_total_corr / (i + 1))),
             )
            batch_bar.update()
        batch_bar.close()
        val_loss = float(val_total_loss/len(valLoader))
        corr = float(val_total_corr/len(valLoader))
        if(corr>best_corr):
            best_corr = corr
            torch.save(model.state_dict(),config["store_path"])
            print("successfully save model")
        
        print(f"Epoch {epoch+1}/{epochs}: train loss {float(train_total_loss / len(trainLoader)):0.04f}, Learning Rate {optimizer.param_groups[0]['lr']:0.04f}, val loss{float(val_total_loss/len(valLoader)):0.04f}, corr {float(val_total_corr / (i + 1)):0.04f}")

In [22]:
def Mixtest(model,testLoader,config):
    #load model
    att =SimpleAttention(Att_config["context"],312).cuda()
    att.load_state_dict(torch.load(Att_config['store_path']),strict=False)
    att.eval()
    
    mlp =SimpleMLP(MLP_config["context"],312).cuda()
    mlp.load_state_dict(torch.load(MLP_config['store_path']),strict=False)
    mlp.eval()
    
    dnn =DNN(DNN_config["context"],312).cuda()
    dnn.load_state_dict(torch.load(DNN_config['store_path']),strict=False)
    dnn.eval()
    
    
    model.load_state_dict(torch.load(config["store_path"]))
    model.eval()
    test_total_loss = 0
    test_total_corr = 0
    batch_bar = tqdm(total = len(testLoader),dynamic_ncols = True,leave =False,position = 0,desc = "Test")
    for i,(x1,x2,y) in enumerate(testLoader):
        with torch.no_grad():
            x1,x2,y = x1.cuda(),x2.cuda(),y.cuda()
            f1 = att(x1,x2,Get_fea = True)
            f2 = mlp(x1,x2,Get_fea = True)
            f3 = dnn(x1,x2,Get_fea = True)   
            x = torch.cat((f1,f2),axis =-1)
            x = torch.cat((x,f3),axis =-1)
            y_hat = model(x)
            y_hat = y_hat.reshape(-1,1)
            y =y.reshape(-1,1)
            loss = model.cal_Loss(y_hat,y)
                

            corr = get_corr(y_hat,y)
            test_total_corr+=corr
            test_total_loss+=loss
        batch_bar.set_postfix(
                test_loss="{:.04f}".format(float(test_total_loss / (i + 1))),
                
                test_corr="{:.04f}".format(float(test_total_corr / (i + 1))),
             )
        batch_bar.update()
    batch_bar.close()
    print(f"test loss{float(test_total_loss/len(testLoader)):0.04f},test corr {float(test_total_corr / len(testLoader)):0.04f}")

In [23]:
def mixMain():
    model = MixModel().cuda()
    model.train()
    optimizer = getattr(torch.optim,mix_config["optimz"])(model.parameters(),lr =mix_config["learning_rate"])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(trainLoader) * mix_config["epoch"]))
    print("///////////Start Train mix Model//////////")
    mixtrain(model,trainLoader,valLoader,optimizer,scheduler,mix_config)
    print("//////////Start test///////////")
    print("----Attention-----")
    Mixtest(model,testLoader,Att_config)

In [24]:
mix_config={"epoch" : 20,"batch_size":10000,"learning_rate":0.02,"optimz":"Adam","store_path":"/root/Ubiquant/Mix.pth","context":1}

In [25]:
mixMain()

///////////Start Train mix Model//////////


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 1/20: train loss 0.8175, Learning Rate 0.0199, val loss0.8028, corr 0.1137


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 2/20: train loss 0.8114, Learning Rate 0.0195, val loss0.8056, corr 0.1153


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

successfully save model
Epoch 3/20: train loss 0.8110, Learning Rate 0.0189, val loss0.8024, corr 0.1150


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 4/20: train loss 0.8107, Learning Rate 0.0181, val loss0.8109, corr 0.1141


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 5/20: train loss 0.8105, Learning Rate 0.0171, val loss0.8071, corr 0.1193


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 6/20: train loss 0.8104, Learning Rate 0.0159, val loss0.8075, corr 0.1165


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 7/20: train loss 0.8102, Learning Rate 0.0145, val loss0.8073, corr 0.1155


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 8/20: train loss 0.8102, Learning Rate 0.0131, val loss0.8102, corr 0.1200


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 9/20: train loss 0.8104, Learning Rate 0.0116, val loss0.8078, corr 0.1171


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 10/20: train loss 0.8099, Learning Rate 0.0100, val loss0.8073, corr 0.1147


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 11/20: train loss 0.8099, Learning Rate 0.0084, val loss0.8060, corr 0.1149


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 12/20: train loss 0.8096, Learning Rate 0.0069, val loss0.8075, corr 0.1136


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 13/20: train loss 0.8095, Learning Rate 0.0055, val loss0.8091, corr 0.1135


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 14/20: train loss 0.8095, Learning Rate 0.0041, val loss0.8100, corr 0.1195


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 15/20: train loss 0.8094, Learning Rate 0.0029, val loss0.8091, corr 0.1175


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 16/20: train loss 0.8091, Learning Rate 0.0019, val loss0.8068, corr 0.1152


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 17/20: train loss 0.8092, Learning Rate 0.0011, val loss0.8082, corr 0.1170


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 18/20: train loss 0.8092, Learning Rate 0.0005, val loss0.8058, corr 0.1179


train:   0%|          | 0/282 [00:00<?, ?it/s]                                  

Epoch 19/20: train loss 0.8090, Learning Rate 0.0001, val loss0.8090, corr 0.1164


                                                                                

Epoch 20/20: train loss 0.8091, Learning Rate 0.0000, val loss0.8062, corr 0.1174
//////////Start test///////////
----Attention-----


RuntimeError: Error(s) in loading state_dict for MixModel:
	Unexpected key(s) in state_dict: "id_embedding.weight", "cnn.weight", "cnn.bias", "att.in_proj_weight", "att.in_proj_bias", "att.out_proj.weight", "att.out_proj.bias", "FinalOutPut.0.weight", "FinalOutPut.0.bias", "FinalOutPut.0.running_mean", "FinalOutPut.0.running_var", "FinalOutPut.0.num_batches_tracked", "FinalOutPut.3.weight", "FinalOutPut.3.bias", "MLP.9.weight", "MLP.9.bias", "MLP.9.running_mean", "MLP.9.running_var", "MLP.9.num_batches_tracked", "MLP.12.weight", "MLP.12.bias". 
	size mismatch for MLP.0.weight: copying a param with shape torch.Size([512, 19968]) from checkpoint, the shape in current model is torch.Size([64, 24]).
	size mismatch for MLP.0.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for MLP.4.weight: copying a param with shape torch.Size([256, 512]) from checkpoint, the shape in current model is torch.Size([8, 64]).
	size mismatch for MLP.4.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.5.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.5.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.5.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.5.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([8]).
	size mismatch for MLP.8.weight: copying a param with shape torch.Size([128, 256]) from checkpoint, the shape in current model is torch.Size([1, 8]).
	size mismatch for MLP.8.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([1]).