# 2021 WeChat Big Data Challenge

In [34]:
import pdb
import time

import pickle
import numpy as np
import pandas as pd
from collections import defaultdict

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from tqdm import tqdm

from sklearn.metrics import roc_auc_score

### DataLoader

In [13]:
class TrainTensorData(torch.utils.data.Dataset):
    """Map-style dataset that yields one labelled (user, feed) sample per index.

    ``inputs_feat`` is a dict of indexable arrays. Its ``userid``, ``feedid``
    and ``device`` entries are indexed by sample position; every other entry
    is indexed by the sample's ``feedid`` value. ``inputs_label`` maps a label
    name to a per-sample array and ``use_label`` picks the one to return.
    """

    def __init__(self, inputs_feat, inputs_label, use_label):
        self.inputs_feat = inputs_feat
        self.inputs_label = inputs_label
        self.use_label = use_label

    def __len__(self):
        # One sample per row of the per-interaction 'userid' array.
        return self.inputs_feat['userid'].shape[0]

    def __getitem__(self, index):
        """Return ``(sparseX, varlenX, denseX, label)`` for sample ``index``."""
        table = self.inputs_feat
        feedid = table['feedid'][index]

        def of_feed(key):
            # Feed-side features are stored per feed and addressed by feedid.
            return table[key][feedid]

        # Key order is kept stable: downstream code iterates these dicts.
        sparseX = {
            'userid': table['userid'][index],
            'feedid': feedid,
            'authorid': of_feed('authorid'),
            'bgm_song_id': of_feed('bgm_song_id'),
            'bgm_singer_id': of_feed('bgm_singer_id'),
            'device': table['device'][index],
            'videoplayseconds_discrete': of_feed('videoplayseconds_discrete'),
        }
        varlenX = {
            'tags': of_feed('tags'),
            'keywords': of_feed('keywords'),
        }
        denseX = {
            'feed_embeddings': of_feed('feed_embeddings'),
        }
        label = self.inputs_label[self.use_label][index]

        return sparseX, varlenX, denseX, label

In [14]:
class TestTensorData(torch.utils.data.Dataset):
    """Map-style dataset that yields one unlabelled (user, feed) sample per index.

    ``inputs_feat`` is a dict of indexable arrays. Its ``userid``, ``feedid``
    and ``device`` entries are indexed by sample position; every other entry
    is indexed by the sample's ``feedid`` value.
    """

    def __init__(self, inputs_feat):
        self.inputs_feat = inputs_feat

    def __len__(self):
        # One sample per row of the per-interaction 'userid' array.
        return self.inputs_feat['userid'].shape[0]

    def __getitem__(self, index):
        """Return ``(sparseX, varlenX, denseX)`` for sample ``index``."""
        table = self.inputs_feat
        feedid = table['feedid'][index]

        def of_feed(key):
            # Feed-side features are stored per feed and addressed by feedid.
            return table[key][feedid]

        # Key order is kept stable: downstream code iterates these dicts.
        sparseX = {
            'userid': table['userid'][index],
            'feedid': feedid,
            'authorid': of_feed('authorid'),
            'bgm_song_id': of_feed('bgm_song_id'),
            'bgm_singer_id': of_feed('bgm_singer_id'),
            'device': table['device'][index],
            'videoplayseconds_discrete': of_feed('videoplayseconds_discrete'),
        }
        varlenX = {
            'tags': of_feed('tags'),
            'keywords': of_feed('keywords'),
        }
        denseX = {
            'feed_embeddings': of_feed('feed_embeddings'),
        }

        return sparseX, varlenX, denseX

### Feature Processing

In [15]:
# Feed-side features reserve index 0 as the mask / padding value.
def padding(origin):
    """Prepend one all-zero entry along axis 0 of ``origin``.

    The result has one more row than the input, so a table that was indexed
    from 0 becomes addressable from 1, with row 0 holding zeros.

    Args:
        origin: array-like with at least one dimension.

    Returns:
        A new ``numpy.ndarray`` of shape ``(origin.shape[0] + 1, *origin.shape[1:])``
        with the same dtype as ``origin``.

    Raises:
        ValueError: if ``origin`` is 0-dimensional (there is no axis to pad).
    """
    origin = np.asarray(origin)
    if origin.ndim == 0:
        raise ValueError('padding() needs an array with at least one dimension')

    # A single zero "row" shaped like one entry of origin works for any rank,
    # instead of silently returning None for rank > 2.
    pad = np.zeros(shape=(1,) + origin.shape[1:], dtype=origin.dtype)
    return np.concatenate([pad, origin], axis=0)

In [16]:
# Discretise videoplayseconds into integer ids.
def videoplayseconds_discrete_process(df):
    """Map every distinct ``videoplayseconds`` value to an integer id starting at 1.

    Ids follow the order in which ``Series.unique()`` returns the distinct
    values; id 0 is never produced.

    Args:
        df: DataFrame with a ``videoplayseconds`` column.

    Returns:
        ``(ids, largest_id)`` where ``ids`` is an array with one id per row of
        ``df`` and ``largest_id`` is the maximum id assigned.
    """
    column = df['videoplayseconds']
    distinct = column.unique()

    # Lookup table: distinct value -> 1-based id.
    id_of = pd.Series(np.arange(1, distinct.shape[0] + 1), index=distinct)

    videoplayseconds_discrete = id_of.loc[column.tolist()].values.squeeze()
    return videoplayseconds_discrete, videoplayseconds_discrete.max()

In [17]:
# Log-transform, then min-max normalise videoplayseconds.
def process_videoplayseconds(df):
    """Return ``log(1 + videoplayseconds)`` rescaled to the range [0, 1].

    Args:
        df: DataFrame with a numeric ``videoplayseconds`` column.

    Returns:
        A Series aligned with ``df``. The smallest value maps to 0 and the
        largest to 1. When every value is identical (or the column is empty)
        the range is zero, so zeros are returned instead of the NaN that a
        0/0 division would produce.
    """
    videoplay = np.log1p(df['videoplayseconds'])
    lowest = videoplay.min()
    span = videoplay.max() - lowest

    # `not span > 0` also catches a NaN span (empty / all-NaN column).
    if not span > 0:
        return videoplay - lowest

    return (videoplay - lowest) / span

In [18]:
def process_varlensparse(df, col_name):
    """Turn a ``;``-separated id-list column into a zero-padded id matrix.

    Missing cells are filled with ``'-1'`` (this is written back to
    ``df[col_name]``). Every id is shifted by +1 so that 0 is free to act as
    the padding value; a missing cell therefore becomes a row of zeros.

    Args:
        df: DataFrame holding the column; its ``col_name`` column is updated
            with the filled values.
        col_name: name of the column whose cells look like ``"3;17;42"``.

    Returns:
        ``(feats, feats_max_len, feats_max_id)``:
        ``feats`` is an ``int32`` array of shape ``(len(df) + 1, feats_max_len)``
        whose row 0 is all zeros and whose row ``i + 1`` holds the shifted ids
        of the i-th row of ``df``; ``feats_max_len`` is the longest list
        length; ``feats_max_id`` is the largest shifted id (at least 0).

    Raises:
        ValueError: if a token is not a number.
    """

    def _to_id(token):
        # Parse with int()/float() instead of eval(): the text comes from a
        # data file and must never be executed as code.
        try:
            return int(token)
        except ValueError:
            return int(float(token))

    # Assign back rather than chained `fillna(inplace=True)`, which does not
    # update the frame under pandas copy-on-write.
    df[col_name] = df[col_name].fillna('-1')

    # Shift every id right by one; empty tokens (e.g. a trailing ';') are skipped.
    feats = [
        [_to_id(token.strip()) + 1 for token in str(cell).split(';') if token.strip()]
        for cell in df[col_name]
    ]

    # Largest id seen (never below 0) and longest list.
    feats_max_id = max(0, max((fid for row in feats for fid in row), default=0))
    feats_max_len = max((len(row) for row in feats), default=0)

    # Row 0 stays all-zero as the padding entry; shorter lists are right-padded with 0.
    matrix = np.zeros((len(feats) + 1, feats_max_len), dtype=np.int32)
    for row_idx, row in enumerate(feats, start=1):
        matrix[row_idx, :len(row)] = row

    return matrix, feats_max_len, feats_max_id

In [19]:
# Data Split
def split_data_to_train_validation(df, valid_date=14):
    """Split interactions by day into a training and a validation frame.

    Args:
        df: DataFrame with a ``date_`` column.
        valid_date: day held out for validation (defaults to 14, the value
            this function used before it became a parameter).

    Returns:
        ``(train_df, valid_df)``: rows with ``date_ < valid_date`` and rows
        with ``date_ == valid_date``, each with a fresh 0-based index. Rows
        with a later date appear in neither frame.
    """
    day = df['date_']
    train_df = df[day < valid_date].reset_index(drop=True)
    valid_df = df[day == valid_date].reset_index(drop=True)

    return train_df, valid_df

### DeepFM

In [20]:
class EmbeddingLayer(nn.Module):
    """Holds one ``nn.Embedding`` per sparse / variable-length sparse feature.

    Feature descriptors only need ``name``, ``vocabulary_size`` and ``emb_dim``
    attributes. All tables are initialised from ``N(0, init_std)`` and moved to
    ``device``.
    """

    def __init__(self, SparseFeatInfoList, VarLenSparseFeatInfoList, init_std=0.00001, device='cpu'):

        super(EmbeddingLayer, self).__init__()

        self.device = device
        self.init_std = init_std
        self.emb_dict = self.create_embedding_matrix(SparseFeatInfoList, VarLenSparseFeatInfoList)

    def create_embedding_matrix(self, SparseFeatInfoList, VarLenSparseFeatInfoList):
        """Build the ``nn.ModuleDict`` of embedding tables, keyed by feature name.

        If a name appears in both lists, the variable-length descriptor wins.
        """
        sparse_emb_dict = {v.name: nn.Embedding(v.vocabulary_size, v.emb_dim) for v in SparseFeatInfoList}
        varlen_emb_dict = {v.name: nn.Embedding(v.vocabulary_size, v.emb_dim) for v in VarLenSparseFeatInfoList}

        sparse_emb_dict.update(varlen_emb_dict)
        emb_dict = nn.ModuleDict(sparse_emb_dict)

        for table in emb_dict.values():
            nn.init.normal_(table.weight, mean=0, std=self.init_std)

        return emb_dict.to(self.device)

    def get_seq_len(self, varlenX):
        """Count the non-zero ids per row; returns ``{name: (B, 1) tensor}``."""
        return {u: torch.sum(v != 0, dim=1, keepdim=True) for u, v in varlenX.items()}

    def load_pre_emb(self, VarLenSparseFeat_emb, pretrain_emb):
        """Replace one feature's table with frozen pretrained weights.

        Args:
            VarLenSparseFeat_emb: descriptor whose ``name`` selects the table.
            pretrain_emb: 2-D float tensor used as the embedding weight.
        """
        # from_pretrained is a classmethod, so no throwaway nn.Embedding is
        # needed; freeze=True already sets requires_grad=False on the weight.
        emb = nn.Embedding.from_pretrained(pretrain_emb, freeze=True)
        # Keep the new table on the same device as the rest of the layer.
        self.emb_dict[VarLenSparseFeat_emb.name] = emb.to(self.device)

    def forward(self, sparseX, varlenX):
        """Look up embeddings for every feature in both input dicts.

        Returns:
            ``(sparseX_emb, varlenX_emb)``: dicts with the same keys (and key
            order) as the inputs, holding the looked-up embeddings.
        """
        sparseX_emb = {u: self.emb_dict[u](v) for u, v in sparseX.items()}
        varlenX_emb = {u: self.emb_dict[u](v) for u, v in varlenX.items()}

        return sparseX_emb, varlenX_emb

In [21]:
class SequencePoolingLayer(nn.Module):
    """Pools padded sequences of embeddings over their valid prefix.

    ``method`` is ``'mean'``, ``'max'`` or anything else, which is treated as
    a plain sum. The first ``seq_len`` positions of each sequence are taken as
    valid and the remainder as padding.
    """

    def __init__(self, method='mean', device='cpu'):

        super(SequencePoolingLayer, self).__init__()

        self.method = method
        self.device = device
        # Registered as a non-persistent buffer so it follows .to()/.cuda()
        # with the module but stays out of state_dict.
        self.register_buffer('eps', torch.tensor([1e-8], dtype=torch.float32), persistent=False)
        self.to(device)

    def forward(self, varlen_list, seqlen_list):
        """Pool each ``(B, L, E)`` tensor using its matching ``(B, 1)`` lengths.

        Args:
            varlen_list: list of ``(B, L, E)`` embedding tensors.
            seqlen_list: list of ``(B, 1)`` integer length tensors, same order.

        Returns:
            A list with one ``(B, E)`` tensor per input feature, for every
            pooling method.
        """
        all_hist = []
        for varlen_x, seqlen_x in zip(varlen_list, seqlen_list):

            max_len, emb_dim = varlen_x.shape[1], varlen_x.shape[2]
            mask = self._sequence_mask(seqlen_x, max_len, emb_dim)

            if self.method == 'max':
                # Padded positions must never win the max, so they are set to
                # -inf; an empty sequence then pools to 0 rather than -inf.
                hist = varlen_x.masked_fill(~mask, float('-inf')).max(dim=1)[0]
                hist = torch.where(seqlen_x > 0, hist, torch.zeros_like(hist))
            else:
                hist = torch.sum(mask * varlen_x, dim=1, keepdim=False)
                if self.method == 'mean':
                    # eps keeps the division finite for empty sequences.
                    hist = torch.div(hist, seqlen_x.type(torch.float32) + self.eps)

            all_hist.append(hist)

        return all_hist

    def _sequence_mask(self, seqlen_x, max_len, emb_dim):
        """Return a boolean ``(B, max_len, emb_dim)`` mask, True on valid positions."""
        positions = torch.arange(0, max_len).to(seqlen_x.device)
        mask = (seqlen_x > positions)  # (B, max_len)
        # expand() broadcasts over the embedding axis without copying.
        return mask.unsqueeze(dim=2).expand(-1, -1, emb_dim)  # (B, max_len, E)

In [22]:
class FM(nn.Module):
    """Second-order factorization-machine interaction term.

    For an input of shape ``(B, F, E)`` (batch, fields, embedding) it returns
    ``0.5 * sum_E((sum_F x)^2 - sum_F(x^2))`` with shape ``(B, 1)``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, inputs):
        # (sum over fields)^2, kept as (B, 1, E)
        field_sum = inputs.sum(dim=1, keepdim=True)
        square_of_sum = field_sum.pow(2)

        # sum over fields of x^2, kept as (B, 1, E)
        sum_of_square = (inputs * inputs).sum(dim=1, keepdim=True)

        # Reduce the embedding axis -> (B, 1)
        pairwise = square_of_sum - sum_of_square
        return 0.5 * pairwise.sum(dim=2, keepdim=False)

In [23]:
class DeepFM(nn.Module):
    """DeepFM-style click model: linear term + FM term + a two-branch DNN.

    * linear term: one ``nn.Linear`` over the concatenated sparse embeddings;
    * FM term: pairwise interactions between the sparse field embeddings;
    * DNN: one branch over sparse + pooled variable-length embeddings, one
      branch over the dense inputs, merged by a final MLP.

    ``forward`` returns raw logits of shape ``(B, 1)``; apply a sigmoid to get
    probabilities.
    """

    def __init__(self, SparseFeatInfoList, VarLenSparseFeatInfoList, DenseFeatInfoList, device='cpu'):
        super(DeepFM, self).__init__()

        self.SparseFeatInfoList = SparseFeatInfoList
        self.VarLenSparseFeatInfoList = VarLenSparseFeatInfoList
        self.DenseFeatInfoList = DenseFeatInfoList

        self.sparse_dim, self.varlen_dim, self.dense_dim = self._return_dim()

        # embedding layer
        self.embedding_layer = EmbeddingLayer(SparseFeatInfoList, VarLenSparseFeatInfoList, device=device)
        self.sequence_pooling_layer = SequencePoolingLayer(device=device)

        # Linear Model
        linear_input_dim = self.sparse_dim
        self.linear_model = nn.Sequential(
            nn.Linear(linear_input_dim, 1))

        # FM
        self.fm = FM()

        # DNN for Sparse and Varlen
        dnn_input_dim1 = self.sparse_dim + self.varlen_dim
        self.dnn1 = nn.Sequential(
            nn.Linear(dnn_input_dim1, 512),
            nn.ReLU(),
        )

        # DNN for Dense
        dnn_input_dim2 = self.dense_dim
        self.dnn2 = nn.Sequential(
            nn.Linear(dnn_input_dim2, 256),
            nn.ReLU(),
        )

        # DNN merging both branches (512 from dnn1 + 256 from dnn2)
        dnn_input_dim3 = 256 + 512
        self.dnn3 = nn.Sequential(
            nn.Linear(dnn_input_dim3, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

        for m in self.modules():
            if isinstance(m, (nn.Linear, nn.Conv2d)):
                nn.init.xavier_uniform_(m.weight)

        self.to(device)

    def _return_dim(self):
        """Sum ``emb_dim`` over each descriptor list -> (sparse, varlen, dense)."""
        sparse_dim = sum(v.emb_dim for v in self.SparseFeatInfoList)
        varlen_dim = sum(v.emb_dim for v in self.VarLenSparseFeatInfoList)
        dense_dim = sum(v.emb_dim for v in self.DenseFeatInfoList)

        return sparse_dim, varlen_dim, dense_dim

    def _dict2list(self, feats):
        """Return the dict's tensors in key order, promoting 1-D ones to ``(B, 1)``."""
        all_feats = []

        for feat in feats.values():
            if len(feat.shape) == 1:
                feat = feat.unsqueeze(dim=1)
            all_feats += [feat]

        return all_feats

    def forward(self, sparseX, varlenX, denseX):
        """Compute logits for one batch.

        Args:
            sparseX: dict of id tensors for single-valued features; must
                contain ``'userid'``.
            varlenX: dict of zero-padded id tensors for multi-valued features.
            denseX: dict of dense float tensors.

        Returns:
            Logit tensor of shape ``(B, 1)``.
        """
        sparseX_emb, varlenX_emb = self.embedding_layer(sparseX, varlenX)
        seq_length = self.embedding_layer.get_seq_len(varlenX)

        # Handle userid separately: its embedding is split into 4 equal chunks
        # so that it can be stacked with the other field embeddings for the FM
        # term. torch.stack needs equal shapes, so the userid emb_dim has to be
        # 4x the emb_dim of the other sparse features.
        sparseX_userid_emb = sparseX_emb.pop('userid')
        sparseX_userid_list = list(torch.chunk(sparseX_userid_emb, 4, dim=1))

        sparseX_list = self._dict2list(sparseX_emb)
        varlenX_list = self._dict2list(varlenX_emb)
        denseX_list = self._dict2list(denseX)
        seq_length_list = self._dict2list(seq_length)

        hist_list = self.sequence_pooling_layer(varlenX_list, seq_length_list)

        # (B, F, E): one row per sparse field, userid contributing 4 fields.
        fm_input = torch.stack(sparseX_list + sparseX_userid_list, dim=1)

        linear_input = torch.cat(sparseX_list + [sparseX_userid_emb], dim=1)
        nn1_input = torch.cat(sparseX_list + hist_list + [sparseX_userid_emb], dim=1)
        nn2_input = torch.cat(denseX_list, dim=1)

        # Final logit = linear + FM + DNN.
        logit = self.linear_model(linear_input)
        logit = logit + self.fm(fm_input)

        nn1_output = self.dnn1(nn1_input)
        nn2_output = self.dnn2(nn2_input)

        nn3_input = torch.cat([nn1_output, nn2_output], dim=1)
        logit = logit + self.dnn3(nn3_input)

        return logit

    def loss_func(self, logit, label, pos_weight):
        """Binary cross-entropy on logits with positives weighted by ``pos_weight``.

        Args:
            logit: model output, ``(B, 1)`` or ``(B,)``.
            label: float targets of shape ``(B,)``.
            pos_weight: scalar weight applied to the positive class.

        Returns:
            Scalar loss tensor (mean over the batch).
        """
        # reshape to the label's shape instead of squeeze(): squeeze() turns a
        # batch of one into a 0-d tensor and the loss then rejects the shapes.
        logit = logit.reshape(label.shape)
        # Build the weight on the logits' device/dtype to avoid mixing devices.
        weight = torch.as_tensor(pos_weight, dtype=logit.dtype, device=logit.device)
        loss = nn.BCEWithLogitsLoss(pos_weight=weight)

        return loss(logit, label)

### Helper

In [24]:
class SparseFeatInfo:
    """Descriptor of a single-valued categorical feature.

    Stores the feature ``name``, its ``vocabulary_size`` and the embedding
    width ``emb_dim`` as plain attributes.
    """

    def __init__(self, name, vocabulary_size, emb_dim):
        self.name, self.vocabulary_size, self.emb_dim = name, vocabulary_size, emb_dim


class DenseFeatInfo:
    """Descriptor of a dense feature: its ``name`` and width ``emb_dim``."""

    def __init__(self, name, emb_dim):
        self.name, self.emb_dim = name, emb_dim


class VarLenSparseFeatInfo:
    """Descriptor of a multi-valued (variable-length) categorical feature.

    Stores the feature ``name``, its ``vocabulary_size``, the embedding width
    ``emb_dim`` and the padded sequence length ``max_len`` as plain attributes.
    """

    def __init__(self, name, max_len, vocabulary_size, emb_dim):
        self.name, self.vocabulary_size = name, vocabulary_size
        self.emb_dim, self.max_len = emb_dim, max_len

In [25]:
def early_stopping(evaluates, inval=2):
    """Tell whether the metric has failed to improve over the last ``inval`` steps.

    Args:
        evaluates: sequence of metric values, oldest first (higher is better).
        inval: number of most recent consecutive steps to inspect.

    Returns:
        ``False`` while there are at most ``inval`` values, or if any of the
        last ``inval`` steps increased the metric; ``True`` otherwise.
    """
    if len(evaluates) <= inval:
        return False

    # The last `inval` steps span the final `inval + 1` values.
    window = evaluates[len(evaluates) - inval - 1:]
    improved = any(prev < curr for prev, curr in zip(window, window[1:]))
    return not improved

### Metric

In [26]:
"""
# 评估函数加速版
def filter_userid(df, label_name):
    def func(x):

        mask = (x[label_name] == 1).mean()
        if mask == 0.0 or mask == 1.0:
            return False
        else:
            return True

    new_df = df.groupby('userid')[['userid'] + [label_name]]
    filter_u = new_df.apply(func)

    eliminate_u = filter_u[filter_u == False]

    return eliminate_u.index



def uAUC(preds, labels, eliminate_array, userid_array):

    isin_mask = ~np.isin(userid_array, eliminate_array)

    userid_array = userid_array[isin_mask]
    preds = preds[isin_mask]
    labels = labels[isin_mask]

    # 算法
    res = np.column_stack((userid_array, preds, labels))
    res = res[res[:, 0].argsort()] # 按 userid 排序 

    u_res_list = np.split(res[:,1:3], np.unique(res[:, 0], return_index=True)[1][1:]) # 类似于 groupby

    all_auc = 0.0
    for u_res in u_res_list:
        pre, lab = u_res[:, 0], u_res[:, 1]
        all_auc += roc_auc_score(lab, pre)

    return all_auc / len(u_res_list)
"""


def uAUC(labels, preds, user_id_list):
    """Average the per-user ROC AUC over users that have both classes.

    Args:
        labels: ground-truth labels, one per sample.
        preds: predicted scores, aligned with ``labels``.
        user_id_list: user id of every sample, aligned with ``labels``.

    Returns:
        Mean of ``roc_auc_score`` over the users whose labels are not all
        identical. Users with only positives or only negatives are skipped
        because AUC is undefined for them. If no user qualifies, 0.0 is
        returned instead of dividing by zero.
    """
    user_pred = defaultdict(list)
    user_truth = defaultdict(list)

    # Group predictions and labels by user.
    for user_id, pred, truth in zip(user_id_list, preds, labels):
        user_pred[user_id].append(pred)
        user_truth[user_id].append(truth)

    total_auc = 0.0
    size = 0
    for user_id, truths in user_truth.items():
        # A user counts only if at least two neighbouring labels differ,
        # i.e. the user has both positive and negative samples.
        if not any(a != b for a, b in zip(truths, truths[1:])):
            continue

        total_auc += roc_auc_score(np.asarray(truths), np.asarray(user_pred[user_id]))
        size += 1

    if size == 0:
        return 0.0

    return float(total_auc) / size

### Session

In [27]:
class Session(object):
    """Runs training, validation and prediction loops for one model.

    Each batch from a loader is a tuple ``(sparseX, varlenX, denseX[, label])``
    where the first three are dicts of tensors.
    """

    def __init__(self, model, device='cpu'):

        self.model = model
        self.device = device

    def to_device(self, x):
        """Move a tensor, or every tensor of a dict (in place), to ``self.device``."""
        if isinstance(x, dict):
            for key in x.keys():
                x[key] = x[key].to(self.device)
        else:
            x = x.to(self.device)
        return x

    def _predict_proba(self, sparseX, varlenX, denseX):
        """Return sigmoid probabilities for one batch as a 1-D numpy array."""
        logit = self.model(sparseX, varlenX, denseX)
        # reshape(-1) keeps one dimension even for a batch of one; squeeze()
        # would give a 0-d array that np.concatenate cannot join.
        return torch.sigmoid(logit).reshape(-1).detach().cpu().numpy()

    def train(self, loader, optimizer, pos_weight):
        """Train for one pass over ``loader``; return the mean batch loss."""
        all_loss = 0.0
        self.model.train()

        with tqdm(loader) as tqdm_loader:
            for d in tqdm_loader:
                sparseX = self.to_device(d[0])
                varlenX = self.to_device(d[1])
                denseX = self.to_device(d[2])
                label = self.to_device(d[3])

                logit = self.model(sparseX, varlenX, denseX)
                loss = self.model.loss_func(logit, label, pos_weight)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                all_loss += loss.item()

        return all_loss / len(loader)

    def valid(self, loader):
        """Score ``loader`` and return the uAUC of the predictions."""
        self.model.eval()

        all_predict, all_label, all_userid = [], [], []

        with torch.no_grad():
            for d in loader:

                sparseX = self.to_device(d[0])
                varlenX = self.to_device(d[1])
                denseX = self.to_device(d[2])

                all_userid.append(sparseX['userid'].detach().cpu().numpy())
                # Labels are only needed on the host, so skip the device copy.
                all_label.append(d[3].detach().cpu().numpy())
                all_predict.append(self._predict_proba(sparseX, varlenX, denseX))

        uauc = uAUC(np.concatenate(all_label, axis=0),
                    np.concatenate(all_predict, axis=0),
                    np.concatenate(all_userid, axis=0))

        return uauc

    def predict(self, loader):
        """Return the predicted probabilities for every sample of ``loader``."""
        self.model.eval()

        all_pred = []
        with torch.no_grad():
            for d in loader:
                sparseX = self.to_device(d[0])
                varlenX = self.to_device(d[1])
                denseX = self.to_device(d[2])

                all_pred.append(self._predict_proba(sparseX, varlenX, denseX))

        return np.concatenate(all_pred, axis=0)

### Main

In [None]:
if __name__ == '__main__':

    # ---- Run configuration ----
    root_path = '/zhaochen/wechat_comp'
    device = 'cuda:0'

    validate = True
    lr = 0.01
    l2_decay = 1e-5
    num_workers = 8

    # Per-task settings; position i belongs to the i-th key of `labels` below.
    num_epochs = [10, 10, 10, 10, 10, 10, 10]
    batch_size = [1024 * 16, 1024 * 10, 1024 * 10, 1024 * 10, 1024 * 10, 1024 * 10, 1024 * 10]
    pos_weight = [3.0000, 2.0000, 5.0000, 5.0000, 7.0000, 8.0000, 7.0000]

    # ---- Load the raw tables ----
    user_action = pd.read_csv(root_path + '/data/preprocess/user_action.csv')
    feed_info = pd.read_csv(root_path + '/data/preprocess/feed_info.csv')
    test_a = pd.read_csv(root_path + '/data/preprocess/test_a.csv')

    # ---- Vocabulary statistics ----
    device_voc = max(test_a['device'].max(), user_action['device'].max())
    # NOTE: pickle.load executes arbitrary code; only load files you produced yourself.
    with open(root_path + '/data/preprocess/statistical.pkl', 'rb') as f:
        userid_voc, feedid_voc, authorid_voc, bgm_song_id_voc, bgm_singer_id_voc = pickle.load(f)

    # ---- Precomputed features ----
    # feed_history = np.load(root_path + 'feed_history_w2v_13_mean.npy')
    feed_embeddings = np.load(root_path + '/data/process/feed_embeddings_512.npy')

    LABEL_NAMES = [
        'read_comment',
        'comment',
        'like',
        'click_avatar',
        'forward',
        'follow',
        'favorite',
    ]

    # basic features followed by the label columns
    USE_ACT_FEAT = ['userid', 'feedid', 'device', 'date_'] + LABEL_NAMES

    USE_FEED_FEAT = [
        'feedid',
        'authorid',
        'bgm_song_id',
        'bgm_singer_id',
        'manual_keyword_list',
        'manual_tag_list',
        'videoplayseconds',
    ]

    user_action = user_action[USE_ACT_FEAT]
    feed_info = feed_info[USE_FEED_FEAT]

    # Feed-side arrays are addressed by feedid, so rows must be in feedid order.
    feed_info = feed_info.sort_values(by='feedid')

    feed_info['videoplayseconds'] = process_videoplayseconds(feed_info)
    tags, tags_max_len, tags_voc = process_varlensparse(feed_info, 'manual_tag_list')
    keywords, keywords_max_len, keywords_voc = process_varlensparse(feed_info, 'manual_keyword_list')

    videoplayseconds_discrete, videoplayseconds_discrete_voc = videoplayseconds_discrete_process(feed_info)

    if validate:
        train_user_action, test_user_action = split_data_to_train_validation(user_action)
    else:
        # NOTE(review): the loop below reads label columns from test_user_action
        # and calls sess.valid on it; if test_a.csv has no label columns this
        # branch fails with a KeyError when the label dict is built — confirm.
        train_user_action = user_action
        test_a['date_'] = user_action['date_'].max() + 1
        test_user_action = test_a

    # Feed-side features, built once and shared by the train and test dicts.
    # np.int64 replaces np.long, which was removed from NumPy.
    feed_side_feat = {
        'authorid': padding(feed_info['authorid'].to_numpy(np.int64)),
        'bgm_song_id': padding(feed_info['bgm_song_id'].to_numpy(np.int64)),
        'bgm_singer_id': padding(feed_info['bgm_singer_id'].to_numpy(np.int64)),

        'feed_embeddings': feed_embeddings.astype(np.float32),
        'tags': tags.astype(np.int64),
        'keywords': keywords.astype(np.int64),
        'videoplayseconds_discrete': padding(videoplayseconds_discrete),

        # 'feed_history': feed_history,
    }

    def build_inputs_feat(action_df):
        """Per-interaction id columns plus the shared feed-side arrays."""
        inputs = {
            'userid': action_df['userid'].to_numpy(np.int64),
            'feedid': action_df['feedid'].to_numpy(np.int64),
            'device': action_df['device'].to_numpy(np.int64),
        }
        inputs.update(feed_side_feat)
        return inputs

    def build_inputs_label(action_df):
        """One float32 array per label column."""
        return {name: action_df[name].to_numpy(np.float32) for name in LABEL_NAMES}

    train_inputs_feat = build_inputs_feat(train_user_action)
    train_inputs_label = build_inputs_label(train_user_action)

    test_inputs_feat = build_inputs_feat(test_user_action)
    test_inputs_label = build_inputs_label(test_user_action)

    SparseFeatInfoList = [
        SparseFeatInfo('userid', userid_voc, 512),
        SparseFeatInfo('feedid', feedid_voc+1, 128),
        SparseFeatInfo('device', device_voc+1,  128),
        SparseFeatInfo('authorid', authorid_voc+1, 128),
        SparseFeatInfo('bgm_song_id', bgm_song_id_voc+1, 128),
        SparseFeatInfo('bgm_singer_id', bgm_singer_id_voc+1, 128),
        SparseFeatInfo('videoplayseconds_discrete', videoplayseconds_discrete_voc+1, 128),
    ]

    VarLenSparseFeatInfoList = [
        VarLenSparseFeatInfo('keywords', keywords_max_len, keywords_voc+1, 128),
        VarLenSparseFeatInfo('tags', tags_max_len, tags_voc+1, 128),
    ]

    DenseFeatInfoList = [
        DenseFeatInfo('feed_embeddings', 512),
        # DenseFeatInfo('feed_history', 128),
    ]

    # Task name -> weight of its uAUC in the final weighted average.
    labels = {
        'read_comment': 4,
        'like': 3,
        'click_avatar': 2,
        'forward': 1,
        'favorite': 1,
        'comment': 1,
        'follow': 1,
    }

    result = {}
    # Timestamp is year-month-day-hour-minute-second; the previous format
    # repeated the month and used the non-portable '%s' directive.
    log_path = root_path + '/log/result_' + time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime()) + '.txt'
    with open(log_path, 'w+') as f:

        def log(message):
            """Write a line to both stdout and the result file."""
            print(message)
            print(message, file=f)

        # One independent model is trained per task.
        for i, label_name in enumerate(labels):
            train_tensordata = TrainTensorData(train_inputs_feat, train_inputs_label, label_name)
            train_loader = DataLoader(train_tensordata, batch_size=batch_size[i], shuffle=True, num_workers=num_workers)

            test_tensordata = TrainTensorData(test_inputs_feat, test_inputs_label, label_name)
            test_loader = DataLoader(test_tensordata, batch_size=1024*32, shuffle=False, num_workers=num_workers)

            deepfm = DeepFM(SparseFeatInfoList, VarLenSparseFeatInfoList, DenseFeatInfoList, device=device)
            optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, deepfm.parameters()), lr=lr, weight_decay=l2_decay)

            sess = Session(deepfm, device)

            best_epoch = 0
            best_uauc = 0
            uauc_list = []
            for j in range(num_epochs[i]):
                try:
                    loss = sess.train(train_loader, optimizer, pos_weight[i])
                    log("epoch: {:d}, loss:{:.6f}".format(j + 1, loss))

                    uauc = sess.valid(test_loader)
                    log(label_name + " auc:{:.6f}".format(uauc))

                    if best_uauc < uauc:
                        best_epoch = j + 1
                        best_uauc = uauc

                    uauc_list.append(uauc)
                    # torch.save(deepfm.state_dict(), './parameters/valid_' + label_name + str(j+1) + '.pth')

                    if early_stopping(uauc_list, 2):
                        log('earlystoping !')
                        break
                except KeyboardInterrupt:
                    # Ctrl-C ends the current task only; the next task still runs.
                    break

            result[label_name] = best_uauc
            log(label_name + " best uauc: ({:d}):{:.6f}".format(best_epoch, best_uauc))

        # Weighted average of the best per-task uAUC.
        total_uauc = 0.0
        all_weight = 0.0

        for u in result.keys():
            total_uauc += result[u] * labels[u]
            all_weight += labels[u] * 1.0

        log("the final result:{:.6f}".format(total_uauc / all_weight))