In [1]:
import torch
from torch import nn
from torch.nn import init
import torch.utils.data as data_utils
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
SEED = 2019
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [2]:
dataset = np.loadtxt("../ml-1m/ratings.dat",delimiter='::',dtype=int)[:,[0,1,3]]
N_USER = np.max(dataset[:,0])
N_ITEM = np.max(dataset[:,1])

In [3]:
def generate_train_from_local(path, n_user, n_item, n_neg=7):
    data = np.loadtxt(fname=path, delimiter="\t", skiprows=1, dtype=int)
    train_matrix = np.zeros((n_user, n_item), dtype = np.int8)
    for line in data:
        train_matrix[line[0],line[1]] = 1
    user_input, item_input, labels = [],[],[]  # x1 x2 -> y
    for uno, uitems in enumerate(train_matrix):
        positives = np.nonzero(uitems)[0]
        n_sample = len(positives) * n_neg
        negative_items = list(set(range(n_item))^set(positives))
        negatives = np.random.choice(negative_items, n_sample)  # 负采样 -- 不放回
        for i in range(len(positives)): # 正实例
            user_input.append(uno)
            item_input.append(positives[i])
            labels.append(1)
        for j in range(n_sample): # 负实例
            user_input.append(uno)
            item_input.append(negatives[j])
            labels.append(0)
    return np.array(user_input), np.array(item_input), np.array(labels), train_matrix

def generate_test_from_local(path, n_user, n_item):
    data = np.loadtxt(fname=path, delimiter="\t", skiprows=1, dtype=int)
    return data

In [4]:
class NCF(nn.Module):
    def __init__(self, gmf_n_factors, layers,  n_user, n_item, activation = torch.relu, batch_normalization = False, n_output = 1):
        super(NCF, self).__init__()
        self.activation = activation
        self.do_bn = batch_normalization
        self.fcs = []
        self.bns = []
        self.n_layer  = len(layers)
        parameter_LeCun = np.sqrt(gmf_n_factors + layers[-1])

        #self.bn_userInput = nn.BatchNorm1d(1)   # for input data
        #self.bn_itemInput = nn.BatchNorm1d(1)   # for input data
        
        self.mlp_user_embedding_layer = nn.Embedding(n_user, int(layers[0]/2))
        self._set_normalInit(self.mlp_user_embedding_layer, hasBias = False) 
        self.mlp_item_embedding_layer = nn.Embedding(n_item, int(layers[0]/2))
        self._set_normalInit(self.mlp_item_embedding_layer, hasBias = False) 
        
        self.gmf_user_embedding_layer = nn.Embedding(n_user, gmf_n_factors)
        self._set_normalInit(self.gmf_user_embedding_layer, hasBias = False) 
        self.gmf_item_embedding_layer = nn.Embedding(n_item, gmf_n_factors)
        self._set_normalInit(self.gmf_item_embedding_layer, hasBias = False) 
        
        for i in range(1, self.n_layer):               # build hidden layers and BN layers
            fc = nn.Linear(layers[i-1], layers[i])
            self._set_normalInit(fc)                  # parameters initialization
            setattr(self, 'fc%i' % i, fc)       # IMPORTANT set layer to the Module
            self.fcs.append(fc)
            if self.do_bn:
                bn = nn.BatchNorm1d(layers[i])
                setattr(self, 'bn%i' % i, bn)   # IMPORTANT set layer to the Module
                self.bns.append(bn)

        self.predict = nn.Linear(gmf_n_factors + layers[-1], n_output)         # output layer
        self._set_uniformInit(self.predict, parameter = parameter_LeCun)            # parameters initialization
        return

    def _set_normalInit(self, layer, parameter = [0.0, 0.01], hasBias=True):
        init.normal_(layer.weight, mean = parameter[0], std = parameter[1])
        if hasBias:
            init.normal_(layer.bias, mean = parameter[0], std = parameter[1])
        return
    
    def _set_uniformInit(self, layer, parameter = 5, hasBias = True):
        init.uniform_(layer.weight, a = - parameter, b = parameter)
        if hasBias:
            init.uniform_(layer.bias, a = - parameter, b = parameter)
        return
    
    def _set_heNormalInit(self, layer, hasBias=True):
        init.kaiming_normal_(layer.weight, nonlinearity='relu')
        if hasBias:
            init.kaiming_normal_(layer.bias, nonlinearity='relu')
        return
    
    def _set_heUniformInit(self, layer, hasBias=True):
        init.kaiming_uniform_(layer.weight, nonlinearity='relu')
        if hasBias:
            init.kaiming_uniform_(layer.bias, nonlinearity='relu')
        return

    def forward(self, x1, x2):
        #if self.do_bn: 
            #x1 = self.bn_userInput(x1)     # input batch normalization
            #x2 = self.bn_itemInput(x2)
        mlp_x1 = self.mlp_user_embedding_layer(x1)
        mlp_x2 = self.mlp_item_embedding_layer(x2)
        
        gmf_x1 = self.gmf_user_embedding_layer(x1)
        gmf_x2 = self.gmf_item_embedding_layer(x2)
        
        mlp_x3 = torch.cat((mlp_x1, mlp_x2), dim=1)
        mlp_x  = torch.flatten(mlp_x3, start_dim=1)        
        for i in range(1, self.n_layer):
            mlp_x = self.fcs[i-1](mlp_x)
            if self.do_bn: 
                mlp_x = self.bns[i-1](mlp_x)   # batch normalization
            mlp_x = self.activation(mlp_x)
        
        gmf_x3 = torch.mul(gmf_x1, gmf_x2)
        gmf_x  = torch.flatten(gmf_x3, start_dim=1)

        x = torch.cat((mlp_x, gmf_x), dim=1)
        out = torch.sigmoid(self.predict(x))
        return out

In [5]:
def getHitRatio(ranklist, gtItem):
    #HR击中率，如果topk中有正例ID即认为正确
    if gtItem in ranklist:
        return 1
    return 0

def getNDCG(ranklist, gtItem):
    #NDCG归一化折损累计增益
    for i in range(len(ranklist)):
        item = ranklist[i]
        if item == gtItem:
            return np.log(2) / np.log(i+2)
    return 0

In [6]:
def movieEval_1(model, loss_func, test, train_matrix, n_user, n_item, topK = 100):   
    item_list = np.array(range(n_item))
    item_list = torch.from_numpy(item_list.reshape(-1, 1)).type(torch.LongTensor)
    if torch.cuda.is_available():
        item_list = item_list.cuda()
    hit_list = list()
    undcg_list = list()
    rank_all_users = list()
    model.eval()
    with torch.no_grad(): 
        for line in test:
            user = line[0]
            pos_item = line[1]
            user_list = np.array([user for i in range(n_item)])
            user_list = torch.from_numpy(user_list.reshape(-1, 1)).type(torch.LongTensor)
            if torch.cuda.is_available():
                user_list = user_list.cuda()
            prediction = model(user_list, item_list)
            pred_vector = -1 * (prediction.cpu().data.numpy().reshape(-1))
            ranklist = np.argsort(pred_vector)
            real_r = list()
            i = 0
            while len(real_r) < topK:
                if train_matrix[user][ranklist[i]] == 0:
                    real_r.append(ranklist[i])
                i += 1     
            rank_all_users.append(real_r)
            hit_list.append(getHitRatio(real_r, pos_item))
            undcg_list.append(getNDCG(real_r, pos_item))
    model.train()
    hr = np.mean(hit_list)
    ndcg = np.mean(undcg_list)
    print('HR@', topK, ' = %.4f' %  hr)
    print('NDCG@', topK, ' = %.4f' % ndcg)
    return hr, ndcg, rank_all_users

In [7]:
def createLoader(train_user, train_item, train_label, batch_size):
    torch_x1 = torch.from_numpy(train_user.reshape(-1, 1)).type(torch.LongTensor)
    torch_x2 = torch.from_numpy(train_item.reshape(-1, 1)).type(torch.LongTensor)
    torch_y  = torch.from_numpy(train_label.reshape(-1, 1)).type(torch.FloatTensor)

    torch_dataset = data_utils.TensorDataset(torch_x1, torch_x2, torch_y)
    loader = data_utils.DataLoader(dataset = torch_dataset, batch_size = batch_size, shuffle = True, num_workers = 0)
    return loader

In [8]:
def createModel(n_factors, layers, lr, n_user, n_item):
    ncf = NCF(gmf_n_factors = n_factors, layers = layers, n_user = n_user, n_item = n_item, 
              activation = torch.relu, batch_normalization = False, n_output = 1)
    loss_func = torch.nn.BCELoss()
    if(torch.cuda.is_available()):
        ncf = ncf.cuda()
        loss_func = loss_func.cuda()
    optimizer = torch.optim.Adam(ncf.parameters(), lr = lr)
    print(ncf)
    return ncf, loss_func, optimizer

In [18]:
train_user, train_item, train_label, train_matrix = generate_train_from_local(path="../ml-1m/ml.train.txt",n_user=N_USER, n_item=N_ITEM)
test = generate_test_from_local(path="../ml-1m/ml.test.txt", n_user=N_USER, n_item=N_ITEM)

def train(train_user, train_item, train_label, test, train_matrix, epoch, batch_size, n_factors, layers, lr, topK, n_user, n_item):    
    loader = createLoader(train_user, train_item, train_label, batch_size)
    model, loss_func, optimizer = createModel(n_factors, layers, lr, n_user, n_item)
    train_loss_list = list()
    hr_list = [0.0]
    ndcg_list = [0.0]
    for e in range(epoch):
        train_loss = list()
        for step, (batch_x1, batch_x2, batch_y) in enumerate(loader):
            if torch.cuda.is_available():
                batch_x1, batch_x2, batch_y = batch_x1.cuda(), batch_x2.cuda(), batch_y.cuda()
            optimizer.zero_grad()
            prediction = model(batch_x1, batch_x2)
            loss = loss_func(prediction, batch_y) 
            loss.backward()        
            train_loss.append(loss.cpu().item())
            optimizer.step()
        print('------第'+str(e+1)+'个epoch------')
        mean_train_loss = np.mean(train_loss)
        print('train_loss', '= %.4f' % mean_train_loss)
        train_loss_list.append(mean_train_loss)  
    '''
        if (e+1)%5==0:
            hr, ndcg, rank_all_users = movieEval_1(model, loss_func, test, train_matrix, n_user=n_user, n_item=n_item, topK=topK)
            hr_list.append(hr)
            ndcg_list.append(ndcg)
    np.savetxt("./evalres/ncf/train_loss_list_"+str(epoch)+"epoch.txt", train_loss_list)    
    np.savetxt("./evalres/ncf/hr_list_"+str(epoch)+"epoch.txt", hr_list)
    np.savetxt("./evalres/ncf/ndcg_list_"+str(epoch)+"epoch.txt", ndcg_list) 
    '''
    movieEval_1(model, loss_func, test, train_matrix, n_user=n_user, n_item=n_item, topK=topK)
    torch.cuda.empty_cache()
    print('------Finished------')
    return model

# Hyper parameters
ACTIVATION = torch.relu
TOPK = 100
BATCH_SIZE = 256
LEARNING_RATE = 0.001
EPOCH = 200
LAYERS = [128, 64, 32, 16, 8]    # MLP  0层为输入层  0层/2为嵌入层  
GMF_N_FACTORS  = 64          # GMF隐层size  
#train(train_user, train_item, train_label, test, train_matrix, epoch=EPOCH, batch_size=BATCH_SIZE, n_factors=GMF_N_FACTORS, layers=LAYERS, lr=LEARNING_RATE, topK=TOPK, n_user = N_USER, n_item = N_ITEM)
model = train(train_user, train_item, train_label, test, train_matrix, epoch=6, batch_size=BATCH_SIZE, n_factors=GMF_N_FACTORS, layers=LAYERS, lr=LEARNING_RATE, topK=TOPK, n_user = N_USER, n_item = N_ITEM)

NCF(
  (mlp_user_embedding_layer): Embedding(6040, 64)
  (mlp_item_embedding_layer): Embedding(3952, 64)
  (gmf_user_embedding_layer): Embedding(6040, 64)
  (gmf_item_embedding_layer): Embedding(3952, 64)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=8, bias=True)
  (predict): Linear(in_features=72, out_features=1, bias=True)
)
------第1个epoch------
train_loss = 0.2277
------第2个epoch------
train_loss = 0.1837
------第3个epoch------
train_loss = 0.1687
------第4个epoch------
train_loss = 0.1602
------第5个epoch------
train_loss = 0.1542
------第6个epoch------
train_loss = 0.1495
HR@ 100  = 0.3709
NDCG@ 100  = 0.0949
------Finished------


In [19]:
torch.save(model, "./evalres/model/NCF.pkl")

In [10]:
train_user, train_item, train_label, train_matrix = generate_train_from_local(path="../ml-1m/ml.train.txt",n_user=N_USER, n_item=N_ITEM)
test = generate_test_from_local(path="../ml-1m/ml.test.txt", n_user=N_USER, n_item=N_ITEM)

def train_eval_d(train_user, train_item, train_label, test, train_matrix, epoch, batch_size, n_factors, layers, lr, topK, n_user, n_item):    
    loader = createLoader(train_user, train_item, train_label, batch_size)
    hr_list = list()
    ndcg_list = list()
    for i, d in enumerate(n_factors):
        model, loss_func, optimizer = createModel(d, layers[i], lr, n_user, n_item)
        model.train()
        for e in range(epoch):
            train_loss = list()
            for step, (batch_x1, batch_x2, batch_y) in enumerate(loader):
                if torch.cuda.is_available():
                    batch_x1, batch_x2, batch_y = batch_x1.cuda(), batch_x2.cuda(), batch_y.cuda()
                optimizer.zero_grad()
                prediction = model(batch_x1, batch_x2)
                loss = loss_func(prediction, batch_y) 
                loss.backward() 
                optimizer.step()
        hr, ndcg, rank_all_users = movieEval_1(model, loss_func, test, train_matrix, n_user=n_user, n_item=n_item, topK=topK)
        hr_list.append(hr)
        ndcg_list.append(ndcg)
    np.savetxt("./evalres/ncf/hr_list_d.txt", hr_list)
    np.savetxt("./evalres/ncf/ndcg_list_d.txt", ndcg_list) 
    torch.cuda.empty_cache()
    print('------Finished------')
    return

# Hyper parameters
ACTIVATION = torch.relu
TOPK = 100
BATCH_SIZE = 128
LEARNING_RATE = 0.001
EPOCH = 6
LAYERS = [[16,8,4,2],[32,16,8,4],[64,32,16,8],[128,64,32,16,8]]    # MLP  0层为输入层  0层/2为嵌入层  
GMF_N_FACTORS  = [8,16,32,64]          # GMF隐层size  
train_eval_d(train_user, train_item, train_label, test, train_matrix, epoch=EPOCH, batch_size=BATCH_SIZE, n_factors=GMF_N_FACTORS, layers=LAYERS, lr=LEARNING_RATE, topK=TOPK, n_user = N_USER, n_item = N_ITEM)

HR@ 100  = 0.3543
NDCG@ 100  = 0.0876
HR@ 100  = 0.3892
NDCG@ 100  = 0.0930
HR@ 100  = 0.3858
NDCG@ 100  = 0.0954
HR@ 100  = 0.3760
NDCG@ 100  = 0.0964
------Finished------


In [10]:
train_user, train_item, train_label, train_matrix = generate_train_from_local(path="../ml-1m/ml.train.txt",n_user=N_USER, n_item=N_ITEM)
test = generate_test_from_local(path="../ml-1m/ml.test.txt", n_user=N_USER, n_item=N_ITEM)

def train_eval_topK(train_user, train_item, train_label, test, train_matrix, epoch, batch_size, n_factors, layers, lr, topK, n_user, n_item):    
    loader = createLoader(train_user, train_item, train_label, batch_size)
    hr_list = list()
    ndcg_list = list()    
    model, loss_func, optimizer = createModel(n_factors, layers, lr, n_user, n_item)
    model.train()
    for e in range(epoch):
        for step, (batch_x1, batch_x2, batch_y) in enumerate(loader):
            if torch.cuda.is_available():
                batch_x1, batch_x2, batch_y = batch_x1.cuda(), batch_x2.cuda(), batch_y.cuda()
            optimizer.zero_grad()
            prediction = model(batch_x1, batch_x2)
            loss = loss_func(prediction, batch_y) 
            loss.backward() 
            optimizer.step()
    for k in topK:
        hr, ndcg, rank_all_users = movieEval_1(model, loss_func, test, train_matrix, n_user=n_user, n_item=n_item, topK=k)
        hr_list.append(hr)
        ndcg_list.append(ndcg)
    np.savetxt("./evalres/ncf/hr_list_topk.txt", hr_list)
    np.savetxt("./evalres/ncf/ndcg_list_topk.txt", ndcg_list) 
    torch.cuda.empty_cache()
    print('------Finished------')
    return

# Hyper parameters
ACTIVATION = torch.relu
TOPK = [50,100,200]
BATCH_SIZE = 128
LEARNING_RATE = 0.001
EPOCH = 6
LAYERS = [128, 64, 32, 16, 8]    # MLP  0层为输入层  0层/2为嵌入层  
GMF_N_FACTORS  = 64          # GMF隐层size  
train_eval_topK(train_user, train_item, train_label, test, train_matrix, epoch=EPOCH, batch_size=BATCH_SIZE, n_factors=GMF_N_FACTORS, layers=LAYERS, lr=LEARNING_RATE, topK=TOPK, n_user = N_USER, n_item = N_ITEM)

NCF(
  (mlp_user_embedding_layer): Embedding(6040, 64)
  (mlp_item_embedding_layer): Embedding(3952, 64)
  (gmf_user_embedding_layer): Embedding(6040, 64)
  (gmf_item_embedding_layer): Embedding(3952, 64)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=8, bias=True)
  (predict): Linear(in_features=72, out_features=1, bias=True)
)
HR@ 50  = 0.2402
NDCG@ 50  = 0.0728
NCF(
  (mlp_user_embedding_layer): Embedding(6040, 64)
  (mlp_item_embedding_layer): Embedding(3952, 64)
  (gmf_user_embedding_layer): Embedding(6040, 64)
  (gmf_item_embedding_layer): Embedding(3952, 64)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=8, bias=True)
  (predic

In [9]:
def train_eval_negNum(n_neg, epoch, batch_size, n_factors, layers, lr, topK, n_user, n_item):
    hr_list = list()
    ndcg_list = list()
    test = generate_test_from_local("../ml-1m/ml.test.txt", n_user, n_item)
    for n in n_neg:
        train_user,train_item,train_label,train_matrix=generate_train_from_local("../ml-1m/ml.train.txt", n_user, n_item, n_neg=n)
        loader = createLoader(train_user, train_item, train_label, batch_size)
        model, loss_func, optimizer = createModel(n_factors, layers, lr, n_user, n_item)
        model.train()
        for e in range(epoch):
            for step, (batch_x1, batch_x2, batch_y) in enumerate(loader):
                if torch.cuda.is_available():
                    batch_x1, batch_x2, batch_y = batch_x1.cuda(), batch_x2.cuda(), batch_y.cuda()
                optimizer.zero_grad()
                prediction = model(batch_x1, batch_x2)
                loss = loss_func(prediction, batch_y) 
                loss.backward() 
                optimizer.step()
        hr, ndcg, rank_all_users = movieEval_1(model, loss_func, test, train_matrix, n_user=n_user, n_item=n_item, topK=topK)
        hr_list.append(hr)
        ndcg_list.append(ndcg) 
        torch.cuda.empty_cache()
    np.savetxt("./evalres/ncf/hr_list_neg.txt", hr_list)
    np.savetxt("./evalres/ncf/ndcg_list_neg.txt", ndcg_list)
    print('------Finished------')
    return

ACTIVATION = torch.relu
TOPK = 100
BATCH_SIZE = 128
LEARNING_RATE = 0.001
EPOCH = 6
LAYERS = [128, 64, 32, 16, 8]    # MLP  0层为输入层  0层/2为嵌入层  
GMF_N_FACTORS  = 64          # GMF隐层size  
N_NEG = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
train_eval_negNum(n_neg=N_NEG, epoch=EPOCH, batch_size=BATCH_SIZE, n_factors=GMF_N_FACTORS, layers=LAYERS, lr=LEARNING_RATE, topK=TOPK, n_user = N_USER, n_item = N_ITEM)

NCF(
  (mlp_user_embedding_layer): Embedding(6040, 64)
  (mlp_item_embedding_layer): Embedding(3952, 64)
  (gmf_user_embedding_layer): Embedding(6040, 64)
  (gmf_item_embedding_layer): Embedding(3952, 64)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=8, bias=True)
  (predict): Linear(in_features=72, out_features=1, bias=True)
)
HR@ 100  = 0.2896
NDCG@ 100  = 0.0714
NCF(
  (mlp_user_embedding_layer): Embedding(6040, 64)
  (mlp_item_embedding_layer): Embedding(3952, 64)
  (gmf_user_embedding_layer): Embedding(6040, 64)
  (gmf_item_embedding_layer): Embedding(3952, 64)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=8, bias=True)
  (pred

In [10]:
import time
def train_eval_time(n_neg, epoch, batch_size, n_factors, layers, lr, topK, n_user, n_item):
    test = generate_test_from_local("../ml-1m/ml.test.txt", n_user, n_item)
    train_user,train_item,train_label,train_matrix=generate_train_from_local("../ml-1m/ml.train.txt", n_user, n_item, n_neg)
    loader = createLoader(train_user, train_item, train_label, batch_size)
    model, loss_func, optimizer = createModel(n_factors, layers, lr, n_user, n_item)
    model.train()
    time_start, time_end = 0, 0
    for e in range(1):
        time_start=time.time()
        for step, (batch_x1, batch_x2, batch_y) in enumerate(loader):
            if torch.cuda.is_available():
                batch_x1, batch_x2, batch_y = batch_x1.cuda(), batch_x2.cuda(), batch_y.cuda()
            optimizer.zero_grad()
            prediction = model(batch_x1, batch_x2)
            loss = loss_func(prediction, batch_y) 
            loss.backward() 
            optimizer.step()
        time_end=time.time()
    print('time cost:', time_end-time_start)
    torch.cuda.empty_cache()
    np.savetxt("./evalres/ncf/single_time.txt", [time_end-time_start]) 
    print('------Finished------')
    return

ACTIVATION = torch.relu
TOPK = 100
BATCH_SIZE = 128
LEARNING_RATE = 0.001
EPOCH = 6
LAYERS = [128, 64, 32, 16, 8]    # MLP  0层为输入层  0层/2为嵌入层  
GMF_N_FACTORS  = 64          # GMF隐层size  
N_NEG = 4
train_eval_time(n_neg=N_NEG, epoch=EPOCH, batch_size=BATCH_SIZE, n_factors=GMF_N_FACTORS, layers=LAYERS, lr=LEARNING_RATE, topK=TOPK, n_user = N_USER, n_item = N_ITEM)

NCF(
  (mlp_user_embedding_layer): Embedding(6040, 64)
  (mlp_item_embedding_layer): Embedding(3952, 64)
  (gmf_user_embedding_layer): Embedding(6040, 64)
  (gmf_item_embedding_layer): Embedding(3952, 64)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=8, bias=True)
  (predict): Linear(in_features=72, out_features=1, bias=True)
)
time cost: 201.6445610523224
------Finished------
