In [6]:
import os
import time

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader

from FM_pytorch.fm import FactorizationMachineModel
from FM_pytorch.movielens import MovieLens1MDataset
from FM_pytorch.train import train,test,EarlyStopper

# 获取数据集与模型

In [7]:
# Build the MovieLens-1M dataset from the ratings file, then construct a
# factorization machine sized from the dataset's field dimensions.
RATINGS_PATH = './data/ml-1m/ratings.dat'
EMBED_DIM = 16  # embedding size handed to the model

dataset = MovieLens1MDataset(RATINGS_PATH)
model = FactorizationMachineModel(dataset.field_dims, embed_dim=EMBED_DIM)

## 数据集拆分并用DataLoader加载

In [8]:
# Split the dataset 8:1:1 into training, validation and test subsets.
# The split is driven by a seeded generator so that every Restart & Run All
# produces the same partition; with the global RNG the subsets (and therefore
# the reported AUCs) would differ from run to run.
SPLIT_SEED = 42
BATCH_SIZE = 256

train_length = int(len(dataset) * 0.8)
valid_length = int(len(dataset) * 0.1)
# The test subset takes the remainder so the three lengths always sum to len(dataset).
test_length = len(dataset) - train_length - valid_length
train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    (train_length, valid_length, test_length),
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Wrap each subset in a DataLoader that yields mini-batches of BATCH_SIZE samples.
# NOTE(review): the training loader keeps the default shuffle=False, so the batch
# order is the same in every epoch; consider shuffle=True if reshuffling is wanted.
train_data_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=0)
valid_data_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, num_workers=0)
test_data_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=0)

# 选择计算设备（优先使用GPU，否则回退到CPU）

In [9]:
def try_gpu(i=0):  #@save
    """Return the CUDA device with index ``i`` if it exists, otherwise the CPU device.

    Args:
        i: Index of the requested CUDA device (defaults to 0).

    Returns:
        ``torch.device(f'cuda:{i}')`` when at least ``i + 1`` CUDA devices are
        reported by ``torch.cuda.device_count()``, else ``torch.device('cpu')``.
    """
    available_gpus = torch.cuda.device_count()
    gpu_exists = i < available_gpus
    return torch.device(f'cuda:{i}') if gpu_exists else torch.device('cpu')

# 开始训练模型

In [11]:
# Train the model with early stopping on validation AUC and report the elapsed time.
device = try_gpu()   # alternatively: torch.device('cpu')
print(device)
model = model.to(device)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001, weight_decay=0.000001)

# save_path is where the best model is stored (per the original notes on EarlyStopper).
# Create its parent directory up front so a fresh checkout without a 'result/'
# folder does not fail on the first checkpoint write; harmless if it already exists.
save_path = 'result/model_001.pt'
os.makedirs(os.path.dirname(save_path), exist_ok=True)
# num_trials: number of non-improving attempts tolerated before training is stopped early.
early_stopper = EarlyStopper(num_trials=2, save_path=save_path)

# perf_counter is a monotonic clock meant for measuring durations; time.time()
# can jump if the system clock is adjusted while training runs.
time_start = time.perf_counter()  # start timing
for epoch_i in range(100):

    train(model, optimizer, train_data_loader, criterion, device)
    # Evaluate AUC on all three subsets after every epoch.
    auc_train = test(model, train_data_loader, device)
    auc_valid = test(model, valid_data_loader, device)
    auc_test = test(model, test_data_loader, device)
    print('第{}个epoch结束：'.format(epoch_i))
    print('训练集AUC:{}'.format(auc_train))
    print('验证集AUC:{}'.format(auc_valid))
    print('测试集AUC:{}'.format(auc_test))

    # Only the validation AUC drives early stopping; the test AUC is informational.
    if not early_stopper.is_continuable(model, auc_valid):
        print('验证集上AUC的最高值是:{}'.format(early_stopper.best_accuracy))
        break
time_end = time.perf_counter()    # stop timing
time_c = time_end - time_start    # elapsed seconds
print('用时', time_c, 's')

cuda:0


100%|██████████| 3126/3126 [00:07<00:00, 404.65it/s, loss=0.441]
100%|██████████| 3126/3126 [00:04<00:00, 771.82it/s]
100%|██████████| 391/391 [00:00<00:00, 741.07it/s]
100%|██████████| 391/391 [00:00<00:00, 758.25it/s]


第0个epoch结束：
训练集AUC:0.8817858048783035
验证集AUC:0.8105748050675798
测试集AUC:0.8108016607333699


100%|██████████| 3126/3126 [00:07<00:00, 425.23it/s, loss=0.434]
100%|██████████| 3126/3126 [00:04<00:00, 747.52it/s]
100%|██████████| 391/391 [00:00<00:00, 751.05it/s]
100%|██████████| 391/391 [00:00<00:00, 732.75it/s]


第1个epoch结束：
训练集AUC:0.8863932196706997
验证集AUC:0.8096108919816281
测试集AUC:0.8097845281566891


100%|██████████| 3126/3126 [00:07<00:00, 421.40it/s, loss=0.428]
100%|██████████| 3126/3126 [00:04<00:00, 701.52it/s]
100%|██████████| 391/391 [00:00<00:00, 777.80it/s]
100%|██████████| 391/391 [00:00<00:00, 762.78it/s]

第2个epoch结束：
训练集AUC:0.8901298557079893
验证集AUC:0.808446683150216
测试集AUC:0.8085473064782682
验证集上AUC的最高值是:0.8105748050675798
用时 39.523096561431885 s



