In [54]:
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import LightGCN
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger
from recbole.utils import get_model, get_trainer
    
# size mismatch for user_embedding.weight: copying a param with shape torch.Size([494, 64]) from checkpoint, the shape in current model is torch.Size([587, 64]).
# size mismatch for item_embedding.weight: copying a param with shape torch.Size([1609, 64]) from checkpoint, the shape in current model is torch.Size([1622, 64]).

# 처음에 임베딩 할때의 유저와 아이템의 차원개수가 동일해야한다.
# -> 동일한 개수로 자르게 된다면? 하지만, 아이템의 개수가 다를것같아, 재학습이 안될것같다.

In [55]:
# configurations initialization
config = Config(model='LightGCN', dataset='Custom_ml-100k_2')

In [56]:
# init random seed
init_seed(config['seed'], config['reproducibility'])

In [57]:
# logger initialization
init_logger(config)
logger = getLogger()

In [58]:
# write config info into log
logger.info(config)

06 Feb 07:55    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2023
state = INFO
reproducibility = True
data_path = /home/RecBole/dataset/Custom_ml-100k_2
checkpoint_dir = /home/RecBole/saved/
show_progress = True
save_dataset = True
dataset_save_path = saved_dataset
save_dataloaders = True
dataloaders_save_path = saved_dataloaders
log_wandb = True

Training Hyper Parameters:
epochs = 100
train_batch_size = 4048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 1000
clip_grad_norm = None
weight_decay = 0.0001
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'group_by': 'user', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'MAP', 'Precision', 'GAUC', 'ItemCoverage', 'AveragePopularity', 'GiniIndex', 'ShannonEntropy', 'TailPercentage']
t

In [59]:
# dataset creating and filtering
dataset = create_dataset(config)
logger.info(dataset)

06 Feb 07:55    INFO  Saving filtered dataset into [/home/RecBole/saved/Custom_ml-100k_2-dataset.pth]
06 Feb 07:55    INFO  Custom_ml-100k_2
The number of users: 587
Average actions of users: 85.32423208191126
The number of items: 1622
Average actions of items: 30.845157310302284
The number of inters: 50000
The sparsity of the dataset: 94.74852801240188%
Remain Fields: ['user_id', 'item_id', 'rating', 'timestamp']


In [81]:
dataset[1]

The batch_size of interaction: 1
    user_id, torch.Size([]), cpu, torch.int64
    item_id, torch.Size([]), cpu, torch.int64
    rating, torch.Size([]), cpu, torch.float32
    timestamp, torch.Size([]), cpu, torch.float32


In [61]:
# dataset splitting
train_data, valid_data, test_data = data_preparation(config, dataset)

06 Feb 07:55    INFO  Saving split dataloaders into: [/home/RecBole/saved/Custom_ml-100k_2-for-LightGCN-dataloader.pth]
06 Feb 07:55    INFO  [Training]: train_batch_size = [4048] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
06 Feb 07:55    INFO  [Evaluation]: eval_batch_size = [4048] eval_args: [{'split': {'RS': [0.8, 0.1, 0.1]}, 'group_by': 'user', 'order': 'RO', 'mode': 'full'}]


In [79]:
# model loading and initialization
model = LightGCN(config, train_data.dataset).to(config['device'])
logger.info(model)

06 Feb 08:05    INFO  LightGCN(
  (user_embedding): Embedding(587, 64)
  (item_embedding): Embedding(1622, 64)
  (mf_loss): BPRLoss()
  (reg_loss): EmbLoss()
)
Trainable parameters: 141376


In [80]:
model

LightGCN(
  (user_embedding): Embedding(587, 64)
  (item_embedding): Embedding(1622, 64)
  (mf_loss): BPRLoss()
  (reg_loss): EmbLoss()
)

In [34]:
# trainer loading and initialization
trainer = Trainer(config, model)

03 Feb 17:37    ERROR  Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: d9249. Use `wandb login --relogin` to force relogin




In [64]:
# trainer loading and initialization
trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)



In [65]:
# resume from break point
checkpoint_file = '/home/RecBole/saved/LightGCN-Feb-03-2023_16-02-57.pth'

In [67]:
model.load_state_dict(torch.load('/home/RecBole/saved/LightGCN-Feb-04-2023_11-42-40.pth'))

RuntimeError: Error(s) in loading state_dict for LightGCN:
	Missing key(s) in state_dict: "user_embedding.weight", "item_embedding.weight". 
	Unexpected key(s) in state_dict: "config", "epoch", "cur_step", "best_valid_score", "state_dict", "other_parameter", "optimizer". 

In [68]:
import torch

model = torch.load('/home/RecBole/saved/LightGCN-Feb-04-2023_11-42-40.pth')

In [77]:
dataset.get_preload_weight(model['state_dict']['user_embedding.weight'])

ValueError: Field [tensor([[-2.5589e-20,  3.7290e-23,  2.9994e-17,  ...,  8.9380e-18,
          6.0686e-22, -9.5904e-20],
        [-1.6532e-03, -2.3781e-02,  7.2779e-02,  ..., -2.4039e-02,
         -4.0891e-02,  1.2984e-02],
        [ 1.9119e-01, -6.3899e-02, -1.1617e-01,  ...,  2.1161e-01,
         -2.3116e-02, -1.4247e-01],
        ...,
        [ 6.2891e-02,  8.9123e-02, -1.0889e-01,  ..., -1.3002e-01,
          2.7237e-02, -2.0415e-02],
        [-2.6580e-02, -4.0312e-02, -1.0649e-02,  ..., -5.7399e-02,
         -3.9977e-02,  6.9652e-02],
        [-3.2392e-02,  6.7972e-02,  8.0179e-02,  ...,  6.2905e-02,
         -3.5536e-02,  1.9342e-02]], device='cuda:0')] not in preload_weight

In [69]:
len(model['state_dict']['user_embedding.weight'])

494

In [70]:
model['state_dict']['user_embedding.weight']

tensor([[-2.5589e-20,  3.7290e-23,  2.9994e-17,  ...,  8.9380e-18,
          6.0686e-22, -9.5904e-20],
        [-1.6532e-03, -2.3781e-02,  7.2779e-02,  ..., -2.4039e-02,
         -4.0891e-02,  1.2984e-02],
        [ 1.9119e-01, -6.3899e-02, -1.1617e-01,  ...,  2.1161e-01,
         -2.3116e-02, -1.4247e-01],
        ...,
        [ 6.2891e-02,  8.9123e-02, -1.0889e-01,  ..., -1.3002e-01,
          2.7237e-02, -2.0415e-02],
        [-2.6580e-02, -4.0312e-02, -1.0649e-02,  ..., -5.7399e-02,
         -3.9977e-02,  6.9652e-02],
        [-3.2392e-02,  6.7972e-02,  8.0179e-02,  ...,  6.2905e-02,
         -3.5536e-02,  1.9342e-02]], device='cuda:0')

In [71]:
model['state_dict']['user_embedding.weight'].size()

torch.Size([494, 64])

In [72]:
model['state_dict']['user_embedding.weight'].replus(123, 1).size()

AttributeError: 'Tensor' object has no attribute 'replus'

In [73]:
trainer.resume_checkpoint(checkpoint_file)

FileNotFoundError: [Errno 2] No such file or directory: '/home/RecBole/saved/LightGCN-Feb-03-2023_16-02-57.pth'

In [None]:
# model training
best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)

In [None]:
# model evaluation
test_result = trainer.evaluate(test_data)
print(test_result)