# A Practice on "Neural Collaborative Filtering"

# 0. Mount and load raw data

In [None]:
# Mount Google Drive at /gdrive; the next cell changes into a project folder
# that lives on the mounted drive.
# force_remount=True requests a fresh mount even if /gdrive is already mounted.
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


In [None]:
cd /gdrive/MyDrive/Lectures/2024/RecSys/ncf

/gdrive/MyDrive/Lectures/2024/RecSys/ncf


# 1. Import necessary modules and classes

In [None]:
# <!-- collapse=True -->
%matplotlib inline
from datetime import datetime
from functools import reduce
from os.path import exists
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
import gc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sys

import os
import time
import argparse
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.backends.cudnn as cudnn
from torch.utils.tensorboard import SummaryWriter

# # Import User + Movie baseline model
# from cf_utils import DampedUserMovieBaselineModel

# Use the seaborn "darkgrid" matplotlib stylesheet.
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# deprecated the old names (they were removed in 3.8), so prefer the new name
# and fall back to the legacy one only on older installations.
_DARKGRID_STYLE = 'seaborn-v0_8-darkgrid'
if _DARKGRID_STYLE not in plt.style.available:
    _DARKGRID_STYLE = 'seaborn-darkgrid'
plt.style.use(_DARKGRID_STYLE)

  plt.style.use('seaborn-darkgrid')


# 2. Loading Main models

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import model
import config
import evaluate
import data_utils

In [None]:
config.main_path

'/gdrive/MyDrive/Lectures/2024/RecSys/ncf/NCF-Data/'

Basic Settings

In [None]:
def _str2bool(value):
	"""Convert a command-line value such as "true"/"False"/"1" into a bool.

	Without an explicit converter argparse keeps the raw string, and any
	non-empty string (including "False") is truthy.
	"""
	if isinstance(value, bool):
		return value
	text = str(value).strip().lower()
	if text in ("true", "t", "yes", "y", "1"):
		return True
	if text in ("false", "f", "no", "n", "0"):
		return False
	raise argparse.ArgumentTypeError(
		"expected a boolean value, got {!r}".format(value))


# Hyper-parameters and run options for the experiment. The defaults below are
# the values actually used by this notebook (see parse_args(args=[]) at the end).
parser = argparse.ArgumentParser()
parser.add_argument("--lr",
	type=float,
	default=0.001,
	help="learning rate")
parser.add_argument("--dropout",
	type=float,
	default=0.0,
	help="dropout rate")
parser.add_argument("--batch_size",
	type=int,
	default=256,
	help="batch size for training")
parser.add_argument("--epochs",
	type=int,
	default=20,
	help="training epoches")
parser.add_argument("--top_k",
	type=int,
	default=10,
	help="compute metrics@top_k")
parser.add_argument("--factor_num",
	type=int,
	default=32,
	help="predictive factors numbers in the model")
parser.add_argument("--num_layers",
	type=int,
	default=3,
	help="number of layers in MLP model")
parser.add_argument("--num_ng",
	type=int,
	default=4,
	help="sample negative items for training")
parser.add_argument("--test_num_ng",
	type=int,
	default=99,
	help="sample part of negative items for testing")
parser.add_argument("--out",
	type=_str2bool,  # so that "--out False" really disables saving
	default=True,
	help="save model or not")
parser.add_argument("--gpu",
	type=str,
	default="0",
	help="gpu card ID")
# Parse an empty argument list so the notebook kernel's own argv is ignored and
# every option takes its default value.
args = parser.parse_args(args=[])

In [None]:
# Expose only the GPU id(s) selected via --gpu to CUDA in this process.
os.environ.update({"CUDA_VISIBLE_DEVICES": args.gpu})
# Turn on cuDNN's benchmark mode.
cudnn.benchmark = True

# main.py : 데이터셋 준비
### Loading data

- **train_loader → shuffle ON**
    - NCFData 생성: 이 때 num_ng=4, 훈련모드 켬
    - 배치사이즈 256으로 설정 → 배치사이즈는 변경 가능
- **test_loader**
    - NCFData 생성: 훈련모드 끔
    - **배치사이즈 100으로 설정 → 배치 사이즈 변경 불가**
    
    shuffle X → Test data는 평가가 목적이므로 shuffle X
    
    - 배치 사이즈를 100으로 고정하는 이유?
    - test data 생성 당시 한 라인에 총 100개의 item을 Load 했으므로 user 1명에 대해 정확하게 P : 1 / N : 99로 배치가 구성되기 때문!!
    - 배치 사이즈가 100이면 정확히 한번 라인 읽을 때 positive 1, negative 99개의 data를 읽음

In [None]:
# Unpack the five objects returned by data_utils.load_all(): the train and test
# interaction data, the user and item counts, and train_mat (presumably the
# training interaction matrix; confirm in data_utils).
(train_data, test_data,
 user_num, item_num, train_mat) = data_utils.load_all()

In [None]:
# Training dataset: training mode ON, with args.num_ng passed as the
# negative-sample count.
train_dataset = data_utils.NCFData(
	train_data, item_num, train_mat, args.num_ng, True)

# Test dataset: training mode OFF, so the negative-sample count is irrelevant
# and is passed as 0.
test_dataset = data_utils.NCFData(
	test_data, item_num, train_mat, 0, False)

# Training loader: shuffled mini-batches of args.batch_size (256 by default).
train_loader = data.DataLoader(
	dataset=train_dataset,
	batch_size=args.batch_size,
	shuffle=True,
	num_workers=2,
)

# Test loader: not shuffled because it is used for evaluation only. The batch
# size is args.test_num_ng + 1 (100 by default).
test_loader = data.DataLoader(
	dataset=test_dataset,
	batch_size=args.test_num_ng + 1,
	shuffle=False,
	num_workers=0,
)

# main.py : 모델 초기화
### Configurations for Model and Optimizer < main.py : 모델 초기화 >

- 'NeuMF-pre'
    - 사전 학습된 모델 로딩
    - GMF_model, MLP_model을 로딩

In [None]:
# Re-import the model module under its own alias. The assignment further down
# rebinds the notebook-level name `model` from the imported module to the
# network instance, so looking up `model.NCF` on a second run of this cell
# would fail with an AttributeError. The import statement resolves the module
# regardless of what the name `model` currently points to, which keeps this
# cell safe to re-run.
import model as ncf_module

if config.model == 'NeuMF-pre':
	# 'NeuMF-pre' starts from previously saved GMF and MLP models, so both
	# checkpoint files must exist.
	assert os.path.exists(config.GMF_model_path), 'lack of GMF model'
	assert os.path.exists(config.MLP_model_path), 'lack of MLP model'
	# NOTE(review): these checkpoints are whole pickled model objects (they are
	# written with torch.save(model, ...) by the training cell). Unpickling can
	# execute arbitrary code, so only load files you trust. Recent PyTorch
	# releases may also require weights_only=False for such files; confirm
	# against the installed version.
	GMF_model = torch.load(config.GMF_model_path)
	MLP_model = torch.load(config.MLP_model_path)
else:
	GMF_model = None
	MLP_model = None

# After this line `model` is the network instance, as later cells expect.
model = ncf_module.NCF(user_num, item_num, args.factor_num, args.num_layers,
						args.dropout, config.model, GMF_model, MLP_model)
model.cuda()
# BCEWithLogitsLoss applies the sigmoid itself, so it is fed the model's raw
# output together with the 0/1 labels.
loss_function = nn.BCEWithLogitsLoss()

# The 'NeuMF-pre' variant is optimised with plain SGD; every other variant
# uses Adam.
if config.model == 'NeuMF-pre':
	optimizer = optim.SGD(model.parameters(), lr=args.lr)
else:
	optimizer = optim.Adam(model.parameters(), lr=args.lr)

In [None]:
model

NCF(
  (embed_user_GMF): Embedding(6040, 32)
  (embed_item_GMF): Embedding(3706, 32)
  (embed_user_MLP): Embedding(6040, 128)
  (embed_item_MLP): Embedding(3706, 128)
  (MLP_layers): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=256, out_features=128, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=64, out_features=32, bias=True)
    (8): ReLU()
  )
  (predict_layer): Linear(in_features=32, out_features=1, bias=True)
)

## Training

In [None]:
# count: number of optimizer steps taken so far; best_hr: best Hit Ratio seen.
count, best_hr = 0, 0
# Initialise the remaining "best" trackers up front so the summary at the end
# cannot raise a NameError when no epoch ever beats best_hr (for example when
# args.epochs is 0 or HR stays at 0).
best_ndcg, best_epoch = 0, None

# Loop over epochs.
for epoch in range(args.epochs):
	model.train()  # training mode: enables dropout (if any)
	start_time = time.time()
	# Draw the negative samples for this epoch. With the default num_ng=4, each
	# positive is paired with 4 negatives, i.e. 5 samples per positive take
	# part in training every epoch.
	train_loader.dataset.ng_sample()

	# Loop over mini-batches; user, item and label each hold one entry per
	# sample in the batch (batch_size is 256 by default).
	for user, item, label in train_loader:
		user = user.cuda()
		item = item.cuda()
		label = label.float().cuda()  # cast labels to float before moving to the GPU

		model.zero_grad()  # clear gradients left over from the previous step
		prediction = model(user, item)  # forward pass
		loss = loss_function(prediction, label)  # compute the loss
		loss.backward()  # back-propagate to compute gradients
		optimizer.step()  # let the optimizer update the parameters
		# writer.add_scalar('data/loss', loss.item(), count)
		count += 1

	# Evaluate on the test data and report the metrics for this epoch.
	model.eval()  # evaluation mode
	HR, NDCG = evaluate.metrics(model, test_loader, args.top_k)

	elapsed_time = time.time() - start_time
	print("The time elapse of epoch {:03d}".format(epoch) + " is: " +
			time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
	print("HR: {:.3f}\tNDCG: {:.3f}".format(np.mean(HR), np.mean(NDCG)))

	# Keep the model with the best Hit Ratio so far.
	if HR > best_hr:
		# Remember the metrics and epoch at the moment best_hr improved.
		best_hr, best_ndcg, best_epoch = HR, NDCG, epoch
		if args.out:
			# makedirs(exist_ok=True) also creates missing parent directories
			# and does not fail if the directory already exists.
			os.makedirs(config.model_path, exist_ok=True)
			torch.save(model,  # save the whole model object
				'{}{}.pth'.format(config.model_path, config.model))

if best_epoch is None:
	print("End. No epoch improved on the initial HR of {:.3f}.".format(best_hr))
else:
	print("End. Best epoch {:03d}: HR = {:.3f}, NDCG = {:.3f}".format(
										best_epoch, best_hr, best_ndcg))

  self.pid = os.fork()


The time elapse of epoch 000 is: 00: 01: 53
HR: 0.580	NDCG: 0.326
The time elapse of epoch 001 is: 00: 01: 50
HR: 0.628	NDCG: 0.363
The time elapse of epoch 002 is: 00: 01: 46
HR: 0.654	NDCG: 0.381
The time elapse of epoch 003 is: 00: 01: 43
HR: 0.669	NDCG: 0.395
The time elapse of epoch 004 is: 00: 01: 42
HR: 0.678	NDCG: 0.399
The time elapse of epoch 005 is: 00: 01: 41
HR: 0.680	NDCG: 0.407
The time elapse of epoch 006 is: 00: 01: 42
HR: 0.680	NDCG: 0.408
The time elapse of epoch 007 is: 00: 01: 40
HR: 0.687	NDCG: 0.413
The time elapse of epoch 008 is: 00: 01: 41
HR: 0.697	NDCG: 0.417
The time elapse of epoch 009 is: 00: 01: 40
HR: 0.696	NDCG: 0.419
The time elapse of epoch 010 is: 00: 01: 41
HR: 0.694	NDCG: 0.419
The time elapse of epoch 011 is: 00: 01: 40
HR: 0.697	NDCG: 0.422
The time elapse of epoch 012 is: 00: 01: 43
HR: 0.699	NDCG: 0.422
The time elapse of epoch 013 is: 00: 01: 39
HR: 0.697	NDCG: 0.423
The time elapse of epoch 014 is: 00: 01: 41
HR: 0.700	NDCG: 0.424
The time e