In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from NeuralMF import NeuralMF
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl
import optuna
from sklearn.preprocessing import StandardScaler
from optuna.integration import PyTorchLightningPruningCallback
import gc  # 가비지 컬렉션


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
!pip install pytorch_lightning
!pip install optuna
!pip install optuna-integration[pytorch_lightning]

Collecting pytorch_lightning
  Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.12.0-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.1.0->pytorch_lightning)
  Dow

In [4]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"


In [2]:
movie_ratings = pd.read_csv('./dataset/ratings.csv')
movie_ratings_small = pd.read_csv('./dataset/ratings_small.csv')
movies_metadata = pd.read_csv('./dataset/movies_metadata.csv')


  movies_metadata = pd.read_csv('./dataset/movies_metadata.csv')


In [3]:
movie_ratings.isnull().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [4]:
movies_metadata.drop_duplicates(subset='id',keep='first', inplace=True)

In [5]:
movies_metadata = movies_metadata[movies_metadata['id'].str.isdigit()]
movies_metadata['id'] = movies_metadata['id'].astype('int64')

In [6]:
movies_metadata = movies_metadata.merge(movie_ratings_small, left_on='id', right_on='movieId', how='left')

In [7]:
movies_metadata.dropna(subset='userId', inplace= True)
movies_metadata.drop(columns=['movieId'], inplace=True)

In [8]:
movies_metadata.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count', 'userId', 'rating', 'timestamp'],
      dtype='object')

In [9]:
movies_metadata['genres']

5        [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...
6        [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...
7        [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...
8        [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...
9        [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...
                               ...                        
87527    [{'id': 10749, 'name': 'Romance'}, {'id': 18, ...
87528    [{'id': 10749, 'name': 'Romance'}, {'id': 18, ...
87529    [{'id': 10749, 'name': 'Romance'}, {'id': 18, ...
87532    [{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...
87576    [{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'na...
Name: genres, Length: 44989, dtype: object

In [10]:
import json
def extract_genre_names(metadata):
    # 각 리스트에서 'name' 키 값을 추출
    corrected_json_string = metadata.replace("'", '"')
    json_data = json.loads(corrected_json_string)
    a  = [genre['name'] for genre in json_data if 'name' in genre]
    return a

movies_metadata['genres'] = movies_metadata['genres'].apply(lambda x: extract_genre_names(x))


In [11]:
users_stats = movies_metadata.groupby('userId')['rating'].agg(['mean','std','count']).reset_index()
users_stats.columns = ['userId','user_mean_rating','user_rating_std','user_review_count']
movies_metadata = movies_metadata.merge(users_stats, on='userId', how='left')

In [12]:
movies_metadata['release_year'] = pd.to_datetime(movies_metadata['release_date']).dt.year

movie_stats = movies_metadata.groupby('id')['rating'].agg(['mean', 'count']).reset_index()
movie_stats.columns = ['id', 'movie_mean_rating', 'movie_review_count']

movies_metadata = movies_metadata.merge(movie_stats, on='id', how='left')


In [13]:
movies_metadata.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count', 'userId', 'rating', 'timestamp',
       'user_mean_rating', 'user_rating_std', 'user_review_count',
       'release_year', 'movie_mean_rating', 'movie_review_count'],
      dtype='object')

In [14]:
unique_genres = sorted(set(genre for genres in movies_metadata['genres'] for genre in genres))
genre_to_idx = {genre: idx for idx, genre in enumerate(unique_genres)}


# 장르를 숫자로 변환
movies_metadata['genre_ids'] = movies_metadata['genres'].apply(lambda x: [genre_to_idx[genre] for genre in x])

In [15]:
genre_ratings = movies_metadata.explode('genre_ids').groupby(['userId','genre_ids'])['rating'].mean().reset_index()
genre_ratings.columns = ['userId','genre_ids', 'user_preference']


In [16]:
movies_metadata_exploded = movies_metadata.explode('genre_ids')
movies_metadata_exploded.fillna({'genre_ids': 0}, inplace= True)



  movies_metadata_exploded.fillna({'genre_ids': 0}, inplace= True)


In [17]:
movies_metadata_exploded = movies_metadata_exploded.merge(genre_ratings, on=['userId','genre_ids'], how='left')

In [18]:
movies_metadata_exploded.isnull().sum()

adult                        0
belongs_to_collection    80502
budget                       0
genres                       0
homepage                 81995
id                           0
imdb_id                      0
original_language            0
original_title               0
overview                   150
popularity                   0
poster_path                 42
production_companies         0
production_countries         0
release_date                29
revenue                      0
runtime                      5
spoken_languages             0
status                       3
tagline                  30071
title                        0
video                        0
vote_average                 0
vote_count                   0
userId                       0
rating                       0
timestamp                    0
user_mean_rating             0
user_rating_std              0
user_review_count            0
release_year                29
movie_mean_rating            0
movie_re

In [19]:
numeric_features_cols = ['user_mean_rating', 'user_rating_std', 'user_review_count',
       'release_year', 'movie_mean_rating', 'movie_review_count', 'genre_ids',
       'user_preference']
movies_metadata_exploded.fillna({'release_year':0}, inplace=True)
movies_metadata_exploded.fillna({'user_preference':0}, inplace=True)
movies_metadata_exploded.fillna({'user_rating_std':0}, inplace= True)

In [20]:
numeric_features_cols = ['user_mean_rating', 'user_rating_std', 'user_review_count',
       'release_year', 'movie_mean_rating', 'movie_review_count', 'genre_ids',
       'user_preference']

scaler = StandardScaler()
movies_metadata_exploded[numeric_features_cols] = scaler.fit_transform(movies_metadata_exploded[numeric_features_cols])

In [21]:
target = ['user_mean_rating', 'user_rating_std', 'user_review_count',
       'release_year', 'movie_mean_rating', 'movie_review_count', 'genre_ids',
       'user_preference','id','userId','rating']
movies_metadata_exploded[target]

Unnamed: 0,user_mean_rating,user_rating_std,user_review_count,release_year,movie_mean_rating,movie_review_count,genre_ids,user_preference,id,userId,rating
0,0.207586,-0.354892,0.669992,0.217014,0.049479,-0.826605,-1.405291,0.245843,949,23.0,3.5
1,0.207586,-0.354892,0.669992,0.217014,0.049479,-0.826605,-0.686947,0.574224,949,23.0,3.5
2,0.207586,-0.354892,0.669992,0.217014,0.049479,-0.826605,-0.327775,0.149901,949,23.0,3.5
3,0.207586,-0.354892,0.669992,0.217014,0.049479,-0.826605,1.647671,0.354781,949,23.0,3.5
4,0.949350,-0.427886,0.654369,0.217014,0.049479,-0.826605,-1.405291,0.721596,949,102.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...
110073,0.733337,-1.364769,-0.840205,0.659329,0.817022,-1.027519,-0.866533,0.938690,98604,352.0,4.0
110074,0.733337,-1.364769,-0.840205,0.659329,0.817022,-1.027519,1.108913,0.516978,98604,352.0,4.0
110075,0.320610,0.231559,-0.199674,-2.254746,2.706358,-1.027519,0.031397,1.165766,49280,187.0,5.0
110076,0.320610,0.231559,-0.199674,-2.254746,2.706358,-1.027519,-1.405291,0.268912,49280,187.0,5.0


In [None]:
from optuna.integration import PyTorchLightningPruningCallback
data = movies_metadata_exploded[target]
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)
small_train_data = train_data.sample(frac=0.2, random_state=42)
val_data, test_data = train_test_split(val_data, test_size=0.5, random_state=42)  # 검증 & 테스트 분리

def objective(trial):
    latent_dim = trial.suggest_int("latent_dim", 8, 64)  # 8~64 사이 정수
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])  # 16, 32, 64 중 선택
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-3, log=True)  # log=True 사용
    epochs = trial.suggest_int("epochs", 10, 30)  # 10~50 사이 정수
    num_workers = trial.suggest_int("num_workers", 0, 4)  # 🔥 num_workers도 최적화 가능!
    
    
   

    def prepare(data, is_train):
      data['user_idx'] = data['userId'].astype('category').cat.codes
      data['item_idx'] = data['id'].astype('category').cat.codes
      user_ids = torch.tensor(data['user_idx'].values, dtype=torch.long)
      item_ids = torch.tensor(data['item_idx'].values, dtype=torch.long)
      ratings = torch.tensor(data['rating'].values, dtype=torch.float32)
      num_users = data['user_idx'].nunique()
      num_items = data['item_idx'].nunique()
      numeric_features = torch.tensor(data[numeric_features_cols].values, dtype=torch.float32)
      dataset = TensorDataset(user_ids, item_ids, numeric_features, ratings)
      loader = DataLoader(dataset, batch_size=batch_size, shuffle= is_train, num_workers= num_workers)
      return loader, num_users, num_items, numeric_features.shape[1]

    train_loader, num_users, num_items, num_numeric = prepare(small_train_data, True)
    val_loader, _, _, _ = prepare(val_data, False)

    model = NeuralMF(num_users, num_items, num_numeric,  latent_dim, learning_rate)
    pruning_callback = PyTorchLightningPruningCallback(trial, monitor="validation_loss")
    trainer = pl.Trainer(
        max_epochs=epochs,
        enable_checkpointing=False, # 체크포인트 저장 비활성화
        enable_progress_bar=False, # 진행 바 비활성화
        logger=False # 로그 저장 비활성화
    )
    trainer.callbacks.append(pruning_callback)  # 🔥 여기서 직접 추가
    trainer.fit(model, train_loader, val_loader)
    loss = trainer.callback_metrics.get("validation_loss", torch.tensor(float('inf'))).item()
    # del train_loader
    # del val_loader
    gc.collect()  # 🔥 가비지 컬렉션 실행
    torch.cuda.empty_cache()  # 🔥 GPU 캐시 정리 (GPU 사용 시)
    return loss

# ✅ Optuna 실행
study = optuna.create_study(direction="minimize")  # 손실(loss) 최소화
study.optimize(objective, n_trials=10)  # 10번의 실험 수행

# ✅ 최적 하이퍼파라미터 출력
print("Best Hyperparameters:", study.best_params)

In [29]:
print("Best Hyperparameters:", study.best_params)

Best Hyperparameters: {'latent_dim': 43, 'batch_size': 32, 'learning_rate': 0.000532381383106112, 'epochs': 11, 'num_workers': 1}


In [30]:
print(f"Number of trials: {len(study.trials)}")
print(f"Completed trials: {[t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]}")


Number of trials: 10
Completed trials: [FrozenTrial(number=0, state=1, values=[0.6989112496376038], datetime_start=datetime.datetime(2025, 2, 14, 2, 48, 15, 190872), datetime_complete=datetime.datetime(2025, 2, 14, 2, 51, 4, 633386), params={'latent_dim': 43, 'batch_size': 32, 'learning_rate': 0.000532381383106112, 'epochs': 11, 'num_workers': 1}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.6847357749938965, 1: 0.6837887167930603, 2: 0.671941339969635, 3: 0.6864969730377197, 4: 0.6732457876205444, 5: 0.6827700734138489, 6: 0.6807805299758911, 7: 0.6864109039306641, 8: 0.698116660118103, 9: 0.7005264759063721, 10: 0.6989112496376038}, distributions={'latent_dim': IntDistribution(high=64, log=False, low=8, step=1), 'batch_size': CategoricalDistribution(choices=(16, 32, 64)), 'learning_rate': FloatDistribution(high=0.001, log=True, low=0.0001, step=None), 'epochs': IntDistribution(high=30, log=False, low=10, step=1), 'num_workers': IntDistribution(high=4, log=False, low=0, s

In [38]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [22]:
#Best Hyperparameters: {'latent_dim': 43, 'batch_size': 32, 'learning_rate': 0.000532381383106112, 'epochs': 11, 'num_workers': 1}

#best_params = study.best_params

data = movies_metadata_exploded[target]
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)
small_train_data = train_data.sample(frac=0.2, random_state=42)
val_data, test_data = train_test_split(val_data, test_size=0.5, random_state=42)  # 검증 & 테스트 분리

best_params =  {'latent_dim': 43, 'batch_size': 32, 'learning_rate': 0.000532381383106112, 'epochs': 11, 'num_workers': 1}
latent_dim = best_params['latent_dim']
batch_size = best_params['batch_size']
learning_rate = best_params['learning_rate']
epochs = best_params['epochs']
num_workers = best_params['num_workers']


def prepare(data, is_train):
    data['user_idx'] = data['userId'].astype('category').cat.codes
    data['item_idx'] = data['id'].astype('category').cat.codes
    user_ids = torch.tensor(data['user_idx'].values, dtype=torch.long)
    item_ids = torch.tensor(data['item_idx'].values, dtype=torch.long)
    ratings = torch.tensor(data['rating'].values, dtype=torch.float32)
    num_users = data['user_idx'].nunique()
    num_items = data['item_idx'].nunique()
    numeric_features = torch.tensor(data[numeric_features_cols].values, dtype=torch.float32)
    dataset = TensorDataset(user_ids, item_ids, numeric_features, ratings)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle= is_train, num_workers= 2)
    return loader, num_users, num_items, numeric_features.shape[1]

train_loader, num_users, num_items, num_numeric = prepare(small_train_data, True)
val_loader, _, _, _ = prepare(val_data, False)

model = NeuralMF(num_users, num_items, num_numeric,  latent_dim, learning_rate)
trainer = pl.Trainer(
    max_epochs=epochs,
    enable_checkpointing=False, # 체크포인트 저장 비활성화
    enable_progress_bar=False, # 진행 바 비활성화
    logger=False # 로그 저장 비활성화
)
trainer.fit(model, train_loader, val_loader)

# 모델 학습이 끝난 후 불필요한 변수 제거
del train_loader, val_loader
torch.cuda.empty_cache()  # GPU 메모리 해제
gc.collect()  # Python 가비지 컬렉션 실행



GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name           | Type      | Params | Mode 
-----------------------------------------------------
0 | user_embedding | Embedding | 28.6 K | train
1 | item_embedding | Embedding | 85.1 K | train
2 | numeric_fc     | Linear    | 387    | train
3 | fc1            | Linear    | 16.6 K | train
4 | fc2            | Linear    | 8.3 K  | train
5 | fc3            | Linear    | 65     | train
6 | dropout        | Dropout   | 0      | train
7 | activation     | ReLU      | 0      | train
8 | criterion      | MSELoss   | 0      | train
-----------------------------------------------------
139 K     Trainable params
0         Non-trainable params
139 K     Total params
0.556     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode
/Users/chahyeon-yeong/miniconda3/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data

prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size([32])
prediction torch.Size([32])
ratings torch.Size

`Trainer.fit` stopped: `max_epochs=11` reached.


60

In [23]:
test_loader, _, _, _ = prepare(test_data, False)
predictions = trainer.predict(model, test_loader)


/Users/chahyeon-yeong/miniconda3/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:420: Consider setting `persistent_workers=True` in 'predict_dataloader' to speed up the dataloader worker initialization.


In [24]:
predicted_ratings = []
actual_ratings = []
for pred, rating in predictions:
    predicted_ratings.append(pred)
    actual_ratings.append(rating)

In [29]:
predicted_ratings

[tensor([3.8492, 3.7854, 4.1271, 4.1240, 3.8416, 3.1686, 2.1678, 4.1172, 4.5826,
         2.9225, 3.8950, 3.0852, 2.2887, 3.6712, 4.2466, 3.4570, 4.1998, 4.7975,
         4.0666, 2.5500, 3.4712, 4.1682, 3.0720, 2.1043, 4.2269, 4.3225, 3.2645,
         2.5891, 1.9980, 2.7818, 3.5926, 2.6254]),
 tensor([3.1486, 3.1745, 2.4713, 2.9310, 2.6850, 3.4470, 3.3469, 3.3467, 3.7579,
         4.0835, 1.5918, 3.5280, 3.9046, 2.8131, 3.5515, 2.3795, 3.1391, 4.8226,
         4.0184, 4.0224, 3.7307, 3.9448, 3.6026, 3.1143, 3.7847, 2.9337, 4.3034,
         3.2864, 2.9873, 2.8813, 2.0102, 3.8888]),
 tensor([3.4435, 4.2099, 3.0812, 4.4408, 2.0848, 4.3652, 3.9336, 3.0415, 3.4362,
         3.0881, 2.8527, 3.2474, 3.5393, 4.1725, 3.5517, 3.4211, 2.9600, 3.5940,
         3.2609, 3.7255, 3.8347, 3.0784, 4.1823, 3.9755, 3.8969, 3.7416, 3.5480,
         2.6758, 3.9282, 3.7525, 4.2409, 3.4151]),
 tensor([2.9401, 4.3964, 2.6204, 2.8902, 1.5051, 3.0098, 2.8808, 3.2918, 4.4125,
         3.4234, 1.4446, 3.4954, 2.80

In [27]:
# ✅ MSE 계산 (sklearn)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(actual_ratings, predicted_ratings)
print(f"Test MSE: {mse}")

Test MSE: 0.7238844633102417


In [30]:
torch.save(model.state_dict(), "best_model.pth")

In [31]:
model

NeuralMF(
  (user_embedding): Embedding(665, 43)
  (item_embedding): Embedding(1979, 43)
  (numeric_fc): Linear(in_features=8, out_features=43, bias=True)
  (fc1): Linear(in_features=129, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (activation): ReLU()
  (criterion): MSELoss()
)

In [47]:
num_users = 665
num_items = 1979
numeric_features = 8
latent_dim = 43

model = NeuralMF(num_users, num_items, numeric_features, latent_dim)

# 저장된 가중치 로드
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load("best_model.pth"))
# 모델 평가 모드로 설정
model.eval()

  model.load_state_dict(torch.load("best_model.pth"))


NeuralMF(
  (user_embedding): Embedding(665, 43)
  (item_embedding): Embedding(1979, 43)
  (numeric_fc): Linear(in_features=8, out_features=43, bias=True)
  (fc1): Linear(in_features=129, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (activation): ReLU()
  (criterion): MSELoss()
)

In [48]:
# 유저 1의 user_idx 찾기
user_id = 496
item_id = 1378
user_idx = small_train_data[small_train_data['userId'] == user_id]['user_idx'].unique()[0]
item_idx = small_train_data[small_train_data['id'] == item_id]['item_idx'].unique()[0]

print(f"유저 {user_id} index: {user_idx},{item_id} index: {item_idx}")

numeric_item_features = small_train_data[small_train_data['item_idx'] == item_idx][target]



유저 496 index: 490,1378 index: 599


In [49]:
original_title = movies_metadata_exploded[movies_metadata_exploded['id'] == item_id]['original_title'].iloc[0]
original_title

'Shortbus'

In [50]:
print(f"모델의 num_users: {model.user_embedding.num_embeddings}")
print(f"모델의 num_items: {model.item_embedding.num_embeddings}")


모델의 num_users: 665
모델의 num_items: 1979


In [None]:
numeric_feature_dim = 8
user_idx
target = ['user_mean_rating', 'user_rating_std', 'user_review_count',
       'release_year', 'movie_mean_rating', 'movie_review_count', 'genre_ids',
       'user_preference']
user_ids = torch.tensor([user_idx], dtype=torch.long).repeat(numeric_item_features.shape[0])
item_ids = torch.tensor([item_idx], dtype=torch.long).repeat(numeric_item_features.shape[0])
numeric_features = torch.tensor(numeric_item_features[target].values, dtype=torch.float32).squeeze(0)
ratings = torch.rand(numeric_item_features.shape[0])

dataset = TensorDataset(user_ids, item_ids, numeric_features, ratings)
dataloader = DataLoader(dataset, batch_size=4, shuffle=False)

predictions = trainer.predict(model, dataloaders=dataloader)

torch.Size([11])
torch.Size([11])
torch.Size([11, 8])
torch.Size([11])


/Users/chahyeon-yeong/miniconda3/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


In [52]:
predicted_ratings = []
actual_ratings = []
for pred, rating in predictions:
    predicted_ratings.append(pred)
    actual_ratings.append(rating)

In [53]:
#predicted_ratings
total = torch.cat(predicted_ratings)
torch.mean(total)

tensor(3.3645)