# 다이닝 코드 데이터

In [1]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

data = pd.read_csv('diningcode_data_crawling_20260125_1542.csv')

In [2]:
data = data.drop(columns=['Unnamed: 0'],axis=1)
data.shape

(12627, 15)

## Preprocessing data


In [3]:
# 다코 미식가 / user_name 분리
daco_list = dict()
name_list = list()
for i,val in enumerate(data['user_name']):
    if "다코미식가" in val:
        daco_list[i] = 1
    else:
        daco_list[i] = 0
    name_list.append(data['user_name'][i].replace('다코미식가',''))
data['daco_gourmand'] = daco_list

name_list = [name.replace('\r\n', '') for name in name_list]
name_list = [name.replace('\n', '') for name in name_list]
data['user_name'] = name_list

In [4]:
data['user_rating'] = data['user_rating'].str.replace('점','').astype(float)

In [5]:
unique_vals = data['taste'].unique()
mapping_dict = {
    unique_vals[0]:3,
    unique_vals[1]:2,
    unique_vals[2]:1
}
data['taste_enc'] = data['taste'].map(mapping_dict)
del mapping_dict, unique_vals

unique_vals = data['service'].unique()
mapping_dict = {
    unique_vals[0]:3,
    unique_vals[1]:2,
    unique_vals[2]:1
}
data['service_enc'] = data['service'].map(mapping_dict)
del mapping_dict, unique_vals

unique_vals = data['price'].unique()
mapping_dict = {
    unique_vals[0]:3,
    unique_vals[1]:2,
    unique_vals[2]:1
}
data['price_enc'] = data['price'].map(mapping_dict)
del mapping_dict, unique_vals

In [6]:
user_le = LabelEncoder()
item_le = LabelEncoder()

data['user_id'] = user_le.fit_transform(data['user_name'])
data['item_id'] = item_le.fit_transform(data['item_name'])

# User name-id mapping
user_i2n = dict(enumerate(user_le.classes_))  
user_n2i = {v: k for k, v in user_i2n.items()}

item_i2n = dict(enumerate(item_le.classes_))
item_n2i = {v: k for k, v in item_i2n.items()}

In [7]:
np.sum(data['user_name'].value_counts() > 1)

np.int64(2208)

In [8]:
data = data.drop_duplicates()

In [9]:
data.shape

(8723, 21)

## Project_Main (Founding Recommnedation model)
1. Baseline (DeepFM or MF model)
2. DeepCoNN + MF
3. LLM (KoBERT,Gemma) + MF model

### 1. Baseline (Matrix Factorization)

In [10]:
user_item_mat = pd.pivot_table(data=data,index='item_id',
                               columns='user_id',values='user_rating')
print('sparsity : ',np.count_nonzero(np.isnan(user_item_mat)) / user_item_mat.size)
print(f'user_item_mat shape : {user_item_mat.shape}')

sparsity :  0.9855843038644786
user_item_mat shape : (160, 3778)


In [11]:
mf_data = data[['user_name','item_name','user_rating']]
mf_data.columns = ['user','item','rating']

mf_train = mf_data.groupby('user').sample(frac=0.8,random_state=42)
mf_train_ind = mf_train.index
mf_test = mf_data.drop(mf_train_ind)

In [12]:
import sys
sys.path.append('../..')  # Go up two levels to reach the directory containing 'Study'

from Study.RecSys.matrixfactorization import matfac

k=10
lr=0.001
reg_param = 0.02
epochs=50

mf_model = matfac.MatrixFactorization(k,lr,reg_param,epochs)

P,Q,b_u,b_i = mf_model.fit(mf_train)
mf_pred,mf_test = mf_model.predict(mf_test)

Epoch : 0 , Loss : 2222.281957 , Rooted Loss: 47.14
Epoch : 10 , Loss : 1895.829478 , Rooted Loss: 43.54
Epoch : 20 , Loss : 1780.056921 , Rooted Loss: 42.19
Epoch : 30 , Loss : 1723.511763 , Rooted Loss: 41.52
Epoch : 40 , Loss : 1691.422434 , Rooted Loss: 41.13


In [14]:
y_pred = mf_pred
y_true = mf_test
k=10
ndcg_k = []
for user_num in y_pred['user'].unique():
    top_pred_items = y_pred.loc[(y_pred['user']==user_num)].sort_values('rating',ascending=False)
    pred_sequence = top_pred_items['item'][:k].values

    test_items = y_true.loc[y_true['user']==user_num]
    ideal_rel_score = test_items.sort_values('rating',ascending=False)[:k]['rating'].values
    rel_score = test_items.set_index('item').reindex(pred_sequence)['rating'].values
    dcg_k = np.sum((np.pow(2,rel_score) -1) / np.log2(np.arange(2,len(rel_score)+2)))
    idcg_k = np.sum((np.pow(2,ideal_rel_score) -1) / np.log2(np.arange(2,len(ideal_rel_score)+2)))
    ndcg_k.append(dcg_k / idcg_k if idcg_k>0 else 0)
np.mean(ndcg_k)

np.float64(0.9926146584105436)

In [15]:
from Study.RecSys.matrixfactorization.preprocessing import precision_at_k,ndcg_at_k,recall_at_k
from sklearn.metrics import root_mean_squared_error

rmse_par = root_mean_squared_error(mf_pred['rating'].values,mf_test['rating'].values)
prec_at_k_par = precision_at_k(mf_pred,mf_test,k=10)
rec_at_k_par = recall_at_k(mf_pred,mf_test,k=10)
ndcg_at_k_par = ndcg_at_k(mf_pred,mf_test,k=10)

print(f'RMSE: {rmse_par:.4f},Precision@K: {prec_at_k_par:.4f},Recall@K: {rec_at_k_par:.4f},NDCG@K: {ndcg_at_k_par:.4f}')

RMSE: 0.6668,Precision@K: 0.1250,Recall@K: 1.0000,NDCG@K: 0.9926


In [25]:
def get_top_p_recommendations(df, user_id, k=5):
    user_data = df[df['user_id'] == user_id]

    top_k_items = user_data.sort_values(by='predicted_rating', ascending=False).head(k)
    
    print(f"=== User '{user_id}' Top-{k} Recommendation ===")
    return top_k_items[['item_id', 'predicted_rating', 'actual_rating']]

topn_df = get_top_p_recommendations(comparison_df, '먹죽귀', k=10)
topn_df

=== User '먹죽귀' Top-10 Recommendation ===


Unnamed: 0,item_id,predicted_rating,actual_rating
6709,베트남시장쌀국수,4.925535,4.0
6765,벱,4.75619,5.0
9232,타오 마라탕,4.724055,4.0
1359,오레노라멘 본점,4.503047,5.0
5,서령 본점,4.438923,5.0
4873,런던베이글뮤지엄 잠실점,4.312912,3.5
1721,오레노라멘 롯데월드몰점,4.197145,4.0


In [17]:
# Create a comparison dataframe
comparison_df = pd.DataFrame({
    'predicted_rating': mf_pred['rating'].values,
    'actual_rating': mf_test['rating'].values,
    'user_id' : mf_test['user'],
    'item_id' : mf_test['item']
})
comparison_df.head(20)

Unnamed: 0,predicted_rating,actual_rating,user_id,item_id
5,4.438923,5.0,먹죽귀,서령 본점
6,4.643927,2.0,쿨제이,서령 본점
12,4.768596,5.0,Ssuworld,서령 본점
18,4.557831,5.0,쵸옹,서령 본점
22,4.719772,4.5,오렌지9191,서령 본점
36,4.623751,5.0,meraki,서령 본점
39,4.824371,5.0,아제나의맛집탐방,서령 본점
46,4.871349,4.0,브라우니쿠키,서령 본점
47,4.581752,5.0,이거맛있당,서령 본점
50,4.435929,5.0,데이지,우래옥 본점


In [16]:
"""
def ndcg_at_k(y_true, y_score, k=-1): #Vector의 계산으로 이루어짐
    y_true = np.array(y_true)
    y_score = np.array(y_score)
    n = len(y_true)
    # k가 -1 또는 데이터 전체보다 크면 k=n으로 보정
    if k == -1 or k > n:
        k = n

    order = np.argsort(y_score)[::-1]
    y_true_sorted = y_true[order[:k]]  # k 길이만큼만 자름
    dcg = np.sum((2 ** y_true_sorted - 1) / np.log2(np.arange(2, k + 2)))
    best_dcg = np.sum((2 ** np.sort(y_true)[::-1][:k] - 1) / np.log2(np.arange(2, k + 2)))

    return dcg / best_dcg if best_dcg > 0 else 0.0

ndcg_k = []
for i,col in enumerate(R_pred):
    ndcg_k.append(ndcg_at_k(R[i],R_pred[i],k=-1))
print('ndcg@k mean:' , np.mean(ndcg_k))
"""

"\ndef ndcg_at_k(y_true, y_score, k=-1): #Vector의 계산으로 이루어짐\n    y_true = np.array(y_true)\n    y_score = np.array(y_score)\n    n = len(y_true)\n    # k가 -1 또는 데이터 전체보다 크면 k=n으로 보정\n    if k == -1 or k > n:\n        k = n\n\n    order = np.argsort(y_score)[::-1]\n    y_true_sorted = y_true[order[:k]]  # k 길이만큼만 자름\n    dcg = np.sum((2 ** y_true_sorted - 1) / np.log2(np.arange(2, k + 2)))\n    best_dcg = np.sum((2 ** np.sort(y_true)[::-1][:k] - 1) / np.log2(np.arange(2, k + 2)))\n\n    return dcg / best_dcg if best_dcg > 0 else 0.0\n\nndcg_k = []\nfor i,col in enumerate(R_pred):\n    ndcg_k.append(ndcg_at_k(R[i],R_pred[i],k=-1))\nprint('ndcg@k mean:' , np.mean(ndcg_k))\n"

### 2. DeepCoNN

In [11]:
dc_data = data[['user_id','item_id','user_rating','user_query']]
dc_data.columns = ['user_id','item_id','rating','review']

dc_train = dc_data.groupby('user_id').sample(frac=0.8,random_state=42)
dc_train_ind = dc_train.index
dc_test = dc_data.drop(dc_train_ind)

In [12]:
from collections import defaultdict
import random
import os
import torch

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
set_seed(42)

In [13]:
from konlpy.tag import Okt
from tqdm import tqdm
from collections import Counter

jvm_path = r'C:\Program Files\Java\jdk-23\bin\server\jvm.dll' # r'C:\Program Files\Java\jdk-25.0.2\bin\server\jvm.dll'
os.environ['JAVA_HOME'] = r'C:\Program Files\Java\jdk-23'   

# --- 하이퍼파라미터 설정 ---
MAX_VOCAB_SIZE = 30000  # Top N
MAX_DOC_LENGTH = 550   

def tokeniz(data):
    okt = Okt()
    
    # 1. Construct User-Item document
    user_docs = data.groupby('user_id')['review'].apply(lambda x: ' '.join(map(str, x))).reset_index()
    item_docs = data.groupby('item_id')['review'].apply(lambda x: ' '.join(map(str, x))).reset_index()

    print('Morpheme(형태소) Analysis & Tokenization processing')

    tqdm.pandas(desc='user docs tokenization')
    user_docs['tokenized'] = user_docs['review'].progress_apply(lambda x: okt.morphs(x,stem=True))

    tqdm.pandas(desc='item docs tokenization')
    item_docs['tokenized'] = item_docs['review'].progress_apply(lambda x: okt.morphs(x,stem=True))

    # 2. Tokenization / generate Vocab dictionary
    print("Generating Dict...")
    all_tokens =[]
    for tokens in user_docs['tokenized']:
        all_tokens.extend(tokens)
    for tokens in item_docs['tokenized']:
        all_tokens.extend(tokens)
    word_counts = Counter(all_tokens)

    # 0-<PAD> / 1-<UNK>
    vocab = {'<PAD>': 0, '<UNK>': 1}
    for i, (word, count) in enumerate(word_counts.most_common(MAX_VOCAB_SIZE)):
        vocab[word] = i + 2  # 0과 1은 이미 차지했으므로 2부터 시작

    print(f"Dict size: {len(vocab)}\n")

    return vocab,user_docs,item_docs

# 3. Padding
def text_to_padded_sequence(text, vocab, max_length):
    # 단어가 사전에 없으면 <UNK>(1)로 처리
    seq = [vocab.get(word, vocab['<UNK>']) for word in text.split()]
    
    # Length Adjustment
    if len(seq) > max_length:
        return seq[:max_length] # Truncation
    else:
        return seq + [vocab['<PAD>']] * (max_length - len(seq)) # Padding

tr_vocab,dc_tr_user_doc,dc_tr_item_doc = tokeniz(dc_train)
te_vocab,dc_te_user_doc,dc_te_item_doc = tokeniz(dc_test)

# 4. UI sequence
def app_padding(vocab):
    print("Apply padding and int sequence")
    user_seq_dict = {}
    for _, row in dc_tr_user_doc.iterrows():
        user_seq_dict[row['user_id']] = text_to_padded_sequence(row['review'], vocab, MAX_DOC_LENGTH)

    item_seq_dict = {}
    for _, row in dc_tr_item_doc.iterrows():
        item_seq_dict[row['item_id']] = text_to_padded_sequence(row['review'], vocab, MAX_DOC_LENGTH)
    return user_seq_dict,item_seq_dict

tr_user_seq_dict,tr_item_seq_dict = app_padding(tr_vocab)
te_user_seq_dict,te_item_seq_dict = app_padding(te_vocab)
print("Preprocessing Complete")

Morpheme(형태소) Analysis & Tokenization processing


user docs tokenization: 100%|██████████| 3778/3778 [00:26<00:00, 143.16it/s]
item docs tokenization: 100%|██████████| 160/160 [00:18<00:00,  8.68it/s]


Generating Dict...
Dict size: 10791

Morpheme(형태소) Analysis & Tokenization processing


user docs tokenization: 100%|██████████| 964/964 [00:03<00:00, 248.32it/s]
item docs tokenization: 100%|██████████| 155/155 [00:03<00:00, 42.83it/s]


Generating Dict...
Dict size: 4518

Apply padding and int sequence
Apply padding and int sequence
Preprocessing Complete


In [14]:
import torch.nn as nn
from gensim.models import KeyedVectors, Word2Vec

# 1. Load Pre-trained LM (Korean fastText-from Meta)
word2vec_model = KeyedVectors.load_word2vec_format("C:/Users/한승원/Downloads/cc.ko.300.vec.gz",binary=False,limit=30000) #"C:/Users/한승원(hanan)/Downloads/cc.ko.300.vec.gz"

EMBED_DIM = 300 
vocab_size = len(tr_vocab) # Identical size with Vocab dict

# 2. PyTorch Embedding Layer Weight: (vocab_size, EMBED_DIM)
embedding_matrix = np.zeros((vocab_size, EMBED_DIM))

# 3. 단어 사전(vocab)을 순회하며 가중치 행렬 채우기
for word, idx in tr_vocab.items():
    if word in word2vec_model:
        # 사전 학습된 모델에 단어가 존재하면 해당 벡터를 가져옴
        embedding_matrix[idx] = word2vec_model[word]
    else:
        # 모델에 없는 단어(OOV)는 정규분포를 따르는 랜덤 벡터로 초기화하거나 0으로 둡니다.
        if word == '<PAD>':
            embedding_matrix[idx] = np.zeros(EMBED_DIM)
        else:
            embedding_matrix[idx] = np.random.normal(scale=0.6, size=(EMBED_DIM,))

# Numpy array -> PyTorch tensor
embedding_tensor = torch.FloatTensor(embedding_matrix)

In [15]:
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

class DiningCodeDataset(Dataset):
    def __init__(self, df, user_seq_dict, item_seq_dict):
        self.users = df['user_id'].values
        self.items = df['item_id'].values
        self.ratings = df['rating'].values  # Target value
        
        self.user_seq_dict = user_seq_dict
        self.item_seq_dict = item_seq_dict

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        user_id = self.users[idx]
        item_id = self.items[idx]
        rating = self.ratings[idx]

        # 딕셔너리에서 미리 만들어둔 정수 시퀀스를 가져옴
        u_doc = self.user_seq_dict[user_id]
        i_doc = self.item_seq_dict[item_id]

        # PyTorch 모델 입력용 텐서로 변환 (타입 변환이 매우 중요합니다)
        u_doc_tensor = torch.tensor(u_doc, dtype=torch.long)
        i_doc_tensor = torch.tensor(i_doc, dtype=torch.long)
        rating_tensor = torch.tensor(rating, dtype=torch.float32)

        return u_doc_tensor, i_doc_tensor, rating_tensor

# DataLoader (Batch size: 64)
BATCH_SIZE = 64
tr_dataset = DiningCodeDataset(dc_train, tr_user_seq_dict, tr_item_seq_dict)
tr_dataloader = DataLoader(tr_dataset, batch_size=BATCH_SIZE, shuffle=True)

te_dataset = DiningCodeDataset(dc_test, te_user_seq_dict, te_item_seq_dict)
te_dataloader = DataLoader(te_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [16]:
import torch.nn as nn
import torch.nn.functional as F

class DeepCoNN(nn.Module):
    def __init__(self,config,embedding_matrix):
        super(DeepCoNN,self).__init__()
        self.config = config

        #1.Embedding layer
        vocab_size,embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding(vocab_size,embedding_dim)
        self.embedding.weight = nn.Parameter(
            torch.FloatTensor(embedding_matrix)
        )
        self.embedding.weight.requires_grad = True

        #2-1.User Network
        self.user_cnn = nn.Conv1d(
            in_channels=embedding_dim,
            out_channels=config.num_filters,
            kernel_size=config.kernel_size,
            padding=1
        )
        self.user_fc = nn.Linear(config.num_filters,config.latent_dim)

        #2-2. Item Network
        self.item_cnn = nn.Conv1d(
            in_channels=embedding_dim,
            out_channels=config.num_filters,
            kernel_size=config.kernel_size,
            padding=1
        )
        self.item_fc = nn.Linear(config.num_filters,config.latent_dim)

        #Dropout
        self.dropout = nn.Dropout(config.dropout_rate)

        #3.FM Layer (2nd-order latent vectors)
        self.fm_linear = nn.Linear(config.latent_dim*2,1)
        
        self.fm_V = nn.Parameter(
            torch.rand(config.latent_dim*2,config.fm_k)
        )
        self.global_bias = nn.Parameter(torch.zeros(1))

    def forward(self,user_doc,item_doc):
        """
        Args:
            user_doc: (batch_size,max_doc_length)
            item_doc: (batch_size,max_doc_length)

        Returns:
            rating: (batch_size,)
        """

        #User_Net
        user_emb = self.embedding(user_doc) # (B,L,E)
        user_emb = user_emb.transpose(1,2) # (B,E,L) - conv1d input format

        user_conv = F.relu(self.user_cnn(user_emb)) 
        user_pool = F.max_pool1d(user_conv,kernel_size=user_conv.size(2))
        user_pool = user_pool.squeeze(2)

        user_latent = F.relu(self.user_fc(user_pool))
        user_latent = self.dropout(user_latent)

        #item_Net
        item_emb = self.embedding(item_doc) # (B,L,E)
        item_emb = item_emb.transpose(1,2) # (B,E,L) - conv1d input format

        item_conv = F.relu(self.item_cnn(item_emb)) 
        item_pool = F.max_pool1d(item_conv,kernel_size=item_conv.size(2))
        item_pool = item_pool.squeeze(2)

        item_latent = F.relu(self.item_fc(item_pool))
        item_latent = self.dropout(item_latent)

        #concatenate
        z = torch.cat([user_latent,item_latent],dim=1)

        #FM layer
        linear_term = self.fm_linear(z)

        interactions = torch.mm(z,self.fm_V)
        interactions_squared = torch.mm(z**2,self.fm_V**2)

        quadratic_term = 0.5*torch.sum(
            interactions**2-interactions_squared,dim=1,keepdim=True
        )

        #Predict
        rating = self.global_bias + linear_term.squeeze(1) + quadratic_term.squeeze(1)
        
        return rating

class Config:

    max_doc_length = 550
    embedding_dim = 300

    "Model Parameters"
    num_filters = 100
    kernel_size = 3
    latent_dim = 50
    fm_k = 8

    "Training"
    batch_size = 64
    num_epochs = 50
    learning_rate = 0.002
    dropout_rate = 0.5

    "Other"
    device='cuda'
    random_seed=42

In [17]:
import torch.nn as nn
import torch.optim as optim

config = Config()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Current device: {device}")
model = DeepCoNN(config=config, embedding_matrix=embedding_matrix).to(device)


criterion = nn.MSELoss()
optimizer = optim.RMSprop(model.parameters(),alpha=0.9, lr=0.002, weight_decay=1e-8)

Current device: cpu


In [None]:
EPOCHS = 10
for epoch in range(EPOCHS):
    model.train()  
    total_loss = 0.0
    
    for batch_idx, (batch_user, batch_item, batch_rating) in enumerate(tr_dataloader):
    
        batch_user = batch_user.to(device)
        batch_item = batch_item.to(device)
        batch_rating = batch_rating.to(device)
        
        optimizer.zero_grad()
        
        predictions = model(batch_user, batch_item)
        loss = criterion(predictions, batch_rating)
    
        loss.backward()
        
        optimizer.step()
        total_loss += loss.item()
        
        # Progress
        if (batch_idx + 1) % 50 == 0:
            print(f"Epoch [{epoch+1}/{EPOCHS}], Step [{batch_idx+1}/{len(tr_dataloader)}], Loss: {loss.item():.4f}")
            
    avg_loss = total_loss / len(tr_dataloader)
    print(f"==== Epoch {epoch+1} Complete | Avg Loss: {avg_loss:.4f} ====\n")

Epoch [1/10], Step [100/118], Loss: 14.8327
==== Epoch 1 Complete | Avg Loss: 268.5286 ====

Epoch [2/10], Step [100/118], Loss: 14.4108
==== Epoch 2 Complete | Avg Loss: 17.6918 ====

Epoch [3/10], Step [100/118], Loss: 20.8785
==== Epoch 3 Complete | Avg Loss: 12.6092 ====

Epoch [4/10], Step [100/118], Loss: 4.5436
==== Epoch 4 Complete | Avg Loss: 7.2146 ====

Epoch [5/10], Step [100/118], Loss: 2.9692
==== Epoch 5 Complete | Avg Loss: 4.0699 ====

Epoch [6/10], Step [100/118], Loss: 2.1007
==== Epoch 6 Complete | Avg Loss: 1.9985 ====

Epoch [7/10], Step [100/118], Loss: 0.9019
==== Epoch 7 Complete | Avg Loss: 1.4352 ====

Epoch [8/10], Step [100/118], Loss: 0.7938
==== Epoch 8 Complete | Avg Loss: 1.0006 ====

Epoch [9/10], Step [100/118], Loss: 0.6591
==== Epoch 9 Complete | Avg Loss: 0.7907 ====

Epoch [10/10], Step [100/118], Loss: 0.8805
==== Epoch 10 Complete | Avg Loss: 0.7233 ====



In [None]:
model.eval()
dc_pred = dc_test.copy() 
pred = []

with torch.no_grad():    
    for batch_user, batch_item,_ in te_dataloader:
        
        batch_user = batch_user.to(device)
        batch_item = batch_item.to(device)
        batch_rating = batch_rating.to(device)
        
        batch_preds = model(batch_user,batch_item)
        
        pred.extend(batch_preds.cpu().numpy())
dc_pred['rating'] = pred

In [None]:
dc_test.columns = ['user','item','rating','review']
dc_pred.columns = ['user','item','rating','review']

import sys
sys.path.append('../..')  # Go up two levels to reach the directory containing 'Study'
from Study.RecSys.matrixfactorization.preprocessing import precision_at_k,ndcg_at_k,recall_at_k
from sklearn.metrics import mean_squared_error

rmse_par = mean_squared_error(dc_pred['rating'].values,dc_test['rating'].values,squared=False)

prec_at_k_par = precision_at_k(dc_pred,dc_test,k=10)
rec_at_k_par = recall_at_k(dc_pred,dc_test,k=10)
#ndcg_at_k_par = ndcg_at_k(dc_pred,dc_test,k=10)

print(f'RMSE: {rmse_par:.4f},Precision@K: {prec_at_k_par:.4f},Recall@K: {rec_at_k_par:.4f}') # Add NDCG

RMSE: 0.1499,0.1250,1.0000


In [26]:
# Create a comparison dataframe
comparison_df = pd.DataFrame({
    'predicted_rating': dc_pred['rating'].values,
    'actual_rating': dc_test['rating'].values,
    'user_id' : dc_test['user'],
    'item_id' : dc_test['item']
})
comparison_df.head(20)


get_top_p_recommendations(comparison_df,'먹죽귀',k=10)

NameError: name 'dc_pred' is not defined