In [1]:
# Mount Google Drive at /content/drive; the dataset read later in this notebook
# lives under /content/drive/MyDrive. This only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import json
import pickle

In [3]:
# Load the interaction table from the mounted Drive folder.
# Later cells read the columns user_id, track_id and listen_count_bin from it.
# NOTE(review): judging by the file name this is a pre-filtered user/track
# listen-count table -- confirm how the "filtered5" filtering was done.
df = pd.read_csv('/content/drive/MyDrive/집교 2_Team P/user-track-listen_count_filtered5.csv')

In [4]:
from sklearn.preprocessing import LabelEncoder
# Replace the raw user and track identifiers in df with integer codes.
# The columns are overwritten in place, so the fitted encoders are the only
# place the original IDs can be recovered from (via inverse_transform).
# lyrics_encoder is created here but is not fitted in this cell.
user_encoder, track_encoder, lyrics_encoder = LabelEncoder(), LabelEncoder(), LabelEncoder()

df['user_id'] = user_encoder.fit_transform(df['user_id'])

df['track_id'] = track_encoder.fit_transform(df['track_id'])

In [5]:
# !pip install torch torchvision -U

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
from tqdm import tqdm

# The data was already loaded from CSV and label-encoded in the cells above.
# Below: convert the DataFrame columns into tensors for the PyTorch DataLoader.
def df_to_tensor(dataset):
    """Turn the three model columns of ``dataset`` into 1-D tensors.

    Args:
        dataset: Table indexable by column name that provides ``user_id``,
            ``track_id`` and ``listen_count_bin`` columns exposing ``.values``.

    Returns:
        Tuple ``(users, items, ratings)``: ``users`` and ``items`` are
        ``torch.int`` tensors, ``ratings`` is a ``torch.float`` tensor, each
        built from the column of the same role in row order.
    """
    # (column name, target dtype) in the order the tensors are returned.
    column_dtypes = (
        ('user_id', torch.int),
        ('track_id', torch.int),
        ('listen_count_bin', torch.float),
    )
    users, items, ratings = (
        torch.tensor(dataset[column].values, dtype=dtype)
        for column, dtype in column_dtypes
    )
    return users, items, ratings

# Hold out 20% of the rows for evaluation; random_state pins the split.
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)

# Training split: columns -> tensors -> dataset.
train_users, train_items, train_ratings = df_to_tensor(train_df)
train_data = TensorDataset(train_users, train_items, train_ratings)

# Test split: columns -> tensors -> dataset.
test_users, test_items, test_ratings = df_to_tensor(test_df)
test_data = TensorDataset(test_users, test_items, test_ratings)

# Only the training batches are shuffled. The test loader keeps row order, which
# train_ relies on when it compares predictions against test_df by position.
train_loader = DataLoader(train_data, shuffle=True, batch_size=256)
test_loader = DataLoader(test_data, shuffle=False, batch_size=256)

# Number of distinct users / tracks in the full table; used to size the embeddings.
num_users = df['user_id'].nunique()
num_items = df['track_id'].nunique()
print(num_users, num_items, sep='\n')

23761
28378


In [6]:
# NCF model definition
class NCF(nn.Module):
    """Embedding + MLP model that scores (user, item) index pairs.

    Each user index and item index is looked up in its own embedding table,
    the two vectors are concatenated, and the result is passed through a
    stack of Linear/ReLU layers whose widths shrink from ``embedding_size``
    down to ``int(embedding_size / 8)``, followed by a final Linear layer with
    a single output and no activation.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_size: Width of each embedding vector. Values below 8 make
            the last hidden width ``int(embedding_size / 8)`` equal to 0.
    """

    def __init__(self, num_users, num_items, embedding_size):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)

        # Layer widths: concatenated input, then each hidden layer in turn.
        widths = [
            embedding_size * 2,
            embedding_size,
            int(embedding_size / 2),
            int(embedding_size / 4),
            int(embedding_size / 8),
        ]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # Output head: one unbounded value per (user, item) pair.
        stack.append(nn.Linear(widths[-1], 1))
        self.fc_layers = nn.Sequential(*stack)

    def forward(self, user, item):
        """Return the model output for index tensors ``user`` and ``item``.

        The two embedding lookups are concatenated along dim 1 and fed to the
        MLP, so the result has one column per input row.
        """
        pair_features = torch.cat(
            (self.user_embedding(user), self.item_embedding(item)),
            dim=1,
        )
        return self.fc_layers(pair_features)

# Device selection: use CUDA when PyTorch reports it as available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)
def train_(embedding, n_epoch):
    """Train a fresh NCF model and print the test RMSE after every epoch.

    A new ``NCF`` is built from the module-level ``num_users`` / ``num_items``,
    moved to ``device`` and optimised with Adam (lr=0.001) on an MSE loss over
    ``train_loader``. After each epoch the model is evaluated on
    ``test_loader`` and the RMSE is printed. The model is local to this call
    and nothing is returned.

    Args:
        embedding: Embedding size, passed to ``NCF`` as ``embedding_size``.
        n_epoch: Number of passes over ``train_loader``.
    """
    # Build the model and move it to the selected device.
    model = NCF(num_users=num_users, num_items=num_items, embedding_size=embedding)
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # tqdm shows progress for both the training and the evaluation pass.
    num_epochs = n_epoch
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for user, item, rating in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            optimizer.zero_grad()
            user, item, rating = user.to(device), item.to(device), rating.to(device)
            output = model(user, item)
            # The model emits (batch, 1); give the target the same shape so
            # MSELoss does not broadcast.
            loss = criterion(output, rating.unsqueeze(1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch+1}/{num_epochs}, Avg. Loss: {avg_loss:.4f}')

        # Evaluate on the test loader at the end of every epoch.
        model.eval()
        all_predictions = []
        all_targets = []
        with torch.no_grad():
            for user, item, rating in tqdm(test_loader, desc=f'Testing Epoch {epoch+1}'):
                user, item = user.to(device), item.to(device)
                output = model(user, item)
                # squeeze(1) rather than squeeze(): a batch holding a single
                # row must stay 1-D. Moving each batch to the CPU keeps the
                # accumulated predictions off the GPU.
                all_predictions.append(output.squeeze(1).cpu())
                # Take the targets from the same batch as the predictions so
                # the two stay aligned regardless of how test_loader orders
                # or sources its rows.
                all_targets.append(rating.detach().cpu())

        # RMSE over the whole test loader. Targets are widened to float64 so
        # the error is accumulated in double precision.
        predictions = torch.cat(all_predictions).numpy()
        targets = torch.cat(all_targets).double().numpy()
        rmse = np.sqrt(mean_squared_error(targets, predictions))
        print(f'Epoch {epoch+1}/{num_epochs}, RMSE on test set: {rmse}')


Using device: cuda


In [7]:
# Embedding size 64, 10 epochs. In the recorded run below, test RMSE was lowest at
# epoch 5 (~1.1510) and rose afterwards while the training loss kept falling.
train_(embedding=64, n_epoch=10)

Epoch 1/10: 100%|██████████| 14516/14516 [01:15<00:00, 191.84it/s]


Epoch 1/10, Avg. Loss: 1.4476


Testing Epoch 1: 100%|██████████| 3629/3629 [00:11<00:00, 308.84it/s]


Epoch 1/10, RMSE on test set: 1.1643333310730273


Epoch 2/10: 100%|██████████| 14516/14516 [01:15<00:00, 191.95it/s]


Epoch 2/10, Avg. Loss: 1.3295


Testing Epoch 2: 100%|██████████| 3629/3629 [00:11<00:00, 307.80it/s]


Epoch 2/10, RMSE on test set: 1.1560964722165108


Epoch 3/10: 100%|██████████| 14516/14516 [01:15<00:00, 192.22it/s]


Epoch 3/10, Avg. Loss: 1.3086


Testing Epoch 3: 100%|██████████| 3629/3629 [00:11<00:00, 307.69it/s]


Epoch 3/10, RMSE on test set: 1.1525808170323326


Epoch 4/10: 100%|██████████| 14516/14516 [01:15<00:00, 192.77it/s]


Epoch 4/10, Avg. Loss: 1.2907


Testing Epoch 4: 100%|██████████| 3629/3629 [00:11<00:00, 308.57it/s]


Epoch 4/10, RMSE on test set: 1.1516096796365998


Epoch 5/10: 100%|██████████| 14516/14516 [01:15<00:00, 193.04it/s]


Epoch 5/10, Avg. Loss: 1.2693


Testing Epoch 5: 100%|██████████| 3629/3629 [00:11<00:00, 307.93it/s]


Epoch 5/10, RMSE on test set: 1.1510080417804573


Epoch 6/10: 100%|██████████| 14516/14516 [01:15<00:00, 192.39it/s]


Epoch 6/10, Avg. Loss: 1.2420


Testing Epoch 6: 100%|██████████| 3629/3629 [00:11<00:00, 307.05it/s]


Epoch 6/10, RMSE on test set: 1.15304201206068


Epoch 7/10: 100%|██████████| 14516/14516 [01:15<00:00, 192.24it/s]


Epoch 7/10, Avg. Loss: 1.2084


Testing Epoch 7: 100%|██████████| 3629/3629 [00:11<00:00, 307.17it/s]


Epoch 7/10, RMSE on test set: 1.1545050343687842


Epoch 8/10: 100%|██████████| 14516/14516 [01:15<00:00, 192.03it/s]


Epoch 8/10, Avg. Loss: 1.1696


Testing Epoch 8: 100%|██████████| 3629/3629 [00:12<00:00, 302.27it/s]


Epoch 8/10, RMSE on test set: 1.1547545109177928


Epoch 9/10: 100%|██████████| 14516/14516 [01:15<00:00, 193.33it/s]


Epoch 9/10, Avg. Loss: 1.1282


Testing Epoch 9: 100%|██████████| 3629/3629 [00:11<00:00, 306.98it/s]


Epoch 9/10, RMSE on test set: 1.1631612619760707


Epoch 10/10: 100%|██████████| 14516/14516 [01:15<00:00, 191.52it/s]


Epoch 10/10, Avg. Loss: 1.0851


Testing Epoch 10: 100%|██████████| 3629/3629 [00:11<00:00, 306.92it/s]


Epoch 10/10, RMSE on test set: 1.1732829509641152


In [8]:
# Embedding size 128, 10 epochs. In the recorded run below, test RMSE was lowest at
# epoch 6 (~1.1461) and rose afterwards while the training loss kept falling.
train_(embedding=128, n_epoch=10)

Epoch 1/10: 100%|██████████| 14516/14516 [01:16<00:00, 190.25it/s]


Epoch 1/10, Avg. Loss: 1.4200


Testing Epoch 1: 100%|██████████| 3629/3629 [00:12<00:00, 297.68it/s]


Epoch 1/10, RMSE on test set: 1.1598034831752881


Epoch 2/10: 100%|██████████| 14516/14516 [01:18<00:00, 184.28it/s]


Epoch 2/10, Avg. Loss: 1.3222


Testing Epoch 2: 100%|██████████| 3629/3629 [00:12<00:00, 294.47it/s]


Epoch 2/10, RMSE on test set: 1.1541671140179925


Epoch 3/10: 100%|██████████| 14516/14516 [01:18<00:00, 184.23it/s]


Epoch 3/10, Avg. Loss: 1.2972


Testing Epoch 3: 100%|██████████| 3629/3629 [00:12<00:00, 293.84it/s]


Epoch 3/10, RMSE on test set: 1.1521500929612354


Epoch 4/10: 100%|██████████| 14516/14516 [01:19<00:00, 183.31it/s]


Epoch 4/10, Avg. Loss: 1.2647


Testing Epoch 4: 100%|██████████| 3629/3629 [00:12<00:00, 295.92it/s]


Epoch 4/10, RMSE on test set: 1.147718969938971


Epoch 5/10: 100%|██████████| 14516/14516 [01:19<00:00, 183.73it/s]


Epoch 5/10, Avg. Loss: 1.2163


Testing Epoch 5: 100%|██████████| 3629/3629 [00:12<00:00, 294.41it/s]


Epoch 5/10, RMSE on test set: 1.1478477519770256


Epoch 6/10: 100%|██████████| 14516/14516 [01:19<00:00, 182.76it/s]


Epoch 6/10, Avg. Loss: 1.1512


Testing Epoch 6: 100%|██████████| 3629/3629 [00:12<00:00, 293.06it/s]


Epoch 6/10, RMSE on test set: 1.146144694020721


Epoch 7/10: 100%|██████████| 14516/14516 [01:19<00:00, 183.48it/s]


Epoch 7/10, Avg. Loss: 1.0787


Testing Epoch 7: 100%|██████████| 3629/3629 [00:12<00:00, 294.27it/s]


Epoch 7/10, RMSE on test set: 1.1490395149536348


Epoch 8/10: 100%|██████████| 14516/14516 [01:18<00:00, 184.02it/s]


Epoch 8/10, Avg. Loss: 1.0068


Testing Epoch 8: 100%|██████████| 3629/3629 [00:12<00:00, 295.99it/s]


Epoch 8/10, RMSE on test set: 1.1484158268828808


Epoch 9/10: 100%|██████████| 14516/14516 [01:18<00:00, 184.30it/s]


Epoch 9/10, Avg. Loss: 0.9388


Testing Epoch 9: 100%|██████████| 3629/3629 [00:12<00:00, 296.90it/s]


Epoch 9/10, RMSE on test set: 1.161756247838679


Epoch 10/10: 100%|██████████| 14516/14516 [01:18<00:00, 184.77it/s]


Epoch 10/10, Avg. Loss: 0.8761


Testing Epoch 10: 100%|██████████| 3629/3629 [00:12<00:00, 293.45it/s]

Epoch 10/10, RMSE on test set: 1.172293158978815





In [9]:
# Embedding size 256, 10 epochs. In the recorded run below, test RMSE was lowest at
# epoch 5 (~1.1382) and rose afterwards while the training loss kept falling.
train_(embedding=256, n_epoch=10)

Epoch 1/10: 100%|██████████| 14516/14516 [01:21<00:00, 177.38it/s]


Epoch 1/10, Avg. Loss: 1.4073


Testing Epoch 1: 100%|██████████| 3629/3629 [00:12<00:00, 296.14it/s]


Epoch 1/10, RMSE on test set: 1.1580244160374433


Epoch 2/10: 100%|██████████| 14516/14516 [01:24<00:00, 171.73it/s]


Epoch 2/10, Avg. Loss: 1.3203


Testing Epoch 2: 100%|██████████| 3629/3629 [00:12<00:00, 294.82it/s]


Epoch 2/10, RMSE on test set: 1.1575789255247815


Epoch 3/10: 100%|██████████| 14516/14516 [01:24<00:00, 171.19it/s]


Epoch 3/10, Avg. Loss: 1.2926


Testing Epoch 3: 100%|██████████| 3629/3629 [00:12<00:00, 296.39it/s]


Epoch 3/10, RMSE on test set: 1.1497531662171945


Epoch 4/10: 100%|██████████| 14516/14516 [01:25<00:00, 170.66it/s]


Epoch 4/10, Avg. Loss: 1.2411


Testing Epoch 4: 100%|██████████| 3629/3629 [00:12<00:00, 295.60it/s]


Epoch 4/10, RMSE on test set: 1.1419587066917052


Epoch 5/10: 100%|██████████| 14516/14516 [01:24<00:00, 171.19it/s]


Epoch 5/10, Avg. Loss: 1.1537


Testing Epoch 5: 100%|██████████| 3629/3629 [00:12<00:00, 296.04it/s]


Epoch 5/10, RMSE on test set: 1.1381987845441501


Epoch 6/10: 100%|██████████| 14516/14516 [01:24<00:00, 171.53it/s]


Epoch 6/10, Avg. Loss: 1.0487


Testing Epoch 6: 100%|██████████| 3629/3629 [00:12<00:00, 295.83it/s]


Epoch 6/10, RMSE on test set: 1.1394044283722093


Epoch 7/10: 100%|██████████| 14516/14516 [01:24<00:00, 171.97it/s]


Epoch 7/10, Avg. Loss: 0.9440


Testing Epoch 7: 100%|██████████| 3629/3629 [00:12<00:00, 296.14it/s]


Epoch 7/10, RMSE on test set: 1.1449668620531193


Epoch 8/10: 100%|██████████| 14516/14516 [01:24<00:00, 171.80it/s]


Epoch 8/10, Avg. Loss: 0.8468


Testing Epoch 8: 100%|██████████| 3629/3629 [00:12<00:00, 291.76it/s]


Epoch 8/10, RMSE on test set: 1.149867801623184


Epoch 9/10: 100%|██████████| 14516/14516 [01:24<00:00, 171.28it/s]


Epoch 9/10, Avg. Loss: 0.7595


Testing Epoch 9: 100%|██████████| 3629/3629 [00:12<00:00, 295.09it/s]


Epoch 9/10, RMSE on test set: 1.1635686659515239


Epoch 10/10: 100%|██████████| 14516/14516 [01:24<00:00, 172.01it/s]


Epoch 10/10, Avg. Loss: 0.6835


Testing Epoch 10: 100%|██████████| 3629/3629 [00:12<00:00, 294.10it/s]

Epoch 10/10, RMSE on test set: 1.1814284206176788





In [10]:
# Embedding size 512, 10 epochs. In the recorded run below, test RMSE was lowest at
# epoch 5 (~1.1385) and rose afterwards while the training loss kept falling.
train_(embedding=512, n_epoch=10)

Epoch 1/10: 100%|██████████| 14516/14516 [01:35<00:00, 152.60it/s]


Epoch 1/10, Avg. Loss: 1.3961


Testing Epoch 1: 100%|██████████| 3629/3629 [00:12<00:00, 291.77it/s]


Epoch 1/10, RMSE on test set: 1.156812642380332


Epoch 2/10: 100%|██████████| 14516/14516 [01:37<00:00, 149.02it/s]


Epoch 2/10, Avg. Loss: 1.3207


Testing Epoch 2: 100%|██████████| 3629/3629 [00:12<00:00, 293.55it/s]


Epoch 2/10, RMSE on test set: 1.1513669406083948


Epoch 3/10: 100%|██████████| 14516/14516 [01:37<00:00, 148.14it/s]


Epoch 3/10, Avg. Loss: 1.2946


Testing Epoch 3: 100%|██████████| 3629/3629 [00:12<00:00, 292.36it/s]


Epoch 3/10, RMSE on test set: 1.1478561970154462


Epoch 4/10: 100%|██████████| 14516/14516 [01:38<00:00, 147.12it/s]


Epoch 4/10, Avg. Loss: 1.2504


Testing Epoch 4: 100%|██████████| 3629/3629 [00:12<00:00, 293.94it/s]


Epoch 4/10, RMSE on test set: 1.1390439388299716


Epoch 5/10: 100%|██████████| 14516/14516 [01:38<00:00, 147.09it/s]


Epoch 5/10, Avg. Loss: 1.1654


Testing Epoch 5: 100%|██████████| 3629/3629 [00:12<00:00, 293.78it/s]


Epoch 5/10, RMSE on test set: 1.138531457189359


Epoch 6/10: 100%|██████████| 14516/14516 [01:38<00:00, 147.35it/s]


Epoch 6/10, Avg. Loss: 1.0251


Testing Epoch 6: 100%|██████████| 3629/3629 [00:12<00:00, 295.52it/s]


Epoch 6/10, RMSE on test set: 1.1420540970862996


Epoch 7/10: 100%|██████████| 14516/14516 [01:37<00:00, 148.20it/s]


Epoch 7/10, Avg. Loss: 0.8722


Testing Epoch 7: 100%|██████████| 3629/3629 [00:12<00:00, 288.85it/s]


Epoch 7/10, RMSE on test set: 1.1554770263648249


Epoch 8/10: 100%|██████████| 14516/14516 [01:38<00:00, 147.84it/s]


Epoch 8/10, Avg. Loss: 0.7364


Testing Epoch 8: 100%|██████████| 3629/3629 [00:12<00:00, 293.22it/s]


Epoch 8/10, RMSE on test set: 1.1783413771883922


Epoch 9/10: 100%|██████████| 14516/14516 [01:38<00:00, 147.88it/s]


Epoch 9/10, Avg. Loss: 0.6247


Testing Epoch 9: 100%|██████████| 3629/3629 [00:12<00:00, 295.36it/s]


Epoch 9/10, RMSE on test set: 1.1880571921174705


Epoch 10/10: 100%|██████████| 14516/14516 [01:38<00:00, 147.96it/s]


Epoch 10/10, Avg. Loss: 0.5365


Testing Epoch 10: 100%|██████████| 3629/3629 [00:12<00:00, 293.65it/s]

Epoch 10/10, RMSE on test set: 1.2052686059415303





In [7]:
# Embedding size 768, 10 epochs. In the recorded run below, test RMSE was lowest at
# epoch 5 (~1.1413) and rose afterwards while the training loss kept falling.
train_(embedding=768, n_epoch=10)

Epoch 1/10: 100%|██████████| 14516/14516 [01:52<00:00, 128.78it/s]


Epoch 1/10, Avg. Loss: 1.3902


Testing Epoch 1: 100%|██████████| 3629/3629 [00:12<00:00, 286.47it/s]


Epoch 1/10, RMSE on test set: 1.1568273654179368


Epoch 2/10: 100%|██████████| 14516/14516 [01:51<00:00, 130.14it/s]


Epoch 2/10, Avg. Loss: 1.3215


Testing Epoch 2: 100%|██████████| 3629/3629 [00:12<00:00, 289.42it/s]


Epoch 2/10, RMSE on test set: 1.1517809248247968


Epoch 3/10: 100%|██████████| 14516/14516 [01:52<00:00, 129.15it/s]


Epoch 3/10, Avg. Loss: 1.2960


Testing Epoch 3: 100%|██████████| 3629/3629 [00:12<00:00, 289.62it/s]


Epoch 3/10, RMSE on test set: 1.1466217008719104


Epoch 4/10: 100%|██████████| 14516/14516 [01:52<00:00, 129.22it/s]


Epoch 4/10, Avg. Loss: 1.2554


Testing Epoch 4: 100%|██████████| 3629/3629 [00:12<00:00, 290.32it/s]


Epoch 4/10, RMSE on test set: 1.1413389010234016


Epoch 5/10: 100%|██████████| 14516/14516 [01:51<00:00, 129.62it/s]


Epoch 5/10, Avg. Loss: 1.1621


Testing Epoch 5: 100%|██████████| 3629/3629 [00:12<00:00, 287.69it/s]


Epoch 5/10, RMSE on test set: 1.1412987406942754


Epoch 6/10: 100%|██████████| 14516/14516 [01:52<00:00, 129.03it/s]


Epoch 6/10, Avg. Loss: 0.9960


Testing Epoch 6: 100%|██████████| 3629/3629 [00:12<00:00, 285.83it/s]


Epoch 6/10, RMSE on test set: 1.1559594834359292


Epoch 7/10: 100%|██████████| 14516/14516 [01:52<00:00, 129.35it/s]


Epoch 7/10, Avg. Loss: 0.8180


Testing Epoch 7: 100%|██████████| 3629/3629 [00:12<00:00, 289.67it/s]


Epoch 7/10, RMSE on test set: 1.1690909454612262


Epoch 8/10: 100%|██████████| 14516/14516 [01:52<00:00, 129.56it/s]


Epoch 8/10, Avg. Loss: 0.6704


Testing Epoch 8: 100%|██████████| 3629/3629 [00:12<00:00, 289.39it/s]


Epoch 8/10, RMSE on test set: 1.1778793705393202


Epoch 9/10: 100%|██████████| 14516/14516 [01:52<00:00, 128.54it/s]


Epoch 9/10, Avg. Loss: 0.5551


Testing Epoch 9: 100%|██████████| 3629/3629 [00:12<00:00, 289.90it/s]


Epoch 9/10, RMSE on test set: 1.2075124684618148


Epoch 10/10: 100%|██████████| 14516/14516 [01:52<00:00, 128.47it/s]


Epoch 10/10, Avg. Loss: 0.4652


Testing Epoch 10: 100%|██████████| 3629/3629 [00:12<00:00, 288.32it/s]

Epoch 10/10, RMSE on test set: 1.1997936569482581



