In [10]:
import numpy as np
import pandas as pd
import torch

from loader import Loader
from datetime import datetime

from NMF import *

from tensorboardX import SummaryWriter
from sklearn.model_selection import train_test_split

In [11]:
# load data
df = pd.read_csv('../../data/buy_count.csv')
train, test = train_test_split(df, test_size=0.2, random_state=777)

train_x = train.to_numpy().astype(np.int32)
train_y = train['buy_count'].to_numpy().astype(np.float32)

test_x = test.to_numpy().astype(np.int32)
test_y = test['buy_count'].to_numpy().astype(np.float32)

n_user = int(len(df["user_id"].unique()))
n_item = 34
print('n_user======')
print(n_user)
print('n_item======')
print(n_item)
print("購買記錄_筆數")
print(len(df))

3916
34
購買記錄_筆數
4438


In [12]:
# Hyer-parameters
lr = 1e-4
k = 5
batchsize = 256
max_epochs = 100
log_interval = 10

In [13]:
def update_parm(P, Q, train_x, k, lr):
    Q = Q.T
    for i in range(len(train_x)):
        current_user_index = torch.IntTensor(train_x[i, 0])
        current_item_index = torch.IntTensor(train_x[i, 1])
        current_tensor_P = P[current_user_index, :]
        current_tensor_Q = Q[:, current_item_index]
        eij = train_x[i, 2] - torch.sum(current_tensor_P * current_tensor_Q)
        for v_k in range(k):
            with torch.no_grad():
                P[current_user_index][v_k] = P[current_user_index][v_k] + \
                    2 * lr * eij * Q[v_k][current_item_index]
                Q[v_k][current_item_index] = Q[v_k][current_item_index] + \
                    2 * lr * eij * P[current_user_index][v_k]


In [14]:
# 定義模型
log_dir = '../runs/NMF_' + 'k=' + \
    str(k) + ',lr='+str(lr)+'batchsize='+str(batchsize) + \
    str(datetime.now()).replace(' ', '_')
writer = SummaryWriter(log_dir=log_dir)

model = NMF(n_user, n_item, k=k, writer=writer)

train_loader = Loader(
    train_x, train_y, batchsize=batchsize, do_shuffle=False)
test_loader = Loader(test_x, test_y, batchsize=batchsize, do_shuffle=False)


In [15]:
def training_loop(n_epochs, learning_rate, data_loader):
    iter = 0
    for epoch in range(n_epochs):
        for i, (train_x, train_y) in enumerate(data_loader):
            iter += 1
            prediction = model(train_x)
            loss = model.loss(
                prediction, train_x[:, 2])
            update_parm(model.P, model.Q, train_x=train_x, k=k, lr=lr)
            if iter % log_interval == 0:
                fmt = "Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                msg = fmt.format(epoch, iter, len(
                    train_loader), loss)
                print(msg)
        # 驗證
        validation_loss = 0
        for i, (test_x, test_y) in enumerate(test_loader):
            ls = []
            for j in range(len(test_x)):
                current_user_index = torch.IntTensor(test_x[j, 0])
                current_item_index = torch.IntTensor(test_x[j, 1])
                current_tensor_P = model.P[current_user_index, :]
                current_tensor_Q = model.Q.T[:, current_item_index]
                pred = torch.sum(current_tensor_P * current_tensor_Q).item()
                ls.append(pred)
            result = torch.FloatTensor(ls)
            validation_loss += model.loss(result, test_y)
        mse = validation_loss/len(test_loader)
        print("Epoch[{}] Validation MSE: {:.2f} ".format(
            epoch+1, mse))
        writer.add_scalar("validation/avg_loss", mse, epoch)


training_loop(max_epochs, lr, train_loader)
# Save the model to a separate folder
torch.save(model.state_dict(), '../models/nmf.pth')

Epoch[0] Iteration[10/13] Loss: 1.33
Epoch[1] Validation MSE: 1.22 
Epoch[1] Iteration[20/13] Loss: 2.32
Epoch[2] Validation MSE: 1.21 
Epoch[2] Iteration[30/13] Loss: 2.11
Epoch[3] Validation MSE: 1.21 
Epoch[3] Iteration[40/13] Loss: 0.99
Epoch[3] Iteration[50/13] Loss: 1.24
Epoch[4] Validation MSE: 1.20 
Epoch[4] Iteration[60/13] Loss: 1.39
Epoch[5] Validation MSE: 1.20 
Epoch[5] Iteration[70/13] Loss: 1.93
Epoch[6] Validation MSE: 1.20 
Epoch[6] Iteration[80/13] Loss: 2.26
Epoch[6] Iteration[90/13] Loss: 1.48
Epoch[7] Validation MSE: 1.19 
Epoch[7] Iteration[100/13] Loss: 1.22
Epoch[8] Validation MSE: 1.19 
Epoch[8] Iteration[110/13] Loss: 2.10
Epoch[9] Validation MSE: 1.19 
Epoch[9] Iteration[120/13] Loss: 2.14
Epoch[9] Iteration[130/13] Loss: 1.33
Epoch[10] Validation MSE: 1.19 
Epoch[10] Iteration[140/13] Loss: 1.27
Epoch[11] Validation MSE: 1.19 
Epoch[11] Iteration[150/13] Loss: 2.23
Epoch[12] Validation MSE: 1.19 
Epoch[12] Iteration[160/13] Loss: 2.01
Epoch[13] Validation MS