In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from loader import Loader
from AutoRec import *
from datetime import datetime
from tensorboardX import SummaryWriter

In [17]:
# Load the purchase-count table and hold out 10% of the rows (fixed seed) for evaluation.
df = pd.read_csv('../../../data/buy_count.csv')
train, test = train_test_split(df, test_size=0.1, random_state=777)

# Inputs are the complete rows cast to int32, so they still contain the
# 'buy_count' column that is also used as the target below.
train_x, test_x = (
    split.to_numpy().astype(np.int32) for split in (train, test)
)
# Targets: the 'buy_count' column as float32.
train_y, test_y = (
    split['buy_count'].to_numpy().astype(np.float32) for split in (train, test)
)

# Number of distinct user ids in the whole table (train + test).
n_user = int(df["user_id"].unique().size)
# NOTE(review): the item count is hard-coded rather than derived from the data — confirm it matches the CSV.
n_item = 34

# Summary printout; the last label is Chinese for "purchase records, row count".
print('n_user======', n_user,
      'n_item======', n_item,
      "購買記錄_筆數", len(df),
      sep='\n')

3916
34
購買記錄_筆數
4438


In [19]:
# Hyper-parameters
lr = 1e-2  # learning rate handed to the optimizer
k = 4  # forwarded to the model as `k` (presumably the hidden/latent size — confirm in AutoRec)
batchsize = 256  # batch size handed to the data loaders
n_epochs = 500  # number of training epochs
log_interval = 2  # print a progress line every `log_interval` training batches
default_value = 1  # for test users and items without training observations
default_vlaue = default_value  # misspelled original name, kept so existing references keep working

# One TensorBoard run directory per launch. Every field is explicitly separated
# (the batch size used to run straight into the timestamp), and the timestamp
# avoids ':' so the directory name is also valid on Windows.
run_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
log_dir = 'runs/AutoRec_k={},lr={},batchsize={}_{}'.format(
    k, lr, batchsize, run_stamp)
writer = SummaryWriter(log_dir=log_dir)

In [20]:
# Batch loaders: one per split, built with identical settings
# (do_shuffle=False presumably keeps the original row order — confirm in Loader).
train_loader, test_loader = (
    Loader(split_x, split_y, n_user, n_item,
           batchsize=batchsize, do_shuffle=False)
    for split_x, split_y in ((train_x, train_y), (test_x, test_y))
)

# Model, sized by the user/item counts and the hyper-parameter k.
model = AutoRec(n_user, n_item, k=k)

# Adam over every model parameter, with weight decay as regularisation.
optimizer = torch.optim.Adam(
    params=model.parameters(), lr=lr, weight_decay=1e-4)

In [None]:
def train_loop(epochs):
    """Train the global ``model`` for ``epochs`` epochs, validating after each one.

    Relies on notebook-level state defined in earlier cells: ``model``,
    ``optimizer``, ``train_loader``, ``test_loader``, ``writer``,
    ``log_interval``, ``default_vlaue``, ``train_x`` and ``test_x``.

    Per epoch it
      * runs one optimisation step per training batch and prints the batch
        RMSE every ``log_interval`` batches,
      * logs the mean training RMSE under ``training/RMSE``,
      * evaluates on ``test_loader`` without gradient tracking and logs a
        single RMSE, pooled over all observed (mask == 1) entries of all test
        batches, under ``validation/RMSE``.

    Args:
        epochs: Number of passes over ``train_loader``.

    Returns:
        None. Results are printed and written to the TensorBoard ``writer``.
    """
    n_train_batches = len(train_loader)

    # Users / items that occur in the test split but never in the training
    # split. These do not change between batches or epochs, so compute once.
    untrained_user_list = list(
        test_x[~np.isin(test_x[:, 0], train_x[:, 0]), 0])
    untrained_item_list = list(
        test_x[~np.isin(test_x[:, 1], train_x[:, 1]), 1])

    for epoch in range(epochs):
        # Assumes AutoRec is a torch.nn.Module (it exposes parameters() and
        # state_dict()); train()/eval() toggle mode-dependent layers, if any.
        model.train()
        epoch_train_error = 0.0
        for batch_idx, (batch_x, batch_mask_x) in enumerate(train_loader, start=1):
            batch_x = torch.from_numpy(batch_x.astype(np.float32))
            batch_mask_x = torch.from_numpy(batch_mask_x.astype(np.float32))
            pred = model(batch_x)
            loss, rmse = model.loss(
                decoder=pred, input=batch_x, optimizer=optimizer, mask_input=batch_mask_x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Plain float: the running sum must not keep tensors (and any
            # autograd graph attached to them) alive across batches.
            rmse = float(rmse)
            epoch_train_error += rmse
            if batch_idx % log_interval == 0:
                fmt = "Epoch[{}] Iteration[{}/{}] RMSE: {:.2f}"
                print(fmt.format(epoch + 1, batch_idx, n_train_batches, rmse))
        # max(..., 1) guards against an empty training loader.
        epoch_train_error = epoch_train_error / max(n_train_batches, 1)
        print("Epoch[{}] training RMSE: {:.2f} ".format(
            epoch + 1, epoch_train_error))
        writer.add_scalar("training/RMSE", epoch_train_error, epoch)

        # validation
        model.eval()
        squared_error = 0.0
        n_observed = 0
        with torch.no_grad():  # evaluation needs no gradients
            for batch_x, batch_mask_x in test_loader:
                batch_x = torch.from_numpy(batch_x.astype(np.float32))
                batch_mask_x = torch.from_numpy(
                    batch_mask_x.astype(np.float32))
                pred = model(batch_x)

                # Fall back to the configured default for (user, item) cells
                # the model never saw during training.
                # NOTE(review): this indexes the per-batch prediction with raw
                # ids from test_x and only touches cells where BOTH the user
                # and the item are untrained — confirm that matches the
                # layout of the batches produced by Loader.
                for u_user in untrained_user_list:
                    for u_item in untrained_item_list:
                        pred[u_user, u_item] = default_vlaue

                squared_error += float(
                    ((pred - batch_x) * batch_mask_x).pow(2).sum())
                n_observed += int((batch_mask_x == 1).sum())

        # One pooled value per epoch: logging every batch at the same step
        # wrote several points onto a single TensorBoard x-position.
        # NaN (instead of a division by zero) when nothing was observed.
        if n_observed:
            vali_rmse = (squared_error / n_observed) ** 0.5
        else:
            vali_rmse = float("nan")
        print("Epoch[{}] Validation RMSE: {:.2f} ".format(
            epoch + 1, vali_rmse))
        writer.add_scalar("validation/RMSE", vali_rmse, epoch)

# Train for the configured number of epochs, then persist the learned weights.
train_loop(n_epochs)
torch.save(obj=model.state_dict(), f='../../models/AutoRec.pth')