In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from loader import Loader
from AutoRec import *
from datetime import datetime
from tensorboardX import SummaryWriter

In [2]:
# load data
df = pd.read_csv('../../../data/buy_count.csv')
train, test = train_test_split(df, test_size=0.2, random_state=777)

train_x = train.to_numpy().astype(np.int32)
train_y = train['buy_count'].to_numpy().astype(np.float32)

test_x = test.to_numpy().astype(np.int32)
test_y = test['buy_count'].to_numpy().astype(np.float32)

n_user = int(len(df["user_id"].unique()))
n_item = 34
print('n_user======')
print(n_user)
print('n_item======')
print(n_item)
print("購買記錄_筆數")
print(len(df))

3916
34
購買記錄_筆數
4438


In [3]:
batchsize = 256
log_interval = 10
default_vlaue = 1  # for test users and items without training observations
# data loader
train_loader = Loader(train_x, train_y, n_user, n_item,
            batchsize=batchsize, do_shuffle=False)
test_loader = Loader(test_x, test_y, n_user, n_item,
            batchsize=batchsize, do_shuffle=False)


def train_loop(n_epochs, optimizer, model, writer):
    iter = 0
    for epoch in range(n_epochs):
        epoch_train_error = 0
        for i, (batch_x, batch_mask_x) in enumerate(train_loader):
            iter += 1
            batch_x = torch.from_numpy(batch_x.astype(np.float32))
            batch_mask_x = torch.from_numpy(batch_mask_x.astype(np.float32))
            pred = model(batch_x)
            loss, rmse = model.loss(
                decoder=pred, input=batch_x, optimizer=optimizer, mask_input=batch_mask_x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_train_error += rmse
            if iter % log_interval == 0:
                fmt = "Epoch[{}] Iteration[{}/{}] RMSE: {:.2f}"
                msg = fmt.format(epoch + 1, iter, len(
                    train_loader), rmse)
                print(msg)
        epoch_train_error = epoch_train_error / len(train_loader)
        print("Epoch[{}] training RMSE: {:.2f} ".format(
            epoch+1, epoch_train_error))
        writer.add_scalar("training/RMSE", epoch_train_error, epoch)
        # validation
        for i, (batch_x, batch_mask_x) in enumerate(test_loader):
            batch_x = torch.from_numpy(batch_x.astype(np.float32))
            batch_mask_x = torch.from_numpy(batch_mask_x.astype(np.float32))
            pred = model(batch_x)

            untrained_user_list = list(test_x[(
                np.isin(test_x[:, 0], train_x[:, 0]) == False), 0])
            untrained_item_list = list(test_x[(
                np.isin(test_x[:, 1], train_x[:, 1]) == False), 1])
            for u_user in untrained_user_list:
                for u_item in untrained_item_list:
                    pred[u_user, u_item] = 1
            vali_rmse = torch.sqrt((((pred - batch_x) * batch_mask_x).pow(2).sum()
                                    ) / (batch_mask_x == 1).sum())
            print("Epoch[{}] Validation RMSE: {:.2f} ".format(
                epoch+1, vali_rmse))
            writer.add_scalar("validation/RMSE", vali_rmse, epoch)


In [4]:
lr_list = [1e-2, 1e-4]
k_list = [4,6]
n_epochs_list =  [300, 500, 600]

batchsize = 256

for lr in lr_list:
    for k in k_list:
        for epochs in n_epochs_list:
            model = AutoRec(n_user, n_item, k=k)
            log_dir = '../../runs/AutoRec_' + 'k=' + str(k) + ',lr='+str(lr)+',epochs='+str(epochs) +','+ str(datetime.now()).replace(' ', '_')
            writer = SummaryWriter(log_dir=log_dir)
            optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
            train_loop(n_epochs=epochs, optimizer=optimizer, model=model, writer=writer)
            # torch.save(model.stae_dict(), '../../../models/AutoRec.pth')



# train_loop(epochs=n_epochs)
# torch.save(model.state_dict(), '../../models/AutoRec.pth')

Epoch[1] Iteration[10/15] RMSE: 1.53
Epoch[1] training RMSE: 1.68 
Epoch[1] Validation RMSE: 1.53 
Epoch[2] Iteration[20/15] RMSE: 1.39
Epoch[2] Iteration[30/15] RMSE: 1.39
Epoch[2] training RMSE: 1.40 
Epoch[2] Validation RMSE: 1.30 
Epoch[3] Iteration[40/15] RMSE: 1.12
Epoch[3] training RMSE: 1.24 
Epoch[3] Validation RMSE: 1.18 
Epoch[4] Iteration[50/15] RMSE: 1.11
Epoch[4] Iteration[60/15] RMSE: 1.20
Epoch[4] training RMSE: 1.17 
Epoch[4] Validation RMSE: 1.10 
Epoch[5] Iteration[70/15] RMSE: 1.03
Epoch[5] training RMSE: 1.12 
Epoch[5] Validation RMSE: 1.06 
Epoch[6] Iteration[80/15] RMSE: 1.02
Epoch[6] Iteration[90/15] RMSE: 1.15
Epoch[6] training RMSE: 1.10 
Epoch[6] Validation RMSE: 1.04 
Epoch[7] Iteration[100/15] RMSE: 1.00
Epoch[7] training RMSE: 1.08 
Epoch[7] Validation RMSE: 1.02 
Epoch[8] Iteration[110/15] RMSE: 0.99
Epoch[8] Iteration[120/15] RMSE: 1.12
Epoch[8] training RMSE: 1.06 
Epoch[8] Validation RMSE: 1.01 
Epoch[9] Iteration[130/15] RMSE: 0.97
Epoch[9] training R