In [None]:
!pip install n2
!pip install buffalo
!pip install --upgrade tables
from buffalo.algo.als import ALS
from buffalo.misc import aux, log
from buffalo.algo.options import ALSOption
import buffalo.data
from buffalo.data.mm import MatrixMarketOptions

import numpy as np
import pandas as pd
import helper as hp
from scipy.io import mmwrite
from scipy.io import mmread
from scipy.sparse import csr_matrix
import json

In [None]:
# Absolute path of the users dump (Colab-style Drive location; presumably
# requires Google Drive to be mounted first — confirm for your runtime).
data_path = '/content/drive/My Drive/coc_contest/users.json'

# Parse the JSON file into a DataFrame.
users = pd.read_json(path_or_buf=data_path, typ='frame')

In [None]:
# Derive the user/book interaction table from the raw user records.
# NOTE(review): `get_userbook_map` comes from the local `helper` module; its output
# schema is not visible here — presumably it carries 'user_id' and 'book_id'
# columns, since those names are passed to `df_to_matrix` afterwards. Confirm.
ratings = hp.get_userbook_map(users)

In [None]:
# Turn the ratings table into a user x item matrix together with the lookup
# tables between raw ids and matrix indices.
# NOTE(review): the meaning of each returned mapping is inferred from its name;
# verify against helper.df_to_matrix.
(
    user_items,
    uid_to_idx,
    idx_to_uid,
    mid_to_idx,
    idx_to_mid,
) = hp.df_to_matrix(ratings, 'user_id', 'book_id')

In [None]:
# Echo the interaction matrix as the cell output for a quick sanity check.
user_items

In [None]:
# Persist the interaction matrix in Matrix Market format; this file is later
# referenced as the `main` input of the buffalo data options.
mmwrite(target='./main.mtx', a=user_items)

In [None]:
# Flatten the index -> id lookups into plain lists of raw ids.
# NOTE(review): this assumes the mappings iterate in index order, so that
# position i holds the id of matrix row/column i — confirm in helper.df_to_matrix.
iid = [*idx_to_mid.values()]
uid = [*idx_to_uid.values()]

In [None]:
# Write each id list to its own text file, one id per line: first the user
# ids to ./uid, then the item ids to ./iid.
for id_path, id_values in (("./uid", uid), ("./iid", iid)):
    with open(id_path, "w") as id_file:
        id_file.writelines(str(entry) + "\n" for entry in id_values)

In [None]:
# Start from buffalo's default ALS options and override selected fields.
opt = ALSOption().get_default_option()

# Overrides, applied in the listed order. The numeric values are hand-picked;
# NOTE(review): their provenance (earlier tuning run?) is not recorded here.
als_overrides = {
    'evaluation_on_learning': True,
    'validation': aux.Option({'topk': 10}),
    'num_workers': 4,
    'reg_u': 0.192,
    'reg_i': 0.56638,
    'alpha': 3,
    'd': 11,
    'save_best': True,
    'model_path': './buffalo_mf.model',
    'evaluation_period': 10,
}
for option_name, option_value in als_overrides.items():
    setattr(opt, option_name, option_value)

In [None]:
# Start from the default Matrix Market data options.
data_opt = MatrixMarketOptions().get_default_option()

# Point the loader at the matrix and the id files written by earlier cells.
input_opt = data_opt.input
input_opt.main = './main.mtx'
input_opt.iid = './iid'
input_opt.uid = './uid'

# Validation split settings.
# NOTE(review): presumably `p` is the held-out fraction and `max_samples`
# caps the validation set size — confirm against the buffalo documentation.
validation_opt = data_opt.data.validation
validation_opt.p = 0.1
validation_opt.max_samples = 10000

In [None]:
# Load a buffalo data object from the options above and build it.
# NOTE(review): the model cell constructs ALS from `data_opt`, not from this
# `data` object, so this cell may be redundant for training — confirm.
data = buffalo.data.load(data_opt)
data.create()

In [None]:
# Build the ALS model from the data options. The alternative of passing the
# pre-built `data` object (ALS(opt, data=data)) is intentionally not used here.
model = ALS(opt, data_opt=data_opt)
# initialize() must run before train().
model.initialize()
# Keep the training result and echo it as the cell output.
val_res = model.train()
val_res

In [None]:
# Hyperparameter search space: one entry per tuned option, each given as
# [distribution, [label, low, high]].
# NOTE(review): whether the upper bounds are inclusive depends on the
# optimizer backend — confirm before relying on the extremes.
search_space = {
    'd': ['randint', ['d', 10, 128]],
    'reg_u': ['uniform', ['reg_u', 0.1, 1.0]],
    'reg_i': ['uniform', ['reg_i', 0.1, 1.0]],
    'alpha': ['randint', ['alpha', 1, 10]],
}

# Attach the optimisation settings to the model's options.
model.opt.optimize = aux.Option({
    'loss': 'val_ndcg',
    'max_trials': 100,
    'deployment': True,
    'start_with_default_parameters': True,
    'space': search_space,
})

In [None]:
# Hyperparameters reported before and after the search, in display order.
tuned_keys = ('alpha', 'd', 'reg_u', 'reg_i')

log.set_log_level(log.INFO)
model.opt.model_path = './als.optimize.bin'

# Show the starting values currently set on the model.
starting_parameters = {key: getattr(model.opt, key) for key in tuned_keys}
print(json.dumps(starting_parameters, indent=2))

# Run the search and collect its results.
model.optimize()
optimization_res = model.get_optimization_data()
best_parameters = optimization_res['best_parameters']

# Report the best trial and the hyperparameters that produced it.
print(json.dumps(optimization_res['best'], indent=2))
print(json.dumps({key: best_parameters[key] for key in tuned_keys}, indent=2))