In [1]:
import numpy as np 
import pandas as pd
import scipy.sparse as sparse

In [2]:
train = pd.read_csv('./train.txt', names=['user', 'item', 'score'], sep='\t')
test = pd.read_csv('./test.txt', names=['user', 'item'], sep='\t')

In [3]:
train.head()

Unnamed: 0,user,item,score
0,1,1,5
1,1,2,3
2,1,3,4
3,1,4,3
4,1,5,3


In [4]:
martix_train = train.pivot_table(columns='item', index='user', values='score').fillna(0).values
martix_train

array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

In [5]:
test['score'] = 10

In [6]:
watched = set()
for pair in test.append(train).values[:, :2]:
    watched.add(tuple(pair))

In [7]:
def train_split(user_item, test_size=0.1):
    X_train = np.zeros(user_item.shape)
    X_test = np.zeros(user_item.shape)
    np.random.seed(1337)
    has_score = np.argwhere(user_item > 0)
    np.random.shuffle(has_score)
    split = int(len(has_score) * test_size)
    test = has_score[:split]
    train = has_score[split:]
    for i, idx in enumerate(train):
        X_train[idx[0], idx[1]] = user_item[idx[0], idx[1]]
    for i, idx in enumerate(test):
        X_test[idx[0], idx[1]] = user_item[idx[0], idx[1]]
    return X_train, X_test

In [8]:
class IALS:
    
    def __init__(self, iterations=30, latent_features=5, alpha=25, lambda_reg=10, neg_sampling=0.85, conf_w=10):
        self.iterations = iterations
        self.latent_features = latent_features
        self.alpha = alpha
        self.lambda_reg = lambda_reg
        self.neg_sampling = neg_sampling
        self.conf_w = conf_w
        np.random.seed(1337)
        
    def fit(self, train, test=None):
        user_size, item_size = train.shape
        matrix_full = np.zeros_like(train)
        mean_score = train[(train > 0)].mean()
        for u in range(user_size):
            for i in range(item_size):
                if train[u][i]:
                    matrix_full[u][i] = train[u][i]
                elif (u, i) in watched:
                    matrix_full[u][i] = mean_score
                else:
                    matrix_full[u][i] = mean_score * self.neg_sampling

        C = np.ones(train.shape) + self.alpha * np.log(np.ones(train.shape) + train * self.conf_w)   
        C_I = C - 1
        lambda_I = self.lambda_reg * sparse.eye(self.latent_features + 1)
 
        # user u x f
        X = np.hstack([np.ones((user_size, 1)), np.random.normal(size=(user_size, self.latent_features))])
        # item i x f
        Y = np.hstack([np.ones((item_size, 1)), np.random.normal(size=(item_size, self.latent_features))])
        # biased als
        X_bias = np.array([0] * user_size)
        Y_bias = np.array([0] * item_size)
        
        n_user = (train > 0).sum(1)
        n_item = (train > 0).sum(0)
        
        MIN_LOSS = 9999999
        MIN_ITER = 9999999
        for iteration in range(self.iterations):            
            # User step
            yTy = np.dot(Y.T, Y)
            Cu = C * (matrix_full - Y_bias)
            for u in range(user_size):
                # X = ((Y.T*Y + Y.T*(C - I) * Y) + lambda*I)^-1 * (Y.T * Cu)
                inv = np.linalg.inv(yTy + np.dot(Y.T * C_I[u], Y) + lambda_I * n_user[u])
                X[u] = np.dot(np.dot(inv, Y.T), Cu[u].reshape(-1, 1)).ravel()
            X_bias = X[:, 0].copy().reshape(-1, 1)
            X[:, 0] = 1
                
            # Item step
            xTx = np.dot(X.T, X)
            Ci = C * (matrix_full - X_bias)
            for i in range(item_size):
                # Y = ((X.T*X + X.T*(C - I) * X) + lambda*I)^-1 * (X.T * Ci)
                inv = np.linalg.inv(xTx + np.dot(X.T * C_I[:, i], X) + lambda_I * n_item[i])
                Y[i] = np.dot(np.dot(inv, X.T), Ci[:, i].reshape(-1, 1)).ravel()
            Y_bias = Y[:, 0].copy().ravel()
            Y[:, 0] = 1
            
            result = np.dot(X[:, 1:], Y[:, 1:].T) + X_bias + Y_bias
            result[result > 5] = 5
            result[result < 1] = 1
            
            if test is not None:
                rmse = np.sqrt(((result * (test > 0) - test) ** 2).sum() / (test > 0).sum())
                rmset = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                if rmse < MIN_LOSS:
                    MIN_LOSS = rmse
                    MIN_ITER = iteration
                    #print("CUR MIN:", rmse, iteration, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w)
                #print("Test score:", str(iteration) + " | " + str(rmse) + " | " + str(rmset))
            else:
                rmse = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                #print("Train score:", str(iteration) + " | " + str(rmse))
                
#        return MIN_LOSS, MIN_ITER, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w
        return result

In [9]:
from sklearn.model_selection import ParameterGrid
from joblib import Parallel, delayed
import tqdm.notebook as tqdm

In [10]:
GRID = {
    'latent_features': [5, 10, 15],
    'alpha': [10, 25, 40, 100],
    'lambda_reg': [0.5, 5, 10, 15, 20],
    'neg_sampling': [0.5, 0.75, 0.85, 1.0],
    'conf_w': [1, 5, 10, 20, 50]
}

In [11]:
X_train, X_test = train_split(martix_train, test_size = 0.33)

In [35]:
param_grid = ParameterGrid(GRID)
best_score = 999999

def grid_search(dict_):
    return IALS(**dict_).fit(X_train, X_test)

GS = Parallel(n_jobs=-1)(delayed(grid_search)(dict_) for dict_ in tqdm.tqdm(param_grid))

HBox(children=(FloatProgress(value=0.0, max=1200.0), HTML(value='')))




In [37]:
sorted(GS, key=lambda x: x[0])[:20]

[(0.9139662442046143, 11, 5, 100, 15, 0.75, 1),
 (0.9144519547170014, 10, 5, 100, 15, 0.85, 1),
 (0.9147856216111326, 11, 5, 40, 15, 0.85, 10),
 (0.9149946763732804, 14, 5, 40, 10, 0.85, 5),
 (0.9155571578200764, 11, 5, 40, 20, 0.85, 20),
 (0.9158111653793563, 8, 10, 100, 20, 0.85, 1),
 (0.9158415430872886, 12, 5, 40, 15, 0.75, 10),
 (0.9158767554267505, 15, 5, 40, 10, 0.75, 5),
 (0.9158834603710929, 13, 5, 25, 10, 0.85, 20),
 (0.9159032701005957, 25, 5, 40, 15, 0.85, 20),
 (0.9159282695248893, 9, 5, 100, 20, 0.75, 1),
 (0.9160828203524338, 8, 5, 100, 20, 0.85, 1),
 (0.9163132854131656, 9, 5, 40, 15, 0.85, 5),
 (0.9163207303660508, 9, 10, 100, 20, 0.75, 1),
 (0.9165103356830069, 14, 5, 40, 20, 0.85, 50),
 (0.9165156567796929, 10, 5, 25, 10, 0.85, 10),
 (0.9167239313150672, 11, 5, 25, 15, 0.85, 50),
 (0.9167525893117328, 19, 5, 40, 15, 0.75, 20),
 (0.9167969535263346, 11, 5, 40, 20, 0.75, 20),
 (0.9169190198281769, 7, 15, 100, 20, 0.85, 1)]

In [12]:
models = [(0.9139662442046143, 11, 5, 100, 15, 0.75, 1),
 (0.9144519547170014, 10, 5, 100, 15, 0.85, 1),
 (0.9147856216111326, 11, 5, 40, 15, 0.85, 10),
 (0.9149946763732804, 14, 5, 40, 10, 0.85, 5),
 (0.9155571578200764, 11, 5, 40, 20, 0.85, 20),
 (0.9158111653793563, 8, 10, 100, 20, 0.85, 1)]

res = np.zeros_like(martix_train)

for m in models:
    res += IALS(m[1], latent_features=m[2], alpha=m[3], lambda_reg=m[4], neg_sampling=m[5], conf_w=m[6]).fit(martix_train)
res = res / len(models)

In [13]:
result = res

In [14]:
class IALS:
    
    def __init__(self, iterations=30, latent_features=5, alpha=25, lambda_reg=10, neg_sampling=0.85, conf_w=10):
        self.iterations = iterations
        self.latent_features = latent_features
        self.alpha = alpha
        self.lambda_reg = lambda_reg
        self.neg_sampling = neg_sampling
        self.conf_w = conf_w
        np.random.seed(1337)
        
    def fit(self, train, test=None):
        user_size, item_size = train.shape
        matrix_full = np.zeros_like(train)
        mean_score = train[(train > 0)].mean()
        for u in range(user_size):
            for i in range(item_size):
                if train[u][i]:
                    matrix_full[u][i] = train[u][i]
                else:
                    matrix_full[u][i] = res[u][i]
                    
        C = np.ones(train.shape) + self.alpha * np.log(np.ones(train.shape) + train * self.conf_w)
        C_I = C - 1
        lambda_I = self.lambda_reg * sparse.eye(self.latent_features + 1)
 
        # user u x f
        X = np.hstack([np.ones((user_size, 1)), np.random.normal(size=(user_size, self.latent_features))])
        # item i x f
        Y = np.hstack([np.ones((item_size, 1)), np.random.normal(size=(item_size, self.latent_features))])
        # biased als
        X_bias = np.array([0] * user_size)
        Y_bias = np.array([0] * item_size)
        
        n_user = (train > 0).sum(1)
        n_item = (train > 0).sum(0)
        
        MIN_LOSS = 9999999
        MIN_ITER = 9999
        for iteration in range(self.iterations):            
            # User step
            yTy = np.dot(Y.T, Y)
            Cu = C * (matrix_full - Y_bias)
            for u in range(user_size):
                # X = ((Y.T*Y + Y.T*(C - I) * Y) + lambda*I)^-1 * (Y.T * Cu)
                inv = np.linalg.inv(yTy + np.dot(Y.T * C_I[u], Y) + lambda_I * n_user[u])
                X[u] = np.dot(np.dot(inv, Y.T), Cu[u].reshape(-1, 1)).ravel()
            X_bias = X[:, 0].copy().reshape(-1, 1)
            X[:, 0] = 1
                
            # Item step
            xTx = np.dot(X.T, X)
            Ci = C * (matrix_full - X_bias)
            for i in range(item_size):
                # Y = ((X.T*X + X.T*(C - I) * X) + lambda*I)^-1 * (X.T * Ci)
                inv = np.linalg.inv(xTx + np.dot(X.T * C_I[:, i], X) + lambda_I * n_item[i])
                Y[i] = np.dot(np.dot(inv, X.T), Ci[:, i].reshape(-1, 1)).ravel()
            Y_bias = Y[:, 0].copy().ravel()
            Y[:, 0] = 1
            
            result = np.dot(X[:, 1:], Y[:, 1:].T) + X_bias + Y_bias
            result[result > 5] = 5
            result[result < 1] = 1
            
            if test is not None:
                rmse = np.sqrt(((result * (test > 0) - test) ** 2).sum() / (test > 0).sum())
                rmset = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                if rmse < MIN_LOSS:
                    MIN_LOSS = rmse
                    MIN_ITER = iteration
                    #print("CUR MIN:", rmse, iteration, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w)
                #print("Test score:", str(iteration) + " | " + str(rmse) + " | " + str(rmset))
            else:
                rmse = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                #print("Train score:", str(iteration) + " | " + str(rmse))
                
#        return MIN_LOSS, MIN_ITER, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w
        return result

In [15]:
X_train, X_test = train_split(martix_train, test_size = 0.33)

In [16]:
GRID = {
    'latent_features': [5, 10, 15],
    'alpha': [10, 25, 40, 100, 200, 1000],
    'lambda_reg': [5, 10, 15, 20, 50],
    'conf_w': [1, 5, 10, 20, 50]
}

In [51]:
param_grid = ParameterGrid(GRID)
best_score = 999999

def grid_search(dict_):
    return IALS(**dict_).fit(X_train, X_test)

GS = Parallel(n_jobs=-1)(delayed(grid_search)(dict_) for dict_ in tqdm.tqdm(param_grid))

HBox(children=(FloatProgress(value=0.0, max=450.0), HTML(value='')))




In [53]:
sorted(GS, key=lambda x: x[0])[:20]

[(0.8695166666651777, 6, 10, 25, 5, 0.85, 1),
 (0.8706953771223594, 8, 10, 10, 5, 0.85, 20),
 (0.8708991969948306, 16, 15, 25, 5, 0.85, 1),
 (0.8712175719863978, 8, 5, 10, 5, 0.85, 20),
 (0.8712504037270057, 7, 10, 10, 5, 0.85, 10),
 (0.8716418715089047, 10, 5, 10, 5, 0.85, 50),
 (0.8716988531174237, 5, 5, 25, 5, 0.85, 1),
 (0.8721054678992153, 8, 5, 40, 5, 0.85, 1),
 (0.8724489406357814, 13, 15, 10, 5, 0.85, 10),
 (0.8730113091855535, 7, 5, 10, 5, 0.85, 10),
 (0.8732856292401278, 18, 15, 10, 5, 0.85, 20),
 (0.8738634358006234, 10, 10, 10, 5, 0.85, 50),
 (0.8744412628713872, 9, 10, 40, 5, 0.85, 1),
 (0.8751510233697914, 15, 5, 25, 5, 0.85, 5),
 (0.8759978203050359, 7, 10, 10, 5, 0.85, 5),
 (0.8763067254854525, 7, 15, 10, 5, 0.85, 5),
 (0.8777708254734176, 7, 5, 10, 5, 0.85, 5),
 (0.8783185335303358, 29, 15, 10, 5, 0.85, 50),
 (0.8787555177458003, 29, 15, 40, 5, 0.85, 1),
 (0.8807564999242398, 21, 5, 25, 5, 0.85, 10)]

In [17]:
models2 = [
        (0.8695166666651777, 6, 10, 25, 5, 0.85, 1),
        (0.8706953771223594, 8, 10, 10, 5, 0.85, 20),
        (0.8708991969948306, 16, 15, 25, 5, 0.85, 1),
        (0.8712175719863978, 8, 5, 10, 5, 0.85, 20),
        (0.8712504037270057, 7, 10, 10, 5, 0.85, 10),
        (0.8716418715089047, 10, 5, 10, 5, 0.85, 50),
        (0.8716988531174237, 5, 5, 25, 5, 0.85, 1),
        (0.8721054678992153, 8, 5, 40, 5, 0.85, 1),
        (0.8724489406357814, 13, 15, 10, 5, 0.85, 10)]

res2 = np.zeros_like(martix_train)

for m in models2:
    res2 += IALS(m[1] + 5, latent_features=m[2], alpha=m[3], lambda_reg=m[4], neg_sampling=m[5], conf_w=m[6]).fit(martix_train)
res2 = res2 / len(models2)

In [26]:
class IALS:
    
    def __init__(self, iterations=30, latent_features=5, alpha=25, lambda_reg=10, neg_sampling=0.85, conf_w=10):
        self.iterations = iterations
        self.latent_features = latent_features
        self.alpha = alpha
        self.lambda_reg = lambda_reg
        self.neg_sampling = neg_sampling
        self.conf_w = conf_w
        np.random.seed(1337)
        
    def fit(self, train, test=None):
        user_size, item_size = train.shape
        matrix_full = np.zeros_like(train)
        mean_score = train[(train > 0)].mean()
        for u in range(user_size):
            for i in range(item_size):
                if train[u][i]:
                    matrix_full[u][i] = train[u][i]
                else:
                    matrix_full[u][i] = res2[u][i]
                    
        C = np.ones(train.shape) + self.alpha * np.log(np.ones(train.shape) + train * self.conf_w)
        C_I = C - 1
        lambda_I = self.lambda_reg * sparse.eye(self.latent_features + 1)
 
        # user u x f
        X = np.hstack([np.ones((user_size, 1)), np.random.normal(size=(user_size, self.latent_features))])
        # item i x f
        Y = np.hstack([np.ones((item_size, 1)), np.random.normal(size=(item_size, self.latent_features))])
        # biased als
        X_bias = np.array([0] * user_size)
        Y_bias = np.array([0] * item_size)
        
        n_user = (train > 0).sum(1)
        n_item = (train > 0).sum(0)
        
        MIN_LOSS = 9999999
        MIN_ITER = 9999
        for iteration in range(self.iterations):            
            # User step
            yTy = np.dot(Y.T, Y)
            Cu = C * (matrix_full - Y_bias)
            for u in range(user_size):
                # X = ((Y.T*Y + Y.T*(C - I) * Y) + lambda*I)^-1 * (Y.T * Cu)
                inv = np.linalg.inv(yTy + np.dot(Y.T * C_I[u], Y) + lambda_I * n_user[u])
                X[u] = np.dot(np.dot(inv, Y.T), Cu[u].reshape(-1, 1)).ravel()
            X_bias = X[:, 0].copy().reshape(-1, 1)
            X[:, 0] = 1
                
            # Item step
            xTx = np.dot(X.T, X)
            Ci = C * (matrix_full - X_bias)
            for i in range(item_size):
                # Y = ((X.T*X + X.T*(C - I) * X) + lambda*I)^-1 * (X.T * Ci)
                inv = np.linalg.inv(xTx + np.dot(X.T * C_I[:, i], X) + lambda_I * n_item[i])
                Y[i] = np.dot(np.dot(inv, X.T), Ci[:, i].reshape(-1, 1)).ravel()
            Y_bias = Y[:, 0].copy().ravel()
            Y[:, 0] = 1
            
            result = np.dot(X[:, 1:], Y[:, 1:].T) + X_bias + Y_bias
            result[result > 5] = 5
            result[result < 1] = 1
            
            if test is not None:
                rmse = np.sqrt(((result * (test > 0) - test) ** 2).sum() / (test > 0).sum())
                rmset = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                if rmse < MIN_LOSS:
                    MIN_LOSS = rmse
                    MIN_ITER = iteration
                    #print("CUR MIN:", rmse, iteration, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w)
                #print("Test score:", str(iteration) + " | " + str(rmse) + " | " + str(rmset))
            else:
                rmse = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                #print("Train score:", str(iteration) + " | " + str(rmse))
                
#        return MIN_LOSS, MIN_ITER, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w
        return result

In [23]:
GRID = {
    'latent_features': [5, 7, 10],
    'alpha': [10, 25, 40, 100],
    'lambda_reg': [5, 10, 15, 20, 50],
    'conf_w': [1, 5, 10, 20, 50]
}

In [24]:
param_grid = ParameterGrid(GRID)
best_score = 999999

def grid_search(dict_):
    return IALS(**dict_).fit(X_train, X_test)

GS = Parallel(n_jobs=-1)(delayed(grid_search)(dict_) for dict_ in tqdm.tqdm(param_grid))

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))




In [25]:
sorted(GS, key=lambda x: x[0])[:20]

[(0.863992257220593, 7, 10, 25, 5, 0.85, 1),
 (0.8650793700851649, 9, 10, 10, 5, 0.85, 20),
 (0.8652946978087482, 29, 7, 10, 5, 0.85, 20),
 (0.8656731237902267, 16, 10, 10, 5, 0.85, 10),
 (0.8656839408271412, 29, 7, 25, 5, 0.85, 1),
 (0.8664717652537909, 29, 7, 10, 5, 0.85, 50),
 (0.8667253079476568, 9, 7, 10, 5, 0.85, 10),
 (0.8671572005137305, 29, 7, 40, 5, 0.85, 1),
 (0.8676416327991164, 8, 5, 10, 5, 0.85, 20),
 (0.8678441217620121, 5, 5, 25, 5, 0.85, 1),
 (0.8681383368549278, 10, 5, 10, 5, 0.85, 50),
 (0.8683852042823921, 8, 5, 40, 5, 0.85, 1),
 (0.8683884848023613, 10, 10, 10, 5, 0.85, 50),
 (0.8687648471973352, 9, 10, 40, 5, 0.85, 1),
 (0.8693763033094114, 7, 5, 10, 5, 0.85, 10),
 (0.8708568618900308, 13, 10, 10, 5, 0.85, 5),
 (0.8716835085320583, 15, 5, 25, 5, 0.85, 5),
 (0.8720158658786376, 8, 7, 10, 5, 0.85, 5),
 (0.8723087823646155, 29, 7, 25, 5, 0.85, 5),
 (0.8740352848385096, 7, 5, 10, 5, 0.85, 5)]

In [29]:
models3 = [(0.863992257220593, 7, 10, 25, 5, 0.85, 1),
 (0.8650793700851649, 9, 10, 10, 5, 0.85, 20),
 (0.8652946978087482, 29, 7, 10, 5, 0.85, 20),
 (0.8656731237902267, 16, 10, 10, 5, 0.85, 10),
 (0.8656839408271412, 29, 7, 25, 5, 0.85, 1)]

res3 = np.zeros_like(martix_train)

for m in models3:
    res3 += IALS(m[1] + 5, latent_features=m[2], alpha=m[3], lambda_reg=m[4], neg_sampling=m[5], conf_w=m[6]).fit(martix_train)
res3 = res3 / len(models3)

In [37]:
class IALS:
    
    def __init__(self, iterations=30, latent_features=5, alpha=25, lambda_reg=10, neg_sampling=0.85, conf_w=10):
        self.iterations = iterations
        self.latent_features = latent_features
        self.alpha = alpha
        self.lambda_reg = lambda_reg
        self.neg_sampling = neg_sampling
        self.conf_w = conf_w
        np.random.seed(1337)
        
    def fit(self, train, test=None):
        user_size, item_size = train.shape
        matrix_full = np.zeros_like(train)
        mean_score = train[(train > 0)].mean()
        for u in range(user_size):
            for i in range(item_size):
                if train[u][i]:
                    matrix_full[u][i] = train[u][i]
                else:
                    matrix_full[u][i] = res3[u][i]
                    
        C = np.ones(train.shape) + self.alpha * np.log(np.ones(train.shape) + train * self.conf_w)
        C_I = C - 1
        lambda_I = self.lambda_reg * sparse.eye(self.latent_features + 1)
 
        # user u x f
        X = np.hstack([np.ones((user_size, 1)), np.random.normal(size=(user_size, self.latent_features))])
        # item i x f
        Y = np.hstack([np.ones((item_size, 1)), np.random.normal(size=(item_size, self.latent_features))])
        # biased als
        X_bias = np.array([0] * user_size)
        Y_bias = np.array([0] * item_size)
        
        n_user = (train > 0).sum(1)
        n_item = (train > 0).sum(0)
        
        MIN_LOSS = 9999999
        MIN_ITER = 9999
        for iteration in range(self.iterations):            
            # User step
            yTy = np.dot(Y.T, Y)
            Cu = C * (matrix_full - Y_bias)
            for u in range(user_size):
                # X = ((Y.T*Y + Y.T*(C - I) * Y) + lambda*I)^-1 * (Y.T * Cu)
                inv = np.linalg.inv(yTy + np.dot(Y.T * C_I[u], Y) + lambda_I * n_user[u])
                X[u] = np.dot(np.dot(inv, Y.T), Cu[u].reshape(-1, 1)).ravel()
            X_bias = X[:, 0].copy().reshape(-1, 1)
            X[:, 0] = 1
                
            # Item step
            xTx = np.dot(X.T, X)
            Ci = C * (matrix_full - X_bias)
            for i in range(item_size):
                # Y = ((X.T*X + X.T*(C - I) * X) + lambda*I)^-1 * (X.T * Ci)
                inv = np.linalg.inv(xTx + np.dot(X.T * C_I[:, i], X) + lambda_I * n_item[i])
                Y[i] = np.dot(np.dot(inv, X.T), Ci[:, i].reshape(-1, 1)).ravel()
            Y_bias = Y[:, 0].copy().ravel()
            Y[:, 0] = 1
            
            result = np.dot(X[:, 1:], Y[:, 1:].T) + X_bias + Y_bias
            result[result > 5] = 5
            result[result < 1] = 1
            
            if test is not None:
                rmse = np.sqrt(((result * (test > 0) - test) ** 2).sum() / (test > 0).sum())
                rmset = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                if rmse < MIN_LOSS:
                    MIN_LOSS = rmse
                    MIN_ITER = iteration
                    #print("CUR MIN:", rmse, iteration, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w)
                #print("Test score:", str(iteration) + " | " + str(rmse) + " | " + str(rmset))
            else:
                rmse = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                #print("Train score:", str(iteration) + " | " + str(rmse))
                
#        return MIN_LOSS, MIN_ITER, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w
        return result

In [34]:
param_grid = ParameterGrid(GRID)
best_score = 999999

def grid_search(dict_):
    return IALS(**dict_).fit(X_train, X_test)

GS = Parallel(n_jobs=-1)(delayed(grid_search)(dict_) for dict_ in tqdm.tqdm(param_grid))

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))




In [35]:
sorted(GS, key=lambda x: x[0])[:20]

[(0.8590039812704885, 21, 10, 25, 5, 0.85, 1),
 (0.8602428603305463, 29, 10, 10, 5, 0.85, 20),
 (0.8610594900181323, 21, 10, 10, 5, 0.85, 10),
 (0.8617264440522253, 11, 7, 10, 5, 0.85, 20),
 (0.8619964624280274, 8, 7, 25, 5, 0.85, 1),
 (0.862630239303434, 29, 7, 10, 5, 0.85, 50),
 (0.8630380802883397, 29, 7, 40, 5, 0.85, 1),
 (0.8632158965195386, 9, 7, 10, 5, 0.85, 10),
 (0.8635185414064538, 27, 10, 10, 5, 0.85, 50),
 (0.8635434876215294, 29, 10, 40, 5, 0.85, 1),
 (0.8663287241736828, 5, 5, 25, 5, 0.85, 1),
 (0.866476531144437, 8, 5, 10, 5, 0.85, 20),
 (0.866905435785169, 8, 5, 40, 5, 0.85, 1),
 (0.8669341950873642, 14, 10, 10, 5, 0.85, 5),
 (0.8670307396590826, 10, 5, 10, 5, 0.85, 50),
 (0.8682358431524588, 7, 5, 10, 5, 0.85, 10),
 (0.8684422086675153, 29, 7, 25, 5, 0.85, 5),
 (0.8687711618818741, 8, 7, 10, 5, 0.85, 5),
 (0.8705952635733107, 14, 5, 25, 5, 0.85, 5),
 (0.8729954751782127, 7, 5, 10, 5, 0.85, 5)]

In [38]:
models4 = [(0.8590039812704885, 21, 10, 25, 5, 0.85, 1),
 (0.8602428603305463, 29, 10, 10, 5, 0.85, 20),
 (0.8610594900181323, 21, 10, 10, 5, 0.85, 10),
 (0.8617264440522253, 11, 7, 10, 5, 0.85, 20),
 (0.8619964624280274, 8, 7, 25, 5, 0.85, 1)]

res4 = np.zeros_like(martix_train)

for m in models4:
    res4 += IALS(m[1] + 5, latent_features=m[2], alpha=m[3], lambda_reg=m[4], neg_sampling=m[5], conf_w=m[6]).fit(martix_train)
res4 = res4 / len(models4)

In [45]:
class IALS:
    
    def __init__(self, iterations=30, latent_features=5, alpha=25, lambda_reg=10, neg_sampling=0.85, conf_w=10):
        self.iterations = iterations
        self.latent_features = latent_features
        self.alpha = alpha
        self.lambda_reg = lambda_reg
        self.neg_sampling = neg_sampling
        self.conf_w = conf_w
        np.random.seed(1337)
        
    def fit(self, train, test=None):
        user_size, item_size = train.shape
        matrix_full = np.zeros_like(train)
        mean_score = train[(train > 0)].mean()
        for u in range(user_size):
            for i in range(item_size):
                if train[u][i]:
                    matrix_full[u][i] = train[u][i]
                else:
                    matrix_full[u][i] = res4[u][i]
                    
        C = np.ones(train.shape) + self.alpha * np.log(np.ones(train.shape) + train * self.conf_w)
        C_I = C - 1
        lambda_I = self.lambda_reg * sparse.eye(self.latent_features + 1)
 
        # user u x f
        X = np.hstack([np.ones((user_size, 1)), np.random.normal(size=(user_size, self.latent_features))])
        # item i x f
        Y = np.hstack([np.ones((item_size, 1)), np.random.normal(size=(item_size, self.latent_features))])
        # biased als
        X_bias = np.array([0] * user_size)
        Y_bias = np.array([0] * item_size)
        
        n_user = (train > 0).sum(1)
        n_item = (train > 0).sum(0)
        
        MIN_LOSS = 9999999
        MIN_ITER = 9999
        for iteration in range(self.iterations):            
            # User step
            yTy = np.dot(Y.T, Y)
            Cu = C * (matrix_full - Y_bias)
            for u in range(user_size):
                # X = ((Y.T*Y + Y.T*(C - I) * Y) + lambda*I)^-1 * (Y.T * Cu)
                inv = np.linalg.inv(yTy + np.dot(Y.T * C_I[u], Y) + lambda_I * n_user[u])
                X[u] = np.dot(np.dot(inv, Y.T), Cu[u].reshape(-1, 1)).ravel()
            X_bias = X[:, 0].copy().reshape(-1, 1)
            X[:, 0] = 1
                
            # Item step
            xTx = np.dot(X.T, X)
            Ci = C * (matrix_full - X_bias)
            for i in range(item_size):
                # Y = ((X.T*X + X.T*(C - I) * X) + lambda*I)^-1 * (X.T * Ci)
                inv = np.linalg.inv(xTx + np.dot(X.T * C_I[:, i], X) + lambda_I * n_item[i])
                Y[i] = np.dot(np.dot(inv, X.T), Ci[:, i].reshape(-1, 1)).ravel()
            Y_bias = Y[:, 0].copy().ravel()
            Y[:, 0] = 1
            
            result = np.dot(X[:, 1:], Y[:, 1:].T) + X_bias + Y_bias
            result[result > 5] = 5
            result[result < 1] = 1
            
            if test is not None:
                rmse = np.sqrt(((result * (test > 0) - test) ** 2).sum() / (test > 0).sum())
                rmset = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                if rmse < MIN_LOSS:
                    MIN_LOSS = rmse
                    MIN_ITER = iteration
                    #print("CUR MIN:", rmse, iteration, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w)
                #print("Test score:", str(iteration) + " | " + str(rmse) + " | " + str(rmset))
            else:
                rmse = np.sqrt(((result * (train > 0) - train) ** 2).sum() / (train > 0).sum())
                #print("Train score:", str(iteration) + " | " + str(rmse))
                
#        return MIN_LOSS, MIN_ITER, self.latent_features, self.alpha, self.lambda_reg, self.neg_sampling, self.conf_w
        return result

In [42]:
param_grid = ParameterGrid(GRID)
best_score = 999999

def grid_search(dict_):
    return IALS(**dict_).fit(X_train, X_test)

GS = Parallel(n_jobs=-1)(delayed(grid_search)(dict_) for dict_ in tqdm.tqdm(param_grid))

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))




In [44]:
sorted(GS, key=lambda x: x[0])[:20]

[(0.8567797690357618, 24, 10, 25, 5, 0.85, 1),
 (0.8580482210831184, 18, 10, 10, 5, 0.85, 20),
 (0.8592542326509288, 29, 10, 10, 5, 0.85, 10),
 (0.8595163283424511, 10, 7, 10, 5, 0.85, 20),
 (0.8602064592213389, 8, 7, 25, 5, 0.85, 1),
 (0.8603136032224314, 29, 10, 40, 5, 0.85, 1),
 (0.8605204157779973, 29, 10, 10, 5, 0.85, 50),
 (0.8608077734072278, 18, 7, 10, 5, 0.85, 50),
 (0.8610604244554132, 29, 7, 40, 5, 0.85, 1),
 (0.8613843223210341, 9, 7, 10, 5, 0.85, 10),
 (0.8654291675826731, 20, 10, 10, 5, 0.85, 5),
 (0.8656378977244835, 5, 5, 25, 5, 0.85, 1),
 (0.8658982410690396, 8, 5, 10, 5, 0.85, 20),
 (0.8660947310218461, 29, 7, 25, 5, 0.85, 5),
 (0.8662835832704237, 8, 5, 40, 5, 0.85, 1),
 (0.8665038052266019, 11, 5, 10, 5, 0.85, 50),
 (0.867206310540381, 8, 7, 10, 5, 0.85, 5),
 (0.8676293747653046, 7, 5, 10, 5, 0.85, 10),
 (0.870116421243002, 15, 5, 25, 5, 0.85, 5),
 (0.871451777046329, 29, 10, 25, 5, 0.85, 5)]

In [50]:
models5 = [(0.8567797690357618, 24, 10, 25, 5, 0.85, 1),
         (0.8595163283424511, 10, 7, 10, 5, 0.85, 20),
         (0.8602064592213389, 8, 7, 25, 5, 0.85, 1)]

res5 = np.zeros_like(martix_train)

for m in models5:
    res5 += IALS(m[1] + 5, latent_features=m[2], alpha=m[3], lambda_reg=m[4], neg_sampling=m[5], conf_w=m[6]).fit(martix_train)
res5 = res5 / len(models5)

In [51]:
y_test_pred = []
for i, idx in enumerate(test.values):
    y_test_pred.append(res5[idx[0]-1][idx[1]-1])
pred = pd.DataFrame({'Id': range(1, len(y_test_pred)+1), 'Score': y_test_pred})
pred.to_csv('submission.txt', index=False)