In [1]:
import numpy as np
import pandas as pd
import random
from loguru import logger

from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, r2_score

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split


from math import ceil

import os
import copy
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
logger.add("/tmp/test-movielens.log")

1

In [3]:
# # load train data
# train_df = pd.read_csv('../../../../tmp/dota_train_binary_heroes.csv.zip', index_col='match_id_hash')
# test_df = pd.read_csv('../../../../tmp/dota_train_binary_heroes.csv.zip', index_col='match_id_hash')
# target = pd.read_csv('../../../../tmp/train_targets.csv.zip', index_col='match_id_hash')
# y = target['radiant_win'].values.astype(np.float32)
# y = y.reshape(-1, 1)

In [3]:
df_ratings = pd.read_csv("../../datasets/movielens-small/ratings.csv")
ratings_onehot = pd.get_dummies(df_ratings[["userId", "movieId"]].astype("category"))
ratings_onehot.shape

(100836, 10334)

In [4]:
train_df = ratings_onehot.sample(frac=0.8)

In [5]:
test_df = ratings_onehot[~ratings_onehot.index.isin(train_df.index)]

In [9]:
df_ratings["rating_bin"] = (df_ratings.rating <= 3).astype(float)

In [10]:
y = df_ratings[df_ratings.index.isin(train_df.index)]["rating_bin"].values.astype(np.float32).reshape(-1, 1)

In [13]:
X_train = train_df.values.astype(np.float32)
X_test = test_df.values.astype(np.float32)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

models = []
scores = []
train_preds = np.zeros(y.shape)
test_preds = np.zeros((X_test.shape[0], 1))

In [14]:
batch_size = 1024
epochs = 300
criterion = nn.BCEWithLogitsLoss()
# criterion = nn.MSELoss()
opt_params = {'lr': 0.01, 'momentum': 0.9}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

X = X_train
folds = KFold(n_splits=5, random_state=17)



In [15]:
X_tensor, X_test, y_tensor = torch.from_numpy(X).to(device), torch.from_numpy(X_test).to(device), torch.from_numpy(y).to(device)

In [18]:
for n_fold, (train_ind, valid_ind) in enumerate(folds.split(X, y)):

    logger.debug(f'fold {n_fold+1}')

    train_set = TensorDataset(X_tensor[train_ind], y_tensor[train_ind])
    valid_set = TensorDataset(X_tensor[valid_ind], y_tensor[valid_ind])

    loaders = {'train': DataLoader(train_set, batch_size=batch_size, shuffle=True),
               'valid': DataLoader(valid_set, batch_size=batch_size, shuffle=False)}

    model = TorchFM(n=X_train.shape[1], k=5)
    model.to(device)
    best_model_wts = copy.deepcopy(model.state_dict())

    optimizer = optim.SGD(model.parameters(), **opt_params)

    best_score = 0.
    for epoch in range(epochs):
        losses = {'train': 0., 'valid': 0}

        for phase in ['train', 'valid']:

            if phase == 'train':
                model.train()
            else:
                model.eval()

            for batch_x, batch_y in loaders[phase]:
                optimizer.zero_grad()
                out = model(batch_x)
                loss = criterion(out, batch_y)
                losses[phase] += loss.item()*batch_x.size(0)

                with torch.set_grad_enabled(phase == 'train'):
                    if phase == 'train':
                        loss.backward()
#                             scheduler.step()
                        optimizer.step()

            losses[phase] /= len(loaders[phase].dataset)

        # after each epoch check if we improved roc auc and if yes - save model
        with torch.no_grad():
            model.eval()
            valid_preds = (model(X_tensor[valid_ind]).cpu().numpy())
            epoch_score = roc_auc_score(y[valid_ind], valid_preds)
            if epoch_score > best_score:
                best_model_wts = copy.deepcopy(model.state_dict())
                best_score = epoch_score

        if ((epoch+1) % 30) == 0:
            logger.debug(f'epoch {epoch+1} train loss: {losses["train"]:.3f} valid loss {losses["valid"]:.3f} valid roc auc {epoch_score:.3f}')

    # prediction on valid set
    with torch.no_grad():
        model.load_state_dict(best_model_wts)
        model.eval()

        train_preds[valid_ind] = (model(X_tensor[valid_ind]).cpu().numpy())
        fold_score = roc_auc_score(y[valid_ind], train_preds[valid_ind])
        scores.append(fold_score)
        logger.debug(f'Best ROC AUC score {fold_score}')
        models.append(model)

        test_preds += (model(X_test).cpu().numpy())

logger.debug('CV AUC ROC', np.mean(scores), np.std(scores))

test_preds /= folds.n_splits

2020-03-16 20:15:35.211 | DEBUG    | __main__:<module>:3 - fold 1
2020-03-16 20:19:32.234 | DEBUG    | __main__:<module>:52 - epoch 30 train loss: 0.709 valid loss 0.712 valid roc auc 0.503
2020-03-16 20:23:26.199 | DEBUG    | __main__:<module>:52 - epoch 60 train loss: 0.700 valid loss 0.702 valid roc auc 0.503


KeyboardInterrupt: 

In [35]:
from fastFM.datasets import make_user_item_regression
from sklearn.model_selection import train_test_split

# This sets up a small test dataset.
X, y, _ = make_user_item_regression(label_stdev=.4)
X_train, X_test, y_train, y_test = train_test_split(X.toarray(), y)

In [21]:
from fastFM import als
fm = als.FMRegression(n_iter=1000, init_stdev=0.1, rank=2, l2_reg_w=0.1, l2_reg_V=0.5)
fm.fit(X_train, y_train)
y_pred = fm.predict(X_test)
r2_score(y_test, y_pred)

0.9945042051187586

In [27]:
X.shape

(400, 40)

In [25]:
X.toarray()

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [51]:
class FM(nn.Module):
    def __init__(self, features_num=None, k=5):
        super().__init__()
        self.V = nn.Parameter(torch.randn(features_num, k), requires_grad=True)
        self.linear = nn.Linear(features_num, 1)

    def forward(self, X):
        out_1 = ((X @ self.V) ** 2).sum(1, keepdim=True)
        out_2 = ((X ** 2) @ (self.V ** 2)).sum(1, keepdim=True)

        out_interaction = 0.5 * (out_1 - out_2)
        out_linear = self.linear(X)
        return out_interaction + out_linear

In [55]:
X_tensor, y_tensor = torch.from_numpy(X_train), torch.from_numpy(y_train.reshape(-1, 1))

In [73]:
model = FM(X_train.shape[1])
criterion = nn.MSELoss() 
optimizer = optim.SGD(model.parameters(), lr=0.001)

for epoch in range(5000):
    optimizer.zero_grad()
    predictions = model(X_tensor.float())
    loss = criterion(predictions, y_tensor.float())
    # get gradients
    loss.backward()
    # update parameters
    optimizer.step()

In [74]:
r2_score(y_train, predictions.squeeze().detach().numpy())

0.9842598586896495