In [62]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random

from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

import os
import copy
# Preferred compute device: CUDA when torch reports it as available, otherwise CPU.
# NOTE(review): nothing in the visible cells references DEVICE - tensors and the
# model are never moved with .to(DEVICE), and the scoring code calls .numpy()
# directly on model outputs - confirm whether GPU training is intended.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [63]:
class TorchFM(nn.Module):
    """Second-order factorization machine that returns one raw score per row.

    The output is ``lin(x) + 0.5 * sum_f((x @ V)_f ** 2 - (x ** 2 @ V ** 2)_f)``,
    i.e. a linear term plus all pairwise feature interactions weighted by the
    inner products of the rows of ``V``. No sigmoid is applied.

    Args:
        n: Number of input features (required despite the ``None`` default,
            which is kept only so the signature stays unchanged).
        k: Size of the latent factor attached to every feature.

    Raises:
        TypeError: If ``n`` is not provided.
    """

    def __init__(self, n=None, k=5):
        super().__init__()
        if n is None:
            # Fail with a clear message instead of the opaque error torch.randn
            # raises when it receives None as a dimension.
            raise TypeError("TorchFM requires the number of input features `n`")
        # Factor matrix, initialised from a standard normal distribution.
        # nn.Parameter registers it so that optimizers update it.
        self.V = nn.Parameter(torch.randn(n, k), requires_grad=True)
        self.lin = nn.Linear(n, 1)

    def forward(self, x):
        """Return scores of shape ``(batch, 1)`` for ``x`` of shape ``(batch, n)``."""
        # (sum_i v_if * x_i)^2, summed over the k factors.
        square_of_sum = torch.matmul(x, self.V).pow(2).sum(1, keepdim=True)
        # sum_i (v_if * x_i)^2, summed over the k factors.
        sum_of_square = torch.matmul(x.pow(2), self.V.pow(2)).sum(1, keepdim=True)

        # Half the difference equals the sum over pairs i < j of <v_i, v_j> * x_i * x_j.
        out_inter = 0.5 * (square_of_sum - sum_of_square)
        out_lin = self.lin(x)

        return out_inter + out_lin

In [64]:
# Feature tables for train and test plus the target table; all three are
# indexed by the match_id_hash column.
train_df = pd.read_csv(
    "../../../../tmp/dota_train_binary_heroes.csv.zip",
    index_col='match_id_hash',
)
test_df = pd.read_csv(
    '../../../../tmp/dota_test_binary_heroes.csv.zip',
    index_col='match_id_hash',
)
target = pd.read_csv(
    "../../../../tmp/train_targets.csv.zip",
    index_col='match_id_hash',
)

# Labels as a float32 column vector of shape (n_rows, 1), the same layout as
# the (batch, 1) scores the model returns.
y = target['radiant_win'].to_numpy().astype(np.float32).reshape(-1, 1)

In [65]:
# Cast both feature matrices to float32 NumPy arrays before they are wrapped
# as torch tensors.
X_train, X_test = (
    frame.to_numpy().astype(np.float32) for frame in (train_df, test_df)
)

In [66]:
# To compute probabilities from raw model scores (logits)
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    Only ``exp`` of non-positive numbers is evaluated, so large-magnitude
    inputs cannot overflow (the naive formula emits an overflow warning and
    goes through ``inf`` for very negative ``x``).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``x``; a scalar input
        yields a NumPy scalar.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))          # always in (0, 1], never overflows
    lower = decay / (1 + decay)         # sigmoid(-|x|)
    # sigmoid(|x|) == 1 - sigmoid(-|x|); pick the branch matching the sign of x.
    probs = np.where(x >= 0, 1 - lower, lower)
    return probs[()]                    # unwrap 0-d results to a scalar

In [67]:
# Wrap the training matrix and labels as tensors before building the datasets
# below; without this the cell raises NameError on a fresh kernel. X_test is
# intentionally not rebound here and stays a NumPy array in this cell.
X_tensor, y_tensor = torch.from_numpy(X_train), torch.from_numpy(y)

train_preds = np.zeros(y.shape)
test_preds = np.zeros((X_test.shape[0], 1))

# NOTE(review): both datasets wrap exactly the same tensors, so "valid" is the
# training data, not a held-out split.
train_set = TensorDataset(X_tensor, y_tensor)
valid_set = TensorDataset(X_tensor, y_tensor)

In [68]:
# Shuffled mini-batch loader over the training dataset.
dl_train = DataLoader(
    dataset=train_set,
    batch_size=1024,
    shuffle=True,
)

In [69]:
# Training configuration shared by the cells below.
batch_size, epochs = 1024, 300

# The model emits raw scores, so the loss applies the sigmoid internally.
criterion = nn.BCEWithLogitsLoss()

# Kept as a class (not an instance) so it can be instantiated per model.
optimizer_class = torch.optim.SGD

In [70]:
train_preds = np.zeros(y.shape)
test_preds = np.zeros((X_test.shape[0], 1))

# X_test is rebound from ndarray to tensor on this line. torch.as_tensor wraps
# an ndarray without copying, like torch.from_numpy, but also passes an existing
# tensor through, so re-running this cell no longer raises TypeError.
X_tensor, X_test, y_tensor = torch.as_tensor(X_train), torch.as_tensor(X_test), torch.as_tensor(y)

In [71]:
models = []
scores = []
train_preds = np.zeros(y.shape)
test_preds = np.zeros((X_test.shape[0], 1))

# NOTE(review): both datasets wrap the same tensors, so every "valid" number
# reported below is measured on the training data, not on a held-out split.
train_set = TensorDataset(X_tensor, y_tensor)
valid_set = TensorDataset(X_tensor, y_tensor)

loaders = {'train': DataLoader(train_set, batch_size=batch_size, shuffle=True),
           'valid': DataLoader(valid_set, batch_size=batch_size, shuffle=False)}

model = TorchFM(n=X_train.shape[1])
best_model_wts = copy.deepcopy(model.state_dict())

optimizer = optimizer_class(model.parameters(), lr=0.01, momentum=0.9)

best_score = 0.
for epoch in range(epochs):
    losses = {'train': 0., 'valid': 0.}

    for phase in ['train', 'valid']:
        is_train = phase == 'train'
        model.train(is_train)  # model.train(False) is equivalent to model.eval()

        for batch_x, batch_y in loaders[phase]:
            # The forward pass must sit inside the context manager; otherwise
            # autograd graphs are also built for every validation batch.
            with torch.set_grad_enabled(is_train):
                out = model(batch_x)
                loss = criterion(out, batch_y)

                if is_train:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # the division below yields a per-sample average.
            losses[phase] += loss.item() * batch_x.size(0)

        losses[phase] /= len(loaders[phase].dataset)

    # after each epoch check if we improved roc auc and if yes - save model
    with torch.no_grad():
        model.eval()
        valid_preds = sigmoid(model(X_tensor).numpy())
        epoch_score = roc_auc_score(y, valid_preds)
        if epoch_score > best_score:
            best_model_wts = copy.deepcopy(model.state_dict())
            best_score = epoch_score

    if ((epoch+1) % 30) == 0:
        print(f'epoch {epoch+1} train loss: {losses["train"]:.3f} valid loss {losses["valid"]:.3f} valid roc auc {epoch_score:.3f}')

# Restore the best-scoring weights, then predict on the (training) data and the test set.
with torch.no_grad():
    model.load_state_dict(best_model_wts)
    model.eval()

    train_preds = sigmoid(model(X_tensor).numpy())
    fold_score = roc_auc_score(y, train_preds)
    scores.append(fold_score)
    print(f'Best ROC AUC score {fold_score}')
    models.append(model)

    # as_tensor accepts X_test whether it is still an ndarray or already a tensor.
    test_preds += sigmoid(model(torch.as_tensor(X_test)).numpy())

# TODO: wrap the training above in K-fold cross-validation; then report
# np.mean(scores) / np.std(scores) and divide test_preds by the number of folds.

230 5
epoch 30 train loss: 0.990 valid loss 0.978 valid roc auc 0.552
epoch 60 train loss: 0.730 valid loss 0.728 valid roc auc 0.594
epoch 90 train loss: 0.686 valid loss 0.685 valid roc auc 0.616
epoch 120 train loss: 0.671 valid loss 0.671 valid roc auc 0.629
epoch 150 train loss: 0.665 valid loss 0.665 valid roc auc 0.638
epoch 180 train loss: 0.661 valid loss 0.661 valid roc auc 0.643
epoch 210 train loss: 0.659 valid loss 0.658 valid roc auc 0.647
epoch 240 train loss: 0.657 valid loss 0.657 valid roc auc 0.650
epoch 270 train loss: 0.655 valid loss 0.655 valid roc auc 0.653
epoch 300 train loss: 0.654 valid loss 0.654 valid roc auc 0.655
Best ROC AUC score 0.6548929243466133


In [38]:
# random_state only has an effect when shuffle=True. Without it, current
# scikit-learn raises ValueError and older releases silently ignored the seed.
# NOTE(review): shuffling changes fold membership compared with the old
# unshuffled behaviour - drop random_state instead if sequential folds are wanted.
folds = KFold(n_splits=5, shuffle=True, random_state=17)