In [1]:
# Offline installs from local /kaggle/input paths: --no-index keeps pip from
# contacting a package index, so pytorch-tabnet is resolved from the given wheel;
# iterative-stratification is built from its local source directory.
!pip install --no-index --find-links /kaggle/input/pytorchtabnet/pytorch_tabnet-3.1.1-py3-none-any.whl pytorch-tabnet
!pip install /kaggle/input/iterative-stratification/iterative-stratification-master/

Looking in links: /kaggle/input/pytorchtabnet/pytorch_tabnet-3.1.1-py3-none-any.whl
Processing /kaggle/input/pytorchtabnet/pytorch_tabnet-3.1.1-py3-none-any.whl
Installing collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-3.1.1
Processing /kaggle/input/iterative-stratification/iterative-stratification-master
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
Building wheels for collected packages: iterative-stratification
  Building wheel for iterative-stratification (setup.py) ... [?25l- \ done
[?25h  Created wheel for iterative-stratification: filename=iterative_stratification-0.1.6-py3-none-any.whl

In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate the full path of every file found anywhere under /kaggle/input.
input_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/lish-moa/train_targets_scored.csv
/kaggle/input/lish-moa/sample_submission.csv
/kaggle/input/lish-moa/train_drug.csv
/kaggle/input/lish-moa/train_targets_nonscored.csv
/kaggle/input/lish-moa/train_features.csv
/kaggle/input/lish-moa/test_features.csv
/kaggle/input/pytorchtabnet/pytorch_tabnet-2.0.0-py3-none-any.whl
/kaggle/input/pytorchtabnet/pytorch_tabnet-3.1.0-py3-none-any.whl
/kaggle/input/pytorchtabnet/pytorch_tabnet-2.0.1-py3-none-any.whl
/kaggle/input/pytorchtabnet/pytorch_tabnet-1.2.0-py3-none-any.whl
/kaggle/input/pytorchtabnet/pytorch_tabnet-3.1.1-py3-none-any.whl
/kaggle/input/pytorchtabnet/pytorch_tabnet-3.0.0-py3-none-any.whl
/kaggle/input/iterative-stratification/iterative-stratification-master/.travis.yml
/kaggle/input/iterative-stratification/iterative-stratification-master/setup.cfg
/kaggle/input/iterative-stratification/iterative-stratification-master/LICENSE
/kaggle/input/iterative-stratification/iterative-stratification-master/.gitignore
/kaggle/input/

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor
from scipy.special import expit
from sklearn.metrics import log_loss
from torch.utils.data import DataLoader

In [4]:
class MoADataset:
    """Indexable dataset that pairs each feature row with its target row.

    Both constructor arguments must expose ``.values`` (e.g. pandas
    DataFrames). The underlying arrays are stored as-is and individual rows
    are converted to float32 tensors only when they are requested.
    """

    def __init__(self, features, targets):
        self.features, self.targets = features.values, targets.values

    def __len__(self):
        # One sample per row of the feature array.
        return len(self.features)

    @staticmethod
    def _row_to_tensor(row):
        """Copy one array row into a new float32 tensor."""
        return torch.tensor(row, dtype=torch.float)

    def __getitem__(self, idx):
        # Returns (features, targets) for position ``idx``.
        return (
            self._row_to_tensor(self.features[idx]),
            self._row_to_tensor(self.targets[idx]),
        )


class TestMoADataset:
    """Indexable, target-less dataset used at inference time.

    ``features`` must expose ``.values`` (e.g. a pandas DataFrame); each item
    is a single feature row converted to a float32 tensor on access.
    """

    def __init__(self, features):
        self.features = features.values

    def __len__(self):
        # One sample per row of the feature array.
        return len(self.features)

    def __getitem__(self, idx):
        row = self.features[idx]
        return torch.tensor(row, dtype=torch.float)
    
import torch.nn as nn


class SimpleNet(nn.Module):
    """Fully connected network with two ReLU hidden layers that emits raw logits.

    Dropout is applied once, after the second hidden activation and before
    the output projection. No sigmoid is applied in ``forward``.

    Args:
        input_size: number of input features.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        output_size: number of output logits.
        dropout: dropout probability (default 0.2).
    """

    def __init__(self, input_size, hidden_size1, hidden_size2,
                 output_size, dropout=0.2):
        super().__init__()
        # The attribute names fc1/fc2/fc3 become state_dict keys, and the layers
        # are created in this order so parameter initialisation order is fixed.
        widths = (input_size, hidden_size1, hidden_size2, output_size)
        self.fc1, self.fc2, self.fc3 = [
            nn.Linear(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])
        ]
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        hidden = self.relu(self.fc1(x))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        return self.fc3(hidden)

In [5]:
class LogitsLogLoss(Metric):
    """Mean binary cross-entropy evaluated on raw logits.

    The metric is named 'logits_ll' and is flagged with ``_maximize = False``.
    """

    def __init__(self):
        self._maximize = False
        self._name = 'logits_ll'

    def __call__(self, y_true, y_pred):
        """Return the mean log loss of logits ``y_pred`` against ``y_true``."""
        eps = 1e-15  # keeps both log() arguments strictly positive
        prob = expit(y_pred)
        neg_term = (1 - y_true) * np.log(1 - prob + eps)
        pos_term = y_true * np.log(prob + eps)
        return np.mean(-(neg_term + pos_term))

In [6]:
def seed_everything(seed, use_cuda=False):
    """Seed every random number generator this notebook may touch.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    torch's CPU generator, and exports ``PYTHONHASHSEED``. When ``use_cuda``
    is true the CUDA generators are seeded as well and cuDNN is switched to
    deterministic, non-benchmarking mode.

    Args:
        seed: integer seed applied to all generators.
        use_cuda: also seed CUDA and configure cuDNN for determinism.
    """
    # Imported locally because the notebook's import cell does not load `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

    if use_cuda:
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

In [7]:
def preprocess(df):
    """Return an encoded copy of ``df`` without control-vehicle rows.

    Rows whose ``cp_type`` is ``'ctl_vehicle'`` are dropped, ``cp_dose`` is
    mapped D1/D2 -> 0/1 and ``cp_time`` is mapped 24/48/72 -> 0/1/2. The index
    of the result is reset to 0..n-1.

    The input frame is left untouched, so the function can safely be called
    more than once on the same frame (re-running a cell no longer turns the
    already-encoded columns into NaN).

    Args:
        df: DataFrame with ``cp_type``, ``cp_dose`` and ``cp_time`` columns.

    Returns:
        A new DataFrame with the same columns in the same order.
    """
    dose_codes = {'D1': 0, 'D2': 1}
    time_codes = {24: 0, 48: 1, 72: 2}

    treated = df[df['cp_type'] != 'ctl_vehicle']
    # assign() builds a new frame and overwrites the columns in place (order kept).
    return treated.assign(
        cp_dose=treated['cp_dose'].map(dose_codes),
        cp_time=treated['cp_time'].map(time_codes),
    ).reset_index(drop=True)


def prepare_data(dir):
    """Load the competition CSVs from ``dir`` and build the modelling frames.

    Reads train_features.csv, train_targets_scored.csv, test_features.csv and
    sample_submission.csv, joins features and targets on ``sig_id`` and runs
    ``preprocess`` on the joined training frame and on the test frame.

    Returns:
        X_train: preprocessed training features without ``sig_id``/``cp_type``.
        Y_train: target columns (including ``sig_id``) of the preprocessed rows.
        Y_train_stub: target columns of the joined frame taken before preprocessing.
        X_test: preprocessed test features without ``cp_type`` (``sig_id`` kept).
        ss: the sample submission frame as read from disk.
    """
    features_raw = pd.read_csv(f'{dir}/train_features.csv')
    targets_raw = pd.read_csv(f'{dir}/train_targets_scored.csv')
    test_raw = pd.read_csv(f'{dir}/test_features.csv')
    ss = pd.read_csv(f'{dir}/sample_submission.csv')

    feature_cols = features_raw.columns
    target_cols = targets_raw.columns

    joined = features_raw.merge(targets_raw, on='sig_id')
    # Snapshot of the targets for every joined row, taken before preprocessing.
    Y_train_stub = joined.loc[:, target_cols]

    train = preprocess(joined)
    X_test = preprocess(test_raw).drop(columns=['cp_type'])

    X_train = train.loc[:, feature_cols].drop(columns=['sig_id', 'cp_type'])
    Y_train = train.loc[:, target_cols]

    return X_train, Y_train, Y_train_stub, X_test, ss

In [8]:
# Settings referenced by both training paths.
SEED = 42
NUM_FOLDS = 5
WEIGHT_DECAY = 1e-5

# TabNet training settings.
TAB_EPOCHS, TAB_BATCH_SIZE, TAB_LEARNING_RATE = 200, 1024, 2e-2

# SimpleNet training settings (BATCH_SIZE also sizes the DataLoaders).
EPOCHS, BATCH_SIZE, LEARNING_RATE = 200, 2048, 1e-3

In [9]:
def train_fun(model, optimizer, loss_fun, train_loader, device, epoch):
    """Run one optimisation pass over ``train_loader``.

    Puts the model in training mode, performs one optimizer step per batch
    and prints the batch loss on every fifth batch and on the last one.

    Args:
        model: network to optimise.
        optimizer: optimizer bound to the model's parameters.
        loss_fun: callable ``loss_fun(outputs, targets)`` returning a scalar tensor.
        train_loader: sized iterable of ``(inputs, targets)`` batches.
        device: device the batches are moved to.
        epoch: zero-based epoch index, used only in the progress message.

    Returns:
        The mean of the per-batch loss values.
    """
    model.train()
    n_batches = len(train_loader)
    batch_losses = []

    for step, (inputs, targets) in enumerate(train_loader, start=1):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = loss_fun(model(inputs), targets)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        batch_losses.append(loss_value)

        if step % 5 == 0 or step == n_batches:
            print(f'Train Epoch: {epoch + 1}, Batch: [{step}/{n_batches}], Loss: {loss_value:.3f}')

    return sum(batch_losses) / n_batches


def validate_fun(model, loss_fun, val_loader, device, epoch):
    """Evaluate the model on ``val_loader`` without updating it.

    Puts the model in eval mode, runs every batch under ``torch.no_grad()``
    and collects the sigmoid of the outputs.

    Args:
        model: network to evaluate.
        loss_fun: callable ``loss_fun(outputs, targets)`` returning a scalar tensor.
        val_loader: sized iterable of ``(inputs, targets)`` batches.
        device: device the batches are moved to.
        epoch: zero-based epoch index, used only in the progress message.

    Returns:
        ``(mean_loss, Y_pred)``: the mean per-batch loss and a NumPy array
        with the sigmoid outputs of all batches concatenated in order.
    """
    model.eval()
    n_batches = len(val_loader)
    total_loss = 0
    batch_probs = []

    for step, (inputs, targets) in enumerate(val_loader, start=1):
        inputs = inputs.to(device)
        targets = targets.to(device)

        with torch.no_grad():
            logits = model(inputs)

        loss_value = loss_fun(logits, targets).item()
        total_loss += loss_value
        batch_probs.append(logits.sigmoid().detach().cpu().numpy())

        if step % 1024 == 0 or step == n_batches:
            print(f'Validate Epoch: {epoch + 1}, Batch: [{step}/{n_batches}], Loss: {loss_value:.6f}')

    return total_loss / n_batches, np.concatenate(batch_probs)


def test_fun(model, test_loader, device):
    Y_pred_lst = []
    model.eval()

    for batch_idx, inputs in enumerate(test_loader):
        inputs = inputs.to(device)
        
        with torch.no_grad():
            outputs = model(inputs)

        Y_pred_lst.append(outputs.sigmoid().detach().cpu().numpy())

    Y_pred = np.concatenate(Y_pred_lst)

    return Y_pred


def train_simple_net(fold, X_test, train_loader, val_loader, train_size, val_idx, in_size, out_size,
                     device):
    """Train a SimpleNet on one fold and predict the test set with its best epoch.

    The model is trained for ``EPOCHS`` epochs with Adam and
    ``BCEWithLogitsLoss``. Whenever the validation loss improves, the weights
    are saved to ``simple_fold_<fold + 1>.pth`` and the validation predictions
    are stored in the out-of-fold array. After training, the best checkpoint
    is reloaded into a fresh model to predict ``X_test``.

    Args:
        fold: zero-based fold index (used in the checkpoint file name).
        X_test: test features exposing ``.values`` (no id column).
        train_loader: loader yielding ``(inputs, targets)`` training batches.
        val_loader: loader yielding ``(inputs, targets)`` validation batches.
        train_size: number of rows of the full training set.
        val_idx: row positions of this fold's validation samples.
        in_size: number of input features.
        out_size: number of targets.
        device: device used for training and inference.

    Returns:
        ``(best_loss, Y_pred, oof)``: best validation loss, test probabilities
        and a ``(train_size, out_size)`` array that is zero except at ``val_idx``.

    Raises:
        RuntimeError: if no epoch produced a checkpoint (e.g. ``EPOCHS`` is 0
            or the validation loss was never below infinity / was NaN).
    """
    checkpoint_path = f'simple_fold_{fold + 1}.pth'

    # The test loader does not depend on the epoch: build it once instead of on
    # every iteration (which also left it undefined when the loop never ran).
    test_loader = DataLoader(TestMoADataset(X_test), batch_size=BATCH_SIZE, shuffle=False)

    model = SimpleNet(in_size, 2048, 1024, out_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    loss_fun = nn.BCEWithLogitsLoss()
    best_loss = np.inf
    oof = np.zeros((train_size, out_size))
    checkpoint_saved = False

    for epoch in range(EPOCHS):
        train_fun(model, optimizer, loss_fun, train_loader, device, epoch)
        epoch_val_loss, val_Y_pred = validate_fun(model, loss_fun, val_loader, device, epoch)

        if epoch_val_loss < best_loss:
            best_loss = epoch_val_loss
            oof[val_idx] = val_Y_pred
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True

    if not checkpoint_saved:
        # Without this guard a stale checkpoint from an earlier run could be
        # loaded silently, or torch.load would fail with an unrelated error.
        raise RuntimeError(
            f'No checkpoint was written for fold {fold + 1}: '
            'validation loss never improved'
        )

    # Reload the best epoch into a fresh model; map_location keeps the load
    # working when the checkpoint device differs from the current one.
    model = SimpleNet(in_size, 2048, 1024, out_size).to(device)
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)

    Y_pred = test_fun(model, test_loader, device)

    return best_loss, Y_pred, oof


def train_tab_net(fold, X_test, X_train, Y_train, X_val, Y_val, train_size, val_idx, out_size):
    """Fit a TabNetRegressor on one fold and predict validation and test rows.

    The model is fitted with ``BCEWithLogitsLoss`` and monitored on the
    validation split with ``LogitsLogLoss``; its raw predictions are passed
    through ``expit`` to obtain probabilities.

    Args:
        fold: fold index (not used inside this function).
        X_test: test feature array.
        X_train, Y_train: training feature and target arrays of this fold.
        X_val, Y_val: validation feature and target arrays of this fold.
        train_size: number of rows of the full training set.
        val_idx: row positions of this fold's validation samples.
        out_size: number of targets.

    Returns:
        ``(best_val_loss, Y_pred, oof)``: the minimum recorded
        ``val_logits_ll``, test probabilities and a ``(train_size, out_size)``
        array that is zero except at ``val_idx``.
    """
    optimizer_cfg = {'lr': TAB_LEARNING_RATE, 'weight_decay': WEIGHT_DECAY}
    scheduler_cfg = {'mode': 'min', 'patience': 5, 'min_lr': 1e-5, 'factor': 0.9}

    model = TabNetRegressor(
        n_d=32,
        n_a=32,
        n_steps=1,
        gamma=1.8,
        lambda_sparse=0,
        optimizer_fn=torch.optim.Adam,
        optimizer_params=optimizer_cfg,
        scheduler_params=scheduler_cfg,
        scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
        mask_type='entmax',
        seed=SEED,
        verbose=10,
    )

    fit_options = {
        'eval_set': [(X_val, Y_val)],
        'eval_name': ['val'],
        'eval_metric': [LogitsLogLoss],
        'max_epochs': TAB_EPOCHS,
        'patience': 20,
        'batch_size': TAB_BATCH_SIZE,
        'virtual_batch_size': 128,
        'num_workers': 1,
        'drop_last': False,
        'loss_fn': nn.BCEWithLogitsLoss(),
    }
    model.fit(X_train=X_train, y_train=Y_train, **fit_options)

    # Only this fold's validation rows are filled; every other row stays zero.
    oof = np.zeros((train_size, out_size))
    oof[val_idx] = expit(model.predict(X_val))
    Y_pred = expit(model.predict(X_test))

    best_val_loss = np.min(model.history['val_logits_ll'])
    return best_val_loss, Y_pred, oof


def run_msk_fold_cv(X_train, Y_train, Y_train_stub, X_test, ss, num_folds, model_name, device):
    """Run multilabel-stratified K-fold CV and return averaged test predictions.

    For every fold a model is trained ('simple' -> ``train_simple_net``,
    'tab' -> ``train_tab_net``). Test predictions are averaged over the folds.
    Out-of-fold predictions are accumulated without averaging, because each
    training row belongs to the validation split of exactly one fold.

    Two scores are printed: the mean of the per-fold best validation losses,
    and the column-averaged log loss of the out-of-fold predictions against
    ``Y_train_stub`` (rows missing from ``Y_train`` are scored with a
    prediction of 0).

    Args:
        X_train: training features, one row per row of ``Y_train``.
        Y_train: training targets with a ``sig_id`` column.
        Y_train_stub: ``sig_id`` plus the same target columns for all rows to score.
        X_test: test features including a ``sig_id`` column.
        ss: sample submission frame (``sig_id`` first, then the target columns).
        num_folds: number of CV folds.
        model_name: ``'simple'`` or ``'tab'``.
        device: device passed on to the SimpleNet training path.

    Returns:
        DataFrame shaped like ``ss`` holding the fold-averaged predictions;
        ids in ``ss`` that are absent from ``X_test`` get 0.

    Raises:
        ValueError: if ``model_name`` is neither ``'simple'`` nor ``'tab'``.
    """
    if model_name not in ('simple', 'tab'):
        # Previously an unknown name surfaced later as a NameError on fold_Y_pred.
        raise ValueError(f"Unknown model_name {model_name!r}; expected 'simple' or 'tab'")

    targets = Y_train.drop('sig_id', axis=1)
    target_cols = list(targets.columns)
    n_targets = len(target_cols)
    X_test_features = X_test.drop('sig_id', axis=1)  # loop-invariant, computed once

    running_loss = 0
    Y_pred = np.zeros((X_test.shape[0], n_targets))
    oof = np.zeros((X_train.shape[0], n_targets))
    mskf = MultilabelStratifiedKFold(n_splits=num_folds, shuffle=True, random_state=0)

    # Stratify on the label columns only: sig_id is an identifier, not a label.
    for fold, (trn_idx, val_idx) in enumerate(mskf.split(X_train, targets)):
        # The split yields row positions, so index positionally with iloc.
        fold_X_train = X_train.iloc[trn_idx]
        fold_Y_train = targets.iloc[trn_idx]
        fold_X_val = X_train.iloc[val_idx]
        fold_Y_val = targets.iloc[val_idx]

        print(f'Fold: {fold + 1}')

        if model_name == 'simple':
            train_loader = DataLoader(MoADataset(fold_X_train, fold_Y_train),
                                      batch_size=BATCH_SIZE, shuffle=True)
            val_loader = DataLoader(MoADataset(fold_X_val, fold_Y_val),
                                    batch_size=BATCH_SIZE, shuffle=False)
            fold_loss, fold_Y_pred, fold_oof = train_simple_net(fold, X_test_features,
                                                                train_loader, val_loader,
                                                                X_train.shape[0], val_idx,
                                                                X_train.shape[1], n_targets,
                                                                device)
        else:  # model_name == 'tab'
            fold_loss, fold_Y_pred, fold_oof = train_tab_net(fold, X_test_features.to_numpy(),
                                                             fold_X_train.to_numpy(),
                                                             fold_Y_train.to_numpy(),
                                                             fold_X_val.to_numpy(),
                                                             fold_Y_val.to_numpy(),
                                                             X_train.shape[0], val_idx,
                                                             n_targets)
        Y_pred += fold_Y_pred
        oof += fold_oof
        running_loss += fold_loss

    Y_pred /= num_folds
    cv_loss = running_loss / num_folds
    # `oof` is deliberately NOT divided by num_folds: every row is written by
    # exactly one fold, so dividing would shrink each out-of-fold prediction
    # and corrupt the score computed below.

    # Build the OOF frame from scratch rather than assigning floats into the
    # integer target columns of a copy of Y_train.
    oof_frame = pd.DataFrame(oof, columns=target_cols)
    oof_frame.insert(0, 'sig_id', Y_train['sig_id'].to_numpy())
    oof_full = Y_train_stub.loc[:, ['sig_id']].merge(oof_frame, on='sig_id', how='left').fillna(0)

    Y_true = Y_train_stub.loc[:, target_cols].to_numpy()
    oof_values = oof_full.loc[:, target_cols].to_numpy()

    cv_score = 0

    for i in range(n_targets):
        # labels=[0, 1] keeps log_loss defined when a column holds a single class.
        cv_score += log_loss(Y_true[:, i], oof_values[:, i], labels=[0, 1])

    cv_score /= n_targets

    print(f'CV loss (ctl_vechile excluded): {cv_loss:.6f}')
    print(f'CV loss: {cv_score:.6f}')

    test_Y_pred = X_test.loc[:, ['sig_id']].merge(ss, how='left', on=['sig_id'])
    test_Y_pred.iloc[:, 1:] = Y_pred
    test_Y_pred = ss.loc[:, ['sig_id']].merge(test_Y_pred, on='sig_id', how='left').fillna(0)

    return test_Y_pred

In [10]:
def run_net(model_name, mode):
    """Seed the run, load the data and execute the requested mode.

    Uses CUDA when ``torch.cuda.is_available()`` and the CPU otherwise. Data
    is read from ``../input/lish-moa``.

    Args:
        model_name: model identifier forwarded to ``run_msk_fold_cv``.
        mode: only ``'cv'`` triggers training; any other value yields ``None``.

    Returns:
        The frame returned by ``run_msk_fold_cv`` for ``mode == 'cv'``,
        otherwise ``None``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    seed_everything(SEED, device == 'cuda')

    X_train, Y_train, Y_train_stub, X_test, ss = prepare_data('../input/lish-moa')

    if mode != 'cv':
        return None

    return run_msk_fold_cv(X_train, Y_train, Y_train_stub, X_test, ss, NUM_FOLDS, model_name, device)

In [11]:
# Run cross-validated TabNet training; `sub` receives the frame returned by run_net.
sub = run_net('tab', 'cv')



Fold: 1
Device used : cuda
epoch 0  | loss: 0.3576  | val_logits_ll: 0.04619 |  0:00:02s
epoch 10 | loss: 0.01994 | val_logits_ll: 0.01992 |  0:00:13s
epoch 20 | loss: 0.01767 | val_logits_ll: 0.01782 |  0:00:24s
epoch 30 | loss: 0.01677 | val_logits_ll: 0.0175  |  0:00:35s
epoch 40 | loss: 0.01636 | val_logits_ll: 0.01804 |  0:00:46s
epoch 50 | loss: 0.01597 | val_logits_ll: 0.01728 |  0:00:58s
epoch 60 | loss: 0.01586 | val_logits_ll: 0.01698 |  0:01:10s
epoch 70 | loss: 0.01549 | val_logits_ll: 0.017   |  0:01:21s
epoch 80 | loss: 0.01501 | val_logits_ll: 0.01747 |  0:01:33s

Early stopping occurred at epoch 82 with best_epoch = 62 and best_val_logits_ll = 0.01683
Best weights from best epoch are automatically used!
Fold: 2
Device used : cuda
epoch 0  | loss: 0.36189 | val_logits_ll: 0.04236 |  0:00:01s
epoch 10 | loss: 0.01957 | val_logits_ll: 0.01972 |  0:00:12s
epoch 20 | loss: 0.0175  | val_logits_ll: 0.0192  |  0:00:24s
epoch 30 | loss: 0.01698 | val_logits_ll: 0.01949 |  0:00:

In [12]:
# Write the predictions to submission.csv in the working directory, without the index column.
sub.to_csv('submission.csv', index=False)