In [115]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import random
import os
import sys
from pathlib import Path
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings('ignore')

# Network architecture adapted from kaggle.com/nicohrubec/pytorch-multilabel-neural-network/

In [116]:
import numpy as np
from joblib import Parallel, delayed
from scipy.interpolate import interp1d
from scipy.special import erf, erfinv
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


class GaussRankScaler(BaseEstimator, TransformerMixin):
    """Transform features by scaling each feature to a normal distribution.

    For every feature, the unique training values are ranked, the ranks are
    rescaled into ``[-(1 - epsilon), 1 - epsilon]`` and an interpolation function
    from raw value to scaled rank is stored. ``transform`` evaluates that
    function and applies the inverse error function to the result.

    Parameters
    ----------
    epsilon : float, optional, default 1e-4
        A small amount added to the lower bound or subtracted
        from the upper bound. This value prevents infinite number
        from occurring when applying the inverse error function.
    copy : boolean, optional, default True
        If False, try to avoid a copy and do inplace scaling instead.
        This is not guaranteed to always work inplace; e.g. if the data is
        not a NumPy array, a copy may still be returned.
    n_jobs : int or None, optional, default None
        Number of jobs to run in parallel.
        ``None`` means 1 and ``-1`` means using all processors.
    interp_kind : str or int, optional, default 'linear'
        Specifies the kind of interpolation as a string
        ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
        'previous', 'next', where 'zero', 'slinear', 'quadratic' and 'cubic'
        refer to a spline interpolation of zeroth, first, second or third
        order; 'previous' and 'next' simply return the previous or next value
        of the point) or as an integer specifying the order of the spline
        interpolator to use.
    interp_copy : bool, optional, default False
        If True, the interpolation function makes internal copies of x and y.
        If False, references to `x` and `y` are used.

    Attributes
    ----------
    interp_func_ : list
        The interpolation function for each feature in the training set.
    """

    def __init__(self, epsilon=1e-4, copy=True, n_jobs=None, interp_kind='linear', interp_copy=False):
        self.epsilon = epsilon
        self.copy = copy
        self.interp_kind = interp_kind
        self.interp_copy = interp_copy
        # Values outside the fitted range are extrapolated rather than rejected.
        self.fill_value = 'extrapolate'
        self.n_jobs = n_jobs

    def fit(self, X, y=None):
        """Fit interpolation function to link rank with original data for future scaling

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The data used to fit interpolation function for later scaling along the features axis.
        y
            Ignored

        Returns
        -------
        self
        """
        X = check_array(X, copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite=True)

        # One interpolation function per column (X.T iterates over features).
        self.interp_func_ = Parallel(n_jobs=self.n_jobs)(delayed(self._fit)(x) for x in X.T)
        return self

    def _fit(self, x):
        """Build the value -> scaled-rank interpolator for a single feature column."""
        x = self.drop_duplicates(x)
        # argsort of argsort yields the 0-based rank of every (unique) value.
        rank = np.argsort(np.argsort(x))
        bound = 1.0 - self.epsilon
        factor = np.max(rank) / 2.0 * bound
        scaled_rank = np.clip(rank / factor - bound, -bound, bound)
        return interp1d(
            x, scaled_rank, kind=self.interp_kind, copy=self.interp_copy, fill_value=self.fill_value)

    def transform(self, X, copy=None):
        """Scale the data with the Gauss Rank algorithm

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The data used to scale along the features axis.
        copy : bool, optional (default: None)
            Copy the input X or not.

        Returns
        -------
        ndarray, shape (n_samples, n_features)
        """
        check_is_fitted(self, 'interp_func_')

        copy = copy if copy is not None else self.copy
        X = check_array(X, copy=copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite=True)

        X = np.array(Parallel(n_jobs=self.n_jobs)(delayed(self._transform)(i, x) for i, x in enumerate(X.T))).T
        return X

    def _transform(self, i, x):
        """Map one feature column through its interpolator and the inverse error function."""
        # Extrapolation can push values seen only at transform time to or past +/-1,
        # where erfinv yields inf/nan. Clipping to the fitted bound keeps the output
        # finite; values inside the fitted range are already within the bound.
        bound = 1.0 - self.epsilon
        return erfinv(np.clip(self.interp_func_[i](x), -bound, bound))

    def inverse_transform(self, X, copy=None):
        """Scale back the data to the original representation

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data used to scale along the features axis.
        copy : bool, optional (default: None)
            Copy the input X or not.

        Returns
        -------
        ndarray, shape (n_samples, n_features)
        """
        check_is_fitted(self, 'interp_func_')

        copy = copy if copy is not None else self.copy
        X = check_array(X, copy=copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite=True)

        X = np.array(Parallel(n_jobs=self.n_jobs)(delayed(self._inverse_transform)(i, x) for i, x in enumerate(X.T))).T
        return X

    def _inverse_transform(self, i, x):
        """Invert one feature column: erf, then the interpolator with x and y swapped."""
        inv_interp_func = interp1d(self.interp_func_[i].y, self.interp_func_[i].x, kind=self.interp_kind,
                                   copy=self.interp_copy, fill_value=self.fill_value)
        return inv_interp_func(erf(x))

    @staticmethod
    def drop_duplicates(x):
        """Return `x` with repeated values removed, keeping first occurrences in original order."""
        is_unique = np.zeros_like(x, dtype=bool)
        is_unique[np.unique(x, return_index=True)[1]] = True
        return x[is_unique]

# NOTE(review): `submit`, `preds` and `X_test` are not defined anywhere in the visible part of this
# notebook (the frames loaded here are named train_X / test_X / train_Y / test_Y, and `targets` is
# only assigned in a later cell) — this looks like a stray snippet; confirm before Run All.
# Intent as written: store predictions in the submission frame, force control ('ctl_vehicle')
# rows to 0 for every target column, and write the CSV without the index.
submit[targets] = preds
submit.loc[X_test['cp_type']=='ctl_vehicle', targets] = 0
submit.to_csv('submission.csv', index=False)

In [3]:
import ctypes
import os

# Pre-load caffe2_nvrtc.dll (presumably a workaround for a local Windows PyTorch/CUDA setup —
# confirm it is still needed). A .dll can only be loaded on Windows, so the call is skipped on
# other platforms instead of raising OSError.
if os.name == 'nt':
    ctypes.cdll.LoadLibrary('caffe2_nvrtc.dll')

<CDLL 'caffe2_nvrtc.dll', handle 7ffae3170000 at 0x285220915e0>

In [4]:
seed = 42

def set_seed(seed):
    """Seed Python, NumPy and PyTorch RNGs (plus CUDA/cuDNN when a GPU is available)."""
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    # GPU path: seed every device and ask cuDNN for deterministic kernels.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
set_seed(seed)

In [5]:
# Predictions are clipped into [p_min, p_max] so the logarithms below stay finite.
p_min = 1e-15
p_max = 1 - p_min

def score(y_true, y_pred):
    """Mean binary cross-entropy over every element of `y_true` / `y_pred`."""
    truth = np.asarray(y_true)
    prob = np.clip(np.asarray(y_pred), p_min, p_max)
    positive_term = truth * np.log(prob)
    negative_term = (1 - truth) * np.log(1 - prob)
    return -(positive_term + negative_term).mean()

In [6]:
# The sample submission is read first: its per-column dtypes are reused when parsing the
# training targets, so both target frames share the same dtypes.
test_Y = pd.read_csv('../input/lish-moa/sample_submission.csv', index_col='sig_id')
target_dtypes = {f: test_Y.dtypes[f] for f in test_Y}
train_Y = pd.read_csv(
    '../input/lish-moa/train_targets_scored.csv',
    index_col='sig_id',
    dtype=target_dtypes,
)

# Feature tables, indexed by sig_id like the targets.
train_X = pd.read_csv('../input/lish-moa/train_features.csv', index_col='sig_id')
test_X = pd.read_csv('../input/lish-moa/test_features.csv', index_col='sig_id')

In [7]:
def _encode_treatment_columns(features):
    """Return a copy of `features` with the categorical treatment columns made numeric.

    * ``cp_time`` is divided by 24.
    * ``real_drug`` (bool) is True where ``cp_type == 'trt_cp'``.
    * ``cp_type`` is dropped; ``cp_dose`` is re-added as the last column with
      value 2 for 'D2' and 1 for everything else.

    The function is idempotent: a frame without a ``cp_type`` column is taken to
    be already encoded and is returned unchanged, so re-running the cell neither
    rescales ``cp_time`` again nor fails on the dropped columns.
    """
    if 'cp_type' not in features.columns:
        return features

    encoded = features.drop(columns=['cp_dose', 'cp_type'])
    encoded['cp_time'] = features['cp_time'] / 24
    encoded['real_drug'] = features['cp_type'] == 'trt_cp'
    # Appended last so the column order matches the previous in-place version.
    encoded['cp_dose'] = (features['cp_dose'] == 'D2').astype('int64') + 1
    return encoded


train_X = _encode_treatment_columns(train_X)
test_X = _encode_treatment_columns(test_X)

In [22]:
# Global training configuration read by the CV / train / eval helper functions.
nfolds = 6       # number of CV splits (also sizes the per-fold test-prediction array)
nstarts = 1      # size of the "starts" axis in the eval helpers' OOF array
nepochs = 50     # epochs per fold
batch_size = 128
val_batch_size = batch_size * 4  # larger batches for validation / test inference
# BCELoss takes probabilities, not logits; the ModelN classes below end in a sigmoid.
criterion = nn.BCELoss()
# Fixed random_state so every kfold.split call on the same targets yields the same folds.
kfold = MultilabelStratifiedKFold(n_splits=nfolds, random_state=517, shuffle=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [9]:
class Dataset_my(Dataset):
    """Array-backed dataset yielding ``(features, targets)`` float tensors.

    Parameters
    ----------
    df : indexable (e.g. 2-D array)
        Feature rows; ``df[idx]`` must be convertible by ``torch.FloatTensor``.
    targets : indexable or None
        Target rows; only stored and used when ``mode == 'train'``.
    mode : {'train', 'test'}, default 'train'
        In 'train' mode items are ``(features, targets)``; in 'test' mode the
        second element is the placeholder ``0``.

    Raises
    ------
    ValueError
        If `mode` is neither 'train' nor 'test'. Previously an unknown mode was
        accepted and ``__getitem__`` silently returned ``None``.
    """

    _MODES = ('train', 'test')

    def __init__(self, df, targets, mode='train'):
        if mode not in self._MODES:
            raise ValueError(f"mode must be one of {self._MODES}, got {mode!r}")
        self.mode = mode
        self.data = df
        if mode == 'train':
            self.targets = targets

    def __getitem__(self, idx):
        features = torch.FloatTensor(self.data[idx])
        if self.mode == 'train':
            return features, torch.FloatTensor(self.targets[idx])
        # 'test' mode: no labels, return a dummy target so batches keep the (x, y) shape.
        return features, 0

    def __len__(self):
        return len(self.data)

In [10]:
def run_CV_for_model(cur_model, model_num, train_X_loc, train_Y_loc, test_X_loc):
    """Train a fresh `cur_model` on every CV fold and checkpoint each fold's best epoch.

    Folds come from the global `kfold`, stratified on `train_Y_loc`. For each fold the
    state dict of the epoch with the lowest validation loss is written to
    ``./saved_params/model{model_num}/repeat_1_Fold_{fold}.pt``.

    Parameters
    ----------
    cur_model : callable
        Called as ``cur_model(n_features)`` to build the network.
    model_num
        Used only to name the checkpoint directory.
    train_X_loc, train_Y_loc
        Feature / target arrays indexed by the fold index arrays.
    test_X_loc
        Not used by this function.

    Returns
    -------
    dict
        ``{'train': ..., 'val': ...}`` losses of the best epoch of the LAST fold only
        (the return statement sits after the fold loop).
    """
    set_seed(seed)
    for n, (tr, te) in enumerate(kfold.split(train_Y_loc, train_Y_loc)):
        print(f'Train fold {n+1}')
        xtrain, xval = train_X_loc[tr], train_X_loc[te]
        ytrain, yval = train_Y_loc[tr], train_Y_loc[te]

        train_set = Dataset_my(xtrain, ytrain)
        val_set = Dataset_my(xval, yval)

        dataloaders = {
            'train': DataLoader(train_set, batch_size=batch_size, shuffle=True),
            'val': DataLoader(val_set, batch_size=val_batch_size, shuffle=False)
        }

        # New model, optimizer and LR scheduler for every fold.
        model = cur_model(train_X_loc.shape[1]).to(device)
        Path(f'./saved_params/model{model_num}').mkdir(parents=True, exist_ok=True)
        checkpoint_path = f'./saved_params/model{model_num}/repeat_{1}_Fold_{n+1}.pt'
        optimizer = optim.Adam(model.parameters(), weight_decay=1e-5)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, eps=1e-4, verbose=True)
        best_loss = {'train': np.inf, 'val': np.inf}

        for epoch in range(nepochs):
            epoch_loss = {'train': 0.0, 'val': 0.0}

            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0

                for i, (x, y) in enumerate(dataloaders[phase]):
                    x, y = x.to(device), y.to(device)

                    # Also executed in the 'val' phase, where no gradients are produced.
                    optimizer.zero_grad()

                    # Gradients are tracked only while training.
                    with torch.set_grad_enabled(phase=='train'):
                        preds = model(x)
                        loss = criterion(preds, y)

                        if phase=='train':
                            loss.backward()
                            optimizer.step()

                    # Mean of per-batch losses: every batch weighs the same, even a
                    # smaller final batch.
                    running_loss += loss.item() / len(dataloaders[phase])

                epoch_loss[phase] = running_loss

            print("Epoch {}/{}   -   loss: {:5.5f}   -   val_loss: {:5.5f}".format(epoch+1, nepochs, epoch_loss['train'], epoch_loss['val']))

            # The LR is reduced when the validation loss plateaus.
            scheduler.step(epoch_loss['val'])

            # epoch_loss is a new dict each epoch, so keeping a reference here is safe.
            if epoch_loss['val'] < best_loss['val']:
                best_loss = epoch_loss
                torch.save(model.state_dict(), checkpoint_path)
    # NOTE(review): returns the last fold's best losses only, not a per-fold summary.
    return best_loss

In [10]:
def train_model(cur_model, model_num, train_X_loc, train_Y_loc, checkpoint_name='repeat_1_full.pt'):
    """Train `cur_model` on all supplied rows (no validation split) and checkpoint it.

    Fixes two defects of the previous version: the checkpoint path referenced an
    undefined fold index ``n`` (NameError), and the epoch loss was never
    accumulated, so the scheduler always saw 0.0 and only the epoch-1 weights
    were ever saved.

    Parameters
    ----------
    cur_model : callable
        Called as ``cur_model(n_features)`` to build the network.
    model_num
        Used to name the checkpoint directory ``./saved_params/model{model_num}``.
    train_X_loc, train_Y_loc
        Feature / target arrays.
    checkpoint_name : str, default 'repeat_1_full.pt'
        File name inside the checkpoint directory. The default is deliberately
        different from the ``repeat_1_Fold_{k}.pt`` files written by the CV run so
        those are not overwritten.

    Returns
    -------
    dict
        ``{'train': loss}`` for the epoch whose weights were saved (lowest training loss).
    """
    set_seed(seed)
    train_loader = DataLoader(Dataset_my(train_X_loc, train_Y_loc), batch_size=batch_size, shuffle=True)

    model = cur_model(train_X_loc.shape[1]).to(device)
    checkpoint_dir = Path(f'./saved_params/model{model_num}')
    checkpoint_dir.mkdir(parents=True, exist_ok=True)
    checkpoint_path = checkpoint_dir / checkpoint_name
    optimizer = optim.Adam(model.parameters(), weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, eps=1e-4, verbose=True)
    best_loss = {'train': np.inf}

    for epoch in range(nepochs):
        model.train()
        running_loss = 0.0

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

            # Mean of per-batch losses, same convention as the CV loop.
            running_loss += loss.item() / len(train_loader)

        epoch_loss = {'train': running_loss}

        # No validation data here, so the schedule and checkpointing follow the training loss.
        scheduler.step(epoch_loss['train'])

        if epoch_loss['train'] < best_loss['train']:
            best_loss = epoch_loss
            torch.save(model.state_dict(), checkpoint_path)

    return best_loss

In [10]:
def predict_model(cur_model, model_num, test_X_loc, checkpoint_name='repeat_1_full.pt', control_mask=None):
    """Load a saved checkpoint and predict on `test_X_loc`.

    Fixes the previous version, which referenced the undefined names ``n``,
    ``phase`` and ``optimizer``, masked test predictions with a mask built from
    the *training* frame, and returned nothing.

    Parameters
    ----------
    cur_model : callable
        Called as ``cur_model(n_features)`` to build the network.
    model_num
        Selects the checkpoint directory ``./saved_params/model{model_num}``.
    test_X_loc
        Feature array to predict on.
    checkpoint_name : str, default 'repeat_1_full.pt'
        Checkpoint file name inside that directory.
    control_mask : array-like of bool or None
        True for rows whose predictions must be forced to 0. When None it is
        derived from the global ``test_X.real_drug == False``.

    Returns
    -------
    numpy.ndarray
        Predictions, one row per row of `test_X_loc`.

    Raises
    ------
    ValueError
        If the control mask length differs from the number of predicted rows.
    """
    set_seed(seed)
    test_loader = DataLoader(Dataset_my(test_X_loc, None, mode='test'), batch_size=val_batch_size, shuffle=False)

    model = cur_model(test_X_loc.shape[1]).to(device)
    checkpoint_path = f'./saved_params/model{model_num}/{checkpoint_name}'
    model.load_state_dict(torch.load(checkpoint_path))
    model.eval()

    batch_outputs = []
    with torch.no_grad():
        # The second item of each batch is the dummy target of 'test' mode; ignore it.
        for x, _ in test_loader:
            batch_outputs.append(model(x.to(device)))

    fold_preds = torch.cat(batch_outputs, dim=0).cpu().numpy()

    if control_mask is None:
        control_mask = (test_X.real_drug == False).values
    control_mask = np.asarray(control_mask, dtype=bool)
    if len(control_mask) != len(fold_preds):
        raise ValueError(
            f"control_mask has {len(control_mask)} rows but predictions have {len(fold_preds)}")
    fold_preds[control_mask] = 0

    print(fold_preds.shape)
    return fold_preds

In [89]:
def run_eval_for_model(cur_model, model_num, train_X_loc, train_Y_loc, test_X_loc):
    """Reload each fold checkpoint, collect held-out predictions and print the overall score.

    Folds are regenerated with the global `kfold` on `train_Y_loc`. The held-out
    predictions/targets are written into the first ``train_X.real_drug.sum()``
    rows of zero arrays shaped like the global `train_Y` before scoring, so every
    remaining row counts as "target 0, prediction 0". Test predictions are
    averaged over folds but neither stored nor returned; the function returns None.
    """
    set_seed(seed)
    n_rows, n_targets = len(train_X_loc), train_Y_loc.shape[1]
    held_out_truth, held_out_pred = [], []
    test_pred_by_fold = np.zeros((len(test_X_loc), n_targets, nfolds))

    for n, (_, te) in enumerate(kfold.split(train_Y_loc, train_Y_loc)):
        val_loader = DataLoader(
            Dataset_my(train_X_loc[te], train_Y_loc[te]), batch_size=val_batch_size, shuffle=False)
        test_loader = DataLoader(
            Dataset_my(test_X_loc, None, mode='test'), batch_size=val_batch_size, shuffle=False)

        checkpoint_path = f'./saved_params/model{model_num}/repeat_{1}_Fold_{n+1}.pt'
        model = cur_model(train_X_loc.shape[1]).to(device)
        model.load_state_dict(torch.load(checkpoint_path))
        model.eval()

        with torch.no_grad():
            # Held-out rows of this fold.
            for x, y in val_loader:
                held_out_truth.append(y.to(device))
                held_out_pred.append(model(x.to(device)))
            # Full test set, predicted once per fold.
            test_batches = [model(x.to(device)) for x, _ in test_loader]

        test_pred_by_fold[:, :, n] = torch.cat(test_batches, dim=0).cpu().numpy()

    oof_targets = torch.cat(held_out_truth, dim=0).cpu().numpy()

    # Only slot 0 of the "starts" axis is ever filled before averaging over it.
    oof = np.zeros((n_rows, nstarts, n_targets))
    oof[:, 0, :] = torch.cat(held_out_pred, dim=0).cpu().numpy()
    oof = oof.mean(axis=1)

    # Fold-averaged test predictions; computed as before but not returned.
    preds = test_pred_by_fold.mean(axis=2) / nstarts

    n_real = train_X.real_drug.sum()
    oof_real = np.zeros(train_Y.shape)
    oof_real[:n_real, :] = oof
    oof_targets_real = np.zeros(train_Y.shape)
    oof_targets_real[:n_real, :] = oof_targets
    print("Overall score is {:5.5f}".format(score(oof_targets_real, oof_real)))

In [173]:
def run_eval_for_model_true(cur_model, model_num, train_X_loc, train_Y_loc, test_X_loc):
    """Same evaluation as `run_eval_for_model`, but seeded with ``seed + 1``.

    Reloads each fold checkpoint, gathers held-out predictions, places them in the
    first ``train_X.real_drug.sum()`` rows of zero arrays shaped like the global
    `train_Y`, and prints the resulting score. Returns None.
    """
    set_seed(seed + 1)
    n_features = train_X_loc.shape[1]
    n_targets = train_Y_loc.shape[1]
    fold_truth, fold_oof = [], []
    per_fold_test = np.zeros((len(test_X_loc), n_targets, nfolds))

    def _loader(features, labels, mode):
        # Sequential (unshuffled) loader with the evaluation batch size.
        return DataLoader(Dataset_my(features, labels, mode=mode), batch_size=val_batch_size, shuffle=False)

    for n, (_, te) in enumerate(kfold.split(train_Y_loc, train_Y_loc)):
        val_loader = _loader(train_X_loc[te], train_Y_loc[te], 'train')
        test_loader = _loader(test_X_loc, None, 'test')

        checkpoint_path = f'./saved_params/model{model_num}/repeat_{1}_Fold_{n+1}.pt'
        model = cur_model(n_features).to(device)
        model.load_state_dict(torch.load(checkpoint_path))
        model.eval()

        test_chunks = []
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                fold_truth.append(y)
                fold_oof.append(model(x))
            for x, _ in test_loader:
                test_chunks.append(model(x.to(device)))

        per_fold_test[:, :, n] = torch.cat(test_chunks, dim=0).cpu().numpy()

    oof_targets = torch.cat(fold_truth, dim=0).cpu().numpy()

    # The OOF buffer has a "starts" axis of which only index 0 is populated.
    oof = np.zeros((len(train_X_loc), nstarts, n_targets))
    oof[:, 0, :] = torch.cat(fold_oof, dim=0).cpu().numpy()
    oof = np.mean(oof, axis=1)

    # Averaged test predictions are computed but not handed back to the caller.
    preds = np.mean(per_fold_test, axis=2) / nstarts

    real_count = train_X.real_drug.sum()
    oof_real = np.zeros(train_Y.shape)
    oof_targets_real = np.zeros(train_Y.shape)
    oof_real[:real_count, :] = oof
    oof_targets_real[:real_count, :] = oof_targets
    print("Overall score is {:5.5f}".format(score(oof_targets_real, oof_real)))

In [23]:
class Model0(nn.Module):
    """Three-layer MLP (BatchNorm -> Dropout -> weight-normed Linear, ReLU) with sigmoid outputs.

    Parameters
    ----------
    num_columns : int
        Number of input features.
    num_targets : int, default 206
        Number of output probabilities (previously hard-coded).

    Attribute names are unchanged so previously saved state dicts still load.
    """

    def __init__(self, num_columns, num_targets=206):
        super(Model0, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_columns)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_columns, 2048))

        self.batch_norm2 = nn.BatchNorm1d(2048)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(2048, 1024))

        self.batch_norm3 = nn.BatchNorm1d(1024)
        self.dropout3 = nn.Dropout(0.5)
        self.dense3 = nn.utils.weight_norm(nn.Linear(1024, num_targets))

    def forward(self, x):
        """Return per-target probabilities in [0, 1] for a batch of feature rows."""
        x = F.relu(self.dense1(self.dropout1(self.batch_norm1(x))))
        x = F.relu(self.dense2(self.dropout2(self.batch_norm2(x))))
        # torch.sigmoid replaces the deprecated F.sigmoid; the output is identical.
        return torch.sigmoid(self.dense3(self.dropout3(self.batch_norm3(x))))

In [24]:
# Dataset 0: training rows restricted to real-drug samples; test rows are all kept.
real_rows = train_X['real_drug'] == True
train_Y0 = train_Y.loc[real_rows].reset_index(drop=True)
train_X0 = train_X.loc[real_rows].drop(columns=['real_drug']).reset_index(drop=True)
test_X0 = test_X.drop(columns=['real_drug'])

# Column bookkeeping ('g-' and 'c-' prefixed feature groups, target names).
features = train_X0.columns.tolist()
nfeatures = len(features)
features_g = [col for col in features if 'g-' in col]
features_c = [col for col in features if 'c-' in col]
features_gc = features_g + features_c
targets = train_Y0.columns.tolist()
ntargets = len(targets)

# The training helpers index plain arrays, so drop down to NumPy.
train_Y0, train_X0, test_X0 = train_Y0.values, train_X0.values, test_X0.values

In [25]:
# Cross-validate Model0 on the real-drug-only arrays; checkpoints go to ./saved_params/model0/.
run_CV_for_model(Model0, 0, train_X0, train_Y0, test_X0)

Train fold 1
Epoch 1/50   -   loss: 0.38006   -   val_loss: 0.06840
Epoch 2/50   -   loss: 0.04316   -   val_loss: 0.02734
Epoch 3/50   -   loss: 0.02591   -   val_loss: 0.02280
Epoch 4/50   -   loss: 0.02208   -   val_loss: 0.02049
Epoch 5/50   -   loss: 0.02076   -   val_loss: 0.01965
Epoch 6/50   -   loss: 0.01947   -   val_loss: 0.01915
Epoch 7/50   -   loss: 0.01917   -   val_loss: 0.01879
Epoch 8/50   -   loss: 0.01834   -   val_loss: 0.01831
Epoch 9/50   -   loss: 0.01795   -   val_loss: 0.01786
Epoch 10/50   -   loss: 0.01795   -   val_loss: 0.01772
Epoch 11/50   -   loss: 0.01732   -   val_loss: 0.01754
Epoch 12/50   -   loss: 0.01713   -   val_loss: 0.01740
Epoch 13/50   -   loss: 0.01667   -   val_loss: 0.01722
Epoch 14/50   -   loss: 0.01635   -   val_loss: 0.01707
Epoch 15/50   -   loss: 0.01613   -   val_loss: 0.01724
Epoch 16/50   -   loss: 0.01595   -   val_loss: 0.01728
Epoch 17/50   -   loss: 0.01579   -   val_loss: 0.01690
Epoch 18/50   -   loss: 0.01559   -   val_lo

Epoch 44/50   -   loss: 0.01028   -   val_loss: 0.01611
Epoch 45/50   -   loss: 0.01012   -   val_loss: 0.01620
Epoch 46/50   -   loss: 0.01001   -   val_loss: 0.01616
Epoch 47/50   -   loss: 0.00986   -   val_loss: 0.01621
Epoch 48/50   -   loss: 0.00964   -   val_loss: 0.01627
Epoch 49/50   -   loss: 0.00955   -   val_loss: 0.01630
Epoch 50/50   -   loss: 0.00939   -   val_loss: 0.01632
Train fold 4
Epoch 1/50   -   loss: 0.37879   -   val_loss: 0.06638
Epoch 2/50   -   loss: 0.04274   -   val_loss: 0.02656
Epoch 3/50   -   loss: 0.02619   -   val_loss: 0.02227
Epoch 4/50   -   loss: 0.02257   -   val_loss: 0.02019
Epoch 5/50   -   loss: 0.02093   -   val_loss: 0.01906
Epoch 6/50   -   loss: 0.01988   -   val_loss: 0.01844
Epoch 7/50   -   loss: 0.01916   -   val_loss: 0.01807
Epoch 8/50   -   loss: 0.01860   -   val_loss: 0.01764
Epoch 9/50   -   loss: 0.01814   -   val_loss: 0.01724
Epoch 10/50   -   loss: 0.01781   -   val_loss: 0.01727
Epoch 11/50   -   loss: 0.01742   -   val_lo

Epoch 37/50   -   loss: 0.01141   -   val_loss: 0.01578
Epoch 38/50   -   loss: 0.01122   -   val_loss: 0.01578
Epoch 39/50   -   loss: 0.01110   -   val_loss: 0.01580
Epoch 40/50   -   loss: 0.01085   -   val_loss: 0.01583
Epoch 41/50   -   loss: 0.01075   -   val_loss: 0.01580
Epoch 42/50   -   loss: 0.01056   -   val_loss: 0.01588
Epoch 43/50   -   loss: 0.01035   -   val_loss: 0.01592
Epoch 44/50   -   loss: 0.01019   -   val_loss: 0.01588
Epoch 45/50   -   loss: 0.01011   -   val_loss: 0.01596
Epoch 46/50   -   loss: 0.00989   -   val_loss: 0.01603
Epoch 47/50   -   loss: 0.00972   -   val_loss: 0.01599
Epoch 48/50   -   loss: 0.00963   -   val_loss: 0.01601
Epoch 49/50   -   loss: 0.00938   -   val_loss: 0.01605
Epoch 50/50   -   loss: 0.00921   -   val_loss: 0.01614


{'train': 0.012243488516945105, 'val': 0.015673546236939728}

In [90]:
# Score Model0's fold checkpoints on the same arrays (hence the same folds) it was trained with.
run_eval_for_model(Model0, 0, train_X0, train_Y0, test_X0)

Overall score is 0.01484


In [53]:
class Model1(nn.Module):
    """Three-layer MLP (BatchNorm -> Dropout -> weight-normed Linear, ReLU) with sigmoid outputs.

    Parameters
    ----------
    num_columns : int
        Number of input features.
    num_targets : int, default 206
        Number of output probabilities (previously hard-coded).

    Attribute names are unchanged so previously saved state dicts still load.
    """

    def __init__(self, num_columns, num_targets=206):
        super(Model1, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_columns)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_columns, 2048))

        self.batch_norm2 = nn.BatchNorm1d(2048)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(2048, 1024))

        self.batch_norm3 = nn.BatchNorm1d(1024)
        self.dropout3 = nn.Dropout(0.5)
        self.dense3 = nn.utils.weight_norm(nn.Linear(1024, num_targets))

    def forward(self, x):
        """Return per-target probabilities in [0, 1] for a batch of feature rows."""
        x = F.relu(self.dense1(self.dropout1(self.batch_norm1(x))))
        x = F.relu(self.dense2(self.dropout2(self.batch_norm2(x))))
        # torch.sigmoid replaces the deprecated F.sigmoid; the output is identical.
        return torch.sigmoid(self.dense3(self.dropout3(self.batch_norm3(x))))

In [70]:
# Dataset 1: every training row (control rows included); only the helper column is removed.
train_Y1 = train_Y.reset_index(drop=True)
train_X1 = train_X.drop(columns=['real_drug']).reset_index(drop=True)
test_X1 = test_X.drop(columns=['real_drug'])

# Column bookkeeping ('g-' and 'c-' prefixed feature groups, target names).
features = train_X1.columns.tolist()
nfeatures = len(features)
features_g = [col for col in features if 'g-' in col]
features_c = [col for col in features if 'c-' in col]
features_gc = features_g + features_c
targets = train_Y1.columns.tolist()
ntargets = len(targets)

# The training helpers index plain arrays, so drop down to NumPy.
train_Y1, train_X1, test_X1 = train_Y1.values, train_X1.values, test_X1.values

In [71]:
# Cross-validate Model1 on all training rows (controls included); checkpoints go to ./saved_params/model1/.
run_CV_for_model(Model1, 1, train_X1, train_Y1, test_X1)

Train fold 1
Epoch 1/50   -   loss: 0.35275   -   val_loss: 0.05716
Epoch 2/50   -   loss: 0.03776   -   val_loss: 0.02479
Epoch 3/50   -   loss: 0.02354   -   val_loss: 0.02100
Epoch 4/50   -   loss: 0.02070   -   val_loss: 0.01921
Epoch 5/50   -   loss: 0.01932   -   val_loss: 0.01823
Epoch 6/50   -   loss: 0.01858   -   val_loss: 0.01764
Epoch 7/50   -   loss: 0.01789   -   val_loss: 0.01747
Epoch 8/50   -   loss: 0.01733   -   val_loss: 0.01672
Epoch 9/50   -   loss: 0.01718   -   val_loss: 0.01670
Epoch 10/50   -   loss: 0.01690   -   val_loss: 0.01644
Epoch 11/50   -   loss: 0.01647   -   val_loss: 0.01624
Epoch 12/50   -   loss: 0.01626   -   val_loss: 0.01613
Epoch 13/50   -   loss: 0.01585   -   val_loss: 0.01586
Epoch 14/50   -   loss: 0.01574   -   val_loss: 0.01582
Epoch 15/50   -   loss: 0.01553   -   val_loss: 0.01573
Epoch 16/50   -   loss: 0.01526   -   val_loss: 0.01560
Epoch 17/50   -   loss: 0.01518   -   val_loss: 0.01563
Epoch 18/50   -   loss: 0.01500   -   val_lo

Epoch 44/50   -   loss: 0.01139   -   val_loss: 0.01490
Epoch 45/50   -   loss: 0.01126   -   val_loss: 0.01493
Epoch 46/50   -   loss: 0.01115   -   val_loss: 0.01501
Epoch 47/50   -   loss: 0.01106   -   val_loss: 0.01493
Epoch 48/50   -   loss: 0.01084   -   val_loss: 0.01495
Epoch 49/50   -   loss: 0.01069   -   val_loss: 0.01501
Epoch 50/50   -   loss: 0.01062   -   val_loss: 0.01501
Train fold 4
Epoch 1/50   -   loss: 0.35058   -   val_loss: 0.05238
Epoch 2/50   -   loss: 0.03725   -   val_loss: 0.02476
Epoch 3/50   -   loss: 0.02348   -   val_loss: 0.02092
Epoch 4/50   -   loss: 0.02058   -   val_loss: 0.01970
Epoch 5/50   -   loss: 0.01958   -   val_loss: 0.01847
Epoch 6/50   -   loss: 0.01843   -   val_loss: 0.01793
Epoch 7/50   -   loss: 0.01825   -   val_loss: 0.01746
Epoch 8/50   -   loss: 0.01761   -   val_loss: 0.01705
Epoch 9/50   -   loss: 0.01704   -   val_loss: 0.01663
Epoch 10/50   -   loss: 0.01657   -   val_loss: 0.01648
Epoch 11/50   -   loss: 0.01632   -   val_lo

Epoch 37/50   -   loss: 0.01245   -   val_loss: 0.01500
Epoch 38/50   -   loss: 0.01228   -   val_loss: 0.01487
Epoch 39/50   -   loss: 0.01225   -   val_loss: 0.01485
Epoch 40/50   -   loss: 0.01211   -   val_loss: 0.01489
Epoch 41/50   -   loss: 0.01193   -   val_loss: 0.01497
Epoch 42/50   -   loss: 0.01194   -   val_loss: 0.01489
Epoch 43/50   -   loss: 0.01176   -   val_loss: 0.01493
Epoch 44/50   -   loss: 0.01160   -   val_loss: 0.01489
Epoch 45/50   -   loss: 0.01151   -   val_loss: 0.01497
Epoch 46/50   -   loss: 0.01130   -   val_loss: 0.01491
Epoch 47/50   -   loss: 0.01120   -   val_loss: 0.01497
Epoch 48/50   -   loss: 0.01102   -   val_loss: 0.01500
Epoch 49/50   -   loss: 0.01093   -   val_loss: 0.01500
Epoch 50/50   -   loss: 0.01078   -   val_loss: 0.01501


{'train': 0.012253041182143182, 'val': 0.014853060594759881}

In [91]:
# NOTE(review): Model1's fold checkpoints were trained on folds of train_Y1, but this call
# re-splits train_Y0 (real-drug rows only). The fold memberships therefore differ, so rows
# scored here as "held out" may have been in that checkpoint's training data — the printed
# score is likely optimistic. Confirm before comparing it with Model0's score.
run_eval_for_model(Model1, 1, train_X0, train_Y0, test_X0)

Overall score is 0.01108


In [92]:
class Model2(nn.Module):
    """Three-layer MLP (BatchNorm -> Dropout -> weight-normed Linear, ReLU) with sigmoid outputs.

    Parameters
    ----------
    num_columns : int
        Number of input features.
    num_targets : int, default 206
        Number of output probabilities (previously hard-coded).

    Attribute names are unchanged so previously saved state dicts still load.
    """

    def __init__(self, num_columns, num_targets=206):
        super(Model2, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_columns)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_columns, 2048))

        self.batch_norm2 = nn.BatchNorm1d(2048)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(2048, 1024))

        self.batch_norm3 = nn.BatchNorm1d(1024)
        self.dropout3 = nn.Dropout(0.5)
        self.dense3 = nn.utils.weight_norm(nn.Linear(1024, num_targets))

    def forward(self, x):
        """Return per-target probabilities in [0, 1] for a batch of feature rows."""
        x = F.relu(self.dense1(self.dropout1(self.batch_norm1(x))))
        x = F.relu(self.dense2(self.dropout2(self.batch_norm2(x))))
        # torch.sigmoid replaces the deprecated F.sigmoid; the output is identical.
        return torch.sigmoid(self.dense3(self.dropout3(self.batch_norm3(x))))

In [103]:
# Dataset 2: all training rows plus the control rows of the test set, labelled with all-zero
# targets. DataFrame.append was removed in pandas 2.0, so the frames are stacked with pd.concat
# (same row order and index handling as the former append calls).
ctl_test_rows = test_X['real_drug'] == False
ctl_test_Y = test_Y[ctl_test_rows].copy()
for f in ctl_test_Y:
    ctl_test_Y[f] = 0

train_Y2 = pd.concat([train_Y.reset_index(drop=True), ctl_test_Y])
train_X2 = pd.concat([train_X.reset_index(drop=True), test_X[ctl_test_rows]]).drop(columns=['real_drug'])
test_X2 = test_X.drop(columns=['real_drug'])

# Column bookkeeping ('g-' and 'c-' prefixed feature groups, target names).
features = list(train_X2.columns)
nfeatures = len(features)
features_g = [col for col in train_X2.columns if 'g-' in col]
features_c = [col for col in train_X2.columns if 'c-' in col]
features_gc = features_g + features_c
targets = list(train_Y2.columns)
ntargets = len(targets)

# The training helpers index plain arrays, so drop down to NumPy.
train_Y2 = train_Y2.values
train_X2 = train_X2.values
test_X2 = test_X2.values

In [107]:
# Cross-validate Model2 on training rows plus zero-labelled test controls; checkpoints go to ./saved_params/model2/.
run_CV_for_model(Model2, 2, train_X2, train_Y2, test_X2)

Train fold 1
Epoch 1/50   -   loss: 0.34680   -   val_loss: 0.05793
Epoch 2/50   -   loss: 0.03625   -   val_loss: 0.02437
Epoch 3/50   -   loss: 0.02306   -   val_loss: 0.02070
Epoch 4/50   -   loss: 0.02047   -   val_loss: 0.01894
Epoch 5/50   -   loss: 0.01875   -   val_loss: 0.01771
Epoch 6/50   -   loss: 0.01821   -   val_loss: 0.01755
Epoch 7/50   -   loss: 0.01741   -   val_loss: 0.01693
Epoch 8/50   -   loss: 0.01706   -   val_loss: 0.01654
Epoch 9/50   -   loss: 0.01647   -   val_loss: 0.01604
Epoch 10/50   -   loss: 0.01633   -   val_loss: 0.01610
Epoch 11/50   -   loss: 0.01590   -   val_loss: 0.01586
Epoch 12/50   -   loss: 0.01567   -   val_loss: 0.01560
Epoch 13/50   -   loss: 0.01571   -   val_loss: 0.01565
Epoch 14/50   -   loss: 0.01527   -   val_loss: 0.01543
Epoch 15/50   -   loss: 0.01509   -   val_loss: 0.01532
Epoch 16/50   -   loss: 0.01492   -   val_loss: 0.01528
Epoch 17/50   -   loss: 0.01470   -   val_loss: 0.01524
Epoch 18/50   -   loss: 0.01454   -   val_lo

Epoch 44/50   -   loss: 0.01020   -   val_loss: 0.01481
Epoch 45/50   -   loss: 0.01007   -   val_loss: 0.01483
Epoch 46/50   -   loss: 0.00994   -   val_loss: 0.01483
Epoch 47/50   -   loss: 0.00978   -   val_loss: 0.01487
Epoch 48/50   -   loss: 0.00967   -   val_loss: 0.01490
Epoch 49/50   -   loss: 0.00953   -   val_loss: 0.01492
Epoch 50/50   -   loss: 0.00933   -   val_loss: 0.01489
Train fold 4
Epoch 1/50   -   loss: 0.34893   -   val_loss: 0.05279
Epoch 2/50   -   loss: 0.03639   -   val_loss: 0.02396
Epoch 3/50   -   loss: 0.02313   -   val_loss: 0.02001
Epoch 4/50   -   loss: 0.02024   -   val_loss: 0.01874
Epoch 5/50   -   loss: 0.01945   -   val_loss: 0.01818
Epoch 6/50   -   loss: 0.01804   -   val_loss: 0.01739
Epoch 7/50   -   loss: 0.01754   -   val_loss: 0.01709
Epoch 8/50   -   loss: 0.01705   -   val_loss: 0.01658
Epoch 9/50   -   loss: 0.01671   -   val_loss: 0.01660
Epoch 10/50   -   loss: 0.01667   -   val_loss: 0.01651
Epoch 11/50   -   loss: 0.01621   -   val_lo

Epoch 37/50   -   loss: 0.01160   -   val_loss: 0.01460
Epoch 38/50   -   loss: 0.01146   -   val_loss: 0.01460
Epoch 39/50   -   loss: 0.01127   -   val_loss: 0.01459
Epoch 40/50   -   loss: 0.01102   -   val_loss: 0.01461
Epoch 41/50   -   loss: 0.01087   -   val_loss: 0.01460
Epoch 42/50   -   loss: 0.01074   -   val_loss: 0.01461
Epoch 43/50   -   loss: 0.01059   -   val_loss: 0.01459
Epoch 44/50   -   loss: 0.01041   -   val_loss: 0.01461
Epoch 45/50   -   loss: 0.01024   -   val_loss: 0.01462
Epoch 46/50   -   loss: 0.01012   -   val_loss: 0.01465
Epoch 47/50   -   loss: 0.00997   -   val_loss: 0.01465
Epoch 48/50   -   loss: 0.00984   -   val_loss: 0.01464
Epoch 49/50   -   loss: 0.00971   -   val_loss: 0.01470
Epoch 50/50   -   loss: 0.00954   -   val_loss: 0.01470


{'train': 0.011270263120296256, 'val': 0.01458931085653603}

In [109]:
# NOTE(review): Model2's fold checkpoints were trained on folds of train_Y2, but this call
# re-splits train_Y0 (real-drug rows only). The fold memberships therefore differ, so rows
# scored here as "held out" may have been in that checkpoint's training data — the printed
# score is likely optimistic. Confirm before comparing it with the other models.
run_eval_for_model(Model2, 2, train_X0, train_Y0, test_X0)

Overall score is 0.01089


In [110]:
class Model3(nn.Module):
    """Three-stage feed-forward multilabel classifier.

    Every stage is BatchNorm1d -> Dropout -> weight-normalised Linear. The two
    hidden stages use leaky ReLU; the output stage applies a sigmoid, so
    ``forward`` returns probabilities rather than logits.

    NOTE(review): the layer stack is the same as in Model4 and Model5;
    consider sharing a single class.

    Parameters
    ----------
    num_columns : int
        Number of input features per sample.
    num_targets : int, optional, default 206
        Number of output units (one probability per target).
    """

    def __init__(self, num_columns, num_targets=206):
        super().__init__()
        # Attribute names and creation order are unchanged so that existing
        # state_dict keys and seeded weight initialisation still match.
        self.batch_norm1 = nn.BatchNorm1d(num_columns)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_columns, 2048))

        self.batch_norm2 = nn.BatchNorm1d(2048)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(2048, 1024))

        self.batch_norm3 = nn.BatchNorm1d(1024)
        self.dropout3 = nn.Dropout(0.5)
        self.dense3 = nn.utils.weight_norm(nn.Linear(1024, num_targets))

    def forward(self, x):
        """Map a (batch, num_columns) float tensor to (batch, num_targets) probabilities."""
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.leaky_relu(self.dense1(x))

        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = F.leaky_relu(self.dense2(x))

        x = self.batch_norm3(x)
        x = self.dropout3(x)
        # torch.sigmoid replaces the deprecated F.sigmoid; the result is identical.
        x = torch.sigmoid(self.dense3(x))

        return x

In [111]:
# Run the cross-validation helper for Model3 on the train_X2 / train_Y2 / test_X2 data.
# NOTE(review): the second argument (3) presumably identifies the model's saved
# artifacts -- confirm against run_CV_for_model.
run_CV_for_model(Model3, 3, train_X2, train_Y2, test_X2)

Train fold 1
Epoch 1/50   -   loss: 0.34673   -   val_loss: 0.05585
Epoch 2/50   -   loss: 0.03659   -   val_loss: 0.02352
Epoch 3/50   -   loss: 0.02339   -   val_loss: 0.02030
Epoch 4/50   -   loss: 0.02027   -   val_loss: 0.01888
Epoch 5/50   -   loss: 0.01885   -   val_loss: 0.01794
Epoch 6/50   -   loss: 0.01815   -   val_loss: 0.01756
Epoch 7/50   -   loss: 0.01761   -   val_loss: 0.01688
Epoch 8/50   -   loss: 0.01708   -   val_loss: 0.01651
Epoch 9/50   -   loss: 0.01672   -   val_loss: 0.01621
Epoch 10/50   -   loss: 0.01629   -   val_loss: 0.01594
Epoch 11/50   -   loss: 0.01594   -   val_loss: 0.01593
Epoch 12/50   -   loss: 0.01574   -   val_loss: 0.01574
Epoch 13/50   -   loss: 0.01554   -   val_loss: 0.01575
Epoch 14/50   -   loss: 0.01533   -   val_loss: 0.01558
Epoch 15/50   -   loss: 0.01509   -   val_loss: 0.01538
Epoch 16/50   -   loss: 0.01483   -   val_loss: 0.01523
Epoch 17/50   -   loss: 0.01466   -   val_loss: 0.01518
Epoch 18/50   -   loss: 0.01461   -   val_lo

Epoch 44/50   -   loss: 0.01027   -   val_loss: 0.01486
Epoch 45/50   -   loss: 0.01010   -   val_loss: 0.01488
Epoch 46/50   -   loss: 0.00994   -   val_loss: 0.01488
Epoch 47/50   -   loss: 0.00983   -   val_loss: 0.01491
Epoch 48/50   -   loss: 0.00969   -   val_loss: 0.01494
Epoch 49/50   -   loss: 0.00953   -   val_loss: 0.01497
Epoch 50/50   -   loss: 0.00939   -   val_loss: 0.01493
Train fold 4
Epoch 1/50   -   loss: 0.34887   -   val_loss: 0.05303
Epoch 2/50   -   loss: 0.03643   -   val_loss: 0.02413
Epoch 3/50   -   loss: 0.02307   -   val_loss: 0.02017
Epoch 4/50   -   loss: 0.02028   -   val_loss: 0.01868
Epoch 5/50   -   loss: 0.01901   -   val_loss: 0.01850
Epoch 6/50   -   loss: 0.01810   -   val_loss: 0.01757
Epoch 7/50   -   loss: 0.01758   -   val_loss: 0.01684
Epoch 8/50   -   loss: 0.01707   -   val_loss: 0.01662
Epoch 9/50   -   loss: 0.01653   -   val_loss: 0.01634
Epoch 10/50   -   loss: 0.01627   -   val_loss: 0.01611
Epoch 11/50   -   loss: 0.01594   -   val_lo

Epoch 37/50   -   loss: 0.01137   -   val_loss: 0.01448
Epoch 38/50   -   loss: 0.01123   -   val_loss: 0.01451
Epoch 39/50   -   loss: 0.01105   -   val_loss: 0.01449
Epoch 40/50   -   loss: 0.01088   -   val_loss: 0.01455
Epoch 41/50   -   loss: 0.01073   -   val_loss: 0.01451
Epoch 42/50   -   loss: 0.01065   -   val_loss: 0.01455
Epoch 43/50   -   loss: 0.01046   -   val_loss: 0.01453
Epoch 44/50   -   loss: 0.01034   -   val_loss: 0.01454
Epoch 45/50   -   loss: 0.01015   -   val_loss: 0.01454
Epoch 46/50   -   loss: 0.01003   -   val_loss: 0.01458
Epoch 47/50   -   loss: 0.00989   -   val_loss: 0.01459
Epoch 48/50   -   loss: 0.00972   -   val_loss: 0.01461
Epoch 49/50   -   loss: 0.00959   -   val_loss: 0.01464
Epoch 50/50   -   loss: 0.00942   -   val_loss: 0.01465


{'train': 0.011365584398419412, 'val': 0.014477502671070397}

In [112]:
# Run the evaluation helper for Model3 on the train_X0 / train_Y0 / test_X0 data.
# NOTE(review): Model3 was cross-validated on the *2 data but is evaluated here
# with the *0 data -- confirm that run_eval_for_model expects this.
run_eval_for_model(Model3, 3, train_X0, train_Y0, test_X0)

Overall score is 0.01051


In [117]:
class Model4(nn.Module):
    """Three-stage feed-forward multilabel classifier.

    Every stage is BatchNorm1d -> Dropout -> weight-normalised Linear. The two
    hidden stages use leaky ReLU; the output stage applies a sigmoid, so
    ``forward`` returns probabilities rather than logits.

    NOTE(review): the layer stack is the same as in Model3 and Model5;
    consider sharing a single class.

    Parameters
    ----------
    num_columns : int
        Number of input features per sample.
    num_targets : int, optional, default 206
        Number of output units (one probability per target).
    """

    def __init__(self, num_columns, num_targets=206):
        super().__init__()
        # Attribute names and creation order are unchanged so that existing
        # state_dict keys and seeded weight initialisation still match.
        self.batch_norm1 = nn.BatchNorm1d(num_columns)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_columns, 2048))

        self.batch_norm2 = nn.BatchNorm1d(2048)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(2048, 1024))

        self.batch_norm3 = nn.BatchNorm1d(1024)
        self.dropout3 = nn.Dropout(0.5)
        self.dense3 = nn.utils.weight_norm(nn.Linear(1024, num_targets))

    def forward(self, x):
        """Map a (batch, num_columns) float tensor to (batch, num_targets) probabilities."""
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.leaky_relu(self.dense1(x))

        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = F.leaky_relu(self.dense2(x))

        x = self.batch_norm3(x)
        x = self.dropout3(x)
        # torch.sigmoid replaces the deprecated F.sigmoid; the result is identical.
        x = torch.sigmoid(self.dense3(x))

        return x

In [158]:
# Build the Model4 inputs: stack the train and test rows, then add row-wise
# summary statistics over the 'g-' and 'c-' feature groups.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
# NOTE(review): `t` is not defined in this part of the notebook; it is assumed
# to be a DataFrame of extra label rows with train_Y's columns -- confirm.
train_Y4 = pd.concat([train_Y.reset_index(drop=True), t])
train_X4 = train_X.reset_index(drop=True)
test_X4 = test_X

all_X4 = pd.concat([train_X4, test_X4]).drop(columns=['real_drug'])

features_g = [col for col in train_X4.columns if 'g-' in col]
features_c = [col for col in train_X4.columns if 'c-' in col]

# (column suffix, DataFrame reduction) pairs, listed in the order in which the
# new columns are appended so the resulting column layout is unchanged.
row_stats4 = [
    ('sum', 'sum'),
    ('mean', 'mean'),
    ('std', 'std'),
    ('kurt', 'kurtosis'),
    ('skew', 'skew'),
]
feature_groups4 = [
    ('g', features_g),
    ('c', features_c),
    ('gc', features_g + features_c),
]
for group_prefix4, group_cols4 in feature_groups4:
    # Select once per group; the source columns are never modified below.
    group_frame4 = all_X4[group_cols4]
    for stat_suffix4, stat_method4 in row_stats4:
        all_X4[group_prefix4 + '_' + stat_suffix4] = getattr(group_frame4, stat_method4)(axis=1)



In [159]:
# Fit GaussRankScaler on the stacked train+test frame and transform it in one
# step. all_X4 is rebound to the scaled result, so re-running this cell alone
# would scale already-scaled data.
# NOTE(review): fitting on train and test rows together lets the test rows
# influence the scaling -- confirm this is intended.
scaler = GaussRankScaler()
all_X4 = scaler.fit_transform(all_X4)

In [160]:
# Fit a 687-component PCA on all_X4 and rebind all_X4 to the projected output.
# NOTE(review): the choice of 687 components is not explained here, and no
# random_state is passed; if sklearn selects a randomized SVD solver the
# projection may differ between runs -- confirm and consider pinning a seed.
pca_transformer = PCA(687)
all_X4 = pca_transformer.fit_transform(all_X4)

In [166]:
# Split the transformed matrix at row head4: leading rows become train_X4,
# trailing rows become test_X4.
# NOTE(review): head4 is not defined in the visible part of the notebook
# (execution counts jump from 160 to 166); presumably it equals the number of
# rows in train_Y4 -- confirm, since a fresh Restart & Run All depends on it.
train_X4 = all_X4[:head4]
test_X4 = all_X4[head4:]

In [167]:
# Run the cross-validation helper for Model4 on train_X4 / train_Y4 / test_X4.
# NOTE(review): the second argument (4) presumably identifies the model's saved
# artifacts -- confirm against run_CV_for_model.
run_CV_for_model(Model4, 4, train_X4, train_Y4, test_X4)

Train fold 1
Epoch 1/50   -   loss: 0.36586   -   val_loss: 0.06679
Epoch 2/50   -   loss: 0.04082   -   val_loss: 0.02530
Epoch 3/50   -   loss: 0.02381   -   val_loss: 0.02034
Epoch 4/50   -   loss: 0.02053   -   val_loss: 0.01865
Epoch 5/50   -   loss: 0.01872   -   val_loss: 0.01774
Epoch 6/50   -   loss: 0.01767   -   val_loss: 0.01708
Epoch 7/50   -   loss: 0.01690   -   val_loss: 0.01674
Epoch 8/50   -   loss: 0.01638   -   val_loss: 0.01640
Epoch 9/50   -   loss: 0.01580   -   val_loss: 0.01624
Epoch 10/50   -   loss: 0.01534   -   val_loss: 0.01604
Epoch 11/50   -   loss: 0.01499   -   val_loss: 0.01591
Epoch 12/50   -   loss: 0.01461   -   val_loss: 0.01576
Epoch 13/50   -   loss: 0.01425   -   val_loss: 0.01564
Epoch 14/50   -   loss: 0.01383   -   val_loss: 0.01555
Epoch 15/50   -   loss: 0.01349   -   val_loss: 0.01543
Epoch 16/50   -   loss: 0.01312   -   val_loss: 0.01545
Epoch 17/50   -   loss: 0.01276   -   val_loss: 0.01543
Epoch 18/50   -   loss: 0.01244   -   val_lo

Epoch 44/50   -   loss: 0.00508   -   val_loss: 0.01635
Epoch 45/50   -   loss: 0.00492   -   val_loss: 0.01641
Epoch 46/50   -   loss: 0.00472   -   val_loss: 0.01645
Epoch 47/50   -   loss: 0.00457   -   val_loss: 0.01657
Epoch 48/50   -   loss: 0.00443   -   val_loss: 0.01670
Epoch 49/50   -   loss: 0.00433   -   val_loss: 0.01666
Epoch 50/50   -   loss: 0.00422   -   val_loss: 0.01674
Train fold 4
Epoch 1/50   -   loss: 0.36759   -   val_loss: 0.05772
Epoch 2/50   -   loss: 0.04106   -   val_loss: 0.02437
Epoch 3/50   -   loss: 0.02381   -   val_loss: 0.02005
Epoch 4/50   -   loss: 0.02023   -   val_loss: 0.01854
Epoch 5/50   -   loss: 0.01864   -   val_loss: 0.01774
Epoch 6/50   -   loss: 0.01762   -   val_loss: 0.01728
Epoch 7/50   -   loss: 0.01687   -   val_loss: 0.01676
Epoch 8/50   -   loss: 0.01624   -   val_loss: 0.01657
Epoch 9/50   -   loss: 0.01584   -   val_loss: 0.01641
Epoch 10/50   -   loss: 0.01545   -   val_loss: 0.01620
Epoch 11/50   -   loss: 0.01500   -   val_lo

Epoch 37/50   -   loss: 0.00640   -   val_loss: 0.01558
Epoch 38/50   -   loss: 0.00629   -   val_loss: 0.01560
Epoch 39/50   -   loss: 0.00608   -   val_loss: 0.01567
Epoch 40/50   -   loss: 0.00586   -   val_loss: 0.01573
Epoch 41/50   -   loss: 0.00566   -   val_loss: 0.01579
Epoch 42/50   -   loss: 0.00555   -   val_loss: 0.01581
Epoch 43/50   -   loss: 0.00532   -   val_loss: 0.01594
Epoch 44/50   -   loss: 0.00515   -   val_loss: 0.01594
Epoch 45/50   -   loss: 0.00502   -   val_loss: 0.01602
Epoch 46/50   -   loss: 0.00485   -   val_loss: 0.01608
Epoch 47/50   -   loss: 0.00468   -   val_loss: 0.01622
Epoch 48/50   -   loss: 0.00462   -   val_loss: 0.01615
Epoch 49/50   -   loss: 0.00440   -   val_loss: 0.01626
Epoch 50/50   -   loss: 0.00429   -   val_loss: 0.01635


{'train': 0.011721657925061415, 'val': 0.015069309854879975}

In [172]:
# Evaluate Model4 on the first train_X.shape[0] rows only, restricted to the
# rows flagged by train_X['real_drug'].
# The mask is converted to a NumPy array so it is applied by position to both
# the feature matrix and the label frame; a pandas Series mask would be
# index-aligned against train_Y4, whose index was reset, and could select
# different rows than it does for train_X4.
n_train_rows = train_X.shape[0]
real_drug_mask = train_X['real_drug'].to_numpy()
run_eval_for_model(
    Model4,
    4,
    train_X4[:n_train_rows][real_drug_mask],
    train_Y4[:n_train_rows][real_drug_mask],
    test_X4,
)

Overall score is 0.01015


In [176]:
class Model5(nn.Module):
    """Three-stage feed-forward multilabel classifier.

    Every stage is BatchNorm1d -> Dropout -> weight-normalised Linear. The two
    hidden stages use leaky ReLU; the output stage applies a sigmoid, so
    ``forward`` returns probabilities rather than logits.

    NOTE(review): the layer stack is the same as in Model3 and Model4;
    consider sharing a single class.

    Parameters
    ----------
    num_columns : int
        Number of input features per sample.
    num_targets : int, optional, default 206
        Number of output units (one probability per target).
    """

    def __init__(self, num_columns, num_targets=206):
        super().__init__()
        # Attribute names and creation order are unchanged so that existing
        # state_dict keys and seeded weight initialisation still match.
        self.batch_norm1 = nn.BatchNorm1d(num_columns)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_columns, 2048))

        self.batch_norm2 = nn.BatchNorm1d(2048)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(2048, 1024))

        self.batch_norm3 = nn.BatchNorm1d(1024)
        self.dropout3 = nn.Dropout(0.5)
        self.dense3 = nn.utils.weight_norm(nn.Linear(1024, num_targets))

    def forward(self, x):
        """Map a (batch, num_columns) float tensor to (batch, num_targets) probabilities."""
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.leaky_relu(self.dense1(x))

        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = F.leaky_relu(self.dense2(x))

        x = self.batch_norm3(x)
        x = self.dropout3(x)
        # torch.sigmoid replaces the deprecated F.sigmoid; the result is identical.
        x = torch.sigmoid(self.dense3(x))

        return x

In [178]:
# Model5 reuses the Model4 feature matrices unchanged; only the labels differ.
train_X5 = train_X4
test_X5 = test_X4
train_Y5 = train_Y4.copy()
alpha_smoothing = 0.001
# Blend each label with the mean label of its own row:
#     y_smooth = (1 - alpha) * y + alpha * row_mean
# The row means are converted to a NumPy column vector before adding the new
# axis, because indexing a Series with [:, None] is no longer supported by pandas.
train_Y5 = (
    (1 - alpha_smoothing) * train_Y5
    + alpha_smoothing * train_Y5.mean(axis=1).to_numpy()[:, None]
)

In [179]:
# Run the cross-validation helper for Model5 on train_X5 / train_Y5 / test_X5
# (Model4's features with the smoothed labels).
# NOTE(review): the second argument (5) presumably identifies the model's saved
# artifacts -- confirm against run_CV_for_model.
run_CV_for_model(Model5, 5, train_X5, train_Y5, test_X5)

Train fold 1
Epoch 1/50   -   loss: 0.36745   -   val_loss: 0.06441
Epoch 2/50   -   loss: 0.04121   -   val_loss: 0.02531
Epoch 3/50   -   loss: 0.02399   -   val_loss: 0.02044
Epoch 4/50   -   loss: 0.02040   -   val_loss: 0.01866
Epoch 5/50   -   loss: 0.01873   -   val_loss: 0.01777
Epoch 6/50   -   loss: 0.01766   -   val_loss: 0.01715
Epoch 7/50   -   loss: 0.01692   -   val_loss: 0.01666
Epoch 8/50   -   loss: 0.01630   -   val_loss: 0.01642
Epoch 9/50   -   loss: 0.01579   -   val_loss: 0.01620
Epoch 10/50   -   loss: 0.01541   -   val_loss: 0.01606
Epoch 11/50   -   loss: 0.01501   -   val_loss: 0.01587
Epoch 12/50   -   loss: 0.01454   -   val_loss: 0.01574
Epoch 13/50   -   loss: 0.01419   -   val_loss: 0.01561
Epoch 14/50   -   loss: 0.01385   -   val_loss: 0.01560
Epoch 15/50   -   loss: 0.01344   -   val_loss: 0.01550
Epoch 16/50   -   loss: 0.01307   -   val_loss: 0.01544
Epoch 17/50   -   loss: 0.01271   -   val_loss: 0.01547
Epoch 18/50   -   loss: 0.01239   -   val_lo

Epoch 44/50   -   loss: 0.00581   -   val_loss: 0.01597
Epoch 45/50   -   loss: 0.00563   -   val_loss: 0.01613
Epoch 46/50   -   loss: 0.00553   -   val_loss: 0.01613
Epoch 47/50   -   loss: 0.00533   -   val_loss: 0.01620
Epoch 48/50   -   loss: 0.00518   -   val_loss: 0.01626
Epoch 49/50   -   loss: 0.00500   -   val_loss: 0.01634
Epoch 50/50   -   loss: 0.00486   -   val_loss: 0.01639
Train fold 4
Epoch 1/50   -   loss: 0.36519   -   val_loss: 0.05698
Epoch 2/50   -   loss: 0.04112   -   val_loss: 0.02351
Epoch 3/50   -   loss: 0.02427   -   val_loss: 0.01967
Epoch 4/50   -   loss: 0.02041   -   val_loss: 0.01830
Epoch 5/50   -   loss: 0.01876   -   val_loss: 0.01756
Epoch 6/50   -   loss: 0.01766   -   val_loss: 0.01712
Epoch 7/50   -   loss: 0.01689   -   val_loss: 0.01675
Epoch 8/50   -   loss: 0.01634   -   val_loss: 0.01650
Epoch 9/50   -   loss: 0.01586   -   val_loss: 0.01627
Epoch 10/50   -   loss: 0.01544   -   val_loss: 0.01616
Epoch 11/50   -   loss: 0.01502   -   val_lo

Epoch 37/50   -   loss: 0.00745   -   val_loss: 0.01583
Epoch 38/50   -   loss: 0.00720   -   val_loss: 0.01586
Epoch 39/50   -   loss: 0.00704   -   val_loss: 0.01588
Epoch 40/50   -   loss: 0.00681   -   val_loss: 0.01593
Epoch 41/50   -   loss: 0.00665   -   val_loss: 0.01599
Epoch 42/50   -   loss: 0.00640   -   val_loss: 0.01601
Epoch 43/50   -   loss: 0.00617   -   val_loss: 0.01612
Epoch 44/50   -   loss: 0.00601   -   val_loss: 0.01613
Epoch 45/50   -   loss: 0.00582   -   val_loss: 0.01616
Epoch 46/50   -   loss: 0.00561   -   val_loss: 0.01629
Epoch 47/50   -   loss: 0.00547   -   val_loss: 0.01630
Epoch 48/50   -   loss: 0.00524   -   val_loss: 0.01636
Epoch 49/50   -   loss: 0.00507   -   val_loss: 0.01648
Epoch 50/50   -   loss: 0.00500   -   val_loss: 0.01648


{'train': 0.009807500911617204, 'val': 0.0155167521443218}

In [180]:
# Evaluate Model5 on the first train_X.shape[0] rows only, restricted to the
# rows flagged by train_X['real_drug'].
# The mask is converted to a NumPy array so it is applied by position to both
# the feature matrix and the label frame; a pandas Series mask would be
# index-aligned against train_Y5, whose index was reset, and could select
# different rows than it does for train_X5.
n_train_rows = train_X.shape[0]
real_drug_mask = train_X['real_drug'].to_numpy()
run_eval_for_model(
    Model5,
    5,
    train_X5[:n_train_rows][real_drug_mask],
    train_Y5[:n_train_rows][real_drug_mask],
    test_X5,
)

Overall score is 0.00972
