In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import random
import os
import sys
from pathlib import Path
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings('ignore')

#used net arch from kaggle.com/nicohrubec/pytorch-multilabel-neural-network/

In [2]:
import numpy as np
from joblib import Parallel, delayed
from scipy.interpolate import interp1d
from scipy.special import erf, erfinv
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


class GaussRankScaler(BaseEstimator, TransformerMixin):
    """Transform features by scaling each feature to a normal distribution.
    Parameters
        ----------
        epsilon : float, optional, default 1e-4
            A small amount added to the lower bound or subtracted
            from the upper bound. This value prevents infinite number
            from occurring when applying the inverse error function.
        copy : boolean, optional, default True
            If False, try to avoid a copy and do inplace scaling instead.
            This is not guaranteed to always work inplace; e.g. if the data is
            not a NumPy array, a copy may still be returned.
        n_jobs : int or None, optional, default None
            Number of jobs to run in parallel.
            ``None`` means 1 and ``-1`` means using all processors.
        interp_kind : str or int, optional, default 'linear'
           Specifies the kind of interpolation as a string
            ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
            'previous', 'next', where 'zero', 'slinear', 'quadratic' and 'cubic'
            refer to a spline interpolation of zeroth, first, second or third
            order; 'previous' and 'next' simply return the previous or next value
            of the point) or as an integer specifying the order of the spline
            interpolator to use.
        interp_copy : bool, optional, default False
            If True, the interpolation function makes internal copies of x and y.
            If False, references to `x` and `y` are used.
        Attributes
        ----------
        interp_func_ : list
            The interpolation function for each feature in the training set.
        """

    def __init__(self, epsilon=1e-4, copy=True, n_jobs=None, interp_kind='linear', interp_copy=False):
        self.epsilon = epsilon
        self.copy = copy
        self.interp_kind = interp_kind
        self.interp_copy = interp_copy
        self.fill_value = 'extrapolate'
        self.n_jobs = n_jobs

    def fit(self, X, y=None):
        """Fit interpolation function to link rank with original data for future scaling
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The data used to fit interpolation function for later scaling along the features axis.
        y
            Ignored
        """
        X = check_array(X, copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite=True)

        self.interp_func_ = Parallel(n_jobs=self.n_jobs)(delayed(self._fit)(x) for x in X.T)
        return self

    def _fit(self, x):
        x = self.drop_duplicates(x)
        rank = np.argsort(np.argsort(x))
        bound = 1.0 - self.epsilon
        factor = np.max(rank) / 2.0 * bound
        scaled_rank = np.clip(rank / factor - bound, -bound, bound)
        return interp1d(
            x, scaled_rank, kind=self.interp_kind, copy=self.interp_copy, fill_value=self.fill_value)

    def transform(self, X, copy=None):
        """Scale the data with the Gauss Rank algorithm
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The data used to scale along the features axis.
        copy : bool, optional (default: None)
            Copy the input X or not.
        """
        check_is_fitted(self, 'interp_func_')

        copy = copy if copy is not None else self.copy
        X = check_array(X, copy=copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite=True)

        X = np.array(Parallel(n_jobs=self.n_jobs)(delayed(self._transform)(i, x) for i, x in enumerate(X.T))).T
        return X

    def _transform(self, i, x):
        return erfinv(self.interp_func_[i](x))

    def inverse_transform(self, X, copy=None):
        """Scale back the data to the original representation
        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data used to scale along the features axis.
        copy : bool, optional (default: None)
            Copy the input X or not.
        """
        check_is_fitted(self, 'interp_func_')

        copy = copy if copy is not None else self.copy
        X = check_array(X, copy=copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite=True)

        X = np.array(Parallel(n_jobs=self.n_jobs)(delayed(self._inverse_transform)(i, x) for i, x in enumerate(X.T))).T
        return X

    def _inverse_transform(self, i, x):
        inv_interp_func = interp1d(self.interp_func_[i].y, self.interp_func_[i].x, kind=self.interp_kind,
                                   copy=self.interp_copy, fill_value=self.fill_value)
        return inv_interp_func(erf(x))

    @staticmethod
    def drop_duplicates(x):
        is_unique = np.zeros_like(x, dtype=bool)
        is_unique[np.unique(x, return_index=True)[1]] = True
        return x[is_unique]

submit[targets] = preds
submit.loc[X_test['cp_type']=='ctl_vehicle', targets] = 0
submit.to_csv('submission.csv', index=False)

In [3]:
import ctypes
ctypes.cdll.LoadLibrary('caffe2_nvrtc.dll')

<CDLL 'caffe2_nvrtc.dll', handle 7ffae3e70000 at 0x21c83456f10>

In [4]:
seed = 42

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
set_seed(seed)

In [5]:
p_min = 1e-15
p_max = 1 - p_min

def score(y_true, y_pred):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    y_pred = np.clip(y_pred, p_min, p_max)
    return -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)).mean()

In [6]:
train_X = pd.read_csv('../input/lish-moa/train_features.csv', index_col='sig_id')
test_Y = pd.read_csv('../input/lish-moa/sample_submission.csv', index_col='sig_id')
train_Y = pd.read_csv('../input/lish-moa/train_targets_scored.csv', index_col='sig_id', dtype={f: test_Y.dtypes[f] for f in test_Y})
test_X = pd.read_csv('../input/lish-moa/test_features.csv', index_col='sig_id')

In [7]:
train_X.cp_time = train_X.cp_time / 24
test_X.cp_time = test_X.cp_time / 24

train_X['real_drug'] = train_X.cp_type == 'trt_cp'
test_X['real_drug'] = test_X.cp_type == 'trt_cp'

t = train_X.cp_dose.copy()
train_X.drop(columns=['cp_dose', 'cp_type'], inplace=True)
train_X['cp_dose'] = 1
train_X.loc[(t == 'D2'), 'cp_dose'] = 2

t = test_X.cp_dose.copy()
test_X.drop(columns=['cp_dose', 'cp_type'], inplace=True)
test_X['cp_dose'] = 1
test_X.loc[(t == 'D2'), 'cp_dose'] = 2

In [8]:
nfolds = 6
nstarts = 1
nepochs = 50
batch_size = 128
val_batch_size = batch_size * 4
criterion = nn.BCELoss()
kfold = MultilabelStratifiedKFold(n_splits=nfolds, random_state=517, shuffle=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [9]:
class Dataset_my(Dataset):
    def __init__(self, df, targets, mode='train'):
        self.mode = mode
        #self.feats = feats_idx
        #self.data = df[:, feats_idx]
        self.data = df
        if mode=='train':
            self.targets = targets
    
    def __getitem__(self, idx):
        if self.mode == 'train':
            return torch.FloatTensor(self.data[idx]), torch.FloatTensor(self.targets[idx])
        elif self.mode == 'test':
            return torch.FloatTensor(self.data[idx]), 0
        
    def __len__(self):
        return len(self.data)

In [10]:
def run_CV_for_model(cur_model, model_num, train_X_loc, train_Y_loc, test_X_loc):
    set_seed(seed)
    for n, (tr, te) in enumerate(kfold.split(train_Y_loc, train_Y_loc)):
        print(f'Train fold {n+1}')
        xtrain, xval = train_X_loc[tr], train_X_loc[te]
        ytrain, yval = train_Y_loc[tr], train_Y_loc[te]

        train_set = Dataset_my(xtrain, ytrain)
        val_set = Dataset_my(xval, yval)

        dataloaders = {
            'train': DataLoader(train_set, batch_size=batch_size, shuffle=True),
            'val': DataLoader(val_set, batch_size=val_batch_size, shuffle=False)
        }

        model = cur_model(train_X_loc.shape[1]).to(device)
        Path(f'./saved_params/model{model_num}').mkdir(parents=True, exist_ok=True)
        checkpoint_path = f'./saved_params/model{model_num}/repeat_{1}_Fold_{n+1}.pt'
        optimizer = optim.Adam(model.parameters(), weight_decay=1e-5)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, eps=1e-4, verbose=True)
        best_loss = {'train': np.inf, 'val': np.inf}

        for epoch in range(nepochs):
            epoch_loss = {'train': 0.0, 'val': 0.0}

            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0

                for i, (x, y) in enumerate(dataloaders[phase]):
                    x, y = x.to(device), y.to(device)

                    optimizer.zero_grad()

                    with torch.set_grad_enabled(phase=='train'):
                        preds = model(x)
                        loss = criterion(preds, y)

                        if phase=='train':
                            loss.backward()
                            optimizer.step()

                    running_loss += loss.item() / len(dataloaders[phase])

                epoch_loss[phase] = running_loss

            print("Epoch {}/{}   -   loss: {:5.5f}   -   val_loss: {:5.5f}".format(epoch+1, nepochs, epoch_loss['train'], epoch_loss['val']))

            scheduler.step(epoch_loss['val'])

            if epoch_loss['val'] < best_loss['val']:
                best_loss = epoch_loss
                torch.save(model.state_dict(), checkpoint_path)
    return best_loss

In [26]:
def run_predict_for_model(cur_model, model_num, test_X_loc):
    preds = np.zeros((test_X_loc.shape[0], test_Y.shape[1], nfolds))
    
    for n in range(nfolds):
        test_set = Dataset_my(test_X_loc, None, mode='test')
        dataloader = DataLoader(test_set, batch_size=val_batch_size, shuffle=False)
        
        checkpoint_path = f'./saved_params/model{model_num}/repeat_{1}_Fold_{n+1}.pt'
        model = cur_model(test_X_loc.shape[1]).to(device)
        model.load_state_dict(torch.load(checkpoint_path))
        model.eval()
        
        fold_preds = []
        for i, (x, y) in enumerate(dataloader):
            x = x.to(device)

            with torch.no_grad():
                fold_preds.append(model(x))
            
        fold_preds = torch.cat(fold_preds, dim=0).cpu().numpy()
        preds[:, :, n] = fold_preds
    preds = preds.mean(axis=2)
    return preds

In [20]:
class Model4(nn.Module):
    def __init__(self, num_columns):
        super(Model4, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_columns)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_columns, 2048))
        
        self.batch_norm2 = nn.BatchNorm1d(2048)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(2048, 1024))
        
        self.batch_norm3 = nn.BatchNorm1d(1024)
        self.dropout3 = nn.Dropout(0.5)
        self.dense3 = nn.utils.weight_norm(nn.Linear(1024, 206))
    
    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.leaky_relu(self.dense1(x))
        
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = F.leaky_relu(self.dense2(x))
        
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = F.sigmoid(self.dense3(x))
        
        return x

In [12]:
t = test_Y[test_X['real_drug'] == False]
for f in t:
    t[f] = 0
train_Y4 = train_Y.reset_index(drop=True).append(t)
train_X4 = train_X.reset_index(drop=True)
test_X4 = test_X

all_X4 = train_X4.append(test_X4).drop(columns=['real_drug'])

features_g = [col for col in train_X4.columns if 'g-' in col]
features_c = [col for col in train_X4.columns if 'c-' in col]

all_X4['g_sum'] = all_X4[features_g].sum(axis = 1)
all_X4['g_mean'] = all_X4[features_g].mean(axis = 1)
all_X4['g_std'] = all_X4[features_g].std(axis = 1)
all_X4['g_kurt'] = all_X4[features_g].kurtosis(axis = 1)
all_X4['g_skew'] = all_X4[features_g].skew(axis = 1)
all_X4['c_sum'] = all_X4[features_c].sum(axis = 1)
all_X4['c_mean'] = all_X4[features_c].mean(axis = 1)
all_X4['c_std'] = all_X4[features_c].std(axis = 1)
all_X4['c_kurt'] = all_X4[features_c].kurtosis(axis = 1)
all_X4['c_skew'] = all_X4[features_c].skew(axis = 1)
all_X4['gc_sum'] = all_X4[features_g + features_c].sum(axis = 1)
all_X4['gc_mean'] = all_X4[features_g + features_c].mean(axis = 1)
all_X4['gc_std'] = all_X4[features_g + features_c].std(axis = 1)
all_X4['gc_kurt'] = all_X4[features_g + features_c].kurtosis(axis = 1)
all_X4['gc_skew'] = all_X4[features_g + features_c].skew(axis = 1)



In [13]:
scaler = GaussRankScaler()
all_X4 = scaler.fit_transform(all_X4)

In [14]:
pca_transformer = PCA(687)
all_X4 = pca_transformer.fit_transform(all_X4)

In [15]:
train_X4 = all_X4[:train_X4.shape[0]]
test_X4 = all_X4[train_X4.shape[0]:]

In [16]:
train_X4 = np.vstack([train_X4, test_X4[test_X['real_drug'] == False]])

In [17]:
alpha_smoothing = 0.001
train_Y4 = (1 - alpha_smoothing) * train_Y4 + alpha_smoothing * train_Y4.mean(axis=1)[:, None]

In [18]:
train_Y4 = train_Y4.values

In [21]:
run_CV_for_model(Model4, 4, train_X4, train_Y4, test_X4)

Train fold 1
Epoch 1/50   -   loss: 0.36709   -   val_loss: 0.06566
Epoch 2/50   -   loss: 0.04150   -   val_loss: 0.02564
Epoch 3/50   -   loss: 0.02403   -   val_loss: 0.02049
Epoch 4/50   -   loss: 0.02037   -   val_loss: 0.01863
Epoch 5/50   -   loss: 0.01872   -   val_loss: 0.01771
Epoch 6/50   -   loss: 0.01765   -   val_loss: 0.01711
Epoch 7/50   -   loss: 0.01686   -   val_loss: 0.01664
Epoch 8/50   -   loss: 0.01628   -   val_loss: 0.01641
Epoch 9/50   -   loss: 0.01581   -   val_loss: 0.01620
Epoch 10/50   -   loss: 0.01549   -   val_loss: 0.01605
Epoch 11/50   -   loss: 0.01503   -   val_loss: 0.01593
Epoch 12/50   -   loss: 0.01455   -   val_loss: 0.01573
Epoch 13/50   -   loss: 0.01419   -   val_loss: 0.01565
Epoch 14/50   -   loss: 0.01389   -   val_loss: 0.01559
Epoch 15/50   -   loss: 0.01348   -   val_loss: 0.01552
Epoch 16/50   -   loss: 0.01312   -   val_loss: 0.01549
Epoch 17/50   -   loss: 0.01272   -   val_loss: 0.01551
Epoch 18/50   -   loss: 0.01244   -   val_lo

Epoch 44/50   -   loss: 0.00549   -   val_loss: 0.01609
Epoch 45/50   -   loss: 0.00533   -   val_loss: 0.01620
Epoch 46/50   -   loss: 0.00516   -   val_loss: 0.01626
Epoch 47/50   -   loss: 0.00501   -   val_loss: 0.01629
Epoch 48/50   -   loss: 0.00490   -   val_loss: 0.01639
Epoch 49/50   -   loss: 0.00470   -   val_loss: 0.01646
Epoch 50/50   -   loss: 0.00454   -   val_loss: 0.01649
Train fold 4
Epoch 1/50   -   loss: 0.36530   -   val_loss: 0.05661
Epoch 2/50   -   loss: 0.04129   -   val_loss: 0.02345
Epoch 3/50   -   loss: 0.02445   -   val_loss: 0.01969
Epoch 4/50   -   loss: 0.02038   -   val_loss: 0.01837
Epoch 5/50   -   loss: 0.01876   -   val_loss: 0.01758
Epoch 6/50   -   loss: 0.01763   -   val_loss: 0.01716
Epoch 7/50   -   loss: 0.01692   -   val_loss: 0.01680
Epoch 8/50   -   loss: 0.01636   -   val_loss: 0.01650
Epoch 9/50   -   loss: 0.01584   -   val_loss: 0.01629
Epoch 10/50   -   loss: 0.01544   -   val_loss: 0.01617
Epoch 11/50   -   loss: 0.01503   -   val_lo

Epoch 37/50   -   loss: 0.00671   -   val_loss: 0.01598
Epoch 38/50   -   loss: 0.00643   -   val_loss: 0.01607
Epoch 39/50   -   loss: 0.00631   -   val_loss: 0.01608
Epoch 40/50   -   loss: 0.00609   -   val_loss: 0.01615
Epoch 41/50   -   loss: 0.00592   -   val_loss: 0.01620
Epoch 42/50   -   loss: 0.00570   -   val_loss: 0.01627
Epoch 43/50   -   loss: 0.00552   -   val_loss: 0.01636
Epoch 44/50   -   loss: 0.00535   -   val_loss: 0.01637
Epoch 45/50   -   loss: 0.00519   -   val_loss: 0.01639
Epoch 46/50   -   loss: 0.00500   -   val_loss: 0.01652
Epoch 47/50   -   loss: 0.00490   -   val_loss: 0.01660
Epoch 48/50   -   loss: 0.00472   -   val_loss: 0.01663
Epoch 49/50   -   loss: 0.00453   -   val_loss: 0.01675
Epoch 50/50   -   loss: 0.00448   -   val_loss: 0.01676


{'train': 0.011962243980621989, 'val': 0.015516783576458693}

In [22]:
run_eval_for_model(Model4, 4, train_X4[:train_X.shape[0]][train_X['real_drug']], train_Y4[:train_Y.shape[0]][train_X['real_drug']], test_X4)

Overall score is 0.00994


In [37]:
train_P4 = run_predict_for_model(Model4, 4, train_X4)

In [38]:
train_P4.shape, train_Y4.shape

((24172, 206), (24172, 206))

In [41]:
score(train_Y4, train_P4)

0.008982095616991351

In [45]:
train_P4[:train_X.shape[0]][train_X['real_drug'] == False] = 0
train_P4[train_X.shape[0]] = 0

In [46]:
score(train_Y4, train_P4)

0.008836717233532172

In [65]:
test_P = run_predict_for_model(Model4, 4, test_X4)

In [73]:
test_Y[list(test_Y.columns)] = test_P
test_Y[test_X['real_drug'] == False] = 0

In [74]:
test_Y

Unnamed: 0_level_0,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
sig_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
id_0004d9e33,0.002171,0.001328,0.002253,0.017687,0.026022,0.003741,0.001546,0.010823,0.000312,0.014732,...,0.000730,0.003549,0.003592,0.001130,0.000549,0.000762,0.003203,0.003871,0.004933,0.001944
id_001897cda,0.000631,0.000748,0.001614,0.003659,0.003010,0.002421,0.002964,0.006331,0.003306,0.006086,...,0.000918,0.000760,0.004370,0.000536,0.003929,0.000454,0.001155,0.000828,0.001719,0.002813
id_002429b5b,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
id_00276f245,0.000620,0.000638,0.001117,0.023269,0.007211,0.006435,0.001940,0.003666,0.000217,0.007372,...,0.000355,0.000682,0.004195,0.027139,0.004764,0.000536,0.001166,0.001717,0.000462,0.001656
id_0027f1083,0.001551,0.001067,0.001734,0.013590,0.015821,0.002559,0.004305,0.002512,0.000386,0.009790,...,0.000523,0.000220,0.002824,0.001352,0.000399,0.000425,0.001223,0.001345,0.000292,0.001627
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
id_ff7004b87,0.000926,0.000516,0.001241,0.002234,0.017332,0.002486,0.002286,0.002138,0.000636,0.005145,...,0.000469,0.000968,0.001773,0.038757,0.008308,0.000429,0.004527,0.002440,0.000987,0.002238
id_ff925dd0d,0.001859,0.000952,0.001260,0.007831,0.039036,0.005091,0.002150,0.004809,0.000385,0.011100,...,0.000375,0.001357,0.001794,0.001146,0.001845,0.000708,0.001228,0.001549,0.000773,0.001588
id_ffb710450,0.001665,0.000586,0.001574,0.020110,0.054895,0.002837,0.002792,0.003713,0.000273,0.015742,...,0.000340,0.001018,0.002779,0.001120,0.001397,0.000354,0.001211,0.001487,0.000394,0.001623
id_ffbb869f2,0.000973,0.001788,0.000975,0.015097,0.005949,0.003971,0.002500,0.001956,0.000744,0.030438,...,0.000314,0.000235,0.003044,0.000503,0.001767,0.000501,0.002656,0.001747,0.000293,0.002505


In [75]:
test_Y.to_csv('submission.csv')