In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import sys
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
from copy import deepcopy as dp
import seaborn as sns

from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from scipy.spatial import ConvexHull

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
import torchvision.models as models

import pickle
from pickle import dump, load

import warnings
warnings.filterwarnings('ignore')

# Name of this experiment; it is only used to build the output folder name below.
experiment_name="Resnet_deepinsight"
# Folder (relative to the working directory) that run_training's save_pickle writes into.
model_output_folder = f"./{experiment_name}"
# exist_ok=True keeps the cell re-runnable when the folder already exists.
os.makedirs(model_output_folder, exist_ok=True)

def g_table(list1):
    """Count how often each distinct value occurs in an iterable.

    Args:
        list1: any iterable of hashable values.

    Returns:
        dict mapping each value to its number of occurrences, with keys in
        order of first appearance.
    """
    counts = {}
    for value in list1:
        # A value seen for the first time starts from 0 before the increment.
        counts[value] = counts.get(value, 0) + 1
    return counts

def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and every CUDA device),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers every visible GPU rather than only the current
    # device; it is silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner can select different kernels from run to run, so it
    # has to be switched off for the deterministic flag to be meaningful.
    torch.backends.cudnn.benchmark = False

seed_everything(seed=42)

In [None]:
# Build the ResNet-50 architecture without fetching weights: the checkpoint
# loaded on the next line overwrites the parameters anyway, and requesting
# pretrained weights needs network access just to discard the result.
model = models.resnet50()
# map_location='cpu' lets the checkpoint load on CPU-only sessions as well;
# run_training moves the model to DEVICE before it is used.
# NOTE(review): run_training replaces `model.fc` on this single shared instance,
# so trained weights appear to carry over from one fold to the next — confirm
# whether a fresh copy per fold was intended.
model.load_state_dict(torch.load('../input/resnet50/resnet50.pth', map_location='cpu'))

In [None]:
# Seeds to average over; the training loop below runs once per entry.
SEED = [0]
input_dir = '../input/lish-moa/'

sc_dic = {}
feat_dic = {}
train_features = pd.read_csv(input_dir+'train_features.csv')
train_targets_scored = pd.read_csv(input_dir+'train_targets_scored.csv')
train_targets_nonscored = pd.read_csv(input_dir+'train_targets_nonscored.csv')
test_features = pd.read_csv(input_dir+'test_features.csv')
sample_submission = pd.read_csv(input_dir+'sample_submission.csv')
train_drug = pd.read_csv(input_dir+'train_drug.csv')

target_cols = train_targets_scored.drop('sig_id', axis=1).columns.values.tolist()
target_nonsc_cols = train_targets_nonscored.drop('sig_id', axis=1).columns.values.tolist()

######## non-score ########
# Keep only the non-scored targets that correlate most strongly with at least
# one scored target; they serve as auxiliary labels later on.
# NOTE(review): despite its name, `nonctr_id` holds every sig_id of train_features
# (no control rows are filtered out here) — confirm this is intended.
nonctr_id = train_features.loc[:,'sig_id'].tolist()
# Series.isin hashes the ids once; testing `i in nonctr_id` per row was quadratic.
tmp_con1 = train_targets_scored['sig_id'].isin(nonctr_id).tolist()
# np.corrcoef stacks the scored targets first and the non-scored ones after
# them, so the block sliced out below is (non-scored rows) x (scored columns).
mat_cor = pd.DataFrame(np.corrcoef(train_targets_scored.drop('sig_id',axis = 1)[tmp_con1].T,
                      train_targets_nonscored.drop('sig_id',axis = 1)[tmp_con1].T))
mat_cor2 = mat_cor.iloc[(train_targets_scored.shape[1]-1):,0:train_targets_scored.shape[1]-1]
mat_cor2.index = target_nonsc_cols
mat_cor2.columns = target_cols
# Rows with any undefined (NaN) correlation are discarded.
mat_cor2 = mat_cor2.dropna()
mat_cor2_max = mat_cor2.abs().max(axis = 1)

# Keep the non-scored targets whose best |correlation| lies above this quantile.
q_n_cut = 0.9
target_nonsc_cols2 = mat_cor2_max[mat_cor2_max > np.quantile(mat_cor2_max,q_n_cut)].index.tolist()
print(len(target_nonsc_cols2))

GENES = [col for col in train_features.columns if col.startswith('g-')]
CELLS = [col for col in train_features.columns if col.startswith('c-')]
feat_dic['gene'] = GENES
feat_dic['cell'] = CELLS
train = train_features.merge(train_targets_scored, on='sig_id')
train = train.merge(train_targets_nonscored[['sig_id']+target_nonsc_cols2], on='sig_id')

target = train[['sig_id']+target_cols]
target_ns = train[['sig_id']+target_nonsc_cols2]

# train0 = train.drop('cp_type', axis=1)
# test = test.drop('cp_type', axis=1)
# train0 / test are aliases (not copies), so the encoding below also changes
# `train` and `test_features` in place.
train0 = train
test = test_features

# Encode the categorical experiment descriptors as numbers.
for df in [train0, test]:
    df['cp_type'] = df['cp_type'].map({'ctl_vehicle': 0, 'trt_cp': 1})
    df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
    df['cp_time'] = df['cp_time'].map({24: 0, 48: 0.5, 72: 1})

target_cols = target.drop('sig_id', axis=1).columns.values.tolist()

# drug ids
tar_sig = target['sig_id'].tolist()
# Vectorised membership test instead of a per-row `i in tar_sig` list scan.
train_drug = train_drug.loc[train_drug['sig_id'].isin(tar_sig)]
target = target.merge(train_drug, on='sig_id', how='left') 

# LOCATE DRUGS
# vc1: drugs with at most 19 rows, vc2: drugs with more than 19 rows. The fold
# assignment in the training loop treats the two groups differently.
vc = train_drug.drug_id.value_counts()
vc1 = vc.loc[vc <= 19].index
vc2 = vc.loc[vc > 19].index

# Flatten the per-group feature lists (gene, cell) into one ordered list.
feature_cols = []
for key_i in feat_dic.keys():
    value_i = feat_dic[key_i]
    print(key_i,len(value_i))
    feature_cols += value_i
# Pristine copy that the seed loop restores feature_cols from.
feature_cols0 = dp(feature_cols)
    
# Buffers for out-of-fold and test predictions (rows x scored targets).
oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))



In [None]:
# Averaging on multiple SEEDS
for seed in SEED:

    seed_everything(seed=seed)
    folds = train0.copy()
    feature_cols = dp(feature_cols0)
    
    # kfold - leave drug out
    # Rare drugs (vc1) are assigned to folds as whole drugs; frequent drugs (vc2)
    # are spread over the folds row by row.
    target2 = target.copy()
    dct1 = {}; dct2 = {}
    skf = MultilabelStratifiedKFold(n_splits = 5) # , shuffle = True, random_state = seed
    # One row per rare drug, stratified on the drug's mean target vector.
    tmp = target2.groupby('drug_id')[target_cols].mean().loc[vc1]
    tmp_idx = tmp.index.tolist()
    tmp_idx.sort()
    # Shuffle through `random`, which seed_everything seeded above; sorting
    # first makes the shuffle independent of the incoming row order.
    tmp_idx2 = random.sample(tmp_idx,len(tmp_idx))
    tmp = tmp.loc[tmp_idx2]
    for fold,(idxT,idxV) in enumerate(skf.split(tmp,tmp[target_cols])):
        # idxV holds row positions; indexing an Index object is positional.
        dd = {k:fold for k in tmp.index[idxV].values}
        dct1.update(dd)

    # STRATIFY DRUGS MORE THAN 19X
    skf = MultilabelStratifiedKFold(n_splits = 5) # , shuffle = True, random_state = seed
    tmp = target2.loc[target2.drug_id.isin(vc2)].reset_index(drop = True)
    tmp_idx = tmp.index.tolist()
    tmp_idx.sort()
    tmp_idx2 = random.sample(tmp_idx,len(tmp_idx))
    tmp = tmp.loc[tmp_idx2]
    for fold,(idxT,idxV) in enumerate(skf.split(tmp,tmp[target_cols])):
        # idxV are POSITIONS in the shuffled frame. `tmp.sig_id[idxV]` would look
        # them up as index LABELS, which after the shuffle are different rows,
        # so the stratified split was applied to the wrong samples. Use .iloc.
        dd = {k:fold for k in tmp.sig_id.iloc[idxV].values}
        dct2.update(dd)

    # Rare drugs get their fold through drug_id; rows still unassigned belong
    # to frequent drugs and get their fold through sig_id.
    target2['kfold'] = target2.drug_id.map(dct1)
    target2.loc[target2.kfold.isna(),'kfold'] = target2.loc[target2.kfold.isna(),'sig_id'].map(dct2)
    target2.kfold = target2.kfold.astype(int)

    # Assignment aligns on the row index of target2 and folds.
    folds['kfold'] = target2['kfold'].copy()

    train = folds.copy()
    test_ = test.copy()

    # ---- Training configuration ----
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    NFOLDS = 5
    EPOCHS = 15
    BATCH_SIZE = 48
    LEARNING_RATE, WEIGHT_DECAY = 1e-3, 1e-5
    EARLY_STOPPING_STEPS, EARLY_STOP = 10, False

    # Component counts used by the (currently commented-out) gene / cell PCA
    # step in run_training; they are still added to num_features below.
    n_comp1, n_comp2 = 50, 15

    num_features = len(feature_cols) + n_comp1 + n_comp2
    num_targets = len(target_cols)
    num_targets_0 = len(target_nonsc_cols2)
    hidden_size = 4096

    # For every scored target take the count of its least frequent value, then
    # derive a weight that shrinks as that count grows:
    #   weight = min(log(freq + 100)) / log(freq + 100)
    tar_freq = np.array([
        np.min(list(g_table(train[target_cols].iloc[:, col_pos]).values()))
        for col_pos in range(len(target_cols))
    ])
    tar_weight0 = np.log(tar_freq + 100)
    tar_weight0_min = dp(tar_weight0.min())
    tar_weight = tar_weight0_min / tar_weight0
    pos_weight = torch.tensor(tar_weight).to(DEVICE)
    from torch.nn.modules.loss import _WeightedLoss
    class SmoothBCEwLogits(_WeightedLoss):
        """Binary cross-entropy on logits with label smoothing.

        Targets are pulled towards 0.5 before the loss is computed:
        ``t' = t * (1 - smoothing) + 0.5 * smoothing``.

        Args:
            weight: optional rescaling weight forwarded to
                ``F.binary_cross_entropy_with_logits``.
            reduction: ``'mean'`` (default), ``'sum'``, or any other value to
                get the unreduced element-wise loss.
            smoothing: smoothing factor in ``[0, 1)``; 0 disables smoothing.
        """
        def __init__(self, weight=None, reduction='mean', smoothing=0.0):
            super().__init__(weight=weight, reduction=reduction)
            self.smoothing = smoothing
            self.weight = weight
            self.reduction = reduction

        @staticmethod
        def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
            """Return the smoothed targets; `n_labels` is accepted but unused."""
            assert 0 <= smoothing < 1
            with torch.no_grad():
                targets = targets * (1.0 - smoothing) + 0.5 * smoothing
            return targets

        def forward(self, inputs, targets):
            """Compute the smoothed BCE between logits `inputs` and `targets`."""
            targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
                self.smoothing)
            # Ask for the element-wise loss. The functional's default reduction
            # is 'mean', which used to collapse the loss to a scalar before
            # the branches below ran, so 'sum' silently returned the mean.
            loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight,
                                                      reduction='none')

            if  self.reduction == 'sum':
                loss = loss.sum()
            elif  self.reduction == 'mean':
                loss = loss.mean()

            return loss
        
    # Alicia - DeepInsight
    class LogScaler:
        """Log normalize and scale data

        Log normalization and scaling procedure as described as norm-2 in the
        DeepInsight paper supplementary information.

        Each feature is shifted by the absolute value of its training minimum
        plus one, log-transformed, and the result is divided by the global
        maximum of the log-transformed training set and clipped to [0, 1].

        Note: The dimensions of input matrix is (N samples, d features)
        """
        def __init__(self):
            self._min0 = None  # per-feature minimum of the training set
            self._max = None   # global maximum of the log-normalized training set

        def _log_norm(self, X):
            """Return log(X + |_min0| + 1), floored at 0 (broadcast over rows)."""
            return np.log(X + np.abs(self._min0) + 1).clip(min=0, max=None)

        def _scale(self, X_norm):
            """Divide by the training maximum and clip the result to [0, 1]."""
            # A constant training set gives _max == 0; divide by 1 in that case
            # so the output is all zeros rather than NaN.
            denom = self._max if self._max > 0 else 1.0
            return (X_norm / denom).clip(0, 1)

        def fit(self, X, y=None):
            """Learn the scaling statistics from the training set.

            Use this as a preprocessing step in inference mode.

            Args:
                X: array of shape (n_samples, n_features).
                y: ignored.

            Returns:
                self, so calls can be chained.
            """
            # Min. of training set per feature
            self._min0 = X.min(axis=0)
            # Global max. of the log-normalized training set
            self._max = self._log_norm(X).max()
            return self

        def fit_transform(self, X, y=None):
            """Fit on X and return its normalized version. For training set only."""
            self.fit(X)
            return self._scale(self._log_norm(X))

        def transform(self, X, y=None):
            """Normalize X with the fitted statistics.

            For validation and test set only. Values below the training minimum
            of a feature are raised to that minimum first. The input array is
            left untouched.
            """
            # np.maximum returns a new array; the column-wise in-place clip used
            # before overwrote the caller's data.
            X = np.maximum(X, self._min0)
            return self._scale(self._log_norm(X))
    
    class DeepInsightTransformer:
        """Transform features to an image matrix using dimensionality reduction

        This class takes in data normalized between 0 and 1 and converts it to a
        CNN compatible 'image' matrix. `fit` reduces the transposed data to two
        dimensions so that every feature gets a pixel position; `transform`
        then writes each sample's feature values into those pixels.

        """
        def __init__(self,
                     feature_extractor='tsne',
                     perplexity=30,
                     pixels=100,
                     random_state=None,
                     n_jobs=None):
            """Generate a DeepInsightTransformer instance

            Args:
                feature_extractor: string of value ('tsne', 'tsne_exact', 'pca',
                    'kpca') or a class instance with method `fit_transform`
                    that returns a 2-dimensional array of extracted features.
                perplexity: perplexity passed to TSNE for the 'tsne' and
                    'tsne_exact' extractors.
                pixels: int (square matrix) or tuple of ints (height, width) that
                    defines the size of the image matrix.
                random_state: int or RandomState. Determines the random number
                    generator, if present, of a string defined feature_extractor.
                n_jobs: The number of parallel jobs to run for a string defined
                    feature_extractor.

            Raises:
                ValueError: feature_extractor is an unknown string.
                TypeError: feature_extractor is neither a string nor an object
                    with a `fit_transform` method.
            """
            # `inspect` is not imported by the notebook's import cell, so it is
            # imported here; without it the custom-extractor branch below
            # raised NameError.
            import inspect

            self.random_state = random_state
            self.n_jobs = n_jobs

            if isinstance(feature_extractor, str):
                fe = feature_extractor.casefold()
                if fe == 'tsne_exact'.casefold():
                    fe = TSNE(n_components=2,
                              metric='cosine',
                              perplexity=perplexity,
                              n_iter=1000,
                              method='exact',
                              random_state=self.random_state,
                              n_jobs=self.n_jobs)
                elif fe == 'tsne'.casefold():
                    fe = TSNE(n_components=2,
                              metric='cosine',
                              perplexity=perplexity,
                              n_iter=1000,
                              method='barnes_hut',
                              random_state=self.random_state,
                              n_jobs=self.n_jobs)
                elif fe == 'pca'.casefold():
                    fe = PCA(n_components=2, random_state=self.random_state)
                elif fe == 'kpca'.casefold():
                    # Only PCA is imported at the top of the notebook; KernelPCA
                    # was referenced without an import and raised NameError.
                    from sklearn.decomposition import KernelPCA
                    fe = KernelPCA(n_components=2,
                                   kernel='rbf',
                                   random_state=self.random_state,
                                   n_jobs=self.n_jobs)
                else:
                    raise ValueError(("Feature extraction method '{}' not accepted"
                                      ).format(feature_extractor))
                self._fe = fe
            elif hasattr(feature_extractor, 'fit_transform') and \
                    inspect.ismethod(feature_extractor.fit_transform):
                self._fe = feature_extractor
            else:
                raise TypeError('Parameter feature_extractor is not a '
                                'string nor has method "fit_transform"')

            if isinstance(pixels, int):
                pixels = (pixels, pixels)

            # The resolution of transformed image
            self._pixels = pixels
            # Rotated 2-D feature embedding; filled in by fit()
            self._xrot = None

        def fit(self, X, y=None, plot=False):
            """Train the image transformer from the training set (X)

            Args:
                X: {array-like, sparse matrix} of shape (n_samples, n_features)
                y: Ignored. Present for continuity with scikit-learn
                plot: boolean of whether to produce a scatter plot showing the
                    feature reduction, hull points, and minimum bounding rectangle

            Returns:
                self: object
            """
            # Transpose to get (n_features, n_samples)
            X = X.T

            # Perform dimensionality reduction
            x_new = self._fe.fit_transform(X)

            # Get the convex hull for the points
            chvertices = ConvexHull(x_new).vertices
            hull_points = x_new[chvertices]

            # Determine the minimum bounding rectangle
            mbr, mbr_rot = self._minimum_bounding_rectangle(hull_points)

            # Rotate the matrix
            # Save the rotated matrix in case user wants to change the pixel size
            self._xrot = np.dot(mbr_rot, x_new.T).T

            # Determine feature coordinates based on pixel dimension
            self._calculate_coords()

            # plot rotation diagram if requested
            if plot is True:
                # Create subplots
                fig, ax = plt.subplots(1, 1, figsize=(10, 7), squeeze=False)
                ax[0, 0].scatter(x_new[:, 0],
                                 x_new[:, 1],
                                 cmap=plt.cm.get_cmap("jet", 10),
                                 marker="x",
                                 alpha=1.0)
                ax[0, 0].fill(x_new[chvertices, 0],
                              x_new[chvertices, 1],
                              edgecolor='r',
                              fill=False)
                ax[0, 0].fill(mbr[:, 0], mbr[:, 1], edgecolor='g', fill=False)
                plt.gca().set_aspect('equal', adjustable='box')
                plt.show()
            return self

        @property
        def pixels(self):
            """The image matrix dimensions

            Returns:
                tuple: the image matrix dimensions (height, width)

            """
            return self._pixels

        @pixels.setter
        def pixels(self, pixels):
            """Set the image matrix dimension

            Args:
                pixels: int or tuple with the dimensions (height, width)
                of the image matrix

            """
            if isinstance(pixels, int):
                pixels = (pixels, pixels)
            self._pixels = pixels
            # recalculate coordinates if already fit
            if hasattr(self, '_coords'):
                self._calculate_coords()

        def _calculate_coords(self):
            """Calculate the matrix coordinates of each feature based on the
            pixel dimensions.

            Each axis of the rotated embedding is cut into `pixels` evenly spaced
            bin edges between its minimum and maximum; the result is stored in
            `self._coords` with shape (2, n_features).
            """
            axis0 = self._xrot[:, 0]
            axis1 = self._xrot[:, 1]
            # digitize is 1-based for in-range values, hence the "- 1"; the
            # maximum maps to the last pixel.
            ax0_coord = np.digitize(axis0,
                                    bins=np.linspace(axis0.min(),
                                                     axis0.max(),
                                                     self._pixels[0])) - 1
            ax1_coord = np.digitize(axis1,
                                    bins=np.linspace(axis1.min(),
                                                     axis1.max(),
                                                     self._pixels[1])) - 1
            self._coords = np.stack((ax0_coord, ax1_coord))

        def transform(self, X, empty_value=0):
            """Transform the input matrix into image matrices

            Features that fall on the same pixel are averaged. Input values are
            clipped to [0, 1] first.

            Args:
                X: {array-like, sparse matrix} of shape (n_samples, n_features)
                    where n_features matches the training set.
                empty_value: numeric value to fill elements where no features are
                    mapped. Default = 0 (although it was 1 in the paper).

            Returns:
                numpy.ndarray of shape (n_samples, height, width, 3): one image
                per sample with the single-channel matrix repeated across the
                three colour channels.
            """
            # Group by location (x1, y1) of each feature
            # Tranpose to get (n_features, n_samples)
            img_coords = pd.DataFrame(np.vstack(
                (self._coords, X.clip(0, 1))).T).groupby(
                    [0, 1],  # (x1, y1)
                    as_index=False).mean()

            img_matrices = []
            blank_mat = np.zeros(self._pixels)
            if empty_value != 0:
                blank_mat[:] = empty_value
            # Columns 0 and 1 hold the pixel coordinates; every later column
            # holds one sample.
            for z in range(2, img_coords.shape[1]):
                img_matrix = blank_mat.copy()
                img_matrix[img_coords[0].astype(int),
                           img_coords[1].astype(int)] = img_coords[z]
                img_matrices.append(img_matrix)
            
            img_matrices = np.array([self._mat_to_rgb(m) for m in img_matrices])
                
            return img_matrices
        
        def transform_3d(self, X, empty_value=0):
            """Transform the input matrix into three-channel image matrices

            Channel 0 holds the mean, channel 1 the minimum and channel 2 the
            maximum of the features that share a pixel.

            Args:
                X: {array-like, sparse matrix} of shape (n_samples, n_features)
                    where n_features matches the training set.
                empty_value: numeric value to fill elements where no features are
                    mapped. Default = 0 (although it was 1 in the paper).

            Returns:
                A list of n_samples numpy arrays of shape (3, height, width)
            """

            # Group by location (x1, y1) of each feature
            # Tranpose to get (n_features, n_samples)
            img_coords = pd.DataFrame(np.vstack(
                (self._coords, X.clip(0, 1))).T).groupby(
                    [0, 1],  # (x1, y1)
                    as_index=False)
            avg_img_coords = img_coords.mean()
            min_img_coords = img_coords.min()
            max_img_coords = img_coords.max()

            img_matrices = []
            blank_mat = np.zeros((3, self._pixels[0], self._pixels[1]))
            if empty_value != 0:
                blank_mat[:, :, :] = empty_value
            for z in range(2, avg_img_coords.shape[1]):
                img_matrix = blank_mat.copy()
                img_matrix[0, avg_img_coords[0].astype(int),
                           avg_img_coords[1].astype(int)] = avg_img_coords[z]
                img_matrix[1, min_img_coords[0].astype(int),
                           min_img_coords[1].astype(int)] = min_img_coords[z]
                img_matrix[2, max_img_coords[0].astype(int),
                           max_img_coords[1].astype(int)] = max_img_coords[z]
                img_matrices.append(img_matrix)

            return img_matrices

        def fit_transform(self, X, empty_value=0):
            """Train the image transformer from the training set (X) and return
            the transformed data.

            Args:
                X: {array-like, sparse matrix} of shape (n_samples, n_features)
                empty_value: numeric value to fill elements where no features are
                    mapped. Default = 0 (although it was 1 in the paper).

            Returns:
                numpy.ndarray of shape (n_samples, height, width, 3), see
                `transform`.
            """
            self.fit(X)
            return self.transform(X, empty_value=empty_value)
        
        def fit_transform_3d(self, X, empty_value=0):
            """Train the image transformer from the training set (X) and return
            the three-channel transformed data.

            Args:
                X: {array-like, sparse matrix} of shape (n_samples, n_features)
                empty_value: numeric value to fill elements where no features are
                    mapped. Default = 0 (although it was 1 in the paper).

            Returns:
                A list of n_samples numpy arrays of shape (3, height, width),
                see `transform_3d`.
            """
            self.fit(X)
            return self.transform_3d(X, empty_value=empty_value)

        def feature_density_matrix(self):
            """Generate image matrix with feature counts per pixel

            Returns:
                img_matrix (ndarray): matrix with feature counts per pixel
            """
            fdmat = np.zeros(self._pixels)
            # Group by location (x1, y1) of each feature
            # Tranpose to get (n_features, n_samples)
            coord_cnt = (
                pd.DataFrame(self._coords.T).assign(count=1).groupby(
                    [0, 1],  # (x1, y1)
                    as_index=False).count())
            fdmat[coord_cnt[0].astype(int),
                  coord_cnt[1].astype(int)] = coord_cnt['count']
            return fdmat

        @staticmethod
        def _minimum_bounding_rectangle(hull_points):
            """Find the smallest bounding rectangle for a set of points.

            Modified from JesseBuesking at https://stackoverflow.com/a/33619018
            Returns a set of points representing the corners of the bounding box.

            Args:
                hull_points : an nx2 matrix of hull coordinates

            Returns:
                (tuple): tuple containing
                    coords (ndarray): coordinates of the corners of the rectangle
                    rotmat (ndarray): rotation matrix to align edges of rectangle
                        to x and y
            """

            pi2 = np.pi / 2.

            # Calculate edge angles
            # NOTE(review): this yields n-1 edges; the edge closing the hull
            # (last point back to the first) is not among the candidates.
            edges = hull_points[1:] - hull_points[:-1]
            angles = np.arctan2(edges[:, 1], edges[:, 0])
            angles = np.abs(np.mod(angles, pi2))
            angles = np.unique(angles)

            # Find rotation matrices
            rotations = np.vstack([
                np.cos(angles),
                np.cos(angles - pi2),
                np.cos(angles + pi2),
                np.cos(angles)
            ]).T
            rotations = rotations.reshape((-1, 2, 2))

            # Apply rotations to the hull
            rot_points = np.dot(rotations, hull_points.T)

            # Find the bounding points
            min_x = np.nanmin(rot_points[:, 0], axis=1)
            max_x = np.nanmax(rot_points[:, 0], axis=1)
            min_y = np.nanmin(rot_points[:, 1], axis=1)
            max_y = np.nanmax(rot_points[:, 1], axis=1)

            # Find the box with the best area
            areas = (max_x - min_x) * (max_y - min_y)
            best_idx = np.argmin(areas)

            # Return the best box
            x1 = max_x[best_idx]
            x2 = min_x[best_idx]
            y1 = max_y[best_idx]
            y2 = min_y[best_idx]
            rotmat = rotations[best_idx]

            # Generate coordinates
            coords = np.zeros((4, 2))
            coords[0] = np.dot([x1, y2], rotmat)
            coords[1] = np.dot([x2, y2], rotmat)
            coords[2] = np.dot([x2, y1], rotmat)
            coords[3] = np.dot([x1, y1], rotmat)

            return coords, rotmat
        
        @staticmethod
        def _mat_to_rgb(mat):
            """Convert image matrix to numpy rgb format
            Args:
                mat: {array-like} (M, N)
            Returns:
                A numpy.ndarray (M, N, 3) with original values repeated across
                RGB channels.
            """
            return np.repeat(mat[:, :, np.newaxis], 3, axis=2)

    class TrainDataset(torch.utils.data.Dataset):
        """Dataset that turns one normalized feature row into an image on the fly.

        Args:
            features: 2-D array indexed as ``features[index, :]``.
            labels: 2-D array of targets, indexed the same way.
            transformer: fitted object whose ``transform(X, empty_value=...)``
                returns one image per row of X.
        """
        def __init__(self, features, labels, transformer):
            self.features = features
            self.labels = labels
            self.transformer = transformer
            # Built once here rather than on every __getitem__ call; the
            # pipeline holds no per-sample state.
            self.preprocess = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])

        def __getitem__(self, index):
            """Return ``{"x": float image tensor, "y": float label tensor}``."""
            # The transformer expects a 2-D (n_samples, n_features) input.
            normalized = self.features[index, :]
            normalized = np.expand_dims(normalized, axis=0)

            # Note: we are setting empty_value=1 to follow the setup in the paper
            image = self.transformer.transform(normalized, empty_value=1)[0]

            return {"x": self.preprocess(image).type(torch.float), "y": torch.tensor(self.labels[index, :],dtype=torch.float)}
        
        
        def __len__(self):
            """Number of samples (rows of `features`)."""
            return self.features.shape[0]


    class TestDataset(torch.utils.data.Dataset):
        """Label-free dataset that turns one normalized feature row into an image.

        Args:
            features: 2-D array indexed as ``features[index, :]``.
            transformer: fitted object whose ``transform(X, empty_value=...)``
                returns one image per row of X.
        """
        def __init__(self, features,transformer):
            self.features = features
            self.transformer = transformer
            # Built once here rather than on every __getitem__ call; the
            # pipeline holds no per-sample state.
            self.preprocess = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])

        def __getitem__(self, index):
            """Return ``{"x": float image tensor, "y": -1}``; -1 is a label placeholder."""
            # The transformer expects a 2-D (n_samples, n_features) input.
            normalized = self.features[index, :]
            normalized = np.expand_dims(normalized, axis=0)

            # Note: we are setting empty_value=1 to follow the setup in the paper
            image = self.transformer.transform(normalized, empty_value=1)[0]

            return  {"x": self.preprocess(image).type(torch.float), "y": -1}

        def __len__(self):
            """Number of samples (rows of `features`)."""
            return self.features.shape[0]
        


    def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
        """Run one training epoch and return the mean batch loss.

        Args:
            model: network to train; switched to train mode.
            optimizer: optimizer stepped once per batch.
            scheduler: LR scheduler, also stepped once per batch.
            loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
            dataloader: sized iterable of dicts with keys ``'x'`` and ``'y'``.
            device: device the batch tensors are moved to.

        Returns:
            Sum of the per-batch losses divided by ``len(dataloader)``.
        """
        model.train()
        batch_losses = []

        for batch in dataloader:
            optimizer.zero_grad()
            x = batch['x'].to(device)
            y = batch['y'].to(device)
            batch_loss = loss_fn(model(x), y)
            batch_loss.backward()
            optimizer.step()
            # Stepped per batch, not per epoch.
            scheduler.step()
            batch_losses.append(batch_loss.item())

        return sum(batch_losses) / len(dataloader)


    def valid_fn(model, loss_fn, dataloader, device):
        """Evaluate the model on a validation loader.

        Args:
            model: network to evaluate; switched to eval mode.
            loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
            dataloader: sized iterable of dicts with keys ``'x'`` and ``'y'``.
            device: device the batch tensors are moved to.

        Returns:
            Tuple ``(final_loss, valid_preds)``: the mean batch loss and a numpy
            array with the sigmoid of the model outputs for all batches,
            concatenated in loader order.
        """
        model.eval()
        final_loss = 0
        valid_preds = []

        # No gradients are needed for validation; without this guard every
        # forward pass built an autograd graph. inference_fn already runs its
        # forward pass under no_grad.
        with torch.no_grad():
            for data in dataloader:
                inputs, targets = data['x'].to(device), data['y'].to(device)
                outputs = model(inputs)
                loss = loss_fn(outputs, targets)

                final_loss += loss.item()
                valid_preds.append(outputs.sigmoid().detach().cpu().numpy())

        final_loss /= len(dataloader)
        valid_preds = np.concatenate(valid_preds)

        return final_loss, valid_preds

    def inference_fn(model, dataloader, device):
        """Predict probabilities for every batch of a loader.

        Args:
            model: network to run; switched to eval mode.
            dataloader: iterable of dicts whose ``'x'`` entry is the input batch.
            device: device the input tensors are moved to.

        Returns:
            numpy array with the sigmoid of the model outputs, concatenated
            over all batches in loader order.
        """
        model.eval()
        batch_probs = []

        for batch in dataloader:
            # The forward pass runs without gradient tracking.
            with torch.no_grad():
                logits = model(batch['x'].to(device))
            batch_probs.append(logits.sigmoid().detach().cpu().numpy())

        return np.concatenate(batch_probs)

   

    def run_training(fold, seed):

        seed_everything(seed)

        trn_idx = train[train['kfold'] != fold].index
        val_idx = train[train['kfold'] == fold].index

        train_df = train[train['kfold'] != fold].reset_index(drop=True).copy()
        valid_df = train[train['kfold'] == fold].reset_index(drop=True).copy()

        x_train, y_train,y_train_ns = train_df[feature_cols].values, train_df[target_cols].values,train_df[target_nonsc_cols2].values
        x_valid, y_valid,y_valid_ns  =  valid_df[feature_cols].values, valid_df[target_cols].values,valid_df[target_nonsc_cols2].values
        x_test = test_[feature_cols].values

        #------------ norm --------------
#         col_num = list(set(feat_dic['gene'] + feat_dic['cell']) & set(feature_cols))
#         col_num.sort()
#         x_train[col_num],ss = norm_fit(x_train[col_num],True,'quan')
#         x_valid[col_num]    = norm_tra(x_valid[col_num],ss)
#         x_test[col_num]     = norm_tra(x_test[col_num],ss)
        

        #------------ pca --------------
#         def pca_pre(tr,va,te,
#                     n_comp,feat_raw,feat_new):
#             pca = PCA(n_components=n_comp, random_state=42)
#             tr2 = pd.DataFrame(pca.fit_transform(tr[feat_raw]),columns=feat_new)
#             va2 = pd.DataFrame(pca.transform(va[feat_raw]),columns=feat_new)
#             te2 = pd.DataFrame(pca.transform(te[feat_raw]),columns=feat_new)
#             return(tr2,va2,te2)


#         pca_feat_g = [f'pca_G-{i}' for i in range(n_comp1)]
#         feat_dic['pca_g'] = pca_feat_g
#         x_tr_g_pca,x_va_g_pca,x_te_g_pca = pca_pre(x_train,x_valid,x_test,
#                                                    n_comp1,feat_dic['gene'],pca_feat_g)
#         x_train = pd.concat([x_train,x_tr_g_pca],axis = 1)
#         x_valid = pd.concat([x_valid,x_va_g_pca],axis = 1)
#         x_test  = pd.concat([x_test,x_te_g_pca],axis = 1)

#         pca_feat_g = [f'pca_C-{i}' for i in range(n_comp2)]
#         feat_dic['pca_c'] = pca_feat_g
#         x_tr_c_pca,x_va_c_pca,x_te_c_pca = pca_pre(x_train,x_valid,x_test,
#                                                    n_comp2,feat_dic['cell'],pca_feat_g)
#         x_train = pd.concat([x_train,x_tr_c_pca],axis = 1)
#         x_valid = pd.concat([x_valid,x_va_c_pca],axis = 1)
#         x_test  = pd.concat([x_test,x_te_c_pca], axis = 1)

#         x_train,x_valid,x_test = x_train.values,x_valid.values,x_test.values

        def save_pickle(obj, model_output_folder, seed, fold_i, name):
            dump(obj, open(f"{model_output_folder}/seed{seed}_fold{fold_i}_{name}.pkl", 'wb'), pickle.HIGHEST_PROTOCOL)

        
        # LogScaler (Norm-2 Normalization)
        print("Running norm-2 normalization ......")
        scaler = LogScaler()
        x_train = scaler.fit_transform(x_train)
        x_valid = scaler.transform(x_valid)
        x_test = scaler.transform(x_test)
        
        save_pickle(scaler, model_output_folder, seed, fold, "log-scaler")
        
        
        # Extract DeepInsight Feature Map
        print("Extracting feature map ......")
        transformer = DeepInsightTransformer(feature_extractor='tsne_exact',
                                pixels=224,
                                perplexity=5,
                                random_state=seed,
                                n_jobs=-1)
        
        transformer.fit(x_train)
        
#         
        
        save_pickle(transformer, model_output_folder, seed, fold, "deepinsight-transform")

       
        train_dataset = TrainDataset(x_train, y_train_ns, transformer)
        valid_dataset = TrainDataset(x_valid, y_valid_ns, transformer)
        trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, len(target_nonsc_cols2))
        model.to(DEVICE)

        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=0.0001)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e5, 
                                                  max_lr=0.0001, epochs=EPOCHS, steps_per_epoch=len(trainloader))

        loss_tr = nn.BCEWithLogitsLoss()   #SmoothBCEwLogits(smoothing = 0.001)
        loss_va = nn.BCEWithLogitsLoss()    

        early_stopping_steps = EARLY_STOPPING_STEPS
        early_step = 0

        for epoch in range(1):
            train_loss = train_fn(model, optimizer,scheduler, loss_tr, trainloader, DEVICE)
            valid_loss, valid_preds = valid_fn(model, loss_va, validloader, DEVICE)
            print(f"FOLD: {fold}, EPOCH: {epoch},train_loss: {train_loss}, valid_loss: {valid_loss}")

        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, len(target_cols))
        model.to(DEVICE)

        train_dataset = TrainDataset(x_train, y_train, transformer)
        valid_dataset = TrainDataset(x_valid, y_valid, transformer)
        trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
                                                  max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))

        loss_tr = SmoothBCEwLogits(smoothing = 0.001)
        loss_va = nn.BCEWithLogitsLoss()    

        early_stopping_steps = EARLY_STOPPING_STEPS
        early_step = 0

        oof = np.zeros((len(train), len(target_cols)))
        best_loss = np.inf

        mod_name = f"FOLD_mod11_{seed}_{fold}_.pth"
        
        for epoch in range(EPOCHS):
            train_loss = train_fn(model, optimizer,scheduler, loss_tr, trainloader, DEVICE)
            valid_loss, valid_preds = valid_fn(model, loss_va, validloader, DEVICE)
            print(f"SEED: {seed}, FOLD: {fold}, EPOCH: {epoch},train_loss: {train_loss}, valid_loss: {valid_loss}")

            if valid_loss < best_loss:

                best_loss = valid_loss
                oof[val_idx] = valid_preds
                torch.save(model.state_dict(), mod_name)

            elif(EARLY_STOP == True):

                early_step += 1
                if (early_step >= early_stopping_steps):
                    break

        #--------------------- PREDICTION---------------------
        testdataset = TestDataset(x_test, transformer)
        testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)

        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, len(target_cols))
        
        model.load_state_dict(torch.load(mod_name))
        model.to(DEVICE)

        predictions = np.zeros((len(test_), len(target_cols)))
        predictions = inference_fn(model, testloader, DEVICE)
        return oof, predictions

    def run_k_fold(NFOLDS, seed):
        """Run ``run_training`` for every fold of one seed and combine the results.

        Args:
            NFOLDS: Number of folds; folds ``0 .. NFOLDS-1`` are trained in order.
            seed: Seed forwarded unchanged to ``run_training``.

        Returns:
            Tuple ``(oof, predictions)``:
              * ``oof`` has shape ``(len(train), len(target_cols))`` and is the
                sum of the per-fold out-of-fold arrays.
              * ``predictions`` has shape ``(len(test), len(target_cols))`` and
                is the sum of each fold's test predictions divided by ``NFOLDS``.
        """
        n_targets = len(target_cols)
        oof_total = np.zeros((len(train), n_targets))
        pred_mean = np.zeros((len(test), n_targets))

        for fold_idx in range(NFOLDS):
            fold_oof, fold_pred = run_training(fold_idx, seed)

            # Test predictions are averaged across folds; OOF arrays are summed.
            pred_mean += fold_pred / NFOLDS
            oof_total += fold_oof

        return oof_total, pred_mean

    # Train all folds for this seed, then add this seed's share (1 / len(SEED))
    # to the running out-of-fold and test-prediction totals.
    oof_, predictions_ = run_k_fold(NFOLDS, seed)
    oof += oof_ / len(SEED)
    predictions += predictions_ / len(SEED)

    # Score a rescaled copy of the running OOF total: multiplying by len(SEED) and
    # dividing by the 1-based position of this seed turns it into the mean over the
    # seeds processed so far (assuming `oof` started at zero and seeds are visited
    # in SEED order -- TODO confirm against the loop header).
    oof_tmp = dp(oof) * len(SEED) / (1 + SEED.index(seed))
    sc_dic[seed] = np.mean([
        log_loss(train[target_cols].iloc[:, col], oof_tmp[:, col])
        for col in range(len(target_cols))
    ])
    

In [None]:
from sklearn.metrics import log_loss
# Overall CV score: mean over target columns of the log loss between the training
# labels and the accumulated out-of-fold predictions.
print(np.mean([
    log_loss(train[target_cols].iloc[:, col], oof[:, col])
    for col in range(len(target_cols))
]))

# Store the out-of-fold and test predictions in the target columns of the frames.
train0[target_cols] = oof
test[target_cols] = predictions


# Build the submission: drop the target columns from the sample file, left-join the
# predicted targets on `sig_id`, and fill anything left unmatched by the join with 0.
sub = sample_submission.drop(columns=target_cols)
sub = sub.merge(test[['sig_id'] + target_cols], on='sig_id', how='left')
sub = sub.fillna(0)
sub.to_csv('submission.csv', index=False)