In [1]:
%reload_ext autoreload
%reload_ext autoreload

In [2]:
import time
import os
import sys
import copy
import time
import datetime
import random
import math
import warnings
from functools import partial
# warnings.filterwarnings('ignore')

In [3]:
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from torch import Tensor
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn

In [4]:
import numpy as np
import scipy
import seaborn as sns
import pandas as pd
from tqdm.notebook import tqdm, trange

In [5]:
sys.path.append("../")

In [6]:
from chaosmining.data_utils import read_formulas, create_simulation_data
from chaosmining.simulation.models import MLPRegressor
from chaosmining.simulation.functions import abs_argmax_topk
from chaosmining.utils import radar_factory

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, precision_score, recall_score, accuracy_score, roc_curve, auc, balanced_accuracy_score

In [8]:
from captum.attr import IntegratedGradients, Saliency, DeepLift, FeatureAblation

In [9]:
import matplotlib
# mpl.use('Agg')
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt 
from matplotlib.colors import ListedColormap

matplotlib.rcParams['lines.linewidth'] = 1
matplotlib.rcParams['lines.markersize'] = 5
plt.rcParams['figure.figsize'] = [4, 4]

# Santander customer dataset

In [10]:
house_train_path = '../data/house_prices_advanced_regression/train.csv'
house_test_path = '../data/house_prices_advanced_regression/test.csv'

In [11]:
customer_train_path = '../data/santander_customer_satisfaction/train.csv'
customer_test_path = '../data/santander_customer_satisfaction/test.csv'
sample_path = '../data/santander_customer_satisfaction/sample_submission.csv'

In [12]:
train  = pd.read_csv(customer_train_path,index_col=0)
test   = pd.read_csv(customer_test_path, index_col=0)
sample = pd.read_csv(sample_path)

In [13]:
train.dtypes.value_counts()

int64      259
float64    111
Name: count, dtype: int64

In [14]:
train.isnull().values.any()

False

In [15]:
train.select_dtypes(include=['int64']).nunique()

var3                             208
var15                            100
ind_var1_0                         2
ind_var1                           2
ind_var2_0                         1
                                ... 
num_var45_ult3                   172
saldo_var2_ult1                    1
saldo_medio_var13_medio_hace3      1
saldo_medio_var13_medio_ult1       3
TARGET                             2
Length: 259, dtype: int64

a good many of the integer features have one single value. Such columns have zero variance and thus have no predictive value, In https://www.kaggle.com/code/carlmcbrideellis/tabular-classification-with-neural-networks-keras/notebook they drop these columns from the train, as well as the test data to maintain consistency.

In [16]:
# features_to_drop = train.nunique()
# features_to_drop = features_to_drop.loc[features_to_drop.values==1].index
# # now drop these columns from both the training and the test datasets
# train = train.drop(features_to_drop,axis=1)
# test  = test.drop(features_to_drop,axis=1)

In [17]:
train.dtypes.value_counts()

int64      259
float64    111
Name: count, dtype: int64

In [18]:
train.shape

(76020, 370)

In [19]:
X = train.iloc[:,:-1]
y = train['TARGET']

In [20]:
from imblearn.under_sampling import RandomUnderSampler

In [21]:
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)
X_resampled = X_resampled.to_numpy()
y_resampled = y_resampled.to_numpy().reshape(y_resampled.shape[0],-1)

In [22]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, 
                                                  train_size=0.8,
                                                  test_size=0.2, 
                                                  random_state=42, 
                                                  shuffle=True)

In [23]:
from sklearn.preprocessing import MinMaxScaler
scaler  = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test   = scaler.transform(X_test)
# test    = scaler.transform(test)

# Training and Test a MLP

In [131]:
train_set = TensorDataset(Tensor(X_train), Tensor(y_train))
train_loader = DataLoader(train_set, batch_size=1000, shuffle=True)
test_set = TensorDataset(Tensor(X_test), Tensor(y_test))
test_loader = DataLoader(test_set, batch_size=y_test.shape[0])

In [132]:
hidden_layer_sizes = (100,100,100)
num_epochs = 500

In [24]:
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
# device = 'cpu'

In [173]:
class LinearResBlock(nn.Module):
    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_channels, out_channels),
            nn.ReLU(),
        )

    def forward(self, x: Tensor)-> Tensor:
        out = self.net(x)
        return out-x

In [25]:
from typing import List

class LinearBlock(nn.Module):
    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_channels, out_channels),
            nn.ReLU(),
        )

    def forward(self, x: Tensor)-> Tensor:
        x = self.net(x)
        return x
    
class MLPClassifier(nn.Module):
    def __init__(self, in_channels: int, sizes: List[int], p: float=0.0):
        super().__init__()
        self.blocks = nn.ModuleList([
            LinearBlock(in_channels, sizes[0]),
            *[LinearBlock(sizes[i], sizes[i+1]) for i in range(len(sizes)-1)]
        ])
        self.dropout = nn.Dropout(p)
        self.project = nn.Sequential(
            nn.Linear(sizes[-1], 1),
            nn.Sigmoid()
        )
    def forward(self, x: Tensor)-> Tensor:
        for block in self.blocks:
            x = block(x)
        x = self.dropout(x)
        x = self.project(x)
        return x

class MLPCReslassifier(nn.Module):
    def __init__(self, in_channels: int, sizes: List[int], p: float=0.0):
        super().__init__()
        self.blocks = nn.ModuleList([
            LinearBlock(in_channels, sizes[0]),
            *[LinearResBlock(sizes[i], sizes[i+1]) for i in range(len(sizes)-1)]
        ])
        self.dropout = nn.Dropout(p)
        self.project = nn.Sequential(
            nn.Linear(sizes[-1], 1),
            nn.Sigmoid()
        )
    def forward(self, x: Tensor)-> Tensor:
        for block in self.blocks:
            x = block(x)
        x = self.dropout(x)
        x = self.project(x)
        return x

In [141]:
p=0
model = MLPClassifier(X_train.shape[-1], hidden_layer_sizes, p=p)
model.to(device)
model.train()

MLPClassifier(
  (blocks): ModuleList(
    (0): LinearBlock(
      (net): Sequential(
        (0): Linear(in_features=369, out_features=100, bias=True)
        (1): ReLU()
      )
    )
    (1-2): 2 x LinearBlock(
      (net): Sequential(
        (0): Linear(in_features=100, out_features=100, bias=True)
        (1): ReLU()
      )
    )
  )
  (dropout): Dropout(p=0, inplace=False)
  (project): Sequential(
    (0): Linear(in_features=100, out_features=1, bias=True)
    (1): Sigmoid()
  )
)

In [142]:
# criterion = nn.MSELoss(reduction='mean')
criterion = nn.BCELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), 0.001)

In [26]:
def train(model, dataloader, num_epochs, optimizer):
    pbar = trange(num_epochs, desc='Train', unit='epoch', initial=0, disable=False)
    for epoch in pbar:  # loop over the dataset multiple times
        running_loss = 0.0
        for inputs, targets in dataloader:
            # zero the parameter gradients
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)
            # forward pass
            outputs = model(inputs)
            # defining loss
            # print(outputs.dtype,targets.dtype)
            # print(outputs.shape,targets.shape)
            # print(outputs[0],targets[0])
            loss = criterion(outputs, targets)

            # computing gradients
            loss.backward()
            # accumulating running loss
            running_loss += loss.item()
            # updated weights based on computed gradients
            optimizer.step()
        pbar.set_postfix(loss = '%.3f' % running_loss)
        # print(loss.item())
    print('train loss:', running_loss)

In [144]:
train(model, train_loader, num_epochs, optimizer)

Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6263459026813507


In [145]:
y_pred = model(Tensor(X_test).to(device)).detach().cpu().numpy()

y_test =y_test.astype(int)
fpr, tpr, thresholds = roc_curve(y_test,y_pred)
ACC = balanced_accuracy_score(y_test,np.around(y_pred))
print(f'With Drop out{p}')
print('Test auc is', auc(fpr, tpr))
print('Balanced_accuracy_score is', ACC)

y_pred_train = model(Tensor(X_train).to(device)).detach().cpu().numpy()


fpr, tpr, thresholds = roc_curve(y_train,y_pred_train)
ACC = balanced_accuracy_score(y_train,np.around(y_pred_train))
print(f'With Drop out:{p}')
print('Train auc is', auc(fpr, tpr))
print('Balanced_accuracy_score of Train is', ACC)

With Drop out0
Test auc is 0.7497733729838487
Balanced_accuracy_score is 0.722542202372398
With Drop out:0
Train auc is 0.9339094995732742
Balanced_accuracy_score of Train is 0.8581407884234641


# play with dropout

In [146]:
for p in [0,0.1,0.2,0.3,0.4,0.5]:
    train_set = TensorDataset(Tensor(X_train), Tensor(y_train))
    train_loader = DataLoader(train_set, batch_size=1000, shuffle=True)
    test_set = TensorDataset(Tensor(X_test), Tensor(y_test))
    test_loader = DataLoader(test_set, batch_size=y_test.shape[0],shuffle=False)
    hidden_layer_sizes = (100,100,100)
    num_epochs = 500
    model = MLPClassifier(X_train.shape[-1], hidden_layer_sizes, p=p)
    model.to(device)
    model.train()
    # print(model)
    criterion = nn.BCELoss(reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(), 0.001)
    train(model, train_loader, num_epochs, optimizer)
    model.eval()
    y_pred = model(Tensor(X_test).to(device)).detach().cpu().numpy()
    
    y_test =y_test.astype(int)
    
    fpr, tpr, thresholds = roc_curve(y_test,y_pred)
    ACC = balanced_accuracy_score(y_test,np.around(y_pred))
    print(f'With Drop out{p}')
    print('Test auc is', auc(fpr, tpr))
    print('Balanced_accuracy_score is', ACC)

    y_pred_train = model(Tensor(X_train).to(device)).detach().cpu().numpy()
    
    
    fpr, tpr, thresholds = roc_curve(y_train,y_pred_train)
    ACC = balanced_accuracy_score(y_train,np.around(y_pred_train))
    print(f'With Drop out:{p}')
    print('Train auc is', auc(fpr, tpr))
    print('Balanced_accuracy_score of Train is', ACC)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.7384424209594727
With Drop out0
Test auc is 0.7531921244348142
Balanced_accuracy_score is 0.7188830051847839
With Drop out:0
Train auc is 0.9312184490675954
Balanced_accuracy_score of Train is 0.8543059572306704


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.605219841003418
With Drop out0.1
Test auc is 0.746643432791271
Balanced_accuracy_score is 0.7244685320096842
With Drop out:0.1
Train auc is 0.936305556419373
Balanced_accuracy_score of Train is 0.8558965906852837


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 3.1344776153564453
With Drop out0.2
Test auc is 0.7452435964049217
Balanced_accuracy_score is 0.720679438849398
With Drop out:0.2
Train auc is 0.9235104331871754
Balanced_accuracy_score of Train is 0.8492930517979497


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6422038078308105
With Drop out0.3
Test auc is 0.7587541594348695
Balanced_accuracy_score is 0.7275362879600252
With Drop out:0.3
Train auc is 0.9337864919682252
Balanced_accuracy_score of Train is 0.8537384291652413


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6573023796081543
With Drop out0.4
Test auc is 0.7607482008025914
Balanced_accuracy_score is 0.7190847585040406
With Drop out:0.4
Train auc is 0.9358712290049168
Balanced_accuracy_score of Train is 0.858726456655368


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6352637112140656
With Drop out0.5
Test auc is 0.7600240998485469
Balanced_accuracy_score is 0.7266049061985252
With Drop out:0.5
Train auc is 0.9351433764205478
Balanced_accuracy_score of Train is 0.8580760021146252


In [169]:
y_train.shape

(4812, 1)

# MLP res Block

In [182]:
for p in [0,0.1,0.2,0.3,0.4,0.5]:
    for hidden_layer_sizes in [(100,100,100),(100,100,100,100),(100,100,100,100,100),(100,100,100,100,100,100)]:
        train_set = TensorDataset(Tensor(X_train), Tensor(y_train))
        train_loader = DataLoader(train_set, batch_size=1000, shuffle=True)
        test_set = TensorDataset(Tensor(X_test), Tensor(y_test))
        test_loader = DataLoader(test_set, batch_size=y_test.shape[0],shuffle=False)
        num_epochs = 500
        # model = MLPClassifier(X_train.shape[-1], hidden_layer_sizes, p=p)
        model = MLPCReslassifier(X_train.shape[-1], hidden_layer_sizes)
        model.to(device)
        model.train()
        
        criterion = nn.BCELoss(reduction='mean')
        optimizer = torch.optim.Adam(model.parameters(), 0.001)
        train(model, train_loader, num_epochs, optimizer)
        model.eval()
        y_pred = model(Tensor(X_test).to(device)).detach().cpu().numpy()
        
        y_test = y_test.astype(int)
        
        fpr, tpr, thresholds = roc_curve(y_test,y_pred)
        ACC = balanced_accuracy_score(y_test,np.around(y_pred))
        print(f'With Drop out{p}')
        print('Test auc is', auc(fpr, tpr))
        print('Balanced_accuracy_score is', ACC)
        
        y_pred_train = model(Tensor(X_train).to(device)).detach().cpu().numpy()
        fpr, tpr, thresholds = roc_curve(y_train,y_pred_train)
        ACC = balanced_accuracy_score(y_train,np.around(y_pred_train))
        print(f'With Drop out:{p}')
        print(f'hidden_layer_sizes:{hidden_layer_sizes}')
        print('Train auc is', auc(fpr, tpr))
        print('Balanced_accuracy_score of Train is', ACC)

Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.7860514223575592
With Drop out0
Test auc is 0.7493132095912975
Balanced_accuracy_score is 0.7169539118033983
With Drop out:0
hidden_layer_sizes:(100, 100, 100)
Train auc is 0.9318143103454233
Balanced_accuracy_score of Train is 0.8528946522789231


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5011220872402191
With Drop out0
Test auc is 0.746303492267044
Balanced_accuracy_score is 0.724134118973656
With Drop out:0
hidden_layer_sizes:(100, 100, 100, 100)
Train auc is 0.9433249370277077
Balanced_accuracy_score of Train is 0.8617600107804417


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6371416449546814
With Drop out0
Test auc is 0.7209350851785932
Balanced_accuracy_score is 0.7088367953834418
With Drop out:0
hidden_layer_sizes:(100, 100, 100, 100, 100)
Train auc is 0.9334130636840777
Balanced_accuracy_score of Train is 0.8579448746255351


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.3571505546569824
With Drop out0
Test auc is 0.727378754546359
Balanced_accuracy_score is 0.7130211039499431
With Drop out:0
hidden_layer_sizes:(100, 100, 100, 100, 100, 100)
Train auc is 0.9539942918942826
Balanced_accuracy_score of Train is 0.8757284572565849


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5848039388656616
With Drop out0.1
Test auc is 0.7702582442486485
Balanced_accuracy_score is 0.7263423505090816
With Drop out:0.1
hidden_layer_sizes:(100, 100, 100)
Train auc is 0.9326820149751394
Balanced_accuracy_score of Train is 0.8511962144064019


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.575721800327301
With Drop out0.1
Test auc is 0.7405065390185392
Balanced_accuracy_score is 0.7232663033264424
With Drop out:0.1
hidden_layer_sizes:(100, 100, 100, 100)
Train auc is 0.9347036933378944
Balanced_accuracy_score of Train is 0.843859812793482


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5571696758270264
With Drop out0.1
Test auc is 0.7512436848447329
Balanced_accuracy_score is 0.7315796455774567
With Drop out:0.1
hidden_layer_sizes:(100, 100, 100, 100, 100)
Train auc is 0.9429071949083143
Balanced_accuracy_score of Train is 0.8462900768106478


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.4067338705062866
With Drop out0.1
Test auc is 0.725358457609693
Balanced_accuracy_score is 0.711899023845584
With Drop out:0.1
hidden_layer_sizes:(100, 100, 100, 100, 100, 100)
Train auc is 0.9508434313593377
Balanced_accuracy_score of Train is 0.8766914409511667


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6326023638248444
With Drop out0.2
Test auc is 0.7698961937716263
Balanced_accuracy_score is 0.7321213394209403
With Drop out:0.2
hidden_layer_sizes:(100, 100, 100)
Train auc is 0.9330690051932703
Balanced_accuracy_score of Train is 0.8533030651698439


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5230993330478668
With Drop out0.2
Test auc is 0.7479645024707872
Balanced_accuracy_score is 0.7252783090308101
With Drop out:0.2
hidden_layer_sizes:(100, 100, 100, 100)
Train auc is 0.9377249812551613
Balanced_accuracy_score of Train is 0.8548185465062039


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.0401697754859924
With Drop out0.2
Test auc is 0.7326519782880264
Balanced_accuracy_score is 0.6980609571398565
With Drop out:0.2
hidden_layer_sizes:(100, 100, 100, 100, 100)
Train auc is 0.9185585132665084
Balanced_accuracy_score of Train is 0.8325190990038457


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.7641938030719757
With Drop out0.2
Test auc is 0.7278472091712085
Balanced_accuracy_score is 0.7188857689288833
With Drop out:0.2
hidden_layer_sizes:(100, 100, 100, 100, 100, 100)
Train auc is 0.936073275906749
Balanced_accuracy_score of Train is 0.857734966984897


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5974304676055908
With Drop out0.3
Test auc is 0.7668671302386769
Balanced_accuracy_score is 0.7178935847971964
With Drop out:0.3
hidden_layer_sizes:(100, 100, 100)
Train auc is 0.9346344151783091
Balanced_accuracy_score of Train is 0.8495682640378974


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.7970300912857056
With Drop out0.3
Test auc is 0.7412112937638877
Balanced_accuracy_score is 0.7107023226505411
With Drop out:0.3
hidden_layer_sizes:(100, 100, 100, 100)
Train auc is 0.9305991783368404
Balanced_accuracy_score of Train is 0.8467897088244136


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.4723608493804932
With Drop out0.3
Test auc is 0.7333525874172259
Balanced_accuracy_score is 0.7234072542755121
With Drop out:0.3
hidden_layer_sizes:(100, 100, 100, 100, 100)
Train auc is 0.9474551937888069
Balanced_accuracy_score of Train is 0.8683998300007256


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5254745185375214
With Drop out0.3
Test auc is 0.731782780768763
Balanced_accuracy_score is 0.715815249234443
With Drop out:0.3
hidden_layer_sizes:(100, 100, 100, 100, 100, 100)
Train auc is 0.9463068348692008
Balanced_accuracy_score of Train is 0.8659042613842503


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.614294558763504
With Drop out0.4
Test auc is 0.7700634002896405
Balanced_accuracy_score is 0.7287993190134539
With Drop out:0.4
hidden_layer_sizes:(100, 100, 100)
Train auc is 0.9339229751255127
Balanced_accuracy_score of Train is 0.8548071441158482


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5101560354232788
With Drop out0.4
Test auc is 0.7495011441900572
Balanced_accuracy_score is 0.7299324540942105
With Drop out:0.4
hidden_layer_sizes:(100, 100, 100, 100)
Train auc is 0.9422798906752633
Balanced_accuracy_score of Train is 0.8641052751604109


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.05475315451622
With Drop out0.4
Test auc is 0.7219272693102801
Balanced_accuracy_score is 0.6939789071050333
With Drop out:0.4
hidden_layer_sizes:(100, 100, 100, 100, 100)
Train auc is 0.9179794100472335
Balanced_accuracy_score of Train is 0.8250702283587814


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.519183337688446
With Drop out0.4
Test auc is 0.7360113092408548
Balanced_accuracy_score is 0.7264694827376543
With Drop out:0.4
hidden_layer_sizes:(100, 100, 100, 100, 100, 100)
Train auc is 0.9499744310034448
Balanced_accuracy_score of Train is 0.8745908096733734


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6326513886451721
With Drop out0.5
Test auc is 0.7530967752633848
Balanced_accuracy_score is 0.7177553975922262
With Drop out:0.5
hidden_layer_sizes:(100, 100, 100)
Train auc is 0.9335287288407916
Balanced_accuracy_score of Train is 0.8556001285360367


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.524756371974945
With Drop out0.5
Test auc is 0.7503413223962767
Balanced_accuracy_score is 0.7220751296195983
With Drop out:0.5
hidden_layer_sizes:(100, 100, 100, 100)
Train auc is 0.9409288801816091
Balanced_accuracy_score of Train is 0.8638399104394066


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.4353521764278412
With Drop out0.5
Test auc is 0.7376377726433554
Balanced_accuracy_score is 0.726201399560012
With Drop out:0.5
hidden_layer_sizes:(100, 100, 100, 100, 100)
Train auc is 0.9477549384443685
Balanced_accuracy_score of Train is 0.8724253920867411


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.489148736000061
With Drop out0.5
Test auc is 0.7483956465502946
Balanced_accuracy_score is 0.7210829454879114
With Drop out:0.5
hidden_layer_sizes:(100, 100, 100, 100, 100, 100)
Train auc is 0.9477419811826007
Balanced_accuracy_score of Train is 0.8745923645447855


# Iterative Method

In [75]:
y_test

array([[0],
       [0],
       [1],
       ...,
       [1],
       [1],
       [0]])

In [30]:
import pickle

In [31]:
for repeat in range(1,6):
    for xai_method in [Saliency,DeepLift,FeatureAblation,IntegratedGradients]:
        print(xai_method.get_name())
        num_epochs = 500
        hidden_layer_sizes = (100,100,100)
        reduce_rate = 0.5
        best_score = 0
        num_cur_features = X_train.shape[-1]
        select_arr = np.ones(num_cur_features)
        remaining_inds = np.nonzero(select_arr)[0]
        num_select = 3
        num_list = []
        auc_list = []
        acc_list = []
        feature_list = []
        resuming = True
        start_time = time.time()
        while resuming:
            num_list.append(num_cur_features)
            bool_arr = np.array(select_arr, dtype='bool') 
            # print(X_train[...,bool_arr].shape)
            # torch.manual_seed(42)
            train_set = TensorDataset(Tensor(X_train[...,bool_arr]), Tensor(y_train))
            train_loader = DataLoader(train_set, batch_size=y_train.shape[0], shuffle=True)
            test_set = TensorDataset(Tensor(X_test[...,bool_arr]), Tensor(y_test))
            test_loader = DataLoader(test_set, batch_size=y_test.shape[0])
        
            model = MLPClassifier(int(np.sum(select_arr)), hidden_layer_sizes, p=0.0)
            model.to(device)
            model.train()
        
            # criterion = nn.MSELoss(reduction='mean')
            criterion = nn.BCELoss(reduction='mean')
            optimizer = torch.optim.Adam(model.parameters(), 0.001)

            train(model, train_loader, num_epochs, optimizer)
        
            model.eval()
            y_pred = model(Tensor(X_test[...,bool_arr]).to(device)).detach().cpu().numpy()
            fpr, tpr, thresholds = roc_curve(y_test,y_pred)
            auc_score = auc(fpr, tpr)
            # print('Test auc is', auc_score)
            acc = balanced_accuracy_score(y_test,np.around(y_pred))
            # print('Test ACC is', acc)
            print(f'With {num_cur_features},{xai_method.get_name()} get ACC:{acc} AUC:{auc_score}')
            acc_list.append(acc)
            auc_list.append(auc_score)
            feature_list.append(np.where(select_arr==1)[0])
            
            xai = xai_method(model)
        
            num_remove = int(num_cur_features*(1-reduce_rate))
            if num_cur_features - num_remove<=num_select:
                num_remove = num_cur_features - num_select
            # print(num_remove)
            # print('num_remove', num_remove)
            xai_attr_test = xai.attribute(Tensor(X_test[...,bool_arr]).to(device))
            abs_xai_attr_test = np.abs(xai_attr_test.detach().cpu().numpy()).mean(0)
            inds = np.argpartition(abs_xai_attr_test, num_remove)[:num_remove]
            inds_to_remove = remaining_inds[inds]
            select_arr[inds_to_remove] = 0
            
            remaining_inds = np.nonzero(select_arr)[0]
            num_cur_features -= num_remove
            if num_remove == 0:
                resuming = False
        # np.save(f'./result/{xai_method.get_name()}_feature_final.npy',np.where(select_arr==1)[0])
        elapsed_time = time.time() - start_time
        dict_method = {}
        dict_method['auc'] = auc_list
        dict_method['acc'] = acc_list
        dict_method['feat'] = feature_list
        dict_method['time'] = elapsed_time
        with open(f'./result/{xai_method.get_name()}_{repeat}.pkl', 'wb') as f:
            pickle.dump(dict_method, f)

Saliency


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38475772738456726
With 369,Saliency get ACC:0.7141597665188986 AUC:0.7606487060150127




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.366638720035553
With 185,Saliency get ACC:0.727928739622141 AUC:0.7818134583282664




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.45297640562057495
With 93,Saliency get ACC:0.7289927811004124 AUC:0.7698105177045448




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6256109476089478
With 47,Saliency get ACC:0.6242828084062041 AUC:0.7095194401759952




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6463103890419006
With 24,Saliency get ACC:0.5838105398144975 AUC:0.7063148788927336




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6697629690170288
With 12,Saliency get ACC:0.49707043125462924 AUC:0.701052710127464




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.668323814868927
With 6,Saliency get ACC:0.5672363664503577 AUC:0.7068012978542291




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6749904155731201
With 3,Saliency get ACC:0.5580496810639309 AUC:0.7031697381076092
Deep Lift




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.37862691283226013
With 369,Deep Lift get ACC:0.7243358722929126 AUC:0.7681260156759565


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3872824013233185
With 185,Deep Lift get ACC:0.7236725737090551 AUC:0.7841543495804636


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3992447555065155
With 93,Deep Lift get ACC:0.7216080568667986 AUC:0.7764752866002631


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.43131592869758606
With 47,Deep Lift get ACC:0.7249383685065833 AUC:0.8019404247321932


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.48790064454078674
With 24,Deep Lift get ACC:0.7096355174281703 AUC:0.7935068596128547


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5080883502960205
With 12,Deep Lift get ACC:0.7063107332765844 AUC:0.7848425218612161


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5389315485954285
With 6,Deep Lift get ACC:0.6761555214079618 AUC:0.7558895386758351


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5489560961723328
With 3,Deep Lift get ACC:0.6811496069955891 AUC:0.7570475474534862
Feature Ablation


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.37151235342025757
With 369,Feature Ablation get ACC:0.720679438849398 AUC:0.7778626861381651


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.37967848777770996
With 185,Feature Ablation get ACC:0.7132256210132992 AUC:0.7650444410051185


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3976901173591614
With 93,Feature Ablation get ACC:0.7282631526581691 AUC:0.7916731154028986


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4430498480796814
With 47,Feature Ablation get ACC:0.7233381606730269 AUC:0.8006138275644783


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4823334217071533
With 24,Feature Ablation get ACC:0.7076318029561006 AUC:0.7965483599942514


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.505646288394928
With 12,Feature Ablation get ACC:0.7076373304442996 AUC:0.7901627292525731


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5299996137619019
With 6,Feature Ablation get ACC:0.6984617000342704 AUC:0.7771814232176615


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5914753079414368
With 3,Feature Ablation get ACC:0.6664907082923378 AUC:0.6814536188465237
Integrated Gradients


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3932625353336334
With 369,Integrated Gradients get ACC:0.7140188155698288 AUC:0.7671891064262579


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3778427839279175
With 185,Integrated Gradients get ACC:0.7240042230009839 AUC:0.7807646174425418


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3814714550971985
With 93,Integrated Gradients get ACC:0.7190792310158418 AUC:0.7770294172921941


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4367649555206299
With 47,Integrated Gradients get ACC:0.7144886520667278 AUC:0.7922507379196745


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4702688455581665
With 24,Integrated Gradients get ACC:0.7062388759299999 AUC:0.794837602396719


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5034200549125671
With 12,Integrated Gradients get ACC:0.7069049382579569 AUC:0.7885860132438616


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5498339533805847
With 6,Integrated Gradients get ACC:0.6839437522800889 AUC:0.762048542401362


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5544193983078003
With 3,Integrated Gradients get ACC:0.6878710326453453 AUC:0.7646934455044938
Saliency


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3885156512260437
With 369,Saliency get ACC:0.7194108803077706 AUC:0.7814417347468964




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3536995053291321
With 185,Saliency get ACC:0.716019766297799 AUC:0.7616878737963895




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.45000842213630676
With 93,Saliency get ACC:0.7206739113611993 AUC:0.7661015731231414




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6323398947715759
With 47,Saliency get ACC:0.628342748488232 AUC:0.716772886564887




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.646482527256012
With 24,Saliency get ACC:0.5876714903213681 AUC:0.7076387123163492




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6547289490699768
With 12,Saliency get ACC:0.5802812385995556 AUC:0.7059486827995622




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6672528982162476
With 6,Saliency get ACC:0.5758233193672132 AUC:0.704548846413213




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6726519465446472
With 3,Saliency get ACC:0.563239992482616 AUC:0.7033203621610269
Deep Lift




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38236168026924133
With 369,Deep Lift get ACC:0.7197452933437987 AUC:0.7712379915318881


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3808014392852783
With 185,Deep Lift get ACC:0.7258697502680831 AUC:0.7802132504947101


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.39117830991744995
With 93,Deep Lift get ACC:0.7285948019500978 AUC:0.7824809025282731


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4266969561576843
With 47,Deep Lift get ACC:0.7436433885713654 AUC:0.8034784483235129


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.47891390323638916
With 24,Deep Lift get ACC:0.711899023845584 AUC:0.7934999502526061


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5031755566596985
With 12,Deep Lift get ACC:0.7135572703052279 AUC:0.7907859535469893


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5332711338996887
With 6,Deep Lift get ACC:0.6939982533137292 AUC:0.7721210077716484


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5895321369171143
With 3,Deep Lift get ACC:0.664227201874924 AUC:0.6844356987297832
Feature Ablation


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38954415917396545
With 369,Feature Ablation get ACC:0.7259360801264689 AUC:0.7857904860873122


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.37701961398124695
With 185,Feature Ablation get ACC:0.7165531689089844 AUC:0.7690960898548481


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3819845914840698
With 93,Feature Ablation get ACC:0.7288048465016528 AUC:0.7848383762450667


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.430105984210968
With 47,Feature Ablation get ACC:0.7213482649214544 AUC:0.7989403805122874


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.48053452372550964
With 24,Feature Ablation get ACC:0.7208176260543684 AUC:0.8024558630067324


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5097689032554626
With 12,Feature Ablation get ACC:0.7134301380766552 AUC:0.7894068452413854


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5245277285575867
With 6,Feature Ablation get ACC:0.7000480891473297 AUC:0.7874570237792543


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5922537446022034
With 3,Feature Ablation get ACC:0.6654266668140664 AUC:0.6755364427296947
Integrated Gradients


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3986503481864929
With 369,Integrated Gradients get ACC:0.7162132283847574 AUC:0.7704337419989609


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3855324983596802
With 185,Integrated Gradients get ACC:0.7182114153686282 AUC:0.774073592977879


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38489460945129395
With 93,Integrated Gradients get ACC:0.7162187558729562 AUC:0.7744245884785035


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4325428605079651
With 47,Integrated Gradients get ACC:0.7220723658754988 AUC:0.7914865626761884


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4788835644721985
With 24,Integrated Gradients get ACC:0.7127557845164001 AUC:0.7933451805830395


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5046855807304382
With 12,Integrated Gradients get ACC:0.702709574715058 AUC:0.7918790143383043


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5832284092903137
With 6,Integrated Gradients get ACC:0.6663580485755662 AUC:0.6931677482118576


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5879533290863037
With 3,Integrated Gradients get ACC:0.6718800092861801 AUC:0.6909083874105929
Saliency


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3783801198005676
With 369,Saliency get ACC:0.7283322462606543 AUC:0.7783408138673624




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3695530891418457
With 185,Saliency get ACC:0.7316625579004389 AUC:0.7749234442884464




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4402537941932678
With 93,Saliency get ACC:0.7194799739102558 AUC:0.7655432968150613




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6306423544883728
With 47,Saliency get ACC:0.6321373691367169 AUC:0.7148382656953027




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6483128070831299
With 24,Saliency get ACC:0.5863393656654543 AUC:0.7111942691002355




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6596464514732361
With 12,Saliency get ACC:0.5828100644505124 AUC:0.7139082658058524




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6696717739105225
With 6,Saliency get ACC:0.5606448367732735 AUC:0.7072282963175873




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6770987510681152
With 3,Saliency get ACC:0.5600451043037022 AUC:0.7070237792542313
Deep Lift




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3823089599609375
With 369,Deep Lift get ACC:0.7270747426954243 AUC:0.7756516908586399


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.37347736954689026
With 185,Deep Lift get ACC:0.725875277756282 AUC:0.7844846170003428


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3892381489276886
With 93,Deep Lift get ACC:0.729534474943896 AUC:0.7818839338028015


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.44103699922561646
With 47,Deep Lift get ACC:0.7203505533015687 AUC:0.7956225057209505


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4743030369281769
With 24,Deep Lift get ACC:0.709096587328786 AUC:0.799088240821606


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5026014447212219
With 12,Deep Lift get ACC:0.7047740915573145 AUC:0.7896652553146799


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5470403432846069
With 6,Deep Lift get ACC:0.6884680013708171 AUC:0.7632120786672121


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5922619104385376
With 3,Deep Lift get ACC:0.6665570381507235 AUC:0.6763158185657273
Feature Ablation


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3797397315502167
With 369,Feature Ablation get ACC:0.7218706125562422 AUC:0.7675497750312302


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38041365146636963
With 185,Feature Ablation get ACC:0.7240733166034691 AUC:0.7800598626971932


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3945050835609436
With 93,Feature Ablation get ACC:0.7166194987673702 AUC:0.7775724930077275


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4206664264202118
With 47,Feature Ablation get ACC:0.7322650541141095 AUC:0.7991752987607371


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.47258052229881287
With 24,Feature Ablation get ACC:0.7142260963772842 AUC:0.7948693854538621


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5062080025672913
With 12,Feature Ablation get ACC:0.7100998264368705 AUC:0.7898656267618869


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5433619618415833
With 6,Feature Ablation get ACC:0.6910051184540722 AUC:0.7602908011541394


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.59214186668396
With 3,Feature Ablation get ACC:0.6648269343444952 AUC:0.6756773936787644
Integrated Gradients


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3844602704048157
With 369,Integrated Gradients get ACC:0.7268702256320683 AUC:0.7783725969245056


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.37673982977867126
With 185,Integrated Gradients get ACC:0.7152155167648717 AUC:0.7682282742076345


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.39494726061820984
With 93,Integrated Gradients get ACC:0.7204085919276562 AUC:0.7876228484252187


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4346763789653778
With 47,Integrated Gradients get ACC:0.7242723061786263 AUC:0.7950089545308822


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.47996455430984497
With 24,Integrated Gradients get ACC:0.7182141791127277 AUC:0.7943829664923665


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5013460516929626
With 12,Integrated Gradients get ACC:0.7118934963573853 AUC:0.7909531600650033


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5310701131820679
With 6,Integrated Gradients get ACC:0.6963336170777275 AUC:0.7754637562598804


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5922533869743347
With 3,Integrated Gradients get ACC:0.6648269343444952 AUC:0.6762743624042362
Saliency


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3818289041519165
With 369,Saliency get ACC:0.7153509402257426 AUC:0.7732486153642063




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3589174151420593
With 185,Saliency get ACC:0.7270609239749273 AUC:0.7681591806051494




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4635331630706787
With 93,Saliency get ACC:0.7198751893164708 AUC:0.7685668328598118




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6270796656608582
With 47,Saliency get ACC:0.5866046850989973 AUC:0.7042420708181788




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6463607549667358
With 24,Saliency get ACC:0.5914661109698531 AUC:0.7059597377759599




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6555216312408447
With 12,Saliency get ACC:0.588003139613297 AUC:0.7025548050454912




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6650215983390808
With 6,Saliency get ACC:0.5621759510043446 AUC:0.6960047315298981




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6709430813789368
With 3,Saliency get ACC:0.5631100965099439 AUC:0.6975551919696652
Deep Lift




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38697683811187744
With 369,Deep Lift get ACC:0.7245376256121693 AUC:0.7784624186077362


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3810286819934845
With 185,Deep Lift get ACC:0.7253391114009972 AUC:0.7778419580574196


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38694506883621216
With 93,Deep Lift get ACC:0.7277380412792819 AUC:0.7844417789668019


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4385359287261963
With 47,Deep Lift get ACC:0.73412505389301 AUC:0.7938661463457776


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4646686315536499
With 24,Deep Lift get ACC:0.721610820610898 AUC:0.8047787899222836


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4961267411708832
With 12,Deep Lift get ACC:0.7116254131797428 AUC:0.7880567562488254


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.523138165473938
With 6,Deep Lift get ACC:0.6996528737411145 AUC:0.7871986137059597


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5921611785888672
With 3,Deep Lift get ACC:0.6665570381507235 AUC:0.675079043081243
Feature Ablation


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.39343759417533875
With 369,Feature Ablation get ACC:0.7345257967874239 AUC:0.7915667112550715


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3778386414051056
With 185,Feature Ablation get ACC:0.7206766751052986 AUC:0.7765402345865993


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38295838236808777
With 93,Feature Ablation get ACC:0.7349928695402235 AUC:0.7934930408923577


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.42398056387901306
With 47,Feature Ablation get ACC:0.7188194390704976 AUC:0.7924151806935893


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4787522852420807
With 24,Feature Ablation get ACC:0.7094973302231999 AUC:0.7967349127209613


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5022626519203186
With 12,Feature Ablation get ACC:0.7081652055672861 AUC:0.7886343787656014


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5360094308853149
With 6,Feature Ablation get ACC:0.6876748068142875 AUC:0.7691845296660291


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5877295136451721
With 3,Feature Ablation get ACC:0.6700172457631803 AUC:0.6907287440441315
Integrated Gradients


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3874373435974121
With 369,Integrated Gradients get ACC:0.7289319787302254 AUC:0.780745271233846


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38952550292015076
With 185,Integrated Gradients get ACC:0.7136263639077131 AUC:0.7777134439567972


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.40499240159988403
With 93,Integrated Gradients get ACC:0.7274644306134406 AUC:0.7919439623246404


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.44013169407844543
With 47,Integrated Gradients get ACC:0.7270084128370387 AUC:0.8001191173706843


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.46928536891937256
With 24,Integrated Gradients get ACC:0.7144223222083421 AUC:0.7965898161557425


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.509171187877655
With 12,Integrated Gradients get ACC:0.7096963197983572 AUC:0.7884492079109412


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5355674028396606
With 6,Integrated Gradients get ACC:0.6875449108416154 AUC:0.7696377836983318


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5393649935722351
With 3,Integrated Gradients get ACC:0.6892031573012591 AUC:0.7692840244536079
Saliency


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.37372779846191406
With 369,Saliency get ACC:0.7180124257934709 AUC:0.7648164321169175




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.35017892718315125
With 185,Saliency get ACC:0.7274644306134406 AUC:0.7803431464673823




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.46067023277282715
With 93,Saliency get ACC:0.7327183081464121 AUC:0.7799216754922229




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6284144520759583
With 47,Saliency get ACC:0.5975878041500382 AUC:0.7157406281437589




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6426669359207153
With 24,Saliency get ACC:0.5896005837027538 AUC:0.7096714461014626




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6604412794113159
With 12,Saliency get ACC:0.5760223089423704 AUC:0.6892377041025017




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.6742001175880432
With 6,Saliency get ACC:0.5617779718540301 AUC:0.6918563516366892




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.683847963809967
With 3,Saliency get ACC:0.5507920890588899 AUC:0.6953262323534939
Deep Lift




Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38988780975341797
With 369,Deep Lift get ACC:0.7256044308345402 AUC:0.7777756281990338


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.38448482751846313
With 185,Deep Lift get ACC:0.7274727218457389 AUC:0.7861774102612291


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3997839391231537
With 93,Deep Lift get ACC:0.7224703450258134 AUC:0.7881341410836087


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.43495941162109375
With 47,Deep Lift get ACC:0.7327956929811954 AUC:0.8065462042738539


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4744735062122345
With 24,Deep Lift get ACC:0.7279370308544391 AUC:0.8006856849110628


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5027377009391785
With 12,Deep Lift get ACC:0.709831743259228 AUC:0.7867757608587507


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5300319790840149
With 6,Deep Lift get ACC:0.6892722509037443 AUC:0.7716953911803398


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5924282073974609
With 3,Deep Lift get ACC:0.6648269343444952 AUC:0.6761817769769062
Feature Ablation


               activations. The hooks and attributes will be removed
            after the attribution is finished


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3893316686153412
With 369,Feature Ablation get ACC:0.7148175376145571 AUC:0.7755687785356579


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.39413419365882874
With 185,Feature Ablation get ACC:0.7138806283648584 AUC:0.770451706335607


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.37091678380966187
With 93,Feature Ablation get ACC:0.7205467791326265 AUC:0.7781888079418949


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.42423924803733826
With 47,Feature Ablation get ACC:0.723542677736383 AUC:0.7950365919718761


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.47650599479675293
With 24,Feature Ablation get ACC:0.7037736161933295 AUC:0.7911880783134528


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5035417675971985
With 12,Feature Ablation get ACC:0.7089003614977283 AUC:0.790400411245122


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.522627055644989
With 6,Feature Ablation get ACC:0.7071066915772135 AUC:0.7876891782836044


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5913752913475037
With 3,Feature Ablation get ACC:0.664893264202881 AUC:0.681536531169506
Integrated Gradients


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.39954501390457153
With 369,Integrated Gradients get ACC:0.7190156649015554 AUC:0.7868876924947764


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.39022308588027954
With 185,Integrated Gradients get ACC:0.7226030047425849 AUC:0.78403136296804


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.3915769159793854
With 93,Integrated Gradients get ACC:0.7175564080170689 AUC:0.7712172634511425


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.4469735026359558
With 47,Integrated Gradients get ACC:0.7179488596791845 AUC:0.798691643543341


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.47558239102363586
With 24,Integrated Gradients get ACC:0.7119542987275722 AUC:0.7959983749184696


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.496622234582901
With 12,Integrated Gradients get ACC:0.7074991432393292 AUC:0.7895450324463558


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5303881764411926
With 6,Integrated Gradients get ACC:0.6946007495273998 AUC:0.7747120178648419


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 0.5376461744308472
With 3,Integrated Gradients get ACC:0.6926633649137159 AUC:0.7713029395182242


In [46]:
feature_saliency = np.load('Saliency_feature.npy')

In [47]:
feature_saliency.shape

(73,)

In [48]:
bool_arr = np.array(select_arr, dtype='bool')

In [49]:
X_test[...,bool_arr].shape

(1204, 73)

In [50]:
select_arr.shape

(369,)

In [51]:
print('The best score is:', best_score)
print('best features:', np.where(select_arr==1)[0])

The best score is: 0
best features: [  0   1  14  17  23  24  27  28  29  38  50  51  52  54  55  62  63  73
  74  75  76  79  88  89  90  93  94 125 127 129 130 137 138 147 151 154
 157 158 164 166 193 254 255 256 257 258 259 273 274 275 276 277 278 279
 280 281 289 291 297 298 299 300 301 302 311 323 324 325 326 330 332 334
 368]


In [52]:
from sklearn.ensemble import GradientBoostingClassifier

# XGboost and SVM

In [None]:
clf = GradientBoostingClassifier(n_estimators=500, learning_rate=0.05, random_state=156).fit(X_train, np.squeeze(y_train))
clf.score(X_test, np.squeeze(y_test))

In [None]:
y_pred_proba = clf.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba)
print('Test auc is', auc(fpr, tpr))
acc = balanced_accuracy_score(y_test,np.around(y_pred_proba))
print(f'balanced accuarcy score:{acc}')

In [None]:
from sklearn.svm import SVC

In [None]:
clf = SVC(gamma='auto')

In [None]:
clf.fit(X_train, np.squeeze(y_train))
y_pred_proba = clf.predict(X_test)
fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba)
print('Test auc is', auc(fpr, tpr))
acc = balanced_accuracy_score(y_test,np.around(y_pred_proba))
print(f'balanced accuarcy score:{acc}')

# logistic Regression

In [71]:
clf = LogisticRegression(penalty=None)
clf.fit(X_train, np.squeeze(y_train))
y_pred_proba = clf.predict_proba(X_test)[:, 1]
print(y_pred_proba)
fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba)
print('Test auc is', auc(fpr, tpr))
acc = balanced_accuracy_score(y_test,np.around(y_pred_proba))
print(f'balanced accuarcy score:{acc}')

[0.58345724 0.26040545 0.67795407 ... 0.84154498 0.68576292 0.18908735]
Test auc is 0.7917090440761909
balanced accuarcy score:0.7216080568667986


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# L1 penalty

In [55]:
from sklearn.linear_model import LogisticRegression

In [56]:
clf = LogisticRegression(penalty='l1',solver='liblinear')
clf.fit(X_train, np.squeeze(y_train))
y_pred_proba = clf.predict_proba(X_test)[:, 1]
print(y_pred_proba)
fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba)
print('Test auc is', auc(fpr, tpr))
acc = balanced_accuracy_score(y_test,np.around(y_pred_proba))
print(f'balanced accuarcy score:{acc}')

[0.57961152 0.2762406  0.67486701 ... 0.82669343 0.65260206 0.20646377]
Test auc is 0.7894441557867273
balanced accuarcy score:0.7065677614778292


In [57]:
coef = clf.coef_

array([0.])

In [60]:
np.where(coef!=0)[1]

array([  0,   1,   2,  12,  13,  17,  23,  24,  27,  28,  29,  30,  32,
        38,  45,  46,  48,  49,  50,  54,  55,  62,  63,  64,  73,  74,
        75,  76,  78,  81,  87,  89,  93,  94, 111, 113, 114, 147, 149,
       150, 153, 156, 158, 164, 166, 182, 193, 196, 208, 224, 255, 259,
       263, 273, 274, 276, 277, 280, 281, 284, 289, 290, 291, 309, 310,
       312, 324, 326, 328, 330, 332, 368])

In [61]:
non_zero_coef = coef[:,np.where(coef!=0)[1]]

In [62]:
non_zero_coef.shape

(1, 72)

# RFE logstic regression

In [183]:
from sklearn.feature_selection import RFE

In [None]:
num_list = []
auc_list = []
acc_list = []
feature_list = []
for num_feature in [369,185,93,47,24,12,6,3]:
    estimator = LogisticRegression(penalty=None)
    selector = RFE(estimator, n_features_to_select=5, step=1)
    selector = selector.fit(X_train, np.squeeze(y_train))
    selected_RFE = selector.ranking_
    selected_RFE[selected_RFE != 1] = 0
    bool_arr = np.array(selected_RFE, dtype='bool') 
    X_train_selected = X_train[...,bool_arr]
    X_test_selected = X_test[...,bool_arr]
    clf = LogisticRegression(penalty=None)
    clf.fit(X_train_selected, np.squeeze(y_train))
    
    y_pred_proba_linear = clf.predict_proba(X_test_selected)[:,1]
    fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba_linear)
    
    acc = balanced_accuracy_score(y_test,np.around(y_pred_proba_linear))
    print(f'With {num_feature} in RFE')
    auc_score = auc(fpr, tpr)
    print('Test auc is', auc_score)
    print(f'balanced accuarcy score:{acc}')
    num_list.append(num_feature)
    acc_list.append(acc)
    auc_list.append(auc_list)

dict_method = {}
dict_method['auc'] = auc_list
dict_method['acc'] = acc_list
dict_method['feat'] = feature_list
with open(f'./result/linear.pkl', 'wb') as f:
    pickle.dump(dict_method, f)

# RFE svc

In [192]:
from sklearn.svm import SVC

In [196]:
estimator = SVC(                    
            kernel = 'linear',
            probability = True,
            random_state = 42) 
selector = RFE(estimator, n_features_to_select=3, step=1)
selector = selector.fit(X_train, np.squeeze(y_train))

selected_RFE = selector.ranking_
selected_RFE[selected_RFE != 1] = 0

bool_arr = np.array(selected_RFE, dtype='bool') 
X_train_selected = X_train[...,bool_arr]
X_test_selected = X_test[...,bool_arr]

X_train_selected.shape

clf = LogisticRegression(penalty=None)
clf.fit(X_train_selected, np.squeeze(y_train))

y_pred_proba_linear = clf.predict_proba(X_test_selected)[:,1]
fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba_linear)
print('Test auc is', auc(fpr, tpr))
acc = balanced_accuracy_score(y_test,np.around(y_pred_proba_linear))
print(f'balanced accuarcy score:{acc}')

Test auc is 0.48835772798125077
balanced accuarcy score:0.4937235371502482


# Decision Tree

In [195]:
estimator = DecisionTreeClassifier()
selector = RFE(estimator, n_features_to_select=3, step=1)
selector = selector.fit(X_train, np.squeeze(y_train))

selected_RFE = selector.ranking_
selected_RFE[selected_RFE != 1] = 0

bool_arr = np.array(selected_RFE, dtype='bool') 
X_train_selected = X_train[...,bool_arr]
X_test_selected = X_test[...,bool_arr]

X_train_selected.shape

clf = LogisticRegression(penalty=None)
clf.fit(X_train_selected, np.squeeze(y_train))

y_pred_proba_linear = clf.predict_proba(X_test_selected)[:,1]
fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba_linear)
print('Test auc is', auc(fpr, tpr))
acc = balanced_accuracy_score(y_test,np.around(y_pred_proba_linear))
print(f'balanced accuarcy score:{acc}')

Test auc is 0.7285395270681098
balanced accuarcy score:0.664920901643875


array([ 13,  16, 276])

# Curve with repetition

In [33]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.feature_selection import RFE

In [73]:
# def get_estimator(method):
#     if method =='linear':
#         estimator = LogisticRegression(penalty=None)
#     elif method == 'SVM':
#         estimator = SVC(kernel = 'linear',probability = True) 
#     elif method =='DT':
#         estimator = DecisionTreeClassifier()
#     else:
#         raise 'wrong method'
#     return estimator
# for repeat in range(2,6):
#     # for method in ['linear','DT']:
#     for method in ['DT']:
#         auc_list = []
#         acc_list = []
#         feature_list = []
#         for num_feature in [369,185,93,47,24,12,6,3]:
#             estimator = get_estimator(method)
#             selector = RFE(estimator, n_features_to_select=num_feature)
#             selector = selector.fit(X_train, np.squeeze(y_train))
#             selected_RFE = selector.ranking_
#             selected_RFE[selected_RFE != 1] = 0
#             bool_arr = np.array(selected_RFE, dtype='bool') 
#             X_train_selected = X_train[...,bool_arr]
#             X_test_selected = X_test[...,bool_arr]
#             clf = get_estimator(method)
#             clf.fit(X_train_selected, np.squeeze(y_train))
            
#             y_pred_proba_linear = clf.predict_proba(X_test_selected)[:,1]
#             fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba_linear)
            
#             acc = balanced_accuracy_score(y_test,np.around(y_pred_proba_linear))
#             print(f'With {num_feature} in RFE')
#             auc_score = auc(fpr, tpr)
#             print('Test auc is', auc_score)
#             print(f'balanced accuarcy score:{acc}')

#             acc_list.append(acc)
#             auc_list.append(auc_score)
#             feature_list.append(np.where(selected_RFE==1)[0])
#             # print(np.where(selected_RFE==1)[0])
#             print(f'With{num_feature},{method} get ACC:{acc} AUC:{auc_score}')
#         dict_method = {}
#         dict_method['auc'] = auc_list
#         dict_method['acc'] = acc_list
#         dict_method['feat'] = feature_list
#         print(dict_method)
#         # with open(f'./result/{method}_{repeat}.pkl', 'wb') as f:
#         #     pickle.dump(dict_method, f)

In [74]:
def get_result(name):
    acc_curve = []
    auc_curve = []
    feature_curve = []
    for i in range(1,6):
        with open(f'./result/{name}_{i}.pkl','rb') as file:
            i_th_result = pickle.load(file)
            acc_curve.append(i_th_result['acc'])
            auc_curve.append(i_th_result['auc'])
            feature_curve.append(i_th_result['feat'])
    return acc_curve, auc_curve, feature_curve

In [64]:
import time

def get_estimator(method):
    if method =='linear':
        estimator = LogisticRegression(penalty=None)
    elif method == 'SVM':
        estimator = SVC(kernel = 'linear',probability = True) 
    elif method =='DT':
        estimator = DecisionTreeClassifier()
    else:
        raise 'wrong method'
    return estimator
    
for method in ['DT','SVM','linear']:
    for repeat in range(1,6):
        reduce_rate = 0.5
        num_cur_features = X_train.shape[-1]
        select_arr = np.ones(num_cur_features)
        remaining_inds = np.nonzero(select_arr)[0]
        num_select = 3
        num_list = []
        auc_list = []
        acc_list = []
        feature_list = []
        resuming = True
        start_time = time.time()
        while resuming:
            bool_arr = np.array(select_arr, dtype='bool') 
            X_train_selected = X_train[...,bool_arr]
            X_test_selected = X_test[...,bool_arr]
            clf = get_estimator(method)
            clf.fit(X_train_selected, np.squeeze(y_train))
            
            y_pred_proba = clf.predict_proba(X_test_selected)[:,1]
            acc = balanced_accuracy_score(y_test,np.around(y_pred_proba))
            fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba)
            

            auc_score = auc(fpr, tpr)
            acc = balanced_accuracy_score(y_test,np.around(y_pred_proba))
            print(f'With {num_cur_features},{method} get ACC:{acc} AUC:{auc_score}')
            acc_list.append(acc)
            auc_list.append(auc_score)
            feature_list.append(np.where(select_arr==1)[0])
            
            num_remove = int(num_cur_features*(1-reduce_rate))
            if num_cur_features - num_remove<=num_select:
                num_remove = num_cur_features - num_select
            num_cur_features -= num_remove
            # print(num_cur_features)
            estimator = get_estimator(method)
            if num_remove == 0:
                resuming = False
                continue
            selector = RFE(estimator, n_features_to_select=num_cur_features,step=num_remove)
            selector = selector.fit(X_train_selected, np.squeeze(y_train))
            selected_RFE = selector.ranking_
            selected_RFE[selected_RFE != 1] = 0
            bool_arr = np.array(selected_RFE, dtype='bool') 

            inds = np.where(selected_RFE==0)[0]
            # print(inds)
            inds_to_remove = remaining_inds[inds]
            # print(inds_to_remove)
            select_arr[inds_to_remove] = 0
            remaining_inds = np.nonzero(select_arr)[0]
            
            # print(remaining_inds,num_cur_features)

        elapsed_time = time.time() - start_time
        dict_method = {}
        dict_method['auc'] = auc_list
        dict_method['acc'] = acc_list
        dict_method['feat'] = feature_list
        dict_method['time'] = elapsed_time
        with open(f'./result/{method}_{repeat}.pkl', 'wb') as f:
            pickle.dump(dict_method, f)

With 369,DT get ACC:0.6675768597234045 AUC:0.6717791326265518
With 185,DT get ACC:0.6566628342748488 AUC:0.6592911549133842
With 93,DT get ACC:0.6724355218501608 AUC:0.6768519849210122
With 47,DT get ACC:0.6822191759620593 AUC:0.6865126524204871
With 24,DT get ACC:0.681420453917331 AUC:0.6858134251633372
With 12,DT get ACC:0.6812822667123606 AUC:0.6822260853223079
With 6,DT get ACC:0.675428656709818 AUC:0.6773162939297124
With 3,DT get ACC:0.648542954110793 AUC:0.6428524050101153
With 369,DT get ACC:0.6666454779619045 AUC:0.6702715102203257
With 185,DT get ACC:0.6589263406922626 AUC:0.6628522391854693
With 93,DT get ACC:0.6627209613407475 AUC:0.6668624318737081
With 47,DT get ACC:0.6816194434924882 AUC:0.6852427120068099
With 24,DT get ACC:0.6721674386725185 AUC:0.6760919552936755
With 12,DT get ACC:0.6578567717257924 AUC:0.6581925666338704
With 6,DT get ACC:0.6708380777607039 AUC:0.672612401472523
With 3,DT get ACC:0.6468791801629503 AUC:0.6410283339045071
With 369,DT get ACC:0.670904

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 185,linear get ACC:0.7136871662779 AUC:0.7916371867296064
With 93,linear get ACC:0.7084968548592149 AUC:0.7908965033109654


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 47,linear get ACC:0.7082315354256719 AUC:0.7868490000773849
With 24,linear get ACC:0.6686491924339741 AUC:0.7397727649601469
With 12,linear get ACC:0.5834816542666681 AUC:0.6410559713455013
With 6,linear get ACC:0.5762931558641122 AUC:0.6382811722696973
With 3,linear get ACC:0.5557972296229148 AUC:0.6026426921078525
With 369,linear get ACC:0.7216080568667986 AUC:0.7917104259482406


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 185,linear get ACC:0.7136871662779 AUC:0.7916371867296064
With 93,linear get ACC:0.7084968548592149 AUC:0.7908965033109654


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 47,linear get ACC:0.7082315354256719 AUC:0.7868490000773849
With 24,linear get ACC:0.6686491924339741 AUC:0.7397727649601469
With 12,linear get ACC:0.5834816542666681 AUC:0.6410559713455013
With 6,linear get ACC:0.5762931558641122 AUC:0.6382811722696973
With 3,linear get ACC:0.5557972296229148 AUC:0.6026426921078525
With 369,linear get ACC:0.7216080568667986 AUC:0.7917104259482406


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 185,linear get ACC:0.7136871662779 AUC:0.7916371867296064


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 93,linear get ACC:0.7084968548592149 AUC:0.7908965033109654
With 47,linear get ACC:0.7082315354256719 AUC:0.7868490000773849


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


With 24,linear get ACC:0.6686491924339741 AUC:0.7397727649601469
With 12,linear get ACC:0.5834816542666681 AUC:0.6410559713455013
With 6,linear get ACC:0.5762931558641122 AUC:0.6382811722696973
With 3,linear get ACC:0.5557972296229148 AUC:0.6026426921078525


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 369,linear get ACC:0.7216080568667986 AUC:0.7917104259482406


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 185,linear get ACC:0.7136871662779 AUC:0.7916371867296064
With 93,linear get ACC:0.7084968548592149 AUC:0.7908965033109654


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 47,linear get ACC:0.7082315354256719 AUC:0.7868490000773849
With 24,linear get ACC:0.6686491924339741 AUC:0.7397727649601469
With 12,linear get ACC:0.5834816542666681 AUC:0.6410559713455013
With 6,linear get ACC:0.5762931558641122 AUC:0.6382811722696973
With 3,linear get ACC:0.5557972296229148 AUC:0.6026426921078525
With 369,linear get ACC:0.7216080568667986 AUC:0.7917104259482406


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 185,linear get ACC:0.7136871662779 AUC:0.7916371867296064
With 93,linear get ACC:0.7084968548592149 AUC:0.7908965033109654


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

With 47,linear get ACC:0.7082315354256719 AUC:0.7868490000773849
With 24,linear get ACC:0.6686491924339741 AUC:0.7397727649601469
With 12,linear get ACC:0.5834816542666681 AUC:0.6410559713455013
With 6,linear get ACC:0.5762931558641122 AUC:0.6382811722696973
With 3,linear get ACC:0.5557972296229148 AUC:0.6026426921078525


In [89]:
name = 'linear'
Linear_acc, Linear_auc, Linear_feat = get_result(name)

In [90]:
name = 'DT'
DT_acc, DT_auc, DT_feat = get_result(name)

In [91]:
name = 'SVM'
SVM_acc, SVM_auc, SVM_feat = get_result(name)

In [92]:
for i in range(len(SVM_feat[0])):
    print(SVM_feat[0][i].shape)

(369,)
(185,)
(93,)
(47,)
(24,)
(12,)
(6,)
(3,)


In [108]:
# Linear_feat = [284,328,368]
# DT_feat = [1,182,368]
# SVM_feat = [13,16,276]
# feats_RFE = [Linear_feat, DT_feat, SVM_feat]
# name = ['linear','DT','SVM']
feats_RFE = [ DT_feat]
name = ['DT']

In [109]:
for j in range(len(feats_RFE)):

    repeat_times = 6 if name[j]=='DT' else 2
    print(repeat_times)
    for repeat in range(1,repeat_times):
        aucs = []
        accs = []
        method_feat = feats_RFE[j]
        label = name[j]
        for step in range(len(method_feat[0])):
            feat = method_feat[repeat-1][step]
            X_train_select = X_train[...,feat]
            X_test_select = X_test[...,feat]
            print(X_train_select.shape)
            train_set = TensorDataset(Tensor(X_train_select), Tensor(y_train))
            train_loader = DataLoader(train_set, batch_size=1000, shuffle=True)
            test_set = TensorDataset(Tensor(X_test_select), Tensor(y_test))
            test_loader = DataLoader(test_set, batch_size=y_test.shape[0],shuffle=False)
            hidden_layer_sizes = (100,100,100)
            num_epochs = 500
            model = MLPClassifier(X_train_select.shape[-1], hidden_layer_sizes, p=0.0)
            model.to(device)
            model.train()
            # print(model)
            criterion = nn.BCELoss(reduction='mean')
            optimizer = torch.optim.Adam(model.parameters(), 0.001)
            train(model, train_loader, num_epochs, optimizer)
            model.eval()
            y_pred = model(Tensor(X_test_select).to(device)).detach().cpu().numpy()
            
            y_test =y_test.astype(int)
            
            fpr, tpr, thresholds = roc_curve(y_test,y_pred)
            ACC = balanced_accuracy_score(y_test,np.around(y_pred))
            auc_score = auc(fpr, tpr)
            # print(f'With Drop out{p}')
            # print('Test auc is', auc(fpr, tpr))
            # print('Balanced_accuracy_score is', ACC)
            print(f'With feat {feat} from {label}, test auc is:{auc_score} test acc is:{ACC}')
            aucs.append(auc_score)
            accs.append(ACC)
        dict_method = {}
        dict_method['auc'] = aucs
        dict_method['acc'] = accs
        with open(f'./result/feature_{label}_results_{repeat}.pkl', 'wb') as f:
            pickle.dump(dict_method, f)
    # y_pred_train = model(Tensor(X_train).to(device)).detach().cpu().numpy()
    
    
    # fpr, tpr, thresholds = roc_curve(y_train,y_pred_train)
    # ACC = balanced_accuracy_score(y_train,np.around(y_pred_train))
    # print(f'With Drop out:{p}')
    # print('Train auc is', auc(fpr, tpr))
    # print('Balanced_accuracy_score of Train is', ACC)

6
(4812, 369)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.7008539736270905
With feat [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 2

Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5745718479156494
With feat [  0   1   2   4   7   8   9  10  11  12  13  14  15  16  17  18  19  20
  21  22  23  27  39  48  49  50  51  52  53  54  55  56  57  58  59  60
  61  62  63  73  74  76  79  81  82  83  84  85  86  87  88  89  90  91
  92  93  94  96 111 112 113 114 115 116 117 118 119 120 121 122 123 124
 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
 143 144 145 146 147 149 150 151 154 157 159 160 161 162 163 164 166 169
 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 187 190
 193 240 241 254 255 256 257 258 259 273 274 275 276 277 278 279 280 283
 289 291 292 297 298 299 300 301 302 303 304 310 311 320 321 322 323 324
 325 326 327 328 329 330 331 332 334 335 336 337 340 343 346 347 348 349
 350 351 355 356 368] from DT, test auc is:0.7442997777949745 test acc is:0.7230092751251976
(4812, 93)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6476767659187317
With feat [  0   1   2   4  10  11  13  14  15  16  17  20  23  27  39  59  60  61
  62  63  73  74  76  79  81  82  83  84  85  86  87  88  89  90  93 112
 113 116 125 127 129 130 137 138 147 150 151 154 157 159 164 166 173 177
 182 187 190 193 240 241 254 255 258 259 274 275 276 277 278 279 280 289
 291 292 297 298 299 300 301 302 311 323 324 325 326 328 329 330 331 337
 340 343 368] from DT, test auc is:0.7582635948572249 test acc is:0.7198751893164708
(4812, 47)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.8188439011573792
With feat [  0   1   2  10  11  14  15  16  17  76  89 113 125 127 137 138 147 151
 154 164 166 182 187 190 193 274 275 276 277 278 279 280 289 292 297 299
 300 302 323 324 325 326 328 329 330 331 368] from DT, test auc is:0.7773804127928187 test acc is:0.7180151895375704
(4812, 24)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.0462221205234528
With feat [  1  11  15 137 154 164 182 190 193 274 275 276 277 289 297 323 324 325
 326 328 329 330 331 368] from DT, test auc is:0.7734766242524073 test acc is:0.7163431243574294
(4812, 12)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.4176270067691803
With feat [  1  11 164 182 190 277 324 326 328 329 331 368] from DT, test auc is:0.7797917795195507 test acc is:0.7218042826978565
(4812, 6)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.5638334155082703
With feat [  1 182 326 329 331 368] from DT, test auc is:0.7788728346064981 test acc is:0.7232745945587407
(4812, 3)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.7136850357055664
With feat [  1 182 368] from DT, test auc is:0.7606680522237086 test acc is:0.7164149817040141
(4812, 369)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.651178389787674
With feat [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 23

Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.621159702539444
With feat [  0   1   2   3   4  10  11  12  13  14  15  16  17  19  23  24  27  30
  39  46  54  65  67  68  69  70  71  72  73  74  75  76  77  78  79  80
  81  82  88  89  90  93  98 112 113 114 123 125 126 127 130 137 138 147
 149 150 151 154 157 158 164 166 182 184 185 186 187 188 189 190 191 192
 193 194 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
 255 257 273 274 275 276 277 278 279 280 289 291 292 297 298 299 300 301
 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
 320 321 322 323 324 325 326 327 328 329 330 331 332 336 337 338 339 340
 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
 359 360 361 367 368] from DT, test auc is:0.7638684678908211 test acc is:0.7284013398631395
(4812, 93)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6388844549655914
With feat [  0   1   2   3   4  10  11  12  13  14  16  17  23  73  75  76  79  82
  88  89  90  93  98 112 113 114 123 125 126 127 130 137 138 147 149 150
 151 154 157 158 164 166 182 187 190 193 224 240 241 255 273 274 275 276
 277 278 279 280 289 291 292 297 299 300 301 302 310 311 320 321 322 323
 324 325 326 327 328 329 330 331 332 336 337 338 340 341 342 343 344 345
 346 355 368] from DT, test auc is:0.7688058967244104 test acc is:0.7313253811203113
(4812, 47)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.7991389632225037
With feat [  0   1   2  11  12  13  14  16  17  23  76  79  88 125 126 137 151 154
 157 164 166 182 187 190 193 240 274 275 276 277 279 280 289 292 297 299
 300 301 302 323 324 325 326 328 329 331 368] from DT, test auc is:0.7898448986811413 test acc is:0.7363885603104238
(4812, 24)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.9545502960681915
With feat [  1  11  12 154 157 164 182 190 193 274 275 276 277 279 289 297 323 324
 325 326 328 329 331 368] from DT, test auc is:0.7785094022574262 test acc is:0.7169483843151995
(4812, 12)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.4516369998455048
With feat [  1  11 164 182 190 289 324 326 328 329 331 368] from DT, test auc is:0.7867370684413588 test acc is:0.725330820168699
(4812, 6)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.5725897550582886
With feat [  1 182 326 329 331 368] from DT, test auc is:0.7779055241717059 test acc is:0.7257978929214985
(4812, 3)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.6992295384407043
With feat [  1 182 368] from DT, test auc is:0.7620112318560199 test acc is:0.7178079087301148
(4812, 369)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6724514961242676
With feat [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 2

Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6267584562301636
With feat [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  25  26  27  28  29  30  48  54  62  63  73  76
  77  79  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100
 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
 155 156 157 158 159 160 161 162 163 164 165 166 168 182 187 190 193 240
 241 255 256 257 258 259 273 274 275 276 277 279 280 286 289 290 292 297
 298 299 300 301 302 310 311 321 322 323 324 325 326 327 328 329 330 331
 332 333 334 335 336 337 338 339 340 341 342 343 345 346 347 348 349 350
 351 364 365 367 368] from DT, test auc is:0.7441892280309982 test acc is:0.7198116232021845
(4812, 93)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6493271589279175
With feat [  0   1   2   3   4  10  11  12  13  14  15  16  17  23  27  29  76  79
  86  87  88  89  90  91  92  93  94  95  96  97  99 108 113 115 118 125
 126 127 129 130 137 138 147 148 151 154 157 158 160 164 166 182 187 190
 193 240 241 255 274 275 276 277 279 280 286 289 290 292 297 298 299 300
 301 302 310 311 321 322 323 324 325 326 327 328 329 330 331 332 335 337
 340 342 368] from DT, test auc is:0.7658915285715865 test acc is:0.7304022905911096
(4812, 47)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.8299391269683838
With feat [  0   1   2  10  11  13  14  15  16  17  76  79  88 126 127 129 137 148
 151 154 157 164 166 182 187 190 193 274 275 276 277 279 280 289 292 297
 299 300 323 324 325 326 328 329 330 331 368] from DT, test auc is:0.7806416308301183 test acc is:0.7262732569065966
(4812, 24)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.0071535408496857
With feat [  1  11  15  16 137 164 182 187 190 193 274 275 276 277 289 297 323 324
 325 326 328 329 331 368] from DT, test auc is:0.7758617354101949 test acc is:0.7174845506704843
(4812, 12)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.453231304883957
With feat [  1 164 182 277 289 323 324 326 328 329 331 368] from DT, test auc is:0.7878784947544136 test acc is:0.7194164077959693
(4812, 6)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.6184103190898895
With feat [  1 182 328 329 331 368] from DT, test auc is:0.7808599666139714 test acc is:0.7278624097637552
(4812, 3)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.6948357820510864
With feat [  1 182 368] from DT, test auc is:0.7594492410758703 test acc is:0.7161441347822722
(4812, 369)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.594884604215622
With feat [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 23

Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5524109303951263
With feat [  0   1   2   3   4  10  11  12  13  14  15  16  17  18  19  20  21  22
  24  25  26  27  28  29  30  31  53  55  60  61  62  63  65  67  73  74
  76  79  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97
  98  99 100 103 104 105 106 107 108 113 117 118 119 120 121 122 123 124
 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 164
 166 168 170 177 182 187 190 193 240 241 254 255 258 259 273 274 275 276
 277 278 279 280 282 283 284 285 286 287 288 289 290 291 292 293 294 295
 296 297 298 299 300 301 302 303 304 311 320 321 322 323 324 325 326 327
 328 329 330 331 334 335 336 337 338 340 343 346 347 348 349 355 356 357
 358 359 360 361 368] from DT, test auc is:0.7536661065478625 test acc is:0.7201460362382126
(4812, 93)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.5514217913150787
With feat [  0   1   2   3   4  10  11  12  13  14  15  16  17  18  21  22  27  62
  74  76  79  88  89  90  91  93  95  97 113 123 125 127 129 130 137 138
 140 147 148 150 151 152 153 154 157 164 166 182 187 190 193 240 241 255
 258 259 273 274 275 276 277 278 279 280 283 289 291 292 297 298 299 301
 302 311 323 324 325 326 328 329 330 331 334 337 340 343 347 357 358 359
 360 361 368] from DT, test auc is:0.7628749018870845 test acc is:0.7357197342383675
(4812, 47)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.8386216163635254
With feat [  0   1   2   4  10  11  13  14  15  16  17  62  76  79  89  90 123 125
 147 151 154 164 166 182 187 190 193 274 275 276 277 279 280 289 291 292
 297 302 323 324 325 326 328 329 330 331 368] from DT, test auc is:0.7795969355605425 test acc is:0.7214145947798402
(4812, 24)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.0982359647750854
With feat [  1  15  62 164 182 187 190 193 274 275 276 277 279 289 302 323 324 325
 326 328 329 330 331 368] from DT, test auc is:0.7833901743369778 test acc is:0.7182749814829146
(4812, 12)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.419725686311722
With feat [  1  15 164 182 190 289 324 326 328 329 331 368] from DT, test auc is:0.7807729086748401 test acc is:0.7193500779375837
(4812, 6)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.591853052377701
With feat [  1 182 326 329 331 368] from DT, test auc is:0.7752025824424863 test acc is:0.724399438407199
(4812, 3)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.689613997936249
With feat [  1 182 368] from DT, test auc is:0.7596537581392265 test acc is:0.7188056203500006
(4812, 369)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6286557912826538
With feat [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 2

Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.9548037648200989
With feat [  0   1   2   3   4  10  11  12  13  14  15  16  17  18  20  21  22  23
  24  25  26  27  28  29  36  39  48  50  51  52  53  54  55  56  57  58
  59  60  61  62  63  64  73  74  76  79  81  82  83  84  85  86  87  88
  89  90  91  92  93  94  95  96  98  99 105 113 114 115 116 117 118 119
 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 154 162 163
 164 165 166 167 169 172 173 174 175 176 177 178 179 180 181 182 183 184
 185 186 187 188 189 190 193 240 241 255 257 258 259 273 274 275 276 277
 278 279 280 282 286 289 292 297 298 299 300 302 310 311 320 321 322 323
 324 325 326 327 328 329 330 331 332 333 334 335 336 337 340 343 347 348
 349 350 360 361 368] from DT, test auc is:0.7503123030832329 test acc is:0.7238660357960136
(4812, 93)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.6275436282157898
With feat [  0   1   2   3   4  10  11  12  13  14  15  16  17  18  23  27  39  48
  51  52  53  54  55  56  57  63  64  73  76  79  85  88  89  90  93  95
 105 113 116 123 125 126 127 129 137 138 139 147 149 151 154 164 166 167
 175 178 182 187 190 193 240 241 257 258 273 274 275 276 277 278 279 280
 282 289 297 298 299 300 310 311 323 324 325 326 328 329 330 331 335 337
 340 343 368] from DT, test auc is:0.7646934455044939 test acc is:0.7244049658953978
(4812, 47)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 1.7699767053127289
With feat [  0   1   2   4  12  13  16  17  76  88  89  90 125 126 137 151 164 166
 182 187 190 193 240 273 274 275 276 277 279 280 289 297 298 299 300 311
 323 324 325 326 328 329 330 331 335 340 368] from DT, test auc is:0.7753849895530472 test acc is:0.7274727218457389
(4812, 24)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.014258861541748
With feat [  1  12  17 125 137 164 182 190 193 275 276 277 279 289 297 323 324 325
 326 328 329 330 331 368] from DT, test auc is:0.7725645886996031 test acc is:0.7123578053660855
(4812, 12)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.4481990337371826
With feat [  1 164 182 190 277 289 324 326 328 329 331 368] from DT, test auc is:0.7796425373381828 test acc is:0.7158926340692262
(4812, 6)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.573814183473587
With feat [  1 182 326 329 331 368] from DT, test auc is:0.7751224338636037 test acc is:0.7214726334059276
(4812, 3)


Train:   0%|          | 0/500 [00:00<?, ?epoch/s]

train loss: 2.7070230841636658
With feat [  1 182 368] from DT, test auc is:0.7602451993764994 test acc is:0.7203367345810716


In [104]:
dict_method

{'auc': [0.7531175033441303,
  0.7537352001503476,
  0.7816918535878925,
  0.7933852548724809,
  0.7974092662812164,
  0.7252119791724245,
  0.7132808958952874,
  0.5305545728909868],
 'acc': [0.7119681174480692,
  0.7176199741313553,
  0.7146959328741833,
  0.7292055893960666,
  0.721420122268039,
  0.6973202737212156,
  0.691997302585759,
  0.5245033551853368]}

In [105]:
label

'DT'