In [None]:
pip install verstack

In [2]:
pip install pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.5/44.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-4.1.0
[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetClassifier

from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import resample, shuffle


import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import random
import joblib

import warnings
warnings.filterwarnings(action='ignore') 

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameter setting

In [None]:
# Training configuration read by the model-building and training cells.
CFG = dict(
    EPOCHS=30,
    LEARNING_RATE=1e-2,
    BATCH_SIZE=256,  # the batch size is a tunable parameter
    SEED=42,
)

# Fixed RandomSeed

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Setting the environment variable is not strictly needed in a Kaggle notebook.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # also covers multi-GPU sessions
    torch.backends.cudnn.deterministic = True
    # benchmark mode lets cuDNN pick kernels by timing them, which can differ from run to
    # run; it must be off for the deterministic flag above to give repeatable results.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED'])

# 1~9번 까지의 노트북 연결작업

### Data load and divide

In [None]:
# Preprocessed train / test frames.
train = pd.read_csv('../input/daconio-117/train_df.csv')
test_df = pd.read_csv('../input/daconio-117/test_df.csv')
# Stacked feature frames; presumably produced by the earlier notebooks in this pipeline — confirm.
stacked_no_al_train = pd.read_csv('/kaggle/input/daacon-1-without-al/stacked_train_1_without_AL')
stacked_train_2 = pd.read_csv('../input/dacon-pipline-19/stacked_train_2')
stacked_valid_2 = pd.read_csv('../input/dacon-pipline-19/stacked_valid_2')

In [None]:
# Train columns except 'AL' and the target, and all test columns; used below to strip
# these columns from the stacked frames.
train_col = train.columns.drop(['AL', 'Y_LABEL'])
test_col = test_df.columns
test_col

In [None]:
# Remove the raw input columns from each stacked frame.
stacked_no_al_train = stacked_no_al_train.drop(columns=train_col)
stacked_train_2 = stacked_train_2.drop(columns=test_col)
stacked_valid_2 = stacked_valid_2.drop(columns=test_col)

In [None]:
target = train['Y_LABEL']
feature = train.drop(columns=['Y_LABEL'])

# 80/20 split of the first-stage stacked features and their labels.
stacked_train_1, stacked_valid_1, y_train, y_valid = train_test_split(
    stacked_no_al_train,
    target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# The DataLoader needs a fixed 0..n-1 index, so renumber every split in place.
for _split_part in (stacked_train_1, stacked_valid_1, y_train, y_valid):
    _split_part.reset_index(drop=True, inplace=True)

In [None]:
# Preview of the index-aligned join of the two stacked training frames (displayed only, not stored).
pd.merge(stacked_train_1, stacked_train_2,left_index=True, right_index=True)

# bootstrap

In [None]:
# Class counts of the training labels, shown before the minority class is resampled below.
y_train.value_counts()

In [None]:
# Join both stacked feature groups row-by-row and attach the label as the last column.
stacked_train = pd.merge(stacked_train_1, stacked_train_2, left_index=True, right_index=True)
stacked_train['Y_LABEL'] = y_train

train_target_0 = stacked_train.loc[stacked_train.Y_LABEL == 0]
train_target_1 = stacked_train.loc[stacked_train.Y_LABEL == 1]

num = 1
# Class 0 is kept as-is; class 1 rows are drawn with replacement. Whole rows (features and
# label together) are resampled, so the label never has to be split off and re-attached.
minority_bootstrap = resample(train_target_1, replace=True, n_samples=int((10285)*0.15))

X_sampled = shuffle(pd.concat([train_target_0, minority_bootstrap]))
X_sampled.reset_index(drop=True, inplace=True)

# Separate the label again: y_sampled is the target, X_sampled keeps only features.
y_sampled = X_sampled.pop('Y_LABEL')

In [None]:
# Split the merged frame back into two feature groups by column position.
# NOTE(review): `:-112` drops the last 112 columns while `112:` skips the first 112, so the
# two slices only partition the frame cleanly if each group has exactly 112 columns — confirm.
X_sampled_1 = X_sampled.iloc[:,:-112]
X_sampled_2 = X_sampled.iloc[:,112:]

### teacher model 만들기

### tabnet teacher model build

In [None]:
# Carve TabNet's own train/validation split out of the resampled first feature group.
X_tabnet_train, X_tabnet_valid, y_tabnet_train, y_tabnet_valid = train_test_split(
    X_sampled_1, y_sampled, test_size=0.2, random_state=42
)
# The feature frames are handed to TabNet as NumPy arrays.
X_tabnet_train, X_tabnet_valid = (
    split.to_numpy() for split in (X_tabnet_train, X_tabnet_valid)
)

In [None]:
max_epochs = CFG['EPOCHS']
batch_size = CFG['BATCH_SIZE']

# For now the learning-rate settings follow the example on GitHub.
tabnet = TabNetClassifier(
    optimizer_fn=torch.optim.Adam,  # any optimizer works here
    optimizer_params={"lr": 2e-2},
    scheduler_fn=torch.optim.lr_scheduler.OneCycleLR,
    scheduler_params=dict(
        is_batch_level=True,
        max_lr=5e-2,
        steps_per_epoch=X_tabnet_train.shape[0] // batch_size + 1,
        epochs=max_epochs,
    ),
    mask_type='entmax',  # alternative: "sparsemax"
)

In [None]:
# No loss_fn is passed, so TabNetClassifier uses its built-in default loss
# (NOTE(review): cross-entropy according to the pytorch-tabnet docs — confirm).
tabnet.fit(
    X_train=X_tabnet_train, y_train=y_tabnet_train,
    eval_set=[(X_tabnet_train, y_tabnet_train), (X_tabnet_valid, y_tabnet_valid)],
    eval_name=['train', 'val'],
    eval_metric=['logloss'],
    max_epochs=max_epochs,
    # patience is a count of non-improving epochs, so it is given as an integer. The
    # previous value 0.0001 was not a tolerance: it only acted as the smallest positive
    # patience, i.e. stop at the first epoch without improvement on the last eval set.
    patience=1,
    batch_size=batch_size,
    virtual_batch_size=128,
    num_workers=0,
    weights=1,
    drop_last=False
)

In [None]:
# Macro-F1 of the TabNet teacher on the held-out validation split.
preds = tabnet.predict(stacked_valid_1.to_numpy())
valid_score = f1_score(y_true=y_valid, y_pred=preds, average='macro')
print(valid_score)

## lgb teacher model build

In [None]:
# NOTE(review): pandas is already imported in the first import cell; the verstack import
# would be easier to find there as well.
import pandas as pd
from verstack import LGBMTuner

# Tune the hyperparameters and fit the optimized model on the resampled first feature group.
ml_1 = LGBMTuner(metric = 'f1_macro', seed=42, device_type = 'gpu') # <- the only required argument
ml_1.fit(X_sampled_1, y_sampled)

In [None]:
# Predictions of the tuned LGBM teacher on the validation split; the true labels are
# displayed for a quick visual comparison.
preds_1 = ml_1.predict(stacked_valid_1)
y_valid

In [None]:
# Macro-F1 of the LGBM teacher (arguments in scikit-learn's y_true, y_pred order).
f1_score(y_valid, preds_1, average="macro")

In [None]:
# Order matters: index 0 is the TabNet teacher and index 1 the LGBM teacher in the cells below.
teacher_model_list = [tabnet, ml_1]

In [None]:
y_train_df = pd.DataFrame(y_sampled)
y_valid_df = pd.DataFrame(y_valid)

# Each (label frame, feature frame) pair is scored by every teacher; the results are
# stored as '<k>_pred_prob' / '<k>_pred' columns next to the true label.
_scored_splits = ((y_train_df, X_sampled_1), (y_valid_df, stacked_valid_1))

for num, model in enumerate(teacher_model_list):
    for _label_df, _features in _scored_splits:
        if num == 0:
            # TabNet is called on NumPy input; column 1 of predict_proba is kept.
            _label_df['{}_pred_prob'.format(num)] = model.predict_proba(_features.to_numpy())[:,1]
            _label_df['{}_pred'.format(num)] = model.predict(_features.to_numpy())
        elif num == 1:
            # The LGBM tuner is called on the DataFrame and its predict_proba output is stored as-is.
            _label_df['{}_pred_prob'.format(num)] = model.predict_proba(_features)
            _label_df['{}_pred'.format(num)] = model.predict(_features)

In [None]:
# Soft target = plain average of the teachers' probabilities.
pred_prob_num = 2
for _label_df in (y_train_df, y_valid_df):
    # Teacher k's probability sits at positional column 2*k + 1 (column 0 is the true label).
    _label_df['pred_prob'] = (
        sum(_label_df.iloc[:, 2*k+1] for k in range(pred_prob_num)) / pred_prob_num
    )

In [None]:
# Keep only what the training loops read from each row: column 0 (the hard label) and the
# last column (the averaged teacher probability used as the soft target). Rebuilding the
# frames from the labels alone would drop the teacher predictions, and `y_df[:, -1]` in
# the student loop would then be the hard label again instead of a soft target.
y_train_df = y_train_df.iloc[:, [0, -1]]
y_valid_df = y_valid_df.iloc[:, [0, -1]]

# CustomDataset

In [None]:
# stacked_train_1 and stacked_train_2 are already prepared at this point.
# stacked_train_1 is only used by LGBM, so whether it has to be converted back to a NumPy
# array after going through torch is checked by seeing whether training works.
class CustomDataset(Dataset):
    """Row-wise dataset over a teacher feature frame, a student feature frame and targets.

    Args:
        teacher_X: DataFrame of teacher features; also defines the dataset length.
        student_X: DataFrame of student features, or None in test mode.
        data_y: DataFrame of targets, or None in test mode.

    Each item is ``(teacher_row, student_row, target_row)`` when ``data_y`` is given,
    otherwise only the ``teacher_X`` row. Feature rows are float32 tensors; the target row
    is returned as the NumPy row of ``data_y``.
    """

    def __init__(self, teacher_X, student_X, data_y):
        super(CustomDataset, self).__init__()
        self.teacher_X = teacher_X
        self.student_X = student_X
        self.data_y = data_y

    def __len__(self):
        return len(self.teacher_X)

    @staticmethod
    def _row_tensor(frame, index):
        """Return row ``index`` of ``frame`` as a float32 tensor."""
        # Going through NumPy avoids handing torch a label-indexed pandas Series, whose
        # integer indexing is not guaranteed to be positional.
        return torch.tensor(frame.iloc[index].to_numpy(dtype=np.float32))

    def __getitem__(self, index):
        if self.data_y is None:
            # Test mode: only one feature frame is available.
            return self._row_tensor(self.teacher_X, index)

        teacher_X = self._row_tensor(self.teacher_X, index)
        student_X = self._row_tensor(self.student_X, index)
        y = self.data_y.values[index]
        return teacher_X, student_X, y

In [None]:
# Training rows come from the resampled frames, validation rows from the held-out split.
# The y frames supply the hard label in column 0 and the soft target in the last column.
train_dataset = CustomDataset(X_sampled_1, X_sampled_2, y_train_df)
valid_dataset= CustomDataset(stacked_valid_1, stacked_valid_2, y_valid_df)

In [None]:
# DataLoaders for mini-batch training; only the training loader is shuffled.
_batch = CFG['BATCH_SIZE']
train_loader = DataLoader(train_dataset, batch_size=_batch, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=_batch, shuffle=False)

# Define Student Model

In [None]:
class Student(nn.Module):
    """Student MLP: 112 inputs -> 128 -> 512 -> 128 -> 1 sigmoid output.

    Every hidden Linear layer is followed by BatchNorm1d and LeakyReLU.
    """

    def __init__(self):
        super().__init__()
        widths = (112, 128, 512, 128)
        stack = []
        for n_in, n_out in zip(widths, widths[1:]):
            stack += [
                nn.Linear(in_features=n_in, out_features=n_out),
                nn.BatchNorm1d(n_out),
                nn.LeakyReLU(),
            ]
        stack += [nn.Linear(in_features=widths[-1], out_features=1), nn.Sigmoid()]
        self.classifier = nn.Sequential(*stack)

    def forward(self, x):
        """Return the sigmoid output of shape (batch, 1) for inputs of shape (batch, 112)."""
        return self.classifier(x)

In [None]:
student_model = Student()

student_model.eval()
optimizer = torch.optim.Adam(student_model.parameters(), lr=CFG['LEARNING_RATE'])
# mode='max' because the scheduler is stepped with the validation F1 score.
# patience counts non-improving epochs and is therefore an integer: 0 halves the learning
# rate after any epoch without improvement, which is exactly how the former fractional
# value (0.001) behaved under the scheduler's `num_bad_epochs > patience` check.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=0, threshold_mode='abs',min_lr=1e-8, verbose=True)

In [None]:
def competition_metric(true, pred):
    """Return the macro-averaged F1 score of ``pred`` against ``true``."""
    macro_f1 = f1_score(true, pred, average="macro")
    return macro_f1

In [None]:
# Use this function when the task is treated purely as regression.
def competition_metric_reg(true, pred):
    """Return the log loss of the predicted probabilities ``pred`` against ``true``."""
    # log_loss is not imported in the notebook's import cell, so it is imported here;
    # without this the function raises NameError on its first call.
    from sklearn.metrics import log_loss
    return log_loss(true, pred)

# Define Knowledge distillation Loss

In [None]:
# Binary cross-entropy of the student prediction against both the soft (teacher) target and
# the hard (label) target; alpha is a tunable mixing weight.
def distillation(student_logits, labels, teacher_logits, alpha):
    """Blend the hard-label BCE and the teacher BCE of the student's output.

    Despite the parameter names, both ``student_logits`` and ``teacher_logits`` are used as
    probabilities, since they are passed straight to binary cross-entropy.

    Args:
        student_logits: Student output, shape (batch, 1).
        labels: Hard labels; reshaped to (batch, 1).
        teacher_logits: Teacher soft targets, same shape as ``student_logits``.
        alpha: Weight of the hard-label term; the teacher term gets ``1 - alpha``.

    Returns:
        Scalar loss tensor.
    """
    hard_term = F.binary_cross_entropy(student_logits, labels.reshape(-1, 1))
    soft_term = F.binary_cross_entropy(student_logits, teacher_logits)
    return alpha * hard_term + (1-alpha) * soft_term

In [None]:
# Regression variant
def distillation_reg(student_logits, labels, teacher_logits, alpha):
    """Blend an MSE hard-label term with the teacher BCE of the student's output.

    Same contract as ``distillation`` except that the hard-label term is a mean squared
    error instead of binary cross-entropy.

    Args:
        student_logits: Student output, shape (batch, 1), used as probabilities.
        labels: Hard labels; reshaped to (batch, 1).
        teacher_logits: Teacher soft targets, same shape as ``student_logits``.
        alpha: Weight of the MSE term; the teacher term gets ``1 - alpha``.

    Returns:
        Scalar loss tensor.
    """
    hard_term = F.mse_loss(student_logits, labels.reshape(-1, 1))
    soft_term = F.binary_cross_entropy(student_logits, teacher_logits)
    return alpha * hard_term + (1-alpha) * soft_term

In [None]:
# Loss used by the student loops; swap in `distillation` for the pure-BCE variant.
use_loss_fn = distillation_reg

In [None]:
def distill_loss(output, target, teacher_output, loss_fn=use_loss_fn, opt=optimizer, alpha=0.1):
    """Compute the distillation loss for one batch and optionally take an optimizer step.

    Note that the ``loss_fn`` and ``opt`` defaults are bound when this cell is executed,
    i.e. to whatever ``use_loss_fn`` and ``optimizer`` were at that moment.

    Args:
        output: Student predictions for the batch.
        target: Hard labels for the batch.
        teacher_output: Teacher soft targets for the batch.
        loss_fn: Callable ``(output, target, teacher_output, alpha) -> scalar tensor``.
        opt: Optimizer to step, or None for evaluation (no backward pass).
        alpha: Mixing weight forwarded to ``loss_fn``.

    Returns:
        The batch loss as a Python float.
    """
    batch_loss = loss_fn(output, target, teacher_output, alpha)

    if opt is None:
        # Evaluation: report the loss without touching the weights.
        return batch_loss.item()

    opt.zero_grad()
    batch_loss.backward()
    opt.step()
    return batch_loss.item()

# Student Train / Validation
1. 두번째 것이 원본 첫번째 것이 tree형 모델을 이용한 것.

In [None]:
def student_train(s_model, optimizer, train_loader, val_loader, scheduler, device, alpha = 0.1):
    """Train the student with the distillation loss and keep the best-scoring weights.

    Args:
        s_model: Student network; moved to ``device``.
        optimizer: Optimizer over ``s_model``'s parameters.
        train_loader: Yields ``(teacher_X, student_X, y_df)``; column 0 of ``y_df`` is the
            hard label and the last column is the teacher soft target.
        val_loader: Loader with the same batch layout, used after every epoch.
        scheduler: Stepped with the validation F1 after each epoch, or None.
        device: Torch device to train on.
        alpha: Weight of the hard-label term in the training loss.

    Returns:
        ``(best_model, val_loss, val_score)``: a copy of the model from the epoch with the
        highest validation F1 (None if no epoch scored above 0), plus the per-batch
        validation losses and the validation F1 of the last epoch.
    """
    import copy

    s_model.to(device)

    best_score = 0
    best_model = None

    for epoch in range(CFG["EPOCHS"]):
        train_loss = []
        s_model.train()

        # The teacher features are not needed here; the teacher signal comes from y_df.
        for _, student_X, y_df in tqdm(train_loader):
            student_X = student_X.float().to(device)
            y_df = y_df.numpy()
            y = torch.Tensor(y_df[:,0].reshape(-1,1)).float().to(device)
            teacher_output = torch.Tensor(y_df[:,-1].reshape(-1,1)).float().to(device)

            optimizer.zero_grad()

            output = s_model(student_X)

            # distill_loss runs backward() and optimizer.step() itself.
            loss_b = distill_loss(output, y, teacher_output, loss_fn=use_loss_fn, opt=optimizer, alpha = alpha)

            train_loss.append(loss_b)

        val_loss, val_score = validation_student(s_model, val_loader, distill_loss, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)

        if best_score < val_score:
            # Snapshot the weights: storing `s_model` itself would only keep a reference,
            # which later epochs keep updating, so the "best" model would always be the last one.
            best_model = copy.deepcopy(s_model)
            best_score = val_score

    return best_model, val_loss, val_score

In [None]:
def validation_student(s_model, val_loader, criterion, device, threshold=0.35):
    """Evaluate the student on ``val_loader``.

    Args:
        s_model: Student network (switched to eval mode).
        val_loader: Yields ``(teacher_X, student_X, y_df)``; column 0 of ``y_df`` is the
            hard label and the last column is the teacher soft target.
        criterion: Accepted for call compatibility; the loss is always computed with the
            notebook-level ``distill_loss`` and ``use_loss_fn``.
        device: Torch device to evaluate on.
        threshold: Probability above which a prediction counts as class 1.

    Returns:
        ``(val_loss, val_f1)``: list of per-batch losses and the macro-F1 at ``threshold``.
    """
    s_model.eval()

    val_loss = []
    pred_labels = []
    true_labels = []

    with torch.no_grad():
        # The teacher features are unused, so they are not converted or moved anywhere.
        for _, student_X, y_df in tqdm(val_loader):
            student_X = student_X.float().to(device)
            y_df = y_df.numpy()
            y = torch.Tensor(y_df[:,0].reshape(-1,1)).float().to(device)
            teacher_output = torch.Tensor(y_df[:,-1].reshape(-1,1)).float().to(device)

            model_pred = s_model(student_X)

            # opt=None: loss only, no backward pass or optimizer step.
            loss_b = distill_loss(model_pred, y, teacher_output, loss_fn=use_loss_fn, opt=None)
            val_loss.append(loss_b)

            pred_labels += model_pred.squeeze(1).to('cpu').tolist()
            # Flatten to a 1-D list of labels instead of a list of one-element lists.
            true_labels += y.squeeze(1).tolist()

    pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
    val_f1 = competition_metric(true_labels, pred_labels)
    return val_loss, val_f1

# Run (Student Model)

In [None]:
# student_train returns a tuple (best_model, val_loss, val_score); later cells index [0] for the model.
best_student_model = student_train(student_model, optimizer, train_loader, valid_loader, scheduler, device)

# Choose Inference Threshold

In [None]:
def choose_threshold(model, val_loader, device):
    """Pick the decision threshold with the best macro-F1 on ``val_loader``.

    The model is always fed the second element of each batch (``student_X``); column 0 of
    the third element is used as the true label.

    Args:
        model: Network returning probabilities of shape (batch, 1).
        val_loader: Yields ``(teacher_X, student_X, y_df)`` batches.
        device: Torch device to evaluate on.

    Returns:
        ``(best_thr, best_score)``; ``best_thr`` is None if no threshold scores above 0.
    """
    model.to(device)
    model.eval()

    candidate_thresholds = [0.1,0.15,  0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
    scores = []
    targets = []

    with torch.no_grad():
        for _, student_X, y_df in tqdm(iter(val_loader)):
            batch_scores = model(student_X.float().to(device)).squeeze(1).to('cpu')
            batch_targets = torch.Tensor(y_df.numpy()[:,0].reshape(-1,1)).float().to(device)

            scores.extend(batch_scores.tolist())
            targets.extend(batch_targets.tolist())

    # Sweep the candidates and keep the first one that reaches the highest score.
    best_thr, best_score = None, 0
    for candidate in candidate_thresholds:
        hard_preds = np.where(np.array(scores) > candidate, 1, 0)
        candidate_score = competition_metric(targets, hard_preds)
        if candidate_score > best_score:
            best_thr, best_score = candidate, candidate_score
    return best_thr, best_score

In [None]:
# best_student_model is the tuple returned by student_train; element 0 is the model.
best_threshold, best_score = choose_threshold(best_student_model[0], valid_loader, device)
print(f'Best Threshold : [{best_threshold}], Score : [{best_score:.5f}]')

# Teacher

In [None]:
class Teacher(nn.Module):
    """Teacher MLP: 112 inputs -> 128 -> 256 -> 1024 -> 256 -> 1 sigmoid output.

    Every hidden Linear layer is followed by BatchNorm1d and LeakyReLU.
    """

    def __init__(self):
        super().__init__()
        widths = (112, 128, 256, 1024, 256)
        stack = []
        for n_in, n_out in zip(widths, widths[1:]):
            stack += [
                nn.Linear(in_features=n_in, out_features=n_out),
                nn.BatchNorm1d(n_out),
                nn.LeakyReLU(),
            ]
        stack += [nn.Linear(in_features=widths[-1], out_features=1), nn.Sigmoid()]
        self.classifier = nn.Sequential(*stack)

    def forward(self, x):
        """Return the sigmoid output of shape (batch, 1) for inputs of shape (batch, 112)."""
        return self.classifier(x)

    
class Teacher_2(nn.Module):
    """Second teacher MLP with the same layer widths as ``Teacher``.

    Differs in its activations: ReLU after every hidden BatchNorm1d and a Hardsigmoid on
    the single output unit.
    """

    def __init__(self):
        super().__init__()
        widths = (112, 128, 256, 1024, 256)
        stack = []
        for n_in, n_out in zip(widths, widths[1:]):
            stack += [
                nn.Linear(in_features=n_in, out_features=n_out),
                nn.BatchNorm1d(n_out),
                nn.ReLU(),
            ]
        stack += [nn.Linear(in_features=widths[-1], out_features=1), nn.Hardsigmoid()]
        self.classifier = nn.Sequential(*stack)

    def forward(self, x):
        """Return the output of shape (batch, 1) for inputs of shape (batch, 112)."""
        return self.classifier(x)

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device, epochs=15):
    """Train a teacher network on the hard labels and keep the best-scoring weights.

    Args:
        model: Network fed the first element (``teacher_X``) of each batch.
        optimizer: Optimizer over ``model``'s parameters.
        train_loader: Yields ``(teacher_X, student_X, y_df)``; column 0 of ``y_df`` is the label.
        val_loader: Loader with the same batch layout, used after every epoch.
        scheduler: Stepped with the validation F1 after each epoch, or None.
        device: Torch device to train on.
        epochs: Number of training epochs (defaults to the previously hard-coded 15).

    Returns:
        A copy of the model from the epoch with the highest validation F1, or None if no
        epoch scored above 0.
    """
    import copy

    model.to(device)

    best_score = 0
    best_model = None
    # The probabilities are fitted with a mean squared error; nn.BCELoss() is the
    # drop-in alternative.
    criterion = nn.MSELoss().to(device)

    for epoch in range(epochs):
        train_loss = []

        model.train()
        for teacher_X, _, y_df in tqdm(train_loader):
            teacher_X = teacher_X.float().to(device)
            y_df = y_df.numpy()
            y = torch.Tensor(y_df[:,0].reshape(-1,1)).float().to(device)

            optimizer.zero_grad()

            y_pred = model(teacher_X)

            loss = criterion(y_pred, y.reshape(-1, 1))
            loss.backward()

            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_score = validation_teacher(model, val_loader, criterion, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)

        if best_score < val_score:
            # Snapshot the weights: storing `model` itself would only keep a reference,
            # which later epochs keep updating, so the "best" model would always be the last one.
            best_model = copy.deepcopy(model)
            best_score = val_score

    return best_model

In [None]:
def competition_metric(true, pred):
    """Return the macro-averaged F1 score of ``pred`` against ``true``."""
    macro_f1 = f1_score(true, pred, average="macro")
    return macro_f1

def validation_teacher(model, val_loader, criterion, device):
    """Evaluate a teacher network on ``val_loader``.

    Args:
        model: Network fed the first element (``teacher_X``) of each batch.
        val_loader: Yields ``(teacher_X, student_X, y_df)``; column 0 of ``y_df`` is the label.
        criterion: Loss called as ``criterion(prediction, label)``.
        device: Torch device to evaluate on.

    Returns:
        ``(val_loss, val_f1)``: list of per-batch losses and the macro-F1 at a fixed
        threshold of 0.35.
    """
    model.eval()

    threshold = 0.35
    val_loss, scores, targets = [], [], []

    with torch.no_grad():
        for teacher_X, _, y_df in tqdm(val_loader):
            inputs = teacher_X.float().to(device)
            y = torch.Tensor(y_df.numpy()[:,0].reshape(-1,1)).float().to(device)

            model_pred = model(inputs)
            val_loss.append(criterion(model_pred, y.reshape(-1, 1)).item())

            scores.extend(model_pred.squeeze(1).to('cpu').tolist())
            targets.extend(y.tolist())

        hard_preds = np.where(np.array(scores) > threshold, 1, 0)
        val_f1 = competition_metric(targets, hard_preds)
    return val_loss, val_f1

In [None]:
# Both teachers get their own optimizer and scheduler. mode='max' because the scheduler is
# stepped with the validation F1. patience counts non-improving epochs and is therefore an
# integer: 0 halves the learning rate after any epoch without improvement, which is exactly
# how the former fractional value (0.01) behaved.
model_1 = Teacher()
model_1.eval()
optimizer = torch.optim.Adam(model_1.parameters(), lr= CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=0, threshold_mode='abs',min_lr=1e-8, verbose=True)

teacher_model_1 = train(model_1, optimizer, train_loader, valid_loader, scheduler, device)

# The second teacher is built only after the first has finished training.
model_2 = Teacher_2()
model_2.eval()
optimizer = torch.optim.Adam(model_2.parameters(), lr= CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=0, threshold_mode='abs',min_lr=1e-8, verbose=True)

teacher_model_2 = train(model_2, optimizer, train_loader, valid_loader, scheduler, device)

In [None]:
# Threshold sweep for each teacher; best_threshold / best_score end up holding the values
# of the second teacher.
for _teacher in (teacher_model_1, teacher_model_2):
    best_threshold, best_score = choose_threshold(_teacher, valid_loader, device)
    print(f'Best Threshold : [{best_threshold}], Score : [{best_score:.5f}]')

# test 전처리

In [None]:
# Stacked features of the test set; every model in the submission cells is scored on this frame.
stacked_test = pd.read_csv('/kaggle/input/only-stocked-df/only_stacked_test.csv')

In [None]:
# Quick look at the first rows of the stacked test features.
stacked_test.head()

In [None]:
# Only one feature frame is needed for the test set, but passing just one argument raises
# an error, so the two unused slots are filled with None.
test_datasets = CustomDataset(stacked_test, None, None)
test_loaders = DataLoader(test_datasets, batch_size = CFG['BATCH_SIZE'], shuffle=False)

# threshold_2

In [None]:
def inference_2(model, test_loader, threshold, device):
    """Return the model's raw probabilities for every row of a three-element loader.

    Args:
        model: Network returning probabilities of shape (batch, 1).
        test_loader: Yields ``(teacher_X, student_X, y_df)`` batches; only ``student_X``
            is fed to the model.
        threshold: Unused. The probabilities are returned unthresholded so they can be
            ensembled; apply ``np.where(pred > threshold, 1, 0)`` for hard labels.
        device: Torch device to run on.

    Returns:
        1-D NumPy array of probabilities in loader order.
    """
    model.to(device)
    model.eval()

    batch_scores = []
    with torch.no_grad():
        for _, student_x, _ in tqdm(test_loader):
            student_x = student_x.float().to(device)
            # Keep one tensor per batch; `list += tensor` would append every row as its
            # own 0-d tensor.
            batch_scores.append(model(student_x).squeeze(1).to('cpu'))

    test_predict = torch.cat(batch_scores).numpy() if batch_scores else np.array([])
    print('Done.')
    return test_predict

In [None]:
# Validation probabilities of both neural teachers, collected next to the true label.
valid_1 = inference_2(teacher_model_1, valid_loader, best_threshold, device)
valid_2 = inference_2(teacher_model_2, valid_loader, best_threshold, device)

y_stacked_2_valid = pd.DataFrame(y_valid).assign(valid_1=valid_1, valid_2=valid_2)

In [None]:
# Weighted ensemble of the two teachers (4 : 6), thresholded at 0.35 and scored with macro-F1.
y_stacked_2_valid['sum'] = (
    4*y_stacked_2_valid['valid_1'] + 6*y_stacked_2_valid['valid_2']
)/10
y_stacked_2_valid['target'] = np.where(y_stacked_2_valid['sum'] > 0.35, 1, 0)
competition_metric(y_stacked_2_valid['Y_LABEL'], y_stacked_2_valid['target'])

In [None]:
# Inspect the label, both teacher probabilities and the ensembled columns.
y_stacked_2_valid.head()

In [None]:
# TabNet was fitted on the first feature group, so it is scored on stacked_valid_1 (the
# same frame used for its validation score above), not on the student features.
valid_tabnet_preds = tabnet.predict_proba(stacked_valid_1.to_numpy())
# valid_lgb_preds = teacher_model_list[1].predict_proba(stacked_valid_2)
# NOTE(review): the original referenced an undefined `teacher_model`; teacher_model_1 is
# used here — switch to teacher_model_2 if that was the intended network.
valid_te_preds = inference_2(teacher_model_1, valid_loader, best_threshold, device)
valid_preds = inference_2(best_student_model[0], valid_loader, best_threshold, device)

y_stacked_2_valid = pd.DataFrame(y_valid)
y_stacked_2_valid['tabnet'] = valid_tabnet_preds[:,1]
# y_stacked_2_valid['lgb'] = valid_lgb_preds
y_stacked_2_valid['te'] = valid_te_preds
y_stacked_2_valid['student'] = valid_preds

# Unweighted average of the three models, thresholded at 0.2.
y_stacked_2_valid['sum'] = (y_stacked_2_valid.tabnet + y_stacked_2_valid.student + y_stacked_2_valid.te)/3
y_stacked_2_valid['target'] = np.where(y_stacked_2_valid['sum'] > 0.2, 1, 0)


In [None]:
# Inspect the per-model probabilities and the averaged ensemble columns.
y_stacked_2_valid.head()

In [None]:
return_list = []
th_list = [0.1,0.15,  0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
# Ensemble weights (tabnet, student, te) = (3, 0, 3), normalised by their sum of 6.
# The tabnet weight was missing, which left a bare `*` in front of the term (SyntaxError).
y_stacked_2_valid['sum'] = (3*y_stacked_2_valid.tabnet + 0*y_stacked_2_valid.student + 3*y_stacked_2_valid.te)/6
for th in th_list:
    y_stacked_2_valid['target'] = np.where(y_stacked_2_valid['sum'] > th, 1, 0)
    score = competition_metric(y_stacked_2_valid['Y_LABEL'], y_stacked_2_valid['target'])
    # One [threshold, macro-F1] pair per candidate threshold.
    return_list.append([th, score])

    
    
# 321 : 0.4 0.574
# 213 : 0.3 0.580
# 132 : 0.3 0.576
# 123 : 0.3 0.566
# 231 : 0.35 0.579
# 312 : 0.35 0.574
# 303 : 0.35 0.573
# 523 : 0.35 0.577
# 325 : 0.3 0.583
# 253 : 0.3 0.578
# 415 : 0.3 0.569
# 055 : 0.25 0.575
# 333 : 0.569
# => 213, 231

In [None]:
# Keep the first [threshold, score] pair that reaches the highest score.
best_score = 0
best_th = 0
for _th, _score in return_list:
    if _score > best_score:
        best_th, best_score = _th, _score
print(best_th, best_score)

# Submission

In [None]:
def inference(model, test_loader, threshold, device):
    """Return the model's raw probabilities for every row of a single-tensor loader.

    Args:
        model: Network returning probabilities of shape (batch, 1).
        test_loader: Yields one feature tensor per batch.
        threshold: Unused. The probabilities are returned unthresholded so they can be
            ensembled; apply ``np.where(pred > threshold, 1, 0)`` for hard labels.
        device: Torch device to run on.

    Returns:
        1-D NumPy array of probabilities in loader order.
    """
    model.to(device)
    model.eval()

    batch_scores = []
    with torch.no_grad():
        for x in tqdm(test_loader):
            x = x.float().to(device)
            # Keep one tensor per batch; `list += tensor` would append every row as its
            # own 0-d tensor.
            batch_scores.append(model(x).squeeze(1).to('cpu'))

    test_predict = torch.cat(batch_scores).numpy() if batch_scores else np.array([])
    print('Done.')
    return test_predict


In [None]:
tabnet_preds = tabnet.predict_proba(stacked_test.to_numpy())
# NOTE(review): the original referenced an undefined `teacher_model`; teacher_model_1 is
# used here — switch to teacher_model_2 if that was the intended network.
te_preds = inference(teacher_model_1, test_loaders, best_threshold, device)
lgb_preds = teacher_model_list[1].predict_proba(stacked_test)

In [None]:
# Raw student probabilities on the test set (best_student_model[0] is the model itself).
preds = inference(best_student_model[0], test_loaders, best_threshold, device)

In [None]:
# Display the student's test probabilities.
preds

# Submit

In [None]:
# Fixed decision threshold for the submission (overrides the value found by the sweep above).
best_th = 0.35
submit = pd.read_csv('../input/dacom-competition-1/sample_submission.csv')
submit['Y_LABEL_0'] = preds               # student
submit['Y_LABEL_1'] = tabnet_preds[:,1]   # TabNet teacher
submit['Y_LABEL_2'] = te_preds            # neural teacher
# To submit the neural teacher alone, set submit['sum'] = te_preds instead.
submit['sum'] = (0*submit.Y_LABEL_0 + 3*submit.Y_LABEL_1 + 3*submit.Y_LABEL_2)/6
# Threshold the ensembled probability; thresholding Y_LABEL_2 here would ignore the
# weighted sum computed on the previous line.
submit['Y_LABEL'] = np.where(submit['sum'] > best_th, 1, 0)
submit.head()

In [None]:
# Only the ID and the final hard label go into the submission file.
submit_df = submit.loc[:, ['ID', 'Y_LABEL']]
submit_df.head()

In [None]:
# Sanity check: how many test rows are predicted as each class.
submit_df.Y_LABEL.value_counts()

In [None]:
# Write the submission file without the DataFrame index column.
submit_df.to_csv('./submit_3.csv', index=False)