## Import

In [83]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer

from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import random
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import scipy.stats as stats
import warnings
warnings.filterwarnings(action='ignore') 

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


## Hyperparameter setting

In [84]:
CFG = {
    'EPOCHS': 30,
    'LEARNING_RATE':1e-2,
    'BATCH_SIZE':256,
    'SEED':41
}

## Fixed RandomSeed

In [85]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED'])

## Data Load

In [86]:
train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')

## Data Preprocessing
#### 1. 결측치 처리
#### 2. Train / Validation 분할
#### 3. Data label-encoding, scaling

In [87]:
from pandas.core.reshape.reshape import get_dummies
temp=get_dummies(train['COMPONENT_ARBITRARY'])
train=pd.concat([temp,train],axis=1)
train=train.drop('COMPONENT_ARBITRARY',axis=1)

temp=get_dummies(test['COMPONENT_ARBITRARY'])
test=pd.concat([temp,test],axis=1)
test=test.drop('COMPONENT_ARBITRARY',axis=1)

In [88]:
train

Unnamed: 0,COMPONENT1,COMPONENT2,COMPONENT3,COMPONENT4,ID,ANONYMOUS_1,YEAR,SAMPLE_TRANSFER_DAY,ANONYMOUS_2,AG,...,U25,U20,U14,U6,U4,V,V100,V40,ZN,Y_LABEL
0,0,0,1,0,TRAIN_00000,1486,2011,7,200,0,...,,,,,,0,,154.0,75,0
1,0,1,0,0,TRAIN_00001,1350,2021,51,375,0,...,2.0,4.0,6.0,216.0,1454.0,0,,44.0,652,0
2,0,1,0,0,TRAIN_00002,2415,2015,2,200,0,...,0.0,3.0,39.0,11261.0,41081.0,0,,72.6,412,1
3,0,0,1,0,TRAIN_00003,7389,2010,2,200,0,...,,,,,,0,,133.3,7,0
4,0,0,1,0,TRAIN_00004,3954,2015,4,200,0,...,,,,,,0,,133.1,128,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14090,0,0,1,0,TRAIN_14090,1616,2014,8,200,0,...,,,,,,0,,135.4,16,0
14091,1,0,0,0,TRAIN_14091,2784,2013,2,200,0,...,,,,,,0,14.5,117.5,1408,0
14092,0,0,1,0,TRAIN_14092,1788,2008,9,550,0,...,,,,,,0,,54.0,1301,0
14093,0,1,0,0,TRAIN_14093,2498,2009,19,550,0,...,7.0,8.0,100.0,1625.0,18890.0,0,,44.3,652,0


In [89]:
categorical_features = ['YEAR']


# Inference(실제 진단 환경)에 사용하는 컬럼
test_stage_features = ['COMPONENT1','COMPONENT2','COMPONENT3','COMPONENT4', 'ANONYMOUS_1', 'YEAR' , 'ANONYMOUS_2', 'AG', 'CO', 'CR', 'CU', 'FE', 'H2O', 'MN', 'MO', 'NI', 'PQINDEX', 'TI', 'V', 'V40', 'ZN']




In [90]:
#train = train.fillna(train.mean())
#test = test.fillna(test.mean())
train = train.fillna(0)
test = test.fillna(0)

In [91]:
train['FUEL']=train['FUEL']*10000
train['SOOTPERCENTAGE']=train['SOOTPERCENTAGE']*10000
#FUEL,SOOTPERCENTAGE
#% to ppm


In [92]:
def remove_out(dataframe, remove_col):
    dff = dataframe
    q1=dff[remove_col].quantile(0.005)
    q3=dff[remove_col].quantile(0.995)
    iqr=q3-q1

    condition=dff[remove_col]>q3+1.5*iqr
    a=dff[condition].index
    dff.drop(a,inplace=True)
    return dff


categorical_features_=categorical_features.copy()
categorical_features_.append('ID')
categorical_features_.append('YEAR')
categorical_features_.append('Y_LABEL')
for i in train:
  if i not in categorical_features_:
    train=remove_out(train,i)

"""for i in test:
  if i not in categorical_features_:
    test=remove_out(test,i)"""

'for i in test:\n  if i not in categorical_features_:\n    test=remove_out(test,i)'

In [93]:
all_X = train.drop(['ID', 'Y_LABEL'], axis = 1)
all_y = train['Y_LABEL']

test = test.drop(['ID'], axis = 1)

train_X, val_X, train_y, val_y = train_test_split(all_X, all_y, test_size=0.2, random_state=CFG['SEED'], stratify=all_y)

In [94]:
def get_values(value):
    return value.values.reshape(-1, 1)

for col in train_X.columns:
    if col not in categorical_features:
        scaler = StandardScaler()
        train_X[col] = scaler.fit_transform(get_values(train_X[col]))
        val_X[col] = scaler.transform(get_values(val_X[col]))
        if col in test.columns:
            test[col] = scaler.transform(get_values(test[col]))
            
le = LabelEncoder()
for col in categorical_features:
  print(col)  
  train_X[col] = le.fit_transform(train_X[col])
  val_X[col] = le.transform(val_X[col])
  if col in test.columns:
    print(col)
    test[col] = le.transform(test[col])
        


YEAR
YEAR


In [95]:
val_X

Unnamed: 0,COMPONENT1,COMPONENT2,COMPONENT3,COMPONENT4,ANONYMOUS_1,YEAR,SAMPLE_TRANSFER_DAY,ANONYMOUS_2,AG,AL,...,U50,U25,U20,U14,U6,U4,V,V100,V40,ZN
100,-0.624435,2.269106,-0.993669,-0.252666,-0.722446,6,-0.139226,-0.349365,-0.147611,-0.043918,...,-0.158041,0.017575,0.411199,2.212603,0.513512,1.011975,-0.143279,-0.603531,-1.484538,0.141913
309,-0.624435,-0.440702,1.006371,-0.252666,-0.787785,4,0.530404,-0.349365,-0.147611,-0.194108,...,-0.158041,-0.212768,-0.213389,-0.197929,-0.222872,-0.311127,-0.143279,-0.603531,0.618896,-0.901664
3441,-0.624435,-0.440702,1.006371,-0.252666,-0.289754,1,2.060988,-0.013852,5.836707,0.012403,...,-0.158041,-0.212768,-0.213389,-0.197929,-0.222872,-0.311127,-0.143279,-0.603531,0.922073,-1.038680
11930,1.601447,-0.440702,-0.993669,-0.252666,1.147230,7,-0.330549,-0.349365,-0.147611,-0.156561,...,-0.158041,-0.212768,-0.213389,-0.197929,-0.222872,-0.311127,-0.143279,1.703677,-0.142570,1.412599
9872,1.601447,-0.440702,-0.993669,-0.252666,-0.004680,7,-0.521872,-0.349365,-0.147611,-0.100239,...,-0.158041,-0.212768,-0.213389,-0.197929,-0.222872,-0.311127,-0.143279,1.438099,-0.680767,0.885180
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12121,1.601447,-0.440702,-0.993669,-0.252666,1.702374,7,-0.330549,-0.349365,-0.147611,-0.081466,...,-0.158041,-0.212768,-0.213389,-0.197929,-0.222872,-0.311127,-0.143279,1.504493,-0.344688,0.883303
7109,-0.624435,-0.440702,1.006371,-0.252666,0.252322,12,0.243419,0.438611,-0.147611,-0.175334,...,-0.158041,-0.212768,-0.213389,-0.197929,-0.222872,-0.311127,-0.143279,-0.603531,0.724655,-1.094988
5817,-0.624435,-0.440702,-0.993669,3.957791,-0.495452,9,0.147758,-0.349365,-0.147611,-0.194108,...,-0.158041,-0.212768,-0.213389,-0.197929,-0.222872,-0.311127,-0.143279,-0.603531,-0.727771,1.198628
3070,1.601447,-0.440702,-0.993669,-0.252666,0.915396,9,-0.330549,-0.349365,5.836707,-0.156561,...,-0.158041,-0.212768,-0.213389,-0.197929,-0.222872,-0.311127,-0.143279,1.670479,-0.229528,0.866410


## CustomDataset

In [96]:


#__len__
#__len__ 함수는 데이터셋의 샘플 개수를 반환합니다.
"""
__getitem__ 함수는 주어진 인덱스 idx 에 해당하는 샘플을 데이터셋에서 불러오고 반환합니다.
인덱스를 기반으로, 디스크에서 이미지의 위치를 식별하고, read_image 를 사용하여 이미지를 텐서로 변환하고,
self.img_labels 의 csv 데이터로부터 해당하는 정답(label)을 가져오고, (해당하는 경우) 변형(transform) 함수들을 호출한 뒤,
텐서 이미지와 라벨을 Python 사전(dict)형으로 반환합니다.
"""

class CustomDataset(Dataset):
    def __init__(self, data_X, data_y, distillation=False):
        super(CustomDataset, self).__init__()
        self.data_X = data_X
        self.data_y = data_y
        self.distillation = distillation
        
    def __len__(self):
        return len(self.data_X)
    
    def __getitem__(self, index):
        if self.distillation:
            # 지식 증류 학습 시
            teacher_X = torch.Tensor(self.data_X.iloc[index])
            student_X = torch.Tensor(self.data_X[test_stage_features].iloc[index])
            y = self.data_y.values[index]
            return teacher_X, student_X, y
        else:
            if self.data_y is None:
                test_X = torch.Tensor(self.data_X.iloc[index])
                return test_X
            else:
                teacher_X = torch.Tensor(self.data_X.iloc[index])
                y = self.data_y.values[index]
                return teacher_X, y

In [97]:
train_dataset = CustomDataset(train_X, train_y, False)
val_dataset = CustomDataset(val_X, val_y, False)
#객체 생성 

# DataLoader
DataLoader로 학습용 데이터 준비하기
Dataset 은 데이터셋의 특징(feature)을 가져오고 하나의 샘플에 정답(label)을 지정하는 일을 한 번에 합니다.
모델을 학습할 때, 일반적으로 샘플들을 “미니배치(minibatch)”로 전달하고, 매 에폭(epoch)마다 데이터를 다시 섞어서 과적합(overfit)을 막고,
Python의 multiprocessing 을 사용하여 데이터 검색 속도를 높이려고 합니다.



In [98]:
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False)

## Define Teacher Model

In [99]:
class Teacher(nn.Module):
    def __init__(self):
        super(Teacher, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(in_features=55, out_features=256),
            nn.LazyBatchNorm1d(256),
            nn.SiLU(),
            nn.Linear(in_features=256, out_features=1024),
            nn.LazyBatchNorm1d(1024),
            nn.SiLU(),
            nn.Linear(in_features=1024, out_features=256),
            nn.LazyBatchNorm1d(256),
            nn.SiLU(),  
            nn.Linear(in_features=256, out_features=1),
            nn.Sigmoid()
        )
    def forward(self, x):

      output=self.classifier(x)
      return output

## Teacher Train / Validation

In [100]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    best_score = 0
    best_model = None
    criterion = nn.BCELoss().to(device)
    best_f1=0
    for epoch in range(CFG["EPOCHS"]):#
        train_loss = []
  
        model.train()
        for X, y in tqdm(train_loader):
            X = X.float().to(device)
            y = y.float().to(device)
            
            optimizer.zero_grad()
            
            y_pred = model(X)
            
            loss = criterion(y_pred, y.reshape(-1, 1))
            loss.backward()
            
            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_score = validation_teacher(model, val_loader, criterion, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)
        
        if best_score < val_score:
            best_model = model
            best_score = val_score
        print(best_score,'best_f1')
    return best_model 




In [101]:
def competition_metric(true, pred):
    return f1_score(true, pred, average="macro")

def validation_teacher(model, val_loader, criterion, device):
    model.eval()

    val_loss = []
    pred_labels = []
    true_labels = []
    threshold = 0.35
    best_f1=-1
    with torch.no_grad():
        for X, y in tqdm(val_loader):
            X = X.float().to(device)
            y = y.float().to(device)
            
            model_pred = model(X.to(device))
            
            loss = criterion(model_pred, y.reshape(-1, 1))
            val_loss.append(loss.item())      
            
            model_pred = model_pred.squeeze(1).to('cpu')  
            pred_labels += model_pred.tolist()
            true_labels += y.tolist()
        
        pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
        val_f1 = competition_metric(true_labels, pred_labels)
        if best_f1 < val_f1:
          best_f1=val_f1
    return val_loss, val_f1 

## Run (Teacher Model)

In [102]:
model = Teacher()
model.eval()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)


'''mode_=['min','max']
factor_=[0.1, 0.01 ,0.001,0.0001]
threshold_mode_=['rel','abs']

for i in mode_:
  for j in factor_:
    for k in threshold_mode_:
      print(i,j,k)
      scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode=i, factor=j, patience=1, threshold_mode=k,min_lr=1e-8, verbose=True)
      teacher_model = train(model, optimizer, train_loader, val_loader, scheduler, device)
'''
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor= 0.1, patience=1, threshold_mode= 'abs',min_lr=1e-8, verbose=True)
teacher_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [0], Train Loss : [0.64046] Val Loss : [0.52574] Val F1 Score : [0.07742]
0.07741935483870968 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.40108] Val Loss : [0.29289] Val F1 Score : [0.63937]
0.6393749664825441 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.24263] Val Loss : [0.20283] Val F1 Score : [0.73394]
0.7339404622013317 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.19898] Val Loss : [0.17919] Val F1 Score : [0.77669]
0.7766936714805682 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.18478] Val Loss : [0.16474] Val F1 Score : [0.79920]
0.7992009053326867 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.17594] Val Loss : [0.15776] Val F1 Score : [0.81690]
0.8168970164938091 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.17425] Val Loss : [0.15676] Val F1 Score : [0.81878]
0.8187786448656015 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.17214] Val Loss : [0.15577] Val F1 Score : [0.82579]
0.8257938116395414 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.17286] Val Loss : [0.15761] Val F1 Score : [0.82276]
0.8257938116395414 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.17051] Val Loss : [0.15570] Val F1 Score : [0.82593]
0.8259312127683489 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.17013] Val Loss : [0.15683] Val F1 Score : [0.82474]
0.8259312127683489 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.16602] Val Loss : [0.15412] Val F1 Score : [0.82276]
Epoch 00012: reducing learning rate of group 0 to 1.0000e-04.
0.8259312127683489 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.16796] Val Loss : [0.15395] Val F1 Score : [0.82182]
0.8259312127683489 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.16803] Val Loss : [0.15325] Val F1 Score : [0.82671]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.16683] Val Loss : [0.15317] Val F1 Score : [0.82172]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.16614] Val Loss : [0.15290] Val F1 Score : [0.82369]
Epoch 00016: reducing learning rate of group 0 to 1.0000e-05.
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.16587] Val Loss : [0.15287] Val F1 Score : [0.82369]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.16777] Val Loss : [0.15276] Val F1 Score : [0.82182]
Epoch 00018: reducing learning rate of group 0 to 1.0000e-06.
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.16778] Val Loss : [0.15316] Val F1 Score : [0.82182]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.16542] Val Loss : [0.15296] Val F1 Score : [0.82182]
Epoch 00020: reducing learning rate of group 0 to 1.0000e-07.
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.16644] Val Loss : [0.15280] Val F1 Score : [0.82182]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.16873] Val Loss : [0.15287] Val F1 Score : [0.82182]
Epoch 00022: reducing learning rate of group 0 to 1.0000e-08.
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.16665] Val Loss : [0.15295] Val F1 Score : [0.82182]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.16795] Val Loss : [0.15285] Val F1 Score : [0.82182]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.16630] Val Loss : [0.15275] Val F1 Score : [0.81973]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.16627] Val Loss : [0.15275] Val F1 Score : [0.81973]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.16680] Val Loss : [0.15279] Val F1 Score : [0.82172]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.16807] Val Loss : [0.15303] Val F1 Score : [0.82566]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.16795] Val Loss : [0.15278] Val F1 Score : [0.82182]
0.8267070428081384 best_f1


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.16416] Val Loss : [0.15276] Val F1 Score : [0.82182]
0.8267070428081384 best_f1


In [103]:
#0.80535011117898 best_f1.  lr_scheduler=max , gru= num_layers =1
#0.8062359237412331 best_f1  lr_scheduler=min , gru= num_layers =2
#0.8071693142869618 best_f1. lr_scheduler=max , gru= num_layers =2


### layer 
#0.8080850716191927 best_f1  lr_scheduler=max , gru= num_layers =2 265 512 증가하는 폭을 조금 수정
# 0.8071796488389799 best_f1 linear layer 하나 더 추가 nn.Linear(in_features=128, out_features=256)
#0.4776727811747267 best_f1 그냥 젤큰 레이어 추가하는건 별로 안좋음
#0.8117531050504077 1)best_f1 최고 nn.Linear(in_features=128, out_features=256) 피처 범위 512 로 바꿈
#                   2) 
#0.8071332686913297   //  최고범위 256 


In [104]:
test

Unnamed: 0,COMPONENT1,COMPONENT2,COMPONENT3,COMPONENT4,ANONYMOUS_1,YEAR,ANONYMOUS_2,AG,CO,CR,...,FE,H2O,MN,MO,NI,PQINDEX,TI,V,V40,ZN
0,1.601447,-0.440702,-0.993669,-0.252666,-0.391393,9,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.407389,-0.104598,-0.358368,-0.399628,-0.277070,-0.275650,-0.168760,-0.143279,-0.403443,0.928349
1,-0.624435,-0.440702,1.006371,-0.252666,-0.100027,4,-0.349365,-0.147611,-0.125053,-0.060054,...,0.339233,-0.104598,0.086623,-0.399628,-0.277070,1.740263,0.275958,-0.143279,0.433230,-1.096865
2,-0.624435,2.269106,-0.993669,-0.252666,-0.493032,3,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.427037,-0.104598,-0.358368,-0.399628,-0.277070,-0.274909,-0.168760,-0.143279,-1.508040,0.220744
3,-0.624435,-0.440702,1.006371,-0.252666,-0.772782,2,-0.349365,-0.147611,-0.125053,0.125259,...,0.016446,-0.104598,0.234953,-0.349620,-0.277070,5.646927,-0.168760,-0.143279,0.806913,-0.942957
4,-0.624435,2.269106,-0.993669,-0.252666,2.528555,6,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.404582,-0.104598,-0.358368,-0.399628,-0.277070,-0.271206,-0.168760,-0.143279,-1.059150,-0.239105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,-0.624435,-0.440702,1.006371,-0.252666,-0.622743,7,-0.349365,-0.147611,-0.125053,0.125259,...,2.497701,-0.104598,9.283100,-0.316282,0.711394,1.197403,-0.168760,-0.143279,-0.875834,1.063489
6037,-0.624435,-0.440702,1.006371,-0.252666,0.547075,9,-0.349365,-0.147611,-0.125053,0.495885,...,1.624771,-0.104598,0.383283,-0.399628,-0.277070,0.230180,-0.168760,-0.143279,0.689402,-1.094988
6038,-0.624435,-0.440702,1.006371,-0.252666,0.640971,7,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.292308,-0.104598,-0.358368,-0.399628,-0.277070,-0.231955,-0.168760,-0.143279,4.553140,-1.094988
6039,-0.624435,2.269106,-0.993669,-0.252666,-0.792141,6,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.435457,-0.104598,-0.358368,-0.399628,-0.277070,-0.279353,-0.168760,-0.143279,-1.348226,-0.222213


## Define Student Model

In [112]:
class Student(nn.Module):
    def __init__(self):
        super(Student, self).__init__()
        self.lstm = nn.GRU(input_size=21, hidden_size=21, num_layers=1, bias=True, batch_first=True)
        self.classifier = nn.Sequential(
            nn.Linear(in_features=21, out_features=128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Linear(in_features=128, out_features=1024),
            nn.BatchNorm1d(1024),
            nn.SiLU(),
            nn.Linear(in_features=1024, out_features=128),
            nn.BatchNorm1d(128),
            nn.SiLU(),
            nn.Linear(in_features=128, out_features=1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        hidden, _ = self.lstm(x)
        output = self.classifier(x)
        return output

## Define Knowledge distillation Loss

In [113]:
def distillation(student_logits, labels, teacher_logits, alpha):
    distillation_loss = nn.BCELoss()(student_logits, teacher_logits)
    student_loss = nn.BCELoss()(student_logits, labels.reshape(-1, 1))
    return alpha * student_loss + (1-alpha) * distillation_loss

In [114]:
def distill_loss(output, target, teacher_output, loss_fn=distillation, opt=optimizer):
    loss_b = loss_fn(output, target, teacher_output, alpha=0.1)

    if opt is not None:
        opt.zero_grad()
        loss_b.backward()
        opt.step()

    return loss_b.item()

## Student Train / Validation

In [115]:
def student_train(s_model, t_model, optimizer, train_loader, val_loader, scheduler, device):
    s_model.to(device)
    #t_model.to(device)
    
    best_score = 0
    best_model = None

    for epoch in range(CFG["EPOCHS"]):#
        train_loss = []
        s_model.train()
        t_model.eval()
        
        for X_t, X_s, y in tqdm(train_loader):
            X_t = X_t.float().to(device)
            X_s = X_s.float().to(device)
            y = y.float().to(device)
            
            optimizer.zero_grad()

            output = s_model(X_s)
            with torch.no_grad():
                teacher_output = t_model(X_t)
  
            loss_b = distill_loss(output, y, teacher_output, loss_fn=distillation, opt=optimizer)

            train_loss.append(loss_b)

        val_loss, val_score = validation_student(s_model, t_model, val_loader, distill_loss, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(val_score)
            
        if best_score < val_score:
            best_model = s_model
            best_score = val_score
        print('best f1 : ',best_score)
    return best_model


In [116]:
def validation_student(s_model, t_model, val_loader, criterion, device):
    s_model.eval()
    t_model.eval()

    val_loss = []
    pred_labels = []
    true_labels = []
    threshold = 0.35#0.35
    
    with torch.no_grad():
        for X_t, X_s, y in tqdm(val_loader):
            X_t = X_t.float().to(device)
            X_s = X_s.float().to(device)
            y = y.float().to(device)
            
            model_pred = s_model(X_s)
            teacher_output = t_model(X_t)
            
            loss_b = distill_loss(model_pred, y, teacher_output, loss_fn=distillation, opt=None)
            val_loss.append(loss_b)
            
            model_pred = model_pred.squeeze(1).to('cpu')
            pred_labels += model_pred.tolist()
            true_labels += y.tolist()
        
        pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
        val_f1 = competition_metric(true_labels, pred_labels)
    return val_loss, val_f1    

## Run (Student Model)

In [117]:
train_dataset = CustomDataset(train_X, train_y, True)
val_dataset = CustomDataset(val_X, val_y, True)

train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False)

In [118]:
student_model = Student()
student_model.eval()
optimizer = torch.optim.AdamW(student_model.parameters(), lr=CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.01, patience=1, threshold_mode='rel',min_lr=1e-8, verbose=True)
'''
mode_=['min','max']
factor_=[0.1, 0.01 ,0.001,0.0001]
threshold_mode_=['rel','abs']

for i in mode_:
  for j in factor_:
    for k in threshold_mode_:
      print(i,j,k)
      scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode=i, factor=j, patience=1, threshold_mode=k,min_lr=1e-8, verbose=True)
      best_student_model = student_train(student_model,teacher_model , optimizer, train_loader, val_loader, scheduler, device)
'''
#min 0.01 rel
#scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.00005, step_size_up=5, max_lr=CFG['LEARNING_RATE'],gamma=0.5)
#xg_model
#teacher_model
best_student_model = student_train(student_model,teacher_model , optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [0], Train Loss : [0.33812] Val Loss : [0.28697] Val F1 Score : [0.49631]
best f1 :  0.4963117746280305


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.27306] Val Loss : [0.28026] Val F1 Score : [0.48244]
best f1 :  0.4963117746280305


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.27017] Val Loss : [0.27780] Val F1 Score : [0.49121]
best f1 :  0.4963117746280305


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.26807] Val Loss : [0.27971] Val F1 Score : [0.50184]
Epoch 00004: reducing learning rate of group 0 to 1.0000e-04.
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.26546] Val Loss : [0.27962] Val F1 Score : [0.49813]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.26525] Val Loss : [0.27976] Val F1 Score : [0.49832]
Epoch 00006: reducing learning rate of group 0 to 1.0000e-06.
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.26460] Val Loss : [0.27971] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.26400] Val Loss : [0.27953] Val F1 Score : [0.49832]
Epoch 00008: reducing learning rate of group 0 to 1.0000e-08.
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.26439] Val Loss : [0.27938] Val F1 Score : [0.49813]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.26426] Val Loss : [0.27980] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.26515] Val Loss : [0.27930] Val F1 Score : [0.49851]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.26362] Val Loss : [0.27900] Val F1 Score : [0.49795]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.26423] Val Loss : [0.27910] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.26530] Val Loss : [0.27932] Val F1 Score : [0.49813]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.26335] Val Loss : [0.27929] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.26556] Val Loss : [0.27945] Val F1 Score : [0.49813]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.26441] Val Loss : [0.27938] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.26523] Val Loss : [0.27976] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.26448] Val Loss : [0.27950] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.26483] Val Loss : [0.27949] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.26488] Val Loss : [0.27928] Val F1 Score : [0.49813]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.26468] Val Loss : [0.27942] Val F1 Score : [0.49795]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.26445] Val Loss : [0.27934] Val F1 Score : [0.49813]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.26553] Val Loss : [0.27951] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.26410] Val Loss : [0.27931] Val F1 Score : [0.49851]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.26416] Val Loss : [0.27932] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.26405] Val Loss : [0.27990] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.26571] Val Loss : [0.27920] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.26558] Val Loss : [0.27932] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.26604] Val Loss : [0.27968] Val F1 Score : [0.49832]
best f1 :  0.5018382007258778


## Choose Inference Threshold

In [127]:

def choose_threshold(model, val_loader, device):
    model.to(device)
    model.eval()
    
    thresholds = [0.1,0.15,0.17, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
    pred_labels = []
    true_labels = []
    
    best_score = 0
    best_thr = None
    with torch.no_grad():
        for _, x_s, y in tqdm(iter(val_loader)):
            x_s = x_s.float().to(device)
            y = y.float().to(device)
            
            model_pred = model(x_s)
            
            model_pred = model_pred.squeeze(1).to('cpu')
            pred_labels += model_pred.tolist()
            true_labels += y.tolist()
        
        for threshold in thresholds:
            pred_labels_thr = np.where(np.array(pred_labels) > threshold, 1, 0)
            score_thr = competition_metric(true_labels, pred_labels_thr)
            if best_score < score_thr:
                best_score = score_thr
                best_thr = threshold
    return best_thr, best_score

In [128]:
best_threshold, best_score = choose_threshold(best_student_model, val_loader, device)

print(f'Best Threshold : [{best_threshold}], Score : [{best_score:.5f}]')

#2022-11-22 14:16:40	0.5518035852
#Best Threshold : [0.2], Score : [0.54655]

#edit	2022-11-22 14:34:00	0.5587866109
#Best Threshold : [0.15], Score : [0.56701]
#0.8267070428081384 best_f1


#edit	2022-11-22 14:53:16	0.5721608406
#Best Threshold : [0.15], Score : [0.57218]
#0.8267070428081384 best_f1


  0%|          | 0/11 [00:00<?, ?it/s]

Best Threshold : [0.15], Score : [0.57218]


## Inference

In [121]:
test_datasets = CustomDataset(test, None, False)
test_loaders = DataLoader(test_datasets, batch_size = CFG['BATCH_SIZE'], shuffle=False)

In [122]:
def inference(model, test_loader, threshold, device):
    model.to(device)
    model.eval()
    
    test_predict = []
    with torch.no_grad():
        for x in tqdm(test_loader):
            x = x.float().to(device)
            model_pred = model(x)

            model_pred = model_pred.squeeze(1).to('cpu')
            test_predict += model_pred
        
    test_predict = np.where(np.array(test_predict) > threshold, 1, 0)
    print('Done.')
    return test_predict

In [123]:
preds = inference(best_student_model, test_loaders, best_threshold, device)

  0%|          | 0/24 [00:00<?, ?it/s]

Done.


In [124]:

test

Unnamed: 0,COMPONENT1,COMPONENT2,COMPONENT3,COMPONENT4,ANONYMOUS_1,YEAR,ANONYMOUS_2,AG,CO,CR,...,FE,H2O,MN,MO,NI,PQINDEX,TI,V,V40,ZN
0,1.601447,-0.440702,-0.993669,-0.252666,-0.391393,9,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.407389,-0.104598,-0.358368,-0.399628,-0.277070,-0.275650,-0.168760,-0.143279,-0.403443,0.928349
1,-0.624435,-0.440702,1.006371,-0.252666,-0.100027,4,-0.349365,-0.147611,-0.125053,-0.060054,...,0.339233,-0.104598,0.086623,-0.399628,-0.277070,1.740263,0.275958,-0.143279,0.433230,-1.096865
2,-0.624435,2.269106,-0.993669,-0.252666,-0.493032,3,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.427037,-0.104598,-0.358368,-0.399628,-0.277070,-0.274909,-0.168760,-0.143279,-1.508040,0.220744
3,-0.624435,-0.440702,1.006371,-0.252666,-0.772782,2,-0.349365,-0.147611,-0.125053,0.125259,...,0.016446,-0.104598,0.234953,-0.349620,-0.277070,5.646927,-0.168760,-0.143279,0.806913,-0.942957
4,-0.624435,2.269106,-0.993669,-0.252666,2.528555,6,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.404582,-0.104598,-0.358368,-0.399628,-0.277070,-0.271206,-0.168760,-0.143279,-1.059150,-0.239105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,-0.624435,-0.440702,1.006371,-0.252666,-0.622743,7,-0.349365,-0.147611,-0.125053,0.125259,...,2.497701,-0.104598,9.283100,-0.316282,0.711394,1.197403,-0.168760,-0.143279,-0.875834,1.063489
6037,-0.624435,-0.440702,1.006371,-0.252666,0.547075,9,-0.349365,-0.147611,-0.125053,0.495885,...,1.624771,-0.104598,0.383283,-0.399628,-0.277070,0.230180,-0.168760,-0.143279,0.689402,-1.094988
6038,-0.624435,-0.440702,1.006371,-0.252666,0.640971,7,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.292308,-0.104598,-0.358368,-0.399628,-0.277070,-0.231955,-0.168760,-0.143279,4.553140,-1.094988
6039,-0.624435,2.269106,-0.993669,-0.252666,-0.792141,6,-0.349365,-0.147611,-0.125053,-0.430680,...,-0.435457,-0.104598,-0.358368,-0.399628,-0.277070,-0.279353,-0.168760,-0.143279,-1.348226,-0.222213


## Submit

In [125]:
submit = pd.read_csv('./sample_submission.csv')
submit['Y_LABEL'] = preds
submit.head()

Unnamed: 0,ID,Y_LABEL
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,0
3,TEST_0003,0
4,TEST_0004,1


In [126]:
submit.to_csv('./submit.csv', index=False)

array([0, 1])

In [None]:
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)

In [None]:
h0

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np