In [1]:
import math
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from collections import defaultdict
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import KFold

from copy import deepcopy

import warnings

warnings.filterwarnings(action='ignore')
torch.set_printoptions(sci_mode=True)

In [2]:
import gc

gc.collect()
torch.cuda.empty_cache()

In [3]:
def seed_everything(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

# 데이터 전처리

In [4]:
class MakeDataset():

    def __init__(self, DATA_PATH):
        self.preporcessing(DATA_PATH)
        self.oof_user_set = self.split_data()
    
    def split_data(self):
        user_list = self.all_df['userID'].unique().tolist()
        oof_user_set = {}
        kf = KFold(n_splits = 5, random_state = 22, shuffle = True)
        for idx, (train_user, valid_user) in enumerate(kf.split(user_list)):
            oof_user_set[idx] = valid_user.tolist()
        
        return oof_user_set

    def preporcessing(self, DATA_PATH):

        dtype = {
            'userID': 'int16',
            'answerCode': 'int8',
            'KnowledgeTag': 'int16'
        }
        
        train_df = pd.read_csv(os.path.join(DATA_PATH, 'train_data.csv'), dtype=dtype, parse_dates=['Timestamp'])
        train_df = train_df.sort_values(by=['userID', 'Timestamp']).reset_index(drop=True)

        test_df = pd.read_csv(os.path.join(DATA_PATH, 'test_data.csv'), dtype=dtype, parse_dates=['Timestamp'])
        test_df = test_df.sort_values(by=['userID', 'Timestamp']).reset_index(drop=True)


        def get_large_paper_number(x):
            return x[1:4]
        
        train_df['large_paper_number'] = train_df['assessmentItemID'].apply(lambda x : get_large_paper_number(x))
        test_df['large_paper_number'] = test_df['assessmentItemID'].apply(lambda x : get_large_paper_number(x))

        # 문제 푸는데 걸린 시간
        def get_now_elapsed(df):
            
            diff = df.loc[:, ['userID','Timestamp']].groupby('userID').diff().fillna(pd.Timedelta(seconds=0))
            diff = diff.fillna(pd.Timedelta(seconds=0))
            diff = diff['Timestamp'].apply(lambda x: x.total_seconds())
            df['now_elapsed'] = diff
            df['now_elapsed'] = df['now_elapsed'].apply(lambda x : x if x < 650 and x >=0 else 0)
            df['now_elapsed'] = df['now_elapsed']

            return df

        train_df = get_now_elapsed(df = train_df)
        test_df = get_now_elapsed(df = test_df)

        all_df = pd.concat([train_df, test_df])
        all_df = all_df[all_df['answerCode'] != -1].reset_index(drop = True)

        # 문항별 정답률
        train_df = train_df.set_index('assessmentItemID')
        train_df['assessmentItemID_mean_answerCode'] = all_df.groupby('assessmentItemID').mean()['answerCode']
        train_df = train_df.reset_index(drop = False)

        test_df = test_df.set_index('assessmentItemID')
        test_df['assessmentItemID_mean_answerCode'] = all_df.groupby('assessmentItemID').mean()['answerCode']
        test_df = test_df.reset_index(drop = False)

        # 문항별 정답률 표준편차
        train_df = train_df.set_index('assessmentItemID')
        train_df['assessmentItemID_std_answerCode'] = all_df.groupby('assessmentItemID').std()['answerCode']
        train_df = train_df.reset_index(drop = False)

        test_df = test_df.set_index('assessmentItemID')
        test_df['assessmentItemID_std_answerCode'] = all_df.groupby('assessmentItemID').std()['answerCode']
        test_df = test_df.reset_index(drop = False)

        # 올바르게 푼 사람들의 문항별 풀이 시간 평균
        train_df = train_df.set_index('assessmentItemID')
        train_df['assessmentItemID_mean_now_elapsed'] = all_df[all_df['answerCode'] == 1].groupby('assessmentItemID').mean()['now_elapsed']
        train_df = train_df.reset_index(drop = False)

        test_df = test_df.set_index('assessmentItemID')
        test_df['assessmentItemID_mean_now_elapsed'] = all_df[all_df['answerCode'] == 1].groupby('assessmentItemID').mean()['now_elapsed']
        test_df = test_df.reset_index(drop = False)

        # 올바르게 푼 사람들의 문항별 풀이 시간 표준 편차
        train_df = train_df.set_index('assessmentItemID')
        train_df['assessmentItemID_std_now_elapsed'] = all_df[all_df['answerCode'] == 1].groupby('assessmentItemID').std()['now_elapsed']
        train_df = train_df.reset_index(drop = False)

        test_df = test_df.set_index('assessmentItemID')
        test_df['assessmentItemID_std_now_elapsed'] = all_df[all_df['answerCode'] == 1].groupby('assessmentItemID').std()['now_elapsed']
        test_df = test_df.reset_index(drop = False)

        # 문제 푼 시간
        train_df['hour'] = train_df['Timestamp'].dt.hour
        test_df['hour'] = test_df['Timestamp'].dt.hour

        # 문제 푼 요일
        train_df['dayofweek'] = train_df['Timestamp'].dt.dayofweek
        test_df['dayofweek'] = test_df['Timestamp'].dt.dayofweek

        # index 로 변환

        def get_val2idx(val_list : list) -> dict:
            val2idx = {}
            for idx, val in enumerate(val_list):
                val2idx[val] = idx
            
            return val2idx

        assessmentItemID2idx = get_val2idx(all_df['assessmentItemID'].unique().tolist())
        testId2idx = get_val2idx(all_df['testId'].unique().tolist())
        large_paper_number2idx = get_val2idx(all_df['large_paper_number'].unique().tolist())

        train_df['assessmentItemID2idx'] = train_df['assessmentItemID'].apply(lambda x : assessmentItemID2idx[x])
        train_df['testId2idx'] = train_df['testId'].apply(lambda x : testId2idx[x])
        train_df['large_paper_number2idx'] = train_df['large_paper_number'].apply(lambda x : large_paper_number2idx[x])

        test_df['assessmentItemID2idx'] = test_df['assessmentItemID'].apply(lambda x : assessmentItemID2idx[x])
        test_df['testId2idx'] = test_df['testId'].apply(lambda x : testId2idx[x])
        test_df['large_paper_number2idx'] = test_df['large_paper_number'].apply(lambda x : large_paper_number2idx[x])

        self.train_df, self.test_df = train_df, test_df
        self.all_df = pd.concat([train_df, test_df[test_df['answerCode'] != -1]]).reset_index(drop=True)
        self.num_assessmentItemID = len(assessmentItemID2idx)
        self.num_testId = len(testId2idx)
        self.num_large_paper_number = len(large_paper_number2idx)
        self.num_hour = 24
        self.num_dayofweek = 7

    def get_oof_data(self, oof):

        val_user_list = self.oof_user_set[oof]

        train = []
        valid = []

        group_df = self.all_df.groupby('userID')

        for userID, df in group_df:
            if userID in val_user_list:
                trn_df = df.iloc[:-1, :]
                val_df = df.copy()
                train.append(trn_df)
                valid.append(val_df)
            else:
                train.append(df)

        train = pd.concat(train)
        valid = pd.concat(valid)
        
        return train, valid
    
    def get_test_data(self):
        return self.test_df.copy()

In [5]:
class CustomDataset(Dataset):
    def __init__(
        self, 
        df,
        cat_cols = ['assessmentItemID2idx', 'testId2idx', 'large_paper_number2idx', 'hour', 'dayofweek'],
        num_cols = ['now_elapsed', 'assessmentItemID_mean_now_elapsed', 'assessmentItemID_std_now_elapsed', 'assessmentItemID_mean_answerCode', 'assessmentItemID_std_answerCode']
        ):

        self.cat_cols = cat_cols
        self.num_cols = num_cols
        self.user_list = df['userID'].unique().tolist()
        self.get_df = df.groupby('userID')

    def __len__(self):
        return len(self.user_list)

    def __getitem__(self, idx):
        user = self.user_list[idx]
        get_df = self.get_df.get_group(user)

        now_df = get_df.iloc[1:, :]
        now_cat_feature = now_df[self.cat_cols].values
        now_num_feature = now_df[self.num_cols].values
        now_answerCode = now_df['answerCode'].values

        past_df = get_df.iloc[:-1, :]
        past_cat_feature = past_df[self.cat_cols].values
        past_num_feature = past_df[self.num_cols].values
        past_answerCode = past_df['answerCode'].values

        return {
            'past_cat_feature' : past_cat_feature, 
            'past_num_feature' : past_num_feature, 
            'past_answerCode' : past_answerCode, 
            'now_cat_feature' : now_cat_feature, 
            'now_num_feature' : now_num_feature, 
            'now_answerCode' : now_answerCode
            }

In [6]:
def pad_sequence(seq, max_len, padding_value = 0):
    try:
        seq_len, col = seq.shape
        padding = np.zeros((max_len - seq_len, col)) + padding_value
    except:
        seq_len = seq.shape[0]
        padding = np.zeros((max_len - seq_len, )) + padding_value

    padding_seq = np.concatenate([padding, seq])

    return padding_seq

def train_make_batch(samples):
    max_len = 0
    for sample in samples:
        seq_len, col = sample['past_cat_feature'].shape
        if max_len < seq_len:
            max_len = seq_len
    
    past_cat_feature = []
    past_num_feature = []
    past_answerCode = []
    now_cat_feature = []
    now_num_feature = []
    now_answerCode = []

    for sample in samples:
        past_cat_feature += [pad_sequence(sample['past_cat_feature'] + 1, max_len = max_len, padding_value = 0)]
        past_num_feature += [pad_sequence(sample['past_num_feature'], max_len = max_len, padding_value = 0)]
        past_answerCode += [pad_sequence(sample['past_answerCode'] + 1, max_len = max_len, padding_value = 0)]
        now_cat_feature += [pad_sequence(sample['now_cat_feature'] + 1, max_len = max_len, padding_value = 0)]
        now_num_feature += [pad_sequence(sample['now_num_feature'], max_len = max_len, padding_value = 0)]
        now_answerCode += [pad_sequence(sample['now_answerCode'], max_len = max_len, padding_value = -1)]

    return torch.tensor(past_cat_feature, dtype = torch.long), torch.tensor(past_num_feature, dtype = torch.float32), torch.tensor(past_answerCode, dtype = torch.long), torch.tensor(now_cat_feature, dtype = torch.long), torch.tensor(now_num_feature, dtype = torch.float32), torch.tensor(now_answerCode, dtype = torch.float32)

# 모델

In [7]:
class ScaledDotProductAttention(nn.Module):
    def __init__(self, hidden_units, dropout_rate):
        super(ScaledDotProductAttention, self).__init__()
        self.hidden_units = hidden_units
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, Q, K, V, mask):
        """
        Q, K, V : (batch_size, num_heads, max_len, hidden_units)
        mask : (batch_size, 1, max_len, max_len)
        """
        attn_score = torch.matmul(Q, K.transpose(2, 3)) / math.sqrt(self.hidden_units) # (batch_size, num_heads, max_len, max_len)
        attn_score = attn_score.masked_fill(mask == 0, -1e9)  # 유사도가 0인 지점은 -infinity로 보내 softmax 결과가 0이 되도록 함
        attn_dist = self.dropout(F.softmax(attn_score, dim=-1))  # attention distribution
        output = torch.matmul(attn_dist, V)  # (batch_size, num_heads, max_len, hidden_units) / # dim of output : batchSize x num_head x seqLen x hidden_units
        return output, attn_dist


class MultiHeadAttention(nn.Module):
    def __init__(self, num_heads, hidden_units, dropout_rate):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads # head의 수
        self.hidden_units = hidden_units
        
        # query, key, value, output 생성을 위해 Linear 모델 생성
        self.W_Q = nn.Linear(hidden_units, hidden_units, bias=False)
        self.W_K = nn.Linear(hidden_units, hidden_units, bias=False)
        self.W_V = nn.Linear(hidden_units, hidden_units, bias=False)
        self.W_O = nn.Linear(hidden_units, hidden_units, bias=False)

        self.attention = ScaledDotProductAttention(hidden_units, dropout_rate)
        self.dropout = nn.Dropout(dropout_rate) # dropout rate
        self.layerNorm = nn.LayerNorm(hidden_units, 1e-6) # layer normalization

    def forward(self, enc, mask):
        """
        enc : (batch_size, max_len, hidden_units)
        mask : (batch_size, 1, max_len, max_len)
        
        """
        residual = enc # residual connection을 위해 residual 부분을 저장
        batch_size, seqlen = enc.size(0), enc.size(1)

        # Query, Key, Value를 (num_head)개의 Head로 나누어 각기 다른 Linear projection을 통과시킴
        Q = self.W_Q(enc).view(batch_size, seqlen, self.num_heads, self.hidden_units // self.num_heads) # (batch_size, max_len, num_heads, hidden_units)
        K = self.W_K(enc).view(batch_size, seqlen, self.num_heads, self.hidden_units // self.num_heads) # (batch_size, max_len, num_heads, hidden_units)
        V = self.W_V(enc).view(batch_size, seqlen, self.num_heads, self.hidden_units // self.num_heads) # (batch_size, max_len, num_heads, hidden_units)

        # Head별로 각기 다른 attention이 가능하도록 Transpose 후 각각 attention에 통과시킴
        Q, K, V = Q.transpose(1, 2), K.transpose(1, 2), V.transpose(1, 2) # (batch_size, num_heads, max_len, hidden_units)
        output, attn_dist = self.attention(Q, K, V, mask) # output : (batch_size, num_heads, max_len, hidden_units) / attn_dist : (batch_size, num_heads, max_len, max_len)

        # 다시 Transpose한 후 모든 head들의 attention 결과를 합칩니다.
        output = output.transpose(1, 2).contiguous() # (batch_size, max_len, num_heads, hidden_units) / contiguous() : 가변적 메모리 할당
        output = output.view(batch_size, seqlen, -1) # (batch_size, max_len, hidden_units * num_heads)

        # Linear Projection, Dropout, Residual sum, and Layer Normalization
        output = self.layerNorm(self.dropout(self.W_O(output)) + residual) # (batch_size, max_len, hidden_units)
        return output, attn_dist


class PositionwiseFeedForward(nn.Module):
    def __init__(self, hidden_units, dropout_rate):
        super(PositionwiseFeedForward, self).__init__()

        self.W_1 = nn.Linear(hidden_units, hidden_units)
        self.W_2 = nn.Linear(hidden_units, hidden_units)
        self.dropout = nn.Dropout(dropout_rate)
        self.layerNorm = nn.LayerNorm(hidden_units, 1e-6) # layer normalization

    def forward(self, x):
        residual = x
        output = self.W_2(F.relu(self.dropout(self.W_1(x))))
        output = self.layerNorm(self.dropout(output) + residual)
        return output


class SASRecBlock(nn.Module):
    def __init__(self, num_heads, hidden_units, dropout_rate):
        super(SASRecBlock, self).__init__()
        self.attention = MultiHeadAttention(num_heads, hidden_units, dropout_rate)
        self.pointwise_feedforward = PositionwiseFeedForward(hidden_units, dropout_rate)

    def forward(self, input_enc, mask):
        """
        input_enc : (batch_size, max_len, hidden_units)
        mask : (batch_size, 1, max_len, max_len)
        """
        output_enc, attn_dist = self.attention(input_enc, mask)
        output_enc = self.pointwise_feedforward(output_enc)
        return output_enc, attn_dist


class SASRec(nn.Module):
    def __init__(
        self, 
        num_assessmentItemID, 
        num_testId,
        num_large_paper_number,
        num_hour,
        num_dayofweek,
        num_cols,
        cat_cols,
        emb_size,
        hidden_units,
        num_heads, 
        num_layers, 
        dropout_rate, 
        device):
        super(SASRec, self).__init__()

        self.assessmentItemID_emb = nn.Embedding(num_assessmentItemID + 1, emb_size, padding_idx = 0) # 문항에 대한 정보
        self.testId_emb = nn.Embedding(num_testId + 1, emb_size, padding_idx = 0) # 시험지에 대한 정보-
        self.large_paper_number_emb = nn.Embedding(num_large_paper_number + 1, emb_size, padding_idx = 0) # 핫년에 대한 정보
        self.hour_emb = nn.Embedding(num_hour + 1, emb_size, padding_idx = 0) # 문제 풀이 시간에 대한 정보
        self.dayofweek_emb = nn.Embedding(num_dayofweek + 1, emb_size, padding_idx = 0) # 문제 풀이 요일에 대항 정보-
        self.answerCode_emb = nn.Embedding(3, hidden_units, padding_idx = 0) # 문제 정답 여부에 대한 정보

        self.cat_emb = nn.Sequential(
            nn.Linear(len(cat_cols) * emb_size, hidden_units // 2),
            nn.LayerNorm(hidden_units // 2, eps=1e-6)
        )

        self.num_emb = nn.Sequential(
            nn.Linear(len(num_cols), hidden_units // 2),
            nn.LayerNorm(hidden_units // 2, eps=1e-6)
        )

        self.predict_layer = nn.Sequential(
            nn.Linear(hidden_units * 2, 1),
            nn.Sigmoid()
        )

        self.lstm = nn.LSTM(
            input_size = hidden_units,
            hidden_size = hidden_units,
            num_layers = num_layers,
            batch_first = True,
            bidirectional = False,
            dropout = dropout_rate,
            )

        self.cat_cols = cat_cols
        self.num_cols = num_cols
        
        self.hidden_units = hidden_units
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.device = device

        self.emb_layernorm = nn.LayerNorm(hidden_units, eps=1e-6)

        self.dropout = nn.Dropout(dropout_rate)
        
        self.blocks = nn.ModuleList([SASRecBlock(num_heads, hidden_units, dropout_rate) for _ in range(num_layers)])
    
    
    def forward(self, past_cat_feature, past_num_feature, past_answerCode, now_cat_feature, now_num_feature):
        """
        past_cat_feature : (batch_size, max_len, cat_cols)
        past_num_feature : (batch_size, max_len, num_cols)
        past_answerCode : (batch_size, max_len)

        now_cat_feature : (batch_size, max_len, cat_cols)
        now_num_feature : (batch_size, max_len, num_cols)
        
        """

        past_cat_emb_list = []
        for idx in range(len(self.cat_cols)):
            if self.cat_cols[idx] == 'assessmentItemID2idx':
                past_cat_emb_list.append(self.assessmentItemID_emb(past_cat_feature[:, :, idx]))
            elif self.cat_cols[idx] == 'testId2idx':
                past_cat_emb_list.append(self.testId_emb(past_cat_feature[:, :, idx]))
            elif self.cat_cols[idx] == 'large_paper_number2idx':
                past_cat_emb_list.append(self.large_paper_number_emb(past_cat_feature[:, :, idx]))
            elif self.cat_cols[idx] == 'hour':
                past_cat_emb_list.append(self.hour_emb(past_cat_feature[:, :, idx]))
            elif self.cat_cols[idx] == 'dayofweek':
                past_cat_emb_list.append(self.dayofweek_emb(past_cat_feature[:, :, idx]))

        past_cat_emb = torch.concat(past_cat_emb_list, dim = -1)
        past_cat_emb = self.cat_emb(past_cat_emb)
        past_num_emb = self.num_emb(past_num_feature)

        past_emb = torch.concat([past_cat_emb, past_num_emb], dim = -1)
        past_emb += self.answerCode_emb(past_answerCode.to(self.device))
        past_emb = self.emb_layernorm(self.dropout(past_emb)) # LayerNorm

        # masking 
        mask_pad = torch.BoolTensor(past_answerCode > 0).unsqueeze(1).unsqueeze(1) # (batch_size, 1, 1, max_len)
        mask_time = (1 - torch.triu(torch.ones((1, 1, past_answerCode.size(1), past_answerCode.size(1))), diagonal=1)).bool() # (batch_size, 1, max_len, max_len)
        mask = (mask_pad & mask_time).to(self.device) # (batch_size, 1, max_len, max_len)
        for block in self.blocks:
            past_emb, attn_dist = block(past_emb, mask)

        past_emb, _ = self.lstm(past_emb)

        now_cat_emb_list = []
        for idx in range(len(self.cat_cols)):
            if self.cat_cols[idx] == 'assessmentItemID2idx':
                now_cat_emb_list.append(self.assessmentItemID_emb(now_cat_feature[:, :, idx]))
            elif self.cat_cols[idx] == 'testId2idx':
                now_cat_emb_list.append(self.testId_emb(now_cat_feature[:, :, idx]))
            elif self.cat_cols[idx] == 'large_paper_number2idx':
                now_cat_emb_list.append(self.large_paper_number_emb(now_cat_feature[:, :, idx]))
            elif self.cat_cols[idx] == 'hour':
                now_cat_emb_list.append(self.hour_emb(now_cat_feature[:, :, idx]))
            elif self.cat_cols[idx] == 'dayofweek':
                now_cat_emb_list.append(self.dayofweek_emb(now_cat_feature[:, :, idx]))

        now_cat_emb = torch.concat(now_cat_emb_list, dim = -1)
        now_cat_emb = self.cat_emb(now_cat_emb)
        now_num_emb = self.num_emb(now_num_feature)

        now_emb = torch.concat([now_cat_emb, now_num_emb], dim = -1)

        emb = torch.concat([past_emb, now_emb], dim = -1)
        output = self.predict_layer(emb)

        return output

# 학습 함수

In [8]:
from sklearn.metrics import roc_auc_score

def train(model, data_loader, criterion, optimizer):
    model.train()
    loss_val = 0

    for past_cat_feature, past_num_feature, past_answerCode, now_cat_feature, now_num_feature, now_answerCode in data_loader:

        past_cat_feature, past_num_feature, past_answerCode = past_cat_feature.to(device), past_num_feature.to(device), past_answerCode
        now_cat_feature, now_num_feature, now_answerCode = now_cat_feature.to(device), now_num_feature.to(device), now_answerCode.to(device)

        optimizer.zero_grad()

        output = model(past_cat_feature, past_num_feature, past_answerCode, now_cat_feature, now_num_feature).squeeze(2)
        loss = criterion(output[:, -1], now_answerCode[:, -1])

        loss.backward()
        optimizer.step()

        loss_val += loss.item()

    loss_val /= len(data_loader)

    return loss_val

def evaluate(model, data_loader):
    model.eval()

    target = []
    pred = []

    with torch.no_grad():
        for past_cat_feature, past_num_feature, past_answerCode, now_cat_feature, now_num_feature, now_answerCode in data_loader:
            past_cat_feature, past_num_feature, past_answerCode = past_cat_feature.to(device), past_num_feature.to(device), past_answerCode
            now_cat_feature, now_num_feature, now_answerCode = now_cat_feature.to(device), now_num_feature.to(device), now_answerCode.to(device)
            
            output = model(past_cat_feature, past_num_feature, past_answerCode, now_cat_feature, now_num_feature).squeeze(2)

            target.extend(now_answerCode[:, -1].cpu().numpy().tolist())
            pred.extend(output[:, -1].cpu().numpy().tolist())

    roc_auc = roc_auc_score(target, pred)

    return roc_auc


def predict(model, data_loader):
    model.eval()

    pred = []

    with torch.no_grad():
        for past_cat_feature, past_num_feature, past_answerCode, now_cat_feature, now_num_feature, now_answerCode in data_loader:
            past_cat_feature, past_num_feature, past_answerCode = past_cat_feature.to(device), past_num_feature.to(device), past_answerCode
            now_cat_feature, now_num_feature = now_cat_feature.to(device), now_num_feature.to(device)
            
            output = model(past_cat_feature, past_num_feature, past_answerCode, now_cat_feature, now_num_feature).squeeze(2)
            pred.extend(output[:, -1].cpu().numpy().tolist())

    return pred

# 학습

In [9]:
batch_size = 32
epochs = 20
lr = 0.001
device = 'cuda' if torch.cuda.is_available() else 'cpu'

emb_size = 64
hidden_units = 128
num_heads = 8 # 2,4,8,16,32
num_layers = 1
dropout_rate = 0.5
num_workers = 8

DATA_PATH = '/opt/ml/input/data'
MODEL_PATH = '/opt/ml/model'
SUBMISSION_PATH = '/opt/ml/submission'

model_name = 'Transformer-and-LSTM-Last-num-faeture.pt'
submission_name = 'Transformer-and-LSTM-Last-num-faeture.csv'

In [10]:
if not os.path.isdir(MODEL_PATH):
    os.mkdir(MODEL_PATH)

In [11]:
if not os.path.isdir(SUBMISSION_PATH):
    os.mkdir(SUBMISSION_PATH)

In [12]:
make_dataset = MakeDataset(DATA_PATH = DATA_PATH)

# OOF Ensemble

In [13]:
oof_roc_auc = 0

for oof in make_dataset.oof_user_set.keys():
    train_df, valid_df = make_dataset.get_oof_data(oof)
    
    seed_everything(22 + oof)
    
    train_dataset = CustomDataset(df = train_df)
    train_data_loader = DataLoader(
        train_dataset, 
        batch_size = batch_size, 
        shuffle = True, 
        drop_last = False,
        collate_fn = train_make_batch,
        num_workers = num_workers)

    valid_dataset = CustomDataset(df = valid_df)
    valid_data_loader = DataLoader(
        valid_dataset, 
        batch_size = 1, 
        shuffle = False, 
        drop_last = False,
        collate_fn = train_make_batch,
        num_workers = num_workers)

    model = SASRec(
        num_assessmentItemID = make_dataset.num_assessmentItemID, 
        num_testId = make_dataset.num_testId,
        num_large_paper_number = make_dataset.num_large_paper_number,
        num_hour = make_dataset.num_hour,
        num_dayofweek = make_dataset.num_dayofweek,
        num_cols = train_dataset.num_cols,
        cat_cols = train_dataset.cat_cols,
        emb_size = emb_size,
        hidden_units = hidden_units,
        num_heads = num_heads,
        num_layers = num_layers,
        dropout_rate = dropout_rate,
        device = device).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.BCELoss()

    best_epoch = 0
    best_train_loss = 0
    best_roc_auc = 0

    for epoch in range(1, epochs + 1):
        tbar = tqdm(range(1))
        for _ in tbar:
            train_loss = train(model = model, data_loader = train_data_loader, criterion = criterion, optimizer = optimizer)
            roc_auc = evaluate(model = model, data_loader = valid_data_loader)
            if best_roc_auc < roc_auc:
                best_epoch = epoch
                best_train_loss = train_loss
                best_roc_auc = roc_auc
                torch.save(model.state_dict(), os.path.join(MODEL_PATH, f'oof_{oof}_' + model_name))

            tbar.set_description(f'OOF-{oof}| Epoch: {epoch:3d}| Train loss: {train_loss:.5f}| roc_auc: {roc_auc:.5f}')
    
    print(f'BEST OOF-{oof}| Epoch: {best_epoch:3d}| Train loss: {best_train_loss:.5f}| roc_auc: {best_roc_auc:.5f}')

    oof_roc_auc += best_roc_auc

print(f'Total roc_auc: {oof_roc_auc / len(make_dataset.oof_user_set.keys()):.5f}')

OOF-0| Epoch:   1| Train loss: 0.59971| roc_auc: 0.77717: 100%|██████████| 1/1 [01:39<00:00, 99.55s/it]
OOF-0| Epoch:   2| Train loss: 0.56844| roc_auc: 0.78818: 100%|██████████| 1/1 [01:40<00:00, 100.96s/it]
OOF-0| Epoch:   3| Train loss: 0.54295| roc_auc: 0.79291: 100%|██████████| 1/1 [01:36<00:00, 96.59s/it]
OOF-0| Epoch:   4| Train loss: 0.51666| roc_auc: 0.80256: 100%|██████████| 1/1 [01:37<00:00, 97.94s/it]
OOF-0| Epoch:   5| Train loss: 0.48929| roc_auc: 0.79091: 100%|██████████| 1/1 [01:38<00:00, 98.27s/it]
OOF-0| Epoch:   6| Train loss: 0.46034| roc_auc: 0.80132: 100%|██████████| 1/1 [01:38<00:00, 98.12s/it]
OOF-0| Epoch:   7| Train loss: 0.43360| roc_auc: 0.78937: 100%|██████████| 1/1 [01:35<00:00, 95.27s/it]
OOF-0| Epoch:   8| Train loss: 0.41886| roc_auc: 0.78744: 100%|██████████| 1/1 [01:36<00:00, 96.94s/it]
OOF-0| Epoch:   9| Train loss: 0.39095| roc_auc: 0.78383: 100%|██████████| 1/1 [01:37<00:00, 97.74s/it]
OOF-0| Epoch:  10| Train loss: 0.36775| roc_auc: 0.77543: 100%|

BEST OOF-0| Epoch:   4| Train loss: 0.51666| roc_auc: 0.80256


OOF-1| Epoch:   1| Train loss: 0.60155| roc_auc: 0.78034: 100%|██████████| 1/1 [01:39<00:00, 99.51s/it]
OOF-1| Epoch:   2| Train loss: 0.56484| roc_auc: 0.78598: 100%|██████████| 1/1 [01:39<00:00, 99.05s/it]
OOF-1| Epoch:   3| Train loss: 0.54012| roc_auc: 0.79483: 100%|██████████| 1/1 [01:39<00:00, 99.14s/it]
OOF-1| Epoch:   4| Train loss: 0.51556| roc_auc: 0.79357: 100%|██████████| 1/1 [01:38<00:00, 98.76s/it]
OOF-1| Epoch:   5| Train loss: 0.48503| roc_auc: 0.79213: 100%|██████████| 1/1 [01:40<00:00, 100.10s/it]
OOF-1| Epoch:   6| Train loss: 0.45637| roc_auc: 0.79111: 100%|██████████| 1/1 [01:38<00:00, 98.90s/it]
OOF-1| Epoch:   7| Train loss: 0.44110| roc_auc: 0.78224: 100%|██████████| 1/1 [01:38<00:00, 98.34s/it]
OOF-1| Epoch:   8| Train loss: 0.41439| roc_auc: 0.78997: 100%|██████████| 1/1 [01:39<00:00, 99.65s/it]
OOF-1| Epoch:   9| Train loss: 0.39886| roc_auc: 0.76857: 100%|██████████| 1/1 [01:40<00:00, 100.91s/it]
OOF-1| Epoch:  10| Train loss: 0.38080| roc_auc: 0.76770: 100%

BEST OOF-1| Epoch:   3| Train loss: 0.54012| roc_auc: 0.79483


OOF-2| Epoch:   1| Train loss: 0.59443| roc_auc: 0.75540: 100%|██████████| 1/1 [01:45<00:00, 105.94s/it]
OOF-2| Epoch:   2| Train loss: 0.55727| roc_auc: 0.77142: 100%|██████████| 1/1 [01:36<00:00, 96.37s/it]
OOF-2| Epoch:   3| Train loss: 0.53494| roc_auc: 0.76731: 100%|██████████| 1/1 [01:39<00:00, 99.85s/it]
OOF-2| Epoch:   4| Train loss: 0.50582| roc_auc: 0.76507: 100%|██████████| 1/1 [01:37<00:00, 97.96s/it]
OOF-2| Epoch:   5| Train loss: 0.48361| roc_auc: 0.76077: 100%|██████████| 1/1 [01:38<00:00, 98.25s/it]
OOF-2| Epoch:   6| Train loss: 0.46091| roc_auc: 0.76388: 100%|██████████| 1/1 [01:38<00:00, 98.24s/it]
OOF-2| Epoch:   7| Train loss: 0.43657| roc_auc: 0.75780: 100%|██████████| 1/1 [01:37<00:00, 97.69s/it]
OOF-2| Epoch:   8| Train loss: 0.42245| roc_auc: 0.74285: 100%|██████████| 1/1 [01:37<00:00, 97.79s/it]
OOF-2| Epoch:   9| Train loss: 0.39937| roc_auc: 0.74903: 100%|██████████| 1/1 [01:38<00:00, 98.14s/it]
OOF-2| Epoch:  10| Train loss: 0.38097| roc_auc: 0.75273: 100%|

BEST OOF-2| Epoch:   2| Train loss: 0.55727| roc_auc: 0.77142


OOF-3| Epoch:   1| Train loss: 0.59080| roc_auc: 0.74925: 100%|██████████| 1/1 [01:42<00:00, 102.69s/it]
OOF-3| Epoch:   2| Train loss: 0.55753| roc_auc: 0.76119: 100%|██████████| 1/1 [01:40<00:00, 100.78s/it]
OOF-3| Epoch:   3| Train loss: 0.53396| roc_auc: 0.77215: 100%|██████████| 1/1 [01:40<00:00, 100.25s/it]
OOF-3| Epoch:   4| Train loss: 0.50714| roc_auc: 0.76645: 100%|██████████| 1/1 [01:39<00:00, 99.10s/it]
OOF-3| Epoch:   5| Train loss: 0.48257| roc_auc: 0.76905: 100%|██████████| 1/1 [01:40<00:00, 100.14s/it]
OOF-3| Epoch:   6| Train loss: 0.46368| roc_auc: 0.77158: 100%|██████████| 1/1 [01:41<00:00, 101.84s/it]
OOF-3| Epoch:   7| Train loss: 0.44217| roc_auc: 0.77233: 100%|██████████| 1/1 [01:39<00:00, 99.56s/it]
OOF-3| Epoch:   8| Train loss: 0.42300| roc_auc: 0.76852: 100%|██████████| 1/1 [01:38<00:00, 98.44s/it]
OOF-3| Epoch:   9| Train loss: 0.39978| roc_auc: 0.77006: 100%|██████████| 1/1 [01:36<00:00, 96.70s/it]
OOF-3| Epoch:  10| Train loss: 0.38358| roc_auc: 0.76413: 1

BEST OOF-3| Epoch:   7| Train loss: 0.44217| roc_auc: 0.77233


OOF-4| Epoch:   1| Train loss: 0.59253| roc_auc: 0.76747: 100%|██████████| 1/1 [01:39<00:00, 99.47s/it]
OOF-4| Epoch:   2| Train loss: 0.55597| roc_auc: 0.77669: 100%|██████████| 1/1 [01:39<00:00, 100.00s/it]
OOF-4| Epoch:   3| Train loss: 0.53110| roc_auc: 0.77722: 100%|██████████| 1/1 [01:38<00:00, 98.66s/it]
OOF-4| Epoch:   4| Train loss: 0.50334| roc_auc: 0.77389: 100%|██████████| 1/1 [01:41<00:00, 101.60s/it]
OOF-4| Epoch:   5| Train loss: 0.48361| roc_auc: 0.77917: 100%|██████████| 1/1 [01:38<00:00, 98.84s/it]
OOF-4| Epoch:   6| Train loss: 0.46057| roc_auc: 0.78134: 100%|██████████| 1/1 [01:38<00:00, 98.88s/it]
OOF-4| Epoch:   7| Train loss: 0.44489| roc_auc: 0.76565: 100%|██████████| 1/1 [01:39<00:00, 99.11s/it]
OOF-4| Epoch:   8| Train loss: 0.41733| roc_auc: 0.77967: 100%|██████████| 1/1 [01:39<00:00, 99.14s/it]
OOF-4| Epoch:   9| Train loss: 0.40534| roc_auc: 0.76630: 100%|██████████| 1/1 [01:37<00:00, 97.26s/it]
OOF-4| Epoch:  10| Train loss: 0.38910| roc_auc: 0.75790: 100%

BEST OOF-4| Epoch:   6| Train loss: 0.46057| roc_auc: 0.78134
Total roc_auc: 0.78450





```
Transformer + LSTM

OOF-0| Epoch:   1| Train loss: 0.52477| roc_auc: 0.80000: 100%|██████████| 1/1 [01:38<00:00, 98.07s/it]
OOF-0| Epoch:   2| Train loss: 0.48019| roc_auc: 0.82682: 100%|██████████| 1/1 [01:39<00:00, 99.26s/it]
OOF-0| Epoch:   3| Train loss: 0.46314| roc_auc: 0.83884: 100%|██████████| 1/1 [01:37<00:00, 97.91s/it]
OOF-0| Epoch:   4| Train loss: 0.45776| roc_auc: 0.84076: 100%|██████████| 1/1 [01:37<00:00, 97.66s/it]
OOF-0| Epoch:   5| Train loss: 0.45500| roc_auc: 0.84148: 100%|██████████| 1/1 [01:37<00:00, 97.78s/it]
OOF-0| Epoch:   6| Train loss: 0.45323| roc_auc: 0.84380: 100%|██████████| 1/1 [01:37<00:00, 97.85s/it]
OOF-0| Epoch:   7| Train loss: 0.45137| roc_auc: 0.84131: 100%|██████████| 1/1 [01:37<00:00, 97.40s/it]
OOF-0| Epoch:   8| Train loss: 0.45024| roc_auc: 0.84404: 100%|██████████| 1/1 [01:37<00:00, 97.81s/it]
OOF-0| Epoch:   9| Train loss: 0.44937| roc_auc: 0.84054: 100%|██████████| 1/1 [01:37<00:00, 97.37s/it]
OOF-0| Epoch:  10| Train loss: 0.44839| roc_auc: 0.84440: 100%|██████████| 1/1 [01:36<00:00, 96.96s/it]
OOF-0| Epoch:  11| Train loss: 0.44704| roc_auc: 0.84210: 100%|██████████| 1/1 [01:37<00:00, 97.51s/it]
OOF-0| Epoch:  12| Train loss: 0.44628| roc_auc: 0.84230: 100%|██████████| 1/1 [01:37<00:00, 97.21s/it]
OOF-0| Epoch:  13| Train loss: 0.44576| roc_auc: 0.84385: 100%|██████████| 1/1 [01:34<00:00, 94.85s/it]
OOF-0| Epoch:  14| Train loss: 0.44503| roc_auc: 0.84311: 100%|██████████| 1/1 [01:36<00:00, 96.64s/it]
OOF-0| Epoch:  15| Train loss: 0.44477| roc_auc: 0.84491: 100%|██████████| 1/1 [01:37<00:00, 97.62s/it]
OOF-0| Epoch:  16| Train loss: 0.44386| roc_auc: 0.84396: 100%|██████████| 1/1 [01:36<00:00, 96.84s/it]
OOF-0| Epoch:  17| Train loss: 0.44374| roc_auc: 0.84396: 100%|██████████| 1/1 [01:38<00:00, 98.16s/it]
OOF-0| Epoch:  18| Train loss: 0.44301| roc_auc: 0.84501: 100%|██████████| 1/1 [01:37<00:00, 97.63s/it]
OOF-0| Epoch:  19| Train loss: 0.44265| roc_auc: 0.84667: 100%|██████████| 1/1 [01:36<00:00, 96.81s/it]
OOF-0| Epoch:  20| Train loss: 0.44247| roc_auc: 0.84498: 100%|██████████| 1/1 [01:38<00:00, 98.22s/it]
BEST OOF-0| Epoch:  19| Train loss: 0.44265| roc_auc: 0.84667
OOF-1| Epoch:   1| Train loss: 0.52293| roc_auc: 0.79625: 100%|██████████| 1/1 [01:40<00:00, 100.78s/it]
OOF-1| Epoch:   2| Train loss: 0.47909| roc_auc: 0.82069: 100%|██████████| 1/1 [01:39<00:00, 99.50s/it]
OOF-1| Epoch:   3| Train loss: 0.46254| roc_auc: 0.82880: 100%|██████████| 1/1 [01:41<00:00, 101.82s/it]
OOF-1| Epoch:   4| Train loss: 0.45729| roc_auc: 0.83281: 100%|██████████| 1/1 [01:39<00:00, 99.28s/it]
OOF-1| Epoch:   5| Train loss: 0.45431| roc_auc: 0.83544: 100%|██████████| 1/1 [01:40<00:00, 100.21s/it]
OOF-1| Epoch:   6| Train loss: 0.45311| roc_auc: 0.83672: 100%|██████████| 1/1 [01:40<00:00, 100.52s/it]
OOF-1| Epoch:   7| Train loss: 0.45150| roc_auc: 0.83782: 100%|██████████| 1/1 [01:39<00:00, 99.23s/it]
OOF-1| Epoch:   8| Train loss: 0.45009| roc_auc: 0.84167: 100%|██████████| 1/1 [01:40<00:00, 100.66s/it]
OOF-1| Epoch:   9| Train loss: 0.44958| roc_auc: 0.84115: 100%|██████████| 1/1 [01:41<00:00, 101.62s/it]
OOF-1| Epoch:  10| Train loss: 0.44834| roc_auc: 0.84416: 100%|██████████| 1/1 [01:38<00:00, 98.10s/it]
OOF-1| Epoch:  11| Train loss: 0.44807| roc_auc: 0.84281: 100%|██████████| 1/1 [01:42<00:00, 102.32s/it]
OOF-1| Epoch:  12| Train loss: 0.44749| roc_auc: 0.84403: 100%|██████████| 1/1 [01:47<00:00, 107.22s/it]
OOF-1| Epoch:  13| Train loss: 0.44708| roc_auc: 0.84413: 100%|██████████| 1/1 [01:43<00:00, 103.36s/it]
OOF-1| Epoch:  14| Train loss: 0.44596| roc_auc: 0.84537: 100%|██████████| 1/1 [01:45<00:00, 105.42s/it]
OOF-1| Epoch:  15| Train loss: 0.44553| roc_auc: 0.84605: 100%|██████████| 1/1 [01:42<00:00, 102.77s/it]
OOF-1| Epoch:  16| Train loss: 0.44484| roc_auc: 0.84319: 100%|██████████| 1/1 [01:43<00:00, 103.27s/it]
OOF-1| Epoch:  17| Train loss: 0.44386| roc_auc: 0.84629: 100%|██████████| 1/1 [01:42<00:00, 102.47s/it]
OOF-1| Epoch:  18| Train loss: 0.44338| roc_auc: 0.84715: 100%|██████████| 1/1 [01:40<00:00, 100.93s/it]
OOF-1| Epoch:  19| Train loss: 0.44230| roc_auc: 0.84585: 100%|██████████| 1/1 [01:40<00:00, 100.61s/it]
OOF-1| Epoch:  20| Train loss: 0.44192| roc_auc: 0.84535: 100%|██████████| 1/1 [01:39<00:00, 99.27s/it]
BEST OOF-1| Epoch:  18| Train loss: 0.44338| roc_auc: 0.84715
OOF-2| Epoch:   1| Train loss: 0.51970| roc_auc: 0.78403: 100%|██████████| 1/1 [01:38<00:00, 98.32s/it]
OOF-2| Epoch:   2| Train loss: 0.47772| roc_auc: 0.80440: 100%|██████████| 1/1 [01:37<00:00, 97.63s/it]
OOF-2| Epoch:   3| Train loss: 0.46244| roc_auc: 0.80889: 100%|██████████| 1/1 [01:40<00:00, 100.74s/it]
OOF-2| Epoch:   4| Train loss: 0.45709| roc_auc: 0.80997: 100%|██████████| 1/1 [01:37<00:00, 97.55s/it]
OOF-2| Epoch:   5| Train loss: 0.45449| roc_auc: 0.81164: 100%|██████████| 1/1 [01:37<00:00, 97.57s/it]
OOF-2| Epoch:   6| Train loss: 0.45169| roc_auc: 0.81552: 100%|██████████| 1/1 [01:39<00:00, 99.63s/it]
OOF-2| Epoch:   7| Train loss: 0.45070| roc_auc: 0.81324: 100%|██████████| 1/1 [01:37<00:00, 97.79s/it]
OOF-2| Epoch:   8| Train loss: 0.44941| roc_auc: 0.81411: 100%|██████████| 1/1 [01:37<00:00, 97.72s/it]
OOF-2| Epoch:   9| Train loss: 0.44849| roc_auc: 0.81487: 100%|██████████| 1/1 [01:39<00:00, 99.01s/it]
OOF-2| Epoch:  10| Train loss: 0.44789| roc_auc: 0.81395: 100%|██████████| 1/1 [01:39<00:00, 99.21s/it]
OOF-2| Epoch:  11| Train loss: 0.44667| roc_auc: 0.81517: 100%|██████████| 1/1 [01:38<00:00, 98.32s/it]
OOF-2| Epoch:  12| Train loss: 0.44609| roc_auc: 0.81281: 100%|██████████| 1/1 [01:38<00:00, 98.84s/it]
OOF-2| Epoch:  13| Train loss: 0.44555| roc_auc: 0.81465: 100%|██████████| 1/1 [01:36<00:00, 96.25s/it]
OOF-2| Epoch:  14| Train loss: 0.44469| roc_auc: 0.81327: 100%|██████████| 1/1 [01:36<00:00, 96.51s/it]
OOF-2| Epoch:  15| Train loss: 0.44395| roc_auc: 0.81708: 100%|██████████| 1/1 [01:37<00:00, 97.63s/it]
OOF-2| Epoch:  16| Train loss: 0.44391| roc_auc: 0.81544: 100%|██████████| 1/1 [01:36<00:00, 96.90s/it]
OOF-2| Epoch:  17| Train loss: 0.44322| roc_auc: 0.81266: 100%|██████████| 1/1 [01:39<00:00, 99.18s/it]
OOF-2| Epoch:  18| Train loss: 0.44301| roc_auc: 0.81554: 100%|██████████| 1/1 [01:38<00:00, 98.51s/it]
OOF-2| Epoch:  19| Train loss: 0.44257| roc_auc: 0.81338: 100%|██████████| 1/1 [01:37<00:00, 97.23s/it]
OOF-2| Epoch:  20| Train loss: 0.44195| roc_auc: 0.81325: 100%|██████████| 1/1 [01:38<00:00, 98.03s/it]
BEST OOF-2| Epoch:  15| Train loss: 0.44395| roc_auc: 0.81708
OOF-3| Epoch:   1| Train loss: 0.52103| roc_auc: 0.78738: 100%|██████████| 1/1 [01:40<00:00, 100.32s/it]
OOF-3| Epoch:   2| Train loss: 0.47674| roc_auc: 0.81192: 100%|██████████| 1/1 [01:38<00:00, 98.18s/it]
OOF-3| Epoch:   3| Train loss: 0.46216| roc_auc: 0.81796: 100%|██████████| 1/1 [01:39<00:00, 99.33s/it]
OOF-3| Epoch:   4| Train loss: 0.45711| roc_auc: 0.81927: 100%|██████████| 1/1 [01:41<00:00, 101.23s/it]
OOF-3| Epoch:   5| Train loss: 0.45448| roc_auc: 0.82194: 100%|██████████| 1/1 [01:40<00:00, 100.92s/it]
OOF-3| Epoch:   6| Train loss: 0.45278| roc_auc: 0.82510: 100%|██████████| 1/1 [01:40<00:00, 100.05s/it]
OOF-3| Epoch:   7| Train loss: 0.45115| roc_auc: 0.82411: 100%|██████████| 1/1 [01:39<00:00, 99.57s/it]
OOF-3| Epoch:   8| Train loss: 0.45009| roc_auc: 0.82123: 100%|██████████| 1/1 [01:44<00:00, 104.08s/it]
OOF-3| Epoch:   9| Train loss: 0.44902| roc_auc: 0.82471: 100%|██████████| 1/1 [01:40<00:00, 100.83s/it]
OOF-3| Epoch:  10| Train loss: 0.44880| roc_auc: 0.82259: 100%|██████████| 1/1 [01:39<00:00, 99.15s/it]
OOF-3| Epoch:  11| Train loss: 0.44775| roc_auc: 0.82719: 100%|██████████| 1/1 [01:43<00:00, 103.14s/it]
OOF-3| Epoch:  12| Train loss: 0.44744| roc_auc: 0.82476: 100%|██████████| 1/1 [01:41<00:00, 101.16s/it]
OOF-3| Epoch:  13| Train loss: 0.44708| roc_auc: 0.82705: 100%|██████████| 1/1 [01:39<00:00, 99.88s/it]
OOF-3| Epoch:  14| Train loss: 0.44678| roc_auc: 0.82755: 100%|██████████| 1/1 [01:38<00:00, 98.96s/it]
OOF-3| Epoch:  15| Train loss: 0.44548| roc_auc: 0.82721: 100%|██████████| 1/1 [01:39<00:00, 99.94s/it]
OOF-3| Epoch:  16| Train loss: 0.44502| roc_auc: 0.82887: 100%|██████████| 1/1 [01:41<00:00, 101.27s/it]
OOF-3| Epoch:  17| Train loss: 0.44490| roc_auc: 0.82872: 100%|██████████| 1/1 [01:40<00:00, 100.17s/it]
OOF-3| Epoch:  18| Train loss: 0.44450| roc_auc: 0.82785: 100%|██████████| 1/1 [01:40<00:00, 100.01s/it]
OOF-3| Epoch:  19| Train loss: 0.44350| roc_auc: 0.82463: 100%|██████████| 1/1 [01:40<00:00, 100.35s/it]
OOF-3| Epoch:  20| Train loss: 0.44274| roc_auc: 0.82693: 100%|██████████| 1/1 [01:39<00:00, 99.75s/it]
BEST OOF-3| Epoch:  16| Train loss: 0.44502| roc_auc: 0.82887
OOF-4| Epoch:   1| Train loss: 0.52196| roc_auc: 0.79815: 100%|██████████| 1/1 [01:39<00:00, 99.32s/it]
OOF-4| Epoch:   2| Train loss: 0.47833| roc_auc: 0.81957: 100%|██████████| 1/1 [01:41<00:00, 101.01s/it]
OOF-4| Epoch:   3| Train loss: 0.46250| roc_auc: 0.82446: 100%|██████████| 1/1 [01:38<00:00, 98.31s/it]
OOF-4| Epoch:   4| Train loss: 0.45728| roc_auc: 0.82602: 100%|██████████| 1/1 [01:40<00:00, 100.32s/it]
OOF-4| Epoch:   5| Train loss: 0.45418| roc_auc: 0.82892: 100%|██████████| 1/1 [01:43<00:00, 103.14s/it]
OOF-4| Epoch:   6| Train loss: 0.45243| roc_auc: 0.83067: 100%|██████████| 1/1 [01:41<00:00, 101.37s/it]
OOF-4| Epoch:   7| Train loss: 0.45110| roc_auc: 0.83291: 100%|██████████| 1/1 [01:42<00:00, 102.66s/it]
OOF-4| Epoch:   8| Train loss: 0.44995| roc_auc: 0.83463: 100%|██████████| 1/1 [01:40<00:00, 100.25s/it]
OOF-4| Epoch:   9| Train loss: 0.44927| roc_auc: 0.83693: 100%|██████████| 1/1 [01:41<00:00, 101.71s/it]
OOF-4| Epoch:  10| Train loss: 0.44869| roc_auc: 0.83525: 100%|██████████| 1/1 [01:40<00:00, 100.53s/it]
OOF-4| Epoch:  11| Train loss: 0.44797| roc_auc: 0.83294: 100%|██████████| 1/1 [01:39<00:00, 99.20s/it]
OOF-4| Epoch:  12| Train loss: 0.44675| roc_auc: 0.83649: 100%|██████████| 1/1 [01:39<00:00, 99.98s/it]
OOF-4| Epoch:  13| Train loss: 0.44629| roc_auc: 0.83484: 100%|██████████| 1/1 [01:40<00:00, 100.90s/it]
OOF-4| Epoch:  14| Train loss: 0.44550| roc_auc: 0.83575: 100%|██████████| 1/1 [01:40<00:00, 100.93s/it]
OOF-4| Epoch:  15| Train loss: 0.44494| roc_auc: 0.83657: 100%|██████████| 1/1 [01:40<00:00, 100.53s/it]
OOF-4| Epoch:  16| Train loss: 0.44418| roc_auc: 0.83849: 100%|██████████| 1/1 [01:39<00:00, 99.02s/it]
OOF-4| Epoch:  17| Train loss: 0.44395| roc_auc: 0.83664: 100%|██████████| 1/1 [01:39<00:00, 99.17s/it]
OOF-4| Epoch:  18| Train loss: 0.44372| roc_auc: 0.83450: 100%|██████████| 1/1 [01:40<00:00, 100.40s/it]
OOF-4| Epoch:  19| Train loss: 0.44249| roc_auc: 0.83715: 100%|██████████| 1/1 [01:39<00:00, 99.87s/it]
OOF-4| Epoch:  20| Train loss: 0.44256| roc_auc: 0.83985: 100%|██████████| 1/1 [01:39<00:00, 99.62s/it]
BEST OOF-4| Epoch:  20| Train loss: 0.44256| roc_auc: 0.83985
Total roc_auc: 0.83593

```

# 예측

In [15]:
test_df = make_dataset.get_test_data()
test_dataset = CustomDataset(df = test_df)
test_data_loader = DataLoader(
    test_dataset,
    batch_size = 1, 
    shuffle = False, 
    drop_last = False,
    collate_fn = train_make_batch,
    num_workers = num_workers)

pred_list = []

model = SASRec(
    num_assessmentItemID = make_dataset.num_assessmentItemID, 
    num_testId = make_dataset.num_testId,
    num_large_paper_number = make_dataset.num_large_paper_number,
    num_hour = make_dataset.num_hour,
    num_dayofweek = make_dataset.num_dayofweek,
    num_cols = train_dataset.num_cols,
    cat_cols = train_dataset.cat_cols,
    emb_size = emb_size, 
    hidden_units = hidden_units, 
    num_heads = num_heads, 
    num_layers = num_layers, 
    dropout_rate = dropout_rate, 
    device = device).to(device)

for oof in make_dataset.oof_user_set.keys():
    model.load_state_dict(torch.load(os.path.join(MODEL_PATH, f'oof_{oof}_' + model_name)))
    pred = predict(model = model, data_loader = test_data_loader)
    pred_list.append(pred)

pred_list = np.array(pred_list).mean(axis = 0)

In [16]:
submission = pd.DataFrame(data = np.array(pred_list), columns = ['prediction'])
submission['id'] = submission.index
submission = submission[['id', 'prediction']]
submission.to_csv(os.path.join(SUBMISSION_PATH, 'OOF-Ensemble-' + submission_name), index = False)