# Import

In [3]:
import os
import random
import time
from datetime import datetime
import gc 
import numpy as np
import pandas as pd
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import warnings

warnings.filterwarnings(action='ignore')
gc.collect()

0

In [4]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Data Load

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [6]:
def seed_everything(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

# DATA PREPROCESSING

In [7]:
class DTset():
    def __init__(self, DATA_PATH):
        self.preprocessing(DATA_PATH)
        self.oof_ID_set = self.split_data()
    
    def split_data(self):
        user_list = self.df['userID'].unique().tolist()
        oof_ID_set = {}
        kfold = KFold(n_splits = 5, shuffle = True, random_state = 4444)
        for i, (t, v) in enumerate(kfold.split(user_list)):
            oof_ID_set[i] = v.tolist()
        
        return oof_ID_set
    
    def preprocessing(self, DATA_PATH):

        dtype = {
            'userID': 'int16',
            'answerCode': 'int8',
            'KnowledgeTag': 'int16',

        }
        
        train_df = pd.read_csv(os.path.join(DATA_PATH, 'train_data.csv'), dtype=dtype, parse_dates=['Timestamp'])
        # train_df = train_df.drop('Unnamed: 0', axis=1)
        test_df = pd.read_csv(os.path.join(DATA_PATH, 'test_data.csv'), dtype=dtype, parse_dates=['Timestamp'])
        # test_df = test_df.drop('Unnamed: 0', axis=1)
        # test_df = test_df.rename({'assessment_level' : 'beta'},axis=1)
        # test_df = test_df.rename({'student_Level' : 'student_level'}, axis=1)

        def FE(df: pd.DataFrame) -> pd.DataFrame:
            def chg_idx(lst):
                tmp = {}
                for i,j in enumerate(lst):
                    tmp[j] = i
                return tmp

            def convert_time(s: str):
                timestamp = time.mktime(s.timetuple())
                return int(timestamp)
            df["convert_time"] = df["Timestamp"].apply(convert_time)
            df['level'] = df['testId'].apply(lambda x: x[2])
            df['shift'] = df['convert_time'].shift(-1).fillna(0).astype(int)
            df['elapsed'] = df['shift'] - df['convert_time']
            df['check'] = df['userID'].shift(-1)
            df['d_check'] = df['testId'].shift(-1)
            df.loc[(df['userID'] != df['check']) | (df['testId'] != df['d_check']) | (df['elapsed'] >= 86400), 'elapsed'] = 0

            # 정답과 오답의 평균,중간 소요시간
            # collect_elp_mean = df[df['answerCode'] == 1].groupby('assessmentItemID')['elapsed'].mean()
            # df = df.join(collect_elp_mean, on='assessmentItemID',rsuffix='_1_avg')
            # wrong_elp_mean = df[df['answerCode']== 0].groupby('assessmentItemID')['elapsed'].mean()
            # df = df.join(wrong_elp_mean, on='assessmentItemID',rsuffix='_0_avg')
    
            # collect_elp_median = df[df['answerCode'] == 1].groupby('assessmentItemID')['elapsed'].median()
            # df = df.join(collect_elp_median, on='assessmentItemID',rsuffix='_1_mdn')
            # wrong_elp_median = df[df['answerCode']== 0].groupby('assessmentItemID')['elapsed'].median()
            # df = df.join(wrong_elp_median, on='assessmentItemID',rsuffix='_0_mdn')

            # 전체 표준편차
            elapsed_std = df.groupby('assessmentItemID')['elapsed'].std()
            df = df.join(elapsed_std, on='assessmentItemID', rsuffix='_std')
 
            # # 맞춘 인원의 표준편차
            # elapsed_1_std = df[df['answerCode'] == 1].groupby('assessmentItemID')['elapsed'].std()
            # df = df.join(elapsed_1_std, on='assessmentItemID', rsuffix='_1_std')
            # df = df.reset_index(drop=False)

            df = df.drop(['shift','check','d_check','convert_time'], axis=1)
            # df['Timestamp'] = df['Timestamp'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
            df['dayofweek'] = df['Timestamp'].dt.dayofweek
            answer_rate = df.groupby('assessmentItemID')['answerCode'].mean()
            df = df.join(answer_rate, on='assessmentItemID', rsuffix='_rate')

            col_ = {}

            assItemID_ = chg_idx(df['assessmentItemID'].unique().tolist())
            df['assessmentItemID_'] = df['assessmentItemID'].apply(lambda x: assItemID_[x])
            col_['assItemID_'] = assItemID_

            testID_ = chg_idx(df['testId'].unique().tolist())
            df['testId_'] = df['testId'].apply(lambda x: testID_[x])
            col_['testID_'] = testID_
            
            tag_ = chg_idx(df['KnowledgeTag'].unique().tolist())
            df['KnowledgeTag_'] = df['KnowledgeTag'].apply(lambda x: tag_[x])
            col_['tag_'] = tag_

            level_ = chg_idx(df['level'].unique().tolist())
            df['level_'] = df['level'].apply(lambda x: level_[x])
            col_['level_'] = level_
            
            # student_level_ = chg_idx(df['student_level'].unique().tolist())
            # df['student_level_'] = df['student_level'].apply(lambda x: student_level_[x])
            # col_['student_level_'] = student_level_

            # beta_ = chg_idx(df['beta'].unique().tolist())
            # df['beta_'] = df['beta'].apply(lambda x: beta_[x])
            # col_['beta_'] = beta_


            #문제를 푼 인원 
            # usr_cnt = df.groupby('assessmentItemID')['userID'].count()
            # df = df.join(usr_cnt, on='assessmentItemID', rsuffix='_cnt')
            # cnt_ = chg_idx(df['assessmentItemID_cnt'].unique().tolist())
            # df['assessmentItemID_cnt_'] = df['assessmentItemID_cnt'].apply(lambda x: cnt_[x])
            # col_['cnt_'] = cnt_

            return df, col_


        train_df, col_ = FE(train_df)
        test_df, _  = FE(test_df)

        self.assItemID_ = col_['assItemID_']
        
        self.n_assItemID_ = len(col_['assItemID_'])
        self.n_testId_ = len(col_['testID_'])
        self.n_tag_ = len(col_['tag_'])
        self.n_level_ = len(col_['level_'])
        # self.n_cnt_ = len(col_['cnt_'])
        # self.n_student_level_ = len(col_['student_level_'])
        # self.n_beta_ = len(col_['beta_'])

        self.n_dayweek = 7
        self.train_df = train_df
        self.test_df = test_df
        self.df = pd.concat([train_df, test_df[test_df['answerCode'] != -1]]).reset_index(drop=True)
        self.cat_cols = ['assessmentItemID_','testId_','KnowledgeTag_','level_','dayofweek']#'student_level_', 'beta_'
        self.con_cols = ['elapsed','answerCode_rate','elapsed_std']
        
    def get_oof(self, oof):
        oof_ID_set_v = self.oof_ID_set[oof]

        train = []
        valid = []

        grob = self.df.groupby('userID')
        for usr, df in grob:
            if usr in oof_ID_set_v:
                train.append(df.iloc[:-1,:])
                valid.append(df.copy())
            else:
                train.append(df)

        train = pd.concat(train).reset_index(drop = True)
        valid = pd.concat(valid).reset_index(drop = True)

        return train, valid
    
    def get_test_data(self):
        return self.test_df.copy()

In [8]:
class Custom(DTset):
    def __init__(self,
                 df,
                 cat_cols = ['assessmentItemID_','testId_','KnowledgeTag_','level_','dayofweek'],#'student_level_', 'beta_'
                 con_cols = ['elapsed', 'answerCode_rate','elapsed_std']):
        
        self.cat_cols = cat_cols
        self.con_cols = con_cols
        self.get_df = df.groupby('userID')
        self.user_lst = df['userID'].unique().tolist()
    
    def __len__(self):
        return len(self.user_lst)
    
    def __getitem__(self, idx):
        user = self.user_lst[idx]
        get_df = self.get_df.get_group(user)

        prsnt_df = get_df.iloc[1:,:]
        prsnt_cat = prsnt_df[self.cat_cols].values
        prsnt_con = prsnt_df[self.con_cols].values
        prsnt_answerCode = prsnt_df['answerCode'].values

        past_df = get_df.iloc[:-1,:]
        past_cat = past_df[self.cat_cols].values
        past_con = past_df[self.con_cols].values
        past_answerCode = past_df['answerCode'].values

        return {'past_cat' : past_cat,
                'past_con' : past_con,
                'past_answerCode' : past_answerCode,
                'prsnt_cat' : prsnt_cat,
                'prsnt_con' : prsnt_con,
                'prsnt_answerCode' : prsnt_answerCode}
    




def pad_sequence(seq, max_len, padding_value = 0):
    try:
        seq_len, col = seq.shape
        padding = np.zeros((max_len - seq_len, col)) + padding_value
    except:
        seq_len = seq.shape[0]
        padding = np.zeros((max_len - seq_len, )) + padding_value

    padding_seq = np.concatenate([padding, seq])

    return padding_seq

def train_make_batch(samples):
    max_len = 0
    for sample in samples:
        seq_len, _ = sample['past_cat'].shape
        if max_len < seq_len:
            max_len = seq_len
    
    past_cat = []
    past_con = []
    past_answerCode = []
    prsnt_cat = []
    prsnt_con = []
    prsnt_answerCode = []

    for sample in samples:
        past_cat += [pad_sequence(sample['past_cat'] + 1, max_len = max_len, padding_value = 0)]
        past_con += [pad_sequence(sample['past_con'], max_len = max_len, padding_value = 0)]
        past_answerCode += [pad_sequence(sample['past_answerCode'] + 1, max_len = max_len, padding_value = 0)]
        prsnt_cat += [pad_sequence(sample['prsnt_cat'] + 1, max_len = max_len, padding_value = 0)]
        prsnt_con += [pad_sequence(sample['prsnt_con'], max_len = max_len, padding_value = 0)]
        prsnt_answerCode += [pad_sequence(sample['prsnt_answerCode'], max_len = max_len, padding_value = -1)]

    return torch.tensor(past_cat, dtype = torch.long), torch.tensor(past_con, dtype = torch.float32), torch.tensor(past_answerCode, dtype = torch.long), torch.tensor(prsnt_cat, dtype = torch.long), torch.tensor(prsnt_con, dtype = torch.float32), torch.tensor(prsnt_answerCode, dtype = torch.float32)

# Model: Transformer + LSTM

In [16]:
class ScaledDotProductAttention(nn.Module):
    def __init__(self, hidden_dim, dropout_ratio):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout_ratio)

    def forward(self, q, k, v, mask):
        score = torch.matmul(q, k.permute(0,1,3,2)) / math.sqrt(self.hidden_dim)
        score = score.masked_fill(mask==0, -1e10)
        att_d = self.dropout(F.softmax(score, dim=-1))
        output = torch.matmul(att_d, v)
        return output, att_d


class MultiHeadAttention(nn.Module):
    def __init__(self, n_heads, hidden_dim, dropout_ratio):
        super().__init__()
        self.n_heads = n_heads
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout_ratio)
        self.head_dim = self.hidden_dim // self.n_heads
        self.attention = ScaledDotProductAttention(hidden_dim, dropout_ratio)
        self.layerNorm = nn.LayerNorm(hidden_dim)

        self.w_q = nn.Linear(hidden_dim, hidden_dim,bias=False)
        self.w_k = nn.Linear(hidden_dim, hidden_dim,bias=False)
        self.w_v = nn.Linear(hidden_dim, hidden_dim,bias=False)
        self.w_o = nn.Linear(hidden_dim, hidden_dim,bias=False)


    def forward(self, src, mask):

        resicnect = src
        batch_size, seq_len = src.size(0), src.size(1)
        q = self.w_q(src).view(batch_size, seq_len, self.n_heads, self.head_dim).permute(0,2,1,3)
        k = self.w_k(src).view(batch_size, seq_len, self.n_heads, self.head_dim).permute(0,2,1,3)
        v = self.w_v(src).view(batch_size, seq_len, self.n_heads, self.head_dim).permute(0,2,1,3)
        
        
        output, att_d = self.attention(q, k, v, mask)
        output = output.transpose(1,2).contiguous()
        output = output.view(batch_size, seq_len, -1)

        output = self.layerNorm(self.dropout(self.w_o(output)) + resicnect)

        return output, att_d


class PositionWiseFeedForwardNetwork(nn.Module):
    def __init__(self, hidden_dim, dropout_ratio):
        super().__init__()
        self.layerNorm = nn.LayerNorm(hidden_dim)
        self.dropout = nn.Dropout(dropout_ratio)

        self.fc_1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)


    def forward(self, x):
        resicnect = x
        output = self.fc_2(torch.relu(self.dropout(self.fc_1(x))))
        output = self.layerNorm(self.dropout(output)+ resicnect)

        return output


class SasRec(nn.Module):
    def __init__(self, n_heads, hidden_dim, dropout_ratio):
        super().__init__()
        self.attention = MultiHeadAttention(n_heads, hidden_dim, dropout_ratio)
        self.FFN = PositionWiseFeedForwardNetwork(hidden_dim, dropout_ratio)

    def forward(self, src, mask):
        output, att_d = self.attention(src, mask)
        output = self.FFN(output)
        return output, att_d


class sasreclstm(nn.Module):
    def __init__(self, 
                 n_assID, 
                 n_testID, 
                 n_tag, 
                 n_level, 
                 n_dayweek,
                 con_cols, 
                 cat_cols, 
                 hidden_dim, 
                 emb_size, 
                 n_heads, 
                 n_layers, 
                 dropout_ratio, 
                 device):
        super().__init__()

        self.n_assID = n_assID
        self.n_testID = n_testID
        self.n_tag = n_tag
        self.n_level = n_level
        self.n_dayweek = n_dayweek
        self.con_cols = con_cols
        self.cat_cols = cat_cols
        self.hidden_dim = hidden_dim
        self.emb_size = emb_size
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.dropout = nn.Dropout(dropout_ratio)
        self.device = device

        past_embd = {}
        past_embd['assessmentItemID_'] = nn.Embedding(self.n_assID + 1, self.emb_size, padding_idx=0)
        past_embd['testId_'] = nn.Embedding(self.n_testID + 1, self.emb_size, padding_idx=0)
        past_embd['KnowledgeTag_'] = nn.Embedding(self.n_tag + 1, self.emb_size, padding_idx=0)
        past_embd['level_'] = nn.Embedding(self.n_level + 1, self.emb_size, padding_idx= 0)
        past_embd['dayofweek'] = nn.Embedding(self.n_dayweek + 1, self.emb_size, padding_idx=0)


        self.past_embd_dict = nn.ModuleDict(past_embd)

        self.past_answerCode_embd = nn.Embedding(3, self.hidden_dim, padding_idx=0)

        self.past_embd_cat = nn.Sequential(nn.Linear(len(cat_cols) * self.emb_size, self.hidden_dim // 2), nn.LayerNorm(self.hidden_dim // 2))

        self.past_embd_con = nn.Sequential(nn.Linear(len(con_cols),self.hidden_dim// 2), nn.LayerNorm(self.hidden_dim //2))

        self.embd_layernorm = nn.LayerNorm(self.hidden_dim)

        self.past_lstm = nn.LSTM(
            input_size = self.hidden_dim,
            hidden_size = self.hidden_dim,
            num_layers = self.n_layers,
            batch_first = True,
            bidirectional = False,
            dropout = dropout_ratio
        )
        
        self.past_blocks = nn.ModuleList([SasRec(self.n_heads, self.hidden_dim, dropout_ratio) for _ in range(self.n_layers)])


        prsnt_embd = {}
        prsnt_embd['assessmentItemID_'] = nn.Embedding(self.n_assID + 1, self.emb_size, padding_idx=0)
        prsnt_embd['testId_'] = nn.Embedding(self.n_testID + 1, self.emb_size, padding_idx=0)
        prsnt_embd['KnowledgeTag_'] = nn.Embedding(self.n_tag + 1, self.emb_size, padding_idx=0)
        prsnt_embd['level_'] = nn.Embedding(self.n_level + 1, self.emb_size, padding_idx= 0)
        prsnt_embd['dayofweek'] = nn.Embedding(self.n_dayweek + 1, self.emb_size, padding_idx=0)

        self.prsnt_embd_dict = nn.ModuleDict(prsnt_embd)

        self.prsnt_embd_cat = nn.Sequential(nn.Linear(len(cat_cols) * self.emb_size, self.hidden_dim // 2), nn.LayerNorm(self.hidden_dim // 2))

        self.prsnt_embd_con = nn.Sequential(nn.Linear(len(con_cols),self.hidden_dim// 2), nn.LayerNorm(self.hidden_dim //2))

        self.prsnt_lstm = nn.LSTM(
            input_size = self.hidden_dim,
            hidden_size = self.hidden_dim,
            num_layers = self.n_layers,
            batch_first = True,
            bidirectional = False,
            dropout = dropout_ratio
        )

        self.prsnt_blocks = nn.ModuleList([SasRec(self.n_heads, self.hidden_dim, dropout_ratio) for _ in range(self.n_layers)])

        self.predict_layer = nn.Sequential(nn.Linear(self.hidden_dim*2, 1), nn.Sigmoid())


    def forward(self, past_cat, past_con, past_answerCode, prsnt_cat, prsnt_con):

        mask_pad = torch.BoolTensor(past_answerCode > 0).unsqueeze(1).unsqueeze(1)
        mask_time = (1 - torch.triu(torch.ones((1,1,past_answerCode.size(1))), diagonal=1)).bool()
        mask = (mask_pad & mask_time).to(self.device)


        past_embd_cat_lst = []
        for i, cat in enumerate(self.cat_cols):
            past_embd_cat_lst.append(self.past_embd_dict[cat](past_cat[:,:,i]))

    
        past_cat_embd = torch.concat(past_embd_cat_lst, dim=-1)
        past_cat_embd = self.past_embd_cat(past_cat_embd)
        past_con_embd = self.past_embd_con(past_con)

        past_embd = torch.concat([past_cat_embd, past_con_embd], dim=-1)
        past_embd += self.past_answerCode_embd(past_answerCode.to(self.device))
        past_embd = self.embd_layernorm(past_embd)

        for b in self.past_blocks:
            past_embd, _ = b(past_embd, mask)
        
        past_embd, _ = self.past_lstm(past_embd)



        prsnt_embd_cat_lst = []

        for i,cat in enumerate(self.cat_cols):
            prsnt_embd_cat_lst.append(self.prsnt_embd_dict[cat](prsnt_cat[:,:,i]))

        prsnt_cat_embd = torch.concat(prsnt_embd_cat_lst, dim=-1)
        prsnt_cat_embd = self.prsnt_embd_cat(prsnt_cat_embd)
        prsnt_con_embd = self.prsnt_embd_con(prsnt_con)

        prsnt_embd = torch.concat([prsnt_cat_embd, prsnt_con_embd], dim=-1)
        
        for b in self.prsnt_blocks:
            prsnt_embd, _ = b(prsnt_embd, mask)
        
        prsnt_embd, _ = self.prsnt_lstm(prsnt_embd)


        embd = torch.concat([self.dropout(past_embd), self.dropout(prsnt_embd)], dim =-1)

        output = self.predict_layer(embd)

        return output

        
    

In [17]:
def train(model, data_loader, criterion, optimizer):
    model.train()
    loss_val = 0

    for past_cat, past_con,past_answerCode,prsnt_cat,prsnt_con,prsnt_answerCode in data_loader:
        past_cat, past_con = past_cat.to(device), past_con.to(device)
        prsnt_cat,prsnt_con,prsnt_answerCode = prsnt_cat.to(device), prsnt_con.to(device), prsnt_answerCode.to(device)
        optimizer.zero_grad()

        output = model(past_cat, past_con,past_answerCode,prsnt_cat,prsnt_con).squeeze(2)
        loss = criterion(output[prsnt_answerCode != -1], prsnt_answerCode[prsnt_answerCode != -1])

        loss.backward()
        optimizer.step()

        loss_val += loss.item()

    loss_val /= len(data_loader)

    return loss_val

def evaluate(model, data_loader):
    model.eval()

    target = []
    pred = []

    with torch.no_grad():
        for past_cat, past_con,past_answerCode,prsnt_cat,prsnt_con,prsnt_answerCode in data_loader:

            past_cat, past_con = past_cat.to(device), past_con.to(device)
            prsnt_cat,prsnt_con,prsnt_answerCode = prsnt_cat.to(device), prsnt_con.to(device), prsnt_answerCode.to(device)

            output = model(past_cat, past_con,past_answerCode,prsnt_cat,prsnt_con).squeeze(2)

            target.extend(prsnt_answerCode[:,-1].cpu().numpy().tolist())
            pred.extend(output[:,-1].cpu().numpy().tolist())
    
    roc_auc = roc_auc_score(target, pred)

    return roc_auc


def predict(model, data_loader):
    model.eval()

    pred = []

    with torch.no_grad():
        for past_cat, past_con,past_answerCode,prsnt_cat,prsnt_con,prsnt_answerCode in data_loader:
            past_cat, past_con = past_cat.to(device), past_con.to(device)
            prsnt_cat,prsnt_con = prsnt_cat.to(device), prsnt_con.to(device)
            output = model(past_cat, past_con,past_answerCode,prsnt_cat,prsnt_con).squeeze(2)

            pred.extend(output[:,-1].cpu().numpy().tolist())

    return pred

In [18]:
BATCH_SIZE = 32
EPOCHS = 5
HIDDEN_DIM = 128
EMB_SIZE = 64
N_HEADS = 2
N_LAYERS = 1
DROPOUT_RATIO = 0.3
NUM_WORKERS = 8



LEARNING_RATE = 0.001
SEED = 4444

DATA_PATH = '/opt/ml/ephemeral/data'
MODEL_PATH = '/opt/ml/ephemeral/model'
SUBMISSION_PATH = '/opt/ml/ephemeral/submission'

model_name = 'sasrec+lstm.pt'
submission_name = 'sasrec+lstm.csv'


In [19]:
if not os.path.isdir(MODEL_PATH):
    os.mkdir(MODEL_PATH)
if not os.path.isdir(SUBMISSION_PATH):
    os.mkdir(SUBMISSION_PATH)

dataset_ = DTset(DATA_PATH=DATA_PATH)


In [13]:
dataset_.con_cols

['elapsed', 'answerCode_rate', 'elapsed_std']

In [14]:
dataset_.cat_cols

['assessmentItemID_', 'testId_', 'KnowledgeTag_', 'level_', 'dayofweek']

# Ensemble

In [20]:
oof_roc_auc = 0

for oof in dataset_.oof_ID_set.keys():
    train_df,valid_df = dataset_.get_oof(oof)

    seed_everything(4444+ oof)

    train_dataset = Custom(df = train_df,)
    train_data_loader = DataLoader(
        train_dataset,
        batch_size = BATCH_SIZE,
        shuffle=True,
        drop_last=False,
        collate_fn = train_make_batch,
        num_workers=NUM_WORKERS)
    
    valid_dataset = Custom(df = valid_df,)
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size = 1,
        shuffle = False, 
        drop_last = False,
        collate_fn = train_make_batch,
        num_workers = NUM_WORKERS)

    model = sasreclstm(
        n_assID=dataset_.n_assItemID_,
        n_testID=dataset_.n_testId_,
        n_tag=dataset_.n_tag_,
        n_level=dataset_.n_level_,
        n_dayweek=dataset_.n_dayweek,
        con_cols=dataset_.con_cols,
        cat_cols=dataset_.cat_cols,
        hidden_dim=HIDDEN_DIM,
        emb_size=EMB_SIZE,
        n_heads=N_HEADS,
        n_layers=N_LAYERS,
        dropout_ratio=DROPOUT_RATIO,
        device = device).to(device)


    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.BCELoss()

    best_epoch = 0
    best_train_loss = 0
    best_roc_auc = 0


    for epoch in range(1, EPOCHS + 1):

        for _ in range(1):
            train_loss = train(model = model, data_loader = train_data_loader, criterion = criterion, optimizer = optimizer)
            roc_auc = evaluate(model = model, data_loader = valid_data_loader)
            if best_roc_auc < roc_auc:
                best_epoch = epoch
                best_train_loss = train_loss
                best_roc_auc = roc_auc
                torch.save(model.state_dict(), os.path.join(MODEL_PATH, f'oof_{oof}_' + model_name))

    
    print(f'BEST OOF-{oof}| Epoch: {best_epoch:3d}| Train loss: {best_train_loss:.5f}| roc_auc: {best_roc_auc:.5f}')

    oof_roc_auc += best_roc_auc

print(f'Total roc_auc: {oof_roc_auc / len(dataset_.oof_ID_set.keys()):.5f}')




RuntimeError: CUDA error: device-side assert triggered

In [None]:
test_df = dataset_.get_test_data()
test_dataset = Custom(df = test_df,)
test_data_loader = DataLoader(
    test_dataset,
    batch_size = 1, 
    shuffle = False, 
    drop_last = False,
    collate_fn = train_make_batch,
    num_workers = NUM_WORKERS)

pred_list = []

model = sasreclstm(
    n_assID=dataset_.n_assItemID_,
    n_testID=dataset_.n_testId_,
    n_tag=dataset_.n_tag_,
    n_level=dataset_.n_level_,
    n_dayweek=dataset_.n_dayweek,
    con_cols=dataset_.con_cols,
    cat_cols=dataset_.cat_cols,
    hidden_dim=HIDDEN_DIM,
    emb_size=EMB_SIZE,
    n_heads=N_HEADS,
    n_layers=N_LAYERS,
    dropout_ratio=DROPOUT_RATIO,
    device = device
).to(device)

#    n_student_level = dataset_.n_student_level_,
    #n_beta = dataset_.n_beta_,

for oof in dataset_.oof_ID_set.keys():
    model.load_state_dict(torch.load(os.path.join(MODEL_PATH, f'oof_{oof}_' + model_name)))
    pred = predict(model = model, data_loader = test_data_loader)
    pred_list.append(pred)

pred_list = np.array(pred_list).mean(axis = 0)

In [None]:
submission = pd.DataFrame(data = np.array(pred_list), columns = ['prediction'])
submission['id'] = submission.index
submission = submission[['id', 'prediction']]
submission.to_csv(os.path.join(SUBMISSION_PATH, 'OOF-Ensemble-' + submission_name), index = False)