# 연속형변수 추가한 baseline코드

In [1]:
import argparse

import os
import numpy as np
import pandas as pd
from datetime import datetime
import time
import tqdm
import random

import pdb
import wandb

from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
from transformers.models.bert.modeling_bert import BertConfig, BertEncoder, BertModel

from dkt.dataloader import Preprocess
from dkt import trainer
from dkt.utils import setSeeds, increment_path, delete_model
from dkt.optimizer import get_optimizer
from dkt.scheduler import get_scheduler
from dkt.trainer import compute_loss, update_params, get_lr, save_checkpoint
from dkt.metric import get_metric
from dkt.criterion import get_criterion

In [2]:
namespace = {
    'seed' : 42,
    'device' : 'cuda',
    'data_dir' : '/opt/ml/input/data/train_dataset',
    'asset_dir' : '/opt/ml/asset/',
    'file_name' : 'train_data.csv',
    'model_dir' : '/opt/ml/models/',
    'model_name' : 'model.pt',
    'output_dir' : '/opt/ml/output/',
    'test_file_name' : 'test_data.csv',
    'max_seq_len' : 128,
    'num_workers' : 1,
    'hidden_dim' : 512,
    'n_layers' : 2,
    'n_heads' : 2,
    'drop_out' : 0.2,
    'n_epochs' : 200,
    'batch_size' : 64,
    'lr' : 0.0001,
    'clip_grad' : 10,
    'patience' : 15,
    'log_steps' : 50,
    'model' : 'lstm',
    'optimizer' : 'adam',
    'scheduler' : 'plateau'
}

args = argparse.Namespace(**namespace)
device = "cuda" if torch.cuda.is_available() else "cpu"
args.device = device
setSeeds(args.seed)

## preprocess

 preprocess

- __feature_engineering함수에서 연속형 변수 처리해줬고 그에 맞게 다른 함수도 처리했습니다

In [7]:
class Preprocess:
    def __init__(self, args):
        self.args = args
        self.train_data = None
        self.test_data = None
        

    def get_train_data(self):
        return self.train_data

    def get_test_data(self):
        return self.test_data

    def split_data(self, data, ratio=0.7, shuffle=True, seed=0):
        """
        split data into two parts with a given ratio.
        """
        if shuffle:
            random.seed(seed) # fix to default seed 0
            random.shuffle(data)

        size = int(len(data) * ratio)
        data_1 = data[:size]
        data_2 = data[size:]

        return data_1, data_2

    def __save_labels(self, encoder, name):
        le_path = os.path.join(self.args.asset_dir, name + '_classes.npy')
        np.save(le_path, encoder.classes_)

    def __preprocessing(self, df, is_train = True):
        cate_cols = ['assessmentItemID', 'testId', 'KnowledgeTag']

        if not os.path.exists(self.args.asset_dir):
            os.makedirs(self.args.asset_dir)
            
        for col in cate_cols:
            
            le = LabelEncoder()
            if is_train:
                #For UNKNOWN class
                a = df[col].unique().tolist() + ['unknown']
                le.fit(a)
                self.__save_labels(le, col)
            else:
                label_path = os.path.join(self.args.asset_dir,col+'_classes.npy')
                le.classes_ = np.load(label_path)
                
                df[col] = df[col].apply(lambda x: x if str(x) in le.classes_ else 'unknown')

            #모든 컬럼이 범주형이라고 가정
            df[col]= df[col].astype(str)
            test = le.transform(df[col])
            df[col] = test
            

        def convert_time(s):
            timestamp = time.mktime(datetime.strptime(s, '%Y-%m-%d %H:%M:%S').timetuple())
            return int(timestamp)

        df['Timestamp'] = df['Timestamp'].apply(convert_time)
        
        return df

    def __feature_engineering(self, df):
        # TODO
        def percentile(s):
            return np.sum(s) / len(s)
        
        # 큰 카테고리
        df['big_features'] = df['testId'].apply(lambda x : x[2]).astype(int)

        # 큰 카테고리별 정답률
        stu_groupby = df.groupby('big_features').agg({
            'assessmentItemID': 'count',
            'answerCode': percentile
        }).rename(columns = {'answerCode' : 'answer_rate'})

        # tag별 정답률
        stu_tag_groupby = df.groupby(['big_features', 'KnowledgeTag']).agg({
            'assessmentItemID': 'count',
            'answerCode': percentile
        }).rename(columns = {'answerCode' : 'answer_rate'})

        # 시험지별 정답률
        stu_test_groupby = df.groupby(['big_features', 'testId']).agg({
            'assessmentItemID': 'count',
            'answerCode': percentile
        }).rename(columns = {'answerCode' : 'answer_rate'})
                                                                    
        # 문항별 정답률
        stu_assessment_groupby = df.groupby(['big_features', 'assessmentItemID']).agg({
            'assessmentItemID': 'count',
            'answerCode': percentile
        }).rename(columns = {'assessmentItemID' : 'assessment_count', 'answerCode' : 'answer_rate'})

        df = df.sort_values(by=['userID','Timestamp'], axis=0)

        # 정답 - 큰 카테고리별 정답률 
        '''ex)
        맞은 문제의 큰 카테고리별 정답률이 0.7 이면 1 - 0.7 = 0.3이 됨)
        틀린 문제의 큰 카테고리별 정답률이 0.7 이면 0 - 0.7 = -0.7이 됨)
        '''
        temp = pd.merge(df, stu_groupby.reset_index()[['big_features', 'answer_rate']], on = ['big_features'])
        temp = temp.sort_values(by=['userID','Timestamp'], axis=0).reset_index()
        df['answer_delta'] = temp['answerCode'] - temp['answer_rate']

        # 정답 - 태그별 정답률
        temp = pd.merge(df, stu_tag_groupby.reset_index()[['answer_rate', 'KnowledgeTag']], on = ['KnowledgeTag'])
        temp = temp.sort_values(by=['userID','Timestamp'], axis=0).reset_index()
        df['tag_delta'] = temp['answerCode'] - temp['answer_rate']

        # 정답 - 시험별 정답률
        temp = pd.merge(df, stu_test_groupby.reset_index()[['answer_rate', 'testId']], on = ['testId'])
        temp = temp.sort_values(by=['userID','Timestamp'], axis=0).reset_index()
        df['test_delta'] = temp['answerCode'] - temp['answer_rate']

        # 정답 - 문항별 정답률
        temp = pd.merge(df, stu_assessment_groupby.reset_index()[['answer_rate', 'assessmentItemID']], on = ['assessmentItemID'])
        temp = temp.sort_values(by=['userID','Timestamp'], axis=0).reset_index()
        df['assess_delta'] = temp['answerCode'] - temp['answer_rate']

        return df

    def load_data_from_file(self, file_name, is_train=True):
        csv_file_path = os.path.join(self.args.data_dir, file_name)
        df = pd.read_csv(csv_file_path)#, nrows=100000)
        df = self.__feature_engineering(df)
        df = self.__preprocessing(df, is_train)

        # 추후 feature를 embedding할 시에 embedding_layer의 input 크기를 결정할때 사용

        self.args.n_questions = len(np.load(os.path.join(self.args.asset_dir,'assessmentItemID_classes.npy')))
        self.args.n_test = len(np.load(os.path.join(self.args.asset_dir,'testId_classes.npy')))
        self.args.n_tag = len(np.load(os.path.join(self.args.asset_dir,'KnowledgeTag_classes.npy')))
        self.args.n_big_features = 9


        df = df.sort_values(by=['userID','Timestamp'], axis=0)

        columns = ['userID', 'assessmentItemID', 'testId', 'answerCode', 'KnowledgeTag', 'big_features', 'answer_delta', 'tag_delta', 'test_delta', 'assess_delta']

        group = df[columns].groupby('userID').apply(
                lambda r: (
                    r['testId'].values, 
                    r['assessmentItemID'].values,
                    r['KnowledgeTag'].values,
                    r['answerCode'].values,
                    r['big_features'].values,
                    r['answer_delta'].values,
                    r['tag_delta'].values,
                    r['test_delta'].values,
                    r['assess_delta'].values,
                )
            )

        return group.values

    def load_train_data(self, file_name):
        self.train_data = self.load_data_from_file(file_name)

    def load_test_data(self, file_name):
        self.test_data = self.load_data_from_file(file_name, is_train= False)

In [4]:
preprocess = Preprocess(args)
preprocess.load_train_data(args.file_name)
train_data = preprocess.get_train_data()

train_data, valid_data = preprocess.split_data(train_data)

## DKTDataset

In [17]:
class DKTDataset(torch.utils.data.Dataset):
    def __init__(self, data, args):
        self.data = data
        self.args = args

    def __getitem__(self, index):
        row = self.data[index]

        # 각 data의 sequence length
        seq_len = len(row[0])

        test, question, tag, correct, big_features, answer_delta, tag_delta, test_delta, assess_delta = row

        # category변수와 continuout변수를 나눠줌        
        cate_cols = [test, question, tag, correct, big_features]
        cont_cols = [answer_delta, tag_delta, test_delta, assess_delta]

        # max seq len을 고려하여서 이보다 길면 자르고 아닐 경우 그대로 냅둔다
        if seq_len > self.args.max_seq_len:
            for i, col in enumerate(cate_cols):
                cate_cols[i] = col[-self.args.max_seq_len:]
            mask = np.ones(self.args.max_seq_len, dtype=np.int16)
        else:
            mask = np.zeros(self.args.max_seq_len, dtype=np.int16)
            mask[-seq_len:] = 1

        # mask도 columns 목록에 포함시킴
        cate_cols.append(mask)

        if seq_len > self.args.max_seq_len:
            for i, col in enumerate(cont_cols):
                cont_cols[i] = col[-self.args.max_seq_len:]

        # np.array -> torch.tensor 형변환
        for i, col in enumerate(cate_cols):
            cate_cols[i] = torch.tensor(col)

        # np.array -> torch.tensor 형변환
        for i, col in enumerate(cont_cols):
            cont_cols[i] = torch.tensor(col)

        return cate_cols, cont_cols

    def __len__(self):
        return len(self.data)


from torch.nn.utils.rnn import pad_sequence

def collate(batch):
    # cate변수에서 했던 처리를 cont에서도 똑같이 해줌
    cate_col_n = len(batch[0][0])
    cont_col_n = len(batch[0][1])

    cate_col_list = [[] for _ in range(cate_col_n)]
    cont_col_list = [[] for _ in range(cont_col_n)]

    max_seq_len = len(batch[0][0][-1])

        
    # batch의 값들을 각 column끼리 그룹화
    for row in batch:
        for i, col in enumerate(row[0]):
            pre_padded = torch.zeros(max_seq_len)
            pre_padded[-len(col):] = col
            cate_col_list[i].append(pre_padded)
        for i, col in enumerate(row[1]):
            pre_padded = torch.zeros(max_seq_len)
            pre_padded[-len(col):] = col
            cont_col_list[i].append(pre_padded)


    for i, _ in enumerate(cate_col_list):
        cate_col_list[i] =torch.stack(cate_col_list[i])
    
    for i, _ in enumerate(cont_col_list):
        cont_col_list[i] =torch.stack(cont_col_list[i])

    return tuple(cate_col_list), tuple(cont_col_list)


def get_loaders(args, train, valid):

    pin_memory = False
    train_loader, valid_loader = None, None
    
    if train is not None:
        trainset = DKTDataset(train, args)
        train_loader = torch.utils.data.DataLoader(trainset, num_workers=args.num_workers, shuffle=True,
                            batch_size=args.batch_size, pin_memory=pin_memory, collate_fn=collate)
    if valid is not None:
        valset = DKTDataset(valid, args)
        valid_loader = torch.utils.data.DataLoader(valset, num_workers=args.num_workers, shuffle=False,
                            batch_size=args.batch_size, pin_memory=pin_memory, collate_fn=collate)

    return train_loader, valid_loader

In [6]:
train_loader, valid_loader = get_loaders(args, train_data, valid_data)

### process_batch



In [19]:
def process_batch(batch, args):

    (test, question, tag, correct, big_features, mask), cont_features = batch    
    
    # change to float
    mask = mask.type(torch.FloatTensor)
    correct = correct.type(torch.FloatTensor)
    big_features = big_features.type(torch.FloatTensor)

    temp = []

    interaction = correct + 1 # 패딩을 위해 correct값에 1을 더해준다.
    interaction = interaction.roll(shifts=1, dims=1)
    interaction[:, 0] = 0 # set padding index to the first sequence
    interaction = (interaction * mask).to(torch.int64)
    test = ((test + 1) * mask).to(torch.int64)
    question = ((question + 1) * mask).to(torch.int64)
    tag = ((tag + 1) * mask).to(torch.int64)
    big_features = (big_features * mask).to(torch.int64)

    # interaction과 동일하게 rolling을 해서 이전 정보를 사용할 수 있도록 함
    for cont_feature in cont_features:
        cont_feature = cont_feature.type(torch.FloatTensor)
        cont_feature = cont_feature.roll(shifts=1, dims=1)
        cont_feature[:, 0] = 0
        cont_feature = (cont_feature * mask).unsqueeze(-1)
        temp.append(cont_feature)
    
    # device memory로 이동
    test = test.to(args.device)
    question = question.to(args.device)
    tag = tag.to(args.device)
    correct = correct.to(args.device)
    mask = mask.to(args.device)
    interaction = interaction.to(args.device)
    big_features = big_features.to(args.device)

    # 연속형 변수들을 concat해줌
    cont_features = torch.cat(temp, dim=-1).to(args.device)

    return (test, question,
            tag, correct, mask,
            interaction, big_features), cont_features

## model

- lstm, bert 모델을 사용했습니다

In [10]:
class CustomLSTM(nn.Module):

    def __init__(self, args):
        super(CustomLSTM, self).__init__()
        self.args = args
        self.device = args.device

        self.hidden_dim = self.args.hidden_dim
        self.n_layers = self.args.n_layers

        # Embedding 
        # interaction은 현재 correct로 구성되어있다. correct(1, 2) + padding(0)
        self.embedding_interaction = nn.Embedding(3, self.hidden_dim//3)
        self.embedding_test = nn.Embedding(self.args.n_test + 1, self.hidden_dim//3)
        self.embedding_question = nn.Embedding(self.args.n_questions + 1, self.hidden_dim//3)
        self.embedding_tag = nn.Embedding(self.args.n_tag + 1, self.hidden_dim//3)

        # 큰 카테고리 embedding 추가
        self.embedding_big = nn.Embedding(self.args.n_big_features + 1, self.hidden_dim//3)
        
        # embedding combination projection
        self.comb_proj = nn.Sequential(
            nn.Linear((self.hidden_dim//3)*5, self.hidden_dim//2),
            nn.LayerNorm(self.hidden_dim//2)
        )

        # cont features
        self.cont_embed = nn.Sequential(
            nn.Linear(4, self.hidden_dim//2),
            nn.LayerNorm(self.hidden_dim//2)
        )
        
        self.lstm = nn.LSTM(self.hidden_dim,
                            self.hidden_dim,
                            self.n_layers,
                            batch_first=True)
        
        # Fully connected layer
        self.fc = nn.Linear(self.hidden_dim, 1)

        self.activation = nn.Sigmoid()

    def init_hidden(self, batch_size):
        h = torch.zeros(
            self.n_layers,
            batch_size,
            self.hidden_dim)
        h = h.to(self.device)

        c = torch.zeros(
            self.n_layers,
            batch_size,
            self.hidden_dim)
        c = c.to(self.device)

        return (h, c)

    def forward(self, input):
        (test, question, tag, _, mask, interaction, big_features), cont_features = input

        batch_size = interaction.size(0)

        # Embedding
        embed_interaction = self.embedding_interaction(interaction)
        embed_test = self.embedding_test(test)
        embed_question = self.embedding_question(question)
        embed_tag = self.embedding_tag(tag)
        embed_big = self.embedding_big(big_features)
        

        embed = torch.cat([embed_interaction,
                           embed_test,
                           embed_question,
                           embed_big,
                           embed_tag,
                           ], 2)

        cate_embed = self.comb_proj(embed)
        cont_embed = self.cont_embed(cont_features)

        # cate변수와 cont변수를 concat해서 lstm input으로 넣어줌        
        X = torch.cat([cate_embed, cont_embed], 2)
        
        hidden = self.init_hidden(batch_size)
        out, hidden = self.lstm(X, hidden)
        out = out.contiguous().view(batch_size, -1, self.hidden_dim)

        out = self.fc(out)
        preds = self.activation(out).view(batch_size, -1)

        return preds

In [31]:
class CustomBert(nn.Module):

    def __init__(self, args):
        super(CustomBert, self).__init__()
        self.args = args
        self.device = args.device

        # Defining some parameters
        self.hidden_dim = self.args.hidden_dim
        self.n_layers = self.args.n_layers

        # Embedding 
        # interaction은 현재 correct으로 구성되어있다. correct(1, 2) + padding(0)
        self.embedding_interaction = nn.Embedding(3, self.hidden_dim//3)
        self.embedding_test = nn.Embedding(self.args.n_test + 1, self.hidden_dim//3)
        self.embedding_question = nn.Embedding(self.args.n_questions + 1, self.hidden_dim//3)
        self.embedding_tag = nn.Embedding(self.args.n_tag + 1, self.hidden_dim//3)
        # 큰 카테고리 embedding 추가
        self.embedding_big = nn.Embedding(self.args.n_big_features + 1, self.hidden_dim//3)

        # embedding combination projection
        self.comb_proj = nn.Sequential(
            nn.Linear((self.hidden_dim//3)*5, self.hidden_dim//2),
            nn.LayerNorm(self.hidden_dim//2)
        )

        # cont features
        self.cont_embed = nn.Sequential(
            nn.Linear(4, self.hidden_dim//2),
            nn.LayerNorm(self.hidden_dim//2)
        )

        # Bert config
        self.config = BertConfig( 
            3, # not used
            hidden_size=self.hidden_dim,
            num_hidden_layers=self.args.n_layers,
            num_attention_heads=self.args.n_heads,
            max_position_embeddings=self.args.max_seq_len          
        )

        # Defining the layers
        # Bert Layer
        self.encoder = BertModel(self.config)  

        # Fully connected layer
        self.fc = nn.Linear(self.args.hidden_dim, 1)
       
        self.activation = nn.Sigmoid()


    def forward(self, input):
        (test, question, tag, _, mask, interaction, big_features), cont_features = input

        batch_size = interaction.size(0)

        # 신나는 embedding
        embed_interaction = self.embedding_interaction(interaction)
        embed_test = self.embedding_test(test)
        embed_question = self.embedding_question(question)
        embed_tag = self.embedding_tag(tag)
        embed_big = self.embedding_big(big_features)
        

        embed = torch.cat([embed_interaction,
                           embed_test,
                           embed_question,
                           embed_big,
                           embed_tag,
                           ], 2)

        cate_embed = self.comb_proj(embed)
        cont_embed = self.cont_embed(cont_features)

        # cate변수와 cont변수를 concat해서 bert의 input에 넣어줌        
        X = torch.cat([cate_embed, cont_embed], 2)

        # Bert
        encoded_layers = self.encoder(inputs_embeds=X, attention_mask=mask)
        out = encoded_layers[0]
        out = out.contiguous().view(batch_size, -1, self.hidden_dim)
        out = self.fc(out)
        preds = self.activation(out).view(batch_size, -1)

        return preds

In [12]:
def compute_loss(preds, targets):
    """
    Args :
        preds   : (batch_size, max_seq_len)
        targets : (batch_size, max_seq_len)

    """
    loss = get_criterion(preds, targets)
    #마지막 시퀀드에 대한 값만 loss 계산
    # loss = loss[:,-1]
    loss = torch.mean(loss)
    return loss

## train

In [13]:
def train(train_loader, model, optimizer, args):
    model.train()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in enumerate(train_loader):
        input = process_batch(batch, args)
        preds = model(input)

        # cont변수가 추가되었으므로 처리가 필요함 어쨋든 ground truth 인 정답
        targets = input[0][3] # correct

        loss = compute_loss(preds, targets)
        update_params(loss, model, optimizer, args)

        if step % args.log_steps == 0:
            print(f"Training steps: {step} Loss: {str(loss.item())}")
        
        # predictions
        preds = preds[:,-1]
        targets = targets[:,-1]

        if args.device == 'cuda':
            preds = preds.to('cpu').detach().numpy()
            targets = targets.to('cpu').detach().numpy()
        else: # cpu
            preds = preds.detach().numpy()
            targets = targets.detach().numpy()
        
        total_preds.append(preds)
        total_targets.append(targets)
        losses.append(loss)
      

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Train AUC / ACC
    auc, acc = get_metric(total_targets, total_preds)
    loss_avg = sum(losses)/len(losses)
    print(f'TRAIN AUC : {auc} ACC : {acc}')
    return auc, acc, loss_avg

def validate(valid_loader, model, args):
    model.eval()

    total_preds = []
    total_targets = []
    for step, batch in enumerate(valid_loader):
        input = process_batch(batch, args)

        preds = model(input)

        # 마찬가지로 정답
        targets = input[0][3] # correct


        # predictions
        preds = preds[:,-1]
        targets = targets[:,-1]
    
        if args.device == 'cuda':
            preds = preds.to('cpu').detach().numpy()
            targets = targets.to('cpu').detach().numpy()
        else: # cpu
            preds = preds.detach().numpy()
            targets = targets.detach().numpy()

        total_preds.append(preds)
        total_targets.append(targets)

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Train AUC / ACC
    auc, acc = get_metric(total_targets, total_preds)
    
    print(f'VALID AUC : {auc} ACC : {acc}\n')

    return auc, acc, total_preds, total_targets

In [14]:
args.model = 'custombert'

if args.model == 'customlstm':
    model = CustomLSTM(args)
elif args.model == 'custombert':
    model = CustomBert(args)
model.to(device)
print()




In [15]:
# 저는 추가한 feature를 wandb에 기록해주었습니다
args.add_features = ["bigfeature","answer_delta","tag_delta","test_delta","assess_delta"]
name = 'fe4_maxseq128_hiddendim512_custombert'

wandb.login()

wandb.init(project='dkt', config=vars(args))
wandb.run.name = name

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcha-no[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.31 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [18]:
args.model_dir = increment_path(os.path.join(args.model_dir, args.model))
os.makedirs(args.model_dir, exist_ok=True)    

# only when using warmup scheduler
args.total_steps = int(len(train_loader.dataset) / args.batch_size) * (args.n_epochs)
args.warmup_steps = args.total_steps // 10
        
optimizer = get_optimizer(model, args)
scheduler = get_scheduler(optimizer, args)

best_auc = -1
early_stopping_counter = 0

for epoch in range(args.n_epochs):

    print(f"Start Training: Epoch {epoch + 1}")
    
    ### TRAIN
    train_auc, train_acc, train_loss = train(train_loader, model, optimizer, args)
    
    ### VALID
    auc, acc,_ , _ = validate(valid_loader, model, args)

    lr = get_lr(optimizer)
    ### TODO: model save or early stopping
    wandb.log({"epoch": epoch, "train_loss": train_loss, "train_auc": train_auc, "train_acc":train_acc,
                "valid_auc":auc, "valid_acc":acc, "lr":lr})
    if auc > best_auc:
        best_auc = auc
        # # torch.nn.DataParallel로 감싸진 경우 원래의 model을 가져옵니다.
        model_to_save = model.module if hasattr(model, 'module') else model
        
        # 이함수는 제가 추가한 함수라서 주석처리해주시면 됩니다
        # delete_model(args.model_dir)

        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model_to_save.state_dict(),
            },
            args.model_dir, f'model_{epoch + 1}.pt',
        )
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= args.patience:
            print(f'EarlyStopping counter: {early_stopping_counter} out of {args.patience}')
            break

    # scheduler
    if args.scheduler == 'plateau':
        scheduler.step(best_auc)
    else:
        scheduler.step()

wandb.finish()


Start Training: Epoch 1
Training steps: 0 Loss: 0.7785154581069946
Training steps: 50 Loss: 0.46498364210128784
TRAIN AUC : 0.6951391503564113 ACC : 0.6279863481228669
VALID AUC : 0.7348494090319868 ACC : 0.6467661691542289

saving model ...
Start Training: Epoch 2
Training steps: 0 Loss: 0.40450945496559143
Training steps: 50 Loss: 0.42555931210517883
TRAIN AUC : 0.7167595794356313 ACC : 0.6380119453924915
VALID AUC : 0.7384189212912133 ACC : 0.6477611940298508

saving model ...
Start Training: Epoch 3
Training steps: 0 Loss: 0.4592132270336151
Training steps: 50 Loss: 0.47890979051589966
TRAIN AUC : 0.720167160935248 ACC : 0.6431313993174061
VALID AUC : 0.7380142641637433 ACC : 0.6606965174129353

Start Training: Epoch 4
Training steps: 0 Loss: 0.4624432921409607
Training steps: 50 Loss: 0.44153642654418945
TRAIN AUC : 0.7171360926678377 ACC : 0.6446245733788396
VALID AUC : 0.7389346607674004 ACC : 0.6597014925373135

saving model ...
Start Training: Epoch 5
Training steps: 0 Loss: 0

0,1
epoch,71.0
train_loss,0.39894
train_auc,0.77447
train_acc,0.7067
valid_auc,0.78318
valid_acc,0.70299
lr,1e-05
_runtime,633.0
_timestamp,1622190131.0
_step,71.0


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▆▆▆▅▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_auc,▁▃▃▃▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇█▇███████████████
train_acc,▁▂▂▂▂▃▄▅▅▆▆▆▆▇▇▇▇▇▆▇▇▇▇█▇▇▇███▇▇█▇██████
valid_auc,▁▂▂▂▃▅▆▆▆▇▇▆▇▆▇▇█▇▇█▇█▇██▇▇▇████████████
valid_acc,▁▁▂▂▃▃▅▅▅▆▆▆▇▅▇▇▇▇▆▇█▇▇▇▇█▇▇▇▇██▇█▇▇▇██▇
lr,███████████████████▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▁▁
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███


## inference

In [8]:
preprocess = Preprocess(args)
preprocess.load_test_data(args.test_file_name)
test_data = preprocess.get_test_data()

In [18]:
_, test_loader = get_loaders(args, None, test_data)

In [23]:
def load_model(args):    
    model_path = os.path.join(args.model_dir, args.model_name)
    print("Loading Model from:", model_path)
    load_state = torch.load(model_path)

    if args.model == 'custombert':
        model = CustomBert(args)

    # 1. load model state
    model.load_state_dict(load_state['state_dict'], strict=True)
    
    print("Loading Model from:", model_path, "...Finished.")
    return model

In [32]:
args.model_name = 'model_57.pt'
model = load_model(args)
model.to(device)
print()

Loading Model from: /opt/ml/models/custombert/model_57.pt
Loading Model from: /opt/ml/models/custombert/model_57.pt ...Finished.



In [36]:
def inference(args, test_data):
    model.eval()
    _, test_loader = get_loaders(args, None, test_data)
    
    
    total_preds = []
    
    for step, batch in enumerate(test_loader):
        input = process_batch(batch, args)

        preds = model(input)
        

        # predictions
        preds = preds[:,-1]
        

        if args.device == 'cuda':
            preds = preds.to('cpu').detach().numpy()
        else: # cpu
            preds = preds.detach().numpy()
            
        total_preds+=list(preds)
    
    os.makedirs(os.path.join(args.output_dir, args.model), exist_ok=True)
    write_path = os.path.join(args.output_dir, args.model, "output.csv")
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)    
    print(write_path)
    with open(write_path, 'w', encoding='utf8') as w:
        print("writing prediction : {}".format(write_path))
        w.write("id,prediction\n")
        for id, p in enumerate(total_preds):
            w.write('{},{}\n'.format(id,p))

In [38]:
inference(args, test_data)

/opt/ml/output/custombert/output.csv
writing prediction : /opt/ml/output/custombert/output.csv
