In [1]:
import sys
sys.path.append("/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/")

from fe.agg import (
    MakeCorrectCount, 
    MakeCorrectPercent, 
    MakeQuestionCount, 
    MakeTopNCorrectPercent
)

from fe.seq import (
    SplitAssessmentItemID,
    MakeFirstClass,
    MakeSecondClass,
    MakeYMD,
    ConvertTime
)

from dkt_dataset import Preprocess
from utils import get_args, get_root_dir
from fe.feature import FEPipeline
import easydict

In [2]:
# Load the project's default run arguments, then set the experiment root directory.
# NOTE(review): the path below is absolute and machine-specific, so this cell only
# runs on the original author's machine — consider deriving it from a configurable base dir.
args = get_args()
args.root_dir = get_root_dir(
    '/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test'
)

In [3]:
# Location of the training dataset, relative to the notebook's working directory.
args.data_dir = "../../input/data/train_dataset/"

In [4]:
# Assemble the feature-engineering pipeline from the feature classes imported above.
# The list order is the order in which the features are reported in the logs of the
# later preprocessing cell.
fe_pipeline = FEPipeline(args, [
    SplitAssessmentItemID,
    ConvertTime,
    MakeFirstClass,
    MakeSecondClass,
    MakeCorrectCount,
    MakeQuestionCount,
    MakeCorrectPercent,
    MakeTopNCorrectPercent
])
# NOTE(review): presumably a sanity check of the pipeline; it produced no recorded output here.
fe_pipeline.debug()

In [5]:
# Print the name, type and per-column description of every feature in the pipeline.
fe_pipeline.description()

[Feature Descriptions]

feature name : base_feature
feature type : seq
 - userID               : 사용자의 고유 번호입니다. 총 7,442명의 학생이 있습니다
 - assessmentItemID     : 사용자가 푼 문항의 일련 번호입니다.
 - testID               : 사용자가 푼 문항이 포함된 시험지의 일련 번호입니다.
 - answerCode           : 사용자가 푼 문항의 정답 여부를 담고 있는 이진 (0/1) 데이터입니다.
 - Timestamp            : 사용자가 문항을 푼 시간 정보입니다.
 - KnowledgeTag         : 사용자가 푼 문항의 고유 태그가 담겨져 있습니다.

feature name : split_assessmentitem_id
feature type : seq
 - testPaper            : 시험지 번호입니다.
 - testPaperCnt         : 시험지의 문항 번호입니다.

feature name : convert_time
feature type : seq
 - timeSec              : 사용자가 문항을 푼 타임스태프 정보입니다.

feature name : make_first_class
feature type : seq
 - firstClass           : 대분류에 해당합니다.

feature name : make_second_class
feature type : seq
 - secondClass          : 중분류에 해당합니다.

feature name : make_correct_count
feature type : agg
 - correctCnt           : 사용자가 맞춘 문항수를 나타냅니다.

feature name : make_question_count
feature type : agg
 - quesCnt              : 사

In [6]:
# Columns handed to Preprocess (and, via args.columns, to the dataset/model later on).
columns = ['userID', 'answerCode', 
           'testPaper', 'timeSec', 'firstClass', 'secondClass', 
           'correctPer', 'top10CorrectPer']
# Encoder assignment per column group; the preprocessing log reports these groups as
# "Label Columns", "Min Max Columns" and "Standard Columns" respectively.
pre_encoders = {
    'label': ['testPaper', 'firstClass', 'secondClass'],
    'min_max': ['top10CorrectPer', 'correctPer'],
    'std': ['timeSec']
}

preprocess = Preprocess(args, fe_pipeline, columns)

In [7]:
# Run the preprocessing stages in order; each stage logs its progress below:
# feature engineering (train and test), the user-based train/valid split,
# the column encoders configured in `pre_encoders`, and data augmentation.
preprocess.feature_engineering()
preprocess.split_data()
preprocess.preprocessing(pre_encoders)
# NOTE(review): the meaning of `choices=[1, 3]` is defined inside Preprocess — confirm there.
preprocess.data_augmentation(choices=[1, 3])

Feature Engineering Start ... 


load features /home/j-gunmo/features/train_split_assessmentitem_id.pkl to dataframe ... 



Feature Engineering Name: split_assessmentitem_id



testPaper       : 시험지 번호입니다.


dtype: object


[Examples]


INDEX 0000: 060001


INDEX 1000: 080012


INDEX 2000: 060041


INDEX 3000: 080041


INDEX 4000: 060091


INDEX 5000: 060101


INDEX 6000: 080100


INDEX 7000: 080112


INDEX 8000: 040049


INDEX 9000: 090011



testPaperCnt    : 시험지의 문항 번호입니다.


dtype: object


[Examples]


INDEX 0000: 001


INDEX 1000: 001


INDEX 2000: 007


INDEX 3000: 005


INDEX 4000: 003


INDEX 5000: 003


INDEX 6000: 004


INDEX 7000: 002


INDEX 8000: 003


INDEX 9000: 007


load features /home/j-gunmo/features/train_convert_time.pkl to dataframe ... 



Feature Engineering Name: convert_time



timeSec         : 사용자가 문항을 푼 타임스태프 정보입니다.


dtype: int64


[Examples]


INDEX 0000: 1584976631


INDEX 1000: 1588175753


INDEX 2000: 1590428625


INDEX 3000: 1592332726


INDEX 4000: 1594832366


INDEX 5000: 1599493405


INDEX 6000: 1602576550


INDEX 7000: 1604429286


INDEX 8000: 1581193055


INDEX 9000: 1584613367


load features /home/j-gunmo/features/train_make_first_class.pkl to dataframe ... 



Feature Engineering Name: make_first_class



firstClass      : 대분류에 해당합니다.


dtype: object


[Examples]


INDEX 0000: 6


INDEX 1000: 8


INDEX 2000: 6


INDEX 3000: 8


INDEX 4000: 6


INDEX 5000: 6


INDEX 6000: 8


INDEX 7000: 8


INDEX 8000: 4


INDEX 9000: 9


load features /home/j-gunmo/features/train_make_second_class.pkl to dataframe ... 



Feature Engineering Name: make_second_class



secondClass     : 중분류에 해당합니다.


dtype: object


[Examples]


INDEX 0000: 7224


INDEX 1000: 4659


INDEX 2000: 602


INDEX 3000: 4795


INDEX 4000: 628


INDEX 5000: 706


INDEX 6000: 7171


INDEX 7000: 2711


INDEX 8000: 2071


INDEX 9000: 5261


load features /home/j-gunmo/features/train_make_correct_count.pkl to dataframe ... 



Feature Engineering Name: make_correct_count



correctCnt      : 사용자가 맞춘 문항수를 나타냅니다.


dtype: int64


[Examples]


INDEX 0000: 470


INDEX 1000: 470


INDEX 2000: 470


INDEX 3000: 470


INDEX 4000: 470


INDEX 5000: 470


INDEX 6000: 470


INDEX 7000: 470


INDEX 8000: 796


INDEX 9000: 796


load features /home/j-gunmo/features/train_make_question_count.pkl to dataframe ... 



Feature Engineering Name: make_question_count



quesCnt         : 사용자가 푼 문항수를 나타냅니다.


dtype: int64


[Examples]


INDEX 0000: 745


INDEX 1000: 745


INDEX 2000: 745


INDEX 3000: 745


INDEX 4000: 745


INDEX 5000: 745


INDEX 6000: 745


INDEX 7000: 745


INDEX 8000: 933


INDEX 9000: 933


load features /home/j-gunmo/features/train_make_correct_percent.pkl to dataframe ... 



Feature Engineering Name: make_correct_percent



correctPer      : 사용자가 푼 전체 문항에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.6308724832214765


INDEX 1000: 0.6308724832214765


INDEX 2000: 0.6308724832214765


INDEX 3000: 0.6308724832214765


INDEX 4000: 0.6308724832214765


INDEX 5000: 0.6308724832214765


INDEX 6000: 0.6308724832214765


INDEX 7000: 0.6308724832214765


INDEX 8000: 0.8531618435155413


INDEX 9000: 0.8531618435155413


load features /home/j-gunmo/features/train_make_topn_correct_percent.pkl to dataframe ... 



Feature Engineering Name: make_topn_correct_percent



top10CorrectPer : 사용자가 최근 푼 TOP-10개에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.5


INDEX 1000: 0.5


INDEX 2000: 0.5


INDEX 3000: 0.5


INDEX 4000: 0.5


INDEX 5000: 0.5


INDEX 6000: 0.5


INDEX 7000: 0.5


INDEX 8000: 0.9


INDEX 9000: 0.9



top30CorrectPer : 사용자가 최근 푼 TOP-30개에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.6333333333333333


INDEX 1000: 0.6333333333333333


INDEX 2000: 0.6333333333333333


INDEX 3000: 0.6333333333333333


INDEX 4000: 0.6333333333333333


INDEX 5000: 0.6333333333333333


INDEX 6000: 0.6333333333333333


INDEX 7000: 0.6333333333333333


INDEX 8000: 0.8333333333333334


INDEX 9000: 0.8333333333333334



top50CorrectPer : 사용자가 최근 푼 TOP-50개에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.58


INDEX 1000: 0.58


INDEX 2000: 0.58


INDEX 3000: 0.58


INDEX 4000: 0.58


INDEX 5000: 0.58


INDEX 6000: 0.58


INDEX 7000: 0.58


INDEX 8000: 0.82


INDEX 9000: 0.82



top100CorrectPer : 사용자가 최근 푼 TOP-100개에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.66


INDEX 1000: 0.66


INDEX 2000: 0.66


INDEX 3000: 0.66


INDEX 4000: 0.66


INDEX 5000: 0.66


INDEX 6000: 0.66


INDEX 7000: 0.66


INDEX 8000: 0.85


INDEX 9000: 0.85


Feature Engineering End ... 


Original DataFrame Keywords: Index(['userID', 'assessmentItemID', 'testId', 'answerCode', 'Timestamp',
       'KnowledgeTag'],
      dtype='object')


Feature Added DataFrame Keywords: Index(['userID', 'assessmentItemID', 'testId', 'answerCode', 'Timestamp',
       'KnowledgeTag', 'testPaper', 'testPaperCnt', 'timeSec', 'firstClass',
       'secondClass', 'correctCnt', 'quesCnt', 'correctPer', 'top10CorrectPer',
       'top30CorrectPer', 'top50CorrectPer', 'top100CorrectPer'],
      dtype='object')


Feature Engineering Start ... 



Feature Engineering Name: split_assessmentitem_id


load features /home/j-gunmo/features/test_split_assessmentitem_id.pkl to dataframe ... 



testPaper       : 시험지 번호입니다.


dtype: object


[Examples]


INDEX 0000: 050023


INDEX 1000: 020035


INDEX 2000: 050006


INDEX 3000: 020037


INDEX 4000: 050009


INDEX 5000: 050045


INDEX 6000: 050072


INDEX 7000: 050089


INDEX 8000: 050089


INDEX 9000: 050105



testPaperCnt    : 시험지의 문항 번호입니다.


dtype: object


[Examples]


INDEX 0000: 001


INDEX 1000: 003


INDEX 2000: 001


INDEX 3000: 003


INDEX 4000: 003


INDEX 5000: 001


INDEX 6000: 001


INDEX 7000: 003


INDEX 8000: 006


INDEX 9000: 004



Feature Engineering Name: convert_time


load features /home/j-gunmo/features/test_convert_time.pkl to dataframe ... 



timeSec         : 사용자가 문항을 푼 타임스태프 정보입니다.


dtype: int64


[Examples]


INDEX 0000: 1578534991


INDEX 1000: 1582080021


INDEX 2000: 1583876850


INDEX 3000: 1586022801


INDEX 4000: 1588743149


INDEX 5000: 1591764737


INDEX 6000: 1594353821


INDEX 7000: 1595904396


INDEX 8000: 1597130998


INDEX 9000: 1599087082



Feature Engineering Name: make_first_class


load features /home/j-gunmo/features/test_make_first_class.pkl to dataframe ... 



firstClass      : 대분류에 해당합니다.


dtype: object


[Examples]


INDEX 0000: 5


INDEX 1000: 2


INDEX 2000: 5


INDEX 3000: 2


INDEX 4000: 5


INDEX 5000: 5


INDEX 6000: 5


INDEX 7000: 5


INDEX 8000: 5


INDEX 9000: 5



Feature Engineering Name: make_second_class


load features /home/j-gunmo/features/test_make_second_class.pkl to dataframe ... 



secondClass     : 중분류에 해당합니다.


dtype: object


[Examples]


INDEX 0000: 2626


INDEX 1000: 7693


INDEX 2000: 2617


INDEX 3000: 7924


INDEX 4000: 2618


INDEX 5000: 3729


INDEX 6000: 3827


INDEX 7000: 395


INDEX 8000: 394


INDEX 9000: 5269



Feature Engineering Name: make_correct_count


load features /home/j-gunmo/features/test_make_correct_count.pkl to dataframe ... 



correctCnt      : 사용자가 맞춘 문항수를 나타냅니다.


dtype: int64


[Examples]


INDEX 0000: 716


INDEX 1000: 716


INDEX 2000: 716


INDEX 3000: 716


INDEX 4000: 716


INDEX 5000: 716


INDEX 6000: 716


INDEX 7000: 716


INDEX 8000: 716


INDEX 9000: 716


load features /home/j-gunmo/features/test_make_question_count.pkl to dataframe ... 



Feature Engineering Name: make_question_count



quesCnt         : 사용자가 푼 문항수를 나타냅니다.


dtype: int64


[Examples]


INDEX 0000: 1036


INDEX 1000: 1036


INDEX 2000: 1036


INDEX 3000: 1036


INDEX 4000: 1036


INDEX 5000: 1036


INDEX 6000: 1036


INDEX 7000: 1036


INDEX 8000: 1036


INDEX 9000: 1036


load features /home/j-gunmo/features/test_make_correct_percent.pkl to dataframe ... 



Feature Engineering Name: make_correct_percent



correctPer      : 사용자가 푼 전체 문항에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.6911196911196911


INDEX 1000: 0.6911196911196911


INDEX 2000: 0.6911196911196911


INDEX 3000: 0.6911196911196911


INDEX 4000: 0.6911196911196911


INDEX 5000: 0.6911196911196911


INDEX 6000: 0.6911196911196911


INDEX 7000: 0.6911196911196911


INDEX 8000: 0.6911196911196911


INDEX 9000: 0.6911196911196911


load features /home/j-gunmo/features/test_make_topn_correct_percent.pkl to dataframe ... 



Feature Engineering Name: make_topn_correct_percent



top10CorrectPer : 사용자가 최근 푼 TOP-10개에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.6


INDEX 1000: 0.6


INDEX 2000: 0.6


INDEX 3000: 0.6


INDEX 4000: 0.6


INDEX 5000: 0.6


INDEX 6000: 0.6


INDEX 7000: 0.6


INDEX 8000: 0.6


INDEX 9000: 0.6



top30CorrectPer : 사용자가 최근 푼 TOP-30개에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.6666666666666666


INDEX 1000: 0.6666666666666666


INDEX 2000: 0.6666666666666666


INDEX 3000: 0.6666666666666666


INDEX 4000: 0.6666666666666666


INDEX 5000: 0.6666666666666666


INDEX 6000: 0.6666666666666666


INDEX 7000: 0.6666666666666666


INDEX 8000: 0.6666666666666666


INDEX 9000: 0.6666666666666666



top50CorrectPer : 사용자가 최근 푼 TOP-50개에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.52


INDEX 1000: 0.52


INDEX 2000: 0.52


INDEX 3000: 0.52


INDEX 4000: 0.52


INDEX 5000: 0.52


INDEX 6000: 0.52


INDEX 7000: 0.52


INDEX 8000: 0.52


INDEX 9000: 0.52



top100CorrectPer : 사용자가 최근 푼 TOP-100개에 대한 정답률입니다.


dtype: float64


[Examples]


INDEX 0000: 0.58


INDEX 1000: 0.58


INDEX 2000: 0.58


INDEX 3000: 0.58


INDEX 4000: 0.58


INDEX 5000: 0.58


INDEX 6000: 0.58


INDEX 7000: 0.58


INDEX 8000: 0.58


INDEX 9000: 0.58


Feature Engineering End ... 


Original DataFrame Keywords: Index(['userID', 'assessmentItemID', 'testId', 'answerCode', 'Timestamp',
       'KnowledgeTag'],
      dtype='object')


Feature Added DataFrame Keywords: Index(['userID', 'assessmentItemID', 'testId', 'answerCode', 'Timestamp',
       'KnowledgeTag', 'testPaper', 'testPaperCnt', 'timeSec', 'firstClass',
       'secondClass', 'correctCnt', 'quesCnt', 'correctPer', 'top10CorrectPer',
       'top30CorrectPer', 'top50CorrectPer', 'top100CorrectPer'],
      dtype='object')


Split based on User


Original Train Dataset: 2266586


Split Train Dataset: 2047600


Split Valid Dataset: 218986


Preprocessing Labels .. 


Label Columns: ['testPaper', 'firstClass', 'secondClass']



Length of testPaper            : 1538


Before : 0    060001
1    060001
2    060001
3    060001
4    060001
5    060001
6    060003
7    060003
8    060003
9    060003
Name: testPaper, dtype: object


After : 0    975
1    975
2    975
3    975
4    975
5    975
6    977
7    977
8    977
9    977
Name: testPaper, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.datas["test"][k] = self.datas["test"][k].apply(lambda x: x if x in labels else "unknown")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.datas["test"][k] = self.datas["test"][k].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.datas["test"][k] = encoders[k].transform(self.d

Before : 0    6
1    6
2    6
3    6
4    6
5    6
6    6
7    6
8    6
9    6
Name: firstClass, dtype: object


After : 0    5
1    5
2    5
3    5
4    5
5    5
6    5
7    5
8    5
9    5
Name: firstClass, dtype: int64



Length of secondClass          : 913


Before : 0    7224
1    7225
2    7225
3    7225
4    7225
5    7225
6    7226
7    7226
8    7226
9    7226
Name: secondClass, dtype: object


After : 0    618
1    619
2    619
3    619
4    619
5    619
6    620
7    620
8    620
9    620
Name: secondClass, dtype: int64


Preprocessing Min Max .. 


Min Max Columns: ['top10CorrectPer', 'correctPer']


MAX: [1. 1.] MIN: [0. 0.]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.datas["test"][mm_cols] = mm_encoder.transform(self.datas["test"][mm_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())
Preprocessing Min Max .. 


Standard Columns: ['timeSec']


MEAN: [1.59502972e+09] VAR: [4.92225516e+13]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.datas["test"][std_cols] = std_encoder.transform(self.datas["test"][std_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())
Use the test datast for data augmentation


Before Length: 2047600


After Length: 2306970


Group By (userID, firstClass)


Group By (userID, firstClass) Length: 17967


Group By (userID, firstClass) Length: 1772


Group By (userID, firstClass) Length: 1987


In [8]:
# Fetch the three prepared splits by key.
# NOTE(review): presumably the "Group By (userID, firstClass)" results logged above — confirm.
train_dataset = preprocess.get_data('train_grouped')
valid_dataset = preprocess.get_data('valid_grouped')
test_dataset = preprocess.get_data('test_grouped')

In [9]:
import os
import json
import logging
import os.path as p
from datetime import datetime

import wandb
import torch
import numpy as np
import pandas as pd
from ray import tune
from torchinfo import summary
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

from logger import get_logger
from dkt_dataset import DKTDataset
from utils import get_args, get_criterion, get_optimizer, get_scheduler, set_seeds


class CustomStopper(tune.Stopper):
    """Ray Tune stopper driven by a validation-AUC target and an epoch budget.

    A trial is stopped once any trial has reported a ``valid_auc`` above
    ``target_auc`` or once the trial itself has run ``args.n_epochs`` training
    iterations. After the target has been reached, ``stop_all`` asks Tune to
    end the whole experiment.

    Args:
        args: Run arguments; only ``args.n_epochs`` is read.
        target_auc: Validation AUC that ends the experiment when exceeded.
            Defaults to 0.83, the value that used to be hard-coded.
    """

    def __init__(self, args, target_auc=0.83):
        self.should_stop = False
        self.args = args
        self.target_auc = target_auc

    def __call__(self, trial_id, result):
        """Return True when the trial that produced ``result`` should stop."""
        # Latch: once one trial beats the target, every later call reports "stop".
        if not self.should_stop and result["valid_auc"] > self.target_auc:
            self.should_stop = True

        return self.should_stop or result["training_iteration"] >= self.args.n_epochs

    def stop_all(self):
        """Return True when the entire experiment should stop."""
        return self.should_stop


class DKTTrainer:
    def __init__(self, args, Model):
        """Create a trainer from argument overrides and a model factory.

        Args:
            args: Mapping of overrides applied on top of the defaults from ``get_args()``.
            Model: Callable invoked later as ``Model(self.args)`` to build the network.
        """
        self.create_model = Model

        # Start from the project defaults and layer the caller's overrides on top.
        merged_args = get_args()
        merged_args.update(**args)
        self.args = merged_args

        self._helper_init()

    def _helper_init(self):
        self.prefix_save_path = datetime.now().strftime("[%m.%d_%H:%M]")
        self.prefix_save_path = p.join(self.args.root_dir, f"LOG_{self.prefix_save_path}")

        os.mkdir(self.prefix_save_path)

    def _save_config(self, args, filename="run_config.json"):
        save_path = p.join(self.prefix_save_path, filename)

        with open(save_path, "w") as writer:
            writer.write(json.dumps(args, indent=4, ensure_ascii=False) + "\n")

    def _get_model(self):
        model = self.create_model(self.args).to(self.args.device)
        return model

    def _update_params(self, loss, model, optimizer):
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), self.args.clip_grad)
        optimizer.step()
        optimizer.zero_grad()

    def _collate_fn(self, batches):
        """ key값으로 batch 형성 """
        new_batches = {k: [] for k in batches[0].keys()}

        max_seq_len = 20

        # batch의 값들을 각 column끼리 그룹화
        for k in batches[0].keys():
            for batch in batches:
                pre_padded = torch.zeros(max_seq_len)
                pre_padded[-len(batch[k]) :] = batch[k]
                new_batches[k].append(pre_padded)

        for k in batches[0].keys():
            new_batches[k] = torch.stack(new_batches[k])

        return new_batches

    def _get_loaders(self, train_data, valid_data):
        """Wrap the two splits in ``DKTDataset`` and return their data loaders.

        Returns:
            ``(train_loader, valid_loader)``; only the training loader shuffles.
        """
        trainset = DKTDataset(train_data, self.args, self.args.columns)
        validset = DKTDataset(valid_data, self.args, self.args.columns)

        # Options shared by both loaders; only `shuffle` differs between them.
        shared_options = dict(
            num_workers=self.args.num_workers,
            batch_size=self.args.batch_size,
            pin_memory=True,
            collate_fn=self._collate_fn,
        )

        train_loader = torch.utils.data.DataLoader(trainset, shuffle=True, **shared_options)
        valid_loader = torch.utils.data.DataLoader(validset, shuffle=False, **shared_options)

        return train_loader, valid_loader

    def _to_numpy(self, preds):
        if self.args.device == "cuda":
            preds = preds.to("cpu").detach().numpy()
        else:  # cpu
            preds = preds.detach().numpy()
        return preds

    def _save_model(self, model, prefix=None):
        save_path = p.join(self.args.root_dir, self.prefix_save_path)
        assert p.exists(save_path), f"{save_path} does not exist"

        # get original model if use torch.nn.DataParallel
        model = model.module if hasattr(model, "module") else model
        save_path = f"{save_path}/{prefix}_model.pth" if prefix else f"{save_path}/model.pth"
        torch.save(model.state_dict(), save_path)

    def _load_model(self, prefix=None):
        load_path = p.join(self.args.root_dir, self.prefix_save_path)
        load_path = f"{load_path}/{prefix}_model.pth" if prefix else f"{load_path}/model.pth"
        assert p.exists(load_path), f"{load_path} does not exist"

        model = self._get_model()
        # strict=False, 일치하지 않는 키들을 무시
        model.load_state_dict(torch.load(load_path), strict=False)
        return model

    def _get_metric(self, targets, preds):
        """Return ``(auc, acc)`` for predicted probabilities against binary targets.

        Accuracy is computed on predictions thresholded at 0.5.
        """
        auc = roc_auc_score(targets, preds)

        hard_preds = np.where(preds >= 0.5, 1, 0)
        acc = accuracy_score(targets, hard_preds)

        return auc, acc

    def _compute_loss(self, preds, targets):
        """Return the mean criterion value over the last position of every sequence."""
        per_position = get_criterion(preds, targets)

        # Only the final sequence position contributes to the loss.
        last_position = per_position[:, -1]
        return torch.mean(last_position)

    def _process_batch(self, batch):
        """Hook that turns a collated batch into the model input; subclasses must override.

        The training, validation, inference and debug code passes the result
        straight to the model and reads ``["answerCode"]`` from it.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def _hyper(self, checkpoint_dir):
        """Ray Tune trainable: train/validate in a loop, reporting and checkpointing each epoch.

        Args:
            checkpoint_dir: Directory holding a ``checkpoint`` file to resume from,
                or ``None`` to start from scratch.

        The loop never returns by itself; it relies on the stopper or scheduler
        passed to ``tune.run`` to end the trial.

        NOTE(review): Tune's function API normally passes the trial config as the
        first positional argument — confirm this signature against the installed
        Ray version.
        NOTE(review): the scheduler is created and checkpointed but never stepped
        in this loop — confirm that is intended.
        """
        step = 0

        model = self._get_model()
        optimizer = get_optimizer(model, self.args)
        scheduler = get_scheduler(optimizer, self.args)

        # Build the checkpoint path only when resuming: p.join(None, ...) raises TypeError.
        if checkpoint_dir is not None:
            checkpoint = torch.load(p.join(checkpoint_dir, "checkpoint"))
            model.load_state_dict(checkpoint["model"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            scheduler.load_state_dict(checkpoint["scheduler"])
            step = checkpoint["step"]

        while True:
            train_auc, train_acc, train_loss = self._train(model, self.train_loader, optimizer)
            valid_auc, valid_acc, _, _ = self._validate(model, self.valid_loader)

            tune.report(
                valid_auc=valid_auc,
                valid_acc=valid_acc,
                train_auc=train_auc,
                train_acc=train_acc,
                train_loss=train_loss,
            )

            step += 1

            # Write into the directory Tune hands out for this step rather than
            # the path the trial was restored from.
            with tune.checkpoint_dir(step=step) as save_dir:
                torch.save(
                    {
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict(),
                        "scheduler": scheduler.state_dict(),
                        "step": step,
                    },
                    p.join(save_dir, "checkpoint"),
                )

    def hyper(self, args, tune_args, train_data, valid_data):
        """Launch a population-based-training search over ``self._hyper`` with Ray Tune.

        Args:
            args: Not used by this method.
            tune_args: Keyword arguments forwarded to ``PopulationBasedTraining``.
            train_data: Training split; wrapped into ``self.train_loader``.
            valid_data: Validation split; wrapped into ``self.valid_loader``.

        Returns:
            The object returned by ``tune.run``.
        """
        loaders = self._get_loaders(train_data, valid_data)
        self.train_loader, self.valid_loader = loaders

        pbt_scheduler = tune.schedulers.PopulationBasedTraining(time_attr="training_iteration", **tune_args)
        stopper = CustomStopper(self.args)

        # NOTE(review): `metric` is an empty string here — confirm the metric/mode
        # are meant to come from `tune_args` instead.
        run_options = {
            "name": "pbt_lstm",
            "stop": stopper,
            "max_failures": 3,
            "num_samples": 4,
            "metric": "",
            "scheduler": pbt_scheduler,
            "keep_checkpoints_num": 2,
            "local_dir": "~/ray_results",
            "checkpoint_score_attr": "max-valid_auc",
            "resources_per_trial": {"cpu": 3, "gpu": 1},
            "config": self.args,  # custom search algorithm may ignore this
        }

        return tune.run(self._hyper, **run_options)

    def _train(self, model, train_loader, optimizer):
        model.train()

        total_preds, total_targets = [], []
        losses = []

        for step, batch in enumerate(train_loader):
            batch = self._process_batch(batch)
            preds = model(batch)
            targets = batch["answerCode"]  # correct

            loss = self._compute_loss(preds, targets)
            self._update_params(loss, model, optimizer)

            if step % self.args.log_steps == 0:
                print(f"Training steps: {step} Loss: {str(loss.item())}")
                wandb.log({"step_train_loss": loss})

            preds, targets = preds[:, -1], targets[:, -1]

            if self.args.device == "cuda":
                preds = preds.to("cpu").detach().numpy()
                targets = targets.to("cpu").detach().numpy()
            else:
                preds = preds.detach().numpy()
                targets = targets.detach().numpy()

            total_preds.append(preds)
            total_targets.append(targets)
            losses.append(loss)

        total_preds = np.concatenate(total_preds)
        total_targets = np.concatenate(total_targets)

        # Train AUC / ACC
        auc, acc = self._get_metric(total_targets, total_preds)
        loss_avg = sum(losses) / len(losses)

        return auc, acc, loss_avg

    def _validate(self, model, valid_loader):
        model.eval()

        total_preds = []
        total_targets = []

        for step, batch in enumerate(valid_loader):
            batch = self._process_batch(batch)

            preds = model(batch)
            targets = batch["answerCode"]  # correct

            # predictions
            preds = preds[:, -1]
            targets = targets[:, -1]

            if self.args.device == "cuda":
                preds = preds.to("cpu").detach().numpy()
                targets = targets.to("cpu").detach().numpy()
            else:  # cpu
                preds = preds.detach().numpy()
                targets = targets.detach().numpy()

            total_preds.append(preds)
            total_targets.append(targets)

        total_preds = np.concatenate(total_preds)
        total_targets = np.concatenate(total_targets)

        # Train AUC / ACC
        auc, acc = self._get_metric(total_targets, total_preds)
        print(f"VALID AUC : {auc} ACC : {acc}\n")

        return auc, acc, total_preds, total_targets

    def _inference(self, test_data, prefix=None):
        model = self._load_model(prefix)  # loaded best model to self.model
        model.eval()

        _, test_loader = self._get_loaders(test_data, test_data)

        total_proba_preds = []

        for step, batch in enumerate(test_loader):
            batch = self._process_batch(batch)

            fancy_index = torch.where(batch["answerCode"][:, -1] == -1)
            if fancy_index[0].size(0) == 0:
                continue

            for k in batch.keys():
                batch[k] = batch[k][fancy_index]

            preds = model(batch)
            preds = preds[:, -1]

            preds = self._to_numpy(preds)
            total_proba_preds += list(preds)

        write_path = os.path.join(self.prefix_save_path, f"{prefix}_test_results.csv")

        with open(write_path, "w", encoding="utf8") as w:
            w.write("id,prediction\n")
            for idx, proba in enumerate(total_proba_preds):
                w.write(f"{idx},{proba}\n")

    def debug(self, train_data, valid_data, test_data):
        """Smoke-test the input/output path and log the findings to ``debug.log``.

        1. Logs the model summary.
        2. Logs the first example of every split.
        3. Checks that batches can be built for every split.
        4. Runs one forward pass.
        5. Computes the loss and the metrics on that batch.

        The log file handler is detached and closed when the method finishes, so
        repeated calls neither duplicate log lines nor leak open files.
        """
        debug_file_handler = logging.FileHandler(f"{self.prefix_save_path}/debug.log")
        logger = get_logger("debug")
        logger.setLevel(logging.INFO)
        logger.addHandler(debug_file_handler)

        try:
            model = self._get_model()
            logger.info("MODEl SUMMARY\n")
            logger.info(summary(model))

            logger.info("\nCHECK DATASET")

            for dataset, name in zip([train_data, valid_data, test_data], ["TRAIN", "VALID", "TEST"]):
                logger.info(f"\n{name} EXAMPLES")
                for column, data in zip(self.args.columns, dataset[0]):
                    logger.info(f"{column} : {data[:10]}")

            train_loader, valid_loader = self._get_loaders(train_data, valid_data)
            _, test_loader = self._get_loaders(test_data, test_data)

            logger.info("\nCHECK BATCH SHAPE")
            # The validation loader comes last, so the forward/metric checks below
            # run on a validation batch.
            for data_loader, name in zip([train_loader, test_loader, valid_loader], ["TRAIN", "TEST", "VALID"]):
                batch = next(iter(data_loader))
                logger.info(f"\n{name} BATCH TYPE : {type(batch)}")
                logger.info(f"\n{name} BATCH LEN : {len(batch)}")
                logger.info(f"\n{name} BATCH DICT VALUE SHAPE : {batch['answerCode'].shape}")

            logger.info("\nCHECK MODEL FORWARD")

            batch = self._process_batch(batch)
            preds = model(batch)

            logger.info(f"\nPREDS SHAPE: {preds.shape}")
            logger.info(f"\nPREDS EXAMPLES: {preds[0]}")

            logger.info("\nCHECK METRICS")

            gt = batch["answerCode"]
            loss = self._compute_loss(preds, gt)

            logger.info(f"\nLOSS : {loss.item()}")

            auc, acc = self._get_metric(self._to_numpy(gt[:, -1]), self._to_numpy(preds[:, -1]))
            logger.info(f"AUC: {auc} ACC: {acc}")
        finally:
            # Always release the file handler, even when one of the checks fails.
            logger.removeHandler(debug_file_handler)
            debug_file_handler.close()

    def run(self, train_data, valid_data, test_data, prefix="run"):
        """Train with early stopping, keep the best model, then run inference.

        Args:
            train_data: Training split.
            valid_data: Validation split used for model selection.
            test_data: Test split passed to ``_inference`` after training.
            prefix: Name used for the log file, the saved model and the result file.

        Returns:
            ``(best_auc, best_acc)`` of the epoch with the highest validation AUC.

        The per-run log file handler is detached and closed on exit; otherwise
        every later call (for example the next fold of ``run_cv``) would keep
        writing into this run's log file as well.
        """
        self._save_config(self.args)
        set_seeds(self.args.seed)

        run_file_handler = logging.FileHandler(f"{self.prefix_save_path}/{prefix}.log")
        logger = get_logger("run")
        logger.setLevel(logging.DEBUG)
        logger.addHandler(run_file_handler)

        try:
            model = self._get_model()
            wandb.init(project="p-stage-4", reinit=True)
            wandb.config.update(self.args)
            wandb.watch(model)
            wandb.run.name = f"{self.prefix_save_path}_{prefix}"

            train_loader, valid_loader = self._get_loaders(train_data, valid_data)

            # NOTE(review): total_steps counts batches, while the scheduler below is
            # stepped once per epoch — confirm the intended warm-up unit.
            self.args.total_steps = int(len(train_loader.dataset) / self.args.batch_size) * (self.args.n_epochs)
            self.args.warmup_steps = self.args.total_steps // 10

            if self.args.scheduler == "linear_warmup":
                self.args.scheduler_hp = {
                    "num_training_steps": self.args.total_steps,
                    "num_warmup_steps": self.args.warmup_steps,
                }

            optimizer = get_optimizer(model, self.args)
            scheduler = get_scheduler(optimizer, self.args)

            best_auc, best_acc = -1, -1
            early_stopping_counter = 0

            for epoch in range(self.args.n_epochs):
                logger.info(f"Start Training: Epoch {epoch + 1}")

                train_auc, train_acc, train_loss = self._train(model, train_loader, optimizer)
                valid_auc, valid_acc, _, _ = self._validate(model, valid_loader)

                wandb.log(
                    {
                        "epoch": epoch,
                        "train_loss": train_loss,
                        "train_auc": train_auc,
                        "train_acc": train_acc,
                        "valid_auc": valid_auc,
                        "valid_acc": valid_acc,
                    }
                )

                logger.info(f"TRAIN_LOSS: {train_loss}")
                logger.info(f"TRAIN AUC: {train_auc} TRAIN ACC: {train_acc}")
                logger.info(f"VALID AUC: {valid_auc} VALID ACC: {valid_acc}\n")

                if valid_auc > best_auc:
                    # New best epoch: checkpoint it and reset the patience counter.
                    best_auc, best_acc = valid_auc, valid_acc
                    self._save_model(model, prefix)
                    early_stopping_counter = 0
                else:
                    early_stopping_counter += 1
                    logger.info(f"EarlyStopping counter: {early_stopping_counter}")
                    if early_stopping_counter >= self.args.patience:
                        logger.info(f"EarlyStopping counter: {early_stopping_counter} out of {self.args.patience}")
                        break

                if self.args.scheduler == "plateau":
                    scheduler.step(best_auc)
                else:
                    scheduler.step()

            self._inference(test_data, prefix)
            return best_auc, best_acc
        finally:
            # Always release the file handler, even when training fails.
            logger.removeHandler(run_file_handler)
            run_file_handler.close()

    def run_cv(self, train_data, valid_data, test_data, test_size:float, folds: int, seeds: list):
        assert folds == len(seeds), "fold와 len(seeds)는 같은 수여야 합니다."

        total_data = np.concatenate([train_data, valid_data])
        self.args.seeds = seeds

        valid_results = {}

        for n_fold, seed in enumerate(seeds):
            self.args.seed = seed
            # TODO: User 패턴이 학습이 된다면, 충분히 데이터 유출될 수 있음
            train_data, valid_data = train_test_split(total_data, test_size=test_size, random_state=seed)
            prefix = f"cv_{n_fold}"

            best_auc, best_acc = self.run(train_data, valid_data, test_data, prefix=prefix)
            valid_results[prefix] = f"best_auc:{best_auc},best_acc:{best_acc}"

        self._save_config(valid_results, "valid_cv_results.json")

        new_df = pd.DataFrame([])

        for idx in range(folds):
            df = pd.read_csv(p.join(self.prefix_save_path, f"cv_{idx}_test_results.csv"))

            if idx == 0:
                new_df["id"] = df["id"]
                new_df["prediction"] = df["prediction"]
            else:
                new_df["prediction"] += df["prediction"]

        new_df["prediction"] /= folds
        new_df.to_csv(p.join(self.prefix_save_path, "cv_ensemble_test_results.csv"))

In [10]:
import torch
import torch.nn as nn

class EmbeddingLayer(nn.Module):
    """Embed the ``interaction`` ids and every categorical feature, then concatenate them.

    The concatenated width equals ``hidden_dim``: each feature named in
    ``args.n_embeddings`` receives ``hidden_dim // (n_features + 1)`` channels and
    the interaction embedding takes the remaining channels.
    """

    def __init__(self, args, hidden_dim):
        """
        Args:
            args: Config object; ``args.device`` and ``args.n_embeddings`` (a mapping
                of feature name -> size) are read here.
            hidden_dim: Total width of the concatenated embedding.
        """
        super().__init__()

        self.args = args
        self.device = args.device
        self.hidden_dim = hidden_dim

        n_features = len(args.n_embeddings)
        labels_dim = hidden_dim // (n_features + 1)
        # The interaction table absorbs the rounding remainder so the widths add up exactly.
        interaction_dim = hidden_dim - labels_dim * n_features

        # Interaction ids index a table with three rows.
        self.embedding_interaction = nn.Embedding(3, interaction_dim)

        tables = {}
        for name, size in args.n_embeddings.items():
            # One extra row per table (reserved for padding, per the original author's note).
            tables[name] = nn.Embedding(size + 1, labels_dim)
        self.embeddings = nn.ModuleDict(tables)

    def forward(self, batch):
        """Return the interaction and feature embeddings concatenated along dim 2.

        Args:
            batch: Mapping holding ``"interaction"`` plus one index tensor per key of
                ``args.n_embeddings``.
        """
        pieces = [self.embedding_interaction(batch["interaction"])]
        for name in self.args.n_embeddings.keys():
            pieces.append(self.embeddings[name](batch[name]))
        return torch.cat(pieces, 2)


class LinearLayer(nn.Module):
    """Project the features named in ``args.n_linears`` to ``hidden_dim`` channels."""

    def __init__(self, args, hidden_dim):
        """
        Args:
            args: Config object; ``args.device`` and ``args.n_linears`` (the feature
                names, one input channel each) are read here.
            hidden_dim: Output width of the linear projection.
        """
        super().__init__()

        self.args = args
        self.device = args.device
        self.hidden_dim = hidden_dim

        n_features = len(args.n_linears)
        self.fc_layer = nn.Linear(n_features, hidden_dim)

    def forward(self, batch):
        """Stack the named features on a new last axis and apply the projection.

        Args:
            batch: Mapping holding one tensor per name in ``args.n_linears``; the
                permute below requires each of them to be 2-D.
        """
        columns = [batch[name] for name in self.args.n_linears]
        # torch.stack puts the feature axis first; move it to the end for nn.Linear.
        stacked = torch.stack(columns)
        features_last = stacked.permute(1, 2, 0)
        return self.fc_layer(features_last)


class LSTM(nn.Module):
    """LSTM sequence model over concatenated categorical and numeric feature projections.

    The categorical branch (``EmbeddingLayer``) and the numeric branch
    (``LinearLayer``) together produce ``hidden_dim`` channels, which are mixed by
    ``comb_proj``, run through a stacked ``nn.LSTM`` and mapped to one sigmoid
    output per time step.
    """

    def __init__(self, args):
        """
        Args:
            args: Config object; ``device``, ``hidden_dim`` and ``n_layers`` are read
                here, and ``args`` is forwarded to both feature layers.
        """
        super(LSTM, self).__init__()
        self.args = args
        self.device = args.device

        self.hidden_dim = self.args.hidden_dim
        self.n_layers = self.args.n_layers

        # Give the numeric branch the remainder so both branches always sum to
        # hidden_dim. Using hidden_dim // 2 for both drops one channel when
        # hidden_dim is odd, which makes comb_proj fail on the first forward pass.
        emb_dim = self.hidden_dim // 2
        nli_dim = self.hidden_dim - emb_dim

        self.emb_layer = EmbeddingLayer(args, emb_dim)
        self.nli_layer = LinearLayer(args, nli_dim)

        self.comb_proj = nn.Linear(self.hidden_dim, self.hidden_dim)

        self.lstm = nn.LSTM(self.hidden_dim, self.hidden_dim, self.n_layers, batch_first=True)

        # Fully connected layer
        self.fc = nn.Linear(self.hidden_dim, 1)
        self.activation = nn.Sigmoid()

    def init_hidden(self, batch_size):
        """Return zero-filled ``(h, c)`` states of shape ``(n_layers, batch_size, hidden_dim)`` on ``self.device``."""
        shape = (self.n_layers, batch_size, self.hidden_dim)
        # Allocate directly on the target device instead of creating on CPU and copying.
        h = torch.zeros(*shape, device=self.device)
        c = torch.zeros(*shape, device=self.device)
        return (h, c)

    def forward(self, batch):
        """Return one sigmoid output per time step, reshaped to ``(batch_size, -1)``.

        Args:
            batch: Mapping with ``"interaction"`` plus every key consumed by the
                embedding and linear feature layers.
        """
        batch_size = batch["interaction"].size(0)

        embed = self.emb_layer(batch)
        nnbed = self.nli_layer(batch)

        # Concatenate both branches on the channel axis and mix them.
        embed = torch.cat([embed, nnbed], 2)
        X = self.comb_proj(embed)

        hidden = self.init_hidden(batch_size)
        out, hidden = self.lstm(X, hidden)
        # Keep the (batch, seq, hidden) layout explicit before the per-step head.
        out = out.contiguous().view(batch_size, -1, self.hidden_dim)

        out = self.fc(out)
        preds = self.activation(out).view(batch_size, -1)

        return preds

    

class LSTMTrainer(DKTTrainer):
    """DKTTrainer specialisation that prepares batches for the LSTM model."""

    def _process_batch(self, batch):
        """Cast the batch tensors, derive ``interaction`` and move everything to ``args.device``.

        The dict is modified in place and also returned. ``batch['mask']`` is
        replaced by its copy shifted one step along dim 1 with column 0 zeroed,
        and ``batch['interaction']`` is ``answerCode + 1`` shifted the same way
        and multiplied by that shifted mask.
        """
        # Mask, target and the two continuous features become CPU float tensors.
        for key in ('mask', 'answerCode', 'correctPer', 'timeSec'):
            batch[key] = batch[key].type(torch.FloatTensor)

        # Shift answers and mask by one step; the first step has no predecessor, so it is masked out.
        shifted_answers = (batch['answerCode'] + 1).roll(shifts=1, dims=1)
        shifted_mask = batch['mask'].roll(shifts=1, dims=1)
        shifted_mask[:, 0] = 0
        batch['mask'] = shifted_mask
        batch['interaction'] = (shifted_answers * shifted_mask).to(torch.int64)

        # Categorical features are cast to int64.
        for key in ('testPaper', 'firstClass', 'secondClass'):
            batch[key] = batch[key].to(torch.int64)

        for key, tensor in batch.items():
            batch[key] = tensor.to(self.args.device)

        return batch

In [11]:
# Experiment configuration for the LSTM cross-validation run below.
args.columns = columns[1:]  # every entry of `columns` except the first one
args.hidden_dim = 512  # read by LSTM as the projection / LSTM width
args.n_epochs = 20
args.lr = 0.000144
args.batch_size = 60
args.n_layers = 2  # read by LSTM as the number of stacked LSTM layers
args.weight_decay = 0.00096

In [12]:
# Build the trainer from the shared config; the LSTM class itself (not an instance) is passed in.
trainer = LSTMTrainer(args, LSTM)

In [13]:
# Five runs, one per seed; each run re-splits the pooled train+valid data with test_size=0.5.
# run_cv asserts that `folds` equals len(seeds).
trainer.run_cv(train_dataset, valid_dataset, test_dataset, 
               test_size=0.5,
               folds=5,
               seeds=[0, 1, 2, 3, 4]
              )

[34m[1mwandb[0m: Currently logged in as: [33mggm1207[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.31 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.6920971870422363
Training steps: 50 Loss: 0.575532853603363
Training steps: 100 Loss: 0.5530985593795776
Training steps: 150 Loss: 0.6510840654373169
VALID AUC : 0.7628190249140673 ACC : 0.7004052684903749



Training steps: 0 Loss: 0.6171114444732666
Training steps: 50 Loss: 0.639782190322876
Training steps: 100 Loss: 0.5409539341926575
Training steps: 150 Loss: 0.6039746999740601
VALID AUC : 0.7661859024091989 ACC : 0.7038500506585613



Training steps: 0 Loss: 0.6034331321716309
Training steps: 50 Loss: 0.6079415678977966
Training steps: 100 Loss: 0.622562825679779
Training steps: 150 Loss: 0.635938823223114
VALID AUC : 0.7657850577252105 ACC : 0.7050658561296859



Training steps: 0 Loss: 0.5911692380905151
Training steps: 50 Loss: 0.5026051998138428
Training steps: 100 Loss: 0.6213133335113525
Training steps: 150 Loss: 0.5442060232162476
VALID AUC : 0.7655037019149253 ACC : 0.7056737588652482



Training steps: 0 Loss: 0.48414477705955505
Training steps: 50 Loss: 0.581581175327301
Training steps: 100 Loss: 0.583236575126648
Training steps: 150 Loss: 0.6167846918106079
VALID AUC : 0.7657725005009531 ACC : 0.7019250253292807



Training steps: 0 Loss: 0.6138796806335449
Training steps: 50 Loss: 0.6201583743095398
Training steps: 100 Loss: 0.5724982023239136
Training steps: 150 Loss: 0.5251679420471191
VALID AUC : 0.7650487851245189 ACC : 0.7048632218844985



Training steps: 0 Loss: 0.5781430006027222
Training steps: 50 Loss: 0.5723870992660522
Training steps: 100 Loss: 0.6044377684593201
Training steps: 150 Loss: 0.47730836272239685
VALID AUC : 0.7655357629130294 ACC : 0.7048632218844985



VBox(children=(Label(value=' 2.23MB of 2.23MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
step_train_loss,0.47731
_runtime,34.0
_timestamp,1623099854.0
_step,34.0
epoch,6.0
train_loss,0.56871
train_auc,0.77623
train_acc,0.7099
valid_auc,0.76554
valid_acc,0.70486


0,1
step_train_loss,█▄▃▇▆▆▃▅▅▅▆▆▅▂▆▃▁▄▄▆▅▆▄▃▄▄▅▁
_runtime,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
epoch,▁▂▃▅▆▇█
train_loss,█▂▂▁▂▁▁
train_auc,▁▆▇█▇██
train_acc,▁▅▅███▇
valid_auc,▁█▇▇▇▆▇
valid_acc,▁▆▇█▃▇▇


[34m[1mwandb[0m: wandb version 0.10.31 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.6939772367477417
Training steps: 50 Loss: 0.5656082630157471
Training steps: 100 Loss: 0.5424333214759827
Training steps: 150 Loss: 0.5935598015785217
VALID AUC : 0.7659575451020411 ACC : 0.7062816616008105



Training steps: 0 Loss: 0.5487687587738037
Training steps: 50 Loss: 0.6077823042869568
Training steps: 100 Loss: 0.5843160152435303
Training steps: 150 Loss: 0.5977464318275452
VALID AUC : 0.7684010886292449 ACC : 0.7092198581560284



Training steps: 0 Loss: 0.5988295674324036
Training steps: 50 Loss: 0.5057459473609924
Training steps: 100 Loss: 0.6168819069862366
Training steps: 150 Loss: 0.5458585619926453
VALID AUC : 0.7691797193011995 ACC : 0.7025329280648429



Training steps: 0 Loss: 0.5044910907745361
Training steps: 50 Loss: 0.5169557929039001
Training steps: 100 Loss: 0.6652311086654663
Training steps: 150 Loss: 0.6489245295524597
VALID AUC : 0.7686349529653443 ACC : 0.70790273556231



Training steps: 0 Loss: 0.5382675528526306
Training steps: 50 Loss: 0.5469913482666016
Training steps: 100 Loss: 0.5733548402786255
Training steps: 150 Loss: 0.6744528412818909
VALID AUC : 0.7688103614836321 ACC : 0.705775075987842



Training steps: 0 Loss: 0.5820040106773376
Training steps: 50 Loss: 0.619733989238739
Training steps: 100 Loss: 0.5875741243362427
Training steps: 150 Loss: 0.5354179739952087
VALID AUC : 0.7688594955798819 ACC : 0.70790273556231



Training steps: 0 Loss: 0.5300924181938171
Training steps: 50 Loss: 0.5471101999282837
Training steps: 100 Loss: 0.5490304827690125
Training steps: 150 Loss: 0.5400621891021729
VALID AUC : 0.7695823191170236 ACC : 0.7068895643363728



Training steps: 0 Loss: 0.45800089836120605
Training steps: 50 Loss: 0.46001774072647095
Training steps: 100 Loss: 0.5950084924697876
Training steps: 150 Loss: 0.5084618926048279
VALID AUC : 0.7697012223980025 ACC : 0.7083080040526849



Training steps: 0 Loss: 0.5743739008903503
Training steps: 50 Loss: 0.5514424443244934
Training steps: 100 Loss: 0.5078632831573486
Training steps: 150 Loss: 0.5746181011199951
VALID AUC : 0.7689036197641028 ACC : 0.7108409321175279



Training steps: 0 Loss: 0.5223076939582825
Training steps: 50 Loss: 0.4579819142818451
Training steps: 100 Loss: 0.625764787197113
Training steps: 150 Loss: 0.5936248898506165
VALID AUC : 0.7684222780932357 ACC : 0.709321175278622



Training steps: 0 Loss: 0.5156750082969666
Training steps: 50 Loss: 0.6670067310333252
Training steps: 100 Loss: 0.7118312120437622
Training steps: 150 Loss: 0.5333460569381714
VALID AUC : 0.7688714659844426 ACC : 0.7068895643363728



Training steps: 0 Loss: 0.6097995042800903
Training steps: 50 Loss: 0.545741856098175
Training steps: 100 Loss: 0.5570980310440063
Training steps: 150 Loss: 0.5551632046699524
VALID AUC : 0.7690568121970824 ACC : 0.7081053698074975



Training steps: 0 Loss: 0.6280754208564758
Training steps: 50 Loss: 0.6341596245765686
Training steps: 100 Loss: 0.5214053392410278
Training steps: 150 Loss: 0.6706625819206238
VALID AUC : 0.7685138937795988 ACC : 0.7082066869300911



VBox(children=(Label(value=' 2.23MB of 2.23MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
step_train_loss,0.67066
_runtime,61.0
_timestamp,1623099919.0
_step,64.0
epoch,12.0
train_loss,0.57294
train_auc,0.77105
train_acc,0.70402
valid_auc,0.76851
valid_acc,0.70821


0,1
step_train_loss,█▄▄▅▅▅▅▂▆▄▃▇▇▄▄▇▅▅▃▃▄▃▁▅▂▄▂▄▃▁▅▃▇▃▆▄▄▆▆▇
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
epoch,▁▂▂▃▃▄▅▅▆▆▇▇█
train_loss,█▂▂▁▁▁▁▁▁▁▁▁▁
train_auc,▁▇▇███▇██████
train_acc,▁▇██▇▇█▇█▇▇█▇
valid_auc,▁▆▇▆▆▆██▇▆▆▇▆
valid_acc,▄▇▁▆▄▆▅▆█▇▅▆▆


[34m[1mwandb[0m: wandb version 0.10.31 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.7001041769981384
Training steps: 50 Loss: 0.6515963673591614
Training steps: 100 Loss: 0.5495533347129822
Training steps: 150 Loss: 0.6098531484603882
VALID AUC : 0.7630983360468905 ACC : 0.7047619047619048



Training steps: 0 Loss: 0.4927167594432831
Training steps: 50 Loss: 0.5672284364700317
Training steps: 100 Loss: 0.5991107225418091
Training steps: 150 Loss: 0.5526065826416016
VALID AUC : 0.7668078543255836 ACC : 0.707193515704154



Training steps: 0 Loss: 0.5771178007125854
Training steps: 50 Loss: 0.5835651159286499
Training steps: 100 Loss: 0.47201138734817505
Training steps: 150 Loss: 0.6813755631446838
VALID AUC : 0.7680698934974888 ACC : 0.7045592705167173



Training steps: 0 Loss: 0.5463827848434448
Training steps: 50 Loss: 0.538460910320282
Training steps: 100 Loss: 0.5373997092247009
Training steps: 150 Loss: 0.5226815938949585
VALID AUC : 0.7678660327398693 ACC : 0.705369807497467



Training steps: 0 Loss: 0.6196378469467163
Training steps: 50 Loss: 0.5584770441055298
Training steps: 100 Loss: 0.6440292596817017
Training steps: 150 Loss: 0.568621039390564
VALID AUC : 0.7685795967315086 ACC : 0.7083080040526849



Training steps: 0 Loss: 0.547674834728241
Training steps: 50 Loss: 0.5732818245887756
Training steps: 100 Loss: 0.5672615170478821
Training steps: 150 Loss: 0.5543819665908813
VALID AUC : 0.7686691747132489 ACC : 0.7078014184397163



Training steps: 0 Loss: 0.540597140789032
Training steps: 50 Loss: 0.5826511979103088
Training steps: 100 Loss: 0.5686967372894287
Training steps: 150 Loss: 0.6458607316017151
VALID AUC : 0.768535095244477 ACC : 0.7056737588652482



Training steps: 0 Loss: 0.5405254364013672
Training steps: 50 Loss: 0.5855568051338196
Training steps: 100 Loss: 0.5902345776557922
Training steps: 150 Loss: 0.5645533800125122
VALID AUC : 0.7690921339319381 ACC : 0.7085106382978723



Training steps: 0 Loss: 0.605832040309906
Training steps: 50 Loss: 0.579571545124054
Training steps: 100 Loss: 0.5531253218650818
Training steps: 150 Loss: 0.5346544981002808
VALID AUC : 0.7685452194867967 ACC : 0.7060790273556231



Training steps: 0 Loss: 0.6999281644821167
Training steps: 50 Loss: 0.5503233671188354
Training steps: 100 Loss: 0.5078456997871399
Training steps: 150 Loss: 0.5061591863632202
VALID AUC : 0.7692540807370745 ACC : 0.7077001013171226



Training steps: 0 Loss: 0.6285519003868103
Training steps: 50 Loss: 0.5420950651168823
Training steps: 100 Loss: 0.48766908049583435
Training steps: 150 Loss: 0.5454977750778198
VALID AUC : 0.7694576334548667 ACC : 0.7065856129685917



Training steps: 0 Loss: 0.7043665647506714
Training steps: 50 Loss: 0.4715009331703186
Training steps: 100 Loss: 0.5665283203125
Training steps: 150 Loss: 0.5250775218009949
VALID AUC : 0.7693550767284083 ACC : 0.7068895643363728



Training steps: 0 Loss: 0.5464432239532471
Training steps: 50 Loss: 0.5235557556152344
Training steps: 100 Loss: 0.5870537757873535
Training steps: 150 Loss: 0.6034534573554993
VALID AUC : 0.7695822663689388 ACC : 0.707193515704154



Training steps: 0 Loss: 0.5449433326721191
Training steps: 50 Loss: 0.5335748791694641
Training steps: 100 Loss: 0.5351054668426514
Training steps: 150 Loss: 0.6496914029121399
VALID AUC : 0.7690167257822477 ACC : 0.705775075987842



Training steps: 0 Loss: 0.6459300518035889
Training steps: 50 Loss: 0.5506805777549744
Training steps: 100 Loss: 0.5701553225517273
Training steps: 150 Loss: 0.49873659014701843
VALID AUC : 0.7684054515492166 ACC : 0.7067882472137791



Training steps: 0 Loss: 0.5293064713478088
Training steps: 50 Loss: 0.5559449791908264
Training steps: 100 Loss: 0.5682984590530396
Training steps: 150 Loss: 0.49361392855644226
VALID AUC : 0.767981198563253 ACC : 0.7073961499493414



Training steps: 0 Loss: 0.5432067513465881
Training steps: 50 Loss: 0.6235931515693665
Training steps: 100 Loss: 0.4975557029247284
Training steps: 150 Loss: 0.5805210471153259
VALID AUC : 0.7691650366910345 ACC : 0.7066869300911854



Training steps: 0 Loss: 0.49574148654937744
Training steps: 50 Loss: 0.5355111360549927
Training steps: 100 Loss: 0.5569016337394714
Training steps: 150 Loss: 0.5846492648124695
VALID AUC : 0.76827993558764 ACC : 0.7087132725430598



VBox(children=(Label(value=' 2.23MB of 2.23MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
step_train_loss,0.58465
_runtime,82.0
_timestamp,1623100005.0
_step,89.0
epoch,17.0
train_loss,0.56631
train_auc,0.77897
train_acc,0.71274
valid_auc,0.76828
valid_acc,0.70871


0,1
step_train_loss,█▇▅▄▃▄▁▃▃▆▆▃▄▄▄▆▄▅▅▃█▂▆▃▃▁▃▃▅▃▃▆▄▃▄▂▆▄▃▄
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
train_loss,█▃▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁
train_auc,▁▆▇▇█▇█▇██████████
train_acc,▁▅▅▆▆▆▇▇████████▇▇
valid_auc,▁▅▆▆▇▇▇▇▇████▇▇▆█▇
valid_acc,▁▅▁▂▇▆▃█▄▆▄▅▅▃▅▆▅█


[34m[1mwandb[0m: wandb version 0.10.31 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.6912305951118469
Training steps: 50 Loss: 0.6118088960647583
Training steps: 100 Loss: 0.5190432667732239
Training steps: 150 Loss: 0.5605455040931702
VALID AUC : 0.7606485065903648 ACC : 0.6977710233029382



Training steps: 0 Loss: 0.6223378777503967
Training steps: 50 Loss: 0.6943544149398804
Training steps: 100 Loss: 0.6367961168289185
Training steps: 150 Loss: 0.5247640013694763
VALID AUC : 0.7640139732034441 ACC : 0.6987841945288754



Training steps: 0 Loss: 0.582683801651001
Training steps: 50 Loss: 0.6453969478607178
Training steps: 100 Loss: 0.5457646250724792
Training steps: 150 Loss: 0.5579679012298584
VALID AUC : 0.7652149661042069 ACC : 0.7



Training steps: 0 Loss: 0.5837329626083374
Training steps: 50 Loss: 0.5753031969070435
Training steps: 100 Loss: 0.6579641103744507
Training steps: 150 Loss: 0.6144315004348755
VALID AUC : 0.7654200061583903 ACC : 0.7002026342451875



Training steps: 0 Loss: 0.47259968519210815
Training steps: 50 Loss: 0.5314704775810242
Training steps: 100 Loss: 0.5117726922035217
Training steps: 150 Loss: 0.5763505101203918
VALID AUC : 0.7654832044201073 ACC : 0.7



Training steps: 0 Loss: 0.5865265130996704
Training steps: 50 Loss: 0.5864701867103577
Training steps: 100 Loss: 0.6040176153182983
Training steps: 150 Loss: 0.6036890745162964
VALID AUC : 0.7653013117305504 ACC : 0.6965552178318136



Training steps: 0 Loss: 0.4542074501514435
Training steps: 50 Loss: 0.5062698125839233
Training steps: 100 Loss: 0.6157039999961853
Training steps: 150 Loss: 0.5447930693626404
VALID AUC : 0.7655641072322308 ACC : 0.6980749746707193



Training steps: 0 Loss: 0.5300652980804443
Training steps: 50 Loss: 0.6887621879577637
Training steps: 100 Loss: 0.4943042993545532
Training steps: 150 Loss: 0.555027425289154
VALID AUC : 0.7655860838783267 ACC : 0.6993920972644377



Training steps: 0 Loss: 0.7037352323532104
Training steps: 50 Loss: 0.6268665790557861
Training steps: 100 Loss: 0.5522401332855225
Training steps: 150 Loss: 0.5730817317962646
VALID AUC : 0.7657598020770928 ACC : 0.7005065856129686



Training steps: 0 Loss: 0.49235469102859497
Training steps: 50 Loss: 0.6359086036682129
Training steps: 100 Loss: 0.5914226174354553
Training steps: 150 Loss: 0.4957057535648346
VALID AUC : 0.7659160416535902 ACC : 0.700709219858156



Training steps: 0 Loss: 0.5625631213188171
Training steps: 50 Loss: 0.680475652217865
Training steps: 100 Loss: 0.5394958853721619
Training steps: 150 Loss: 0.5170425772666931
VALID AUC : 0.7655008473537489 ACC : 0.6995947315096251



Training steps: 0 Loss: 0.5241779685020447
Training steps: 50 Loss: 0.5585733652114868
Training steps: 100 Loss: 0.509527325630188
Training steps: 150 Loss: 0.5723485350608826
VALID AUC : 0.7656945499232173 ACC : 0.6988855116514691



Training steps: 0 Loss: 0.5363513231277466
Training steps: 50 Loss: 0.6622883677482605
Training steps: 100 Loss: 0.6162733435630798
Training steps: 150 Loss: 0.5894209742546082
VALID AUC : 0.766073944882736 ACC : 0.7004052684903749



Training steps: 0 Loss: 0.5096485018730164
Training steps: 50 Loss: 0.5241687297821045
Training steps: 100 Loss: 0.5340245962142944
Training steps: 150 Loss: 0.481601357460022
VALID AUC : 0.7658122174049778 ACC : 0.696757852077001



Training steps: 0 Loss: 0.5083456039428711
Training steps: 50 Loss: 0.6289308071136475
Training steps: 100 Loss: 0.4640214443206787
Training steps: 150 Loss: 0.5439263582229614
VALID AUC : 0.7650075640740412 ACC : 0.6995947315096251



Training steps: 0 Loss: 0.6096076369285583
Training steps: 50 Loss: 0.5665196776390076
Training steps: 100 Loss: 0.5685922503471375
Training steps: 150 Loss: 0.5799347162246704
VALID AUC : 0.7656642960917226 ACC : 0.6996960486322188



Training steps: 0 Loss: 0.5412495136260986
Training steps: 50 Loss: 0.423947274684906
Training steps: 100 Loss: 0.5943970084190369
Training steps: 150 Loss: 0.6731114983558655
VALID AUC : 0.7650756095212524 ACC : 0.7017223910840932



Training steps: 0 Loss: 0.4657229483127594
Training steps: 50 Loss: 0.5562278032302856
Training steps: 100 Loss: 0.5812209844589233
Training steps: 150 Loss: 0.5252683758735657
VALID AUC : 0.7651389926332639 ACC : 0.701418439716312



VBox(children=(Label(value=' 2.23MB of 2.23MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
step_train_loss,0.52527
_runtime,84.0
_timestamp,1623100093.0
_step,89.0
epoch,17.0
train_loss,0.56435
train_auc,0.77945
train_acc,0.71314
valid_auc,0.76514
valid_acc,0.70142


0,1
step_train_loss,█▆▄█▄▇▄▅▇▂▃▅▅▅▃▄█▃█▄▃▅▄▇▃▄▅▇▆▃▄▃▂▆▅▅▁▇▄▄
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
train_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▂▁
train_auc,▁▆▇▇▇▇█▇████████▇█
train_acc,▁▅▇▇▇▇▇▇▇▇█▇██▇▇▆▇
valid_auc,▁▅▇▇▇▇▇▇██▇███▇▇▇▇
valid_acc,▃▄▆▆▆▁▃▅▆▇▅▄▆▁▅▅██


[34m[1mwandb[0m: wandb version 0.10.31 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.6939135193824768
Training steps: 50 Loss: 0.5450733304023743
Training steps: 100 Loss: 0.55817711353302
Training steps: 150 Loss: 0.527776837348938
VALID AUC : 0.767287344968185 ACC : 0.703242147922999



Training steps: 0 Loss: 0.5818948149681091
Training steps: 50 Loss: 0.6454010605812073
Training steps: 100 Loss: 0.5357780456542969
Training steps: 150 Loss: 0.6046144366264343
VALID AUC : 0.7680727333961042 ACC : 0.704660587639311



Training steps: 0 Loss: 0.5527403354644775
Training steps: 50 Loss: 0.542732834815979
Training steps: 100 Loss: 0.6141574382781982
Training steps: 150 Loss: 0.5232219696044922
VALID AUC : 0.7703990192142419 ACC : 0.7065856129685917



Training steps: 0 Loss: 0.5261073112487793
Training steps: 50 Loss: 0.6449102759361267
Training steps: 100 Loss: 0.6048198342323303
Training steps: 150 Loss: 0.6135616898536682
VALID AUC : 0.7700187388401223 ACC : 0.7073961499493414



Training steps: 0 Loss: 0.6172245144844055
Training steps: 50 Loss: 0.6562529802322388
Training steps: 100 Loss: 0.5579624176025391
Training steps: 150 Loss: 0.5857707262039185
VALID AUC : 0.7700323540193016 ACC : 0.7024316109422493



Training steps: 0 Loss: 0.5508071184158325
Training steps: 50 Loss: 0.592018187046051
Training steps: 100 Loss: 0.6317665576934814
Training steps: 150 Loss: 0.5763674974441528
VALID AUC : 0.7702307700995881 ACC : 0.7044579533941236



Training steps: 0 Loss: 0.6042513847351074
Training steps: 50 Loss: 0.5858402848243713
Training steps: 100 Loss: 0.6575174927711487
Training steps: 150 Loss: 0.6711153984069824
VALID AUC : 0.7696744781104679 ACC : 0.7054711246200608



Training steps: 0 Loss: 0.5623224973678589
Training steps: 50 Loss: 0.5081357359886169
Training steps: 100 Loss: 0.5935376882553101
Training steps: 150 Loss: 0.6056705117225647
VALID AUC : 0.7696602673955477 ACC : 0.704660587639311



In [14]:
def metric_result(prefix_path, fold):
    """Print the stored per-fold CV results and return the mean of their best AUCs.

    Reads ``<prefix_path>/valid_cv_results.json``, whose values are strings of the
    form ``"best_auc:<float>,best_acc:<float>"``. The path, every entry and its
    parsed AUC are printed.

    Args:
        prefix_path: Directory containing ``valid_cv_results.json``.
        fold: Expected number of entries. The mean is always taken over the
            entries actually found; a warning is issued when the two disagree.

    Returns:
        The arithmetic mean of the ``best_auc`` values in the file.

    Raises:
        ValueError: If the result file contains no entries.
    """
    import warnings

    print(prefix_path)
    file_path = f"{prefix_path}/valid_cv_results.json"

    with open(file_path, "r") as f:
        results = json.load(f)

    if not results:
        raise ValueError(f"no fold results found in {file_path}")

    n_results = len(results)
    if fold != n_results:
        # Dividing by a count that differs from the number of summed values
        # would silently return a wrong average.
        warnings.warn(
            f"fold={fold} does not match the {n_results} results in {file_path}; "
            f"averaging over {n_results}"
        )

    total_auc = 0
    for v in results.values():
        print(v)
        # "best_auc:<x>,best_acc:<y>" -> <x>
        auc = float(v.split(",")[0].split(":")[1])
        print(auc)
        total_auc += auc

    return total_auc / n_results

**(folds=5, test_size=0.5)**

In [16]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 5 runs.
metric_result(trainer.prefix_save_path, 5)

/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.08_06:02]
best_auc:0.7661859024091989,best_acc:0.7038500506585613
0.7661859024091989
best_auc:0.7697012223980025,best_acc:0.7083080040526849
0.7697012223980025
best_auc:0.7695822663689388,best_acc:0.707193515704154
0.7695822663689388
best_auc:0.766073944882736,best_acc:0.7004052684903749
0.766073944882736
best_auc:0.7703990192142419,best_acc:0.7065856129685917
0.7703990192142419


0.7683884710546237

**(folds=3, test_size=0.1)**

'/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]'

In [47]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 3 runs.
metric_result(trainer.prefix_save_path, 3)

best_auc:0.7787203082239915,best_acc:0.7031408308004052
0.7787203082239915
best_auc:0.7568773386852548,best_acc:0.6930091185410334
0.7568773386852548
best_auc:0.7832042699233832,best_acc:0.7173252279635258
0.7832042699233832


0.7729339722775431

**(folds=3, test_size=0.2)**

In [51]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 3 runs.
metric_result(trainer.prefix_save_path, 3)

/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]
best_auc:0.7711663795329549,best_acc:0.7021276595744681
0.7711663795329549
best_auc:0.7694992957225927,best_acc:0.708966565349544
0.7694992957225927
best_auc:0.7688831714676145,best_acc:0.705420466058764
0.7688831714676145


0.7698496155743874

**(folds=3, test_size=0.3)**

In [53]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 3 runs.
metric_result(trainer.prefix_save_path, 3)

/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]
best_auc:0.7716193510849351,best_acc:0.706855791962175
0.7716193510849351
best_auc:0.7674919575633128,best_acc:0.7007767646065518
0.7674919575633128
best_auc:0.7670384501490044,best_acc:0.707531239446133
0.7670384501490044


0.7687165862657507

**(folds=5, test_size=0.1)**

In [55]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 5 runs.
metric_result(trainer.prefix_save_path, 5)

/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]
best_auc:0.7787203082239915,best_acc:0.7031408308004052
0.7787203082239915
best_auc:0.7568773386852548,best_acc:0.6930091185410334
0.7568773386852548
best_auc:0.7832042699233832,best_acc:0.7173252279635258
0.7832042699233832
best_auc:0.763088549745266,best_acc:0.6990881458966566
0.763088549745266
best_auc:0.7603983572895276,best_acc:0.7001013171225937
0.7603983572895276


0.7684577647734846

**(folds=5, test_size=0.2)**

In [58]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 5 runs.
metric_result(trainer.prefix_save_path, 5)

/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]
best_auc:0.7711662510614954,best_acc:0.7021276595744681
0.7711662510614954
best_auc:0.7694992957225927,best_acc:0.708966565349544
0.7694992957225927
best_auc:0.7688831714676148,best_acc:0.705420466058764
0.7688831714676148
best_auc:0.7611421154394541,best_acc:0.7008611955420466
0.7611421154394541
best_auc:0.7683923069322051,best_acc:0.7031408308004052
0.7683923069322051


0.7678166281246723

**(folds=5, test_size=0.3)**

In [60]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 5 runs.
metric_result(trainer.prefix_save_path, 5)

/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]
best_auc:0.7716193510849351,best_acc:0.706855791962175
0.7716193510849351
best_auc:0.7674919575633128,best_acc:0.7007767646065518
0.7674919575633128
best_auc:0.7670384501490044,best_acc:0.707531239446133
0.7670384501490044
best_auc:0.761875755153294,best_acc:0.698919284025667
0.761875755153294
best_auc:0.7712231208940146,best_acc:0.7060114826072272
0.7712231208940146


0.7678497269689121

**(folds=10, test_size=0.1)**

In [62]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 10 runs.
metric_result(trainer.prefix_save_path, 10)

/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]
best_auc:0.7787203082239915,best_acc:0.7031408308004052
0.7787203082239915
best_auc:0.7568773386852548,best_acc:0.6930091185410334
0.7568773386852548
best_auc:0.7832042699233832,best_acc:0.7173252279635258
0.7832042699233832
best_auc:0.763088549745266,best_acc:0.6990881458966566
0.763088549745266
best_auc:0.7603983572895276,best_acc:0.7001013171225937
0.7603983572895276
best_auc:0.7687280830111568,best_acc:0.7102330293819655
0.7687280830111568
best_auc:0.7894953798767967,best_acc:0.7264437689969605
0.7894953798767967
best_auc:0.7855520736077601,best_acc:0.7137791286727457
0.7855520736077601
best_auc:0.7867072490505839,best_acc:0.7137791286727457
0.7867072490505839
best_auc:0.76207450000411,best_acc:0.7036474164133738
0.76207450000411


0.773484610941783

**(folds=10, test_size=0.2)**

In [64]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 10 runs.
metric_result(trainer.prefix_save_path, 10)

/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]
best_auc:0.7711662510614954,best_acc:0.7021276595744681
0.7711662510614954
best_auc:0.7694992957225927,best_acc:0.708966565349544
0.7694992957225927
best_auc:0.7688831714676145,best_acc:0.705420466058764
0.7688831714676145
best_auc:0.761142115439454,best_acc:0.7008611955420466
0.761142115439454
best_auc:0.7683923069322051,best_acc:0.7031408308004052
0.7683923069322051
best_auc:0.772597449538329,best_acc:0.7102330293819655
0.772597449538329
best_auc:0.7781345440173286,best_acc:0.7193515704154002
0.7781345440173286
best_auc:0.7746736038092428,best_acc:0.7074468085106383
0.7746736038092428
best_auc:0.7780290602025899,best_acc:0.7079533941236069
0.7780290602025899
best_auc:0.7647608510883205,best_acc:0.7084599797365755
0.7647608510883205


0.7707278649279171

**(folds=10, test_size=0.3)**

In [66]:
# Mean best AUC from valid_cv_results.json in the trainer's save directory, divided by 10 runs.
metric_result(trainer.prefix_save_path, 10)

/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]
best_auc:0.7716193510849351,best_acc:0.706855791962175
0.7716193510849351
best_auc:0.7674919575633128,best_acc:0.7007767646065518
0.7674919575633128
best_auc:0.7670384501490044,best_acc:0.707531239446133
0.7670384501490044
best_auc:0.761875755153294,best_acc:0.698919284025667
0.761875755153294
best_auc:0.7712231208940146,best_acc:0.7060114826072272
0.7712231208940146
best_auc:0.7697955602890236,best_acc:0.7012833502195205
0.7697955602890236
best_auc:0.7769459937007558,best_acc:0.7102330293819655
0.7769459937007558
best_auc:0.7756501346153429,best_acc:0.7051671732522796
0.7756501346153429
best_auc:0.7749429001964473,best_acc:0.704322863897332
0.7749429001964473
best_auc:0.7714736603336819,best_acc:0.7066869300911854
0.7714736603336819


0.7708056883979812

In [71]:
# Load the averaged CV predictions, using the 'Unnamed: 0' column as the index.
# NOTE(review): that column is presumably the default index run_cv wrote via to_csv — confirm.
df = pd.read_csv(
    "../models/lstm/hyper_test/LOG_[06.07_16:15]/cv_ensemble_test_results.csv",
    index_col=['Unnamed: 0']
)

In [72]:
# Preview the first rows of the loaded ensemble predictions.
df.head()

Unnamed: 0,id,prediction
0,0,0.589862
1,1,0.594878
2,2,0.262907
3,3,0.788662
4,4,0.433151


In [75]:
# Show the directory the trainer currently uses for its outputs.
trainer.prefix_save_path

'/home/j-gunmo/desktop/00.my-project/17.P-Stage-T1003/4-STAGE/models/lstm/hyper_test/LOG_[06.07_16:15]'

In [74]:
# Re-save the predictions without the index column. The target name is
# "cv_ensembles_..." (extra "s"), so the file that was read is not overwritten.
df.to_csv(
    "../models/lstm/hyper_test/LOG_[06.07_16:15]/cv_ensembles_test_results.csv",
    index=False
)

In [73]:
df.to_csv??