# data train val test를 나눌때 주의

증상 : 만약 학습을 시키고 점수가 모두 nan이 나온다면

길이 2이하인 데이터를 지우는 클렌징을 나누고나서 진행해줘야한다

아니면 2이하가 안생기게 나눠주는 방법을 고민해보자

# 


In [51]:
import pandas
import tensorflow

print(pandas.__version__)
print(tensorflow.__version__)

1.3.3
2.6.0


In [52]:
import datetime as dt
from pathlib import Path
import os
import time
from datetime import datetime
from IPython.display import display

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [53]:
data_path = Path(os.getenv('HOME')+'/aiffel/yoochoose/data/') 
train_path = data_path / 'ratings.dat'

def load_data(data_path: Path, nrows=None):
    data = pd.read_csv(data_path, sep='::', header=None, usecols=[0, 1, 2, 3], dtype={0: np.int32, 1: np.int32, 2: np.int32}, nrows=nrows)
    data.columns = ['UserId', 'ItemId', 'Rating', 'Time']
    # time 형식을 기존과 맞게 고쳐보기
    data['Time'] = pd.to_datetime(data['Time'], unit='s')
    return data

data = load_data(train_path, None)
data.sort_values(['UserId', 'Time'], inplace=True)  # data를 id와 시간 순서로 정렬해줍니다.
data

Unnamed: 0,UserId,ItemId,Rating,Time
31,1,3186,4,2000-12-31 22:00:19
22,1,1270,5,2000-12-31 22:00:55
27,1,1721,4,2000-12-31 22:00:55
37,1,1022,5,2000-12-31 22:00:55
24,1,2340,3,2000-12-31 22:01:43
...,...,...,...,...
1000019,6040,2917,4,2001-08-10 14:40:29
999988,6040,1921,4,2001-08-10 14:41:04
1000172,6040,1784,3,2001-08-10 14:41:04
1000167,6040,161,3,2001-08-10 14:41:26


In [54]:
# 이부분을 이 데이터에 맞게 고치기

# short_session을 제거한 다음 unpopular item을 제거하면 다시 길이가 1인 session이 생길 수 있습니다.
# 이를 위해 반복문을 통해 지속적으로 제거 합니다.
def cleanse_recursive(data: pd.DataFrame, shortest, least_click) -> pd.DataFrame:
    while True:
        before_len = len(data)
        data = cleanse_short_session(data, shortest)
        data = cleanse_unpopular_item(data, least_click)
        after_len = len(data)
        if before_len == after_len:
            break
    return data


def cleanse_short_session(data: pd.DataFrame, shortest):
    session_len = data.groupby('UserId').size()
    session_use = session_len[session_len >= shortest].index
    data = data[data['UserId'].isin(session_use)]
    return data


def cleanse_unpopular_item(data: pd.DataFrame, least_click):
    item_popular = data.groupby('ItemId').size()
    item_use = item_popular[item_popular >= least_click].index
    data = data[data['ItemId'].isin(item_use)]
    return data

In [55]:
data = cleanse_recursive(data, shortest=2, least_click=5)
data

Unnamed: 0,UserId,ItemId,Rating,Time
31,1,3186,4,2000-12-31 22:00:19
22,1,1270,5,2000-12-31 22:00:55
27,1,1721,4,2000-12-31 22:00:55
37,1,1022,5,2000-12-31 22:00:55
24,1,2340,3,2000-12-31 22:01:43
...,...,...,...,...
1000019,6040,2917,4,2001-08-10 14:40:29
999988,6040,1921,4,2001-08-10 14:41:04
1000172,6040,1784,3,2001-08-10 14:41:04
1000167,6040,161,3,2001-08-10 14:41:26


In [56]:
# 1) 세션 길이 분포 확인
print("검증 세션 수:", data['UserId'].nunique())
print("세션별 길이(상위 10개):")
print(data.groupby('UserId').size().sort_values(ascending=False).head(10))

# 2) 길이 1인 세션 개수
print("길이 1 세션 비율:",
      (data.groupby('UserId').size() == 1).mean())

검증 세션 수: 6040
세션별 길이(상위 10개):
UserId
4169    2277
1680    1850
4277    1740
1941    1594
1181    1521
889     1514
3618    1342
2063    1320
1150    1299
1015    1285
dtype: int64
길이 1 세션 비율: 0.0


In [58]:
def split_by_date(data: pd.DataFrame, n_days: int):
    final_time = data['Time'].max()
    cutoff_time = final_time - dt.timedelta(days=n_days)

    # Train: cutoff 이전의 데이터
    train = data[data['Time'] < cutoff_time]

    # Test: cutoff 이후의 데이터, 단 아이템은 train에 있던 것만
    test = data[(data['Time'] >= cutoff_time) & (data['ItemId'].isin(train['ItemId']))]

    return train, test

In [59]:
tr, test = split_by_date(data, n_days=2)  # 마지막 이틀만 테스트용으로 분리
tr, val = split_by_date(tr, n_days=2)  # 마지막 이틀만 테스트용으로 분리

In [60]:
# data에 대한 정보를 살펴봅니다.
def stats_info(data: pd.DataFrame, status: str):
    print(f'* {status} Set Stats Info\n'
          f'\t Events: {len(data)}\n'
          f'\t Sessions: {data["UserId"].nunique()}\n'
          f'\t Items: {data["ItemId"].nunique()}\n'
          f'\t First Time : {data["Time"].min()}\n'
          f'\t Last Time : {data["Time"].max()}\n')

In [61]:
stats_info(tr, 'train')
stats_info(val, 'valid')
stats_info(test, 'test')

* train Set Stats Info
	 Events: 999321
	 Sessions: 6040
	 Items: 3416
	 First Time : 2000-04-25 23:05:32
	 Last Time : 2003-02-24 06:02:24

* valid Set Stats Info
	 Events: 229
	 Sessions: 9
	 Items: 225
	 First Time : 2003-02-24 17:02:07
	 Last Time : 2003-02-26 16:18:03

* test Set Stats Info
	 Events: 61
	 Sessions: 12
	 Items: 59
	 First Time : 2003-02-27 04:30:55
	 Last Time : 2003-02-28 17:49:50



클렌징을 추가로 진행해줘야함

In [62]:
tr = cleanse_recursive(tr, shortest=2, least_click=5)
val = cleanse_recursive(val, shortest=2, least_click=1)
test = cleanse_recursive(test, shortest=2, least_click=1)

In [63]:
stats_info(tr, 'train')
stats_info(val, 'valid')
stats_info(test, 'test')

* train Set Stats Info
	 Events: 999321
	 Sessions: 6040
	 Items: 3416
	 First Time : 2000-04-25 23:05:32
	 Last Time : 2003-02-24 06:02:24

* valid Set Stats Info
	 Events: 228
	 Sessions: 8
	 Items: 224
	 First Time : 2003-02-24 17:02:07
	 Last Time : 2003-02-26 16:18:03

* test Set Stats Info
	 Events: 56
	 Sessions: 7
	 Items: 54
	 First Time : 2003-02-27 04:30:55
	 Last Time : 2003-02-28 17:49:50



In [64]:
# train set에 없는 아이템이 val, test기간에 생길 수 있으므로 train data를 기준으로 인덱싱합니다.
id2idx = {item_id : index for index, item_id in enumerate(tr['ItemId'].unique())}

def indexing(df, id2idx):
    df['item_idx'] = df['ItemId'].map(lambda x: id2idx.get(x, -1))  # id2idx에 없는 아이템은 모르는 값(-1) 처리 해줍니다.
    return df

tr = indexing(tr, id2idx)
val = indexing(val, id2idx)
test = indexing(test, id2idx)

In [65]:
# 1) 세션 길이 분포 확인
print("검증 세션 수:", val['UserId'].nunique())
print("세션별 길이(상위 10개):")
print(val.groupby('UserId').size().sort_values(ascending=False).head(10))

# 2) 길이 1인 세션 개수
print("길이 1 세션 비율:",
      (val.groupby('UserId').size() == 1).mean())

검증 세션 수: 8
세션별 길이(상위 10개):
UserId
3391    191
3840     17
1597      7
424       4
5654      3
496       2
4277      2
5100      2
dtype: int64
길이 1 세션 비율: 0.0


In [66]:
tr

Unnamed: 0,UserId,ItemId,Rating,Time,item_idx
31,1,3186,4,2000-12-31 22:00:19,0
22,1,1270,5,2000-12-31 22:00:55,1
27,1,1721,4,2000-12-31 22:00:55,2
37,1,1022,5,2000-12-31 22:00:55,3
24,1,2340,3,2000-12-31 22:01:43,4
...,...,...,...,...,...
1000019,6040,2917,4,2001-08-10 14:40:29,1248
999988,6040,1921,4,2001-08-10 14:41:04,370
1000172,6040,1784,3,2001-08-10 14:41:04,89
1000167,6040,161,3,2001-08-10 14:41:26,464


In [67]:
class SessionDataset:
    """Credit to yhs-968/pyGRU4REC."""

    def __init__(self, data):
        self.df = data
        self.click_offsets = self.get_click_offsets()
        self.session_idx = np.arange(self.df['UserId'].nunique())  # indexing to SessionId

    def get_click_offsets(self):
        """
        Return the indexes of the first click of each session IDs,
        """
        offsets = np.zeros(self.df['UserId'].nunique() + 1, dtype=np.int32)
        offsets[1:] = self.df.groupby('UserId').size().cumsum()
        return offsets

In [68]:
tr_dataset = SessionDataset(tr)
tr_dataset.df.head(10)

Unnamed: 0,UserId,ItemId,Rating,Time,item_idx
31,1,3186,4,2000-12-31 22:00:19,0
22,1,1270,5,2000-12-31 22:00:55,1
27,1,1721,4,2000-12-31 22:00:55,2
37,1,1022,5,2000-12-31 22:00:55,3
24,1,2340,3,2000-12-31 22:01:43,4
36,1,1836,5,2000-12-31 22:02:52,5
3,1,3408,4,2000-12-31 22:04:35,6
7,1,2804,5,2000-12-31 22:11:59,7
47,1,1207,4,2000-12-31 22:11:59,8
0,1,1193,5,2000-12-31 22:12:40,9


In [69]:
tr_dataset.click_offsets

array([     0,     53,    182, ..., 998857, 998980, 999321], dtype=int32)

In [70]:
tr_dataset.session_idx

array([   0,    1,    2, ..., 6037, 6038, 6039])

In [71]:
start = tr_dataset.click_offsets[tr_dataset.session_idx[[[0,1,2,3]]]]       # data 상에서 session이 시작된 위치를 가져옵니다.
end = tr_dataset.click_offsets[tr_dataset.session_idx[[0,1,2,3]] + 1]  # session이 끝난 위치 바로 다음 위치를 가져옵니다.

start
end

array([ 53, 182, 233, 254], dtype=int32)

In [72]:
(end - start).min() -1

20

In [73]:
inp = tr_dataset.df['item_idx'].values[start + 20]
inp

array([ 20,  71, 185,  88])

In [74]:
target = tr_dataset.df['item_idx'].values[start + 20 + 1]
target

array([ 21,  72, 186,  56])

In [75]:
class SessionDataLoader:
    """Credit to yhs-968/pyGRU4REC."""


    def __init__(self, dataset: SessionDataset, batch_size=50):
        self.dataset = dataset
        
## 이부분에 batch_size 를 마지막에 남은 부분으 처리할수있게 코드 작성
#         self.batch_size = batch_size
        self.batch_size = min(batch_size, len(dataset.session_idx))  # 이 줄 추가

    def __iter__(self):
        """ Returns the iterator for producing session-parallel training mini-batches.
        Yields:
            input (B,):  Item indices that will be encoded as one-hot vectors later.
            target (B,): a Variable that stores the target item indices
            masks: Numpy array indicating the positions of the sessions to be terminated
        """

        start, end, mask, last_session, finished = self.initialize()  # initialize 메소드에서 확인해주세요.
        """
        start : Index Where Session Start
        end : Index Where Session End
        mask : indicator for the sessions to be terminated
        """

        while not finished:
            min_len = (end - start).min() - 1  # Shortest Length Among Sessions
            for i in range(min_len):
                # Build inputs & targets
                inp = self.dataset.df['item_idx'].values[start + i]
                target = self.dataset.df['item_idx'].values[start + i + 1]
                yield inp, target, mask

            start, end, mask, last_session, finished = self.update_status(start, end, min_len, last_session, finished)

    def initialize(self):
        first_iters = np.arange(self.batch_size)    # 첫 배치에 사용할 세션 Index를 가져옵니다.
        last_session = self.batch_size - 1    # 마지막으로 다루고 있는 세션 Index를 저장해둡니다.
        start = self.dataset.click_offsets[self.dataset.session_idx[first_iters]]       # data 상에서 session이 시작된 위치를 가져옵니다.
        end = self.dataset.click_offsets[self.dataset.session_idx[first_iters] + 1]  # session이 끝난 위치 바로 다음 위치를 가져옵니다.
        mask = np.array([])   # session의 모든 아이템을 다 돌은 경우 mask에 추가해줄 것입니다.
        finished = False         # data를 전부 돌았는지 기록하기 위한 변수입니다.
        return start, end, mask, last_session, finished

    def update_status(self, start: np.ndarray, end: np.ndarray, min_len: int, last_session: int, finished: bool):
        # 다음 배치 데이터를 생성하기 위해 상태를 update합니다.

        start += min_len   # __iter__에서 min_len 만큼 for문을 돌았으므로 start를 min_len 만큼 더해줍니다.
        mask = np.arange(self.batch_size)[(end - start) == 1]
        # end는 다음 세션이 시작되는 위치인데 start와 한 칸 차이난다는 것은 session이 끝났다는 뜻입니다. mask에 기록해줍니다.

        for i, idx in enumerate(mask, start=1):  # mask에 추가된 세션 개수만큼 새로운 세션을 돌것입니다.
            new_session = last_session + i
            if new_session > self.dataset.session_idx[-1]:  # 만약 새로운 세션이 마지막 세션 index보다 크다면 모든 학습데이터를 돈 것입니다.
                finished = True
                break
            # update the next starting/ending point
            start[idx] = self.dataset.click_offsets[self.dataset.session_idx[new_session]]     # 종료된 세션 대신 새로운 세션의 시작점을 기록합니다.
            end[idx] = self.dataset.click_offsets[self.dataset.session_idx[new_session] + 1]

        last_session += len(mask)  # 마지막 세션의 위치를 기록해둡니다.
        return start, end, mask, last_session, finished

In [76]:
tr_data_loader = SessionDataLoader(tr_dataset, batch_size=4)
tr_dataset.df.head(15)

Unnamed: 0,UserId,ItemId,Rating,Time,item_idx
31,1,3186,4,2000-12-31 22:00:19,0
22,1,1270,5,2000-12-31 22:00:55,1
27,1,1721,4,2000-12-31 22:00:55,2
37,1,1022,5,2000-12-31 22:00:55,3
24,1,2340,3,2000-12-31 22:01:43,4
36,1,1836,5,2000-12-31 22:02:52,5
3,1,3408,4,2000-12-31 22:04:35,6
7,1,2804,5,2000-12-31 22:11:59,7
47,1,1207,4,2000-12-31 22:11:59,8
0,1,1193,5,2000-12-31 22:12:40,9


In [77]:
iter_ex = iter(tr_data_loader)

inputs, labels, mask =  next(iter_ex)
print(f'Model Input Item Idx are : {inputs}')
print(f'Label Item Idx are : {"":5} {labels}')
print(f'Previous Masked Input Idx are {mask}')

Model Input Item Idx are : [ 0 53 65 54]
Label Item Idx are :       [ 1 54 62 24]
Previous Masked Input Idx are []


In [78]:
def mrr_k(pred, truth: int, k: int):
    indexing = np.where(pred[:k] == truth)[0]
    if len(indexing) > 0:
        return 1 / (indexing[0] + 1)
    else:
        return 0


def recall_k(pred, truth: int, k: int) -> int:
    answer = truth in pred[:k]
    return int(answer)

In [79]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, GRU
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

In [80]:
def create_model(args):
    inputs = Input(batch_shape=(args.batch_size, 1, args.num_items))
    gru, _ = GRU(args.hsz, stateful=True, return_state=True, name='GRU')(inputs)
    dropout = Dropout(args.drop_rate)(gru)
    predictions = Dense(args.num_items, activation='softmax')(dropout)
    model = Model(inputs=inputs, outputs=[predictions])
    model.compile(loss=categorical_crossentropy, optimizer=Adam(args.lr), metrics=['accuracy'])
    model.summary()
    return model

In [81]:
class Args:
    def __init__(self, tr, val, test, batch_size, hsz, drop_rate, lr, epochs, k):
        self.tr = tr
        self.val = val
        self.test = test
        self.num_items = tr['ItemId'].nunique()
        self.num_sessions = tr['UserId'].nunique()
        self.batch_size = batch_size
        self.hsz = hsz
        self.drop_rate = drop_rate
        self.lr = lr
        self.epochs = epochs
        self.k = k

## 배치사이즈를 너무 크게두면 밑에 평가 부분에서 nan이 나옵니다    
args = Args(tr, val, test, batch_size=256, hsz=50, drop_rate=0.1, lr=0.001, epochs=3, k=20)

In [82]:
model = create_model(args)

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(256, 1, 3416)]          0         
_________________________________________________________________
GRU (GRU)                    [(256, 50), (256, 50)]    520200    
_________________________________________________________________
dropout_1 (Dropout)          (256, 50)                 0         
_________________________________________________________________
dense_1 (Dense)              (256, 3416)               174216    
Total params: 694,416
Trainable params: 694,416
Non-trainable params: 0
_________________________________________________________________


In [83]:
# train 셋으로 학습하면서 valid 셋으로 검증합니다.
def train_model(model, args):
    train_dataset = SessionDataset(args.tr)
    train_loader = SessionDataLoader(train_dataset, batch_size=args.batch_size)

    tf.config.run_functions_eagerly(True)

    for epoch in range(1, args.epochs + 1):
        total_step = len(args.tr) - args.tr['UserId'].nunique()
        tr_loader = tqdm(train_loader, total=total_step // args.batch_size, desc='Train', mininterval=1)
        for feat, target, mask in tr_loader:
            reset_hidden_states(model, mask)  # 종료된 session은 hidden_state를 초기화합니다. 아래 메서드에서 확인해주세요.

            input_ohe = to_categorical(feat, num_classes=args.num_items)
            input_ohe = np.expand_dims(input_ohe, axis=1)
            target_ohe = to_categorical(target, num_classes=args.num_items)

            result = model.train_on_batch(input_ohe, target_ohe)
            tr_loader.set_postfix(train_loss=result[0], accuracy = result[1])

        val_recall, val_mrr = get_metrics(args.val, model, args, args.k)  # valid set에 대해 검증합니다.

        print(f"\t - Recall@{args.k} epoch {epoch}: {val_recall:3f}")
        print(f"\t - MRR@{args.k}    epoch {epoch}: {val_mrr:3f}\n")


def reset_hidden_states(model, mask):
    gru_layer = model.get_layer(name='GRU')  # model에서 gru layer를 가져옵니다.
    hidden_states = gru_layer.states[0].numpy()  # gru_layer의 parameter를 가져옵니다.
    for elt in mask:  # mask된 인덱스 즉, 종료된 세션의 인덱스를 돌면서
        hidden_states[elt, :] = 0  # parameter를 초기화 합니다.
    gru_layer.reset_states(states=hidden_states)


def get_metrics(data, model, args, k: int):  # valid셋과 test셋을 평가하는 코드입니다.
                                                     # train과 거의 같지만 mrr, recall을 구하는 라인이 있습니다.
    dataset = SessionDataset(data)
    loader = SessionDataLoader(dataset, batch_size=args.batch_size)
    recall_list, mrr_list = [], []

    total_step = len(data) - data['UserId'].nunique()

    for inputs, label, mask in tqdm(loader, total=total_step // args.batch_size, desc='Evaluation', mininterval=1):
        reset_hidden_states(model, mask)
        input_ohe = to_categorical(inputs, num_classes=args.num_items)
        input_ohe = np.expand_dims(input_ohe, axis=1) 
        # 
        if input_ohe.shape[0] < args.batch_size:
            pad_len = args.batch_size - input_ohe.shape[0]
            padding = np.zeros((pad_len, 1, args.num_items))
            input_ohe = np.concatenate([input_ohe, padding], axis=0)

        pred = model.predict(input_ohe, batch_size=args.batch_size)


        pred_arg = tf.argsort(pred, direction='DESCENDING')  # softmax 값이 큰 순서대로 sorting 합니다.

        length = len(inputs)
        recall_list.extend([recall_k(pred_arg[i], label[i], k) for i in range(length)])
        mrr_list.extend([mrr_k(pred_arg[i], label[i], k) for i in range(length)])
    
    
    print(recall_list)
    print(mrr_list)
    recall, mrr = np.mean(recall_list), np.mean(mrr_list)
    return recall, mrr

In [84]:
train_model(model, args)

Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.68it/s, accuracy=0.0117, train_loss=6.55] 
Evaluation: 1it [00:00, 11.49it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 1: 0.000000
	 - MRR@20    epoch 1: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.82it/s, accuracy=0.0117, train_loss=6.2]  
Evaluation: 1it [00:00, 11.70it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.08333333333333333, 0, 0, 0]
	 - Recall@20 epoch 2: 0.125000
	 - MRR@20    epoch 2: 0.010417



Train:  95%|█████████▌| 3696/3880 [02:09<00:06, 28.65it/s, accuracy=0.0273, train_loss=6.06] 
Evaluation: 1it [00:00, 11.55it/s]

[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.125, 0, 0, 0]
	 - Recall@20 epoch 3: 0.125000
	 - MRR@20    epoch 3: 0.015625






In [50]:
def test_model(model, args, test):
    test_recall, test_mrr = get_metrics(test, model, args, 20)
    print(f"\t - Recall@{args.k}: {test_recall:3f}")
    print(f"\t - MRR@{args.k}: {test_mrr:3f}\n")

test_model(model, args, test)

Evaluation: 1it [00:00, 10.92it/s]

[0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0]
	 - Recall@20: 0.000000
	 - MRR@20: 0.000000






In [85]:
# 위에서 Recall과 MRR이 0으로 나와서 valid dataset에 문제가 있는 지 확인해보는 코드
val_items_in_train = args.val['ItemId'].isin(args.tr['ItemId'].unique())
print(f"모든 검증 아이템이 학습 세트에 포함되어 있나요? -> {val_items_in_train.all()}")

모든 검증 아이템이 학습 세트에 포함되어 있나요? -> True


In [87]:
# Test 1 :  epoch를 5로 늘려봄
args = Args(tr, val, test, batch_size=256, hsz=50, drop_rate=0.1, lr=0.001, epochs=5, k=20)
model = create_model(args)

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(256, 1, 3416)]          0         
_________________________________________________________________
GRU (GRU)                    [(256, 50), (256, 50)]    520200    
_________________________________________________________________
dropout_2 (Dropout)          (256, 50)                 0         
_________________________________________________________________
dense_2 (Dense)              (256, 3416)               174216    
Total params: 694,416
Trainable params: 694,416
Non-trainable params: 0
_________________________________________________________________


In [88]:
# train 셋으로 학습하면서 valid 셋으로 검증합니다.
def train_model(model, args):
    train_dataset = SessionDataset(args.tr)
    train_loader = SessionDataLoader(train_dataset, batch_size=args.batch_size)

    tf.config.run_functions_eagerly(True)

    for epoch in range(1, args.epochs + 1):
        total_step = len(args.tr) - args.tr['UserId'].nunique()
        tr_loader = tqdm(train_loader, total=total_step // args.batch_size, desc='Train', mininterval=1)
        for feat, target, mask in tr_loader:
            reset_hidden_states(model, mask)  # 종료된 session은 hidden_state를 초기화합니다. 아래 메서드에서 확인해주세요.

            input_ohe = to_categorical(feat, num_classes=args.num_items)
            input_ohe = np.expand_dims(input_ohe, axis=1)
            target_ohe = to_categorical(target, num_classes=args.num_items)

            result = model.train_on_batch(input_ohe, target_ohe)
            tr_loader.set_postfix(train_loss=result[0], accuracy = result[1])

        val_recall, val_mrr = get_metrics(args.val, model, args, args.k)  # valid set에 대해 검증합니다.

        print(f"\t - Recall@{args.k} epoch {epoch}: {val_recall:3f}")
        print(f"\t - MRR@{args.k}    epoch {epoch}: {val_mrr:3f}\n")


def reset_hidden_states(model, mask):
    gru_layer = model.get_layer(name='GRU')  # model에서 gru layer를 가져옵니다.
    hidden_states = gru_layer.states[0].numpy()  # gru_layer의 parameter를 가져옵니다.
    for elt in mask:  # mask된 인덱스 즉, 종료된 세션의 인덱스를 돌면서
        hidden_states[elt, :] = 0  # parameter를 초기화 합니다.
    gru_layer.reset_states(states=hidden_states)


def get_metrics(data, model, args, k: int):  # valid셋과 test셋을 평가하는 코드입니다.
                                                     # train과 거의 같지만 mrr, recall을 구하는 라인이 있습니다.
    dataset = SessionDataset(data)
    loader = SessionDataLoader(dataset, batch_size=args.batch_size)
    recall_list, mrr_list = [], []

    total_step = len(data) - data['UserId'].nunique()

    for inputs, label, mask in tqdm(loader, total=total_step // args.batch_size, desc='Evaluation', mininterval=1):
        reset_hidden_states(model, mask)
        input_ohe = to_categorical(inputs, num_classes=args.num_items)
        input_ohe = np.expand_dims(input_ohe, axis=1) 
        # 
        if input_ohe.shape[0] < args.batch_size:
            pad_len = args.batch_size - input_ohe.shape[0]
            padding = np.zeros((pad_len, 1, args.num_items))
            input_ohe = np.concatenate([input_ohe, padding], axis=0)

        pred = model.predict(input_ohe, batch_size=args.batch_size)


        pred_arg = tf.argsort(pred, direction='DESCENDING')  # softmax 값이 큰 순서대로 sorting 합니다.

        length = len(inputs)
        recall_list.extend([recall_k(pred_arg[i], label[i], k) for i in range(length)])
        mrr_list.extend([mrr_k(pred_arg[i], label[i], k) for i in range(length)])
    
    
    print(recall_list)
    print(mrr_list)
    recall, mrr = np.mean(recall_list), np.mean(mrr_list)
    return recall, mrr

In [89]:
train_model(model, args)

Train:  95%|█████████▌| 3696/3880 [02:07<00:06, 29.07it/s, accuracy=0.0195, train_loss=6.6]  
Evaluation: 1it [00:00, 11.75it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 1: 0.000000
	 - MRR@20    epoch 1: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.73it/s, accuracy=0.0117, train_loss=6.2]  
Evaluation: 1it [00:00, 11.92it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.05263157894736842, 0, 0, 0]
	 - Recall@20 epoch 2: 0.125000
	 - MRR@20    epoch 2: 0.006579



Train:  95%|█████████▌| 3696/3880 [02:09<00:06, 28.59it/s, accuracy=0.0312, train_loss=6.07] 
Evaluation: 1it [00:00, 11.76it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.2, 0, 0, 0]
	 - Recall@20 epoch 3: 0.125000
	 - MRR@20    epoch 3: 0.025000



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.69it/s, accuracy=0.0312, train_loss=6.01] 
Evaluation: 1it [00:00, 11.89it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.25, 0, 0, 0]
	 - Recall@20 epoch 4: 0.125000
	 - MRR@20    epoch 4: 0.031250



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.69it/s, accuracy=0.0391, train_loss=5.99] 
Evaluation: 1it [00:00, 11.82it/s]

[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.2, 0, 0, 0]
	 - Recall@20 epoch 5: 0.125000
	 - MRR@20    epoch 5: 0.025000






#Test 1 결과:  epoch를 5로 늘려본 결과, 학습이 진행되어, accuracy는 상승, train_loss는 줄어듬.   Recall은 stagnant했으나, MRR은 epoch 4까지는 상승함.  학습이 잘되었다고 판단됨.

In [93]:
# Test 2 :  epoch 5에서 MRR이 drop 되었으므로 overfitting이 아닌가 의심.  그래서 drop_rate을 0.3으로 올려봄.
args = Args(tr, val, test, batch_size=256, hsz=50, drop_rate=0.3, lr=0.001, epochs=5, k=20)
model = create_model(args)

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(256, 1, 3416)]          0         
_________________________________________________________________
GRU (GRU)                    [(256, 50), (256, 50)]    520200    
_________________________________________________________________
dropout_4 (Dropout)          (256, 50)                 0         
_________________________________________________________________
dense_4 (Dense)              (256, 3416)               174216    
Total params: 694,416
Trainable params: 694,416
Non-trainable params: 0
_________________________________________________________________


In [95]:
# train 셋으로 학습하면서 valid 셋으로 검증합니다.
def train_model(model, args):
    train_dataset = SessionDataset(args.tr)
    train_loader = SessionDataLoader(train_dataset, batch_size=args.batch_size)

    tf.config.run_functions_eagerly(True)

    for epoch in range(1, args.epochs + 1):
        total_step = len(args.tr) - args.tr['UserId'].nunique()
        tr_loader = tqdm(train_loader, total=total_step // args.batch_size, desc='Train', mininterval=1)
        for feat, target, mask in tr_loader:
            reset_hidden_states(model, mask)  # 종료된 session은 hidden_state를 초기화합니다. 아래 메서드에서 확인해주세요.

            input_ohe = to_categorical(feat, num_classes=args.num_items)
            input_ohe = np.expand_dims(input_ohe, axis=1)
            target_ohe = to_categorical(target, num_classes=args.num_items)

            result = model.train_on_batch(input_ohe, target_ohe)
            tr_loader.set_postfix(train_loss=result[0], accuracy = result[1])

        val_recall, val_mrr = get_metrics(args.val, model, args, args.k)  # valid set에 대해 검증합니다.

        print(f"\t - Recall@{args.k} epoch {epoch}: {val_recall:3f}")
        print(f"\t - MRR@{args.k}    epoch {epoch}: {val_mrr:3f}\n")


def reset_hidden_states(model, mask):
    gru_layer = model.get_layer(name='GRU')  # model에서 gru layer를 가져옵니다.
    hidden_states = gru_layer.states[0].numpy()  # gru_layer의 parameter를 가져옵니다.
    for elt in mask:  # mask된 인덱스 즉, 종료된 세션의 인덱스를 돌면서
        hidden_states[elt, :] = 0  # parameter를 초기화 합니다.
    gru_layer.reset_states(states=hidden_states)


def get_metrics(data, model, args, k: int):  # valid셋과 test셋을 평가하는 코드입니다.
                                                     # train과 거의 같지만 mrr, recall을 구하는 라인이 있습니다.
    dataset = SessionDataset(data)
    loader = SessionDataLoader(dataset, batch_size=args.batch_size)
    recall_list, mrr_list = [], []

    total_step = len(data) - data['UserId'].nunique()

    for inputs, label, mask in tqdm(loader, total=total_step // args.batch_size, desc='Evaluation', mininterval=1):
        reset_hidden_states(model, mask)
        input_ohe = to_categorical(inputs, num_classes=args.num_items)
        input_ohe = np.expand_dims(input_ohe, axis=1) 
        # 
        if input_ohe.shape[0] < args.batch_size:
            pad_len = args.batch_size - input_ohe.shape[0]
            padding = np.zeros((pad_len, 1, args.num_items))
            input_ohe = np.concatenate([input_ohe, padding], axis=0)

        pred = model.predict(input_ohe, batch_size=args.batch_size)


        pred_arg = tf.argsort(pred, direction='DESCENDING')  # softmax 값이 큰 순서대로 sorting 합니다.

        length = len(inputs)
        recall_list.extend([recall_k(pred_arg[i], label[i], k) for i in range(length)])
        mrr_list.extend([mrr_k(pred_arg[i], label[i], k) for i in range(length)])
    
    
    print(recall_list)
    print(mrr_list)
    recall, mrr = np.mean(recall_list), np.mean(mrr_list)
    return recall, mrr

In [96]:
train_model(model, args)

Train:  95%|█████████▌| 3696/3880 [02:06<00:06, 29.30it/s, accuracy=0.00781, train_loss=6.74]
Evaluation: 1it [00:00, 11.54it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 1: 0.000000
	 - MRR@20    epoch 1: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:05<00:06, 29.51it/s, accuracy=0.0312, train_loss=6.27] 
Evaluation: 1it [00:00, 11.58it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.05, 0, 0, 0]
	 - Recall@20 epoch 2: 0.125000
	 - MRR@20    epoch 2: 0.006250



Train:  95%|█████████▌| 3696/3880 [02:06<00:06, 29.20it/s, accuracy=0.0195, train_loss=6.19] 
Evaluation: 1it [00:00, 12.13it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 3: 0.000000
	 - MRR@20    epoch 3: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:07<00:06, 28.97it/s, accuracy=0.0156, train_loss=6.17] 
Evaluation: 1it [00:00, 11.41it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 4: 0.000000
	 - MRR@20    epoch 4: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.79it/s, accuracy=0.0312, train_loss=6.08] 
Evaluation: 1it [00:00, 11.15it/s]

[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.05, 0, 0, 0]
	 - Recall@20 epoch 5: 0.125000
	 - MRR@20    epoch 5: 0.006250






#Test 2 결과:  epoch 5에, drop rate을 0.3으로 늘려 regulation을 강화했더니, 오히려 지표들이 나빠짐.  실패...

In [97]:
# Test 3 :  epoch 5에서 drop rate을 0.2로 낮추고, learning rate을 0.0005로 낮춰서 학습의 안정성을 높여봄
args = Args(tr, val, test, batch_size=256, hsz=50, drop_rate=0.2, lr=0.0005, epochs=5, k=20)
model = create_model(args)

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(256, 1, 3416)]          0         
_________________________________________________________________
GRU (GRU)                    [(256, 50), (256, 50)]    520200    
_________________________________________________________________
dropout_5 (Dropout)          (256, 50)                 0         
_________________________________________________________________
dense_5 (Dense)              (256, 3416)               174216    
Total params: 694,416
Trainable params: 694,416
Non-trainable params: 0
_________________________________________________________________


In [98]:
# train 셋으로 학습하면서 valid 셋으로 검증합니다.
def train_model(model, args):
    train_dataset = SessionDataset(args.tr)
    train_loader = SessionDataLoader(train_dataset, batch_size=args.batch_size)

    tf.config.run_functions_eagerly(True)

    for epoch in range(1, args.epochs + 1):
        total_step = len(args.tr) - args.tr['UserId'].nunique()
        tr_loader = tqdm(train_loader, total=total_step // args.batch_size, desc='Train', mininterval=1)
        for feat, target, mask in tr_loader:
            reset_hidden_states(model, mask)  # 종료된 session은 hidden_state를 초기화합니다. 아래 메서드에서 확인해주세요.

            input_ohe = to_categorical(feat, num_classes=args.num_items)
            input_ohe = np.expand_dims(input_ohe, axis=1)
            target_ohe = to_categorical(target, num_classes=args.num_items)

            result = model.train_on_batch(input_ohe, target_ohe)
            tr_loader.set_postfix(train_loss=result[0], accuracy = result[1])

        val_recall, val_mrr = get_metrics(args.val, model, args, args.k)  # valid set에 대해 검증합니다.

        print(f"\t - Recall@{args.k} epoch {epoch}: {val_recall:3f}")
        print(f"\t - MRR@{args.k}    epoch {epoch}: {val_mrr:3f}\n")


def reset_hidden_states(model, mask):
    gru_layer = model.get_layer(name='GRU')  # model에서 gru layer를 가져옵니다.
    hidden_states = gru_layer.states[0].numpy()  # gru_layer의 parameter를 가져옵니다.
    for elt in mask:  # mask된 인덱스 즉, 종료된 세션의 인덱스를 돌면서
        hidden_states[elt, :] = 0  # parameter를 초기화 합니다.
    gru_layer.reset_states(states=hidden_states)


def get_metrics(data, model, args, k: int):  # valid셋과 test셋을 평가하는 코드입니다.
                                                     # train과 거의 같지만 mrr, recall을 구하는 라인이 있습니다.
    dataset = SessionDataset(data)
    loader = SessionDataLoader(dataset, batch_size=args.batch_size)
    recall_list, mrr_list = [], []

    total_step = len(data) - data['UserId'].nunique()

    for inputs, label, mask in tqdm(loader, total=total_step // args.batch_size, desc='Evaluation', mininterval=1):
        reset_hidden_states(model, mask)
        input_ohe = to_categorical(inputs, num_classes=args.num_items)
        input_ohe = np.expand_dims(input_ohe, axis=1) 
        # 
        if input_ohe.shape[0] < args.batch_size:
            pad_len = args.batch_size - input_ohe.shape[0]
            padding = np.zeros((pad_len, 1, args.num_items))
            input_ohe = np.concatenate([input_ohe, padding], axis=0)

        pred = model.predict(input_ohe, batch_size=args.batch_size)


        pred_arg = tf.argsort(pred, direction='DESCENDING')  # softmax 값이 큰 순서대로 sorting 합니다.

        length = len(inputs)
        recall_list.extend([recall_k(pred_arg[i], label[i], k) for i in range(length)])
        mrr_list.extend([mrr_k(pred_arg[i], label[i], k) for i in range(length)])
    
    
    print(recall_list)
    print(mrr_list)
    recall, mrr = np.mean(recall_list), np.mean(mrr_list)
    return recall, mrr

In [99]:
train_model(model, args)

Train:  95%|█████████▌| 3696/3880 [02:05<00:06, 29.37it/s, accuracy=0.00391, train_loss=7.17]
Evaluation: 1it [00:00, 11.55it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 1: 0.000000
	 - MRR@20    epoch 1: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:06<00:06, 29.31it/s, accuracy=0.0156, train_loss=6.65] 
Evaluation: 1it [00:00, 11.42it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 2: 0.000000
	 - MRR@20    epoch 2: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:07<00:06, 29.01it/s, accuracy=0.0273, train_loss=6.42] 
Evaluation: 1it [00:00, 11.86it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 3: 0.000000
	 - MRR@20    epoch 3: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:07<00:06, 29.02it/s, accuracy=0.0273, train_loss=6.28] 
Evaluation: 1it [00:00, 11.48it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 4: 0.000000
	 - MRR@20    epoch 4: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:07<00:06, 29.03it/s, accuracy=0.0234, train_loss=6.2]  
Evaluation: 1it [00:00, 11.79it/s]

[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.05263157894736842, 0, 0, 0]
	 - Recall@20 epoch 5: 0.125000
	 - MRR@20    epoch 5: 0.006579






In [None]:
#Test 3 결과:  epoch 5에, drop rate을 0.2으로 내리고, learning rate을 0.0005로 낮춰 안정성을 높여보려했으나,  
# epoch 1시작부터 accuracy와 train loss가 Test 1보다 더 낮게 시작했다.   
# epoch이 진행될 수록, accuracy와 train-loss는 개선되었으나, recall과 MRR이 epoch 5에서 비로소 상승되었다.

In [100]:
# Test 4 :  epoch을 10으로 올려보고, drop rate을 0.1로 낮추고, learning rate을 0.001로 낮춰서 좀 더 학습 시켜봄
args = Args(tr, val, test, batch_size=256, hsz=50, drop_rate=0.1, lr=0.001, epochs=10, k=20)
model = create_model(args)

Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(256, 1, 3416)]          0         
_________________________________________________________________
GRU (GRU)                    [(256, 50), (256, 50)]    520200    
_________________________________________________________________
dropout_6 (Dropout)          (256, 50)                 0         
_________________________________________________________________
dense_6 (Dense)              (256, 3416)               174216    
Total params: 694,416
Trainable params: 694,416
Non-trainable params: 0
_________________________________________________________________


In [101]:
# train 셋으로 학습하면서 valid 셋으로 검증합니다.
def train_model(model, args):
    train_dataset = SessionDataset(args.tr)
    train_loader = SessionDataLoader(train_dataset, batch_size=args.batch_size)

    tf.config.run_functions_eagerly(True)

    for epoch in range(1, args.epochs + 1):
        total_step = len(args.tr) - args.tr['UserId'].nunique()
        tr_loader = tqdm(train_loader, total=total_step // args.batch_size, desc='Train', mininterval=1)
        for feat, target, mask in tr_loader:
            reset_hidden_states(model, mask)  # 종료된 session은 hidden_state를 초기화합니다. 아래 메서드에서 확인해주세요.

            input_ohe = to_categorical(feat, num_classes=args.num_items)
            input_ohe = np.expand_dims(input_ohe, axis=1)
            target_ohe = to_categorical(target, num_classes=args.num_items)

            result = model.train_on_batch(input_ohe, target_ohe)
            tr_loader.set_postfix(train_loss=result[0], accuracy = result[1])

        val_recall, val_mrr = get_metrics(args.val, model, args, args.k)  # valid set에 대해 검증합니다.

        print(f"\t - Recall@{args.k} epoch {epoch}: {val_recall:3f}")
        print(f"\t - MRR@{args.k}    epoch {epoch}: {val_mrr:3f}\n")


def reset_hidden_states(model, mask):
    gru_layer = model.get_layer(name='GRU')  # model에서 gru layer를 가져옵니다.
    hidden_states = gru_layer.states[0].numpy()  # gru_layer의 parameter를 가져옵니다.
    for elt in mask:  # mask된 인덱스 즉, 종료된 세션의 인덱스를 돌면서
        hidden_states[elt, :] = 0  # parameter를 초기화 합니다.
    gru_layer.reset_states(states=hidden_states)


def get_metrics(data, model, args, k: int):  # valid셋과 test셋을 평가하는 코드입니다.
                                                     # train과 거의 같지만 mrr, recall을 구하는 라인이 있습니다.
    dataset = SessionDataset(data)
    loader = SessionDataLoader(dataset, batch_size=args.batch_size)
    recall_list, mrr_list = [], []

    total_step = len(data) - data['UserId'].nunique()

    for inputs, label, mask in tqdm(loader, total=total_step // args.batch_size, desc='Evaluation', mininterval=1):
        reset_hidden_states(model, mask)
        input_ohe = to_categorical(inputs, num_classes=args.num_items)
        input_ohe = np.expand_dims(input_ohe, axis=1) 
        # 
        if input_ohe.shape[0] < args.batch_size:
            pad_len = args.batch_size - input_ohe.shape[0]
            padding = np.zeros((pad_len, 1, args.num_items))
            input_ohe = np.concatenate([input_ohe, padding], axis=0)

        pred = model.predict(input_ohe, batch_size=args.batch_size)


        pred_arg = tf.argsort(pred, direction='DESCENDING')  # softmax 값이 큰 순서대로 sorting 합니다.

        length = len(inputs)
        recall_list.extend([recall_k(pred_arg[i], label[i], k) for i in range(length)])
        mrr_list.extend([mrr_k(pred_arg[i], label[i], k) for i in range(length)])
    
    
    print(recall_list)
    print(mrr_list)
    recall, mrr = np.mean(recall_list), np.mean(mrr_list)
    return recall, mrr

In [102]:
train_model(model, args)

Train:  95%|█████████▌| 3696/3880 [02:07<00:06, 29.02it/s, accuracy=0.00781, train_loss=6.6] 
Evaluation: 1it [00:00, 11.37it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 1: 0.000000
	 - MRR@20    epoch 1: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:07<00:06, 29.01it/s, accuracy=0.0195, train_loss=6.2]  
Evaluation: 1it [00:00, 11.25it/s]


[0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]
	 - Recall@20 epoch 2: 0.000000
	 - MRR@20    epoch 2: 0.000000



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.78it/s, accuracy=0.0156, train_loss=6.07] 
Evaluation: 1it [00:00, 11.35it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.1, 0, 0, 0]
	 - Recall@20 epoch 3: 0.125000
	 - MRR@20    epoch 3: 0.012500



Train:  95%|█████████▌| 3696/3880 [02:07<00:06, 29.08it/s, accuracy=0.0312, train_loss=6.01] 
Evaluation: 1it [00:00, 11.80it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.14285714285714285, 0, 0, 0]
	 - Recall@20 epoch 4: 0.125000
	 - MRR@20    epoch 4: 0.017857



Train:  95%|█████████▌| 3696/3880 [02:06<00:06, 29.28it/s, accuracy=0.0312, train_loss=5.98]
Evaluation: 1it [00:00, 11.90it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.16666666666666666, 0, 0, 0]
	 - Recall@20 epoch 5: 0.125000
	 - MRR@20    epoch 5: 0.020833



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.79it/s, accuracy=0.0391, train_loss=5.94]
Evaluation: 1it [00:00, 11.71it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.14285714285714285, 0, 0, 0]
	 - Recall@20 epoch 6: 0.125000
	 - MRR@20    epoch 6: 0.017857



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.72it/s, accuracy=0.0391, train_loss=5.93] 
Evaluation: 1it [00:00, 11.84it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.125, 0, 0, 0]
	 - Recall@20 epoch 7: 0.125000
	 - MRR@20    epoch 7: 0.015625



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.84it/s, accuracy=0.0352, train_loss=5.87]
Evaluation: 1it [00:00, 12.02it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.2, 0, 0, 0]
	 - Recall@20 epoch 8: 0.125000
	 - MRR@20    epoch 8: 0.025000



Train:  95%|█████████▌| 3696/3880 [02:07<00:06, 29.09it/s, accuracy=0.0547, train_loss=5.83]
Evaluation: 1it [00:00, 11.91it/s]


[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.1111111111111111, 0, 0, 0]
	 - Recall@20 epoch 9: 0.125000
	 - MRR@20    epoch 9: 0.013889



Train:  95%|█████████▌| 3696/3880 [02:08<00:06, 28.76it/s, accuracy=0.043, train_loss=5.83] 
Evaluation: 1it [00:00, 12.00it/s]

[0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0.1, 0, 0, 0]
	 - Recall@20 epoch 10: 0.125000
	 - MRR@20    epoch 10: 0.012500






In [None]:
# Test 4 결과:  epoch이 올라갈 수록 accuracy 상승, train_loss 하락이 보이지만,  recall은 제자리걸음이나, MRR은 상승과 하락을 반복함. 
# epoch 7부터 정체됨.    epoch을 늘리는 것이 지표 상승의 해결책은 아닌 것 같음.