## import packages and install pip
### 본 노트북은 다음과 같은 구조일 때 동작합니다
```
   upper
     ├─ /code
     |    ├─ /data
     |    ├─ /src
     |    ├─ /submit
     |    ├─ ... 
     ├─ /debug_jupyter_notebook
     |    ├─ PipeLine.ipynb
```

In [2]:
# NOTE: PyTorch (torch) must already be installed; install it first if it is missing.
import warnings
warnings.filterwarnings('ignore')  # keep the notebook output free of library warnings
import os
import sys

# Parent directory of the notebook folder (kept under its original name).
module_path = os.path.abspath(os.path.join('..'))

# Make the sibling `code` directory importable so `from src... import ...` resolves.
# The path is absolute and the very same value is used for the membership check and
# the append, so re-running this cell never adds a duplicate entry and the entry
# stays valid even if the working directory changes later.
code_path = os.path.abspath(os.path.join('..', 'code'))
if code_path not in sys.path:
    sys.path.append(code_path)
!pip install --upgrade pip


[0m

In [10]:

!pip install -r ../code/requirement.txt
!pip install easydict

Collecting easydict
  Downloading easydict-1.11.tar.gz (6.6 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: easydict
  Building wheel for easydict (setup.py) ... done
  Created wheel for easydict: filename=easydict-1.11-py3-none-any.whl size=6661 sha256=5428a44c0cc2ce940afc58828247e7a51a2f7fa324aef7ac4c696b8421a0d204
  Stored in directory: /home/minseo/.cache/pip/wheels/4a/da/c3/24f93937d475a9156e9b244f627b593f9c1386ce64a0cf1c65
Successfully built easydict
Installing collected packages: easydict
Successfully installed easydict-1.11

In [11]:
import time
import argparse
import pandas as pd
import easydict
from src.utils import Logger, Setting, models_load
from src.data import context_data_load, context_data_split, context_data_loader
from src.data import dl_data_load, dl_data_split, dl_data_loader
from src.data import image_data_load, image_data_split, image_data_loader
from src.data import text_data_load, text_data_split, text_data_loader
from src.train import train, test

In [16]:
# Run configuration for the pipeline, exposed with attribute access (args.model, ...).
# Imported here so the device can be chosen from what this machine actually offers.
import torch

args = easydict.EasyDict({
    'data_path': 'data/',  # Directory containing the input data
    'saved_model_path': './saved_models',  # Directory for saved model checkpoints
    # Model to train and predict with; the pipeline cell handles
    # FM, FFM, NCF, WDN, DCN, CNN_FM and DeepCoNN.
    'model': "FM",
    'data_shuffle': True,  # Whether to shuffle the data
    'test_size': 0.2,  # Train/valid split ratio
    'seed': 42,  # Random seed
    'use_best_model': True,  # Whether to use the model with the best validation score

    # TRAINING OPTION
    'batch_size': 1024,  # Batch size
    'epochs': 10,  # Number of epochs
    'lr': 1e-3,  # Learning rate
    'loss_fn': 'RMSE',  # Loss function (MSE or RMSE)
    'optimizer': 'ADAM',  # Optimizer (SGD or ADAM)
    'weight_decay': 1e-6,  # Regularisation value used by the Adam optimizer

    # GPU
    # Use CUDA when it is usable and fall back to the CPU otherwise, so the
    # notebook also runs on machines without a GPU.
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',

    # FM, FFM, NCF, WDN, DCN Common OPTION
    'embed_dim': 16,  # Embedding dimension for FM, FFM, NCF, WDN, DCN
    'dropout': 0.2,  # Dropout rate for NCF, WDN, DCN
    'mlp_dims': (16, 16),  # MLP network dimensions for NCF, WDN, DCN

    # DCN
    'num_layers': 3,  # Number of Cross Network layers

    # CNN_FM
    'cnn_embed_dim': 64,  # CNN_FM embedding dimension for user and item
    'cnn_latent_dim': 12,  # CNN_FM latent dimension for user/item/image

    # DeepCoNN
    'vector_create': False,  # Whether to build text vectors (set True only for the first training run)
    'deepconn_embed_dim': 32,  # DeepCoNN embedding dimension for user and item
    'deepconn_latent_dim': 10,  # DeepCoNN latent dimension
    'conv_1d_out_dim': 50,  # DeepCoNN 1D-conv output size
    'kernel_size': 3,  # DeepCoNN 1D-conv kernel size
    'word_dim': 768,  # DeepCoNN 1D-conv input size
    # NOTE(review): the original note describes this as a 1D-conv output size too,
    # same as conv_1d_out_dim; confirm how the model uses it.
    'out_dim': 32
})

In [17]:

# End-to-end run: seed -> load data -> split/build loaders -> logging setup ->
# model init -> train -> predict -> write the submission file.
Setting.seed_everything(args.seed)


######################## MODEL -> DATA PIPELINE
# Each supported model maps to its (load, split, loader) functions, applied in that order.
_context_pipeline = (context_data_load, context_data_split, context_data_loader)
_dl_pipeline = (dl_data_load, dl_data_split, dl_data_loader)
_data_pipelines = {
    'FM': _context_pipeline,
    'FFM': _context_pipeline,
    'NCF': _dl_pipeline,
    'WDN': _dl_pipeline,
    'DCN': _dl_pipeline,
    'CNN_FM': (image_data_load, image_data_split, image_data_loader),
    'DeepCoNN': (text_data_load, text_data_split, text_data_loader),
}

# Fail fast with a clear message on an unsupported model name; otherwise `data`
# would never be assigned and the run would die later with a NameError.
if args.model not in _data_pipelines:
    raise ValueError(
        f'Unsupported model {args.model!r}; expected one of {sorted(_data_pipelines)}'
    )
load_fn, split_fn, loader_fn = _data_pipelines[args.model]


######################## DATA LOAD
print(f'--------------- {args.model} Load Data ---------------')
if args.model == 'DeepCoNN':
    # The tokenizer data is fetched only for the text model, before its data is loaded.
    import nltk
    nltk.download('punkt')
data = load_fn(args)


######################## Train/Valid Split
print(f'--------------- {args.model} Train/Valid Split ---------------')
data = split_fn(args, data)
data = loader_fn(args, data)


####################### Setting for Log
setting = Setting()

log_path = setting.get_log_path(args)
setting.make_dir(log_path)

logger = Logger(args, log_path)
logger.save_args()


######################## Model
print(f'--------------- INIT {args.model} ---------------')
model = models_load(args,data)


######################## TRAIN
print(f'--------------- {args.model} TRAINING ---------------')
# Make sure the checkpoint directory exists before training starts.
# NOTE(review): a recorded run of this notebook ended in the predict step with
# FileNotFoundError for a '<timestamp>_FM_model.pt' file under ./saved_models.
# Whether train()/test() create and read exactly this directory is not visible
# here; confirm in src.train.
os.makedirs(args.saved_model_path, exist_ok=True)
model = train(args, model, data, logger, setting)


######################## INFERENCE
print(f'--------------- {args.model} PREDICT ---------------')
predicts = test(args, model, data, setting)


######################## SAVE PREDICT
print(f'--------------- SAVE {args.model} PREDICT ---------------')
# os.path.join works whether or not data_path carries a trailing slash.
submission = pd.read_csv(os.path.join(args.data_path, 'sample_submission.csv'))
# The model name was validated above, so the predictions are always written.
submission['rating'] = predicts

filename = setting.get_submit_filename(args)
submission.to_csv(filename, index=False)

--------------- FM Load Data ---------------
--------------- FM Train/Valid Split ---------------
--------------- INIT FM ---------------
--------------- FM TRAINING ---------------


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Train_loss: 5.015, valid_loss: 2.834


 10%|█         | 1/10 [00:03<00:35,  3.97s/it]

Epoch: 2, Train_loss: 2.494, valid_loss: 2.560


 20%|██        | 2/10 [00:07<00:29,  3.69s/it]

Epoch: 3, Train_loss: 2.123, valid_loss: 2.484


 30%|███       | 3/10 [00:10<00:23,  3.31s/it]

Epoch: 4, Train_loss: 1.900, valid_loss: 2.465


 40%|████      | 4/10 [00:13<00:18,  3.08s/it]

Epoch: 5, Train_loss: 1.747, valid_loss: 2.462


 60%|██████    | 6/10 [00:18<00:11,  2.83s/it]

Epoch: 6, Train_loss: 1.637, valid_loss: 2.475


 70%|███████   | 7/10 [00:21<00:08,  2.76s/it]

Epoch: 7, Train_loss: 1.557, valid_loss: 2.490


 80%|████████  | 8/10 [00:23<00:05,  2.72s/it]

Epoch: 8, Train_loss: 1.497, valid_loss: 2.506


 90%|█████████ | 9/10 [00:26<00:02,  2.65s/it]

Epoch: 9, Train_loss: 1.453, valid_loss: 2.528


100%|██████████| 10/10 [00:28<00:00,  2.88s/it]

Epoch: 10, Train_loss: 1.420, valid_loss: 2.544
--------------- FM PREDICT ---------------





FileNotFoundError: [Errno 2] No such file or directory: './saved_models/20231212_012420_FM_model.pt'