# IMPORT subprocess
---
`main.py`를 실행하기 위한 서브프로세스를 정의합니다.  
터미널에서 해당 명령을 직접 실행하는 것과 동일하게 동작합니다.

In [1]:
import subprocess
from datetime import datetime

In [2]:
# Notebook-wide parameters shared by every task cell below.
# Name of the model; passed to main.py as --model_name.
model_name = "ncf"
# Current local date rendered as YYYY-MM-DD; passed to main.py as --base_date.
today = f"{datetime.now():%Y-%m-%d}"

# PREPARE TRAIN DATA
---
학습용 데이터 전처리 과정

In [3]:
# Run the "prepare-train-data" task of main.py in a child process.
# The argument vector is an explicit list instead of a formatted string that
# is split on whitespace, so an interpolated value containing spaces stays a
# single argument.
command = [
    "python", "main.py",
    "--base_date", str(today),
    "--task", "prepare-train-data",
    "--model_name", str(model_name),
]
# stderr is redirected into stdout so the captured output holds both streams.
p = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
# errors="replace" keeps the log printable even if the child writes bytes that
# are not valid UTF-8 (strict decoding would raise and hide the whole log).
print(p.stdout.decode("utf-8", errors="replace"))
# Fail loudly, after the log has been shown, when the task exits non-zero;
# raises subprocess.CalledProcessError.
p.check_returncode()

INFO:root:[TASK-START] prepare-train-data
INFO:root:[TASK-END] prepare-train-data



# PREPARE INFERENCE DATA
---
추론용 데이터 전처리 과정

In [4]:
# Run the "prepare-inference-data" task of main.py in a child process.
# The argument vector is an explicit list instead of a formatted string that
# is split on whitespace, so an interpolated value containing spaces stays a
# single argument.
command = [
    "python", "main.py",
    "--base_date", str(today),
    "--task", "prepare-inference-data",
    "--dataset_name", "prepared_watch_log",
    "--model_name", str(model_name),
]
# stderr is redirected into stdout so the captured output holds both streams.
p = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
# errors="replace" keeps the log printable even if the child writes bytes that
# are not valid UTF-8 (strict decoding would raise and hide the whole log).
print(p.stdout.decode("utf-8", errors="replace"))
# Fail loudly, after the log has been shown, when the task exits non-zero;
# raises subprocess.CalledProcessError.
p.check_returncode()

INFO:root:[TASK-START] prepare-inference-data
INFO:root:dataset_dir : /home/sagemaker-user/mlops-recommend-system/local/input/data
INFO:root:src : /home/sagemaker-user/mlops-recommend-system/local/input/data/train
INFO:root:dst : /home/sagemaker-user/mlops-recommend-system/local/input/data/inference
INFO:root:success all process!
INFO:root:[TASK-END] prepare-inference-data



# TRAIN
---
모델 학습 수행

In [5]:
# Run the "train" task of main.py in a child process.
# The argument vector is an explicit list instead of a formatted string that
# is split on whitespace, so an interpolated value containing spaces stays a
# single argument.
command = [
    "python", "main.py",
    "--base_date", str(today),
    "--task", "train",
    "--dataset_name", "prepared_watch_log",
    "--model_name", str(model_name),
]
# stderr is redirected into stdout so the captured output holds both streams.
p = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
# errors="replace" keeps the log printable even if the child writes bytes that
# are not valid UTF-8 (strict decoding would raise and hide the whole log).
print(p.stdout.decode("utf-8", errors="replace"))
# Fail loudly, after the log has been shown, when the task exits non-zero;
# raises subprocess.CalledProcessError.
p.check_returncode()

INFO:root:[TASK-START] train
INFO:root:Generating data...
INFO:root:Download NCF data
INFO:root:Download Success!
INFO:root:   user_id  contents_code  watch_seconds
0        1          10607             56
1        1        1012201           1069
2        1          82690           1643
3        1         810271           3872
4        1         146233           2910
INFO:root:Run NCF Data Preprocess
INFO:root:Preprocess Index
INFO:root:   user_id  contents_code
0        1          10607
1        1        1012201
2        1          82690
3        1         810271
4        1         146233
INFO:root:Save Index
INFO:root:user_index_dst : /home/sagemaker-user/mlops-recommend-system/local/output/data/index/user_index.csv
INFO:root:user_index_dst : /home/sagemaker-user/mlops-recommend-system/local/output/data/index/item_index.csv
INFO:root:Save Success!
INFO:root:Data Split ...
INFO:root:user_num: 474
INFO:root:item_num: 938
INFO:root:Length of train_data: 4037
INFO:root:Length of valid_da

# INFERENCE
---
학습된 모델을 통해 추론 수행

In [6]:
# Run the "inference" task of main.py in a child process.
# The argument vector is an explicit list instead of a formatted string that
# is split on whitespace, so an interpolated value containing spaces stays a
# single argument.
command = [
    "python", "main.py",
    "--base_date", str(today),
    "--task", "inference",
    "--dataset_name", "prepared_watch_log",
    "--num_workers", "0",
    "--model_name", str(model_name),
]
# stderr is redirected into stdout so the captured output holds both streams.
p = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
# errors="replace" keeps the log printable even if the child writes bytes that
# are not valid UTF-8 (strict decoding would raise and hide the whole log).
print(p.stdout.decode("utf-8", errors="replace"))
# Fail loudly, after the log has been shown, when the task exits non-zero;
# raises subprocess.CalledProcessError.
p.check_returncode()

INFO:root:[TASK-START] inference
INFO:root:Base Date: 2024-04-13 00:00:00
INFO:root:model : NCF(
  (embed_user_GMF): Embedding(474, 64)
  (embed_item_GMF): Embedding(938, 64)
  (embed_user_MLP): Embedding(474, 256)
  (embed_item_MLP): Embedding(938, 256)
  (MLP_layers): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=512, out_features=256, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=128, out_features=64, bias=True)
    (8): ReLU()
  )
  (predict_layer): Linear(in_features=128, out_features=1, bias=True)
  (final_act): Sigmoid()
)
INFO:root:Data Load Completed
INFO:root:Inference Completed
INFO:root:186
INFO:root:[(0,
  [{'code': 120, 'score': 0.3434965908527374},
   {'code': 280, 'score': 0.2966596484184265},
   {'code': 510, 'score': 0.276480108499527},
   {'code': 602223, 'score': 0.275749