In [1]:
import time
import datetime

def format_time(elapsed):
    """Render a duration given in seconds as an ``h:mm:ss`` style string.

    Args:
        elapsed: Duration in seconds (int or float).

    Returns:
        ``str()`` of a ``datetime.timedelta`` built from ``elapsed`` rounded
        to whole seconds with the built-in ``round``.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)


"""Training GCMC model on the Rotten movie-rating data set (``RottenMovie``).
The script loads the full graph to the training device.
"""
import os, time
import argparse
import logging
import random
import string
import numpy as np
import pandas as pd
import torch as th
import torch.nn as nn
from data_rotten_v2 import RottenMovie
from utils import get_activation, get_optimizer, torch_total_param_num, torch_net_info, MetricLogger

Using backend: pytorch


In [2]:
import easydict

# Run configuration, accessed elsewhere in the notebook as `args.<name>`.
# Key order is kept exactly as before; several entries (gcn_out_units,
# gcn_agg_units, train_lr, save_dir, save_id) are reassigned by later cells.
args = easydict.EasyDict(dict(
    # --- data / general ---
    data_name="rotten",
    use_one_hot_fea=True,
    gpu=0,
    seed=123,
    data_test_ratio=0.1,
    data_valid_ratio=0.1,
    # --- model ---
    model_activation="leaky",
    gcn_dropout=0.5,
    gcn_agg_norm_symm=True,
    gcn_agg_units=32,
    gcn_agg_accum="sum",
    gcn_out_units=32,  # 64, 128
    gen_r_num_basis_func=2,
    # --- training ---
    train_max_epoch=300,
    train_log_interval=5,
    train_valid_interval=5,
    train_optimizer="adam",
    train_grad_clip=1.0,
    train_lr=0.01,
    train_min_lr=0.0008,
    train_lr_decay_factor=0.5,
    train_decay_patience=25,
    train_early_stopping_patience=50,
    # --- runtime / output ---
    share_param=False,
    mix_cpu_gpu=False,
    minibatch_size=40000,
    num_workers_per_gpu=8,
    device=0,
    save_dir="./save/",
    save_id=1,
    train_max_iter=1000,
))

# Seed every random number generator this notebook imports so that repeated
# runs start from the same state.
# `random` is imported in the first cell but was previously left unseeded,
# unlike numpy and torch.
random.seed(args.seed)
np.random.seed(args.seed)
th.manual_seed(args.seed)

# Only touch the CUDA generators when a GPU is actually present.
if th.cuda.is_available():
    th.cuda.manual_seed_all(args.seed)

In [3]:
from train import train

# Keyword arguments common to both RottenMovie instances; the two datasets
# differ only in their `emotion` / `sentiment` flags.
_shared_dataset_kwargs = dict(
    train_data='./data/trainset_filtered.csv',
    test_data='./data/testset_filtered.csv',
    movie_data='./data/movie_info.csv',
    user_data='./data/user_info.csv',
    name='rotten',
    device=0,
    mix_cpu_gpu=False,
    use_one_hot_fea=True,
    symm=True,
    valid_ratio=0.1,
)

# Variant built with emotion=True, sentiment=False.
dataset = RottenMovie(emotion=True, sentiment=False, **_shared_dataset_kwargs)

# Variant built with emotion=False, sentiment=True.
dataset_es = RottenMovie(emotion=False, sentiment=True, **_shared_dataset_kwargs)

......1: 데이터 로드
......3: Train/Valid 분리
All rating pairs : 245094
	All train rating pairs : 216328
		Train rating pairs : 194695
		Valid rating pairs : 21633
	Test rating pairs  : 28766
......4: User/Movie를 Global id에 매핑
Total user number = 1112, movie number = 8521
......5: features 생성
Feature dim: 
user: (1112, 1112)
movie: (8521, 8521)
......6: Graph Encoder/Decoder 생성
rating_values :  [0.5, 1.0, 1.5, 3.5, 3.0, 5.0, 4.0, 2.0, 2.5, 4.5, 16, 17, 18, 19, 20, 21]
......7: Graph 결과 출력
Train enc graph: 	#user:1112	#movie:8521	#pairs:389390
Train dec graph: 	#user:1112	#movie:8521	#pairs:194695
Valid enc graph: 	#user:1112	#movie:8521	#pairs:389390
Valid dec graph: 	#user:1112	#movie:8521	#pairs:21633
Test enc graph: 	#user:1112	#movie:8521	#pairs:389390
Test dec graph: 	#user:1112	#movie:8521	#pairs:28766
......1: 데이터 로드
......3: Train/Valid 분리
All rating pairs : 245094
	All train rating pairs : 216328
		Train rating pairs : 194695
		Valid rating pairs : 21633
	Test rating pairs  : 28766


In [4]:
# Copy the rating values reported by the loaded dataset onto the shared config
# (presumably read by train(); confirm against the train module).
# NOTE(review): this value comes from `dataset`; `dataset_es` is trained with
# the same `args` object, so verify that its rating_values match or are
# reassigned before that run.
args.rating_vals = dataset.rating_values
# Same value as the gcn_dropout already set in the config cell (0.5).
args.gcn_dropout = 0.50

In [5]:
# NOTE: disabled earlier version of the hyper-parameter sweep, kept for
# reference only; every line in this cell is commented out and nothing runs.
# NOTE(review): if re-enabled, `[0.006*i for i in range(10)]` starts at
# lr = 0.0 (i = 0); confirm that is intended.
# bests=100
# bests_es=100
# start_time = time.time()

# for dim in [256]:
#     args.gcn_out_units = dim
#     for agg in [128]:
#         args.gcn_agg_units = agg
#         for lr in [0.006*i for i in range(10)]:
#             args.train_lr = lr
#             args.save_dir = f'./test/test'
#             args.save_id = 'new_feature'
#             best = train(args, dataset)
#             print("****************************")
#             args.save_dir = f'./test/test_es'
#             args.save_id = 'new_feature_es'
#             best_es = train(args, dataset_es)

# #             print(best,'  VS  ', best_es)
#             if bests>best:
#                 bests = best
#             if bests_es>best_es:
#                 bests_es=best_es
                
# print("  Training epoch took: {:}".format(format_time(time.time() - start_time)))
# print(bests,'  VS  ', bests_es)

In [9]:
# Hyper-parameter sweep: for every value of gcn_out_units, train once on
# `dataset` and once on `dataset_es`, and keep the value returned by train()
# for each run.
# NOTE(review): judging by the logged output, train() returns the best *test*
# RMSE; selecting hyper-parameters on it leaks test information. Confirm, and
# prefer the validation RMSE for model selection.


def _train_variant(data, save_dir, save_id):
    """Point the shared `args` at one dataset variant and train on it.

    Args:
        data: Dataset object handed to ``train``; its ``rating_values`` are
            copied onto ``args.rating_vals`` first, so the config always
            matches the dataset actually being trained (previously the value
            taken from `dataset` was silently reused for `dataset_es`).
        save_dir: Value assigned to ``args.save_dir`` for this run.
        save_id: Value assigned to ``args.save_id`` for this run.

    Returns:
        Whatever ``train(args, data)`` returns.
    """
    args.rating_vals = data.rating_values
    args.save_dir = save_dir
    args.save_id = save_id
    return train(args, data)


# Running minima; start at +inf so the first result always replaces them
# (the former sentinel of 100 assumed no score could exceed it).
bests = float("inf")
bests_es = float("inf")

# Per-run results, in sweep order.
bests_list = []
bests_es_list = []
start_time = time.time()

for dim in [64, 128, 256, 512]:
    args.gcn_out_units = dim
    print()
    print(f"dim:{dim}")

    for agg in [256]:
        args.gcn_agg_units = agg
        for lr in [0.06]:
            args.train_lr = lr

            print("1번")
            best = _train_variant(dataset, './test/test', 'new_feature')
            bests_list.append(best)

            print("2번")
            best_es = _train_variant(dataset_es, './test/test_es', 'new_feature_es')
            bests_es_list.append(best_es)

            print("****************************")
            bests = min(bests, best)
            bests_es = min(bests_es, best_es)

print("  Training epoch took: {:}".format(format_time(time.time() - start_time)))
print(bests,'  VS  ', bests_es)


dim:64
1번
training...
Best Iter Idx=965, Best Valid RMSE=0.7470, Best Test RMSE=0.8422
2번
training...
Best Iter Idx=725, Best Valid RMSE=0.7364, Best Test RMSE=0.8479
****************************

dim:128
1번
training...
Best Iter Idx=800, Best Valid RMSE=0.7886, Best Test RMSE=0.8745
2번
training...
Best Iter Idx=955, Best Valid RMSE=0.7309, Best Test RMSE=0.8485
****************************

dim:256
1번
training...
Best Iter Idx=915, Best Valid RMSE=0.7478, Best Test RMSE=0.8462
2번
training...
Best Iter Idx=975, Best Valid RMSE=0.7289, Best Test RMSE=0.8365
****************************

dim:512
1번
training...
Best Iter Idx=990, Best Valid RMSE=0.7426, Best Test RMSE=0.8326
2번
training...
Best Iter Idx=895, Best Valid RMSE=0.7371, Best Test RMSE=0.8384
****************************
  Training epoch took: 0:16:03
0.8326063362575978   VS   0.8364646532921297


In [7]:
# Values returned by train(args, dataset), one per sweep iteration.
# NOTE(review): this cell's execution count (7) is lower than the sweep cell's
# (9), so the saved output may come from an earlier run; re-run to refresh.
bests_list

[0.8347348269545674,
 0.8725535387811746,
 0.8336509020017829,
 0.8317154477541387]

In [8]:
# Values returned by train(args, dataset_es), one per sweep iteration.
# NOTE(review): this cell's execution count (8) is lower than the sweep cell's
# (9), so the saved output may come from an earlier run; re-run to refresh.
bests_es_list

[0.8520771186496274, 0.8450898023434926, 0.84206848011259, 0.8380634334176944]