In [16]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Configuration
import os
import sys
rootpath = os.path.dirname(os.getcwd())
sys.path.append(rootpath)

from news import NewsIO, NewsFunc, NewsPath
newsio = NewsIO()
newsfunc = NewsFunc()
newspath = NewsPath()

import itertools
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

  and should_run_async(code)


# Arguments

In [8]:
## Parameters
TOPN = 1000
RANDOM_STATE = 42

BATCH_SIZE_LIST = [8, 16, 32]
LEARNING_RATE_LIST = [1e-3, 1e-4, 1e-5]
NUM_EPOCHS_LIST = [100, 1000, 10000]

## Filenames
fname_data_norm = f'data_w-{TOPN}_norm.pk'
fname_corr_vars = 'correlated_variables.json'

fdir_reg_model = os.path.sep.join((newspath.fdir_model, 'regression'))

  and should_run_async(code)


# Data Preparation

In [11]:
df_norm = newsio.load(fname_object=fname_data_norm, _type='data')
corr_vars = newsio.load_json(fname_object=fname_corr_vars, _type='data')
x_df = df_norm[corr_vars]
y_df = newsio.load_cci(start='200502', end='201912')['cci']

x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, shuffle=False, random_state=RANDOM_STATE)

print('X variables: {}'.format(x_df.shape))

  | fdir : /data/blank54/workspace/project/news/data
  | fname: data_w-1000_norm.pk
  | fdir : /data/blank54/workspace/project/news/data
  | fname: correlated_variables.json
X variables: (179, 342)


  and should_run_async(code)


# Evaluation

In [20]:
def MAPE(y_test, y_pred):
    return np.mean(np.abs((y_test - y_pred) / y_test)) * 100

def sort_results(results):
    return sorted(results, key=lambda x:x[5], reverse=False)

  and should_run_async(code)


In [22]:
MODEL_TYPE_LIST = ['dnn', 'cnn']
MODEL_SIZE_LIST = ['large', 'small']

results_map = {}
for MODEL_TYPE, MODEL_SIZE in itertools.product(MODEL_TYPE_LIST, MODEL_SIZE_LIST):
    for BATCH_SIZE, LEARNING_RATE, NUM_EPOCHS in itertools.product(BATCH_SIZE_LIST, LEARNING_RATE_LIST, NUM_EPOCHS_LIST):
        fname_history = f'{MODEL_TYPE}_history_{MODEL_SIZE}_B-{BATCH_SIZE}_L-{LEARNING_RATE}_E-{NUM_EPOCHS}.pk'
        history = newsio.load(fdir_object=fdir_reg_model, fname_object=fname_history, verbose=False)

        y_pred = history.model.predict(x_test)
        mape = MAPE(y_test.values, y_pred)
        model_perf = (MODEL_TYPE, MODEL_SIZE, BATCH_SIZE, LEARNING_RATE, NUM_EPOCHS, mape)
        results_map[f'{MODEL_TYPE}_{MODEL_SIZE}'] = model_perf

  and should_run_async(code)


FileNotFoundError: [Errno 2] No such file or directory: '/data/blank54/workspace/project/news/model/regression/cnn_history_small_B-8_L-0.001_E-100.pk'

In [18]:
results

  and should_run_async(code)


[('dnn', 'large', 8, 0.001, 100, 14.174953843131858),
 ('dnn', 'large', 8, 0.001, 1000, 19.30950380775334),
 ('dnn', 'large', 8, 0.001, 10000, 17.300005796456116),
 ('dnn', 'large', 8, 0.0001, 100, 66.40448199203335),
 ('dnn', 'large', 8, 0.0001, 1000, 67.86250158167866),
 ('dnn', 'large', 8, 0.0001, 10000, 69.89335771346447),
 ('dnn', 'large', 8, 1e-05, 100, 89.44422737342292),
 ('dnn', 'large', 8, 1e-05, 1000, 85.8269213757125),
 ('dnn', 'large', 8, 1e-05, 10000, 80.4422063875563),
 ('dnn', 'large', 16, 0.001, 100, 13.538047806495127),
 ('dnn', 'large', 16, 0.001, 1000, 18.10650225975726),
 ('dnn', 'large', 16, 0.001, 10000, 17.89925585106068),
 ('dnn', 'large', 16, 0.0001, 100, 65.63328543683261),
 ('dnn', 'large', 16, 0.0001, 1000, 65.5363356610164),
 ('dnn', 'large', 16, 0.0001, 10000, 65.85488985687246),
 ('dnn', 'large', 16, 1e-05, 100, 97.26230514271238),
 ('dnn', 'large', 16, 1e-05, 1000, 74.52929632270445),
 ('dnn', 'large', 16, 1e-05, 10000, 70.71308017665888),
 ('dnn', 'lar

In [21]:
sort_results(results)

  and should_run_async(code)


[('dnn', 'small', 32, 0.001, 10000, 9.39183230127909),
 ('dnn', 'small', 32, 0.001, 1000, 10.346755487497827),
 ('dnn', 'small', 8, 0.001, 10000, 10.697081276665852),
 ('dnn', 'small', 16, 0.001, 100, 11.306654674937118),
 ('dnn', 'small', 8, 0.001, 100, 12.665664933395476),
 ('dnn', 'small', 8, 0.0001, 10000, 13.23723632048688),
 ('dnn', 'small', 16, 0.001, 10000, 14.38191831155573),
 ('dnn', 'small', 16, 0.001, 1000, 15.049040880289887),
 ('dnn', 'small', 32, 0.001, 100, 17.713429344904494),
 ('dnn', 'small', 32, 0.0001, 10000, 22.568654929078203),
 ('dnn', 'small', 8, 0.001, 1000, 23.417235285092254),
 ('dnn', 'small', 8, 0.0001, 1000, 27.762653971388318),
 ('dnn', 'small', 8, 0.0001, 100, 30.81316471806965),
 ('dnn', 'small', 32, 0.0001, 1000, 62.60071835079959),
 ('dnn', 'small', 16, 0.0001, 100, 80.84846341974466),
 ('dnn', 'small', 16, 0.0001, 10000, 82.86340451910813),
 ('dnn', 'small', 32, 0.0001, 100, 93.09412846525092),
 ('dnn', 'small', 16, 1e-05, 10000, 97.49770235984009),