In [2]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Configuration
# Both environment variables are assigned before `tensorflow` is imported
# further down in this cell, so they are already in place when it loads.
import os
# Presumably silences TensorFlow's native log output -- TODO confirm the level semantics.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
# Presumably restricts CUDA to the device with index 0 -- TODO confirm on the target machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import sys
# Append the parent of the current working directory to the import path,
# presumably so that the project-local `news` package below can be resolved.
# NOTE(review): the result depends on the directory the kernel was started in.
rootpath = os.path.dirname(os.getcwd())
sys.path.append(rootpath)

from news import NewsIO, NewsFunc, NewsPath
# Helper instances used by the following cells: `newsio` loads/saves objects
# and `newspath` supplies project directories.
# NOTE(review): `newsfunc` is not referenced in the visible part of the notebook.
newsio = NewsIO()
newsfunc = NewsFunc()
newspath = NewsPath()

import numpy as np
import itertools
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten, SimpleRNN
from tensorflow.keras.optimizers import Adam, schedules
from tensorflow.keras.callbacks import ModelCheckpoint

  and should_run_async(code)


In [3]:
## Parameters
# NOTE(review): TOPN is only used in the data file name below; its meaning
# (presumably a top-N cut-off applied when the data set was built) is not visible here.
TOPN = 1000
# Passed to train_test_split as random_state.
RANDOM_STATE = 42

## Filenames
fname_data_norm = f'data_w-{TOPN}_norm.pk'
fname_corr_vars = 'correlated_variables.json'

# Target directory for the regression checkpoints and training histories:
# the 'regression' subfolder of newspath.fdir_model.
fdir_reg_model = os.path.sep.join((newspath.fdir_model, 'regression'))

  and should_run_async(code)


In [4]:
# Load the normalized data set stored under fname_data_norm; per the cell
# output, the loader reports the directory and file name it reads from.
# NOTE(review): assumed to be a pandas DataFrame, since a later cell selects
# columns from it with a list of names -- confirm.
df_norm = newsio.load(fname_object=fname_data_norm, _type='data')

  | fdir : /data/blank54/workspace/project/news/data
  | fname: data_w-1000_norm.pk


  and should_run_async(code)


In [5]:
# Load the collection of variable names used as predictors (stored as JSON);
# a later cell uses it to select columns from df_norm.
# NOTE(review): presumably produced by an earlier correlation analysis --
# confirm where this file is written.
corr_vars = newsio.load_json(fname_object=fname_corr_vars, _type='data')
print(f'  | # of correlated variables: {len(corr_vars)}')

  | fdir : /data/blank54/workspace/project/news/data
  | fname: correlated_variables.json
  | # of correlated variables: 342


  and should_run_async(code)


In [6]:
# Predictors: the columns of the normalized table named in corr_vars.
x_df = df_norm[corr_vars]
# Target: the 'cci' entry of what newsio.load_cci returns for 200502-201912.
# NOTE(review): x_df and y_df are paired by row position in the split below;
# this assumes both cover the same periods in the same order -- confirm.
y_df = newsio.load_cci(start='200502', end='201912')['cci']

# shuffle=False keeps the original row order, so the hold-out set is the
# trailing 20% of the rows rather than a random sample.
# NOTE(review): random_state presumably has no effect while shuffle=False --
# confirm against the scikit-learn documentation.
x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, shuffle=False, random_state=RANDOM_STATE)

print('X variables: {}'.format(x_df.shape))

X variables: (179, 342)


  and should_run_async(code)


In [9]:
def dnn_model_large(INPUT_SIZE):
    """Build the wide fully connected regressor (not compiled).

    The network has four ReLU hidden layers of 256, 128, 64 and 32 units,
    each followed by ``Dropout(0.3)``, and ends in one linear output unit.

    Args:
        INPUT_SIZE: Number of input features; the first layer is declared
            with ``input_shape=(INPUT_SIZE,)``.

    Returns:
        The assembled ``keras.Sequential`` model; compiling and fitting are
        left to the caller.
    """
    # The first hidden layer is the only one that carries the input shape.
    stack = [
        Dense(units=256, activation='relu', input_shape=(INPUT_SIZE,)),
        Dropout(0.3),
    ]
    for width in (128, 64, 32):
        stack.append(Dense(units=width, activation='relu'))
        stack.append(Dropout(0.3))
    stack.append(Dense(units=1, activation='linear'))

    return keras.Sequential(stack)

def dnn_model_small(INPUT_SIZE):
    """Build the narrow fully connected regressor (not compiled).

    The network has four ReLU hidden layers of 32, 16, 8 and 4 units, each
    followed by ``Dropout(0.3)``, and ends in one linear output unit.

    Args:
        INPUT_SIZE: Number of input features; the first layer is declared
            with ``input_shape=(INPUT_SIZE,)``.

    Returns:
        The assembled ``keras.Sequential`` model; compiling and fitting are
        left to the caller.
    """
    hidden_widths = (32, 16, 8, 4)

    stack = []
    for position, width in enumerate(hidden_widths):
        if position == 0:
            # Only the entry layer declares the input shape.
            stack.append(Dense(units=width, activation='relu', input_shape=(INPUT_SIZE,)))
        else:
            stack.append(Dense(units=width, activation='relu'))
        stack.append(Dropout(0.3))
    stack.append(Dense(units=1, activation='linear'))

    return keras.Sequential(stack)

def cnn_model_large(INPUT_SIZE):
    """Build the larger 1-D convolutional regressor (not compiled).

    One ``Conv1D`` layer (256 filters, kernel size 2, ReLU) is followed by
    max pooling (pool size 2) and flattening, then two ReLU dense layers of
    32 and 16 units, each with ``Dropout(0.3)``, and one linear output unit.

    Args:
        INPUT_SIZE: Length of the input sequence; the convolution is declared
            with ``input_shape=(INPUT_SIZE, 1)``, so inputs need a trailing
            axis of length 1.

    Returns:
        The assembled ``keras.Sequential`` model; compiling and fitting are
        left to the caller.
    """
    feature_extractor = [
        Conv1D(filters=256, kernel_size=2, activation='relu', input_shape=(INPUT_SIZE, 1)),
        MaxPooling1D(pool_size=2),
        Flatten(),
    ]

    regression_head = []
    for width in (32, 16):
        regression_head += [Dense(units=width, activation='relu'), Dropout(0.3)]
    regression_head.append(Dense(units=1, activation='linear'))

    return keras.Sequential(feature_extractor + regression_head)

def cnn_model_small(INPUT_SIZE):
    """Build the smaller 1-D convolutional regressor (not compiled).

    One ``Conv1D`` layer (64 filters, kernel size 2, ReLU) is followed by
    max pooling (pool size 2) and flattening, then a 16-unit ReLU dense
    layer with ``Dropout(0.2)`` and one linear output unit.

    Args:
        INPUT_SIZE: Length of the input sequence; the convolution is declared
            with ``input_shape=(INPUT_SIZE, 1)``, so inputs need a trailing
            axis of length 1.

    Returns:
        The assembled ``keras.Sequential`` model; compiling and fitting are
        left to the caller.
    """
    conv = Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(INPUT_SIZE, 1))
    pool = MaxPooling1D(pool_size=2)
    flatten = Flatten()
    hidden = Dense(units=16, activation='relu')
    # Note the lighter dropout (0.2) compared with the other builders (0.3).
    regularizer = Dropout(0.2)
    output = Dense(units=1, activation='linear')

    return keras.Sequential([conv, pool, flatten, hidden, regularizer, output])

def rnn_model_large(INPUT_SIZE):
    """Build the larger recurrent regressor (not compiled).

    A ``SimpleRNN(128)`` layer feeds two ReLU dense layers of 32 and 16
    units, each followed by ``Dropout(0.3)``, and one linear output unit.

    Args:
        INPUT_SIZE: Length of the input sequence; the recurrent layer is
            declared with ``input_shape=(INPUT_SIZE, 1)``, so inputs need a
            trailing axis of length 1.

    Returns:
        The assembled ``keras.Sequential`` model; compiling and fitting are
        left to the caller.
    """
    stack = [SimpleRNN(128, input_shape=(INPUT_SIZE, 1))]
    for width in (32, 16):
        stack.extend((Dense(units=width, activation='relu'), Dropout(0.3)))
    stack.append(Dense(units=1, activation='linear'))

    return keras.Sequential(stack)

def rnn_model_small(INPUT_SIZE):
    """Build the smaller recurrent regressor (not compiled).

    A ``SimpleRNN(64)`` layer feeds a 16-unit ReLU dense layer with
    ``Dropout(0.3)`` and one linear output unit.

    Args:
        INPUT_SIZE: Length of the input sequence; the recurrent layer is
            declared with ``input_shape=(INPUT_SIZE, 1)``, so inputs need a
            trailing axis of length 1.

    Returns:
        The assembled ``keras.Sequential`` model; compiling and fitting are
        left to the caller.
    """
    recurrent = SimpleRNN(64, input_shape=(INPUT_SIZE, 1))
    hidden = Dense(units=16, activation='relu')
    regularizer = Dropout(0.3)
    output = Dense(units=1, activation='linear')

    return keras.Sequential([recurrent, hidden, regularizer, output])

def train(model, x_train, y_train, BATCH_SIZE, LEARNING_RATE, NUM_EPOCHS, fpath_model,
          VALIDATION_SPLIT=0.3, DECAY_STEPS=100, DECAY_RATE=0.9, VERBOSE=0):
    """Compile ``model`` for MSE regression and fit it with best-epoch checkpointing.

    The model is compiled with Adam on an exponentially decaying learning
    rate and fitted on ``x_train``/``y_train``. A ``ModelCheckpoint`` callback
    writes the model to ``fpath_model`` whenever ``val_loss`` improves
    (``save_best_only=True``).

    Args:
        model: Uncompiled Keras model; it is compiled and fitted in place.
        x_train: Training inputs handed to ``model.fit``.
        y_train: Training targets handed to ``model.fit``.
        BATCH_SIZE: Mini-batch size.
        LEARNING_RATE: Initial learning rate of the decay schedule.
        NUM_EPOCHS: Number of epochs to train.
        fpath_model: Path the checkpoint callback saves the model to.
        VALIDATION_SPLIT: Fraction of the training data held out by
            ``model.fit`` for validation. Defaults to 0.3.
        DECAY_STEPS: ``decay_steps`` of the ``ExponentialDecay`` schedule.
            Defaults to 100.
        DECAY_RATE: ``decay_rate`` of the ``ExponentialDecay`` schedule.
            Defaults to 0.9.
        VERBOSE: Verbosity passed to ``model.fit``. Defaults to 0 (silent).

    Returns:
        The history object returned by ``model.fit``.

    Raises:
        ValueError: If ``VALIDATION_SPLIT`` is not strictly between 0 and 1;
            the checkpoint monitors ``val_loss``, which only exists when a
            validation subset is held out.
    """
    if not 0 < VALIDATION_SPLIT < 1:
        raise ValueError(
            f'VALIDATION_SPLIT must be in (0, 1) because the checkpoint monitors val_loss, got {VALIDATION_SPLIT!r}')

    model_checkpoint = ModelCheckpoint(fpath_model, monitor='val_loss', mode='min', save_best_only=True)

    # The schedule advances once per optimizer step (batch), not per epoch, so
    # with small batches and many epochs the rate shrinks quickly; expose the
    # decay settings so long runs can slow the decay down.
    lr_schedule = schedules.ExponentialDecay(initial_learning_rate=LEARNING_RATE,
                                             decay_steps=DECAY_STEPS,
                                             decay_rate=DECAY_RATE)

    model.compile(optimizer=Adam(learning_rate=lr_schedule), loss='mse')
    history = model.fit(x_train, y_train,
                        batch_size=BATCH_SIZE,
                        epochs=NUM_EPOCHS,
                        validation_split=VALIDATION_SPLIT,
                        verbose=VERBOSE,
                        callbacks=[model_checkpoint])

    # NOTE: after fitting, `model` holds the weights of the final epoch; the
    # best-val_loss version is the one stored at fpath_model.
    return history

def MAPE(y_test, y_pred):
    """Return the mean absolute percentage error, in percent.

    Both inputs are flattened to 1-D before they are compared. Without this,
    a 1-D ``y_test`` of shape ``(n,)`` and a column-vector ``y_pred`` of shape
    ``(n, 1)`` (the usual output of ``model.predict`` for a single-output
    model) broadcast to an ``(n, n)`` matrix and the function silently
    averages every prediction against every target.

    Args:
        y_test: Ground-truth values (array-like). Zero entries lead to a
            division by zero, as the error is relative to ``y_test``.
        y_pred: Predicted values (array-like) with the same number of
            elements as ``y_test``, or a single value to compare against
            every target.

    Returns:
        ``mean(|y_test - y_pred| / |y_test|) * 100`` as a float.

    Raises:
        ValueError: Raised by NumPy if the flattened inputs have different,
            non-broadcastable lengths.
    """
    actual = np.asarray(y_test).ravel()
    predicted = np.asarray(y_pred).ravel()
    return np.mean(np.abs((actual - predicted) / actual)) * 100

def plot_history(history, target):
    """Plot the training and validation curves of one logged metric.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences (as returned by ``model.fit``).
        target: Metric name, e.g. ``'loss'``; the validation curve is read
            from the ``'val_<target>'`` entry.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    plt.figure(figsize=(16,7))

    curves = history.history
    # Both curves share the x-axis derived from the training series length.
    epochs = range(len(curves[target]))
    labelled_series = (
        (target, f'Train {target}'),
        (f'val_{target}', f'Valid {target}'),
    )
    for series_key, legend_label in labelled_series:
        plt.plot(epochs, curves[series_key], label=legend_label)

    plt.ylabel(target)
    plt.xlabel('Epoch')
    plt.legend()
    plt.show()

  and should_run_async(code)


In [8]:
## Model parameters
# Number of input features, taken from the column count of the training matrix.
INPUT_SIZE = x_train.shape[1]

# Hyperparameter grid: the search cell iterates over the Cartesian product of
# these three lists (3 x 3 x 3 = 27 training runs).
BATCH_SIZE_LIST = [8, 16, 32]
LEARNING_RATE_LIST = [1e-3, 1e-4, 1e-5]
NUM_EPOCHS_LIST = [100, 1000, 10000]

  and should_run_async(code)


In [10]:
## DNN model(small) development
# Grid search over batch size, learning rate and epoch count. For every
# combination a fresh small DNN is trained, scored by MAPE on the hold-out
# set, and its training history is saved next to the checkpoint.
mape_dnn_small_list = []  # one MAPE per combination, in itertools.product order
for BATCH_SIZE, LEARNING_RATE, NUM_EPOCHS in itertools.product(BATCH_SIZE_LIST, LEARNING_RATE_LIST, NUM_EPOCHS_LIST):
    fname_model = f'dnn_model_small_B-{BATCH_SIZE}_L-{LEARNING_RATE}_E-{NUM_EPOCHS}.h5'
    fname_history = f'dnn_history_small_B-{BATCH_SIZE}_L-{LEARNING_RATE}_E-{NUM_EPOCHS}.pk'
    fpath_model = os.path.sep.join((fdir_reg_model, fname_model))

    # Many models are built in this loop; release the global Keras state left
    # by the previous configuration so memory does not grow run after run.
    keras.backend.clear_session()

    model = dnn_model_small(INPUT_SIZE)
    history = train(model, x_train, y_train, BATCH_SIZE, LEARNING_RATE, NUM_EPOCHS, fpath_model)

    # model.predict returns a column vector of shape (n, 1) for this
    # single-output model, while y_test.values is 1-D. Flatten the predictions
    # so the error is computed element-wise instead of over an (n, n) broadcast.
    # NOTE(review): this scores the final-epoch weights, whereas the file at
    # fpath_model holds the best-val_loss checkpoint -- confirm which one the
    # reported MAPE is meant to describe.
    y_pred = model.predict(x_test).ravel()
    mape = MAPE(y_test.values, y_pred)
    mape_dnn_small_list.append(mape)

    # NOTE(review): this stores the whole History object; confirm that
    # newsio.save can serialize it on the TensorFlow version in use.
    newsio.save(_object=history, fdir_object=fdir_reg_model, fname_object=fname_history)

    print('============================================================')
    print('Model training')
    print(f'  | Batch size: {BATCH_SIZE}')
    print(f'  | Learning rate: {LEARNING_RATE}')
    print(f'  | Num epochs: {NUM_EPOCHS}')
    print(f'  | MAPE: {mape:.03f}%')
    print('============================================================')

  and should_run_async(code)


KeyboardInterrupt: 