In [1]:
import tensorflow as tf
from keras import backend as K
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM

import math
import numpy as np
import pandas as pd
import sklearn.preprocessing as prep
import matplotlib.pyplot as plt2

import os
from datetime import date, timedelta, datetime

Using TensorFlow backend.


In [2]:
_window_ = 30 # days

_scaler_ = prep.MinMaxScaler() # StandardScaler()

_loss_ = "mean_squared_error"
_optimizer_ = "adam" # rmsprop
_verbose_ = 0

_batch_size_ = 512
_epochs_ = 500

_train_test_split_ = 1.0
_validation_split_ = 0.0

_date_ = '20180608'

In [3]:
def tbrain_loss(y_true, y_pred):
    z = np.array([[0.0], [0.0], [0.0], [0.0], [0.0]])
    w = np.array([[0.10], [0.15], [0.20], [0.25], [0.30]])
    
#     weights = np.concatenate((z,z,z,z,z,w))
    weights = np.concatenate((w,w,w,w,w,w))
    weights = tf.convert_to_tensor(weights, np.float32)
    
    squares = K.square(y_pred - y_true)
    squares = squares * weights
#     squares = K.square((y_pred - y_true) * weights)
    
    return K.mean(squares, axis=-1)

In [4]:
def build_model(shape):
    model = Sequential()
    
    model.add(LSTM(20, input_shape=shape, return_sequences=True))
    model.add(LSTM(10, input_shape=(shape[0], 20), return_sequences=True))
    model.add(Dense(1, activation="linear"))
    
    model.compile(loss=tbrain_loss, optimizer=_optimizer_, metrics=['accuracy'])

    return model

In [5]:
def scale_data(X):
    samples, nx, ny = X.shape
    X = X.reshape((samples, nx * ny))
    
    scaler = _scaler_.fit(X)
    X = scaler.transform(X)
    
    return X.reshape((samples, nx, ny))

In [6]:
def scale_backward(d, y):
    if type(_scaler_) is prep.data.MinMaxScaler:
        return y * (max(d) - min(d)) + min(d)

In [7]:
def scale_toward(d, x):
    if type(_scaler_) is prep.data.MinMaxScaler:
        return (x - min(d)) / (max(d) - min(d))

In [8]:
def preprocess_data(data, _column_):
    feature_len = len(data.columns)
    data = data.as_matrix()

    predicted_days = 5
    seq_len = _window_ + predicted_days  ## _window_s + predicted_days
    
    result = []
    for index in range(len(data) - seq_len):
        result.append(data[index : index + seq_len])
        
    result = np.array(result)
    scaled_result = scale_data(result)
    
    row = round(_train_test_split_ * scaled_result.shape[0])
    
    train = scaled_result[:int(row)]
    test = scaled_result[int(row):]
    
    X_test, y_test = [], []

    X_train = train[:, :-predicted_days, :]
    y_train = train[:, predicted_days:, _column_]  ## change

    X_test = test[:, :-predicted_days, :]
    y_test = test[:, predicted_days:, _column_]  ## change

    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], feature_len))
    y_train = y_train.reshape( y_train.shape[0], y_train.shape[1], 1 )
    
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], feature_len))
    y_test = y_test.reshape( y_test.shape[0], y_test.shape[1], 1 )
    
    
    return [X_train, y_train, X_test, y_test]

In [9]:
def plot_result(pred, y):
    plt2.figure()
    plt2.plot(pred, color='red', label='Prediction')
    plt2.plot(y, color='blue', label='Ground Truth')
    plt2.legend()
    plt2.show()

def plot_postprocessing( input ):
    output = input[0][-5:]
    for i in range( 1, input.shape[0] ):
        output = np.append( output, input[i][-1] )
    return output

In [10]:
# ↓↓↓ __main__

In [11]:
def show_msg(now, last):
    diff = float( format(now, '.2f') ) - float( format(last, '.2f') )    
    if diff > 0:
        return "1"  # 漲
    elif diff < 0:
        return "-1" # 跌
    else:
        return "0"  # 平

def format_n_days(price):
    output = str(_code_) + '\t'
    for i in range(0, 5 if len(price)>=5 else len(price)):
        if i==0:
            output += '{}\t{:.2f}\t'.format(show_msg(price[i], last), price[i])
        else:
            output += '{}\t{:.2f}\t'.format(show_msg(price[i], price[i-1]), price[i])
    return output

def TBrain_score(predict_str, real_str):
    predict = predict_str.split('\t')[1:-1]
    real = real_str.split('\t')[1:-1]

    weights = [0.10, 0.15, 0.20, 0.25, 0.30]

    # (實際價格 – 絕對值(預測價格 – 實際價格)) /實際價格)*0.5
    p_score = [
        ( ( float(r) - abs(float(p) - float(r)) ) / float(r) ) * 0.5
        for (r, p) in zip(real[1::2], predict[1::2]) 
    ]

    # 預測正確得 0.5
    q_score = [
        0.5 if float(p) == float(r) else 0.0
        for (r, p) in zip(real[0::2], predict[0::2])
    ]

    return sum([ p*w + q*w for (p, q, w) in zip(p_score, q_score, weights) ])

In [12]:
df = pd.read_csv('/home/ddl/Desktop/Notebooks/TBrain/' + _date_ + '/18ETF.csv', thousands=',')
df

Unnamed: 0,代碼,中文簡稱,日期,開盤價(元),最高價(元),最低價(元),收盤價(元),成交張數(張)
0,50,元大台灣50,20180608,83.30,83.30,82.50,82.55,3469
1,50,元大台灣50,20180607,83.50,83.60,83.00,83.45,6704
2,50,元大台灣50,20180606,82.65,83.35,82.65,83.30,6398
3,50,元大台灣50,20180605,82.70,82.70,82.25,82.50,2695
4,50,元大台灣50,20180604,81.80,82.60,81.80,82.60,7081
5,50,元大台灣50,20180601,80.80,81.45,80.80,81.35,1848
6,50,元大台灣50,20180531,80.80,80.95,80.55,80.75,1605
7,50,元大台灣50,20180530,81.40,81.40,80.30,80.40,6177
8,50,元大台灣50,20180529,82.20,82.20,81.50,81.85,1388
9,50,元大台灣50,20180528,82.05,82.25,82.00,82.20,2940


In [13]:
from ipywidgets import IntProgress
from IPython.display import display

bar = IntProgress(min=0, max=18)
display(bar)

df = pd.read_csv('/home/ddl/Desktop/Notebooks/TBrain/' + _date_ + '/18ETF.csv', thousands=',')

df['sort'] = df['代碼'].astype(str)
df = df.sort_values(by='sort', ascending=True).drop(columns=['sort'])

for _code_ in df.代碼.unique():
#     _code_ = 713
#     if bar.value >= 5:
#         break
    
    data = df[df['代碼'] == _code_].sort_values(by='日期', ascending=True) \
                                    .drop(columns=['代碼', '日期', '中文簡稱'])
    
    # close price
    X_train, y_train, X_test, y_test = preprocess_data(data, 3)
    
    model_close = build_model(X_train.shape[1:])
    model_close.fit(X_train, y_train, batch_size=_batch_size_, epochs=_epochs_, validation_split=_validation_split_, verbose=_verbose_)
    model_close.save("Model/Africa-{}/{}_model_close.h5".format(_date_,_code_))
    
    bar.value += 1
    
    if _train_test_split_ != 1.0:
        testScore = model_close.evaluate(X_test, y_test, verbose=0)
        print('\n\n最低價 Test Score: %.2f MSE (%.2f RMSE)' % (testScore[0], math.sqrt(testScore[0])))
        predict = model_close.predict(X_test).reshape( y_test.shape[0], y_test.shape[1] )
        test = y_test.reshape(y_test.shape[0],y_test.shape[1])
        p = plot_postprocessing( predict )
        t = plot_postprocessing( test )
        plot_result( p, t )
    
    # predict 5 days
    dataArr = np.array(data)
    X = dataArr[-_window_:]
    X[:, 0] = scale_toward(list(dataArr[:, 0]), X[:, 0])
    X[:, 1] = scale_toward(list(dataArr[:, 1]), X[:, 1])
    X[:, 2] = scale_toward(list(dataArr[:, 2]), X[:, 2])
    X[:, 3] = scale_toward(list(dataArr[:, 3]), X[:, 3])
    X[:, 4] = scale_toward(list(dataArr[:, 4]), X[:, 4])
    X = np.expand_dims(X, axis=0)

    predict = model_close.predict(X).reshape( X.shape[0], X.shape[1] )
    predict = list(predict[0][-5:])
    predict = [ scale_backward(list(dataArr[:, 3]), i) for i in predict ]
    
    # print predict
    last = np.array(data)[-1][3]
    date = int(df[df['代碼'] == _code_].sort_values(by='日期', ascending=False).head(1).日期)
    print(format_n_days(predict))
    
    # print real
    real_date = (datetime.strptime(_date_, '%Y%m%d')  + timedelta(days=7)).strftime('%Y%m%d')
    real_path = '/home/ddl/Desktop/Notebooks/TBrain/' + real_date + '/18ETF.csv'
    if os.path.exists(real_path):
        df2 = pd.read_csv(real_path, thousands=',')
        df2 = df2[df2['代碼'] == _code_]
        df2 = list( df2[df2['日期'] > date]['收盤價(元)'] )
        print(format_n_days(df2) + '(real)')
        print(TBrain_score(format_n_days(predict), format_n_days(df2)))
    
#     break

IntProgress(value=0, max=18)

50	-1	79.54	-1	79.47	1	80.51	1	80.74	-1	80.04	
51	-1	30.46	1	30.54	1	30.97	1	31.39	-1	31.11	
52	-1	49.49	-1	49.16	1	49.73	1	49.92	-1	49.19	
53	-1	35.14	0	35.14	1	35.44	1	35.84	-1	35.60	
54	-1	22.70	-1	22.46	1	23.01	1	23.53	-1	23.28	
55	-1	16.96	1	17.23	1	17.26	1	17.44	-1	17.22	
56	-1	24.83	1	25.01	1	25.04	1	25.21	-1	24.99	
57	-1	49.46	1	49.78	1	50.30	1	50.64	-1	50.02	
58	-1	47.07	1	47.13	1	47.45	1	47.98	1	48.08	
59	-1	40.91	1	41.18	1	41.55	1	41.83	-1	41.68	
6201	-1	13.87	-1	13.72	1	13.94	1	14.00	-1	13.45	
6203	-1	35.12	1	35.49	1	35.79	1	36.47	-1	35.80	
6204	-1	52.89	1	53.16	1	54.13	1	54.35	1	54.47	
6208	-1	42.49	-1	42.27	1	42.96	1	43.27	-1	42.81	
690	-1	19.16	1	19.49	1	19.94	1	20.32	-1	20.25	
692	-1	14.60	1	15.14	1	15.46	1	15.74	-1	15.19	
701	-1	15.13	1	15.70	1	15.95	-1	15.74	-1	14.96	
713	-1	9.87	1	10.60	-1	10.58	-1	9.35	-1	7.60	
