In [1]:
_date_ = '20180601'
_model_date_ = '20180608'

In [2]:
import tensorflow as tf
from keras import backend as K
from keras.models import load_model

import math
import numpy as np
import pandas as pd
import sklearn.preprocessing as prep

import os
from datetime import date, timedelta, datetime

Using TensorFlow backend.


In [3]:
_scaler_ = prep.MinMaxScaler()

def scale_backward(d, y):
    if type(_scaler_) is prep.data.MinMaxScaler:
        return y * (max(d) - min(d)) + min(d)

def scale_toward(d, x):
    if type(_scaler_) is prep.data.MinMaxScaler:
        return (x - min(d)) / (max(d) - min(d))

In [4]:
def show_msg(now, last):
    diff = float( format(now, '.2f') ) - float( format(last, '.2f') )    
    if diff > 0:
        return "1"  # 漲
    elif diff < 0:
        return "-1" # 跌
    else:
        return "0"  # 平

def format_n_days(price):
    output = str(_code_) + '\t'
    for i in range(0, 5 if len(price)>=5 else len(price)):
        if i==0:
            output += '{}\t{:.2f}\t'.format(show_msg(price[i], last), price[i])
        else:
            output += '{}\t{:.2f}\t'.format(show_msg(price[i], price[i-1]), price[i])
    return output

def TBrain_score(predict_str, real_str):
    predict = predict_str.split('\t')[1:-1]
    real = real_str.split('\t')[1:-1]

    weights = [0.10, 0.15, 0.20, 0.25, 0.30]

    # (實際價格 – 絕對值(預測價格 – 實際價格)) /實際價格)*0.5
    p_score = [
        ( ( float(r) - abs(float(p) - float(r)) ) / float(r) ) * 0.5
        for (r, p) in zip(real[1::2], predict[1::2]) 
    ]

    # 預測正確得 0.5
    q_score = [
        0.5 if float(p) == float(r) else 0.0
        for (r, p) in zip(real[0::2], predict[0::2])
    ]

    return sum([ p*w + q*w for (p, q, w) in zip(p_score, q_score, weights) ])

In [5]:
_window_ = 30
_file_ = '18ETF.csv'

def tbrain_loss(y_true, y_pred):
    w = np.array([[0.10], [0.15], [0.20], [0.25], [0.30]])
    
    weights = np.concatenate((w,w,w,w,w,w))
    weights = tf.convert_to_tensor(weights, np.float32)
    
    squares = K.square(y_pred - y_true)
    squares = squares * weights
    
    return K.mean(squares, axis=-1)

In [6]:
df = pd.read_csv(os.path.join('/home/ddl/Desktop/Notebooks/TBrain/', _date_, _file_), thousands=',')

df['sort'] = df['代碼'].astype(str)
df = df.sort_values(by='sort', ascending=True).drop(columns=['sort'])

scoresA, scoresA_B = [], []

scoreDict = {
    code: {
        code: [] for code in df.代碼.unique()
    } for code in df.代碼.unique()
}

for model_code in df.代碼.unique():
    K.clear_session()
    
    model_Africa = load_model(
        os.path.join('/home/ddl/Desktop/Notebooks/Model/Africa-{}/{}_model_close.h5'.format(_model_date_, model_code)),
        custom_objects={'tbrain_loss': tbrain_loss}
    )

    model_Binary = load_model(
        os.path.join('/home/ddl/Desktop/Notebooks/Model/Binary-{}/{}_model_close.h5'.format(_model_date_, model_code)),
        custom_objects={'tbrain_loss': tbrain_loss}
    )
    
    for _code_ in df.代碼.unique():

        data = df[df['代碼'] == _code_].sort_values(by='日期', ascending=True) \
                                        .drop(columns=['代碼', '日期', '中文簡稱'])

        last = np.array(data)[-1][3]
        date = int(df[df['代碼'] == _code_].sort_values(by='日期', ascending=False).head(1).日期)

        # predict 5 days
        dataArr = np.array(data)
        X = dataArr[-_window_:]
        X[:, 0] = scale_toward(list(dataArr[:, 0]), X[:, 0])
        X[:, 1] = scale_toward(list(dataArr[:, 1]), X[:, 1])
        X[:, 2] = scale_toward(list(dataArr[:, 2]), X[:, 2])
        X[:, 3] = scale_toward(list(dataArr[:, 3]), X[:, 3])
        X[:, 4] = scale_toward(list(dataArr[:, 4]), X[:, 4])
        X = np.expand_dims(X, axis=0)

        predictA = model_Africa.predict(X).reshape( X.shape[0], X.shape[1] )
        predictA = list(predictA[0][-5:])
        predictA = [ scale_backward(list(dataArr[:, 3]), i) for i in predictA ]

        predictB = model_Binary.predict(X)
        predictB = [[1, -1, 0][np.argmax(i)] for i in predictB[0][-5:].tolist()]

        predictA_B = format_n_days(predictA).split('\t')
        predictA_B[1::2] = [str(i) for i in predictB+['']]
        predictA_B = '\t'.join(predictA_B)

        # print real
        real_date = (datetime.strptime(_date_, '%Y%m%d')  + timedelta(days=7)).strftime('%Y%m%d')
        real_path = '/home/ddl/Desktop/Notebooks/TBrain/' + real_date + '/18ETF.csv'
        if os.path.exists(real_path):

            df2 = pd.read_csv(real_path, thousands=',')
            df2 = df2[df2['代碼'] == _code_]
            df2 = list( df2[df2['日期'] > date]['收盤價(元)'] )

            scoreA = TBrain_score(format_n_days(predictA), format_n_days(df2))
            scoreA_B = TBrain_score(predictA_B, format_n_days(df2))
            
            scoreDict[_code_][model_code] = [scoreA, scoreA_B]
            
#             scoresA.append(scoreA)
#             scoresA_B.append(scoreA_B)
#             print(format_n_days(predictA) + '(Africa)')
#             print(predictA_B + '(Africa + Binary)')
#             print(format_n_days(df2) + '(real)')
#             print('{:.2f} {:.2f}'.format(scoreA, scoreA_B))

    #     break

#     print('Total Scores (Africa): {:.7f}'.format(sum(scoresA)))
#     print('Total Scores (Africa + Binary): {:.7f}'.format(sum(scoresA_B)))

In [7]:
# Africa

score = []

BestModel = {
    code: [] for code in df.代碼.unique()
}

for _code_ in df.代碼.unique():
    k = max(scoreDict[_code_].keys(), key=(lambda k: scoreDict[_code_][k][0]))
    k1 = max(scoreDict[_code_].keys(), key=(lambda k: scoreDict[_code_][k][1]))
    
    score_k = scoreDict[_code_][k][0]
    score_k1 = scoreDict[_code_][k1][1]
    
    if score_k > score_k1:
        score.append( score_k )
        BestModel[_code_] = [k, 'Africa']
    else:
        score.append( score_k1 )
        BestModel[_code_] = [k1, 'Binary']

print(sum(score))

print( 'BestModel =', BestModel)

15.03690182790245
BestModel = {6208: [690, 'Africa'], 692: [690, 'Africa'], 6203: [690, 'Africa'], 713: [690, 'Binary'], 57: [54, 'Binary'], 690: [6204, 'Africa'], 50: [690, 'Africa'], 51: [6203, 'Binary'], 52: [690, 'Africa'], 53: [690, 'Binary'], 54: [690, 'Africa'], 55: [57, 'Africa'], 56: [52, 'Binary'], 6201: [690, 'Binary'], 58: [54, 'Binary'], 59: [54, 'Binary'], 6204: [54, 'Binary'], 701: [52, 'Binary']}


In [9]:
# def show_msg(now, last):
#     diff = float( format(now, '.2f') ) - float( format(last, '.2f') )    
#     if diff > 0:
#         return "1"  # 漲
#     elif diff < 0:
#         return "-1" # 跌
#     else:
#         return "0"  # 平

# def format_n_days(price):
#     output = str(_code_) + '\t'
#     for i in range(0, 5 if len(price)>=5 else len(price)):
#         if i==0:
#             output += '{}\t{:.2f}\t'.format(show_msg(price[i], last), price[i])
#         else:
#             output += '{}\t{:.2f}\t'.format(show_msg(price[i], price[i-1]), price[i])
#     return output

# real_date = (datetime.strptime(_date_, '%Y%m%d')  + timedelta(days=7)).strftime('%Y%m%d')

# real_path = '/home/ddl/Desktop/Notebooks/TBrain/' + real_date + '/18ETF.csv'

# df2 = pd.read_csv(real_path, thousands=',')
# df2['sort'] = df2['代碼'].astype(str)
# df2 = df2.sort_values(by='sort', ascending=True).drop(columns=['sort'])

# for _code_ in df2.代碼.unique():
#     df = df2[df2['代碼'] == _code_]
#     df = list( df[df['日期'] > 20180601]['收盤價(元)'] )
#     print( format_n_days(df) )