In [1]:
# dependencies
import pandas as pd

In [2]:
# seed value for the notebook
from numpy.random import seed
seed(42)

In [3]:
# read data
df = pd.read_csv('./resources/google_10yrs_culr_labelled.csv')
df

Unnamed: 0,close,upper_shadow,lower_shadow,real_body,candlestick_pattern
0,295.875977,2.216705,2.346191,-1.379822,NO_PATTERN
1,301.046600,0.617676,0.000000,3.920288,NO_PATTERN
2,299.935760,2.022430,2.739716,-1.743469,NO_PATTERN
3,303.397797,0.627655,0.009949,4.483215,NO_PATTERN
4,305.604523,2.455780,0.313843,1.404724,NO_PATTERN
...,...,...,...,...,...
2512,1732.380005,15.609985,3.069946,4.270020,NO_PATTERN
2513,1738.849976,7.150024,5.890015,3.849976,NO_PATTERN
2514,1776.089966,14.638062,5.300049,24.454956,NO_PATTERN
2515,1758.719971,33.719971,31.700073,-29.070068,NO_PATTERN


In [4]:
X = df.drop(['candlestick_pattern'], axis=1)
y = df['candlestick_pattern']
print(X.shape, y.shape)

(2517, 4) (2517,)


In [5]:
labels = list(y.unique())
labels

['NO_PATTERN', 'CDLHAMMER', 'CDLEVENINGSTAR', 'CDLMORNINGSTAR']

In [6]:
# target category encoding
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
label_encoder = LabelEncoder()
label_encoder.fit(y)
y_encoded = label_encoder.transform(y)
y_categorical = to_categorical(y_encoded)
y_categorical

array([[0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       ...,
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.]], dtype=float32)

In [7]:
# split 75/25 (train/test)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, random_state=1)

In [8]:
X_train

Unnamed: 0,close,upper_shadow,lower_shadow,real_body
2514,1776.089966,14.638062,5.300049,24.454956
1403,768.789978,9.760010,5.940002,-3.920044
1470,768.700012,12.950012,14.750000,-9.500000
2422,1517.979980,7.630005,6.700073,3.309937
341,303.437653,3.601501,2.495636,1.160675
...,...,...,...,...
960,538.302063,5.086060,8.546570,-4.567505
905,561.817505,6.512146,6.881104,-4.627319
1096,529.039978,4.169006,6.339966,-2.559998
235,310.720337,2.415955,1.369843,0.388550


In [9]:
# scale with MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
X_scaler = MinMaxScaler(feature_range=(-1,1)).fit(X)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled

array([[ 0.93477588, -0.62922825, -0.88801207,  0.43931871],
       [-0.33112432, -0.75278585, -0.87449011,  0.01365097],
       [-0.33123738, -0.67198534, -0.68833835, -0.07005678],
       ...,
       [-0.63242441, -0.89440202, -0.86603904,  0.03405371],
       [-0.9067924 , -0.93880558, -0.97105577,  0.07828636],
       [-0.59693447, -0.91032713, -0.90307001,  0.04822109]])

In [13]:
# import numpy as np
# np.savetxt("X_train_scaled.csv", X_train_scaled, delimiter=",")
# X_train_scaled

In [29]:
#
#  https://jfin-swufe.springeropen.com/articles/10.1186/s40854-020-00187-0
#  https://www.aaai.org/ocs/index.php/WS/AAAIW15/paper/viewFile/10179/10251
#  

import numpy as np
import json

X_cl = []  # close
X_up = []  # upper-shadow
X_lo = []  # lower-shadow
X_rb = []  # real-body
gam_cl = []
gam_up = []
gam_lo = []
gam_rb = []
window = 25

for i in range(window, len(X_train_scaled)):
    
    for j in range(0, window):
        X_cl.append(X_train_scaled[i-j][0])
        X_up.append(X_train_scaled[i-j][1])
        X_lo.append(X_train_scaled[i-j][2])
        X_rb.append(X_train_scaled[i-j][3])

    gam = np.outer(X_cl, X_cl) - np.outer(np.sqrt(1 - np.square(X_cl)), np.sqrt(1 - np.square(X_cl)))    
    gam_cl.append(gam)
    
    gam = np.outer(X_up, X_up) - np.outer(np.sqrt(1 - np.square(X_up)), np.sqrt(1 - np.square(X_up)))    
    gam_up.append(gam)
    
    gam = np.outer(X_lo, X_lo) - np.outer(np.sqrt(1 - np.square(X_lo)), np.sqrt(1 - np.square(X_lo)))    
    gam_lo.append(gam)
    
    gam = np.outer(X_rb, X_rb) - np.outer(np.sqrt(1 - np.square(X_rb)), np.sqrt(1 - np.square(X_rb)))    
    gam_rb.append(gam)

    X_cl = []  # close
    X_up = []  # upper-shadow
    X_lo = []  # lower-shadow
    X_rb = []  # real-body
    

In [15]:
# import numpy as np
# from tensorflow.keras.utils import to_categorical
# from cnn import SimpleCNN
# from series2gaf import GenerateGAF
# from datetime import datetime

# # -------------------------------------------------------------------
# # Generate GAF:

# # set parameters
# timeSeries = list(X_train_scaled)
# windowSize = 50
# rollingLength = 10
# fileName = 'demo_%02d_%02d'%(windowSize, rollingLength)

# # generate GAF pickle file (output by function Numpy.dump)
# GenerateGAF(all_ts = timeSeries,
#             window_size = windowSize,
#             rolling_length = rollingLength,
#             fname = fileName)



In [16]:
# # -------------------------------------------------------------------
# # CNN Example:

# # using the generated GAF in previous step
# # data shape: (15, 50, 50)
# gaf = np.load('%s_gaf.pkl'%fileName, allow_pickle=True)
# gaf = np.reshape(gaf, (gaf.shape[0], gaf.shape[1], gaf.shape[2], 1))

# # the label is consisted of numbers 1, 2 and 3
# # label shape: (15, )
# cut_point = int(gaf.shape[0]/3)
# label = np.zeros(gaf.shape[0])
# label[cut_point:cut_point*2] = 1
# label[cut_point*2:] = 2
# label = to_categorical(label)

# # get cnn model ready
# # inputs are single channel data: (15, 15, 1)
# # output size is 3 because of {1,2,3}-classes
# cnn_model = SimpleCNN(input_shape=(gaf.shape[1], gaf.shape[2], 1),
#                         output_size=3)

# # train the cnn model
# train_history = cnn_model.fit(x=gaf, y=label,
#                                 epochs=10, batch_size=10,
#                                 validation_split=0.2, verbose=2)

# # save trained model
# #cnn_model.save('model_%s.h5'%datetime.strftime(datetime.today(),'%Y%m%d%H%M'))

In [17]:
# from series2gaf import PlotHeatmap
# gaf = np.load('%s_gaf.pkl'%fileName, allow_pickle=True)
# PlotHeatmap(gaf)