In [None]:
import pandas as pd
import numpy as np
from datetime import datetime as dt

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as k

from imblearn.over_sampling import SMOTE
from sklearn.metrics.pairwise import cosine_similarity

def get_date_ordinal(cycle, dates=None):
    """Return a min-max scaled "position within a cycle" value for each date.

    Every date string is parsed, converted to its proleptic Gregorian ordinal
    and reduced modulo ``cycle``; the resulting values are then rescaled to
    the range [0, 1].

    Args:
        cycle: Cycle length in days; each ordinal is taken modulo this value.
        dates: Optional iterable of date strings written as ``YYYY.MM.DD`` or
            ``YYYY/MM/DD``. When omitted, the ``Date`` column of the
            module-level ``data`` frame is used, which is what existing
            callers rely on.

    Returns:
        1-D float ndarray with one entry per date. If every date lands on the
        same cycle position the result is all zeros instead of NaN.

    Raises:
        ValueError: If a date string does not match the expected format, or
            if ``dates`` is empty.
    """
    # Imported locally so the function does not depend on the module-level
    # alias ``dt``, which notebook cells can rebind to something else.
    from datetime import datetime

    if dates is None:
        dates = data.Date

    cycle_pos = np.array([
        datetime.strptime(raw.replace(".", "/"), "%Y/%m/%d").toordinal() % cycle
        for raw in np.array(dates)
    ])

    lowest = np.min(cycle_pos)
    span = np.max(cycle_pos) - lowest
    if span == 0:
        # A constant feature would otherwise become 0/0 = NaN and poison
        # everything downstream.
        return np.zeros(len(cycle_pos), dtype=float)
    return (cycle_pos - lowest) / span

# --- Configuration -----------------------------------------------------------
# filename = 'data/US5001440.csv'
filename = 'data/EURUSD60.csv'
valid_idx = 3000          # number of trailing bars held out for validation

data = pd.read_csv(filename, names=['Date', 'Time',
                                    'Open', 'High',
                                    'Low', 'Close',
                                    'Volume'],
                   header=0)

lookbacks = 120           # lags 1 .. lookbacks-1 become the model input
total_bars = 15000        # only the most recent bars are kept
num_forward_bars = 10     # horizon (in bars) of the targets

# --- Per-bar candle features, each min-max scaled over the whole file --------
# NOTE(review): the scaling statistics include the validation bars.
hightail = np.array((data.High - data.Open) / data.Open)
hightail = (hightail - np.min(hightail)) / (np.max(hightail) - np.min(hightail))

lowtail = np.array((data.Low - data.Open) / data.Open)
lowtail = (lowtail - np.min(lowtail)) / (np.max(lowtail) - np.min(lowtail))

body = np.array((data.Close - data.Open) / data.Open)
body = (body - np.min(body)) / (np.max(body) - np.min(body))

op = np.array(data.Open)
date_ordinal1 = get_date_ordinal(5)
date_ordinal2 = get_date_ordinal(10)
date_ordinal3 = get_date_ordinal(60)
date_ordinal4 = get_date_ordinal(240)
date_ordinal5 = get_date_ordinal(600)

# Column 0 is the raw open price; columns 1-8 are the eight features.
market_data = np.stack((op, hightail, lowtail, body,
                        date_ordinal1, date_ordinal2, date_ordinal3,
                        date_ordinal4, date_ordinal5), axis=1)

df = pd.DataFrame(market_data)

# --- Lagged copies of the eight features -------------------------------------
# All lag columns are collected first and attached with a single concat;
# inserting ~950 columns one at a time fragments the frame. Insertion order
# (lag-major, then feature) is what the reshape below depends on.
lagged = {}
for i in range(1, lookbacks):
    lagged['lb_ht'+str(i)] = df[1].shift(i)
    lagged['lb_lt'+str(i)] = df[2].shift(i)
    lagged['lb_body'+str(i)] = df[3].shift(i)
    lagged['ord1'+str(i)] = df[4].shift(i)
    lagged['ord2'+str(i)] = df[5].shift(i)
    lagged['ord3'+str(i)] = df[6].shift(i)
    lagged['ord4'+str(i)] = df[7].shift(i)
    lagged['ord5'+str(i)] = df[8].shift(i)
df = pd.concat([df, pd.DataFrame(lagged)], axis=1)

# The first rows contain NaN from the shifts, so they are dropped.
df = df.iloc[lookbacks:]
# Named ``bars`` rather than ``dt`` so the ``datetime`` alias imported at the
# top of the file is not overwritten.
bars = np.array(df)
bars = bars[-total_bars:]
# Skip the 9 un-lagged columns; what remains is (lookbacks-1) lags x 8 features.
x = bars[:, 9:]
x = x.reshape(-1, lookbacks-1, 8)
op = bars[:, 0]

# --- Targets -----------------------------------------------------------------
# y     : bar-to-bar fractional gains over the next num_forward_bars bars
# y_cat : 1 if the current open is the lowest price of the forward window,
#         2 if it is the highest, 0 otherwise
y = []
y_cat = []
for i in range(len(op) - num_forward_bars):
    rn = op[i:i+num_forward_bars+1]
    y.append(np.diff(rn) / rn[:-1])
    if np.argmin(rn) == 0:
        cat = 1
    elif np.argmax(rn) == 0:
        cat = 2
    else:
        cat = 0
    y_cat.append(cat)

y_cat = np.expand_dims(y_cat, -1)
y = np.array(y)
# From here on ``op`` is num_forward_bars shorter than ``x``; index i still
# refers to the same bar in both.
op = op[:-num_forward_bars]

ymin, ymax = y.min(), y.max()
y = (y - ymin) / (ymax - ymin)
print(y.shape)
print(y.max(), y.min())

# --- Training split + oversampling -------------------------------------------
xtrain = x[:-num_forward_bars]
xtrain = xtrain[:-valid_idx]

ytrain = y_cat[:-valid_idx]
oversample = SMOTE(sampling_strategy='not majority')
# SMOTE works on 2-D samples and 1-D labels, hence the flatten / ravel.
x_over, y_over = oversample.fit_resample(
    xtrain.reshape(-1, (x.shape[1] * x.shape[2])), ytrain.ravel())
x_over = x_over.reshape(-1, x.shape[1], x.shape[2])

# fit_resample returns the original samples together with the synthetic ones,
# so its output is used as the training set directly. Appending ``xtrain``
# again would duplicate every real sample and undo the class balancing.
xtrain = x_over
ytrain = np.asarray(y_over).reshape(-1, 1).astype(np.float32)

In [None]:
def calculate_dd(eq):
    """Return the largest peak-to-trough drop found in ``eq``.

    The trough is the index where the series sits furthest below its running
    maximum; the peak is the index of the highest value before that trough.

    Args:
        eq: 1-D numeric ndarray.

    Returns:
        ``eq[trough] - eq[peak]``, i.e. a value that is zero or negative.

    Raises:
        ValueError: When the trough is at index 0 (the series never falls
            below its running maximum, or is empty), because the peak search
            then runs over an empty slice.
    """
    running_peak = np.maximum.accumulate(eq)
    trough = np.argmax(running_peak - eq)
    peak = np.argmax(eq[:trough])
    return eq[trough] - eq[peak]


def get_eq(predictions, verbose=1, reversed=False, rev_signal=False,
           prices=None):
    """Simulate trading a signal series and return the per-bar PnL.

    A signal of ``1`` opens a position at the bar's price, provided at least
    ``min_elasped`` bars have passed since the previous entry. A signal of
    ``-1`` closes every open position. Each bar's PnL is the price change
    since the previous bar multiplied by the lots of every open position.

    Args:
        predictions: Iterable of signals (1, -1 or 0), one per bar.
        verbose: Truthy to print every entry and exit.
        reversed: If true, positions are opened with negative lots (short).
            The name shadows the builtin but is kept for existing callers.
        rev_signal: If true, every signal is negated before it is applied.
        prices: Optional sequence of bar prices aligned with ``predictions``.
            Defaults to the validation slice of the module-level ``op``.

    Returns:
        1-D ndarray with one PnL value per (signal, price) pair.
    """
    add_balance = 50000
    min_elasped = 20

    trades = []
    eq = []
    entry_elasped = min_elasped

    if prices is None:
        # Signals are produced from x[-valid_idx:-num_forward_bars]. ``op`` has
        # already lost its last num_forward_bars entries, so those same bars
        # are simply its final (valid_idx - num_forward_bars) elements.
        # Slicing op[-valid_idx:-num_forward_bars] would pair every signal
        # with a price num_forward_bars bars in the past.
        n_valid = valid_idx - num_forward_bars
        ops = op[max(len(op) - n_valid, 0):]
    else:
        ops = prices

    for j, (pr, o) in enumerate(zip(predictions, ops)):
        lots = round(add_balance / o) * 10000
        if reversed:
            lots = -lots
        if rev_signal:
            pr = -pr
        if pr == 1 and entry_elasped >= min_elasped:
            if verbose:
                print(f"Entered: {o} / {lots} @ {j}")
            trades.append([o, lots])
            entry_elasped = 0
        elif pr == -1 and len(trades) > 0:
            trades = []
            if verbose:
                print(f"Closed: {o} / {lots} @ {j}")

        # NOTE(review): a position opened on this bar is credited with the
        # move from the previous bar, and the bar that closes a position
        # contributes nothing -- confirm this attribution is intended.
        pnl = 0
        if j > 0:
            # The first bar has no predecessor; ops[j - 1] would wrap around
            # to the last price of the window.
            for _, l in trades:
                pnl += (o - ops[j - 1]) * l
        eq.append(pnl)
        entry_elasped += 1

    eq = np.array(eq)
    return eq


def get_signals(model):
    """Turn a trained model into one trade signal per validation bar.

    The output of the model's third-from-last layer is used as an embedding.
    Each validation sample is matched to the most cosine-similar sample from
    the earlier part of ``x``, and that neighbour's de-normalised forward
    gains are accumulated into a path. The signal is ``1`` when the path's
    minimum is at its first step, ``-1`` when its maximum is at the first
    step, and ``0`` otherwise (the minimum check wins if both hold).

    Args:
        model: Keras model exposing ``input`` and ``layers``.

    Returns:
        1-D ndarray of signals in {1, -1, 0}, one per row of
        ``x[-valid_idx:-num_forward_bars]``.
    """
    embedder = keras.models.Model(model.input, model.layers[-3].output)
    valid_emb = embedder.predict(x[-valid_idx:-num_forward_bars], verbose=0)

    # NOTE(review): the forward-gain targets of the last few base rows extend
    # past the start of the validation window -- confirm this is acceptable.
    base_emb = embedder.predict(x[:-valid_idx], verbose=0)

    nearest = np.argmax(cosine_similarity(valid_emb, base_emb), axis=1)

    # Undo the min-max scaling applied to y so the gains can be summed.
    raw_gains = y * (ymax - ymin) + ymin
    paths = np.cumsum(raw_gains[nearest], axis=1)
    peak_at = np.argmax(paths, axis=1)
    trough_at = np.argmin(paths, axis=1)

    return np.array([
        1 if lo == 0 else (-1 if hi == 0 else 0)
        for hi, lo in zip(peak_at, trough_at)
    ])


In [None]:
# Continue training from a previously saved model and keep a checkpoint
# whenever the validation back-test sets a new record.
model = keras.models.load_model("data/eurusd/EURUSD_best_lb30_f20_reverse.h5")

# Records to beat; shared by both signal variants below.
best_eq = 2
best_rev = 2

# (rev_signal flag, checkpoint for a new best score,
#  checkpoint for a new best negated score)
variants = (
    (False, "data/EURUSD_best_lb30_f20.h5",
     "data/EURUSD_best_lb30_f20_reverse.h5"),
    (True, "data/EURUSD_best_lb30_f20_revsig.h5",
     "data/EURUSD_best_lb30_f20_revsig_reverse.h5"),
)

for ii in range(999999):
    # NOTE(review): validation_split=0.9 holds out 90% of the samples and
    # trains on the remaining 10% -- confirm this is not meant to be 0.1.
    model.fit(xtrain, ytrain, epochs=1,validation_split=0.9,
            batch_size=32, shuffle=True, verbose=0)
    predictions = get_signals(model)

    # Both variants are scored every epoch; a variant that cannot be scored
    # no longer prevents the other one from being evaluated.
    for rev_signal, best_path, reverse_path in variants:
        eq = get_eq(predictions, verbose=False, rev_signal=rev_signal)
        # NOTE(review): calculate_dd receives the per-bar PnL series, not its
        # cumulative sum -- confirm which one the drawdown should use.
        try:
            dd = calculate_dd(eq)
        except ValueError:
            # calculate_dd raises when the series never drops below its
            # running maximum (for example when no trade was taken).
            print(f"{ii} -> skipped (rev_signal={rev_signal}): no drawdown")
            continue
        if dd == 0:
            dd = 1
        score = np.sum(eq) / abs(dd)
        if score > best_eq:
            model.save(best_path)
            best_eq = score
            print(f"BEST: {best_eq}")
        elif -score > best_rev:
            model.save(reverse_path)
            best_rev = -score
            print(f"BEST REVERSE: {best_rev}")
        else:
            print(f"{ii} -> {score}")

In [None]:
import matplotlib.pyplot as plt

# Back-test each saved checkpoint with the trade direction / signal polarity
# it was selected for, then combine the four per-bar PnL series.
runs = [
    ("data/EURUSD_best_lb30_f20.h5",
     dict(reversed=False)),
    ("data/EURUSD_best_lb30_f20_reverse.h5",
     dict(reversed=True)),
    ("data/EURUSD_best_lb30_f20_revsig.h5",
     dict(reversed=False, rev_signal=True)),
    ("data/EURUSD_best_lb30_f20_revsig_reverse.h5",
     dict(reversed=True, rev_signal=True)),
]

curves = []
for path, kwargs in runs:
    model = keras.models.load_model(path)
    sigs = get_signals(model)
    curves.append(get_eq(sigs, **kwargs))

eq, eq2, eq3, eq4 = curves
eq = eq + eq2 + eq3 + eq4

# Cumulative PnL of the combined strategies.
plt.plot(np.cumsum(eq))
plt.show()

# NOTE(review): the drawdown is taken on the per-bar PnL series while the plot
# shows its cumulative sum -- confirm which one is intended.
dd = calculate_dd(eq)

# Total PnL relative to the size of the drawdown (cell output).
np.sum(eq) / -dd

In [None]:
# Drawdown computed in the previous cell, rounded and shown as a dollar
# amount with thousands separators.
f"${round(dd):,}"

In [None]:
# Mean per-bar PnL multiplied by 20.
# NOTE(review): 20 presumably mirrors min_elasped in get_eq -- confirm.
(np.sum(eq) / len(eq)) * 20

In [None]:
# Total PnL of the combined series, rounded and shown as a dollar amount.
f"${round(np.sum(eq)):,}"

In [None]:
# Shape of the scaled forward-gain targets: one row per bar, one column per
# forward step.
y.shape

In [None]:
# Persist the validation inputs and the scaled targets for use outside the
# notebook.
# NOTE(review): x is saved for the validation slice only while y is saved in
# full, so the two arrays have different lengths -- confirm this is intended.
np.save("data/eurusd/x.npy",x[-valid_idx:-num_forward_bars])
np.save("data/eurusd/y.npy",y)