In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
from pyts.image import RecurrencePlot, GramianAngularField
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import load_model, Sequential
import json
import time
from collections import deque
import warnings
from pickle import dump, load
from sklearn.preprocessing import StandardScaler

file_path = '/Users/mymac/Google Drive/My Drive/Forex_Robot/'

In [2]:
# currencies = ['Usd_Chf', 'Gbp_Usd', 'Usd_Cad', 'Aud_Usd', 'Eur_Usd', 'Usd_Jpy', 'Nzd_Usd']
# currencies = ['Usd_Cad', 'Aud_Usd', 'Eur_Usd', 'Usd_Jpy', 'Gbp_Usd']
currencies = ['Eur_Usd']
years = '2012-2022'

dfs, df_longs = [], []

# news = pd.read_csv(file_path + 'events_2016-2022.csv')
# news = news.rename(columns={'Start': 'Date'})
# news.Date = pd.to_datetime(news.Date)

for currency_pair in currencies:
    # currency1, currency2 = currency_pair.split('_')
    # currency1, currency2 = currency1.upper(), currency2.upper()

    # curr_news = news.loc[(news['Currency'] == currency1) | (news['Currency'] == currency2)]
    # curr_news.drop(['Id', 'Name', 'Currency'], axis=1, inplace=True)
    # curr_news.drop(curr_news[(curr_news['Impact'] != 'LOW') & (curr_news['Impact'] != 'MEDIUM') & (curr_news['Impact'] != 'HIGH')].index, inplace=True)
    # curr_news.loc[curr_news['Impact'] == 'LOW', 'Impact'] = 1
    # curr_news.loc[curr_news['Impact'] == 'MEDIUM', 'Impact'] = 2
    # curr_news.loc[curr_news['Impact'] == 'HIGH', 'Impact'] = 3
    # curr_news = curr_news.groupby('Date')['Impact'].mean().reset_index()

    df = pd.read_csv(file_path + f'Oanda_{currency_pair}_M5_{years}.csv')
    df.Date = pd.to_datetime(df.Date)
    # df.reset_index(drop=True, inplace=True)
    # df = pd.merge(df, curr_news, how='left', on='Date')
    # df.reset_index(drop=True, inplace=True)
    # df = df.fillna(method='ffill')
    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

    dfs.append(df)

    df_long = pd.read_csv(file_path + f'Oanda_{currency_pair}_M30_{years}.csv')
    df_long.Date = pd.to_datetime(df_long.Date)
    df_long.dropna(inplace=True)
    df_long.reset_index(drop=True, inplace=True)
    df_longs.append(df_long)

In [3]:
def add_fractal(df, i, look_back=3):
    if i >= look_back and i < df.shape[0] - look_back:
        lows = []
        highs = []

        for j in range(1, look_back + 1):
            prev_mid_low, prev_mid_high = df.loc[df.index[i - j], ['Mid_Low', 'Mid_High']]
            future_mid_low, future_mid_high = df.loc[df.index[i + j], ['Mid_Low', 'Mid_High']]

            lows.append(float(prev_mid_low))
            lows.append(float(future_mid_low))
            highs.append(float(prev_mid_high))
            highs.append(float(future_mid_high))

        mid_low, mid_high = df.loc[df.index[i], ['Mid_Low', 'Mid_High']]

        if float(mid_low) < min(lows):
            return float(mid_low), 1.0

        elif float(mid_high) > max(highs):
            return float(mid_high), 0.0

        else:
            return np.nan, np.nan

    else:
        return np.nan, np.nan

def psar(barsdata, iaf=0.02, maxaf=0.2):
    length = len(barsdata)
    high = list(barsdata['Mid_High'])
    low = list(barsdata['Mid_Low'])
    close = list(barsdata['Mid_Close'])
    psar = close[0:len(close)]
    bull = True
    af = iaf
    hp = high[0]
    lp = low[0]
    for i in range(2, length):
        if bull:
            psar[i] = psar[i - 1] + af * (hp - psar[i - 1])
        else:
            psar[i] = psar[i - 1] + af * (lp - psar[i - 1])
        reverse = False
        if bull:
            if low[i] < psar[i]:
                bull = False
                reverse = True
                psar[i] = hp
                lp = low[i]
                af = iaf
        else:
            if high[i] > psar[i]:
                bull = True
                reverse = True
                psar[i] = lp
                hp = high[i]
                af = iaf
        if not reverse:
            if bull:
                if high[i] > hp:
                    hp = high[i]
                    af = min(af + iaf, maxaf)
                if low[i - 1] < psar[i]:
                    psar[i] = low[i - 1]
                if low[i - 2] < psar[i]:
                    psar[i] = low[i - 2]
            else:
                if low[i] < lp:
                    lp = low[i]
                    af = min(af + iaf, maxaf)
                if high[i - 1] > psar[i]:
                    psar[i] = high[i - 1]
                if high[i - 2] > psar[i]:
                    psar[i] = high[i - 2]
    return psar


def atr(high, low, close, lookback=14):
    high_low = high - low
    high_close = np.abs(high - close.shift())
    low_close = np.abs(low - close.shift())
    ranges = pd.concat([high_low, high_close, low_close], axis=1)
    true_range = np.max(ranges, axis=1)

    return true_range.rolling(lookback).sum() / lookback


def rsi(closes, periods=14):
    close_delta = closes.diff()

    up = close_delta.clip(lower=0)
    down = -1 * close_delta.clip(upper=0)
    ma_up = up.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()
    ma_down = down.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()
        
    rsi = ma_up / ma_down
    rsi = 100 - (100/(1 + rsi))

    return rsi

  
def adx(high, low, close, lookback=14):
    plus_dm = high.diff()
    minus_dm = low.diff()
    plus_dm[plus_dm < 0] = 0
    minus_dm[minus_dm > 0] = 0
    
    tr1 = pd.DataFrame(high - low)
    tr2 = pd.DataFrame(abs(high - close.shift(1)))
    tr3 = pd.DataFrame(abs(low - close.shift(1)))
    frames = [tr1, tr2, tr3]
    tr = pd.concat(frames, axis = 1, join = 'inner').max(axis = 1)
    atr = tr.rolling(lookback).mean()
    
    plus_di = 100 * (plus_dm.ewm(alpha = 1/lookback).mean() / atr)
    minus_di = abs(100 * (minus_dm.ewm(alpha = 1/lookback).mean() / atr))
    dx = (abs(plus_di - minus_di) / abs(plus_di + minus_di)) * 100
    adx = ((dx.shift(1) * (lookback - 1)) + dx) / lookback
    adx_smooth = adx.ewm(alpha = 1/lookback).mean()

    return adx_smooth


def stoch(high, low, close, lookback=14):
    high_lookback = high.rolling(lookback).max()
    low_lookback = low.rolling(lookback).min()
    slow_k = (close - low_lookback) * 100 / (high_lookback - low_lookback)
    slow_d = slow_k.rolling(3).mean()

    return slow_k, slow_d

def stoch_rsi(data, k_window=3, d_window=3, window=14):
    min_val = data.rolling(window=window, center=False).min()
    max_val = data.rolling(window=window, center=False).max()

    stoch = ((data - min_val) / (max_val - min_val)) * 100

    slow_k = stoch.rolling(window=k_window, center=False).mean()

    slow_d = slow_k.rolling(window=d_window, center=False).mean()

    return slow_k, slow_d

def n_macd(macd, macdsignal, lookback=50):
    n_macd = 2 * (((macd - macd.rolling(lookback).min()) / (macd.rolling(lookback).max() - macd.rolling(lookback).min()))) - 1
    n_macdsignal = 2 * (((macdsignal - macdsignal.rolling(lookback).min()) / (macdsignal.rolling(lookback).max() - macdsignal.rolling(lookback).min()))) - 1

    return n_macd, n_macdsignal

def chop(df, lookback=14):
    atr1 = atr(df, lookback=1)
    high, low = df['Mid_High'], df['Mid_Low']

    chop = np.log10(atr1.rolling(lookback).sum() / (high.rolling(lookback).max() - low.rolling(lookback).min())) / np.log10(lookback)

    return chop

def vo(volume, short_lookback=5, long_lookback=10):
    short_ema =  pd.Series.ewm(volume, span=short_lookback).mean()
    long_ema = pd.Series.ewm(volume, span=long_lookback).mean()

    volume_oscillator = (short_ema - long_ema) / long_ema

    return volume_oscillator

def bar_lengths(bar_lens, window=36):
    return bar_lens.rolling(window=window).mean(), bar_lens.rolling(window=window).std()

def sar_lengths(opens, sars, window=36):
    diffs = abs(opens - sars.shift(1))

    return diffs.rolling(window=window).mean(), diffs.rolling(window=window).std()

def supertrend(barsdata, atr_len=3, mult=3):
    curr_atr = atr(barsdata['Mid_High'], barsdata['Mid_Low'], barsdata['Mid_Close'], lookback=atr_len)
    highs, lows = barsdata['Mid_High'], barsdata['Mid_Low']
    hl2 = (highs + lows) / 2
    final_upperband = upper_band = hl2 + mult * curr_atr
    final_lowerband = lower_band = hl2 - mult * curr_atr

    # initialize Supertrend column to True
    supertrend = [True] * len(df)

    close = barsdata['Mid_Close']
    
    for i in range(1, len(df.index)):
        curr, prev = i, i - 1
        
        # if current close price crosses above upperband
        if close[curr] > final_upperband[prev]:
            supertrend[curr] = True

        # if current close price crosses below lowerband
        elif close[curr] < final_lowerband[prev]:
            supertrend[curr] = False

        # else, the trend continues
        else:
            supertrend[curr] = supertrend[prev]
            
            # adjustment to the final bands
            if supertrend[curr] == True and final_lowerband[curr] < final_lowerband[prev]:
                final_lowerband[curr] = final_lowerband[prev]

            if supertrend[curr] == False and final_upperband[curr] > final_upperband[prev]:
                final_upperband[curr] = final_upperband[prev]

    return supertrend, final_upperband, final_lowerband

# def supertrend(barsdata, atr_len=3, mult=3):
#     curr_atr = atr(barsdata['ha_high'], barsdata['ha_low'], barsdata['ha_close'], lookback=atr_len)
#     highs, lows = barsdata['ha_high'], barsdata['ha_low']
#     hl2 = (highs + lows) / 2
#     final_upperband = upper_band = hl2 + mult * curr_atr
#     final_lowerband = lower_band = hl2 - mult * curr_atr

#     # initialize Supertrend column to True
#     supertrend = [True] * len(df)

#     close = barsdata['ha_close']
    
#     for i in range(1, len(df.index)):
#         curr, prev = i, i - 1
        
#         # if current close price crosses above upperband
#         if close[curr] > final_upperband[prev]:
#             supertrend[curr] = True

#         # if current close price crosses below lowerband
#         elif close[curr] < final_lowerband[prev]:
#             supertrend[curr] = False

#         # else, the trend continues
#         else:
#             supertrend[curr] = supertrend[prev]
            
#             # adjustment to the final bands
#             if supertrend[curr] == True and final_lowerband[curr] < final_lowerband[prev]:
#                 final_lowerband[curr] = final_lowerband[prev]

#             if supertrend[curr] == False and final_upperband[curr] > final_upperband[prev]:
#                 final_upperband[curr] = final_upperband[prev]

#     return supertrend, final_upperband, final_lowerband

def heikin_ashi(opens, highs, lows, closes):
    ha_close = list((opens + highs + lows + closes) / 4)
    ha_opens = []

    opens_list, closes_list = list(opens), list(closes)

    for i in range(len(ha_close)):
        if i == 0:
            ha_opens.append((opens_list[i] + closes_list[i]) / 2)

        else:
            ha_opens.append((ha_opens[i - 1] + ha_close[i - 1]) / 2)

    ha_highs = list(pd.DataFrame({'ha_open': ha_opens, 'ha_close': ha_close, 'high': list(highs)}).max(axis=1))
    ha_lows = list(pd.DataFrame({'ha_open': ha_opens, 'ha_close': ha_close, 'low': list(lows)}).min(axis=1))

    return ha_opens, ha_highs, ha_lows, ha_close

def trend_indicator(opens, highs, lows, closes, ema_period=50, smoothing_period=10):
    ha_open, ha_high, ha_low, ha_close = heikin_ashi(opens, highs, lows, closes)

    ha_o_ema = pd.Series.ewm(pd.DataFrame({'ha_open': ha_open}), span=ema_period).mean()
    ha_h_ema = pd.Series.ewm(pd.DataFrame({'ha_high': ha_high}), span=ema_period).mean()
    ha_l_ema = pd.Series.ewm(pd.DataFrame({'ha_low': ha_low}), span=ema_period).mean()
    ha_c_ema = pd.Series.ewm(pd.DataFrame({'ha_close': ha_close}), span=ema_period).mean()

    return pd.Series.ewm(ha_o_ema, span=smoothing_period).mean(), pd.Series.ewm(ha_h_ema, span=smoothing_period).mean(), pd.Series.ewm(ha_l_ema, span=smoothing_period).mean(), pd.Series.ewm(ha_c_ema, span=smoothing_period).mean()

def qqe_mod(barsdata, rsi_period=6, smoothing=5, qqe_factor=5, qqe2_factor=1.61, threshold=3, mult=0.35, sma_length=50):
    wilders_period = rsi_period * 2 - 1

    curr_rsi = rsi(barsdata, periods=rsi_period)
    rsi_ema = pd.Series.ewm(curr_rsi, span=smoothing).mean()
    atr_rsi = abs(rsi_ema.shift(1) - rsi_ema)
    atr_rsi_ema = pd.Series.ewm(atr_rsi, span=wilders_period).mean()
    dar = pd.Series.ewm(atr_rsi_ema, span=wilders_period).mean() * qqe_factor

    newshortband = rsi_ema + dar
    newlongband = rsi_ema - dar

    rsi_ema_list = list(rsi_ema)

    longband = [0]
    for i in range(1, len(rsi_ema_list)):
        if rsi_ema_list[i - 1] > longband[i - 1] and rsi_ema_list[i] > longband[i - 1]:
            longband.append(max(longband[i - 1],newlongband[i]))

        else:
            longband.append(newlongband[i])

    shortband = [0]
    for i in range(1,len(rsi_ema_list)):
        if rsi_ema_list[i - 1] < shortband[i - 1] and rsi_ema_list[i] < shortband[i - 1]:
            shortband.append(min(shortband[i - 1],newshortband[i]))
            
        else:
            shortband.append(newshortband[i])

    longband = pd.Series(longband)
    shortband = pd.Series(shortband)

    trend = np.where(rsi_ema > longband.shift(1), 1, -1)    
    fastatrrsitl = pd.Series(np.where(trend == 1, longband, shortband))

    basis = (fastatrrsitl - 50).rolling(window=sma_length).mean()
    dev = (fastatrrsitl - 50).rolling(window=sma_length).std() * mult
    upper = basis + dev
    lower = basis - dev

    greenbar1 = rsi_ema - 50 > threshold
    greenbar2 = rsi_ema - 50 > upper
    redbar1 = rsi_ema - 50 < threshold
    redbar2 = rsi_ema - 50 < lower

    # uptrend = np.where((greenbar1 & greenbar2), True, False)
    # downtrend = np.where((redbar1 & redbar2), True, False)

    uptrend = np.where((greenbar2), True, False)
    downtrend = np.where((redbar2), True, False)

    return uptrend, downtrend

def williams_r(highs, lows, closes, length=21, ema_length=15):
    highest_highs = highs.rolling(window=length).max()
    lowest_lows = lows.rolling(window=length).min()

    willy = 100 * (closes - highest_highs) / (highest_highs - lowest_lows)
    willy_ema = pd.Series.ewm(willy, span=ema_length).mean()

    return willy, willy_ema

In [4]:
bid_ask_mid_prices_list = []

for df in dfs:
    # Add technical indicators (for additional features)
    df['ema200'] = pd.Series.ewm(df['Mid_Close'], span=200).mean()
    df['ema100'] = pd.Series.ewm(df['Mid_Close'], span=100).mean()

    df['atr'] = atr(df['Mid_High'], df['Mid_Low'], df['Mid_Close'])
    df['rsi'] = rsi(df['Mid_Close'])
    df['adx'] = adx(df['Mid_High'], df['Mid_Low'], df['Mid_Close'])
    df['macd'] = pd.Series.ewm(df['Mid_Close'], span=12).mean() - pd.Series.ewm(df['Mid_Close'], span=26).mean()
    df['macdsignal'] = pd.Series.ewm(df['macd'], span=9).mean()
    df['slowk_rsi'], df['slowd_rsi'] = stoch_rsi(df['rsi'])

    tups = [add_fractal(df, i) for i in range(df.shape[0])]
    key_levels, is_supports = [tup[0] for tup in tups], [tup[1] for tup in tups]
    df['key_level'], df['is_support'] = key_levels, is_supports
    df = df.fillna(method='ffill')

    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Extract the bid and ask prices and fractals and remove them from the df
    bid_ask_mid_prices = df[['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close', 'Mid_Open', 'Mid_High', 'Mid_Low', 'Mid_Close']]
    df.drop(['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close', 'Mid_Open', 'Mid_High', 'Mid_Low', 'Mid_Close', 'Volume'], axis=1, inplace=True)
    bid_ask_mid_prices_list.append(bid_ask_mid_prices)

    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

for df_long in df_longs:
    df_long['ema200'] = pd.Series.ewm(df_long['Mid_Close'], span=200).mean()
    df_long['ema100'] = pd.Series.ewm(df_long['Mid_Close'], span=100).mean()

    df_long['atr'] = atr(df_long['Mid_High'], df_long['Mid_Low'], df_long['Mid_Close'])
    df_long['rsi'] = rsi(df_long['Mid_Close'])
    df_long['adx'] = adx(df_long['Mid_High'], df_long['Mid_Low'], df_long['Mid_Close'])
    df_long['macd'] = pd.Series.ewm(df_long['Mid_Close'], span=12).mean() - pd.Series.ewm(df_long['Mid_Close'], span=26).mean()
    df_long['macdsignal'] = pd.Series.ewm(df_long['macd'], span=9).mean()
    df_long['slowk_rsi'], df_long['slowd_rsi'] = stoch_rsi(df_long['rsi'])

    tups = [add_fractal(df_long, i) for i in range(df_long.shape[0])]
    key_levels, is_supports = [tup[0] for tup in tups], [tup[1] for tup in tups]
    df_long['key_level'], df_long['is_support'] = key_levels, is_supports
    df_long = df_long.fillna(method='ffill')

    df_long.dropna(inplace=True)
    df_long.reset_index(drop=True, inplace=True)

    df_long.drop(['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close', 'Mid_Open', 'Mid_High', 'Mid_Low', 'Mid_Close', 'Volume'], axis=1, inplace=True)

    df_long.dropna(inplace=True)
    df_long.reset_index(drop=True, inplace=True)

In [5]:
for i in range(len(dfs)):
    df, df_long = dfs[i], df_longs[i]

    df = pd.merge(df, df_long, how='left', on='Date')
    df.reset_index(drop=True, inplace=True)
    df = df.fillna(method='ffill')
    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

    dfs[i] = df

In [6]:
dfs[0].head()

Unnamed: 0,Date,Bid_Open_x,Bid_High_x,Bid_Low_x,Bid_Close_x,Ask_Open_x,Ask_High_x,Ask_Low_x,Ask_Close_x,Mid_Open_x,...,ema100_y,atr_y,rsi_y,adx_y,macd_y,macdsignal_y,slowk_rsi_y,slowd_rsi_y,key_level_y,is_support_y
0,2005-01-02 19:00:00,1.3564,1.3564,1.3564,1.3564,1.3574,1.3574,1.3574,1.3574,1.3569,...,1.298901,0.003416,61.639766,30.350035,0.009487,0.010028,37.370852,51.886872,1.357,0.0
1,2005-01-02 19:05:00,1.3565,1.3565,1.3564,1.3564,1.3575,1.3575,1.3574,1.3574,1.357,...,1.298901,0.003416,61.639766,30.350035,0.009487,0.010028,37.370852,51.886872,1.357,0.0
2,2005-01-02 19:10:00,1.3564,1.3564,1.3564,1.3564,1.3574,1.3574,1.3574,1.3574,1.3569,...,1.298901,0.003416,61.639766,30.350035,0.009487,0.010028,37.370852,51.886872,1.357,0.0
3,2005-01-02 19:15:00,1.356,1.3564,1.3559,1.3561,1.357,1.3574,1.3569,1.3571,1.3565,...,1.298901,0.003416,61.639766,30.350035,0.009487,0.010028,37.370852,51.886872,1.357,0.0
4,2005-01-02 19:20:00,1.3559,1.3563,1.3553,1.3554,1.3569,1.3573,1.3563,1.3564,1.3564,...,1.298901,0.003416,61.639766,30.350035,0.009487,0.010028,37.370852,51.886872,1.357,0.0


In [7]:
look_back_size = 250

In [8]:
def grab_image_data(subset):
  # rp_transformer = RecurrencePlot()
  # rp_subset = rp_transformer.transform(subset)

  # return rp_subset

  gasf_transformer = GramianAngularField(method='summation')
  gasf_subset = gasf_transformer.transform(subset)

  return gasf_subset

  # gadf_transformer = GramianAngularField(method='difference')
  # gadf_subset = gadf_transformer.transform(subset)

  # return gadf_subset

  # image_data = np.append(rp_subset, gasf_subset, axis=-1)
  # image_data = np.append(image_data, gadf_subset ,axis=-1)
  
  # return image_data

In [9]:
buys_list = []
sells_list = []
nones_list = []

value_per_pip = 1.0
amounts_per_day = [-0.00008, -0.0001, -0.00012]
spread_cutoff = 0.10
risk_reward_ratio = 1.5
each_bar = False
n_bars = 3
pip_movement = 20
use_pullback = True
lookback = n_bars + 1 if use_pullback else n_bars
lookforward = -1 if use_pullback else 0

def get_n_units(trade_type, stop_loss, ask_open, bid_open, mid_open, currency_pair):
    _, second = currency_pair.split('_')
  
    pips_to_risk = ask_open - stop_loss if trade_type == 'buy' else stop_loss - bid_open
    pips_to_risk_calc = pips_to_risk * 10000 if second != 'Jpy' else pips_to_risk * 100

    if second == 'Usd':
        per_pip = 0.0001

    else:
        per_pip = 0.0001 / mid_open if second != 'Jpy' else 0.01 / mid_open

    n_units = int(50 / (pips_to_risk_calc * per_pip))

    return n_units

def calculate_day_fees(start_date, end_date, n_units):
    curr_fee = np.random.choice(amounts_per_day, p=[0.25, 0.50, 0.25]) * n_units
    num_days = np.busday_count(start_date.date(), end_date.date())

    return num_days * curr_fee

for idx in range(len(currencies)):
    currency_pair, df, df_long, bid_ask_mid_prices = currencies[idx], dfs[idx], df_longs[idx], bid_ask_mid_prices_list[idx]
    rounding = 3 if 'Jpy' in currency_pair else 5
    pip_movement_to_use = pip_movement / 100 if 'Jpy' in currency_pair else pip_movement / 10000
    buys, sells, nones = [], [], []
    trade, prev_year = None, None

    print(f'RUNNING SIMULATION FOR {currency_pair}...')

    for i in range(look_back_size, len(df)):
        curr_date = df.loc[df.index[i], 'Date']

        if prev_year is None or curr_date.year > prev_year:
            prev_year = curr_date.year
            print(prev_year)
            print(f'Buys: {len(buys)}')
            print(f'Sells: {len(sells)}')
            print(f'Nones: {len(nones)}\n')

        curr_bid_open, curr_bid_high, curr_bid_low, curr_ask_open, curr_ask_high, curr_ask_low, curr_mid_open = bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i], ['Bid_Open', 'Bid_High', 'Bid_Low', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Mid_Open']]
        spread = abs(curr_ask_open - curr_bid_open)

        mid_opens = list(bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i - lookback:i + lookforward], 'Mid_Open'])
        mid_highs = list(bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i - lookback:i + lookforward], 'Mid_High'])
        mid_lows = list(bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i - lookback:i + lookforward], 'Mid_Low'])
        mid_closes = list(bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i - lookback:i + lookforward], 'Mid_Close'])

        if each_bar:
            buy_signal = all([mid_opens[j] < mid_closes[j] and abs(mid_opens[j] - mid_closes[j]) >= pip_movement_to_use for j in range(len(mid_opens))])
            sell_signal = all([mid_opens[j] > mid_closes[j] and abs(mid_opens[j] - mid_closes[j]) >= pip_movement_to_use for j in range(len(mid_opens))])

        else:
            buy_signal = all([mid_opens[j] < mid_closes[j] for j in range(len(mid_opens))]) and abs(mid_opens[0] - mid_closes[-1]) >= pip_movement_to_use
            sell_signal = all([mid_opens[j] > mid_closes[j] for j in range(len(mid_opens))]) and abs(mid_opens[0] - mid_closes[-1]) >= pip_movement_to_use

        if use_pullback and buy_signal:
            mid_open1, mid_high1, mid_low1, mid_close1 = bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i - 1], ['Mid_Open', 'Mid_High', 'Mid_Low', 'Mid_Close']]
            buy_signal = mid_open1 > mid_close1 and abs(mid_close1 - mid_open1) <= 0.25 * abs(mid_high1 - mid_low1)

        if use_pullback and sell_signal:
            mid_open1, mid_high1, mid_low1, mid_close1 = bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i - 1], ['Mid_Open', 'Mid_High', 'Mid_Low', 'Mid_Close']]
            sell_signal = mid_open1 < mid_close1 and abs(mid_close1 - mid_open1) <= 0.25 * abs(mid_high1 - mid_low1)

        highest_high, lowest_low = max(mid_highs), min(mid_lows)

        if trade is None:
            if buy_signal:
                open_price = float(curr_ask_open)
                stop_loss = round(lowest_low, rounding)

                if stop_loss < open_price:
                    curr_pips_to_risk = open_price - stop_loss

                    if spread <= curr_pips_to_risk * spread_cutoff:
                        stop_gain = round(open_price + (curr_pips_to_risk * risk_reward_ratio), rounding)

                        n_units = get_n_units('buy', stop_loss, curr_ask_open, curr_bid_open, curr_mid_open, currency_pair)

                        trade = {'open_price': open_price, 'trade_type': 'buy', 'stop_loss': stop_loss,
                                                        'stop_gain': stop_gain, 'pips_risked': round(curr_pips_to_risk, 5),
                                                        'n_units': n_units, 'original_units': n_units, 'start_date': curr_date, 'end_date': None}

            elif sell_signal:
                open_price = float(curr_bid_open)
                stop_loss = round(highest_high, rounding)

                if stop_loss > open_price:
                    curr_pips_to_risk = stop_loss - open_price

                    if spread <= curr_pips_to_risk * spread_cutoff:
                        stop_gain = round(open_price - (curr_pips_to_risk * risk_reward_ratio), rounding)

                        n_units = get_n_units('sell', stop_loss, curr_ask_open, curr_bid_open, curr_mid_open, currency_pair)

                        trade = {'open_price': open_price, 'trade_type': 'sell', 'stop_loss': stop_loss,
                                'stop_gain': stop_gain, 'pips_risked': round(curr_pips_to_risk, 5),
                                'n_units': n_units, 'original_units': n_units, 'start_date': curr_date, 'end_date': None}

        if trade is not None:
            for j in range(i, len(df)):
                curr_date = df.loc[df.index[j], 'Date']
                curr_bid_open, curr_bid_high, curr_bid_low, curr_bid_close, curr_ask_open, curr_ask_high, curr_ask_low, curr_ask_close = bid_ask_mid_prices.loc[bid_ask_mid_prices.index[j], ['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close']]

                if trade['trade_type'] == 'buy' and curr_bid_low <= trade['stop_loss']:
                    nones.append(trade['start_date']) 

                    trade = None
                    break


                if trade['trade_type'] == 'buy' and curr_bid_high >= trade['stop_gain']:
                    trade_amount = (trade['stop_gain'] - trade['open_price']) * trade['n_units'] * value_per_pip
                    day_fees = calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

                    if trade_amount + day_fees > 0:
                        buys.append(trade['start_date'])

                    trade = None
                    break

                if trade['trade_type'] == 'sell' and curr_ask_high >= trade['stop_loss']:
                    nones.append(trade['start_date'])

                    trade = None
                    break

                if trade['trade_type'] == 'sell' and curr_ask_low <= trade['stop_gain']:
                    trade_amount = (trade['open_price'] - trade['stop_gain']) * trade['n_units'] * value_per_pip
                    day_fees = calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

                    if trade_amount + day_fees > 0:
                        sells.append(trade['start_date'])

                    trade = None
                    break
    
    buys_list.append(buys)
    sells_list.append(sells)
    nones_list.append(nones)

    print(f'Buys: {len(buys)}')
    print(f'Sells: {len(sells)}')
    print(f'Nones: {len(nones)}\n')

RUNNING SIMULATION FOR Eur_Usd...
2005
Buys: 0
Sells: 0
Nones: 0

2006
Buys: 20
Sells: 12
Nones: 53

2007
Buys: 33
Sells: 25
Nones: 87

2008
Buys: 43
Sells: 38
Nones: 114

2009
Buys: 124
Sells: 119
Nones: 372

2010
Buys: 192
Sells: 181
Nones: 630

2011
Buys: 233
Sells: 245
Nones: 780

2012
Buys: 296
Sells: 318
Nones: 965

2013
Buys: 314
Sells: 336
Nones: 1036

2014
Buys: 327
Sells: 359
Nones: 1103

2015
Buys: 332
Sells: 370
Nones: 1132

2016
Buys: 356
Sells: 405
Nones: 1214

2017
Buys: 366
Sells: 419
Nones: 1249

2018
Buys: 374
Sells: 422
Nones: 1274

2019
Buys: 381
Sells: 428
Nones: 1305

2020
Buys: 383
Sells: 433
Nones: 1316

2021
Buys: 396
Sells: 445
Nones: 1358

2022
Buys: 403
Sells: 449
Nones: 1364

Buys: 411
Sells: 456
Nones: 1386



In [10]:
buy_indices_list = []
sell_indices_list = []
nones_indices_list = []

for i in range(len(dfs)):
    buys, sells, nones, df = buys_list[i], sells_list[i], nones_list[i], dfs[i]
    
    buy_indices = [df.index[df['Date'] == curr_date] - 1 for curr_date in buys]
    sell_indices = [df.index[df['Date'] == curr_date] - 1 for curr_date in sells]
    nones_indices = [df.index[df['Date'] == curr_date] - 1 for curr_date in nones]

    buy_indices_list.append(buy_indices)
    sell_indices_list.append(sell_indices)
    nones_indices_list.append(nones_indices)

    print(len(buy_indices))
    print(len(sell_indices))
    print(len(nones_indices))

411
456
1386


In [11]:
for df in dfs:
    df.drop(['Bid_Open_x', 'Bid_High_x', 'Bid_Low_x', 'Bid_Close_x', 'Ask_Open_x', 'Ask_High_x', 'Ask_Low_x', 'Ask_Close_x', 'Mid_Open_x', 'Mid_High_x', 'Mid_Low_x', 'Mid_Close_x', 'Volume_x'], axis=1, inplace=True)
    df.drop(['Bid_Open_y', 'Bid_High_y', 'Bid_Low_y', 'Bid_Close_y', 'Ask_Open_y', 'Ask_High_y', 'Ask_Low_y', 'Ask_Close_y', 'Mid_Open_y', 'Mid_High_y', 'Mid_Low_y', 'Mid_Close_y', 'Volume_y'], axis=1, inplace=True)

In [12]:
# i = 5000
# foo = dfs[0].iloc[i - look_back_size + 1:i + 1, 1:]
# curr_date = dfs[0].iloc[i, 0]
# curr_long = df_longs[0].loc[df_longs[0]['Date'] < curr_date]
# foo2 = curr_long.iloc[-look_back_size - 1:-1, 1:]
# foo3 = pd.concat([foo.reset_index(drop=True), foo2.reset_index(drop=True)], axis=1, ignore_index=True)
# correct_shape = grab_image_data(foo3).shape
# correct_shape

i = 5000
foo = dfs[0].iloc[i - look_back_size + 1:i + 1, 1:]
correct_shape = grab_image_data(foo).shape
correct_shape

(250, 22, 22)

In [13]:
# def get_sequential_data():
#     no_actions = []
#     buys = []
#     sells = []

#     for z in range(len(dfs)):
#         df, df_long = dfs[z], df_longs[z]

#         buy_indices, sell_indices, nones_indices = buy_indices_list[z], sell_indices_list[z], nones_indices_list[z]

#         for i in buy_indices:
#             if len(i) == 1:
#                 i = i[0]
#                 seq1 = df.iloc[i - look_back_size + 1:i + 1, 1:]
#                 curr_date = df.iloc[i, 0]
#                 curr_long = df_long.loc[df_long['Date'] < curr_date]
#                 seq2 = curr_long.iloc[-look_back_size - 1:-1, 1:]
#                 seq = pd.concat([seq1.reset_index(drop=True), seq2.reset_index(drop=True)], axis=1, ignore_index=True)

#                 if seq.shape == correct_shape[:-1] and not seq.isnull().values.any():
#                     seq = grab_image_data(seq)
#                     buys.append([seq, np.array([0, 1, 0])])

#         for i in sell_indices:
#             if len(i) == 1:
#                 i = i[0]
#                 seq1 = df.iloc[i - look_back_size + 1:i + 1, 1:]
#                 curr_date = df.iloc[i, 0]
#                 curr_long = df_long.loc[df_long['Date'] < curr_date]
#                 seq2 = curr_long.iloc[-look_back_size - 1:-1, 1:]
#                 seq = pd.concat([seq1.reset_index(drop=True), seq2.reset_index(drop=True)], axis=1, ignore_index=True)

#                 if seq.shape == correct_shape[:-1] and not seq.isnull().values.any():
#                     seq = grab_image_data(seq)
#                     sells.append([seq, np.array([0, 0, 1])])

#         for i in nones_indices:
#             if len(i) == 1:
#                 i = i[0]
#                 seq1 = df.iloc[i - look_back_size + 1:i + 1, 1:]
#                 curr_date = df.iloc[i, 0]
#                 curr_long = df_long.loc[df_long['Date'] < curr_date]
#                 seq2 = curr_long.iloc[-look_back_size - 1:-1, 1:]
#                 seq = pd.concat([seq1.reset_index(drop=True), seq2.reset_index(drop=True)], axis=1, ignore_index=True)

#                 if seq.shape == correct_shape[:-1] and not seq.isnull().values.any():
#                     seq = grab_image_data(seq)
#                     no_actions.append([seq, np.array([1, 0, 0])])

#     np.random.shuffle(no_actions)
#     np.random.shuffle(buys)
#     np.random.shuffle(sells)

#     lower = min(len(no_actions), len(buys), len(sells))

#     no_actions = no_actions[:int(lower * 1.2)]

#     sequential_data = no_actions + buys + sells
#     np.random.shuffle(sequential_data)

#     return sequential_data

def get_sequential_data():
    no_actions = []
    buys = []
    sells = []

    for z in range(len(dfs)):
        df, df_long = dfs[z], df_longs[z]

        buy_indices, sell_indices, nones_indices = buy_indices_list[z], sell_indices_list[z], nones_indices_list[z]

        for i in buy_indices:
            if len(i) == 1:
                i = i[0]
                seq = df.iloc[i - look_back_size + 1:i + 1, 1:]

                if seq.shape == correct_shape[:-1] and not seq.isnull().values.any():
                    seq = grab_image_data(seq)
                    buys.append([seq, np.array([0, 1, 0])])

        for i in sell_indices:
            if len(i) == 1:
                i = i[0]
                seq = df.iloc[i - look_back_size + 1:i + 1, 1:]

                if seq.shape == correct_shape[:-1] and not seq.isnull().values.any():
                    seq = grab_image_data(seq)
                    sells.append([seq, np.array([0, 0, 1])])

        for i in nones_indices:
            if len(i) == 1:
                i = i[0]
                seq = df.iloc[i - look_back_size + 1:i + 1, 1:]

                if seq.shape == correct_shape[:-1] and not seq.isnull().values.any():
                    seq = grab_image_data(seq)
                    no_actions.append([seq, np.array([1, 0, 0])])

    np.random.shuffle(no_actions)
    np.random.shuffle(buys)
    np.random.shuffle(sells)

    lower = min(len(no_actions), len(buys), len(sells))

    no_actions = no_actions[:int(lower * 1.1)]

    sequential_data = no_actions + buys + sells
    np.random.shuffle(sequential_data)

    return sequential_data

In [14]:
sequential_data = get_sequential_data()

In [15]:
len(sequential_data)

1319

In [16]:
training_proportion = 0.70
train_test_cutoff_index = int(len(sequential_data) * training_proportion)

train_set = sequential_data[0:train_test_cutoff_index]
test_set = sequential_data[train_test_cutoff_index:]

print('Dataset shapes:')
print(len(train_set))
print(len(test_set))

Dataset shapes:
923
396


In [17]:
x_train = []
y_train = []

for seq, target in train_set:
  x_train.append(seq)
  y_train.append(target)

x_test = []
y_test = []

for seq, target in test_set:
  x_test.append(seq)
  y_test.append(target)

x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

In [18]:
x_train.shape

(923, 250, 22, 22)

In [19]:
y_train.shape

(923, 3)

In [21]:
# # Number of possible actions to take - determines the output dimension of the
# #  neural network
# n_actions = 3
# input_data_shape = x_train.shape[1:]

# model = Sequential()

# model.add(Conv2D(filters = 32, kernel_size = (3,3), padding ='Same', activation ='relu', input_shape = input_data_shape))
# model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'))

# model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
# model.add(Dropout(0.25))

# model.add(Flatten())
# model.add(Dense(128, activation = "relu"))
# model.add(Dropout(0.5))
# model.add(Dense(n_actions, activation = "softmax"))

n_actions = 3
input_data_shape = x_train.shape[1:]

model = Sequential()

model.add(Conv2D(filters = 16, kernel_size = (3,3), padding ='Same', activation ='relu', input_shape = input_data_shape))
model.add(Conv2D(filters = 32, kernel_size = (3,3), padding = 'Same', activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'))

model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))

model.add(Flatten())
# model.add(Dense(128, activation = "relu"))
# model.add(Dropout(0.5))
# model.add(Dense(128, activation = "relu"))
# model.add(Dropout(0.5))
model.add(Dense(16, activation = "relu"))
model.add(Dropout(0.25))
# model.add(Dense(32, activation = "relu"))
model.add(Dense(n_actions, activation = "softmax"))

In [22]:
# Hyperparameters
n_epochs = 500
batch_size = 32
n_steps = len(x_train) // batch_size 
mean_loss = tf.keras.metrics.Mean() 
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.categorical_crossentropy
metrics = [tf.keras.metrics.CategoricalAccuracy()]
# loss_fn = tf.keras.losses.MeanAbsoluteError
# metrics = [tf.keras.metrics.MeanAbsoluteError()]

In [23]:
early_stop = EarlyStopping(monitor='val_accuracy', verbose=1, patience=n_epochs)
model_checkpoint = ModelCheckpoint(f'/Users/mymac/forex_bar_movement_cnn_{currencies[0]}', monitor='val_accuracy', save_best_only=True, verbose=1)

optimizer = Adam()

model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [24]:
history = model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    validation_data=(x_test, y_test),
    callbacks=[early_stop, model_checkpoint]
)

Epoch 1/500
Epoch 00001: val_accuracy improved from -inf to 0.32071, saving model to /Users/mymac/forex_bar_movement_cnn_Eur_Usd
INFO:tensorflow:Assets written to: /Users/mymac/forex_bar_movement_cnn_Eur_Usd/assets
Epoch 2/500
Epoch 00002: val_accuracy did not improve from 0.32071
Epoch 3/500
Epoch 00003: val_accuracy did not improve from 0.32071
Epoch 4/500
Epoch 00004: val_accuracy did not improve from 0.32071
Epoch 5/500
Epoch 00005: val_accuracy did not improve from 0.32071
Epoch 6/500
Epoch 00006: val_accuracy did not improve from 0.32071
Epoch 7/500
Epoch 00007: val_accuracy did not improve from 0.32071
Epoch 8/500
Epoch 00008: val_accuracy did not improve from 0.32071
Epoch 9/500
Epoch 00009: val_accuracy did not improve from 0.32071
Epoch 10/500
Epoch 00010: val_accuracy did not improve from 0.32071
Epoch 11/500
Epoch 00011: val_accuracy did not improve from 0.32071
Epoch 12/500
Epoch 00012: val_accuracy did not improve from 0.32071
Epoch 13/500
Epoch 00013: val_accuracy did no

KeyboardInterrupt: 