In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import math
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from pickle import dump, load
import json
import time
from collections import deque
import warnings
from pickle import dump, load

file_path = '/Users/mymac/Google Drive/My Drive/Forex_Robot/'

In [2]:
news = pd.read_csv(file_path + 'events_2018-2022.csv')
news = news.rename(columns={'Start': 'Date'})
news.Date = pd.to_datetime(news.Date)
news = news.loc[(news['Currency'] == 'AUD') | (news['Currency'] == 'USD')]
news.drop(['Id', 'Name', 'Currency'], axis=1, inplace=True)
news.drop(news[(news['Impact'] != 'LOW') & (news['Impact'] != 'MEDIUM') & (news['Impact'] != 'HIGH')].index, inplace=True)
news.loc[news['Impact'] == 'LOW', 'Impact'] = 1
news.loc[news['Impact'] == 'MEDIUM', 'Impact'] = 2
news.loc[news['Impact'] == 'HIGH', 'Impact'] = 3
news = news.groupby('Date')['Impact'].mean().reset_index()
news.head()

Unnamed: 0,Date,Impact
0,2018-01-01 22:30:00,2.0
1,2018-01-02 05:30:00,1.0
2,2018-01-02 14:45:00,2.0
3,2018-01-02 16:30:00,1.0
4,2018-01-02 18:00:00,1.0


In [3]:
df_files = [file_path + 'Oanda_Aud_Usd_M5_2018-2022.csv', file_path + 'Oanda_Gbp_Usd_M5_2018-2022.csv', file_path + 'Oanda_Nzd_Usd_M5_2018-2022.csv', file_path + 'Oanda_Usd_Cad_M5_2018-2022.csv', file_path + 'Oanda_Eur_Usd_M5_2018-2022.csv']
dfs = []

for df_file in df_files:
    df = pd.read_csv(df_file)
    df.Date = pd.to_datetime(df.Date)
    df.reset_index(drop=True, inplace=True)

    df = pd.merge(df, news, how='left', on='Date')
    df.reset_index(drop=True, inplace=True)
    df = df.fillna(method='ffill')
    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

    dfs.append(df)

In [4]:
for df in dfs:
    # Add hour of day, day of week, and month of year (for additional features)
    # Use sine and cosine to keep the cyclic nature of hour, day, and month
    #  (December is closer to January than October, midnight is closer to 1 am than
    #  10 pm, etc.)
    df['sin_hour'] = np.sin(2 * np.pi * df['Date'].dt.hour / 24)
    df['cos_hour'] = np.cos(2 * np.pi * df['Date'].dt.hour / 24)
    df['sin_day'] = np.sin(2 * np.pi * df['Date'].dt.day / 7)
    df['cos_day'] = np.cos(2 * np.pi * df['Date'].dt.day / 7)

In [5]:
def psar(barsdata, iaf=0.02, maxaf=0.2):
    length = len(barsdata)
    high = list(barsdata['Mid_High'])
    low = list(barsdata['Mid_Low'])
    close = list(barsdata['Mid_Close'])
    psar = close[0:len(close)]
    bull = True
    af = iaf
    hp = high[0]
    lp = low[0]
    for i in range(2, length):
        if bull:
            psar[i] = psar[i - 1] + af * (hp - psar[i - 1])
        else:
            psar[i] = psar[i - 1] + af * (lp - psar[i - 1])
        reverse = False
        if bull:
            if low[i] < psar[i]:
                bull = False
                reverse = True
                psar[i] = hp
                lp = low[i]
                af = iaf
        else:
            if high[i] > psar[i]:
                bull = True
                reverse = True
                psar[i] = lp
                hp = high[i]
                af = iaf
        if not reverse:
            if bull:
                if high[i] > hp:
                    hp = high[i]
                    af = min(af + iaf, maxaf)
                if low[i - 1] < psar[i]:
                    psar[i] = low[i - 1]
                if low[i - 2] < psar[i]:
                    psar[i] = low[i - 2]
            else:
                if low[i] < lp:
                    lp = low[i]
                    af = min(af + iaf, maxaf)
                if high[i - 1] > psar[i]:
                    psar[i] = high[i - 1]
                if high[i - 2] > psar[i]:
                    psar[i] = high[i - 2]
    return psar

def atr(barsdata, lookback=14):
    high_low = barsdata['Mid_High'] - barsdata['Mid_Low']
    high_close = np.abs(
        barsdata['Mid_High'] - barsdata['Mid_Close'].shift())
    low_close = np.abs(
        barsdata['Mid_Low'] - barsdata['Mid_Close'].shift())
    ranges = pd.concat([high_low, high_close, low_close], axis=1)
    true_range = np.max(ranges, axis=1)

    return true_range.rolling(lookback).sum() / lookback

def rsi(barsdata, periods=14):
    close_delta = barsdata['Mid_Close'].diff()

    up = close_delta.clip(lower=0)
    down = -1 * close_delta.clip(upper=0)
    ma_up = up.ewm(com=periods - 1, adjust=True,
                    min_periods=periods).mean()
    ma_down = down.ewm(com=periods - 1, adjust=True,
                        min_periods=periods).mean()

    rsi = ma_up / ma_down
    rsi = 100 - (100/(1 + rsi))

    return rsi

def adx(high, low, close, lookback=14):
    plus_dm = high.diff()
    minus_dm = low.diff()
    plus_dm[plus_dm < 0] = 0
    minus_dm[minus_dm > 0] = 0

    tr1 = pd.DataFrame(high - low)
    tr2 = pd.DataFrame(abs(high - close.shift(1)))
    tr3 = pd.DataFrame(abs(low - close.shift(1)))
    frames = [tr1, tr2, tr3]
    tr = pd.concat(frames, axis=1, join='inner').max(axis=1)
    atr = tr.rolling(lookback).mean()

    plus_di = 100 * (plus_dm.ewm(alpha=1/lookback).mean() / atr)
    minus_di = abs(100 * (minus_dm.ewm(alpha=1/lookback).mean() / atr))
    dx = (abs(plus_di - minus_di) / abs(plus_di + minus_di)) * 100
    adx = ((dx.shift(1) * (lookback - 1)) + dx) / lookback
    adx_smooth = adx.ewm(alpha=1/lookback).mean()

    return adx_smooth

def stoch(high, low, close, lookback=14):
    high_lookback = high.rolling(lookback).max()
    low_lookback = low.rolling(lookback).min()
    slow_k = (close - low_lookback) * 100 / \
        (high_lookback - low_lookback)
    slow_d = slow_k.rolling(3).mean()

    return slow_k, slow_d

def stoch_rsi(data, k_window=3, d_window=3, window=14):
    min_val = data.rolling(window=window, center=False).min()
    max_val = data.rolling(window=window, center=False).max()

    stoch = ((data - min_val) / (max_val - min_val)) * 100

    slow_k = stoch.rolling(window=k_window, center=False).mean()

    slow_d = slow_k.rolling(window=d_window, center=False).mean()

    return slow_k, slow_d

def n_macd(macd, macdsignal, lookback=50):
    n_macd = 2 * (((macd - macd.rolling(lookback).min()) /
                    (macd.rolling(lookback).max() - macd.rolling(lookback).min()))) - 1
    n_macdsignal = 2 * (((macdsignal - macdsignal.rolling(lookback).min()) / (
        macdsignal.rolling(lookback).max() - macdsignal.rolling(lookback).min()))) - 1

    return n_macd, n_macdsignal

def chop(df, lookback=14):
    atr1 = atr(df, lookback=1)
    high, low = df['Mid_High'], df['Mid_Low']

    chop = np.log10(atr1.rolling(lookback).sum(
    ) / (high.rolling(lookback).max() - low.rolling(lookback).min())) / np.log10(lookback)

    return chop

def vo(volume, short_lookback=18, long_lookback=36):
    short_ema = pd.Series.ewm(volume, span=short_lookback).mean()
    long_ema = pd.Series.ewm(volume, span=long_lookback).mean()

    volume_oscillator = (short_ema - long_ema) / long_ema

    return volume_oscillator

In [6]:
bid_ask_mid_prices_list = []

for df in dfs:
    # Add technical indicators (for additional features)
    df['ema200'] = pd.Series.ewm(df['Mid_Close'], span=200).mean()
    df['ema100'] = pd.Series.ewm(df['Mid_Close'], span=100).mean()
    df['ema50'] = pd.Series.ewm(df['Mid_Close'], span=50).mean()
    df['ema25'] = pd.Series.ewm(df['Mid_Close'], span=25).mean()

    df['atr'] = atr(df)
    df['rsi'] = rsi(df)
    df['rsi_sma'] = df['rsi'].rolling(50).mean()
    df['adx'] = adx(df['Mid_High'], df['Mid_Low'], df['Mid_Close'])
    df['macd'] = pd.Series.ewm(df['Mid_Close'], span=12).mean() - pd.Series.ewm(df['Mid_Close'], span=26).mean()
    df['macdsignal'] = pd.Series.ewm(df['macd'], span=9).mean()
    df['slowk'], df['slowd'] = stoch(df['Mid_High'], df['Mid_Low'], df['Mid_Close'])

    df['sar'] = psar(df)

    df['chop36'] = chop(df, lookback=36)

    df['vo'] = vo(df['Volume'])

    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Extract the bid and ask prices and fractals and remove them from the df
    bid_ask_mid_prices = df[['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close', 'Mid_Open', 'Mid_High', 'Mid_Low', 'Mid_Close']]
    df.drop(['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close'], axis=1, inplace=True)

    bid_ask_mid_prices_list.append(bid_ask_mid_prices)


In [7]:
buys_list = []
sells_list = []
nones_list = []

value_per_pip = 1.0
amounts_per_day = [-0.00008, -0.0001, -0.00012]
spread_cutoff = 0.00025
risk_reward_ratio = 1.5
trade = None

df_currencies = ['AUD_USD', 'GBP_USD', 'NZD_USD', 'USD_CAD', 'EUR_USD']

def calculate_day_fees(start_date, end_date, n_units):
    curr_fee = np.random.choice(amounts_per_day, p=[0.25, 0.50, 0.25]) * n_units
    num_days = np.busday_count(start_date.date(), end_date.date())

    return num_days * curr_fee

def get_n_units(trade_type, stop_loss, ask_open, bid_open, mid_open, currency_pair):
    _, second = currency_pair.split('_')
  
    pips_to_risk = ask_open - stop_loss if trade_type == 'buy' else stop_loss - bid_open
    pips_to_risk_calc = pips_to_risk * 10000 if second != 'JPY' else pips_to_risk * 100

    if second == 'USD':
        per_pip = 0.0001

    else:
        per_pip = 0.0001 / mid_open if second != 'JPY' else 0.01 / mid_open

    n_units = int(50 / (pips_to_risk_calc * per_pip))

    return n_units

for idx in range(len(dfs) - 1):
    df = dfs[idx]
    bid_ask_mid_prices = bid_ask_mid_prices_list[idx]
    buys = []
    sells = []
    nones = []

    for i in range(12, len(df)):
        curr_date = df.loc[df.index[i], 'Date']
        ema200_2, ema100_2, ema50_2, ema25_2, atr2, sar2, rsi2, rsi_sma2, mid_open2, mid_close2, mid_low2, mid_high2, vo2 = df.loc[df.index[i - 2], ['ema200', 'ema100', 'ema50', 'ema25', 'atr', 'sar', 'rsi', 'rsi_sma', 'Mid_Open', 'Mid_Close', 'Mid_Low', 'Mid_High', 'vo']]
        ema200_1, ema100_1, ema50_1, ema25_1, atr1, sar1, rsi1, rsi_sma1, mid_open1, mid_close1, mid_low1, mid_high1, vo1 = df.loc[df.index[i - 1], ['ema200', 'ema100', 'ema50', 'ema25', 'atr', 'sar', 'rsi', 'rsi_sma', 'Mid_Open', 'Mid_Close', 'Mid_Low', 'Mid_High', 'vo']]
        curr_ao = bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i], 'Ask_Open']
        curr_bo = bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i], 'Bid_Open']
        impact = df.loc[df.index[i - 1], 'Impact']
        spread = abs(curr_ao - curr_bo)
        enough_volatility = spread <= spread_cutoff
        macd2, macdsignal2 = df.loc[df.index[i - 2], ['macd', 'macdsignal']]
        macd1, macdsignal1 = df.loc[df.index[i - 1], ['macd', 'macdsignal']]
        macd_vals = [0, macd2, macdsignal2, macd1, macdsignal1]
        curr_bid_open, curr_bid_high, curr_bid_low, curr_bid_close, curr_ask_open, curr_ask_high, curr_ask_low, curr_ask_close, curr_mid_open = \
            bid_ask_mid_prices.loc[bid_ask_mid_prices.index[i], ['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High',
                                'Ask_Low', 'Ask_Close', 'Mid_Open']]
        chop2 = df.loc[df.index[i - 2], 'chop36']
        chop1 = df.loc[df.index[i - 1], 'chop36']

        emas_buy_signal = ema200_1 < mid_close1
        emas_sell_signal = ema200_1 > mid_close1

        # macd_buy_signal = macd2 < macdsignal2 and macd1 > macdsignal1 and max(macd_vals) == 0
        # macd_sell_signal = macd2 > macdsignal2 and macd1 < macdsignal1 and min(macd_vals) == 0
        macd_buy_signal = macd2 < macdsignal2 and macd1 > macdsignal1 and max(macd_vals) == 0
        macd_sell_signal = macd2 > macdsignal2 and macd1 < macdsignal1 and min(macd_vals) == 0

        sar_buy_signal = sar1 < min([mid_open1, mid_close1])
        sar_sell_signal = sar1 > max([mid_open1, mid_close1])

        rsi_buy_signal = rsi1 > rsi_sma1
        rsi_sell_signal = rsi1 < rsi_sma1

        # chop_signal = max([chop2, chop1]) < 0.55
        # chop_signal = chop1 < 0.50

        vo_signal = min([vo2, vo1]) > 0.0

        if trade is None:
            if macd_buy_signal and emas_buy_signal and sar_buy_signal and rsi_buy_signal and enough_volatility and vo_signal:
                open_price = float(curr_ao)
                pullback = sar1

                stop_loss = round(pullback, 5)

                if stop_loss < open_price:
                    curr_pips_to_risk = open_price - stop_loss

                    stop_gain = round(open_price + (curr_pips_to_risk * risk_reward_ratio), 5)

                    n_units = get_n_units('buy', stop_loss, curr_ask_open, curr_bid_open, curr_mid_open, df_currencies[idx])

                    trade = {'open_price': open_price, 'trade_type': 'buy', 'stop_loss': stop_loss,
                                                    'stop_gain': stop_gain, 'pips_risked': round(curr_pips_to_risk, 5),
                                                    'n_units': n_units, 'original_units': n_units, 'start_date': curr_date, 'end_date': None}

            elif macd_sell_signal and emas_sell_signal and sar_sell_signal and rsi_sell_signal and enough_volatility and vo_signal:
                open_price = float(curr_bo)
                pullback = sar1

                stop_loss = round(pullback, 5)

                if stop_loss > open_price:
                    curr_pips_to_risk = stop_loss - open_price

                    stop_gain = round(open_price - (curr_pips_to_risk * risk_reward_ratio), 5)

                    n_units = get_n_units('sell', stop_loss, curr_ask_open, curr_bid_open, curr_mid_open, df_currencies[idx])

                    trade = {'open_price': open_price, 'trade_type': 'sell', 'stop_loss': stop_loss,
                            'stop_gain': stop_gain, 'pips_risked': round(curr_pips_to_risk, 5),
                            'n_units': n_units, 'original_units': n_units, 'start_date': curr_date, 'end_date': None}

        if trade is not None:
            for j in range(i, len(df)):
                curr_date = df.loc[df.index[j], 'Date']
                curr_bid_open, curr_bid_high, curr_bid_low, curr_bid_close, curr_ask_open, curr_ask_high, curr_ask_low, curr_ask_close = bid_ask_mid_prices.loc[bid_ask_mid_prices.index[j], ['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close']]

                if trade['trade_type'] == 'buy' and curr_bid_low <= trade['stop_loss']:
                    nones.append(trade['start_date'])  

                    trade = None
                    break

                if trade['trade_type'] == 'buy' and curr_bid_high >= trade['stop_gain']:
                    trade_amount = 50 * risk_reward_ratio
                    day_fees = calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

                    if trade_amount + day_fees > 0:
                        buys.append(trade['start_date'])

                    trade = None
                    break

                if trade['trade_type'] == 'sell' and curr_ask_high >= trade['stop_loss']:
                    nones.append(trade['start_date'])

                    trade = None
                    break

                if trade['trade_type'] == 'sell' and curr_ask_low <= trade['stop_gain']:
                    trade_amount = 50 * risk_reward_ratio
                    day_fees = calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

                    if trade_amount + day_fees > 0:
                        sells.append(trade['start_date'])

                    trade = None
                    break

    buys_list.append(buys)
    sells_list.append(sells)
    nones_list.append(nones)

In [13]:
buy_indices_list = []
sell_indices_list = []
nones_indices_list = []

for i in range(len(dfs) - 1):
    df = dfs[i]
    buys, sells, nones = buys_list[i], sells_list[i], nones_list[i]
    
    buy_indices = [df.index[df['Date'] == curr_date] - 1 for curr_date in buys]
    sell_indices = [df.index[df['Date'] == curr_date] - 1 for curr_date in sells]
    nones_indices = [df.index[df['Date'] == curr_date] - 1 for curr_date in nones]

    buy_indices_list.append(buy_indices)
    sell_indices_list.append(sell_indices)
    nones_indices_list.append(nones_indices)

    print(len(buy_indices))
    print(len(sell_indices))
    print(len(nones_indices))

175
178
606
167
169
598
136
142
602
161
178
734


In [14]:
def get_sequential_data():
    no_actions = []
    buys = []
    sells = []

    for z in range(len(dfs) - 1):
        df = dfs[z]
        buy_indices, sell_indices, nones_indices = buy_indices_list[z], sell_indices_list[z], nones_indices_list[z]

        for i in buy_indices:
            if len(i) == 1:
                idx = i[0]
                seq = df.iloc[idx, 1:]
                buys.append([seq, 'buy'])

        for i in sell_indices:
            if len(i) == 1:
                idx = i[0]
                seq = df.iloc[idx, 1:]
                sells.append([seq, 'sell'])

        for i in nones_indices:
            if len(i) == 1:
                idx = i[0]
                seq = df.iloc[idx, 1:]
                no_actions.append([seq, 'none'])

    np.random.shuffle(no_actions)
    np.random.shuffle(buys)
    np.random.shuffle(sells)

    lower = min(len(no_actions), len(buys), len(sells))

    no_actions = no_actions[:int(lower * 2)]

    sequential_data = no_actions + buys + sells
    np.random.shuffle(sequential_data)

    return sequential_data

In [15]:
sequential_data = get_sequential_data()

In [16]:
len(sequential_data)

2584

In [17]:
training_proportion = 0.75
train_test_cutoff_index = int(len(sequential_data) * training_proportion)

train_set = sequential_data[0:train_test_cutoff_index]
test_set = sequential_data[train_test_cutoff_index:]

print('Dataset shapes:')
print(len(train_set))
print(len(test_set))

Dataset shapes:
1938
646


In [18]:
x_train = []
y_train = []

for seq, target in train_set:
  x_train.append(seq)
  y_train.append(target)

x_test = []
y_test = []

for seq, target in test_set:
  x_test.append(seq)
  y_test.append(target)

x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

In [19]:
x_train.shape

(1938, 25)

In [20]:
y_train.shape

(1938,)

In [21]:
scaler = StandardScaler()

x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [22]:
param_grid = {'C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50],
              'gamma':[0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50], 'kernel': ['rbf', 'linear']}

In [23]:
grid_search = GridSearchCV(SVC(), param_grid, cv=6, return_train_score=True)
grid_search.fit(x_train_scaled, y_train)

GridSearchCV(cv=6, estimator=SVC(),
             param_grid={'C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10,
                               50],
                         'gamma': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10,
                                   50],
                         'kernel': ['rbf', 'linear']},
             return_train_score=True)

In [25]:
print("Best score: {:.4f}".format(grid_search.best_score_))
print("Best parameters: {}".format(grid_search.best_params_))

Best score: 0.5243
Best parameters: {'C': 50, 'gamma': 0.01, 'kernel': 'rbf'}


In [26]:
svm = SVC(C=50, gamma=0.01, kernel='rbf', probability=True)
svm.fit(x_train_scaled, y_train)

SVC(C=50, gamma=0.01, probability=True)

In [27]:
y_pred = svm.predict(x_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.5015479876160991


In [28]:
test_df, test_bid_ask_mid_prices = dfs[-1], bid_ask_mid_prices_list[-1]

In [29]:
row = np.array(test_df.iloc[1 - 1, 1:]).reshape(1, -1)
row = scaler.transform(row)
pred = svm.predict(row)
pred[0]

'none'

In [30]:
row = np.array(test_df.iloc[1 - 1, 1:]).reshape(1, -1)
row = scaler.transform(row)
pred = svm.predict_proba(row)
pred[0]

array([0.31508767, 0.57086176, 0.11405057])

In [31]:
svm.classes_

array(['buy', 'none', 'sell'], dtype='<U4')

In [58]:
test_df = pd.read_csv(file_path + 'Oanda_Usd_Chf_M5_2018-2022.csv')
test_df.Date = pd.to_datetime(test_df.Date)
test_df.reset_index(drop=True, inplace=True)



test_df = pd.merge(test_df, news, how='left', on='Date')
test_df.reset_index(drop=True, inplace=True)
test_df = test_df.fillna(method='ffill')
test_df.dropna(inplace=True)
test_df.reset_index(drop=True, inplace=True)

test_df['sin_hour'] = np.sin(2 * np.pi * test_df['Date'].dt.hour / 24)
test_df['cos_hour'] = np.cos(2 * np.pi * test_df['Date'].dt.hour / 24)
test_df['sin_day'] = np.sin(2 * np.pi * test_df['Date'].dt.day / 7)
test_df['cos_day'] = np.cos(2 * np.pi * test_df['Date'].dt.day / 7)

In [59]:
test_df['ema200'] = pd.Series.ewm(test_df['Mid_Close'], span=200).mean()
test_df['ema100'] = pd.Series.ewm(test_df['Mid_Close'], span=100).mean()
test_df['ema50'] = pd.Series.ewm(test_df['Mid_Close'], span=50).mean()
test_df['ema25'] = pd.Series.ewm(test_df['Mid_Close'], span=25).mean()

test_df['atr'] = atr(test_df)
test_df['rsi'] = rsi(test_df)
test_df['rsi_sma'] = test_df['rsi'].rolling(50).mean()
test_df['adx'] = adx(test_df['Mid_High'], test_df['Mid_Low'], test_df['Mid_Close'])
test_df['macd'] = pd.Series.ewm(test_df['Mid_Close'], span=12).mean() - pd.Series.ewm(test_df['Mid_Close'], span=26).mean()
test_df['macdsignal'] = pd.Series.ewm(test_df['macd'], span=9).mean()
test_df['slowk'], test_df['slowd'] = stoch(test_df['Mid_High'], test_df['Mid_Low'], test_df['Mid_Close'])

test_df['sar'] = psar(test_df)

test_df['chop36'] = chop(test_df, lookback=36)

test_df['vo'] = vo(test_df['Volume'])

test_df.dropna(inplace=True)
test_df.reset_index(drop=True, inplace=True)

# Extract the bid and ask prices and fractals and remove them from the df
test_bid_ask_mid_prices = test_df[['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close', 'Mid_Open', 'Mid_High', 'Mid_Low', 'Mid_Close']]
test_df.drop(['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close'], axis=1, inplace=True)

In [60]:
test_df

Unnamed: 0,Date,Mid_Open,Mid_High,Mid_Low,Mid_Close,Volume,Impact,sin_hour,cos_hour,sin_day,...,rsi,rsi_sma,adx,macd,macdsignal,slowk,slowd,sar,chop36,vo
0,2018-01-02 03:45:00,0.97328,0.97334,0.97328,0.97332,23,2.0,0.707107,0.707107,0.974928,...,33.365007,45.635326,41.645791,-0.000241,-0.000185,19.642857,23.426492,0.973872,0.445164,-0.172480
1,2018-01-02 03:50:00,0.97332,0.97332,0.97332,0.97332,1,2.0,0.707107,0.707107,0.974928,...,33.365007,45.178600,41.831298,-0.000244,-0.000197,20.183486,20.558361,0.973779,0.439119,-0.215094
2,2018-01-02 03:55:00,0.97332,0.97348,0.97332,0.97348,92,2.0,0.707107,0.707107,0.974928,...,40.036701,44.787272,41.907799,-0.000230,-0.000203,34.862385,24.896243,0.973698,0.436253,-0.162972
3,2018-01-02 04:00:00,0.97347,0.97347,0.97340,0.97345,9,2.0,0.866025,0.500000,0.974928,...,39.243313,44.394152,40.743441,-0.000219,-0.000207,36.458333,30.501402,0.973626,0.428321,-0.198010
4,2018-01-02 04:05:00,0.97346,0.97352,0.97340,0.97341,60,2.0,0.866025,0.500000,0.974928,...,38.157560,43.899134,39.636755,-0.000211,-0.000208,32.291667,34.537462,0.973563,0.410396,-0.177188
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
291887,2021-12-31 21:30:00,0.91176,0.91183,0.91172,0.91180,6,1.0,-0.707107,0.707107,0.433884,...,49.162892,50.330573,14.373456,0.000082,0.000095,32.941176,42.745098,0.912356,0.464347,-0.160191
291888,2021-12-31 21:40:00,0.91184,0.91213,0.91184,0.91205,10,1.0,-0.707107,0.707107,0.433884,...,55.271343,50.673687,14.334681,0.000087,0.000094,62.352941,39.607843,0.912342,0.480360,-0.158378
291889,2021-12-31 21:45:00,0.91209,0.91217,0.91194,0.91194,17,1.0,-0.707107,0.707107,0.433884,...,52.293931,50.966351,13.710778,0.000080,0.000091,49.411765,48.235294,0.912328,0.490158,-0.124925
291890,2021-12-31 21:50:00,0.91188,0.91192,0.91188,0.91192,6,1.0,-0.707107,0.707107,0.433884,...,51.748104,51.165788,13.263456,0.000073,0.000087,47.058824,52.941176,0.912315,0.489681,-0.144342


In [61]:
proba_threshold = 0.50

value_per_pip = 1.0
amounts_per_day = [-0.00008, -0.0001, -0.00012]
spread_cutoff = 0.00025
risk_reward_ratio = 1.5
reward = 0
day_fees = 0
buys = 0
sells = 0
n_wins = 0
n_losses = 0
trade = None

def calculate_day_fees(start_date, end_date, n_units):
    curr_fee = np.random.choice(amounts_per_day, p=[0.25, 0.50, 0.25]) * n_units
    num_days = np.busday_count(start_date.date(), end_date.date())

    return num_days * curr_fee

def get_n_units(trade_type, stop_loss, ask_open, bid_open, mid_open, currency_pair):
    _, second = currency_pair.split('_')
  
    pips_to_risk = ask_open - stop_loss if trade_type == 'buy' else stop_loss - bid_open
    pips_to_risk_calc = pips_to_risk * 10000 if second != 'JPY' else pips_to_risk * 100

    if second == 'USD':
        per_pip = 0.0001

    else:
        per_pip = 0.0001 / mid_open if second != 'JPY' else 0.01 / mid_open

    n_units = int(50 / (pips_to_risk_calc * per_pip))

    return n_units

for i in range(12, len(test_df)):
    curr_date = test_df.loc[test_df.index[i], 'Date']
    ema200_2, ema100_2, ema50_2, ema25_2, atr2, sar2, rsi2, rsi_sma2, mid_open2, mid_close2, mid_low2, mid_high2, vo2 = test_df.loc[test_df.index[i - 2], ['ema200', 'ema100', 'ema50', 'ema25', 'atr', 'sar', 'rsi', 'rsi_sma', 'Mid_Open', 'Mid_Close', 'Mid_Low', 'Mid_High', 'vo']]
    ema200_1, ema100_1, ema50_1, ema25_1, atr1, sar1, rsi1, rsi_sma1, mid_open1, mid_close1, mid_low1, mid_high1, vo1 = test_df.loc[test_df.index[i - 1], ['ema200', 'ema100', 'ema50', 'ema25', 'atr', 'sar', 'rsi', 'rsi_sma', 'Mid_Open', 'Mid_Close', 'Mid_Low', 'Mid_High', 'vo']]
    curr_ao = test_bid_ask_mid_prices.loc[test_bid_ask_mid_prices.index[i], 'Ask_Open']
    curr_bo = test_bid_ask_mid_prices.loc[test_bid_ask_mid_prices.index[i], 'Bid_Open']
    spread = abs(curr_ao - curr_bo)
    enough_volatility = spread <= spread_cutoff
    macd2, macdsignal2 = test_df.loc[test_df.index[i - 2], ['macd', 'macdsignal']]
    macd1, macdsignal1 = test_df.loc[test_df.index[i - 1], ['macd', 'macdsignal']]
    macd_vals = [0, macd2, macdsignal2, macd1, macdsignal1]
    curr_bid_open, curr_bid_high, curr_bid_low, curr_bid_close, curr_ask_open, curr_ask_high, curr_ask_low, curr_ask_close, curr_mid_open = \
        test_bid_ask_mid_prices.loc[test_bid_ask_mid_prices.index[i], ['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High',
                            'Ask_Low', 'Ask_Close', 'Mid_Open']]
    chop2 = test_df.loc[test_df.index[i - 2], 'chop36']
    chop1 = test_df.loc[test_df.index[i - 1], 'chop36']

    emas_buy_signal = ema200_1 < mid_close1
    emas_sell_signal = ema200_1 > mid_close1

    # macd_buy_signal = macd2 < macdsignal2 and macd1 > macdsignal1 and max(macd_vals) == 0
    # macd_sell_signal = macd2 > macdsignal2 and macd1 < macdsignal1 and min(macd_vals) == 0
    macd_buy_signal = macd2 < macdsignal2 and macd1 > macdsignal1 and max(macd_vals) == 0
    macd_sell_signal = macd2 > macdsignal2 and macd1 < macdsignal1 and min(macd_vals) == 0

    sar_buy_signal = sar1 < min([mid_open1, mid_close1])
    sar_sell_signal = sar1 > max([mid_open1, mid_close1])

    rsi_buy_signal = rsi1 > rsi_sma1
    rsi_sell_signal = rsi1 < rsi_sma1

    # chop_signal = max([chop2, chop1]) < 0.55

    vo_signal = min([vo2, vo1]) > 0

    if trade is None:
        if macd_buy_signal and emas_buy_signal and sar_buy_signal and rsi_buy_signal and enough_volatility and vo_signal:
            row = np.array(test_df.iloc[i - 1, 1:]).reshape(1, -1)
            row = scaler.transform(row)
            pred = svm.predict(row)[0]
            proba = svm.predict_proba(row)[0][0]

            # if pred == 'buy' and proba > proba_threshold:
            if pred == 'buy':
                open_price = float(curr_ao)
                pullback = sar1

                stop_loss = round(pullback, 5)

                if stop_loss < open_price:
                    curr_pips_to_risk = open_price - stop_loss

                    stop_gain = round(open_price + (curr_pips_to_risk * risk_reward_ratio), 5)

                    n_units = get_n_units('buy', stop_loss, curr_ask_open, curr_bid_open, curr_mid_open, 'AUD_USD')

                    trade = {'open_price': open_price, 'trade_type': 'buy', 'stop_loss': stop_loss,
                                                    'stop_gain': stop_gain, 'pips_risked': round(curr_pips_to_risk, 5),
                                                    'n_units': n_units, 'original_units': n_units, 'start_date': curr_date, 'end_date': None}

                    buys += 1

        elif macd_sell_signal and emas_sell_signal and sar_sell_signal and rsi_sell_signal and enough_volatility and vo_signal:
            row = np.array(test_df.iloc[i - 1, 1:]).reshape(1, -1)
            row = scaler.transform(row)
            pred = svm.predict(row)[0]
            proba = svm.predict_proba(row)[0][2]

            # if pred == 'sell' and proba > proba_threshold:
            if pred == 'sell':
                open_price = float(curr_bo)
                pullback = sar1

                stop_loss = round(pullback, 5)

                if stop_loss > open_price:
                    curr_pips_to_risk = stop_loss - open_price

                    stop_gain = round(open_price - (curr_pips_to_risk * risk_reward_ratio), 5)

                    n_units = get_n_units('sell', stop_loss, curr_ask_open, curr_bid_open, curr_mid_open, 'AUD_USD')

                    trade = {'open_price': open_price, 'trade_type': 'sell', 'stop_loss': stop_loss,
                            'stop_gain': stop_gain, 'pips_risked': round(curr_pips_to_risk, 5),
                            'n_units': n_units, 'original_units': n_units, 'start_date': curr_date, 'end_date': None}

                    sells += 1

    if trade is not None:
        curr_date = test_df.loc[test_df.index[i], 'Date']
        curr_bid_open, curr_bid_high, curr_bid_low, curr_bid_close, curr_ask_open, curr_ask_high, curr_ask_low, curr_ask_close = test_bid_ask_mid_prices.loc[test_bid_ask_mid_prices.index[i], ['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close']]

        if trade is not None and trade['trade_type'] == 'buy' and curr_bid_low <= trade['stop_loss']:
          trade_amount = (trade['stop_loss'] - trade['open_price']) * trade['n_units'] * value_per_pip
          reward += trade_amount
          day_fees += calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

          n_losses += 1

          trade = None    

        if trade is not None and trade['trade_type'] == 'buy' and curr_bid_high >= trade['stop_gain']:
          trade_amount = (trade['stop_gain'] - trade['open_price']) * trade['n_units'] * value_per_pip
          reward += trade_amount
          day_fees += calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

          n_wins += 1

          trade = None

        if trade is not None and trade['trade_type'] == 'sell' and curr_ask_high >= trade['stop_loss']:
          trade_amount = (trade['open_price'] - trade['stop_loss']) * trade['n_units'] * value_per_pip
          reward += trade_amount
          day_fees += calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

          n_losses += 1

          trade = None

        if trade is not None and trade['trade_type'] == 'sell' and curr_ask_low <= trade['stop_gain']:
          trade_amount = (trade['open_price'] - trade['stop_gain']) * trade['n_units'] * value_per_pip
          reward += trade_amount
          day_fees += calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

          n_wins += 1

          trade = None

print(reward)
print(day_fees)
print(reward + day_fees)
print(buys)
print(sells)
print(n_wins)
print(n_losses)

-3299.90974999994
-166.39078
-3466.30052999994
211
186
132
264


In [28]:
def plot_candles(df_plot):
    fig = go.Figure()

    fig.add_trace(go.Candlestick(
        x=df_plot.Date, open=df_plot.Mid_Open, high=df_plot.Mid_High, low=df_plot.Mid_Low, close=df_plot.Mid_Close,
        line=dict(width=1), opacity=1,
        increasing_fillcolor='#24A06B',
        decreasing_fillcolor="#CC2E3C",
        increasing_line_color='#2EC886',
        decreasing_line_color='#FF3A4C'
    ))

    fig.add_trace(go.Scatter(
        x=df_plot.Date,
        y=df_plot['ema200'],
        marker=dict(color="yellow", size=10),
        name='EMA 200',
        mode='lines'
        ))

    fig.update_layout(width=1400,height=600,
        margin=dict(l=10,r=10,b=10,t=10),
        font=dict(size=10,color="#e1e1e1"),
        paper_bgcolor="#1e1e1e",
        plot_bgcolor="#1e1e1e")

    fig.update_xaxes(
        gridcolor="#1f292f",
        showgrid=True,fixedrange=True,rangeslider=dict(visible=False),
        rangebreaks=[
            dict(bounds=["sat", "mon"])
        ]
    )

    fig.update_yaxes(
        gridcolor="#1f292f",
        showgrid=True
    )

    fig1 = go.Figure()
    fig1.add_trace(go.Scatter(
        x=df_plot.Date,
        y=df_plot['macd'],
        marker=dict(color="green", size=10),
        name='MACD',
        mode='lines'
        ))

    fig1.add_trace(go.Scatter(
        x=df_plot.Date,
        y=df_plot['macdsignal'],
        marker=dict(color="red", size=10),
        name='MACDSIGNAL',
        mode='lines'
        ))

    fig1.update_layout(width=1400,height=200,
        margin=dict(l=10,r=10,b=10,t=10),
        font=dict(size=10,color="#e1e1e1"),
        paper_bgcolor="#1e1e1e",
        plot_bgcolor="#1e1e1e")

    fig1.update_xaxes(
        gridcolor="#1f292f",
        showgrid=True,fixedrange=True,rangeslider=dict(visible=False),
        rangebreaks=[
            dict(bounds=["sat", "mon"])
        ]
    )

    fig1.update_yaxes(
        gridcolor="#1f292f",
        showgrid=True
    )

    fig.show()
    fig1.show()

In [57]:
plot_candles(df.iloc[63530:63930, :])

In [56]:
sell_indices[24][0]

63537