In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [2]:
# Directory prefix that the candle CSV file names below are appended to.
# NOTE(review): this is a machine-specific absolute path; point it at your own data directory before running.
file_path = '/Users/mymac/Google Drive/My Drive/Forex_Robot/'

In [3]:
# Instrument whose candles are loaded; also embedded in the CSV and pickle file names.
currency_pair = 'Eur_Usd'
# Number of decimals prices are rounded to (3 for Jpy pairs, 5 otherwise).
rounding = 3 if 'Jpy' in currency_pair else 5
# Factor that scales a price difference before it is stored as a distance feature (100 for Jpy pairs, 10000 otherwise).
pips_multiplier = 100 if 'Jpy' in currency_pair else 10000
# Year span embedded in the CSV file names.
year_range = '2016-2023'

In [4]:
def _load_candles(csv_name):
    """Read one candle CSV located under ``file_path`` and parse its ``Date`` column.

    Returns the frame with ``Date`` converted to datetimes and a fresh 0..n-1 index.
    """
    candles = pd.read_csv(file_path + csv_name)
    candles['Date'] = pd.to_datetime(candles['Date'])
    return candles.reset_index(drop=True)


# Short timeframe (M5) candles drive the trade simulation; H1 candles supply the
# longer-timeframe context features.
df = _load_candles(f'Oanda_{currency_pair}_M5_{year_range}.csv')
df_long = _load_candles(f'Oanda_{currency_pair}_H1_{year_range}.csv')

In [5]:
def adx(high, low, close, lookback=14):
    """Return a smoothed directional-strength (ADX-style) series.

    Parameters
    ----------
    high, low, close : pd.Series
        Bar highs, lows and closes sharing one index.
    lookback : int
        Window for the rolling true-range mean; ``1 / lookback`` is the alpha of
        every exponential average used here.

    Returns
    -------
    pd.Series
        Exponentially smoothed blend of the current and previous DX values.
    """
    # Directional movement: negative high changes and positive low changes are zeroed
    # (NaN entries are left untouched because both comparisons are False for NaN).
    up_move = high.diff()
    down_move = low.diff()
    up_move = up_move.mask(up_move < 0, 0)
    down_move = down_move.mask(down_move > 0, 0)

    # True range = largest of the three candidate ranges on each bar.
    prev_close = close.shift(1)
    range_candidates = [
        pd.DataFrame(high - low),
        pd.DataFrame(abs(high - prev_close)),
        pd.DataFrame(abs(low - prev_close)),
    ]
    true_range = pd.concat(range_candidates, axis=1, join='inner').max(axis=1)
    avg_true_range = true_range.rolling(lookback).mean()

    alpha = 1 / lookback
    plus_di = 100 * (up_move.ewm(alpha=alpha).mean() / avg_true_range)
    minus_di = abs(100 * (down_move.ewm(alpha=alpha).mean() / avg_true_range))
    dx = (abs(plus_di - minus_di) / abs(plus_di + minus_di)) * 100

    # Weighted blend of the previous and current DX before the final smoothing pass.
    blended = ((dx.shift(1) * (lookback - 1)) + dx) / lookback

    return blended.ewm(alpha=alpha).mean()

def stoch(high, low, close, lookback=14):
    """Return the stochastic oscillator lines ``(slow_k, slow_d)``.

    ``slow_k`` places the close inside the rolling high/low range of the last
    ``lookback`` bars on a 0-100 scale; ``slow_d`` is its 3-bar rolling mean.
    """
    window_high = high.rolling(lookback).max()
    window_low = low.rolling(lookback).min()
    window_range = window_high - window_low

    percent_k = (close - window_low) * 100 / window_range
    percent_d = percent_k.rolling(3).mean()

    return percent_k, percent_d

def chop(df, lookback=14):
    """Return the choppiness index of a candle frame.

    ``df`` must provide ``Mid_High``, ``Mid_Low`` and ``Mid_Close`` columns. The
    result is log10(sum of 1-bar true ranges / total high-low range over the
    window) divided by log10(lookback).
    """
    highs = df['Mid_High']
    lows = df['Mid_Low']
    single_bar_range = atr(highs, lows, df['Mid_Close'], lookback=1)

    summed_range = single_bar_range.rolling(lookback).sum()
    window_extent = highs.rolling(lookback).max() - lows.rolling(lookback).min()

    return np.log10(summed_range / window_extent) / np.log10(lookback)

def vo(volume, short_lookback=5, long_lookback=10):
    """Return the volume oscillator: (short EMA - long EMA) / long EMA of ``volume``."""
    fast_ema = volume.ewm(span=short_lookback).mean()
    slow_ema = volume.ewm(span=long_lookback).mean()

    return (fast_ema - slow_ema) / slow_ema

def williams_r(highs, lows, closes, length=21, ema_length=15):
    """Return Williams %R and its EMA as ``(willy, willy_ema)``.

    %R is ``100 * (close - highest high) / (highest high - lowest low)`` over the
    last ``length`` bars; the second series is its EMA with span ``ema_length``.
    """
    window_top = highs.rolling(window=length).max()
    window_bottom = lows.rolling(window=length).min()

    percent_r = 100 * (closes - window_top) / (window_top - window_bottom)

    return percent_r, percent_r.ewm(span=ema_length).mean()

def squeeze(barsdata, length=20, length_kc=20, mult=1.5):
    """Return a boolean series that is True where the Bollinger bands sit inside the Keltner channel.

    ``barsdata`` must provide ``Mid_Close``, ``Mid_High`` and ``Mid_Low``.
    ``length`` is the Bollinger window, ``length_kc`` the true-range window of the
    Keltner channel and ``mult`` the width multiplier applied to both.
    """
    closes = barsdata['Mid_Close']
    highs, lows = barsdata['Mid_High'], barsdata['Mid_Low']
    prev_close = closes.shift()

    # Bollinger bands around the rolling mean of the close (population std).
    midline = closes.rolling(window=length).mean()
    close_std = closes.rolling(window=length).std(ddof=0)
    bb_upper = midline + mult * close_std
    bb_lower = midline - mult * close_std

    # Keltner channel: same midline, width from the rolling mean true range.
    true_range = pd.concat(
        [abs(highs - lows), abs(highs - prev_close), abs(lows - prev_close)],
        axis=1,
    ).max(axis=1)
    mean_range = true_range.rolling(window=length_kc).mean()
    kc_upper = midline + mean_range * mult
    kc_lower = midline - mean_range * mult

    # Rows whose bands are still NaN compare False and therefore report "no squeeze".
    return (bb_lower > kc_lower) & (bb_upper < kc_upper)

def atr(high, low, close, lookback=14):
    """Return the average true range: rolling mean of the per-bar true range.

    The true range of a bar is the largest of ``high - low``,
    ``|high - previous close|`` and ``|low - previous close|``; NaN candidates
    (the first bar has no previous close) are skipped by the row-wise max.
    """
    prev_close = close.shift()
    candidates = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    )
    per_bar_range = candidates.max(axis=1)

    return per_bar_range.rolling(lookback).mean()

def atr_bands(high, low, close, lookback=14, atr_multiplier=3):
    """Return ``(lower_band, upper_band)``: the close offset by ``atr_multiplier`` times the ATR."""
    band_offset = atr(high, low, close, lookback) * atr_multiplier

    return close - band_offset, close + band_offset

def rsi(closes, periods=14):
    """Return the relative strength index of ``closes``.

    Gains and losses are smoothed with an exponential mean using
    ``com = periods - 1``; values are NaN until ``periods`` observations exist.
    """
    change = closes.diff()

    gains = change.clip(lower=0)
    losses = -1 * change.clip(upper=0)

    smoothing = dict(com=periods - 1, adjust=True, min_periods=periods)
    avg_gain = gains.ewm(**smoothing).mean()
    avg_loss = losses.ewm(**smoothing).mean()

    relative_strength = avg_gain / avg_loss

    return 100 - (100 / (1 + relative_strength))

def qqe_mod(closes, rsi_period=6, smoothing=5, qqe_factor=3, threshold=3, mult=0.35, sma_length=50):
    """Compute QQE-style green/red bar signals from a close series.

    Parameters
    ----------
    closes : pd.Series
        Close prices passed to ``rsi``.
    rsi_period : int
        Period of the underlying RSI; also sets the smoothing span ``rsi_period * 2 - 1``.
    smoothing : int
        EMA span applied to the RSI.
    qqe_factor : float
        Multiplier on the smoothed RSI volatility that sets the band distance.
    threshold : float
        Minimum distance of the smoothed RSI from 50 for a bar to qualify.
    mult : float
        Multiplier on the rolling standard deviation used for ``upper`` / ``lower``.
    sma_length : int
        Rolling window for ``basis`` and ``dev``.

    Returns
    -------
    (Greenbar, Redbar, RsiMa - 50)
        Two boolean series and the smoothed RSI centred on zero.
    """
    Rsi = rsi(closes, rsi_period)
    RsiMa = Rsi.ewm(span=smoothing).mean()
    # Absolute bar-to-bar change of the smoothed RSI.
    AtrRsi = np.abs(RsiMa.shift(1) - RsiMa)
    Wilders_Period = rsi_period * 2 - 1
    # Two EMA passes with the same span, then scaled by qqe_factor.
    MaAtrRsi = AtrRsi.ewm(span=Wilders_Period).mean()
    dar = MaAtrRsi.ewm(span=Wilders_Period).mean() * qqe_factor

    # Zero-filled placeholders; their shift(1) is what the np.where calls below read.
    longband = pd.Series(0.0, index=Rsi.index)
    shortband = pd.Series(0.0, index=Rsi.index)
    trend = pd.Series(0, index=Rsi.index)

    DeltaFastAtrRsi = dar
    RSIndex = RsiMa
    newshortband = RSIndex + DeltaFastAtrRsi
    newlongband = RSIndex - DeltaFastAtrRsi
    # NOTE(review): longband.shift(1) / shortband.shift(1) are evaluated on the zero placeholders
    # above, so the bands are not carried forward bar by bar - confirm this is intended.
    # NOTE(review): pd.Series(np.where(...)) gets a fresh default integer index rather than
    # Rsi.index; later comparisons against RsiMa assume `closes` uses a default 0..n-1 index.
    longband = pd.Series(np.where((RSIndex.shift(1) > longband.shift(1)) & (RSIndex > longband.shift(1)),
                        np.maximum(longband.shift(1), newlongband), newlongband))
    shortband = pd.Series(np.where((RSIndex.shift(1) < shortband.shift(1)) & (RSIndex < shortband.shift(1)),
                        np.minimum(shortband.shift(1), newshortband), newshortband))
    cross_1 = (longband.shift(1) < RSIndex) & (longband > RSIndex)
    cross_2 = (RSIndex > shortband.shift(1)) & (RSIndex.shift(1) < shortband)
    # Without a cross the value comes from the shifted zero placeholder (1 only on the first row);
    # the result is a plain numpy array, not a Series.
    trend = np.where(cross_2, 1, np.where(cross_1, -1, trend.shift(1).fillna(1)))
    # Long band where trend == 1, short band everywhere else.
    FastAtrRsiTL = pd.Series(np.where(trend == 1, longband, shortband))

    # Rolling mean +/- mult * rolling std of the trend line, centred on zero.
    basis = (FastAtrRsiTL - 50).rolling(sma_length).mean()
    dev = mult * (FastAtrRsiTL - 50).rolling(sma_length).std()
    upper = basis + dev
    lower = basis - dev

    Greenbar1 = RsiMa - 50 > threshold
    Greenbar2 = RsiMa - 50 > upper

    Redbar1 = RsiMa - 50 < 0 - threshold
    Redbar2 = RsiMa - 50 < lower

    # A bar qualifies only when it clears both the fixed threshold and the rolling band.
    Greenbar = Greenbar1 & Greenbar2
    Redbar = Redbar1 & Redbar2

    return Greenbar, Redbar, RsiMa - 50

def heikin_ashi(open_values, high_values, low_values, close_values):
    """Convert OHLC series into Heikin-Ashi candles.

    Parameters
    ----------
    open_values, high_values, low_values, close_values : pd.Series
        Regular candle prices sharing one index.

    Returns
    -------
    (ha_open, ha_high, ha_low, ha_close) : tuple of pd.Series
        ``ha_close`` is the mean of the four prices; ``ha_open`` starts at the
        first open and is afterwards the midpoint of the previous bar's
        ``ha_open`` and ``ha_close``; ``ha_high`` / ``ha_low`` are the row-wise
        max / min of ``ha_open``, ``ha_close`` and the raw high / low.
        Empty inputs yield four empty series instead of raising.
    """
    ha_close = (open_values + high_values + low_values + close_values) / 4

    n_bars = len(open_values)
    ha_open_arr = np.zeros(n_bars, dtype=float)

    if n_bars > 0:
        # The recursion is inherently sequential, so run it on plain numpy arrays:
        # per-element `.iloc` writes on a Series are far slower on long candle histories.
        ha_close_arr = ha_close.to_numpy(dtype=float)
        ha_open_arr[0] = open_values.iloc[0]
        for i in range(1, n_bars):
            ha_open_arr[i] = (ha_open_arr[i - 1] + ha_close_arr[i - 1]) / 2

    ha_open = pd.Series(ha_open_arr, index=open_values.index)

    ha_high = pd.concat([ha_open, ha_close, high_values], axis=1).max(axis=1)
    ha_low = pd.concat([ha_open, ha_close, low_values], axis=1).min(axis=1)

    return ha_open, ha_high, ha_low, ha_close

def trend_indicator(opens, highs, lows, closes, ema_period=50, smoothing_period=10):
    """Return a boolean series: True where the smoothed Heikin-Ashi close is above the smoothed open.

    Both Heikin-Ashi series get an EMA with span ``ema_period`` followed by a
    second EMA with span ``smoothing_period`` before they are compared.
    """
    ha_open, _, _, ha_close = heikin_ashi(opens, highs, lows, closes)

    def _double_ema(series):
        # EMA of the EMA, using the two spans passed to trend_indicator.
        first_pass = series.ewm(span=ema_period).mean()
        return first_pass.ewm(span=smoothing_period).mean()

    return _double_ema(ha_close) > _double_ema(ha_open)

def supertrend(barsdata, atr_len=10, mult=3):
    """Compute a Supertrend direction flag and its bands for a candle frame.

    Parameters
    ----------
    barsdata : pd.DataFrame
        Must provide ``Mid_High``, ``Mid_Low`` and ``Mid_Close``.
    atr_len : int
        Lookback passed to ``atr``.
    mult : float
        ATR multiplier that sets the band distance from the high/low midpoint.

    Returns
    -------
    (supertrend, final_upperband, final_lowerband)
        ``supertrend`` is a list of bools (True = up-trend), one per row of
        ``barsdata``; the bands are float series indexed like ``barsdata``.

    Notes
    -----
    The loop is sized from ``barsdata`` itself and works positionally, so the
    function no longer depends on a global ``df`` nor on the frame having a
    0..n-1 integer index.
    """
    highs, lows = barsdata['Mid_High'], barsdata['Mid_Low']
    curr_atr = atr(highs, lows, barsdata['Mid_Close'], lookback=atr_len)
    hl2 = (highs + lows) / 2

    # Private, writable copies: the bands are adjusted in place inside the loop.
    upper = (hl2 + mult * curr_atr).to_numpy(dtype=float, copy=True)
    lower = (hl2 - mult * curr_atr).to_numpy(dtype=float, copy=True)
    close = barsdata['Mid_Close'].to_numpy(dtype=float)

    n_bars = len(barsdata)
    # Every bar starts as "up-trend"; bars are then resolved in order.
    trend = [True] * n_bars

    for curr in range(1, n_bars):
        prev = curr - 1

        if close[curr] > upper[prev]:
            # Close broke above the previous upper band.
            trend[curr] = True
        elif close[curr] < lower[prev]:
            # Close broke below the previous lower band.
            trend[curr] = False
        else:
            # No break: the trend carries over and the active band is not allowed
            # to move against it. Comparisons with NaN bands are False, so bars
            # without an ATR value simply inherit the previous flag.
            trend[curr] = trend[prev]

            if trend[curr] and lower[curr] < lower[prev]:
                lower[curr] = lower[prev]

            if not trend[curr] and upper[curr] > upper[prev]:
                upper[curr] = upper[prev]

    final_upperband = pd.Series(upper, index=barsdata.index)
    final_lowerband = pd.Series(lower, index=barsdata.index)

    return trend, final_upperband, final_lowerband

def fractal(lows, highs, window=20):
    """Return forward-filled fractal support and resistance levels.

    A bar is a support (resistance) fractal when its low (high) is the minimum
    (maximum) of the centred window spanning ``window`` bars on each side. Each
    output series holds the most recent fractal price at or before every bar and
    is NaN before the first fractal.

    Note that the centred window reads ``window`` bars after the fractal bar.
    """
    assert len(lows) == len(highs)

    span = 2 * window + 1

    def _centre_is_lowest(values):
        return values[window] == min(values)

    def _centre_is_highest(values):
        return values[window] == max(values)

    support_flags = lows.rolling(span, center=True).apply(_centre_is_lowest, raw=True)
    resistance_flags = highs.rolling(span, center=True).apply(_centre_is_highest, raw=True)

    # Index labels of the bars flagged as fractals.
    support_labels = pd.Series(support_flags.index[support_flags == 1.0])
    resistance_labels = pd.Series(resistance_flags.index[resistance_flags == 1.0])

    # Keep the fractal prices only, re-expand to the full index and carry each level forward.
    support_levels = lows[support_labels].reindex(lows.index).ffill()
    resistance_levels = highs[resistance_labels].reindex(highs.index).ffill()

    return support_levels, resistance_levels

In [6]:
def _add_shared_indicators(frame):
    """Add the indicator columns that both timeframes share to ``frame`` (mutated in place).

    ``frame`` must provide ``Mid_Close``, ``Mid_High``, ``Mid_Low`` and ``Volume``.
    """
    close, high, low = frame['Mid_Close'], frame['Mid_High'], frame['Mid_Low']

    frame['ema200'] = close.ewm(span=200).mean()
    frame['ema100'] = close.ewm(span=100).mean()
    frame['rsi'] = rsi(close)
    frame['rsi_sma'] = frame['rsi'].rolling(50).mean()
    frame['adx'] = adx(high, low, close)
    frame['slow_k'], frame['slow_d'] = stoch(high, low, close)
    frame['chop'] = chop(frame)
    frame['vo'] = vo(frame['Volume'])
    frame['willy'], frame['willy_ema'] = williams_r(high, low, close)
    frame['squeeze'] = squeeze(frame)
    frame['macd'] = close.ewm(span=12).mean() - close.ewm(span=26).mean()
    frame['macdsignal'] = frame['macd'].ewm(span=9).mean()


# --- Short timeframe: shared indicators plus the entry-signal indicators ---
_add_shared_indicators(df)
df['lower_atr_band'], df['upper_atr_band'] = atr_bands(df['Mid_High'], df['Mid_Low'], df['Mid_Close'])
df['qqe_up'], df['qqe_down'], df['qqe_val'] = qqe_mod(df['Mid_Close'])
df['supertrend'], df['supertrend_ub'], df['supertrend_lb'] = supertrend(df)
df['ti_up'] = trend_indicator(df['Mid_Open'], df['Mid_High'], df['Mid_Low'], df['Mid_Close'])
# Drop the warm-up rows where any indicator is still NaN.
df = df.dropna().reset_index(drop=True)

# --- Long timeframe: shared indicators plus fractal support / resistance ---
# One constant drives both the fractal window and the shift that follows, so they cannot drift apart.
fractal_window = 20

_add_shared_indicators(df_long)
long_support, long_resistance = fractal(df_long['Mid_Low'], df_long['Mid_High'], window=fractal_window)
# fractal() uses a centred window, i.e. it reads `fractal_window` bars after the fractal bar.
# Shifting by the same amount delays each level until those bars exist.
df_long['support'] = long_support.shift(fractal_window)
df_long['resistance'] = long_resistance.shift(fractal_window)
# DataFrame.ffill() replaces the deprecated fillna(method='ffill'); the remaining leading NaNs are dropped.
df_long = df_long.ffill().dropna().reset_index(drop=True)

In [7]:
# Multiplier applied to (price move * units) when a winning trade's profit is computed.
value_per_pip = 1.0
# Candidate per-unit daily fee amounts (negative values); calculate_day_fees samples one of them per call.
amounts_per_day = [-0.008, -0.01, -0.012] if 'Jpy' in currency_pair else [-0.00008, -0.0001, -0.00012]
# A trade is only opened when the spread is at most this fraction of the price distance risked.
spread_cutoff = 0.1
# Take-profit distance expressed as a multiple of the stop-loss distance.
risk_reward_ratio = 2.0

In [9]:
def get_n_units(trade_type, stop_loss, ask_open, bid_open, mid_open, currency_pair, risk_amount=50):
    """Return the position size (whole units) that risks ``risk_amount`` down to the stop.

    Parameters
    ----------
    trade_type : str
        ``'buy'`` measures the risk from ``ask_open``; anything else from ``bid_open``.
    stop_loss : float
        Stop-loss price.
    ask_open, bid_open, mid_open : float
        Opening prices of the entry bar; ``mid_open`` converts the per-pip value
        when the quote currency is not ``Usd``.
    currency_pair : str
        Pair formatted as ``'Base_Quote'`` (exactly one underscore).
    risk_amount : float, optional
        Amount to risk on the trade. Defaults to 50, the value that used to be
        hard-coded, so existing calls are unaffected.

    Returns
    -------
    int
        ``risk_amount / (pips at risk * value per pip)`` truncated towards zero.

    Raises
    ------
    ZeroDivisionError
        If the stop-loss equals the relevant open price (zero distance at risk).
    ValueError
        If ``currency_pair`` does not split into exactly two parts.
    """
    _, quote_currency = currency_pair.split('_')
    is_jpy_quote = quote_currency == 'Jpy'

    # Price distance between entry and stop, then expressed in pips.
    price_risk = ask_open - stop_loss if trade_type == 'buy' else stop_loss - bid_open
    pips_at_risk = price_risk * (100 if is_jpy_quote else 10000)

    # Value of one pip per unit; non-Usd quotes are converted with the mid open price.
    if quote_currency == 'Usd':
        per_pip = 0.0001
    elif is_jpy_quote:
        per_pip = 0.01 / mid_open
    else:
        per_pip = 0.0001 / mid_open

    return int(risk_amount / (pips_at_risk * per_pip))

def calculate_day_fees(start_date, end_date, n_units):
    """Return the fee for holding ``n_units`` from ``start_date`` to ``end_date``.

    One per-unit daily amount is drawn at random from ``amounts_per_day``
    (weights 0.25 / 0.50 / 0.25), scaled by ``n_units`` and multiplied by the
    number of business days between the two dates.
    """
    sampled_amount = np.random.choice(amounts_per_day, p=[0.25, 0.50, 0.25])
    fee_per_day = sampled_amount * n_units
    business_days = np.busday_count(start_date.date(), end_date.date())

    return business_days * fee_per_day

def create_data_point(row, long_row, is_buy, label):
    """Build one training row from a short-timeframe bar and a long-timeframe bar.

    Parameters
    ----------
    row : pd.Series
        Short-timeframe bar providing the ``Mid_*`` prices and indicator columns read below.
    long_row : pd.Series
        Long-timeframe bar providing support / resistance and the shared indicator columns.
    is_buy : bool
        Trade direction, appended as the second-to-last element.
    label : int
        Outcome label, appended as the last element.

    Returns
    -------
    list
        44 values: 42 features followed by ``is_buy`` and ``label``. The order of
        the list is the column order of the training matrix.

    Reads the module-level ``pips_multiplier`` and ``currency_pair``.
    """
    # Long-timeframe inputs (several unpacked names, e.g. slow_k_long / squeeze_long, are not used below).
    support, resistance, ema200_long, ema100_long, rsi_long, rsi_sma_long, adx_long, slow_k_long, slow_d_long, chop_long, vo_long, willy_long, willy_ema_long, squeeze_long, macd_long, macdsignal_long = long_row[['support', 'resistance', 'ema200', 'ema100', 'rsi', 'rsi_sma', 'adx', 'slow_k', 'slow_d', 'chop', 'vo', 'willy', 'willy_ema', 'squeeze', 'macd', 'macdsignal']]
    # Short-timeframe inputs (slow_k, slow_d and squeeze are unpacked but not used below).
    mid_open, mid_high, mid_low, mid_close, qqe_val, ema200, ema100, lower_atr_band, upper_atr_band, supertrend_ub, supertrend_lb, rsi, rsi_sma, adx, slow_k, slow_d, chop, vo, willy, willy_ema, squeeze, macd, macdsignal = row[['Mid_Open', 'Mid_High', 'Mid_Low', 'Mid_Close', 'qqe_val', 'ema200', 'ema100', 'lower_atr_band', 'upper_atr_band', 'supertrend_ub', 'supertrend_lb', 'rsi', 'rsi_sma', 'adx', 'slow_k', 'slow_d', 'chop', 'vo', 'willy', 'willy_ema', 'squeeze', 'macd', 'macdsignal']]

    # Trend direction and whether the close sits between the two EMAs, per timeframe.
    up_trend, up_trend_long = ema100 > ema200, ema100_long > ema200_long
    between_emas = ema200 < mid_close < ema100 if up_trend else ema100 < mid_close < ema200
    between_emas_long = ema200_long < mid_close < ema100_long if up_trend_long else ema100_long < mid_close < ema200_long
    above_support, below_resistance = mid_close > support, mid_close < resistance
    # Distance features: price differences scaled by pips_multiplier.
    dist_to_support, dist_to_resistance = abs(mid_close - support) * pips_multiplier, abs(resistance - mid_close) * pips_multiplier
    dist_to_ema_100, dist_to_ema_200 = abs(mid_close - ema100) * pips_multiplier, abs(mid_close - ema200) * pips_multiplier
    dist_to_ema_100_long, dist_to_ema_200_long = abs(mid_close - ema100_long) * pips_multiplier, abs(mid_close - ema200_long) * pips_multiplier
    dist_between_emas, dist_between_emas_long = abs(ema200 - ema100) * pips_multiplier, abs(ema200_long - ema100_long) * pips_multiplier
    dist_between_atr_bands = abs(upper_atr_band - lower_atr_band) * pips_multiplier
    dist_to_upper_atr = (upper_atr_band - mid_high) * pips_multiplier
    dist_to_lower_atr = (mid_low - lower_atr_band) * pips_multiplier
    dist_between_supertrend_bands = abs(supertrend_ub - supertrend_lb) * pips_multiplier
    dist_to_upper_supertrend = (supertrend_ub - mid_high) * pips_multiplier
    dist_to_lower_supertrend = (mid_low - supertrend_lb) * pips_multiplier
    # Candle-shape features.
    bullish = mid_close > mid_open
    candle_size = abs(mid_high - mid_low)
    # Substitute a tiny range for a zero-range candle so the ratios below do not divide by zero.
    if candle_size == 0:
        candle_size = 0.00001 if 'Jpy' in currency_pair else 0.0000001
    body_size_percentage = abs(mid_close - mid_open) / candle_size
    # NOTE(review): the names look swapped - `upper_wick_percentage` measures body-bottom to low
    # and `lower_wick_percentage` measures high to body-top. Swapping them would change
    # the feature column order, so confirm against any consumer of the saved model first.
    upper_wick_percentage, lower_wick_percentage = abs(min(mid_open, mid_close) - mid_low) / candle_size, abs(mid_high - max(mid_open, mid_close)) / candle_size
    # Scaled only after the ratios above were computed from the unscaled range.
    candle_size *= pips_multiplier

    # Boolean indicator states, each for the short and the long timeframe.
    rsi_up, rsi_up_long = rsi > rsi_sma, rsi_long > rsi_sma_long
    adx_large, adx_large_long = adx > 30, adx_long > 30
    chop_small, chop_small_long = chop < 0.5, chop_long < 0.5
    vo_positive, vo_positive_long = vo > 0, vo_long > 0
    willy_above_ema, willy_above_ema_long = willy > willy_ema, willy_long > willy_ema_long
    macd_above_signal, macd_above_signal_long = macd > macdsignal, macd_long > macdsignal_long
    macd_above_zero, macd_above_zero_long = min(macd, macdsignal) > 0, min(macd_long, macdsignal_long) > 0

    return [qqe_val, rsi, rsi_long, rsi_up, rsi_up_long, adx_large, adx_large_long, chop_small, chop_small_long, vo_positive, vo_positive_long, 
            willy_above_ema, willy_above_ema_long, macd_above_signal, macd_above_signal_long, macd_above_zero, macd_above_zero_long, up_trend, 
            up_trend_long, between_emas, between_emas_long, above_support, below_resistance, dist_to_support, dist_to_resistance, 
            dist_to_ema_100, dist_to_ema_200, dist_to_ema_100_long, dist_to_ema_200_long, dist_between_emas, dist_between_emas_long, 
            dist_between_atr_bands, dist_to_upper_atr, dist_to_lower_atr, dist_between_supertrend_bands, dist_to_upper_supertrend, 
            dist_to_lower_supertrend, bullish, candle_size, body_size_percentage, upper_wick_percentage, lower_wick_percentage, is_buy, label]

# Walk the short-timeframe bars in order, holding at most one simulated trade at a time.
# Every closed trade appends one labelled feature row (see create_data_point) to `data`.
data, trade = [], None

for i in range(2, len(df)):
    curr_date = df.loc[df.index[i], 'Date']

    # ---- Entry: only evaluated while no trade is open ----
    if trade is None:
        # Long-timeframe bars dated at or before the current bar.
        # NOTE(review): this filter rescans df_long on every flat bar.
        curr_long = df_long.loc[df_long['Date'] <= curr_date]

        # create_data_point later reads curr_long.iloc[-2], so at least two rows are required.
        if len(curr_long) < 2:
            continue

        # Stop once the long-timeframe data no longer reaches the current date.
        if len(df_long.loc[df_long.Date >= curr_date]) == 0:
            break

        # Prices of the two previous bars and of the current bar (several of these are not used below).
        mid_open2, mid_close2, mid_low2, mid_high2 = df.loc[df.index[i - 2], ['Mid_Open', 'Mid_Close', 'Mid_Low', 'Mid_High']]
        mid_open1, mid_close1, mid_low1, mid_high1 = df.loc[df.index[i - 1], ['Mid_Open', 'Mid_Close', 'Mid_Low', 'Mid_High']]
        curr_ao, curr_bo, curr_mid_open, curr_ask_low, curr_bid_high = df.loc[df.index[i], ['Ask_Open', 'Bid_Open', 'Mid_Open', 'Ask_Low', 'Bid_High']]
        spread = abs(curr_ao - curr_bo)

        # Signals are read from the two previous bars; the entry is priced at the current bar's open.
        supertrend2 = df.loc[df.index[i - 2], 'supertrend']
        supertrend1, supertrend_ub, supertrend_lb, qqe_up, qqe_down, lower_atr_band, upper_atr_band = df.loc[df.index[i - 1], ['supertrend', 'supertrend_ub', 'supertrend_lb', 'qqe_up', 'qqe_down', 'lower_atr_band', 'upper_atr_band']]

        # A signal is a flip of the supertrend flag between bar i-2 and bar i-1.
        supertrend_buy_signal = not supertrend2 and supertrend1
        supertrend_sell_signal = supertrend2 and not supertrend1

        if supertrend_buy_signal and qqe_up:
            open_price = float(curr_ao)
            # Stop is placed one spread below the previous bar's lower supertrend band.
            pullback = float(supertrend_lb) - spread

            stop_loss = round(pullback, rounding)

            if stop_loss < open_price:
                curr_pips_to_risk = open_price - stop_loss

                # Skip trades whose spread is too large relative to the distance risked.
                if spread <= curr_pips_to_risk * spread_cutoff:
                    stop_gain = round(open_price + (curr_pips_to_risk * risk_reward_ratio), rounding)

                    n_units = get_n_units('buy', stop_loss, curr_ao, curr_bo, curr_mid_open, currency_pair)

                    trade = {'start_index': i, 'open_price': open_price, 'trade_type': 'buy', 'stop_loss': stop_loss,
                                                    'stop_gain': stop_gain, 'pips_risked': round(curr_pips_to_risk, 5),
                                                    'n_units': n_units, 'original_units': n_units, 'start_date': curr_date, 'end_date': None}

        elif supertrend_sell_signal and qqe_down:
            open_price = float(curr_bo)
            # Stop is placed one spread above the previous bar's upper supertrend band.
            pullback = float(supertrend_ub) + spread

            stop_loss = round(pullback, rounding)

            if stop_loss > open_price:
                curr_pips_to_risk = stop_loss - open_price

                if spread <= curr_pips_to_risk * spread_cutoff:
                    stop_gain = round(open_price - (curr_pips_to_risk * risk_reward_ratio), rounding)

                    n_units = get_n_units('sell', stop_loss, curr_ao, curr_bo, curr_mid_open, currency_pair)

                    trade = {'start_index': i, 'open_price': open_price, 'trade_type': 'sell', 'stop_loss': stop_loss,
                            'stop_gain': stop_gain, 'pips_risked': round(curr_pips_to_risk, 5),
                            'n_units': n_units, 'original_units': n_units, 'start_date': curr_date, 'end_date': None}

    # ---- Exit: checked on every bar while a trade is open, including the entry bar itself ----
    # The stop-loss is tested before the take-profit, so a bar touching both counts as a loss.
    # Features always come from the bar before the entry bar (j - 1) and from curr_long as it was
    # when the trade was opened (curr_long is only refreshed while no trade is open).
    if trade is not None:
        curr_bid_open, curr_bid_high, curr_bid_low, curr_bid_close, curr_ask_open, curr_ask_high, curr_ask_low, curr_ask_close = df.loc[df.index[i], ['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close', 'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close']]
        j = trade['start_index']

        # Buy stopped out: label 0.
        if trade['trade_type'] == 'buy' and curr_bid_low <= trade['stop_loss']:
            data.append(create_data_point(df.iloc[j - 1, :], curr_long.iloc[-2, :], True, 0))

            trade = None
            continue

        # Buy reached its target: label 1 only if the profit still exceeds the day fees.
        if trade['trade_type'] == 'buy' and curr_bid_high >= trade['stop_gain']:
            trade_amount = (trade['stop_gain'] - trade['open_price']) * trade['n_units'] * value_per_pip
            day_fees = calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

            if trade_amount + day_fees > 0:
                data.append(create_data_point(df.iloc[j - 1, :], curr_long.iloc[-2, :], True, 1))

            else:
                data.append(create_data_point(df.iloc[j - 1, :], curr_long.iloc[-2, :], True, 0))

            trade = None
            continue

        # Sell stopped out: label 0.
        if trade['trade_type'] == 'sell' and curr_ask_high >= trade['stop_loss']:
            data.append(create_data_point(df.iloc[j - 1, :], curr_long.iloc[-2, :], False, 0))

            trade = None
            continue

        # Sell reached its target: label 1 only if the profit still exceeds the day fees.
        if trade['trade_type'] == 'sell' and curr_ask_low <= trade['stop_gain']:
            trade_amount = (trade['open_price'] - trade['stop_gain']) * trade['n_units'] * value_per_pip
            day_fees = calculate_day_fees(trade['start_date'], curr_date, trade['n_units'])

            if trade_amount + day_fees > 0:
                data.append(create_data_point(df.iloc[j - 1, :], curr_long.iloc[-2, :], False, 1))

            else:
                data.append(create_data_point(df.iloc[j - 1, :], curr_long.iloc[-2, :], False, 0))

            trade = None
            continue

In [10]:
# Stack the collected rows into one 2-D array (one row per closed trade) and display its shape.
# NOTE(review): this rebinds `data` from a list to an ndarray.
data = np.array(data)
data.shape

(2408, 44)

In [18]:
# The last column holds the label: 1 = profitable trade, 0 = everything else.
label_column = data[:, -1]
n_trades = int(np.count_nonzero(label_column == 1))
n_nones = int(np.count_nonzero(label_column == 0))

print(f'Num trades = {n_trades}, num nones = {n_nones}')

Num trades = 698, num nones = 1710


In [19]:
# Fixed seed so that both the undersampling and the train/test split are reproducible
# across Restart & Run All.
split_seed = 42
rng = np.random.default_rng(split_seed)

# Features are every column but the last; the last column is the label.
x, y = data[:, :-1], data[:, -1]

# Undersample: keep as many rows of every class as the rarest class has.
class_labels, class_counts = np.unique(y, return_counts=True)
n_per_class = class_counts.min()
balanced_rows = np.hstack([
    rng.choice(np.flatnonzero(y == label), n_per_class, replace=False)
    for label in class_labels
])
x, y = x[balanced_rows, :], y[balanced_rows]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=(1 / 3), random_state=split_seed)

In [20]:
# Learn the scaling statistics from the training split only, then apply them to both splits.
scaler = StandardScaler()
scaler.fit(x_train)

x_train_scaled, x_test_scaled = scaler.transform(x_train), scaler.transform(x_test)

In [21]:
# Passed to PCA as n_components: a float below 1 asks for enough components to explain that share of the variance.
explained_variance = 0.99

# Fit PCA on the scaled training split only, then project both splits with it.
# NOTE(review): the projections overwrite x_train_scaled / x_test_scaled, so re-running this cell
# without re-running the scaling cell applies PCA to already-projected data.
pca = PCA(n_components=explained_variance)
x_train_scaled = pca.fit_transform(x_train_scaled)
x_test_scaled = pca.transform(x_test_scaled)

In [22]:
# Fixed seed: the sampled hyper-parameter combinations and the SVC's internal
# probability calibration are otherwise different on every run.
search_seed = 42

# Search space; `degree` only affects the 'poly' kernel and `gamma` is ignored by 'linear'.
param_grid = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
              'degree': [3, 5, 7, 9, 11],
              'gamma': ['scale', 'auto', 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5],
              'C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50]}

random_search = RandomizedSearchCV(SVC(probability=True, random_state=search_seed), param_grid,
                                   cv=5, n_iter=100, random_state=search_seed)
random_search.fit(x_train_scaled, y_train)
model = random_search.best_estimator_
# class_weight='balanced'

# best_score_ is the mean cross-validated score of the best candidate, not a test-set score.
print(f'SVM accuracy = {random_search.best_score_}')

SVM accuracy = 0.5408602150537634


In [23]:
# Score the selected model on the held-out split.
y_pred = model.predict(x_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.51931330472103


In [24]:
# How many test rows were predicted as each class.
labels, counts = np.unique(y_pred, return_counts=True)
print(labels, counts)

[0. 1.] [205 261]


In [25]:
# Confusion table: rows are the actual labels, columns the predicted labels.
y_truth = pd.Series(y_test, name='Actual')
y_hat = pd.Series(y_pred, name='Predicted')
pd.crosstab(y_truth, y_hat)

Predicted,0.0,1.0
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,108,127
1.0,97,134


In [18]:
# One row per test sample: [predicted label, true label, one probability column per class].
y_pred_probs = model.predict_proba(x_test_scaled)
hat_truth_prob = np.column_stack((y_pred, y_test, y_pred_probs))

In [19]:
# Columns of hat_truth_prob: 0 = predicted label, 1 = true label, 2 and 3 = class probabilities.
predicted_col, actual_col = hat_truth_prob[:, 0], hat_truth_prob[:, 1]
is_wrong = predicted_col != actual_col
is_right = predicted_col == actual_col
said_zero, said_one = predicted_col == 0, predicted_col == 1


def _mean_prob(row_mask, prob_column):
    """Average one probability column over the rows selected by ``row_mask``."""
    return hat_truth_prob[row_mask][:, prob_column].mean()


zero_error_prob_avg = _mean_prob(is_wrong & said_zero, 2)
zero_correct_prob_avg = _mean_prob(is_right & said_zero, 2)
one_error_prob_avg = _mean_prob(is_wrong & said_one, 3)
one_correct_prob_avg = _mean_prob(is_right & said_one, 3)

print(f'Avg prob when zero is misclassified: {zero_error_prob_avg}')
print(f'Avg prob when zero is correct: {zero_correct_prob_avg}')
print(f'Avg prob when one is misclassified: {one_error_prob_avg}')
print(f'Avg prob when one is correct: {one_correct_prob_avg}')

Avg prob when zero is misclassified: 0.49802569382871115
Avg prob when zero is correct: 0.4973921242930348
Avg prob when one is misclassified: 0.4976338164465569
Avg prob when one is correct: 0.4978185967514933


In [23]:
# Re-derive the labels from the second probability column with a 0.5 cut-off and compare with the truth.
above_cutoff = y_pred_probs[:, 1] > 0.5
y_hat_adjusted = pd.Series(above_cutoff.astype(int), name='Predicted')
pd.crosstab(y_truth, y_hat_adjusted)

Predicted,0,1
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,122,24
1.0,122,22


In [25]:
# Persist the fitted model together with the preprocessing objects it was trained with.
artifacts = [
    (f'./data/{currency_pair.lower()}_svm.pickle', model),
    (f'./data/{currency_pair.lower()}_scaler.pickle', scaler),
    (f'./data/{currency_pair.lower()}_pca.pickle', pca),
]

for artifact_path, artifact in artifacts:
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)