Feature functions
Function formulation here, data will run through this to churn out features


In [0]:
import pandas as pd
import numpy as np
from scipy import stats
import scipy.optimize
from scipy.optimize import OptimizeWarning
import warnings
import math
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
#from mpl_finance import candlestick_ohlc
from matplotlib.dates import date2num
from datetime import datetime


class holder:
    1

# Heiken Ashi Candles


def heikenashi(prices,periods):

    """

    :param price: datafram of OHLC and Volume Data
    :param periods: periods for which to create the candles
    :return: heiken ashi OHLC candles

    """

    results = holder()

    dict = {}

    HAclose = prices[['open','high','low','close']].sum(axis=1)/4


    HAopen = HAclose.copy()

    HAopen.iloc[0] = HAclose.iloc[0]
    HAhigh = HAclose.copy()
    HAlow = HAclose.copy()

    for i in range(1,len(prices)):

        HAopen.iloc[i] = (HAopen.iloc[i-1]+HAclose.iloc[i-1])/2
        HAhigh.iloc[i] = np.array ([prices.high.iloc[i],HAopen.iloc[i],HAclose.iloc[i]]).max()
        HAlow.iloc[i] = np.array([prices.low.iloc[i], HAopen.iloc[i], HAclose.iloc[i]]).min()

    df = pd.concat((HAopen,HAhigh,HAlow,HAclose),axis=1)
    df.columns = [['open','high','low','close']]

    df.index = df.index.droplevel(0)

    dict[periods[0]] = df

    results.candles=dict
    return results


# Detrender

def detrend(prices,method='difference'):

    """

    :param prices: data frame of OHLC currency data
    :param method: method by which to detrend 'linear' or 'difference'
    :return: the detrended price series

    """

    if method == 'difference':

        detrended = prices.close[1:]-prices.close[:-1].values

    elif method == 'linear':

        x = np.arange(0,len(prices))
        y = prices.close.values

        model = LinearRegression()

        model.fit(x.reshape(-1,1),y.reshape(-1,1))

        trend = model.predict(x.reshape(-1,1))

        trend = trend.reshape((len(prices),))

        detrended = prices.close - trend

    else:

        print('you did not input a valid method for detrending! Options are linear or difference')

    return detrended

# Fourier Series Expansion Fititng Function

def fseries(x,a0,a1,b1,w):

    """

    :param x: the hours (independent variable)
    :param a0: first fourier series coefficient
    :param a1: second fourier series coefficient
    :param b1: third fourier series coefficient
    :param w: fourier series frequency
    :return: the value of the fourier function

    """

    f = a0 + a1*np.cos(w*x) + b1*np.sin(w*x)

    return f

#Sine Series Expansion Fitting Function

def sseries(x,a0,b1,w):

    """

    :param x: the hours (independent variable)
    :param a0: first sine series coefficient
    :param b1: second sine series coefficient
    :param w: third sine frequency
    :return: the value of the sine function

    """

    f = a0 + b1*np.sin(w*x)

    return f

# Fourier Series Coefficient Calculator Function

def fourier(prices, periods, method ='difference'):

    """

    :param prices: OHLC dataframe
    :param periods: list of periods for which to compute coefficients
    :param method: method by which to detrend the data
    :return: dict of data frams containing coefficients for said periods
    """

    results = holder()
    dict = {}

    # option to plot the expansion fit for each iteration

    plot = False

    # compute the coefficients of the series

    detrended = detrend(prices,method)

    for i in range (0,len(periods)):

        coeffs = []

        for j in range (periods[i],len(prices)-periods[i]):

            x = np.arange(0,periods[i])
            y = detrended.iloc[j-periods[i]:j]

            with warnings.catch_warnings():
                warnings.simplefilter('error',OptimizeWarning)

                try:

                    res = scipy.optimize.curve_fit(fseries,x,y)

                except (RuntimeError, OptimizeWarning):

                    res = np.empty((1,4))
                    res[0,:] = np.NAN

            if plot == True:

                xt = np.linspace(0,periods[i],100)
                yt = fseries(xt,res[0][0],res[0][1],res[0][2],res[0][3])

                plt.plot(x,y)
                plt.plot(xt,yt,'r')

                plt.show()

            coeffs = np.append(coeffs,res[0],axis=0)

        warnings.filterwarnings('ignore',category=np.VisibleDeprecationWarning)

        coeffs = np.array(coeffs).reshape(((len(coeffs)//4,4)))

        df = pd.DataFrame(coeffs,index=prices.iloc[periods[i]:-periods[i]])

        df.columns = [['a0','a1','b1','w']]

        df = df.fillna(method='bfill')

        dict[periods[i]] = df

    results.coeffs=dict

    return results

# Sine Series Coefficient Calculator Function

def sine(prices, periods, method ='difference'):

    """

    :param prices: OHLC dataframe
    :param periods: list of periods for which to compute coefficients
    :param method: method by which to detrend the data
    :return: dict of data frams containing coefficients for said periods
    """

    results = holder()
    dict = {}

    # option to plot the expansion fit for each iteration

    plot = False

    # compute the coefficients of the series

    detrended = detrend(prices,method)

    for i in range (0,len(periods)):

        coeffs = []

        for j in range (periods[i],len(prices)-periods[i]):

            x = np.arange(0,periods[i])
            y = detrended.iloc[j-periods[i]:j]

            with warnings.catch_warnings():
                warnings.simplefilter('error',OptimizeWarning)

                try:

                    res = scipy.optimize.curve_fit(sseries,x,y)

                except (RuntimeError, OptimizeWarning):

                    res = np.empty((1,3))
                    res[0,:] = np.NAN

            if plot == True:

                xt = np.linspace(0,periods[i],100)
                yt = sseries(xt,res[0][0],res[0][1],res[0][2])

                plt.plot(x,y)
                plt.plot(xt,yt,'r')

                plt.show()

            coeffs = np.append(coeffs,res[0],axis=0)

        warnings.filterwarnings('ignore',category=np.VisibleDeprecationWarning)

        coeffs = np.array(coeffs).reshape(((len(coeffs)//3,3)))

        df = pd.DataFrame(coeffs,index=prices.iloc[periods[i]:-periods[i]])

        df.columns = [['a0','b1','w']]

        df = df.fillna(method='bfill')

        dict[periods[i]] = df

    results.coeffs=dict

    return results

# Williams Accumulation Distribution Function

def wadl(prices,periods):

    """

    :param price: date frame of OHLC prices
    :param periods: (list) periods for which to calculate the function
    :return: williams accumulation distribution lines for each period

    """

    results = holder()
    dict = {}

    for i in range(0,len(periods)):

        WAD = []

        for j in range(periods[i],len(prices)-periods[i]):
            TRH = np.array([prices.high.iloc[j],prices.close.iloc[j-1]]).max()

            TRL = np.array([prices.low.iloc[j], prices.close.iloc[j - 1]]).min()

            if prices.close.iloc[j] > prices.close.iloc[j-1]:

                PM = prices.close.iloc[j] - TRL

            elif prices.close.iloc[j] < prices.close.iloc[j-1]:

                PM = prices.close.iloc[j] - TRH

            elif prices.close.iloc[j] == prices.close.iloc[j-1]:

                PM = 0

            else:

                print('unknown error, see isaac')

            AD = PM * prices.AskVol.iloc[j]

            WAD = np.append(WAD,AD)

        WAD = WAD.cumsum()

        WAD = pd.DataFrame(WAD,index=prices.iloc[periods[i]:-periods[i]].index)

        WAD.columns = [['close']]

        dict[periods[i]] = WAD

    results.wadl = dict

    return results

# Data Resampling Function

def OHLCresample (DataFrame, TimeFrame='T', column='Ask'):

    """

    :param DataFrame: dataframe containing data that we want to resample
    :param TimeFrame: timeframe that we want for resampling
    :param column: which column we are resampling (bid or ask) default='ask'
    :return: resampled OHCL data for the given timeframe
    """

    grouped = DataFrame.groupby('Symbol')

    if np.any(DataFrame.columns=='Ask'):

        if column =='ask':
            ask = grouped['Ask'].resample(TimeFrame).ohlc()
            askVol =  grouped ['AskVol'].resample(TimeFrame).count()
            resampled = pd.DataFrame(ask)
            resampled['AskVol'] = askVol

        elif column=='bid':
            bid = grouped['Bid'].resample(TimeFrame).ohlc()
            bidVol =  grouped['BidVol'].resample(TimeFrame).count()
            resampled = pd.DataFrame(bid)
            resampled['BidVol'] = bidVol

        else:

            raise ValueError('Column must be a string. Either ask or bid')

    elif np.any(DataFrame.columns=='close'):
        open = grouped['open'].resample(TimeFrame).ohlc()
        high = grouped['high'].resample(TimeFrame).ohlc()
        low = grouped['low'].resample(TimeFrame).ohlc()
        close = grouped['close'].resample(TimeFrame).ohlc()
        askVol = grouped['AskVol'].resample(TimeFrame).count()

        resampled=pd.DataFrame(open)
        resampled['high'] = high
        resampled['low'] = low
        resampled['close'] = close
        resampled ['AskVol'] = askVol

    resampled = resampled.dropna()

    return resampled


#Momentum Function

def momentum(prices,periods):

    """

    :param price: dataframe of OHLLC data (like usual)
    :param peroids: list of periods to calcullate
    :return: momentum indicator
    """

    results = holder()
    open = {}
    close = {}

    for i in range(0,len(periods)):

        open[periods[i]] = pd.DataFrame(prices.open.iloc[periods[i]:]-prices.open.iloc[:-periods[i]].values,
                                        index=prices.iloc[periods[i]:].index)

        close[periods[i]] = pd.DataFrame(prices.close.iloc[periods[i]:]-prices.close.iloc[:-periods[i]].values,
                                        index=prices.iloc[periods[i]:].index)

        open[periods[i]].columns=[['open']]
        close[periods[i]].columns=[['close']]

    results.open = open
    results.close = close

    return results

#Stochastic Oscillator Function

def stochastic (prices,periods):

    """

    :param prices: OHLC dataframe
    :param periods: periods to calculate function value
    :return: oscillator function values
    """

    results = holder()
    close = {}

    for i in range (0,len(periods)):

        Ks = []

        for j in range(periods[i],len(prices)-periods[i]):

            C = prices.close.iloc[j+1]
            H = prices.high.iloc[j-periods[i]:j].max()
            L = prices.low.iloc[j-periods[i]:j].min()

            if H==L:
                K = 0
            else:
                K = 100*(C-L)/(H-L)
            Ks = np.append(Ks,K)

        df = pd.DataFrame(Ks,index=prices.iloc[periods[i]+1:-periods[i]+1].index)
        df.columns = ['K']
        df['D'] = df.K.rolling(3).mean()
        df = df.dropna()

        close[periods[i]] = df

    results.close = close

    return results

#Williams Oscillator Function

def williams(prices, periods):

    """

    :param prices: OHCL price data
    :param periods: list of periods to calculate function values
    :return: values of williams osc function
    """

    results = holder()
    close = {}

    for i in range(0, len(periods)):
        Rs = []
        for j in range(periods[i],len(prices)-periods[i]):
            C = prices.close.iloc[j+1]
            H = prices.high.iloc[j-periods[i]:j].max()
            L = prices.low.iloc[j-periods[i]:j].min()

            if H==L:
                R = 0
            else:
                R = -100 * (H-C)/(H-L)

            Rs = np.append(Rs,R)
        df = pd.DataFrame(Rs,index=prices.iloc[periods[i]+1:-periods[i]+1].index)
        df.columns = [['R']]
        df = df.dropna()

        close[periods[i]] = df

    results.close = close

    return results

# Proc function (Price of rate change)

def proc(prices,periods):
    """

    :param prices: dataframe containing prices
    :param periods: periods for which to calculate PROC
    :return: PROC for indicated periods
    """
    results = holder()
    proc = {}

    for i in range(0,len(periods)):

        proc[periods[i]] = pd.DataFrame((prices.close.iloc[periods[i]:]-prices.close.iloc[:-periods[i]].values)
                                        /prices.close.iloc[:-periods[i]].values,
                                        index=prices.iloc[periods[i]:].index)
        proc[periods[i]].columns = [['close']]

    results.proc = proc

    return results



# Accumulation Distribution Oscillator

def adosc(prices,periods):

    """

    :param prices: OHLC dataframe
    :param periods: periods for which to compute indicator
    :return: indicator values for indicated periods
    """

    results = holder()
    accdist = {}

    for i in range (0,len(periods)):

        AD = []

        for j in range (periods[i], len(prices)-periods[i]):

            C = prices.close.iloc[j+1]
            H = prices.high.iloc[j-periods[i]:j].max()
            L = prices.low.iloc[j-periods[i]:j].min()
            V = prices.AskVol.iloc[j+1]

            if H==L:
                CLV = 0
            else:
                CLV = ((C-L)-(H-C))/(H-L)
            AD = np.append(AD,CLV*V)

        AD = AD.cumsum()
        AD = pd.DataFrame(AD, index=prices.iloc[periods[i]+1:-periods[i]+1].index)
        AD.columns = [['AD']]

        accdist[periods[i]] = AD

    results.AD = accdist

    return results

#MACD

def macd(prices,periods):
    """

    :param prices: OHLC dataframe of prices
    :param periods: 1x2 array containing values for the EMAs
    :return: MACD for given periods
    """

    results = holder()

    EMA1 = prices.close.ewm(span=periods[0]).mean()
    EMA2 = prices.close.ewm(span=periods[1]).mean()

    MACD = pd.DataFrame(EMA1-EMA2)
    MACD.columns = [['L']]

    SigMACD = MACD.rolling(3).mean()
    SigMACD.columns = [['SL']]

    results.line=MACD
    results.signal = SigMACD

    return results

#CCI (Commodity Channel index)

def cci(prices,periods):

    """

    :param prices: OHLC Dataframe of price data
    :param periods: periods for which to compute the indicator
    :return: CCI for the given periods
    """

    results = holder()
    CCI = {}

    for i in range (0,len(periods)):

        MA = prices.close.rolling(periods[i]).mean()
        std = prices.close.rolling(periods[i]).std()

        D = (prices.close-MA)/std

        CCI[periods[i]] = pd.DataFrame((prices.close-MA)/(0.015*D))
        CCI[periods[i]].columns = [['close']]

    results.cci = CCI

    return results

#bollinger bands

def bollinger(prices,periods,deviations):

    """

    :param prices: OHLC data
    :param periods: periods fo rwhich to compute the bollinger bands
    :param deviations: deviations to use when calculating bands
    :return: bollinger bands
    """

    results = holder()
    boll={}

    for i in range (0,len(periods)):

        mid = prices.close.rolling(periods[i]).mean()
        std = prices.close.rolling(periods[i]).std()

        upper = mid+deviations*std
        lower = mid-deviations*std

        df = pd.concat((upper,mid,lower),axis=1)
        df.columns = [['upper','mid','lower']]

        boll[periods[i]] = df

    results.bands = boll

    return results

# Price averages

def paverage(prices,periods):

    """

    :param prices: OCHL data
    :param periods: list of periods for which to calculate indicator values
    :return: averages over the given periods
    """

    results = holder()

    avs = {}

    for i in range(0,len(periods)):

        avs[periods[i]] = pd.DataFrame(prices[['open','high','low','close']].rolling(periods[i]).mean())

        results.avs =avs

        return results


# Slope functions

def slopes(prices,periods):

    """

    :param prices: OHCL price data
    :param periods: periods to get the indicator values
    :return: slopes over given periods
    """

    results = holder()
    slope = {}

    for i in range(0,len(periods)):

        ms = []

        for j in range(periods[i],len(prices)-periods[i]):

            y = prices.high.iloc[j-periods[i]:j].values
            x = np.arange(0,len(y))

            res = stats.linregress(x,y=y)
            m = res.slope

            ms = np.append(ms,m)

        ms = pd.DataFrame(ms,index = prices.iloc[periods[i]:-periods[i]].index)

        ms.columns = [['high']]

        slope[periods[i]] = ms

    results.slope = slope

    return results




















  











Import data 

In [30]:
from google.colab import files
uploaded = files.upload()

Saving EURUSD5m.csv to EURUSD5m.csv


Reshape data to index date column and drop duplicates

In [31]:
# Load our CSV data
import io
data = pd.read_csv(io.BytesIO(uploaded['EURUSD5m.csv']))
data.columns = ['Date','open','high','low','close','AskVol']
data.Date = pd.to_datetime(data.Date, format='%d.%m.%Y %H:%M:%S.%f')
data = data.set_index(pd.to_datetime(data.Date))
prices = data.drop_duplicates(keep=False)
data

Unnamed: 0_level_0,Date,open,high,low,close,AskVol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-12-26 00:00:00,2016-12-26 00:00:00,1.04521,1.04536,1.04504,1.04527,639.99
2016-12-26 00:05:00,2016-12-26 00:05:00,1.04527,1.04557,1.04514,1.04515,272.63
2016-12-26 00:10:00,2016-12-26 00:10:00,1.04515,1.04542,1.04514,1.04530,795.16
2016-12-26 00:15:00,2016-12-26 00:15:00,1.04529,1.04535,1.04510,1.04520,480.07
2016-12-26 00:20:00,2016-12-26 00:20:00,1.04521,1.04537,1.04513,1.04526,1215.15
...,...,...,...,...,...,...
2019-12-21 23:35:00,2019-12-21 23:35:00,1.10769,1.10769,1.10769,1.10769,0.00
2019-12-21 23:40:00,2019-12-21 23:40:00,1.10769,1.10769,1.10769,1.10769,0.00
2019-12-21 23:45:00,2019-12-21 23:45:00,1.10769,1.10769,1.10769,1.10769,0.00
2019-12-21 23:50:00,2019-12-21 23:50:00,1.10769,1.10769,1.10769,1.10769,0.00


Execute feature creation code and acquire new dataset with features


In [32]:
# Load our CSV data



# Create lists for each periods required by our functions

momentumKey = [3,4,5,8,9,10]
stochasticKey = [3,4,5,8,9,10]
williamsKey = [6,7,8,9,10]
procKey = [12,13,14,15]
wadlKey = [15]
adoscKey = [2,3,4,5]
macdKey = [15,30]
cciKey = [15]
bollingerKey = [15]
heikenashiKey = [15]
paverageKey = [2]
slopeKey = [3,4,5,10,20,30]
fourierKey = [10,20,30]
sinKey = [5,6]

keyList = [momentumKey,stochasticKey,williamsKey,procKey,wadlKey,adoscKey,macdKey,cciKey,bollingerKey,heikenashiKey,paverageKey,slopeKey,fourierKey,sinKey]

# calculate all of the features:

momentumDict = momentum(prices,momentumKey)
print('1')
stochasticDict = stochastic(prices,stochasticKey)
print('2')
williamsDict = williams(prices,williamsKey)
print('3')
procDict = proc(prices,procKey)
print('4')
wadlDict = wadl(prices,wadlKey)
print('5')
adoscDict = adosc(prices,adoscKey)
print('6')
macdDict = macd(prices,macdKey)
print('7')
cciDict = cci(prices,cciKey)
print('8')
bollingerDict = bollinger(prices,bollingerKey,2)
print('9')

hkaprices = prices.copy()
hkaprices['Symbol'] = 'SYMB'

#period = timedelta(hours=1)

HKA= OHLCresample(hkaprices)

heikenDict = heikenashi(HKA,heikenashiKey)
print('10')
paverageDict = paverage(prices, paverageKey)
print('11')
slopeDict = slopes(prices,slopeKey)
print('12')
fourierDict = fourier(prices,fourierKey)
print('13')
sineDict = sine(prices,sinKey)
print('14')

# Create list of dictionaries

dictlist=[momentumDict.close, stochasticDict.close, williamsDict.close, procDict.proc, wadlDict.wadl,
            adoscDict.AD, macdDict.line, cciDict.cci, bollingerDict.bands, heikenDict.candles, paverageDict.avs,
            slopeDict.slope, fourierDict.coeffs, sineDict.coeffs]

# List of 'base' column names:

colFeat = ['momentum', 'stoch', 'will', 'proc', 'wadl', 'adosc', 'macd', 'cci', 'bollinger', 'heiken', 'paverage', 'slope', 'fourier', 'sine']

# populate the masterframe:

masterFrame = pd.DataFrame(index=prices.index)

for i in range(0,len(dictlist)):

    if colFeat[i] == 'macd':

        colID = colFeat[i] + str(keyList[6][0]) + str(keyList[6][1])

        masterFrame[colID] = dictlist[i]

    else:

        for j in keyList[i]:

            for k in list(dictlist[i][j]):

                colID = colFeat[i] + str(j)

                masterFrame[colID] = dictlist[i][j][k]


threshold = round(.7*len(masterFrame))

masterFrame[['open','high','low','close']] = prices[['open','high','low','close']]
masterFrame
# HA is resampled ==> empty data in between
"""
masterFrame.heikenTopen = masterFrame.heikenTopen.fillna(method='ffill')
masterFrame.heikenThigh = masterFrame.heikenThigh.fillna(method='ffill')
masterFrame.heikenTlow = masterFrame.heikenTlow.fillna(method='ffill')
masterFrame.heikenTclose = masterFrame.heikenTclose.fillna(method='ffill')
"""
# drop columns that have 30% or more NAN data

masterFrameCleaned = masterFrame.copy()

masterFrameCleaned = masterFrameCleaned.dropna(axis=1,thresh=threshold)
masterFrameCleaned = masterFrameCleaned.dropna(axis=0)

#from google.colab import files
#masterFrameCleaned.to_csv('testing.csv')
#files.download("masterFrame.csv")
print('completed calculations')
masterFrameCleaned


1
2
3
4
5
6
7
8
9
10
11
12
13
14
completed calculations


Unnamed: 0_level_0,momentum3,momentum4,momentum5,momentum8,momentum9,momentum10,stoch3,stoch4,stoch5,stoch8,stoch9,stoch10,will6,will7,will8,will9,will10,proc12,proc13,proc14,proc15,wadl15,adosc2,adosc3,adosc4,adosc5,macd1530,cci15,bollinger15,heiken15,paverage2,slope3,slope4,slope5,slope10,slope20,slope30,open,high,low,close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
2016-12-26 02:30:00,0.00011,0.00015,0.00024,0.00021,0.00031,0.00037,138.095238,132.651072,131.578947,89.625850,8.962585e+01,9.019814e+01,35.000000,25.925926,-6.122449,-6.122449,-5.769231,0.000383,0.000421,0.000402,0.000325,0.641645,2.195677e+03,5.737737e+02,3.929856e+02,-5.570396e+02,3.497595e-05,0.010107,1.044721,1.045275,1.045275,0.000100,0.000073,0.000069,0.000023,7.669173e-07,-0.000012,1.04524,1.04538,1.04522,1.04526
2016-12-26 02:35:00,0.00010,0.00011,0.00015,0.00018,0.00021,0.00031,120.430108,121.840261,121.052632,89.263531,9.106263e+01,9.118037e+01,-31.578947,-30.769231,-26.086957,-20.689655,-20.689655,0.000373,0.000383,0.000421,0.000402,0.641645,2.202874e+03,5.949001e+02,4.258581e+02,-5.225701e+02,4.184372e-05,0.010264,1.044744,1.045247,1.045260,0.000095,0.000079,0.000068,0.000022,5.383459e-06,-0.000010,1.04524,1.04526,1.04523,1.04526
2016-12-26 02:40:00,0.00033,0.00046,0.00047,0.00061,0.00054,0.00057,130.828968,127.276343,123.376008,109.776352,1.084539e+02,1.049735e+02,63.157895,63.157895,61.538462,52.173913,41.379310,0.000699,0.000718,0.000727,0.000766,0.786788,3.348331e+03,1.711389e+03,1.425539e+03,3.786137e+02,6.654066e-05,0.013309,1.044706,1.045438,1.045440,-0.000060,0.000021,0.000035,0.000032,1.853383e-05,-0.000007,1.04526,1.04562,1.04525,1.04562
2016-12-26 02:45:00,0.00023,0.00020,0.00033,0.00047,0.00048,0.00041,132.532672,129.098205,122.923263,121.466291,1.198965e+02,1.148676e+02,29.729730,28.947368,28.947368,28.205128,23.913043,0.000517,0.000574,0.000593,0.000603,0.554993,5.068686e+03,3.073186e+03,2.762819e+03,1.617036e+03,7.907760e-05,0.014021,1.044727,1.045600,1.045555,0.000120,0.000060,0.000074,0.000046,2.757895e-05,-0.000002,1.04562,1.04581,1.04548,1.04549
2016-12-26 02:50:00,0.00050,0.00050,0.00047,0.00065,0.00074,0.00075,157.102564,149.432825,142.782912,137.688825,1.343199e+02,1.291715e+02,25.454545,22.950820,22.580645,22.580645,22.222222,0.000679,0.000775,0.000833,0.000852,0.716604,6.060252e+03,4.054392e+03,3.669816e+03,2.517436e+03,1.034461e-04,0.016571,1.044710,1.045635,1.045625,0.000275,0.000165,0.000110,0.000071,3.821805e-05,0.000003,1.04553,1.04577,1.04548,1.04576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-21 21:05:00,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,3.116914e-12,5.021169e-13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1041.320890,-2.754579e+06,-1.904689e+06,-1.425440e+06,-1.142500e+06,1.659117e-12,0.000003,1.107690,1.107690,1.107690,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,1.10769,1.10769,1.10769,1.10769
2019-12-21 21:10:00,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,3.116914e-12,5.021169e-13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1041.320890,-2.754579e+06,-1.904689e+06,-1.425440e+06,-1.142500e+06,1.551870e-12,0.000003,1.107690,1.107690,1.107690,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,1.10769,1.10769,1.10769,1.10769
2019-12-21 21:15:00,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,3.116914e-12,5.021169e-13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1041.320890,-2.754579e+06,-1.904689e+06,-1.425440e+06,-1.142500e+06,1.451506e-12,0.000003,1.107690,1.107690,1.107690,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,1.10769,1.10769,1.10769,1.10769
2019-12-21 21:20:00,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.000000,0.000000,3.116914e-12,5.021169e-13,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1041.320890,-2.754579e+06,-1.904689e+06,-1.425440e+06,-1.142500e+06,1.357803e-12,0.000003,1.107690,1.107690,1.107690,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,1.10769,1.10769,1.10769,1.10769


Create output column
1: buy
0: sell or do nothing

In [0]:
import io
import pandas as pd
import numpy as np
masterFrame = masterFrameCleaned
#masterFrame = masterFrame.iloc[:, :-1]
masterFrame['Order']= np.where(masterFrame['close']-masterFrame['open']>0,'1','0')
masterFrame['Order'] = masterFrame['Order'].shift(-1)
masterFrame.drop(masterFrame.tail(1).index,inplace=True)
from google.colab import files
masterFrame.to_csv('EURUSD1h1y41c.csv')




In [23]:
masterFrame

Unnamed: 0_level_0,momentum3,momentum4,momentum5,momentum8,momentum9,momentum10,stoch3,stoch4,stoch5,stoch8,stoch9,stoch10,will6,will7,will8,will9,will10,proc12,proc13,proc14,proc15,wadl15,adosc2,adosc3,adosc4,adosc5,macd1530,cci15,bollinger15,heiken15,paverage2,slope3,slope4,slope5,slope10,slope20,slope30,open,high,low,close,Order
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1
2019-01-03 04:00:00,0.00188,0.00275,0.00551,0.00217,0.00238,0.00394,116.120555,114.546028,114.546028,112.343551,106.551739,98.987632,10.898662,10.898662,10.898662,10.898662,10.898662,0.000960,0.001207,0.000572,-0.000387,-205.165477,-5.383602e+05,-3.527579e+05,-2.876076e+05,-2.872451e+05,-0.001378,0.098993,1.131642,1.136502,1.136600,0.000950,0.000834,0.000552,0.000278,-0.000587,-0.000545,1.13648,1.13678,1.13603,1.13672,2
2019-01-03 05:00:00,0.00028,0.00125,0.00212,0.00177,0.00154,0.00175,102.074713,105.840535,105.904983,105.904983,103.400736,99.662728,-17.014446,-17.014446,-17.014446,-17.014446,-17.014446,0.002267,0.000405,0.000652,0.000018,-218.978749,-5.356926e+05,-3.475519e+05,-2.774285e+05,-2.770060e+05,-0.001268,0.099087,1.131641,1.136370,1.136405,0.000315,0.000559,0.000612,0.000267,-0.000454,-0.000533,1.13672,1.13698,1.13569,1.13609,1
2019-01-03 06:00:00,0.00074,0.00141,0.00238,0.00363,0.00290,0.00267,93.517349,98.628409,98.340193,98.335937,98.335937,98.335937,1.123596,1.123596,1.123596,1.123596,1.123596,0.003920,0.003264,0.001400,0.001647,-193.996160,-5.199766e+05,-3.324089e+05,-2.624406e+05,-2.623219e+05,-0.001106,0.108301,1.131489,1.136607,1.136655,-0.000085,0.000212,0.000409,0.000281,-0.000345,-0.000514,1.13608,1.13765,1.13548,1.13722,2
2019-01-03 07:00:00,0.00024,0.00048,0.00115,0.00575,0.00337,0.00264,85.555899,92.521563,92.721935,93.686463,93.686463,93.686463,-3.084416,-3.049759,-3.049759,-3.049759,-3.049759,0.002310,0.003690,0.003035,0.001171,-213.093895,-4.942745e+05,-3.126947e+05,-2.398532e+05,-2.389569e+05,-0.000980,0.114046,1.131405,1.137005,1.137090,0.000435,0.000170,0.000283,0.000324,-0.000228,-0.000491,1.13722,1.13768,1.13616,1.13696,1
2019-01-03 08:00:00,0.00191,0.00128,0.00152,0.00403,0.00679,0.00441,102.008523,103.633838,102.206053,101.091477,101.091477,101.091477,9.485095,5.255255,5.200594,5.200594,5.200594,0.003041,0.003227,0.004608,0.003952,-138.648145,-4.564050e+05,-2.748252e+05,-2.019838e+05,-2.037739e+05,-0.000811,0.123436,1.131422,1.137185,1.137480,0.000350,0.000337,0.000193,0.000335,-0.000105,-0.000467,1.13696,1.13838,1.13540,1.13800,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-30 10:00:00,0.00026,0.00044,0.00008,0.00020,0.00031,0.00013,89.949797,89.949797,89.949797,87.747550,85.938029,87.486662,-38.655462,-38.655462,-38.655462,-38.655462,-38.655462,0.000354,0.000400,0.000672,-0.000544,-1796.530488,-5.600036e+06,-4.719557e+06,-4.432643e+06,-4.234886e+06,0.000115,0.021346,1.100650,1.101622,1.101655,0.000305,0.000268,0.000133,0.000038,0.000063,0.000035,1.10191,1.10202,1.10114,1.10142,1
2020-01-30 11:00:00,0.00062,0.00112,0.00130,0.00077,0.00106,0.00117,96.346068,96.346068,96.346068,96.346068,93.874753,93.874753,11.188811,11.188811,11.188811,11.188811,11.188811,0.000817,0.001135,0.001181,0.001454,-1790.949739,-5.591945e+06,-4.711796e+06,-4.424883e+06,-4.227125e+06,0.000168,0.024368,1.100666,1.101880,1.101850,0.000070,0.000177,0.000195,0.000063,0.000055,0.000042,1.10143,1.10241,1.10140,1.10228,2
2020-01-30 12:00:00,0.00038,0.00061,0.00111,0.00056,0.00076,0.00105,95.154794,94.340953,94.340953,94.340953,94.340953,94.340953,10.489510,10.489510,10.489510,10.489510,10.489510,0.000890,0.000808,0.001126,0.001172,-1795.380214,-5.579085e+06,-4.699404e+06,-4.412972e+06,-4.215214e+06,0.000209,0.027296,1.100664,1.102320,1.102275,0.000145,0.000149,0.000194,0.000095,0.000060,0.000052,1.10229,1.10272,1.10200,1.10227,1
2020-01-30 13:00:00,0.00112,0.00065,0.00088,0.00120,0.00083,0.00103,111.452022,110.214613,109.745487,109.745487,109.745487,109.745487,7.558140,7.558140,7.558140,7.558140,7.558140,0.001299,0.001135,0.001053,0.001371,-1785.398558,-5.564054e+06,-4.684373e+06,-4.398257e+06,-4.200851e+06,0.000258,0.031401,1.100642,1.102435,1.102405,0.000350,0.000219,0.000197,0.000118,0.000061,0.000064,1.10228,1.10318,1.10174,1.10254,2


Go retrieve data at the files