# Prediction Algorithms

The main idea is to predict the next day low, upper, open and close to determine a strategy for the next day

In [1]:
import pandas as pd
from datetime import datetime
from datetime import date 
import numpy as np

from time import sleep
from tqdm.notebook import tqdm, trange

import seaborn as sns
import matplotlib as plt
import plotly.graph_objects as go

from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split as tts
from xgboost import XGBRegressor as XGBR
from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.ensemble import GradientBoostingRegressor as GBR

# Functions

In [2]:
#Conversion from strings to numeric
def strnum(x):
    try:
        x=x.replace(',','')
        return float(x)
    except:
        return x

#-----------movingAvg function-----------------
#Calculates the Moving Average

#df - list with the dataframes
#x - index of the coin, e.g. 0- BTC, 1-ETH... of the databases' list who contains the collections of data
#dname - name of the destination column of the moving average calculation (not creared yet)
#oname - name of the origin column of the data to take the information for the moving avarege
#msize - number of registers to calculate the moving average

def movingAvg(df,x,dname,oname,msize):
    df[x][dname] = 0.0
    for i in range(len(df[x])):
        temp=0.0
        if i<msize:
            pass
        else:
            for j in range(msize):
                temp+= data[x][oname][i-msize+j]
            df[x][dname][i]=temp/msize
#-----------------oheDate--------------------
#Makes one hot econfing to date depending day of the week, day of the month, week of year and month of year

def oheDate(df,x):
    df[x]['weekday']=df[x]['date']
    df[x]['weekday']=df[x]['weekday'].apply(lambda z: str(z.weekday()))
    
    #df[x]['calendarweek']=df[x]['date']
    #df[x]['calendarweek']=df[x]['date'].apply(lambda z: str(z.isocalendar()[1]))
    
    #df[x]['dayofmonth']=df[x]['date']
    #df[x]['dayofmonth']=df[x]['date'].apply(lambda z: str(z.day))
    
    #df[x]['month']=df[x]['date']
    #df[x]['month']=df[x]['date'].apply(lambda z: str(z.month))
    
    #Contanating dummies
    ohe=pd.get_dummies(df[x][['weekday']])#,'calendarweek']])#,'dayofmonth','month']])
    df[x]=pd.concat([df[x],ohe],axis=1)
    #Eliminating categorical columns
    df[x].drop(columns=['weekday'], inplace=True)
    
    
#--------------------yesterday-----------------
#Takes n "days" to the current day as new column (parameter) for the prediction
def yesterday(df,x,days=1,closev=True,openv=True,highv=True,lowv=True,volumev=True):
    
    #Values selector from the function inputs
    values_names=['close','open','high','low','volume']
    values=[closev,openv,highv,lowv,volumev]
    vals=[]
    for i,e in enumerate(values):
        if e==True:
            vals.append(values_names[i])
    yestvals=[]#List of added variables
    
    #Main set    
    for day in range(days):
        for val in vals:
            y=val+'_'+str(day)
            df[x]['yesterday_'+y]=0.0
            yestvals.append('yesterday_'+y)
            for i in range(len(df[x])-1):
                df[x]['yesterday_'+y][i+1]=df[x][val][i]
    
    return yestvals

# Load and Transform

In [3]:
#Global Variables
coins=['btc','eth','xrp','ltc','bch','tusd','mana','gnt','bat','dai']

#Opening All databases
data=[]
for i,e in enumerate(coins):
    file=e+'-mxn.csv'
    data.append(pd.read_csv(file))
#Conversion of dates from strin to timestamps
for i in range(len(coins)):
    data[i].date=data[i].date.apply(lambda x: datetime.strptime(x,'%Y-%m-%d'))

# Conversion to numeric 
for i in range(len(coins)):
    data[i].open=data[i].open.apply(lambda x: strnum(x))
    data[i].close=data[i].close.apply(lambda x: strnum(x))
    data[i].low=data[i].low.apply(lambda x: strnum(x))
    data[i].high=data[i].high.apply(lambda x: strnum(x))
    data[i].volume=data[i].volume.apply(lambda x: strnum(x))

#MACD Calculation
metricas=['open','close','high','low','volume']

for i in tqdm(range(len(coins))):
    for m in metricas:#tqdm(metricas):
        movingAvg(data,i,'PMEF'+ m,m,12)
        movingAvg(data,i,'PMES'+m,m,26)
        data[i]['MACD'+m]=data[i]['PMEF'+m]-data[i]['PMES'+m]
        movingAvg(data,i,'MACD9'+m,'MACD'+m,9)
        data[i]['diffMACD'+m]=data[i]['MACD'+m]-data[i]['MACD9'+m]
    oheDate(data,i)
    yv=yesterday(data,i,14)



HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




# Predictior Function

In [4]:
def predictor(data=data,end=0):
    #Create Dataframe for predictions
    preds= pd.DataFrame(columns=metricas)
    preds['coin'] =""
    cols=preds.columns.tolist()
    cols=cols[-1:]+cols[:-1]
    preds=preds[cols]
    preds

    for x in tqdm(range(len(coins))):
        test=data[x][35:len(data[x])-1-end] #35 is 26 + 9 for all the number of samples who doesn't has Moving Avg  
        dc=['date','open','close','low','high','volume']
        timevars = [ 'weekday_'+ str(x) for x in range(7)]

        #Adding new row for predictions
        preds=preds.append([0], ignore_index=True).drop(0,axis=1)
        preds=preds[cols]
        preds['coin'][x]=coins[x]

        for m in metricas:
            #Predictions
            X_train, X_test, y_train, y_test=tts(test[['PMEF'+m,'PMES'+m,'MACD'+m,'MACD9'+m]+timevars+yv],test[m], shuffle=True,random_state=100,test_size=.1)
            modelo=XGBR()
            modelo.fit(X_train, y_train)
            preds[m][x]=modelo.predict(data[x][len(data[x])-1-end:len(data[x])-end][['PMEF'+m,'PMES'+m,'MACD'+m,'MACD9'+m]+timevars+yv])[0]
            #train_score=modelo.score(X_train, y_train)
            #test_score=modelo.score(X_test, y_test)
            #print ('Score para', coins[x],m,train_score, test_score)

    #Getting oppenings to make initial corrections

    preds['ropen']=0.0
    for i,e in enumerate(coins):
        preds.ropen[i]=data[i]['open'][len(data[i])-1-end]

    preds['corr']=preds.ropen - preds.open
    preds['pcorr']= (1 - preds.open/preds.ropen)*100
    preds['pcorr']= preds['pcorr'].apply(lambda x: str(x.round(1))+'%')


    #Calculation of investment factors

    preds['go']= preds.close - preds.open
    preds['go']= preds['go'].apply(lambda x: 1 if x>0 else 0)
    preds['buy']=preds.open - (preds.open-preds.low)/3 # 1/3 below the opening regarding the lowest point
    preds['sell']=(preds.high-preds.close)/3+preds.close # 1/3 above the closure regarding the highes point
    preds['rate']=(preds.sell/preds.buy-1)*100
    preds.go=preds.go*preds.rate
    preds.go=preds['go'].apply(lambda x: 1 if x>0 else 0)
    preds.rate=preds.rate.apply(lambda x: x.round(1))

    #Calculuation correction

    preds['buya'] = preds['buy'] + preds['corr']
    preds['sella'] = preds['sell'] # + preds['corr']
    preds['rrate']= (preds['sella']/preds['buya'] - 1)*100
    preds['stoploss'] = preds['low'] + preds['corr']
    preds.rrate=preds.rrate.apply(lambda x: x.round(1))
    preds['go']=preds['rrate']
    preds['go']= preds['go'].apply(lambda x: 1 if x>2 else 0)
    
    return preds


In [5]:
preds=predictor()

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




# Display Predictions

In [6]:
sortedcols=['coin','buya','sella','stoploss','go','rrate']
preds[sortedcols]

Unnamed: 0,coin,buya,sella,stoploss,go,rrate
0,btc,225915.0,230548.0,224332.0,1,2.1
1,eth,7015.2,7442.74,6730.07,1,6.1
2,xrp,4.75054,4.81219,4.61163,0,1.3
3,ltc,1073.07,1093.48,1054.6,0,1.9
4,bch,5534.04,5621.74,5487.3,0,1.6
5,tusd,22.051,22.396,21.9129,0,1.6
6,mana,0.937685,0.972622,0.913055,1,3.7
7,gnt,1.31239,1.3899,1.29717,1,5.9
8,bat,5.50078,5.75251,5.36234,1,4.6
9,dai,22.4861,22.7798,22.3783,0,1.3


In [7]:
preds

Unnamed: 0,coin,open,close,high,low,volume,ropen,corr,pcorr,go,buy,sell,rate,buya,sella,rrate,stoploss
0,btc,226410.0,229774.0,232095.0,224034.0,246.368,226706.94,297.44,0.1%,1,225618.0,230548.0,2.2,225915.0,230548.0,2.1,224332.0
1,eth,7207.22,7281.47,7765.3,6779.52,1694.19,7157.77,-49.4522,-0.7%,1,7064.66,7442.74,5.4,7015.2,7442.74,6.1,6730.07
2,xrp,4.87891,4.77014,4.89628,4.67054,13430900.0,4.82,-0.058912,-1.2%,0,4.80946,4.81219,0.1,4.75054,4.81219,1.3,4.61163
3,ltc,1086.76,1078.9,1122.64,1059.06,2155.43,1082.3,-4.46208,-0.4%,0,1077.53,1093.48,1.5,1073.07,1093.48,1.9,1054.6
4,bch,5549.72,5570.64,5723.93,5479.6,290.543,5557.41,7.6932,0.1%,0,5526.35,5621.74,1.7,5534.04,5621.74,1.6,5487.3
5,tusd,22.2536,22.3399,22.5082,22.0465,70252.5,22.12,-0.133592,-0.6%,0,22.1846,22.396,1.0,22.051,22.396,1.6,21.9129
6,mana,0.94552,0.943765,1.03033,0.908576,2297300.0,0.95,0.00447972,0.5%,1,0.933205,0.972622,4.2,0.937685,0.972622,3.7,0.913055
7,gnt,1.31578,1.33407,1.50155,1.29294,814331.0,1.32,0.00422282,0.3%,1,1.30817,1.3899,6.2,1.31239,1.3899,5.9,1.29717
8,bat,5.58655,5.68451,5.88853,5.37889,77438.8,5.57,-0.0165479,-0.3%,1,5.51733,5.75251,4.3,5.50078,5.75251,4.6,5.36234
9,dai,22.6427,22.7172,22.9049,22.481,14422.1,22.54,-0.102721,-0.5%,0,22.5888,22.7798,0.8,22.4861,22.7798,1.3,22.3783


# Testing to find parameter to buy and sell

In [25]:
def check(n=1):
    
    metricas=['open','close','high','low','volume']
    diffs=[]
    
    for i in tqdm(range(n)):
        preds=predictor(data,i) #Getting the preductions of day i
        
        for c in range(10):
            for m in metricas:
                real=data[c][len(data[c])-i-1:len(data[c])-i].iloc[0][m]
                pred=(preds.iloc[c][m]).round(2)
                error=1-pred/real
                diffs.append([c,m,error.round(2),real,pred])
    return pd.DataFrame(diffs)

In [26]:
z=check(62)

HBox(children=(FloatProgress(value=0.0, max=62.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))





All but volume has less than 2% of error in the prediction. Error in volume is huge

In [12]:
coins=['btc','eth','xrp','ltc','bch','tusd','mana','gnt','bat','dai']
metricas=['open','close','high','low','volume']
for i,e in enumerate(coins):
    for m in metricas:
        print(e,'in',m ,z[(z[0]==i) & (z[1]==m)][2].min().round(2)*100)

btc in open -2.0
btc in close -6.0
btc in high -4.0
btc in low -5.0
btc in volume -866.0
eth in open -2.0
eth in close -9.0
eth in high -4.0
eth in low -7.000000000000001
eth in volume -1023.0
xrp in open -2.0
xrp in close -8.0
xrp in high -6.0
xrp in low -7.000000000000001
xrp in volume -756.0
ltc in open -2.0
ltc in close -10.0
ltc in high -6.0
ltc in low -11.0
ltc in volume -236.0
bch in open -2.0
bch in close -10.0
bch in high -9.0
bch in low -13.0
bch in volume -4753.0
tusd in open -3.0
tusd in close -3.0
tusd in high -4.0
tusd in low -2.0
tusd in volume -365.0
mana in open -4.0
mana in close -10.0
mana in high -11.0
mana in low -6.0
mana in volume -933.0
gnt in open -3.0
gnt in close -9.0
gnt in high -28.000000000000004
gnt in low -8.0
gnt in volume -960.0


AttributeError: 'float' object has no attribute 'round'

In [24]:
z[(z[0]==9) & (z[1]=='open')]

Unnamed: 0,0,1,2,3,4


In [None]:
z

In [20]:
data[9].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Columns: 108 entries, date to yesterday_volume_13
dtypes: datetime64[ns](1), float64(100), uint8(7)
memory usage: 81.3 KB


# Algorthms Intraday Trading

# Graphic

In [None]:
#Graphic with Candle Sticks
i=0
fig=go.Figure(data=[go.Candlestick(x=data[i].date,
                                   open=data[i].open,
                                   high=data[i].high,
                                   low=data[i].low,
                                   close=data[i].close
                                  )
                   ]
             )
fig.show()