# Prediction Algorithms

The main idea is to predict the next day's low, high, open and close prices in order to determine a trading strategy for that day.

In [1]:
import pandas as pd
from datetime import datetime
from datetime import date 
import numpy as np

from time import sleep
from tqdm.notebook import tqdm, trange

import seaborn as sns
import matplotlib as plt
import plotly.graph_objects as go

from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split as tts
from xgboost import XGBRegressor as XGBR
from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.ensemble import GradientBoostingRegressor as GBR

# Functions

In [2]:
#Conversion from strings to numeric
def strnum(x):
    """Convert a number written as text (e.g. '1,234.56') to a float.

    Commas are stripped before the conversion. Anything that cannot be
    converted (a value without ``.replace``, or text that is not a valid
    number) is returned unchanged, so already-numeric cells pass through.
    """
    try:
        return float(x.replace(',',''))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: x has no .replace (e.g. it is already a float)
        # TypeError/ValueError: the cleaned value is not a parsable number
        # Other exceptions (KeyboardInterrupt, SystemExit...) must propagate.
        return x

#-----------movingAvg function-----------------
#Calculates the trailing moving average of a column

#df - list with the dataframes
#x - index of the coin (e.g. 0 - BTC, 1 - ETH, ...) inside the list of dataframes
#dname - name of the destination column for the moving average (created or overwritten here)
#oname - name of the origin column the moving average is computed from
#msize - number of previous rows averaged for each value

def movingAvg(df,x,dname,oname,msize):
    """Store in df[x][dname] the mean of the `msize` rows of `oname` that precede each row.

    The window never includes the current row: row i receives the mean of
    rows i-msize .. i-1. The first `msize` rows, which do not have a full
    window behind them, are set to 0.0. The dataframe df[x] is modified in
    place; nothing is returned.

    Raises:
        ValueError: if msize is smaller than 1.
    """
    if msize < 1:
        raise ValueError("msize must be a positive integer")
    frame = df[x]
    # rolling(msize).mean() ends its window ON the current row; shifting by
    # one row turns it into the mean of the msize rows strictly before it.
    avg = frame[oname].rolling(msize).mean().shift(1)
    # Rows without a complete window keep the 0.0 placeholder.
    avg.iloc[:msize] = 0.0
    # Single column assignment on the frame itself (no chained indexing).
    frame[dname] = avg
#-----------------oheDate--------------------
#One-hot encodes the day of the week of the 'date' column

def oheDate(df,x):
    """Append the columns weekday_0 .. weekday_6 (Monday = 0) to df[x].

    df is the list of dataframes and x the position of the one to encode.
    df[x] is replaced by a new dataframe made of the original columns
    followed by the seven indicator columns. All seven columns are always
    created, even when some weekday never occurs in the data, so code that
    selects weekday_0..weekday_6 does not fail on short histories.

    Only the weekday is encoded; the calendar-week, day-of-month and month
    encodings that used to be sketched here were disabled and are omitted.
    """
    frame = df[x]
    weekday = pd.to_datetime(frame['date']).dt.weekday.astype(str)
    # Fixed categories make get_dummies emit a column for every weekday.
    weekday = pd.Categorical(weekday, categories=[str(d) for d in range(7)])
    #Concatenating dummies
    ohe = pd.get_dummies(pd.DataFrame({'weekday': weekday}, index=frame.index))
    df[x] = pd.concat([frame, ohe], axis=1)
    

# Load and Transform

In [7]:
#Global Variables
coins=['btc','eth','xrp','ltc','bch','tusd','mana','gnt','bat','dai']

#Opening all databases: one CSV per coin, read in the order of `coins`
data=[pd.read_csv(coin+'-mxn.csv') for coin in coins]

#Conversion of dates from string to timestamps
for frame in data:
    frame.date=frame.date.apply(lambda text: datetime.strptime(text,'%Y-%m-%d'))

#Conversion of the price and volume columns to numeric
for frame in data:
    for column in ('open','close','low','high','volume'):
        frame[column]=frame[column].apply(strnum)

#MACD Calculation
metricas=['open','close','high','low','volume']

for pos in tqdm(range(len(coins))):
    for metric in metricas:
        fast,slow='PMEF'+metric,'PMES'+metric
        macd,signal='MACD'+metric,'MACD9'+metric
        #12-row and 26-row moving averages of the metric and their difference
        movingAvg(data,pos,fast,metric,12)
        movingAvg(data,pos,slow,metric,26)
        data[pos][macd]=data[pos][fast]-data[pos][slow]
        #9-row moving average of that difference, and the gap between the two
        movingAvg(data,pos,signal,macd,9)
        data[pos]['diffMACD'+metric]=data[pos][macd]-data[pos][signal]
    #Weekday indicator columns are added once per coin, after all metrics
    oheDate(data,pos)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




# Predictor

In [8]:
#Build one prediction record per coin and collect them into `preds`
#(rows are gathered in a list because DataFrame.append no longer exists in pandas 2.x)
timevars=['weekday_'+str(d) for d in range(7)]
records=[]

for x in tqdm(range(len(coins))):
    frame=data[x]
    #The first 35 rows (26 + 9) have no complete moving averages; the last row is the one to predict
    train=frame.iloc[35:len(frame)-1]
    latest=frame.iloc[len(frame)-1:]
    record={'coin':coins[x]}
    for m in metricas:
        features=['PMEF'+m,'PMES'+m,'MACD'+m,'MACD9'+m]+timevars
        #The 20% hold-out is not scored here; the split is kept so the model is
        #fitted on the same shuffled 80% subset (use modelo.score(X_test, y_test) to evaluate)
        X_train, X_test, y_train, y_test=tts(train[features],train[m], shuffle=True,random_state=100,test_size=.2)
        modelo=XGBR()
        modelo.fit(X_train, y_train)
        #Prediction for the last row, stored as a plain float so the columns are numeric
        record[m]=float(modelo.predict(latest[features])[0])
    #Real opening of the last row, used below to make the initial correction
    record['ropen']=frame['open'].iloc[-1]
    records.append(record)

cols=['coin']+metricas
preds=pd.DataFrame(records, columns=cols+['ropen'])

#Correction: difference between the real and the predicted opening
preds['corr']=preds['ropen']-preds['open']

#Calculation of investment factors
preds['ent']=preds['open']-(preds['open']-preds['low'])/2
preds['out']=(preds['high']-preds['close'])/2+preds['close']
rate=(preds['out']/preds['ent']-1)*100
#go is 1 only when the predicted close is above the predicted open AND the (unrounded) rate is positive
preds['go']=((preds['close']>preds['open'])&(rate>0)).astype(int)
preds['rate']=rate.round(1)

#Same factors shifted by the opening correction
preds['enta']=preds['ent']+preds['corr']
preds['outa']=preds['out']+preds['corr']
preds['rrate']=((preds['outa']/preds['enta']-1)*100).round(1)
preds['stoploss']=preds['low']+preds['corr']

#Keep the column order this table has always been displayed with
preds=preds[cols+['ropen','corr','go','ent','out','rate','enta','outa','rrate','stoploss']]

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




# Display Predictions

In [9]:
#Columns shown in the summary table, in display order
sortedcols=[
    'coin',
    'enta',
    'outa',
    'stoploss',
    'go',
    'rrate',
]
preds.loc[:,sortedcols]

Unnamed: 0,coin,enta,outa,stoploss,go,rrate
0,btc,206127.0,208822.0,203576.0,0,1.3
1,eth,5526.67,5425.55,5633.89,0,-1.8
2,xrp,4.55763,4.59191,4.58527,1,0.8
3,ltc,1016.11,1032.19,1017.7,1,1.6
4,bch,5199.06,5116.57,5043.07,0,-1.6
5,tusd,22.4147,22.5888,22.3793,1,0.8
6,mana,0.897627,0.933422,0.875253,1,4.0
7,gnt,1.23168,1.31748,1.23336,1,7.0
8,bat,5.70262,5.70459,5.61524,0,0.0
9,dai,22.6863,22.8435,22.5226,0,0.7


In [10]:
#Full prediction table, including the intermediate columns
preds

Unnamed: 0,coin,open,close,high,low,volume,ropen,corr,go,ent,out,rate,enta,outa,rrate,stoploss
0,btc,210221.0,207490.0,213241.0,205119.0,112.222,208678.46,-1543.01,0,207670.0,210365.0,1.3,206127.0,208822.0,1.3,203576.0
1,eth,5193.68,5124.84,5274.73,5408.12,964.861,5419.45,225.767,0,5300.9,5199.78,-1.9,5526.67,5425.55,-1.8,5633.89
2,xrp,4.27474,4.42524,4.24806,4.33001,7230120.0,4.53,0.25526,1,4.30237,4.33665,0.8,4.55763,4.59191,0.8,4.58527
3,ltc,971.56,1007.21,971.23,974.728,913.287,1014.53,42.9702,1,973.144,989.222,1.7,1016.11,1032.19,1.6,1017.7
4,bch,5634.74,5362.36,5430.18,5322.77,122.669,5355.04,-279.697,0,5478.75,5396.27,-1.5,5199.06,5116.57,-1.6,5043.07
5,tusd,22.3859,22.4246,22.6249,22.3153,28171.4,22.45,0.0640518,1,22.3506,22.5248,0.8,22.4147,22.5888,0.8,22.3793
6,mana,0.911815,0.913763,0.936712,0.867068,840523.0,0.92,0.00818495,1,0.889442,0.925238,4.0,0.897627,0.933422,4.0,0.875253
7,gnt,1.13026,1.18565,1.24983,1.13361,299572.0,1.23,0.0997436,1,1.13193,1.21774,7.6,1.23168,1.31748,7.0,1.23336
8,bat,6.09552,5.98774,6.03249,5.92076,17865.1,5.79,-0.305525,0,6.00814,6.01011,0.0,5.70262,5.70459,0.0,5.61524
9,dai,23.1455,23.0418,23.2362,22.8181,4666.1,22.85,-0.295515,0,22.9818,23.139,0.7,22.6863,22.8435,0.7,22.5226


# Graphic

In [None]:
#Candlestick chart of the price history of one coin
i=8  #position of the coin in `data`
frame=data[i]
candles=go.Candlestick(
    x=frame.date,
    open=frame.open,
    high=frame.high,
    low=frame.low,
    close=frame.close,
)
fig=go.Figure(data=[candles])
fig.show()