# Prediction Algorithms

The main idea is to predict the next day's low, high, open and close prices in order to determine a trading strategy for that day.

In [1]:
import pandas as pd
from datetime import datetime
from datetime import date 
import numpy as np

from time import sleep
from tqdm.notebook import tqdm, trange

import seaborn as sns
import matplotlib as plt
import plotly.graph_objects as go

from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings("ignore")

In [2]:
#Global Variables
#Ticker symbols of the coins to analyse; the position in this list is the
#index used for the per-coin dataframes throughout the notebook.
coins=[
    'btc','eth','xrp','ltc','bch',
    'tusd','mana','gnt','bat','dai',
]

In [3]:
#Opening All databases
#One '<coin>-mxn.csv' file is read per entry in `coins`; data[k] is the
#dataframe for coins[k].
data=[]
for e in coins:
    file=e+'-mxn.csv'
    frame=pd.read_csv(file)
    #Conversion of dates from string to timestamps (vectorised, explicit format)
    frame['date']=pd.to_datetime(frame['date'],format='%Y-%m-%d')
    data.append(frame)

In [4]:
#Conversion from strings to numeric
def strnum(x):
    """Convert a number formatted as text (e.g. '1,234.5') to a float.

    Commas are stripped before the conversion. Values that cannot be
    converted (objects without a ``replace`` method such as floats or None,
    or text that is not a number) are returned unchanged.
    """
    try:
        return float(x.replace(',',''))
    except (AttributeError, ValueError, TypeError):
        #Only conversion failures are tolerated; anything else (for example
        #KeyboardInterrupt) must propagate instead of being swallowed.
        return x

#Run strnum over every price/volume column of each coin's dataframe
for idx in range(len(coins)):
    frame=data[idx]
    for column in ('open','close','low','high','volume'):
        frame[column]=frame[column].apply(strnum)
    

In [5]:
#-----------movingAvg function-----------------
#Calculates the Moving Average

#df - list with the dataframes
#x - index of the coin (e.g. 0 - BTC, 1 - ETH...) in the list of dataframes
#dname - name of the destination column for the moving average (created here)
#oname - name of the origin column the moving average is computed from
#msize - number of rows used to calculate the moving average

def movingAvg(df,x,dname,oname,msize):
    """Store in df[x][dname] the average of the previous `msize` rows of df[x][oname].

    Row i receives the mean of rows i-msize .. i-1 (the current row is not
    part of its own average). The first `msize` rows, which do not have a
    full window behind them, are set to 0.0. The dataframe is modified in
    place and nothing is returned.

    Raises:
        ValueError: if `msize` is smaller than 1.
    """
    if msize<1:
        raise ValueError('msize must be at least 1')
    frame=df[x]
    #rolling().mean() at row i covers rows i-msize+1 .. i; shifting by one
    #row turns it into the average of the msize rows strictly before i.
    trailing=frame[oname].rolling(window=msize).mean().shift(1)
    #Rows without a complete window keep the 0.0 placeholder
    trailing.iloc[:msize]=0.0
    frame[dname]=trailing

In [6]:
#Series for which the moving-average indicators are derived
metricas=['close','open','high','low','volume']

for coin_idx in tqdm(range(len(coins))):
    frame=data[coin_idx]
    for metric in metricas:
        fast,slow='PME12'+metric,'PME26'+metric
        macd,signal='MACD'+metric,'MACD9'+metric
        #12-row and 26-row moving averages of the metric
        movingAvg(data,coin_idx,fast,metric,12)
        movingAvg(data,coin_idx,slow,metric,26)
        #MACD is the gap between both averages; MACD9 is its 9-row average
        frame[macd]=frame[fast]-frame[slow]
        movingAvg(data,coin_idx,signal,macd,9)
        frame['diffMACD'+metric]=frame[macd]-frame[signal]


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




In [7]:
from sklearn.model_selection import train_test_split as tts
from xgboost import XGBRegressor as XGBR
from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.ensemble import GradientBoostingRegressor as GBR

In [58]:
#Create Dataframe for predictions
preds= pd.DataFrame(columns=metricas)
preds['coin'] =""
cols=preds.columns.tolist()
cols=cols[-1:]+cols[:-1]
preds=preds[cols]
preds

for x in tqdm(range(len(coins))):
    test=data[x][35:len(data[x])-1] #35 is 26 + 9 for all the number of samples who doesn't has Moving Avg  
    dc=['date','open','close','low','high','volume']
    #Adding new row for predictions
    preds=preds.append([0], ignore_index=True).drop(0,axis=1)
    preds=preds[cols]
    preds['coin'][x]=coins[x]
    for m in metricas:
        #Predictions
        X_train, X_test, y_train, y_test=tts(test.drop(columns=dc),test[m], shuffle=True,random_state=100,test_size=.2)
        modelo=XGBR()
        modelo.fit(X_train, y_train)
        preds[m][x]=modelo.predict(data[x][len(data[x])-1:].drop(columns=dc))[0]
        #train_score=modelo.score(X_train, y_train)
        #test_score=modelo.score(X_test, y_test)
        #print ('Score para', coins[x],m,train_score, test_score)

#Getting oppenings to make initial corrections
preds['ropen']=0.0
for i,e in enumerate(coins):
    preds.ropen[i]=data[i]['open'][len(data[i])-1]

preds['corr']=preds.ropen - preds.open
        
#Calculation of investment factors
preds['go']= preds.close - preds.open
preds['go']= preds['go'].apply(lambda x: 1 if x>0 else 0)
preds['ent']=preds.open - (preds.open-preds.low)/2
preds['out']=(preds.high-preds.close)/2+preds.close
preds['rate']=(preds.out/preds.ent-1)*100
preds.go=preds.go*preds.rate
preds.go=preds['go'].apply(lambda x: 1 if x>0 else 0)
preds.rate=preds.rate.apply(lambda x: x.round(1))

preds['enta'] = preds['ent'] + preds['corr']
preds['outa'] = preds['out'] + preds['corr']


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




In [59]:
#Summary view: corrected entry/exit prices, the go flag and the expected rate
sortedcols=['coin','enta','outa','go','rate']
preds.loc[:,sortedcols]

Unnamed: 0,coin,enta,outa,go,rate
0,btc,208740.0,212239.0,1,1.7
1,eth,5220.73,5184.45,0,-0.7
2,xrp,4.08923,4.0796,0,-0.2
3,ltc,943.057,982.815,1,4.2
4,bch,5236.57,5417.44,0,3.5
5,tusd,22.4674,22.5429,0,0.3
6,mana,0.936769,0.984772,0,5.4
7,gnt,1.24985,1.28394,0,2.8
8,bat,6.02549,6.31132,1,5.3
9,dai,22.7605,23.0153,1,1.1


In [60]:
#Display the complete predictions table, including the intermediate columns
preds

Unnamed: 0,coin,close,open,high,low,volume,ropen,corr,go,ent,out,rate,enta,outa
0,btc,211356.0,210849.0,214186.0,207694.0,182.823,210317.17,-532.314,1,209272.0,212771.0,1.7,208740.0,212239.0
1,eth,5409.65,5363.11,5107.08,5226.19,3274.52,5289.19,-73.9164,0,5294.65,5258.36,-0.7,5220.73,5184.45
2,xrp,3.94933,4.13109,4.19206,4.02956,10014700.0,4.14,0.00890602,0,4.08033,4.07069,-0.2,4.08923,4.0796
3,ltc,985.917,969.829,995.39,931.963,1390.57,961.99,-7.83861,1,950.896,990.654,4.2,943.057,982.815
4,bch,5161.72,5180.88,5481.85,5100.94,207.298,5276.54,95.6596,0,5140.91,5321.78,3.5,5236.57,5417.44
5,tusd,22.5514,22.8425,22.8995,22.4573,61100.7,22.66,-0.182546,0,22.6499,22.7254,0.3,22.4674,22.5429
6,mana,0.907624,0.917724,0.977368,0.871262,1286460.0,0.96,0.0422757,0,0.894493,0.942496,5.4,0.936769,0.984772
7,gnt,1.22563,1.2277,1.29765,1.22741,400166.0,1.25,0.0223007,0,1.22755,1.26164,2.8,1.24985,1.28394
8,bat,5.57693,5.46943,5.80456,5.3404,109916.0,6.09,0.620574,1,5.40491,5.69075,5.3,6.02549,6.31132
9,dai,22.5455,22.4417,22.7284,22.3227,24917.2,22.82,0.378323,1,22.3822,22.6369,1.1,22.7605,23.0153


In [None]:
#Candlestick chart of the historical prices of one coin
i=6  #position of the coin in `coins` (6 is 'mana')
coin_frame=data[i]
candles=go.Candlestick(
    x=coin_frame.date,
    open=coin_frame.open,
    high=coin_frame.high,
    low=coin_frame.low,
    close=coin_frame.close,
)
fig=go.Figure(data=[candles])
fig.show()