In [46]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from pycaret.regression import *
import datetime

In [47]:
SEED = 61

In [60]:
assets = pd.read_csv('archive/BTC-2021min.csv')
assets.head()

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume BTC,Volume USD
0,1646106180,2022-03-01 03:43:00,BTC/USD,43046.58,43046.58,43046.58,43046.58,0.0,0.0
1,1646106060,2022-03-01 03:41:00,BTC/USD,43018.23,43046.59,43018.23,43046.58,0.142977,6154.673021
2,1646106000,2022-03-01 03:40:00,BTC/USD,43022.24,43022.24,43016.03,43016.03,0.00923,397.037957
3,1646105940,2022-03-01 03:39:00,BTC/USD,43035.16,43035.16,42999.44,42999.44,0.82095,35300.390268
4,1646105880,2022-03-01 03:38:00,BTC/USD,43077.82,43077.82,43049.46,43049.46,0.02221,956.143143


In [61]:
future_days = 1 * (60 * 24)
# Create a new column (the target or dependent variable) which is shifted 'n' units up
assets['Future_Price'] = assets[['close']].shift(-future_days)

#Choosing the features
assets = assets[['close', 'Future_Price']]
#Show the assets data set
assets.head()

Unnamed: 0,close,Future_Price
0,43046.58,37889.43
1,43046.58,37837.18
2,43016.03,37817.73
3,42999.44,37851.62
4,43049.46,37817.04


In [50]:
#Copy the assets data set to a new data set
df = assets.copy()

#Create the independet data set (X)
X = np.array(df[df.columns])
#Remove the last 'n' rows
X = X[:len(assets)-future_days]

#Create the dependent data set (y)
y = np.array(df['Future_Price'])
#Get all of the y values except the last 'n' rows
y = y[:-future_days]

#Split the data into 85% training and 15% testing
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state=SEED)

In [51]:
#Transform the train array in a dataframe
train_data = pd.DataFrame(x_train, columns=df.columns)

train_data.head(7)

Unnamed: 0,close,Future_Price
0,46985.4,46922.01
1,56598.21,56439.38
2,59537.61,59523.54
3,31665.06,31661.87
4,39400.84,39344.62
5,43938.78,44099.78
6,58140.76,58067.97


In [52]:
#Transform the test array in a dataframe
test_data = pd.DataFrame(x_test, columns=df.columns)

test_data.head(7)

Unnamed: 0,close,Future_Price
0,63807.43,63820.18
1,61421.68,61400.14
2,35223.93,35225.0
3,57167.51,57212.22
4,38038.14,38073.67
5,34494.08,34497.3
6,47536.99,47483.98


In [53]:
#Initialize the setup
regression_setup = setup(data=train_data, target='Future_Price', session_id=123, use_gpu=True)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Future_Price
2,Target type,Regression
3,Original data shape,"(519162, 2)"
4,Transformed data shape,"(519162, 2)"
5,Transformed train set shape,"(363413, 2)"
6,Transformed test set shape,"(155749, 2)"
7,Numeric features,1
8,Preprocess,True
9,Imputation type,simple


In [54]:
# best_model = compare_models(sort='r2')
#best model for this database is linear regression
best_model = 'lr'

In [55]:
# create model
model = create_model(best_model)


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,62.9827,9462.1289,97.2735,0.9999,0.0022,0.0014
1,63.7178,9752.6533,98.7555,0.9999,0.0023,0.0014
2,63.4753,9623.6416,98.1002,0.9999,0.0022,0.0014
3,63.0465,9112.1875,95.4578,0.9999,0.0022,0.0014
4,63.385,9688.7471,98.4314,0.9999,0.0022,0.0014
5,63.5868,10663.1143,103.2624,0.9999,0.0023,0.0014
6,63.4214,9714.7959,98.5637,0.9999,0.0022,0.0014
7,62.9169,9275.5518,96.3097,0.9999,0.0022,0.0014
8,62.9042,9582.3818,97.8896,0.9999,0.0023,0.0014
9,63.5787,9852.8408,99.2615,0.9999,0.0023,0.0014


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [56]:
#evaluate model
evaluate_model(model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [57]:
#predict model
useen_predictions = predict_model(model, data=test_data)

useen_predictions.head()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Linear Regression,63.1961,9384.9863,96.8761,0.9999,0.0022,0.0014


Unnamed: 0,close,Future_Price,prediction_label
0,63807.429688,63820.179688,63806.425781
1,61421.679688,61400.140625,61420.816406
2,35223.929688,35225.0,35224.574219
3,57167.511719,57212.21875,57166.890625
4,38038.140625,38073.671875,38038.621094
