In this data science project, we will predict the Bitcoin price for the next 30 days using a Support Vector Machine regression (SVR) model.

In [1]:
import numpy as np
import pandas as pd

# Load the price history from the local CSV file and preview the first rows
df = pd.read_csv(filepath_or_buffer="bitcoin.csv")
df.head(n=5)

Unnamed: 0,Date,Price
0,5/23/2019,7881.84668
1,5/24/2019,7987.371582
2,5/25/2019,8052.543945
3,5/26/2019,8673.21582
4,5/27/2019,8805.77832


In [2]:
# Remove the date column; only the price series is used from here on.
# `columns=` is used because pandas 2.0+ no longer accepts the axis as a
# positional argument to DataFrame.drop (the old call raises a TypeError).

df = df.drop(columns=['Date'])

In [3]:
predictionDays = 30

# Target column: the price found `predictionDays` rows further down.
# The trailing rows have no such future value and therefore hold NaN.
df['Prediction'] = df['Price'].shift(periods=-predictionDays)

In [4]:
# First rows: each price next to the price `predictionDays` rows later
df.head(n=5)

Unnamed: 0,Price,Prediction
0,7881.84668,10701.69141
1,7987.371582,10855.37109
2,8052.543945,11011.10254
3,8673.21582,11790.91699
4,8805.77832,13016.23145


In [5]:
# Last rows: the shifted target is missing here
df.tail(n=5)

Unnamed: 0,Price,Prediction
362,9729.038086,
363,9522.981445,
364,9081.761719,
365,9182.577148,
366,9180.045898,


In [6]:
# Build the feature matrix: every column except the shifted target, as a NumPy array.
# `columns=` replaces the positional axis argument, which pandas 2.0+ rejects.

x = np.array(df.drop(columns=['Prediction']))

In [7]:
# Keep only the rows that have a target: cut off the trailing `predictionDays` rows

x = x[:df.shape[0] - predictionDays]

In [8]:
# Last five feature rows that remain after trimming
x[-5:]

array([[7189.424805],
       [6881.958496],
       [6880.323242],
       [7117.20752 ],
       [7429.724609]])

In [9]:
# Target vector: the shifted prices without the trailing `predictionDays`
# entries, which have no future value
y = np.array(df['Prediction'])[:-predictionDays]

In [10]:
# Last five targets that remain after trimming
y[-5:]

array([9729.038086, 9522.981445, 9081.761719, 9182.577148, 9180.045898])

In [11]:
# Split the data into 80% training and 20% testing.
# A fixed random_state makes the shuffled split, and every score computed
# from it, repeatable across kernel restarts.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows; consider shuffle=False if a
# chronological hold-out is wanted.

from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=123)

In [12]:
# Set the prediction input equal to the last `predictionDays` rows of the
# data set (feature columns only, target column removed).
# `columns=` replaces the positional axis argument, which pandas 2.0+ rejects.

predictionDays_array = np.array(df.drop(columns=['Prediction']))[-predictionDays:]

In [13]:
# The most recent `predictionDays` prices, used as model input below
predictionDays_array

array([[7550.900879],
       [7569.936035],
       [7679.867188],
       [7795.601074],
       [7807.058594],
       [8801.038086],
       [8658.553711],
       [8864.766602],
       [8988.59668 ],
       [8897.46875 ],
       [8912.654297],
       [9003.070313],
       [9268.761719],
       [9951.518555],
       [9842.666016],
       [9593.896484],
       [8756.430664],
       [8601.795898],
       [8804.477539],
       [9269.987305],
       [9733.72168 ],
       [9328.197266],
       [9377.013672],
       [9670.739258],
       [9726.575195],
       [9729.038086],
       [9522.981445],
       [9081.761719],
       [9182.577148],
       [9180.045898]])

### Creating a Machine Learning Model

In [14]:
from sklearn.svm import SVR

# Support vector regressor with an RBF (radial basis function) kernel,
# fitted on the training split
svr_rbf = SVR(
    kernel='rbf',
    C=1000.0,
    gamma=1e-5,
)
svr_rbf.fit(xtrain, ytrain)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=1e-05,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [15]:
# Test the model on the held-out split.
# NOTE: for scikit-learn regressors, score() returns the coefficient of
# determination (R^2), so the value labelled "accuracy" below is an R^2,
# not a classification accuracy.

svr_rbf_confidence = svr_rbf.score(xtest,ytest)
print('SVR_RBF accuracy :',svr_rbf_confidence)

SVR_RBF accuracy : 0.14042345234275488


In [16]:
# Predict on the held-out test features.
# NOTE(review): in the cells shown, this result is not used before the next
# cell reassigns svm_prediction.
svm_prediction = svr_rbf.predict(xtest)

In [17]:
# Model predictions for the next 30 days: each input row is one of the last
# `predictionDays` observed prices, and the training target was built by
# shifting Price up by `predictionDays` rows, so each output is the model's
# estimate of the price that many rows later.

svm_prediction = svr_rbf.predict(predictionDays_array)

In [18]:
# Model output for the last `predictionDays` input rows
svm_prediction

array([7765.83010321, 7782.96954698, 8141.90197474, 8735.21898051,
       8789.83991847, 9118.00206775, 8972.71754771, 8990.44043706,
       8409.13348373, 8872.96882248, 8807.78303005, 8325.45027008,
       7662.70797881, 8170.696253  , 8085.08682441, 8535.16123846,
       9126.54303687, 8844.53126145, 9114.65870595, 7666.14790951,
       8209.32407545, 7882.27353479, 8107.88943514, 8365.22442112,
       8225.23148091, 8219.67083186, 8566.22432044, 7892.87732452,
       7580.06702821, 7582.75582273])

In [19]:
# Prices actually observed over the most recent `predictionDays` rows
df.tail(n=predictionDays)

Unnamed: 0,Price,Prediction
337,7550.900879,
338,7569.936035,
339,7679.867188,
340,7795.601074,
341,7807.058594,
342,8801.038086,
343,8658.553711,
344,8864.766602,
345,8988.59668,
346,8897.46875,


### KFold cross validation

In [20]:
from sklearn.model_selection import KFold, cross_val_score

In [22]:
# NOTE(review): this is an SVR with default hyper-parameters, not the
# svr_rbf model (C=1e3, gamma=1e-5) fitted earlier, so the cross-validation
# below scores a different estimator. Confirm this is intended.
svr = SVR()

In [25]:
# 10-fold cross-validation with shuffling and a fixed seed.
# The options are passed by keyword: in scikit-learn 1.0+ every KFold
# argument after n_splits is keyword-only, so KFold(10, True, 123) raises a
# TypeError there.
kf = KFold(n_splits=10, shuffle=True, random_state=123)

# One negated mean squared error per fold
mses = cross_val_score(svr, x, y, scoring="neg_mean_squared_error", cv=kf)

In [35]:
# The fold scores are negated MSEs; take their magnitude before the square root
rmses = np.sqrt(np.abs(mses))
mean_rmse = rmses.mean()
mean_rmse

1526.9830799068102

In [36]:
# Per-fold RMSE values from the cross-validation
rmses

array([1461.99507016, 1623.78897656, 1302.58198414, 1661.67637976,
       1509.37836698, 1502.48575008, 1465.0704435 , 1683.69051439,
       1726.23664817, 1332.92666533])