In [1]:
# Description: Predict the price of Bitcoin n (= 30) days into the future

In [2]:
#Import the libraries
import pandas as pd
import numpy as np

In [3]:
# Read the Bitcoin price table from its CSV file
# (a relative path, so it is resolved against the current working directory)
BITCOIN_CSV = 'Dataset/Bitcoin.csv'
df = pd.read_csv(BITCOIN_CSV)

In [4]:
# Preview the first 10 rows of the freshly loaded frame
df.head(n=10)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume (BTC),Volume (Currency),Weighted Price,FTimestamp
0,20/12/2015,461.89,462.0,433.83,442.54,16626.49,7430072.73,446.88,42358
1,21/12/2015,442.47,450.0,425.52,438.07,18930.86,8257540.79,436.19,42359
2,22/12/2015,438.14,443.58,433.06,435.48,8146.02,3563201.25,437.42,42360
3,23/12/2015,435.65,445.52,434.43,442.56,7100.23,3125137.43,440.15,42361
4,24/12/2015,443.01,459.54,443.01,455.0,10138.32,4598655.92,453.59,42362
5,25/12/2015,455.78,458.33,448.0,455.51,2877.17,1304947.59,453.55,42363
6,26/12/2015,455.5,457.45,407.25,417.39,26672.12,11386692.92,426.91,42364
7,27/12/2015,417.11,425.42,410.5,421.76,6968.57,2921475.22,419.24,42365
8,28/12/2015,421.78,429.86,417.01,421.46,7560.56,3204101.79,423.79,42366
9,29/12/2015,420.81,433.33,418.55,431.82,10419.59,4444308.02,426.53,42367


In [5]:
# Remove the Timestamp column.
# `columns=` is used instead of a positional axis (`drop(labels, 1)`):
# pandas 2.0 made every argument of `drop` except `labels` keyword-only,
# so the positional form raises TypeError there.
df = df.drop(columns=['Timestamp'])

In [6]:
# Preview the first 7 rows now that Timestamp has been dropped
df.head(n=7)

Unnamed: 0,Open,High,Low,Close,Volume (BTC),Volume (Currency),Weighted Price,FTimestamp
0,461.89,462.0,433.83,442.54,16626.49,7430072.73,446.88,42358
1,442.47,450.0,425.52,438.07,18930.86,8257540.79,436.19,42359
2,438.14,443.58,433.06,435.48,8146.02,3563201.25,437.42,42360
3,435.65,445.52,434.43,442.56,7100.23,3125137.43,440.15,42361
4,443.01,459.54,443.01,455.0,10138.32,4598655.92,453.59,42362
5,455.78,458.33,448.0,455.51,2877.17,1304947.59,453.55,42363
6,455.5,457.45,407.25,417.39,26672.12,11386692.92,426.91,42364


In [7]:
# Forecast horizon: number of rows ('n' days) to look ahead
prediction_days = 30  # n = prediction_days = 30

# Target/dependent column: the Close value found `prediction_days` rows
# further down. Shifting upward leaves the final `prediction_days` rows
# of this column as NaN.
df['Prediction'] = df['Close'].shift(periods=-prediction_days)

In [8]:
# Remove every column that is neither the feature (Close) nor the target
# (Prediction), in a single call.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`),
# which pandas 2.0 rejects with a TypeError.
unused_columns = [
    'High',
    'Low',
    'Open',
    'Volume (BTC)',
    'Volume (Currency)',
    'Weighted Price',
    'FTimestamp',
]
df = df.drop(columns=unused_columns)

# Show first 7 rows of the new dataset
df.head(7)

Unnamed: 0,Close,Prediction
0,442.54,376.76
1,438.07,416.32
2,435.48,409.59
3,442.56,382.55
4,455.0,386.4
5,455.51,402.16
6,417.39,390.9


In [9]:
# Preview the last 7 rows of the reduced dataset
# (their Prediction values are NaN because of the upward shift)
df.tail(n=7)

Unnamed: 0,Close,Prediction
1453,7188.42,
1454,7257.47,
1455,7059.03,
1456,7115.08,
1457,6882.19,
1458,6612.3,
1459,6675.0,


In [10]:
# Create the independent dataset:
# every column except the target, converted to a 2-D numpy array.
# `columns=` is required because pandas 2.0 no longer accepts the axis
# as a positional argument to `drop`.
X = np.array(df.drop(columns=['Prediction']))

# Remove the last 'n' rows, where n = prediction_days
# (those rows have no known target value)
X = X[:len(df) - prediction_days]

print(X)

[[ 442.54]
 [ 438.07]
 [ 435.48]
 ...
 [8484.63]
 [8499.31]
 [8170.65]]


In [11]:
# Create the dependent dataset:
# the Prediction column as a numpy array, keeping all values except the
# last 'n' rows (n = prediction_days), whose targets are NaN
y = np.array(df['Prediction'])[:-prediction_days]
print(y)

[ 376.76  416.32  409.59 ... 6882.19 6612.3  6675.  ]


In [12]:
# Split the data into 80% training and 20% testing
from sklearn.model_selection import train_test_split

# A fixed random_state makes the shuffled split, and therefore every score
# and prediction below, reproducible after a kernel restart.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows. Consider shuffle=False if a
# chronological hold-out is wanted.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [13]:
# Set Prediction_days_array to the feature values (every column except
# Prediction) of the last `prediction_days` rows of the dataset. These are
# the rows that were left out of X because their targets are unknown.
# `columns=` is required because pandas 2.0 no longer accepts the axis
# as a positional argument to `drop`.
Prediction_days_array = np.array(df.drop(columns=['Prediction']))[-prediction_days:]
print(Prediction_days_array)

[[8121.63]
 [8088.44]
 [7611.51]
 [7285.85]
 [7320.39]
 [6908.36]
 [7122.14]
 [7159.22]
 [7527.84]
 [7436.72]
 [7753.69]
 [7550.67]
 [7412.66]
 [7309.64]
 [7300.43]
 [7197.78]
 [7390.42]
 [7541.79]
 [7502.65]
 [7524.26]
 [7340.52]
 [7216.07]
 [7207.42]
 [7188.42]
 [7257.47]
 [7059.03]
 [7115.08]
 [6882.19]
 [6612.3 ]
 [6675.  ]]


In [14]:
# Machine-learning step: Support Vector Regression

from sklearn.svm import SVR

# Build the regressor with a radial basis function (RBF) kernel
svr_rbf = SVR(C=1000.0, gamma=1e-5, kernel='rbf')

# Fit on the training split; as the cell's last expression, the fitted
# estimator is also echoed as the cell output
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=1e-05,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [15]:
# Evaluate the fitted model on the held-out test split.
# NOTE: scikit-learn documents a regressor's `.score()` as the coefficient
# of determination (R^2), so the "accuracy" label printed below is loose
# wording for R^2 * 100.
svr_rbf_confidence = svr_rbf.score(x_test, y_test)
print("SVR_rbf_accuracy:", 100 * svr_rbf_confidence)

SVR_rbf_accuracy: 81.78693075233015


In [16]:
# Model output for the test-split features
svm_prediction = svr_rbf.predict(x_test)

# Print the predictions, followed by the blank lines that separate them
# from the actual values
print(svm_prediction, end='\n\n\n')

# Actual target values for the same test rows, for side-by-side comparison
print(y_test)

[ 3428.63172361  7961.15247992   442.57500893   724.90132396
   894.99142423  9097.95087932  8266.69151877  6121.99015764
  6730.27659883  8878.70738708  9222.92257678  7765.1112952
   427.13044366  7497.71564444   424.40372371  8888.46063374
  7889.72557641  1147.34788409  6577.5330208   2717.48786128
   726.41194852  8438.26355706  6472.98614761   433.29692555
   686.75863754  3884.28861046  6266.05724276   637.44691547
  2905.56926918   693.37703393  8941.45902374  7586.29421094
   832.40664653  1181.76108258   431.88356281  8015.46174792
  6561.55237501   611.80435731   801.12484156  7805.48355162
  4335.0291695   6339.7824438   8244.34696786   423.14333111
  2638.07755911  2905.01992279 10324.91959932  9222.62793763
  7927.62934047  8932.48396954  6156.09649711  8886.91514796
  1200.63279564  7017.9262992   6461.04649333  7848.86893662
  1891.94381948  3863.27905061  5138.54545251  6506.71537925
   626.0826987    463.49019775   441.34629249  3187.2472662
  3890.96724126 10330.7659

In [17]:
# Forecast for the next 'n' days (n = 30), computed from the last
# `prediction_days` feature rows
svm_prediction = svr_rbf.predict(Prediction_days_array)

# Print the forecasts, followed by the blank lines that separate them
# from the table below
print(svm_prediction, end='\n\n\n')

# The last `prediction_days` rows of the dataset, whose Prediction column
# is NaN
print(df.tail(n=prediction_days))

[9076.5910763  9157.96172468 7082.80824242 6994.38330476 6807.78990629
 7665.68091993 7872.05075491 7717.81468876 6618.02240901 6463.26963483
 8240.33241484 6717.34751168 6488.5473067  6863.14942349 6912.67225981
 7515.66459268 6535.19480171 6676.02116556 6535.66069182 6604.53783225
 6712.45036153 7409.95255262 7460.50728685 7567.74010232 7161.50029669
 8010.23673366 7895.81782624 7538.54421225 6509.80387264 6652.36700678]


        Close  Prediction
1430  8121.63         NaN
1431  8088.44         NaN
1432  7611.51         NaN
1433  7285.85         NaN
1434  7320.39         NaN
1435  6908.36         NaN
1436  7122.14         NaN
1437  7159.22         NaN
1438  7527.84         NaN
1439  7436.72         NaN
1440  7753.69         NaN
1441  7550.67         NaN
1442  7412.66         NaN
1443  7309.64         NaN
1444  7300.43         NaN
1445  7197.78         NaN
1446  7390.42         NaN
1447  7541.79         NaN
1448  7502.65         NaN
1449  7524.26         NaN
1450  7340.52         NaN