### Predict Price of Ethereum
---

In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

%matplotlib inline

#### Load Crypto prices in from yfinance
---

In [2]:
df = yf.download('ETH-USD', start='2019-12-09')
df = df.reset_index()
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-12-08,148.743683,152.392929,148.313339,151.264725,151.264725,6441727252
1,2019-12-09,151.224396,151.913940,147.621368,148.225174,148.225174,6744206747
2,2019-12-10,148.179855,148.564468,144.907959,146.267044,146.267044,6859512025
3,2019-12-11,146.320648,147.139206,143.045364,143.608002,143.608002,7037180049
4,2019-12-12,143.615662,145.751648,141.436981,145.604004,145.604004,7890383413
...,...,...,...,...,...,...,...
888,2022-05-14,2014.280640,2063.429932,1956.572998,2056.273926,2056.273926,15457044616
889,2022-05-15,2056.183105,2147.194336,2008.162720,2145.706787,2145.706787,14846088335
890,2022-05-16,2145.836914,2145.836914,1988.811768,2022.725952,2022.725952,21459552191
891,2022-05-17,2022.882324,2113.059814,2015.971558,2090.409180,2090.409180,18509929297


In [3]:
# Create a variable for predicting 'n' days out into the future
projection = 14
# Create a new column called prediction
df['Prediction'] = df[['Close']].shift(-projection)
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Prediction
0,2019-12-08,148.743683,152.392929,148.313339,151.264725,151.264725,6441727252,132.372513
1,2019-12-09,151.224396,151.913940,147.621368,148.225174,148.225174,6744206747,128.614090
2,2019-12-10,148.179855,148.564468,144.907959,146.267044,146.267044,6859512025,129.063889
3,2019-12-11,146.320648,147.139206,143.045364,143.608002,143.608002,7037180049,126.454636
4,2019-12-12,143.615662,145.751648,141.436981,145.604004,145.604004,7890383413,126.361221
...,...,...,...,...,...,...,...,...
888,2022-05-14,2014.280640,2063.429932,1956.572998,2056.273926,2056.273926,15457044616,
889,2022-05-15,2056.183105,2147.194336,2008.162720,2145.706787,2145.706787,14846088335,
890,2022-05-16,2145.836914,2145.836914,1988.811768,2022.725952,2022.725952,21459552191,
891,2022-05-17,2022.882324,2113.059814,2015.971558,2090.409180,2090.409180,18509929297,


In [13]:
df.tail(20)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Prediction
873,2022-04-29,2936.776611,2943.445801,2782.435303,2815.601807,2815.601807,18771041399,2014.418213
874,2022-04-30,2815.533447,2836.827637,2727.40625,2730.186768,2730.186768,13520941867,2056.273926
875,2022-05-01,2729.994141,2838.704346,2728.078857,2827.756104,2827.756104,15332730152,2145.706787
876,2022-05-02,2827.614014,2874.145996,2785.522949,2857.4104,2857.4104,18609741545,2022.725952
877,2022-05-03,2857.152344,2859.193359,2762.118652,2783.476318,2783.476318,13026093219,2090.40918
878,2022-05-04,2783.131104,2956.686768,2779.273682,2940.644775,2940.644775,18186749944,1968.029663
879,2022-05-05,2940.226562,2948.960693,2704.91626,2749.213135,2749.213135,22642925048,
880,2022-05-06,2748.931641,2754.837646,2645.335205,2694.979736,2694.979736,21027599270,
881,2022-05-07,2694.991943,2696.652832,2599.523438,2636.093018,2636.093018,13369276367,
882,2022-05-08,2636.121826,2638.830566,2498.430176,2517.459961,2517.459961,20802269935,


Aim is to predict the last 14 rows of Prediction column using the 'Close' price data

In [5]:
# Create the independent dataset (X)
X = np.array(df[['Close']])
# Remove the last 14 rows of data
X = X[:-projection]
print(X)

[[ 151.26472473]
 [ 148.22517395]
 [ 146.26704407]
 [ 143.60800171]
 [ 145.60400391]
 [ 144.94474792]
 [ 142.86923218]
 [ 143.11499023]
 [ 133.61402893]
 [ 122.60388947]
 [ 133.0921936 ]
 [ 129.32113647]
 [ 129.0660553 ]
 [ 128.13095093]
 [ 132.37251282]
 [ 128.61408997]
 [ 129.06388855]
 [ 126.45463562]
 [ 126.36122131]
 [ 127.21460724]
 [ 128.32270813]
 [ 134.75798035]
 [ 132.63348389]
 [ 129.6108551 ]
 [ 130.80200195]
 [ 127.41017914]
 [ 134.17170715]
 [ 135.06936646]
 [ 136.27677917]
 [ 144.30415344]
 [ 143.54399109]
 [ 141.25813293]
 [ 138.97920227]
 [ 143.96377563]
 [ 142.92710876]
 [ 145.87393188]
 [ 144.22659302]
 [ 165.95535278]
 [ 166.23068237]
 [ 164.39100647]
 [ 170.779953  ]
 [ 175.36567688]
 [ 166.96984863]
 [ 167.12051392]
 [ 169.69715881]
 [ 168.29415894]
 [ 162.92855835]
 [ 163.05117798]
 [ 161.28393555]
 [ 168.07710266]
 [ 170.93089294]
 [ 176.37031555]
 [ 175.05033875]
 [ 184.69047546]
 [ 180.16017151]
 [ 183.6739502 ]
 [ 188.61753845]
 [ 189.86506653]
 [ 189.2505950

In [6]:
# Create the dependent data set
y = df['Prediction'].values
y = y[:-projection]
print(y)

[ 132.37251282  128.61408997  129.06388855  126.45463562  126.36122131
  127.21460724  128.32270813  134.75798035  132.63348389  129.6108551
  130.80200195  127.41017914  134.17170715  135.06936646  136.27677917
  144.30415344  143.54399109  141.25813293  138.97920227  143.96377563
  142.92710876  145.87393188  144.22659302  165.95535278  166.23068237
  164.39100647  170.779953    175.36567688  166.96984863  167.12051392
  169.69715881  168.29415894  162.92855835  163.05117798  161.28393555
  168.07710266  170.93089294  176.37031555  175.05033875  184.69047546
  180.16017151  183.6739502   188.61753845  189.86506653  189.25059509
  204.23023987  212.33908081  222.72605896  223.14651489  228.5785675
  223.52270508  235.85119629  265.40612793  268.09924316  284.21749878
  264.72857666  259.89471436  266.36343384  281.94458008  259.76397705
  257.94946289  265.60061646  262.33172607  273.75415039  265.21643066
  247.81759644  225.68026733  226.75338745  226.76049805  219.84851074
  218.97

In [7]:
# Split the data into 85% training and 15% testing data sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.15)

In [9]:
# Create and train the model
linReg = LinearRegression()

# Train the model
linReg.fit(X_train, y_train)

LinearRegression()

In [10]:
# Test the model using score
linReg_confidence = linReg.score(X_test, y_test)
print('Linear Regression Confidence: ', linReg_confidence)

Linear Regression Confidence:  0.9140084593217322


In [11]:
# Create a variable called X_projection and set it equal to the last 14 rows of data from the original data set
X_projection = np.array(df[['Close']])[-projection:]
print(X_projection)

[[2749.21313477]
 [2694.97973633]
 [2636.09301758]
 [2517.45996094]
 [2245.43041992]
 [2343.51098633]
 [2072.10864258]
 [1961.70153809]
 [2014.41821289]
 [2056.27392578]
 [2145.70678711]
 [2022.72595215]
 [2090.40917969]
 [1968.02966309]]


In [12]:
# Print the linear regression model predictions for the next 14 days
linReg_prediction = linReg.predict(X_projection)
print(linReg_prediction)

[2731.4007507  2679.66775854 2623.49598529 2510.332452   2250.84470978
 2344.40331609 2085.51385449 1980.19701955 2030.48321594 2070.40919014
 2155.7187899  2038.40793064 2102.97065362 1986.23338921]
