### Description: Predicting Ethereum Price with Python and Machine Learning using Scikit-learn and Support Vector Regression

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
data = pd.read_csv('ethereum_2015-08-07_2024-06-02.csv')
data = data.set_index(pd.DatetimeIndex(data['Start'].values))
display(data)

# Handle missing values
data = data.fillna(method='ffill')

# Feature selection (assuming 'Price' is the target and other columns are features)
X = data.drop('Price', axis=1)
y = data['Price']

# Normalize the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)




Unnamed: 0,Start,End,Open,High,Low,Close,Volume,Market Cap
2024-06-01,2024-06-01,2024-06-02,2948.742856,2978.306538,2941.392797,2971.467374,1.519628e+10,3.558701e+11
2024-05-31,2024-05-31,2024-06-01,2940.157084,2992.084249,2925.939443,2947.755919,1.673715e+10,3.544224e+11
2024-05-30,2024-05-30,2024-05-31,2953.729354,2981.526565,2913.709414,2940.947921,1.754862e+10,3.542934e+11
2024-05-29,2024-05-29,2024-05-30,3000.900270,3021.146455,2938.025184,2951.882733,1.889175e+10,3.575360e+11
2024-05-28,2024-05-28,2024-05-29,3028.869403,3045.442442,2962.778842,3000.077482,1.966750e+10,3.614046e+11
...,...,...,...,...,...,...,...,...
2015-08-12,2015-08-12,2015-08-13,0.838300,1.012639,0.693657,0.971606,1.547950e+06,5.246035e+07
2015-08-11,2015-08-11,2015-08-12,0.556151,0.888188,0.520658,0.830301,5.029142e+05,3.743174e+07
2015-08-10,2015-08-10,2015-08-11,0.551009,0.572956,0.499706,0.555567,3.593449e+05,3.283582e+07
2015-08-09,2015-08-09,2015-08-10,0.591381,0.690675,0.493933,0.572558,5.493833e+05,3.262780e+07


KeyError: "['Price'] not found in axis"

In [None]:
# Define the model
svm = SVR()

# Define the parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

# Perform grid search
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print(f"Best parameters: {best_params}")

# Train the model with the best parameters
best_svm = grid_search.best_estimator_


Unnamed: 0,Close,10_Day_Price_Forecast
2024-06-01,2971.467374,2936.321752
2024-05-31,2947.755919,2970.524163
2024-05-30,2940.947921,2869.336887
2024-05-29,2951.882733,2415.723011
2024-05-28,3000.077482,2452.934356
...,...,...
2015-08-12,0.971606,
2015-08-11,0.830301,
2015-08-10,0.555567,
2015-08-09,0.572558,


In [None]:

# Predict on the test set
y_pred = best_svm.predict(X_test)

# Calculate evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")


[[2.97146737e+03]
 [2.94775592e+03]
 [2.94094792e+03]
 ...
 [1.15041920e+00]
 [9.82203416e-01]
 [8.59668404e-01]]


In [None]:
# Plot the predicted and actual prices
plt.figure(figsize=(20, 5))
plt.plot(y_test.values, color='red', label='Actual')
plt.plot(y_pred, color='blue', label='Prediction')
plt.xlabel('Time')
plt.ylabel('Price in GBP')
plt.title('Prediction vs Actual')
plt.legend()
plt.show()


[[2.93632175e+03]
 [2.97052416e+03]
 [2.86933689e+03]
 ...
 [5.55566633e-01]
 [5.72557778e-01]
 [5.61197639e-01]]


In [None]:
svr_rbf_confidence = svr_rbf.score(x_test, y_test)
print('svr_rbf accuracy:', svr_rbf_confidence)

svr_rbf accuracy: 0.9626574760602388


In [None]:
svm_predict = svr_rbf.predict(x_test)
print(svm_predict)

[8.51829529e+00 3.81301442e+02 1.23290897e+03 9.95635012e+01
 1.68620889e+02 8.34880485e+00 1.04047698e+02 3.34059130e+02
 1.03128541e+02 1.42727539e+02 9.42061354e-01 9.28759746e+01
 1.47991441e+03 3.21154765e+02 1.21400100e+03 8.37933949e+00
 9.61684859e-01 1.13081108e+03 1.00712348e+03 2.83762282e+02
 2.50109946e+03 7.72678479e+00 1.75035385e+02 9.26512316e+00
 6.58793657e+00 1.29136251e+03 2.57108030e+03 1.90062734e+03
 3.56212676e+02 2.57256374e+03 7.86611383e+00 1.30822719e+03
 2.55007064e+03 3.44851971e+02 9.18066752e+00 1.98169569e+00
 1.04775530e+03 2.96991941e+02 6.79800498e+02 1.28736976e+03
 9.09536599e+01 1.66170557e+03 1.03490278e+03 2.70840079e+03
 2.50937220e+03 9.67468525e-01 1.02159641e+03 7.91481253e+00
 1.39180683e+03 1.25052441e+03 1.03809816e+02 1.20205509e+03
 2.15243311e+02 7.63751414e+02 1.20426922e+02 1.29493586e+03
 1.01524602e+00 1.12245626e+02 1.80092221e+02 2.26224778e+02
 1.66145569e+03 1.44681626e+03 1.39408914e+03 1.64124288e+03
 2.92939029e+02 1.446200

In [None]:
print(y_test)

[[1.03312033e+01]
 [4.71728262e+02]
 [9.69921772e+02]
 [1.00506004e+02]
 [1.78883610e+02]
 [9.50998556e+00]
 [1.66375828e+02]
 [3.90874377e+02]
 [1.52798099e+02]
 [1.35786967e+02]
 [1.04779250e+00]
 [1.01044313e+02]
 [1.36767089e+03]
 [3.59341330e+02]
 [1.28683508e+03]
 [9.16543679e+00]
 [6.77456352e-01]
 [1.28236324e+03]
 [8.91094643e+02]
 [2.33606261e+02]
 [2.85679667e+03]
 [9.08583496e+00]
 [1.65154179e+02]
 [1.04181844e+01]
 [6.93376719e+00]
 [1.27370046e+03]
 [2.53807283e+03]
 [2.64892278e+03]
 [3.88007479e+02]
 [2.41524858e+03]
 [8.66411794e+00]
 [1.31334045e+03]
 [2.01974715e+03]
 [4.34495741e+02]
 [8.43638134e+00]
 [1.68653049e+00]
 [1.09706380e+03]
 [2.76546033e+02]
 [7.46363930e+02]
 [1.27262294e+03]
 [7.41287580e+01]
 [1.62888589e+03]
 [1.05144380e+03]
 [2.30803586e+03]
 [2.05985054e+03]
 [1.06091817e+00]
 [1.01652859e+03]
 [9.52081894e+00]
 [1.23137843e+03]
 [1.03075597e+03]
 [1.75957722e+02]
 [1.06904462e+03]
 [2.21920969e+02]
 [6.90990825e+02]
 [1.12388315e+02]
 [1.358595