### Description: Predicting Ethereum Price with Python and Machine Learning using Scikit-learn and Support Vector Regression

In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt

# Load the data
data = pd.read_csv('ethereum_2015-08-07_2024-06-02.csv')


# Check the column names
display(data.columns)

# Handle missing values
data = data.fillna(method='ffill')

# Feature Engineering: Create rolling averages and standard deviations
data['Rolling_Mean'] = data['Price'].rolling(window=5).mean()
data['Rolling_Std'] = data['Price'].rolling(window=5).std()
data['Lagged_Price'] = data['Price'].shift(1)
data = data.dropna()

# Feature selection (assuming 'Price' is the target and other columns are features)
X = data.drop('Price', axis=1)
y = data['Price']

# Normalize the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define the model
svm = SVR()

# Define the parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto'],
    'degree': [2, 3, 4]  # For polynomial kernel
}

# Perform grid search
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print(f"Best parameters: {best_params}")

# Train the model with the best parameters
best_svm = grid_search.best_estimator_

# Predict on the test set
y_pred = best_svm.predict(X_test)

# Calculate evaluation metrics
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R-squared: {r2}")

# Plot the predicted and actual prices
plt.figure(figsize=(20, 5))
plt.plot(y_test.values, color='red', label='Actual')
plt.plot(y_pred, color='blue', label='Prediction')
plt.xlabel('Time')
plt.ylabel('Price in GBP')
plt.title('Prediction vs Actual')
plt.legend()
plt.show()

# Predicting future prices
# Assuming you want to predict the next 'n' days
n = 30
last_values = X_scaled[-1].reshape(1, -1)

future_predictions = []
for _ in range(n):
    next_pred = best_svm.predict(last_values)
    future_predictions.append(next_pred[0])
    
    # Update last_values for next prediction
    next_row = np.append(last_values[:, 1:], next_pred)
    last_values = next_row.reshape(1, -1)

# Plot future predictions
plt.figure(figsize=(10, 5))
plt.plot(range(n), future_predictions, color='green', label='Future Predictions')
plt.xlabel('Days')
plt.ylabel('Price in GBP')
plt.title('Future Price Predictions')
plt.legend()
plt.show()


KeyError: 'Price'