In [1]:
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:

# Bitcoin Data
btc_ticker = "BTC-USD"
eth_ticker = "ETH-USD"
start_date = "2024-01-01"
SPLIT_PERCENTAGE = 0.8
end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
btc_df = yf.download(btc_ticker, start=start_date, end=end_date)
eth_df = yf.download(eth_ticker, start=start_date, end=end_date)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [3]:
btc_df.sort_index(inplace=True)
eth_df.sort_index(inplace=True)

In [4]:
import numpy as np
def calculate_atr(df):
    df['High_Low'] = df['High'] - df['Low']
    df['High_Closing'] = np.abs(df['High'] - df['Close'].shift(1))
    df['Low_Closing'] = np.abs(df['Low'] - df['Close'].shift(1))
    df['ATR'] = df[['High_Low', 'High_Closing', 'Low_Closing']].max(axis=1).rolling(window=20).mean()
    df['Upper_ATR'] = df['Close'] + (2 * df['ATR'])
    df['Lower_ATR'] = df['Close'] - (2 * df['ATR'])
    return df.dropna()
btc_df = calculate_atr(btc_df)
eth_df = calculate_atr(eth_df)

In [5]:
btc_df_train, btc_df_test = train_test_split(btc_df, train_size=SPLIT_PERCENTAGE, shuffle=False)
eth_df_train, eth_df_test = train_test_split(eth_df, train_size=SPLIT_PERCENTAGE, shuffle=False)

In [6]:
import numpy as np
import statsmodels.api as sm
def kernel_regression(df, bandwidth):
    X_kr = np.arange(len(df)).reshape(-1, 1)
    y_kr = df['Close'].values
    kernel_model = sm.nonparametric.KernelReg(endog=y_kr, exog=X_kr, var_type='c', bw=[bandwidth])
    return kernel_model

def forecasting_prices(df, kernel_model):
    X_kr = np.arange(len(df)).reshape(-1, 1)
    y_kr = df['Close'].values
    y_hat, _ = kernel_model.fit(X_kr)
    future_predictions, _ = kernel_model.fit(np.arange(df.shape[0]).reshape(-1, 1)) # Next X Days depending the size of Testing Data
    return y_hat, df.index, future_predictions, y_kr

btc_model = kernel_regression(btc_df_train, bandwidth=5)
eth_model = kernel_regression(eth_df_train, bandwidth=5)


btc_y_hat, btc_future_dates, btc_future_predictions, btc_observed = forecasting_prices(btc_df_test, btc_model)
eth_y_hat, eth_future_dates, eth_future_predictions, eth_observed = forecasting_prices(btc_df_test, eth_model)

In [7]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
def plot_results(df, y_hat, future_dates, future_predictions, observed_test_data, title):
    fig = make_subplots(rows=1, cols=1)
    fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='markers', name='Actual Prices'))
    fig.add_trace(go.Scatter(x=df.index, y=df['Upper_ATR'], mode='lines', name='Upper ATR Range'))
    fig.add_trace(go.Scatter(x=df.index, y=df['Lower_ATR'], mode='lines', name='Lower ATR Range'))
    fig.add_trace(go.Scatter(x=df.index, y=y_hat, mode='lines', name='Kernel Regression'))
    fig.add_trace(go.Scatter(x=future_dates, y=future_predictions, mode='markers', name='Future Predictions'))
    fig.add_trace(go.Scatter(x=future_dates, y=observed_test_data, mode='markers', name='Observed Price'))
    
    y_min = min(df['Close'].min(), df['Lower_ATR'].min(), future_predictions.min())
    y_max = max(df['Close'].max(), df['Upper_ATR'].max(), future_predictions.max())
    fig.update_yaxes(range=[y_min * 0.9, y_max * 1.1])
    fig.show()

plot_results(btc_df_train, btc_y_hat, btc_future_dates, btc_future_predictions, btc_df_test['Close'], 'Bitcoin Price Prediction using ATR and Ker')
plot_results(eth_df_train, eth_y_hat, eth_future_dates, eth_future_predictions, eth_df_test['Close'], 'Ethereum Price Prediction using ATR ')

# Performance

In [8]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
def evaluate_model(y_true, y_pred):
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return r2, mae, rmse, mape

btc_r2, btc_mae, btc_rmse, btc_mape = evaluate_model(btc_df_test['Close'], btc_y_hat)
eth_r2, eth_mae, eth_rmse, eth_mape = evaluate_model(eth_df_test['Close'], eth_y_hat)
print("\nBitcoin Model Performance Metrics:")
print(f"R-squared Score: {btc_r2:.4f}")

print(f"Mean Absolute Error: ${btc_mae:.2f}")
print(f"Root Mean Squared Error: ${btc_rmse:.2f}")
print(f"Mean Absolute Percentage Error: {btc_mape:.2f}%")
print("\nEthereum Model Performance Metrics:")
print(f"R-squared Score: {eth_r2:.4f}")
print(f"Mean Absolute Error: ${eth_mae:.2f}")
print(f"Root Mean Squared Error: ${eth_rmse:.2f}")
print(f"Mean Absolute Percentage Error: {eth_mape:.2f}%")


Bitcoin Model Performance Metrics:
R-squared Score: -12.8622
Mean Absolute Error: $13974.59
Root Mean Squared Error: $15811.24
Mean Absolute Percentage Error: 22.20%

Ethereum Model Performance Metrics:
R-squared Score: -4.0554
Mean Absolute Error: $719.24
Root Mean Squared Error: $768.98
Mean Absolute Percentage Error: 23.25%


In [9]:
from scipy import stats
import numpy as np
confidence_level = 0.95
def calculate_confidence_interval(df, y_hat):
    degrees_of_freedom = len(df) - 2
    t_value = stats.t.ppf((1 + confidence_level) / 2, degrees_of_freedom)
    residuals = df['Close'] - y_hat
    std_error = np.sqrt(np.sum(residuals**2) / degrees_of_freedom)
    margin_of_error = t_value * std_error
    return margin_of_error

btc_margin_of_error = calculate_confidence_interval(btc_df_test, btc_y_hat)
eth_margin_of_error = calculate_confidence_interval(eth_df_test, eth_y_hat)
print(f"\nBitcoin {confidence_level*100}% Confidence Interval: +/- ${btc_margin_of_error:.2f}")
print(f"Ethereum {confidence_level*100}% Confidence Interval: +/- ${eth_margin_of_error:.2f}")


Bitcoin 95.0% Confidence Interval: +/- $32744.87
Ethereum 95.0% Confidence Interval: +/- $1592.55
