In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas_ta as ta  
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense

In [3]:
# Read the historical Bitcoin price CSV into the working DataFrame `df`.
# NOTE(review): this is an absolute path on one user's machine; the notebook
# only runs elsewhere after this line is edited.
file_path = 'C:\\Users\\vaish\\OneDrive\\UH\\Project\\bitcoin-historical-data.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

In [4]:
# Preprocessing: index the frame by time and make the row order chronological.
# NOTE(review): if 'Timestamp' holds numeric Unix epochs, pd.to_datetime needs an
# explicit unit (e.g. unit='s'); without it numbers are read as nanoseconds —
# confirm against the CSV.
df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce')  # Unparseable values become NaT

# errors='coerce' can leave NaT timestamps behind. Remove those rows now: once
# 'Timestamp' is the index, a column-wise dropna no longer sees them.
df = df.dropna(subset=['Timestamp'])

# Set 'Timestamp' as the index and sort it, because the rolling indicators and the
# positional train/test split both rely on rows being in time order. A stable
# sort keeps the file order of rows that share a timestamp.
df = df.set_index('Timestamp').sort_index(kind='mergesort')

In [5]:
# Calculate Market Indicators
# Every window below counts rows, so the time span it covers depends on the
# sampling interval of the data.
# NOTE(review): reading these as "30-day" values assumes one row per day — confirm.

# 1. Moving averages of the closing price over a 30-row window
df['SMA_30'] = df['Close'].rolling(window=30).mean()  # Simple Moving Average
df['EMA_30'] = df['Close'].ewm(span=30, adjust=False).mean()  # Exponential Moving Average

# 2. Relative Strength Index (RSI)
df['RSI'] = df.ta.rsi(length=14)  # 14-period RSI

# 3. Bollinger Bands (20-period window, 2 standard deviations)
bb = df.ta.bbands(length=20, std=2)

# Assign the individual bands to the DataFrame.
# The bands are looked up by column prefix instead of the full 'BBL_20_2.0'
# style label, so the lookup does not depend on the exact suffix pandas_ta
# appends (NOTE(review): the suffix appears to differ between pandas_ta
# releases — confirm against the installed version).
for band_name, prefix in (('BB_lower', 'BBL_'), ('BB_middle', 'BBM_'), ('BB_upper', 'BBU_')):
    matches = [col for col in bb.columns if str(col).startswith(prefix)]
    if not matches:
        # Same exception type a direct bb['BBL_...'] lookup would raise.
        raise KeyError(
            f"no Bollinger Band column starting with {prefix!r} in {list(bb.columns)}"
        )
    df[band_name] = bb[matches[0]]

# Drop any rows that contain NaN values (e.g. the warm-up rows of each window)
df.dropna(inplace=True)

In [None]:
# Build the NumPy inputs and target for the machine learning models.
# NOTE(review): 'Close' is both a feature and the target of the same row, so the
# value being predicted is directly visible to the model — confirm this is intended.
features = ['Close', 'SMA_30', 'EMA_30', 'RSI', 'BB_upper', 'BB_lower']
y_data = df['Close'].values  # Target variable: Bitcoin closing price
X_data = df[features].values  # One column per entry in `features`

# Positional split without shuffling: the first 80% of rows are used for
# training and the remaining 20% for testing.
split_index = int(0.8 * len(df))
X_train, X_test = np.split(X_data, [split_index])
y_train, y_test = np.split(y_data, [split_index])

# Initialize & Train Models

def evaluate(y_test, y_pred, model_name):
    """Compute and print RMSE, MAE, and MAPE for a set of predictions.

    Both inputs are converted to flat float arrays before any arithmetic, so
    lists, pandas Series and column vectors of shape (n, 1) are all compared
    element by element. Without this, a (n,) target minus a (n, 1) prediction
    broadcasts to an (n, n) matrix and two Series align on their index labels,
    either of which silently corrupts the MAPE.

    Args:
        y_test: True target values (array-like).
        y_pred: Predicted values (array-like) with the same number of elements.
        model_name: Label printed in front of the metrics.

    Returns:
        The ``y_pred`` object exactly as it was passed in.

    Raises:
        ValueError: If the inputs are empty, differ in size, or contain NaN
            or infinite values.
    """
    actual = np.asarray(y_test, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("evaluate() requires at least one sample")
    if actual.size != predicted.size:
        raise ValueError(
            f"y_test and y_pred differ in size: {actual.size} vs {predicted.size}"
        )
    if not (np.isfinite(actual).all() and np.isfinite(predicted).all()):
        raise ValueError("y_test and y_pred must not contain NaN or infinity")

    errors = actual - predicted
    rmse = np.sqrt(np.mean(errors ** 2))
    mae = np.mean(np.abs(errors))

    # Percentage error is undefined where the true value is 0, so those samples
    # are left out of the MAPE instead of turning it into inf/nan.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs(errors[nonzero] / actual[nonzero])) * 100
    else:
        mape = float("nan")

    print(f"{model_name} - RMSE: {rmse:.2f}, MAE: {mae:.2f}, MAPE: {mape:.2f}%")
    return y_pred