<a href="https://colab.research.google.com/github/awaiskhan005/DEEP-LEARNING-AND-FORECASTING/blob/main/Gitty__BTC_Project_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# -*- coding: utf-8 -*-
# BTC Prediction with CRPS evaluation and actual vs predicted plot
import warnings
warnings.filterwarnings('ignore')

# Install necessary packages
!pip install python-binance
# Import required libraries
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from binance.client import Client
import plotly.graph_objects as go
from google.colab import drive



# Binance API keys (replace with your own API key and secret)
api_key = "8gexdTyvvgmMQCqeFH9Uw4kDUc7xdlQ7a8ZGYx|sczMFUD78Wkwe1cNQYNbKPe7c"
api_secret = "g39iujh5i29t6GcMoCY|GXKusnfkE9PMpcZVyNNk1XIO|q2eGumGM6fb9za6DOZ"
client = Client(api_key, api_secret)

Collecting python-binance
  Downloading python_binance-1.0.28-py2.py3-none-any.whl.metadata (13 kB)
Collecting dateparser (from python-binance)
  Downloading dateparser-1.2.1-py3-none-any.whl.metadata (29 kB)
Collecting pycryptodome (from python-binance)
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Downloading python_binance-1.0.28-py2.py3-none-any.whl (130 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.3/130.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dateparser-1.2.1-py3-none-any.whl (295 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.7/295.7 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m43.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pycryptodome

In [None]:


# Mount Google Drive for persistent storage
drive.mount('/content/drive')
base_path = "/content/drive/My Drive/BTC_Predictions/"
os.makedirs(base_path, exist_ok=True)

# Step 1: Data Collection (fetch past 60 days of BTCUSDT 5-min data)
def fetch_data(symbol='BTCUSDT', interval=Client.KLINE_INTERVAL_5MINUTE, lookback='60 day ago UTC'):
    klines = client.get_historical_klines(symbol, interval, lookback)
    data = pd.DataFrame(klines, columns=['Time','Open','High','Low','Close','Volume',
                                        'CloseTime','QuoteAssetVolume','NumberOfTrades',
                                        'TakerBuyBaseAssetVolume','TakerBuyQuoteAssetVolume','Ignore'])
    data['Close'] = data['Close'].astype(float)
    data['Time'] = pd.to_datetime(data['Time'], unit='ms')
    data.set_index('Time', inplace=True)
    return data[['Close']]

# Step 2: Feature Engineering (returns, moving averages, volatility)
def create_features(data):
    data['Return'] = data['Close'].pct_change()
    data['MA7'] = data['Close'].rolling(window=7).mean()
    data['MA21'] = data['Close'].rolling(window=21).mean()
    data['Volatility'] = data['Return'].rolling(window=21).std()
    data.dropna(inplace=True)
    return data

# Step 3: Data Preprocessing (scaling features)
def preprocess_data(data):
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_data = scaler.fit_transform(data)
    return scaled_data, scaler

# Step 4: Create sequences for LSTM training
def create_sequences(data, lookback=48):
    X, y = [], []
    for i in range(len(data) - lookback):
        X.append(data[i:(i+lookback)])
        # Target: closing price at i+lookback
        y.append(data[i + lookback, 0])
    return np.array(X), np.array(y)

# Step 5: Build LSTM model
def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Step 6: Monte Carlo simulation for next 24h price paths
def monte_carlo_simulation(initial_price, sigma, time_length=288, num_simulations=100):
    dt = 5 / (24 * 60)  # 5 minutes as fraction of a day
    # Vectorized simulation of price changes
    price_changes = np.random.normal(0, sigma * np.sqrt(dt), size=(num_simulations, time_length))
    price_paths = initial_price * np.cumprod(1 + price_changes, axis=1)
    return price_paths

# === STEP 1: Run now to train model and generate 24h predictions ===
data = fetch_data()
data = create_features(data)
scaled_data, scaler = preprocess_data(data)
X, y = create_sequences(scaled_data, lookback=48)
model = build_lstm_model((X.shape[1], X.shape[2]))
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.1, verbose=0)
# Monte Carlo simulate 100 possible paths for the next 24 hours
sigma = data['Volatility'].mean()
initial_price = data['Close'].iloc[-1]
simulated_paths = monte_carlo_simulation(initial_price, sigma, time_length=288, num_simulations=100)
# Timestamps for the next 24 hours (5-minute intervals)
start_time = data.index[-1] + pd.Timedelta(minutes=5)
future_times = pd.date_range(start=start_time, periods=simulated_paths.shape[1], freq='5T')
# Save predicted paths to Google Drive
predictions_df = pd.DataFrame(simulated_paths.T, index=future_times)
predictions_df.to_csv(os.path.join(base_path, "predicted_paths.csv"))
print("Predictions saved! After 24 hours, run Step 2 to fetch actual prices for comparison.")

# Plot the 100 predicted paths
fig = go.Figure()
for i, path in enumerate(simulated_paths):
    fig.add_trace(go.Scatter(x=future_times, y=path, mode='lines',
                             name=f'Path {i+1}', line=dict(color='blue'), opacity=0.1))
fig.update_layout(title='100 Possible Predicted Bitcoin Price Paths (Monte Carlo)',
                  xaxis_title='Time (Next 24h, 5min intervals)',
                  yaxis_title='BTC Price (USD)',
                  xaxis=dict(tickformat='%d-%b %H:%M', showgrid=True),
                  template='plotly_dark')
fig.show()




MessageError: Error: credential propagation was unsuccessful

In [None]:
# === STEP 2: Run after 24 hours to fetch actual BTC prices ===
def fetch_real_prices():
    klines = client.get_historical_klines('BTCUSDT', Client.KLINE_INTERVAL_5MINUTE, "24 hours ago UTC")
    real_data = pd.DataFrame(klines, columns=['Time','Open','High','Low','Close','Volume',
                                             'CloseTime','QuoteAssetVolume','NumberOfTrades',
                                             'TakerBuyBaseAssetVolume','TakerBuyQuoteAssetVolume','Ignore'])
    real_data['Close'] = real_data['Close'].astype(float)
    real_data['Time'] = pd.to_datetime(real_data['Time'], unit='ms')
    real_data.set_index('Time', inplace=True)
    # Save to CSV for later use in CRPS calculation
    real_data[['Close']].to_csv(os.path.join(base_path, "real_prices.csv"))
    print("Real BTC prices for the last 24h fetched and saved.")
    return real_data[['Close']]


In [None]:
real_data = fetch_real_prices()
print(real_data.head())  # Check if data was fetched


Real BTC prices for the last 24h fetched and saved.
                        Close
Time                         
2025-02-27 16:20:00  85208.02
2025-02-27 16:25:00  84918.01
2025-02-27 16:30:00  84884.40
2025-02-27 16:35:00  84835.87
2025-02-27 16:40:00  84943.39


In [None]:
# === STEP 3: Run after Step 2 to compute CRPS and plot comparison ===
def calculate_crps_for_miner(predicted_paths, real_prices, time_increment=300):
    predicted_paths = np.array(predicted_paths)  # shape: (num_simulations, T)
    real_prices = np.array(real_prices)          # shape: (T,)
    # CRPS: mean over time of [mean(|pred - obs|) - 0.5*mean(|pred_i - pred_j|)]
    abs_diffs = np.abs(predicted_paths - real_prices)
    term1 = np.mean(abs_diffs, axis=0)
    pairwise_diffs = np.abs(predicted_paths[:, None, :] - predicted_paths[None, :, :])
    term2 = 0.5 * np.mean(pairwise_diffs, axis=(0, 1))
    crps_per_time = term1 - term2
    crps_score = crps_per_time.mean()
    return crps_score, crps_per_time

def compute_crps():
    predictions_df = pd.read_csv(os.path.join(base_path, "/content/drive/MyDrive/BTC_Predictions/predicted_paths.csv"), index_col=0, parse_dates=True)
    real_prices_df = pd.read_csv(os.path.join(base_path, "/content/drive/MyDrive/BTC_Predictions/real_prices.csv"), index_col=0, parse_dates=True)
    # Align lengths in case of any mismatch
    if len(real_prices_df) > len(predictions_df):
        real_prices_df = real_prices_df.iloc[:len(predictions_df)]
    elif len(real_prices_df) < len(predictions_df):
        predictions_df = predictions_df.iloc[:len(real_prices_df)]
    predicted_paths = predictions_df.values.T
    real_prices = real_prices_df['Close'].values.flatten()
    crps_score, _ = calculate_crps_for_miner(predicted_paths, real_prices, time_increment=300)
    print(f"CRPS Score: {crps_score:.4f}")
    # Plot predicted vs actual
    fig = go.Figure()
    # Predicted paths (faint lines)
    for i in range(predicted_paths.shape[0]):
        fig.add_trace(go.Scatter(x=predictions_df.index, y=predicted_paths[i],
                                 mode='lines', line=dict(color='steelblue'), opacity=0.2,
                                 name=f'Path {i+1}', showlegend=False))
    # Actual price (highlighted)
    fig.add_trace(go.Scatter(x=real_prices_df.index, y=real_prices_df['Close'],
                             mode='lines', name='Actual Price',
                             line=dict(color='red', width=3)))
    fig.update_layout(title=f'Bitcoin Price: Predicted vs Actual (CRPS={crps_score:.4f})',
                      xaxis_title='Time (Last 24 Hours)',
                      yaxis_title='BTC Price (USD)',
                      xaxis=dict(tickformat='%d-%b %H:%M', showgrid=True),
                      template='plotly_dark')
    fig.show()
    return crps_score

# Usage after 24h:
# real_data = fetch_real_prices()
# crps_value = compute_crps()


In [None]:
import pandas as pd

file_path = "/content/drive/My Drive/BTC_Predictions/real_prices.csv"
real_prices_df = pd.read_csv(file_path, index_col=0, parse_dates=True)
print(real_prices_df.head())  # Should show the actual BTC prices


                        Close
Time                         
2025-02-27 16:20:00  85208.02
2025-02-27 16:25:00  84918.01
2025-02-27 16:30:00  84884.40
2025-02-27 16:35:00  84835.87
2025-02-27 16:40:00  84943.39


In [None]:
crps_value = compute_crps()


CRPS Score: 4509.1608


In [None]:
import pandas as pd

predictions_df = pd.read_csv("/content/drive/My Drive/BTC_Predictions/predicted_paths.csv", index_col=0, parse_dates=True)
real_prices_df = pd.read_csv("/content/drive/My Drive/BTC_Predictions/real_prices.csv", index_col=0, parse_dates=True)

print("Predictions:")
print(predictions_df.describe())

print("\nActual BTC Prices:")
print(real_prices_df.describe())


Predictions:
                  0             1             2             3             4  \
count    288.000000    288.000000    288.000000    288.000000    288.000000   
mean   86714.220929  86558.987030  86577.071148  86420.953077  86581.787425   
std       55.377324     55.888557     38.392119     82.039395     18.781760   
min    86595.182457  86469.963785  86501.585697  86322.257836  86533.283666   
25%    86673.705214  86505.854270  86547.313479  86362.753165  86568.352274   
50%    86732.170118  86553.522274  86573.179373  86383.918931  86580.262567   
75%    86754.493055  86609.511934  86611.412113  86454.824883  86591.235364   
max    86802.553586  86657.523686  86676.555947  86615.639142  86630.344512   

                  5             6             7             8             9  \
count    288.000000    288.000000    288.000000    288.000000    288.000000   
mean   86575.924158  86626.269005  86690.391540  86702.351845  86631.648905   
std       51.035012     34.827135     

In [3]:
pip install properscoring

Collecting properscoring
  Downloading properscoring-0.1-py2.py3-none-any.whl.metadata (6.2 kB)
Downloading properscoring-0.1-py2.py3-none-any.whl (23 kB)
Installing collected packages: properscoring
Successfully installed properscoring-0.1


In [4]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from properscoring import crps_ensemble  # Ensure this is installed via pip install properscoring

def calculate_price_changes_over_intervals(price_paths, interval_steps, absolute_price=False):
    """
    Calculate price changes over specified intervals.
    """
    interval_prices = price_paths[:, ::interval_steps]
    return np.diff(interval_prices, axis=1) if not absolute_price else interval_prices[:, 1:]

def calculate_crps_for_miner(simulation_runs, real_price_path, time_increment):
    """
    Calculate the total CRPS score for miner's simulations over specified intervals.
    """
    scoring_intervals = {
        "5min": 300,
        "30min": 1800,
        "3hour": 10800,
        "24hour_abs": 86400,
    }
    def get_interval_steps(scoring_interval, time_increment):
        return int(scoring_interval / time_increment)
    sum_all_scores = 0.0
    for interval_name, interval_seconds in scoring_intervals.items():
        interval_steps = get_interval_steps(interval_seconds, time_increment)
        simulated_changes = calculate_price_changes_over_intervals(
            simulation_runs, interval_steps, absolute_price=interval_name.endswith("_abs"))
        real_changes = calculate_price_changes_over_intervals(
            real_price_path.reshape(1, -1), interval_steps, absolute_price=interval_name.endswith("_abs"))[0]
        crps_values = [crps_ensemble(real_changes[t], simulated_changes[:, t]) for t in range(len(real_changes))]
        total_crps_interval = np.sum(crps_values)
        sum_all_scores += total_crps_interval
    return sum_all_scores

def compute_crps():
    predictions_df = pd.read_csv("/content/drive/My Drive/BTC_Predictions/predicted_paths.csv", index_col=0, parse_dates=True)
    real_prices_df = pd.read_csv("/content/drive/My Drive/BTC_Predictions/real_prices.csv", index_col=0, parse_dates=True)
    if len(real_prices_df) > len(predictions_df):
        real_prices_df = real_prices_df.iloc[:len(predictions_df)]
    elif len(real_prices_df) < len(predictions_df):
        predictions_df = predictions_df.iloc[:len(real_prices_df)]
    predicted_paths = predictions_df.values.T
    real_prices = real_prices_df['Close'].values.flatten()
    crps_score = calculate_crps_for_miner(predicted_paths, real_prices, time_increment=300)
    print(f"Updated CRPS Score (based on price changes): {crps_score:.4f}")
    fig = go.Figure()
    for i in range(predicted_paths.shape[0]):
        fig.add_trace(go.Scatter(x=predictions_df.index, y=predicted_paths[i],
                                 mode='lines', line=dict(color='steelblue'), opacity=0.2,
                                 name=f'Path {i+1}', showlegend=False))
    fig.add_trace(go.Scatter(x=real_prices_df.index, y=real_prices_df['Close'],
                             mode='lines', name='Actual Price',
                             line=dict(color='red', width=3)))
    fig.update_layout(title=f'Bitcoin Price: Predicted vs Actual (Updated CRPS={crps_score:.4f})',
                      xaxis_title='Time (Last 24 Hours)',
                      yaxis_title='BTC Price (USD)',
                      xaxis=dict(tickformat='%d-%b %H:%M', showgrid=True),
                      template='plotly_dark')
    fig.show()
    return crps_score

# Run CRPS Calculation
total_crps = compute_crps()


Updated CRPS Score (based on price changes): 82832.6867
