In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Read the processed BTC dataset; the "date" column is parsed into datetimes
# while loading. Later cells rely on the "close" and "finbert_score" columns.
btc_df = pd.read_csv("../data/processed/btc_sentiment_aligned.csv", parse_dates=["date"])

# Quick look at the first rows to confirm the file loaded as expected.
btc_df.head()

In [None]:
# Feature engineering as one chained ``assign``. Keyword order matters here:
# later columns are computed from earlier ones, and it also fixes the column
# order of the resulting frame.
btc_df = (
    btc_df
    .assign(
        # Daily log return of the close price.
        log_return=lambda d: np.log(d["close"]).diff(),
        # Realized volatility: rolling std of log returns over trailing
        # windows of 5, 22 and 60 rows (backward-looking only).
        rv_5=lambda d: d["log_return"].rolling(5).std(),
        rv_22=lambda d: d["log_return"].rolling(22).std(),
        rv_60=lambda d: d["log_return"].rolling(60).std(),
        # Target: log of the NEXT row's 5-row volatility. Only the target is
        # shifted; the features stay aligned to their own row. The 1e-6
        # offset keeps the log finite when the volatility is exactly zero.
        target=lambda d: np.log(d["rv_5"].shift(-1) + 1e-6),
        # Exponentially smoothed sentiment (spans 3 and 7).
        sent_ema_3=lambda d: d["finbert_score"].ewm(span=3).mean(),
        sent_ema_7=lambda d: d["finbert_score"].ewm(span=7).mean(),
        # Interaction between short-term sentiment and short-term volatility.
        sent_vol=lambda d: d["sent_ema_3"] * d["rv_5"],
    )
    # Drop every row that has a NaN in any column (rolling warm-up rows and
    # the final row, whose shifted target is undefined), then renumber.
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Columns fed to the model: three volatility horizons, two smoothed sentiment
# series and the sentiment x volatility interaction.
features = ["rv_5", "rv_22", "rv_60", "sent_ema_3", "sent_ema_7", "sent_vol"]

# Plain NumPy views of the design matrix and of the target column.
X = btc_df.loc[:, features].to_numpy()
y = btc_df["target"].to_numpy()

In [None]:
# Standardise the target vector; StandardScaler needs a 2-D column, so the
# vector is reshaped on the way in and flattened on the way out.
# NOTE(review): `y_scaled` is not referenced by any later cell visible in this
# notebook (the model is trained on the unscaled `y`). The scaler is also fit
# on the whole series, before the train/test split, so wiring `y_scaled` into
# training as-is would leak test-set statistics.
y_scaler = StandardScaler().fit(y.reshape(-1, 1))
y_scaled = y_scaler.transform(y.reshape(-1, 1)).flatten()

In [None]:
# Number of consecutive rows in each input sequence handed to make_sequences.
WINDOW = 20


def make_sequences(X, y, window):
    """Slice a time-ordered feature matrix into overlapping fixed-length windows.

    Sample ``k`` consists of rows ``k .. k + window - 1`` of ``X`` and is
    labelled with ``y[k + window - 1]``, the target stored on the LAST row of
    the window. ``y`` is therefore expected to be forward-looking already: in
    this notebook ``target`` is built with ``shift(-1)``, so the entry on row
    ``t`` is the next day's value. Labelling with ``y[k + window]`` instead
    would shift the target a second time and silently turn the task into a
    two-step-ahead forecast.

    Parameters
    ----------
    X : array-like, shape (n_rows, ...)
        Time-ordered features, one row per time step.
    y : array-like, shape (n_rows, ...)
        Targets aligned row-by-row with ``X``.
    window : int
        Number of consecutive rows per sequence; must be >= 1.

    Returns
    -------
    (Xs, ys) : tuple of numpy.ndarray
        ``Xs`` has shape ``(n_rows - window + 1, window, ...)`` and ``ys`` has
        shape ``(n_rows - window + 1, ...)``. When there are fewer rows than
        ``window`` both arrays are empty but keep their trailing dimensions,
        so downstream ``.shape`` based code still works.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1 or ``X`` and ``y`` differ in length.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if window < 1:
        raise ValueError("window must be a positive integer")
    if len(X) != len(y):
        raise ValueError("X and y must contain the same number of rows")

    n_sequences = len(X) - window + 1
    if n_sequences <= 0:
        # Not enough rows for a single window: return well-shaped empties
        # rather than the shapeless array np.array([]) would give.
        empty_X = np.empty((0, window) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0,) + y.shape[1:], dtype=y.dtype)
        return empty_X, empty_y

    Xs = np.stack([X[start:start + window] for start in range(n_sequences)])
    # Label of each window = target on the window's final row.
    ys = y[window - 1:].copy()
    return Xs, ys


# Build the windowed samples and print both shapes as a quick sanity check.
Xs, ys = make_sequences(X=X, y=y, window=WINDOW)
print(Xs.shape, ys.shape)


In [None]:
# Chronological split: the first 80% of sequences train the model and the
# remaining 20% are held out for testing (no shuffling).
split = int(0.8 * len(Xs))
X_train, y_train = Xs[:split], ys[:split]
X_test, y_test = Xs[split:], ys[split:]

# StandardScaler works on 2-D input, so each 3-D block is flattened to
# (samples * window, features), scaled, and reshaped back. The scaling
# statistics are learned from the training block only and then reused
# unchanged on the test block.
scaler = StandardScaler().fit(X_train.reshape(-1, X_train.shape[-1]))
X_train = scaler.transform(
    X_train.reshape(-1, X_train.shape[-1])
).reshape(X_train.shape)
X_test = scaler.transform(
    X_test.reshape(-1, X_test.shape[-1])
).reshape(X_test.shape)

In [None]:
# Seed Python, NumPy and TensorFlow in one call so weight initialisation and
# batch shuffling, and therefore the reported RMSE, are repeatable across
# "Restart & Run All".
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Small 1-D CNN over the (window, features) sequences. "causal" padding makes
# each time step's convolution see only that step and earlier ones.
cnn = tf.keras.Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Conv1D,
    # which newer Keras versions warn about.
    tf.keras.Input(shape=X_train.shape[1:]),
    tf.keras.layers.Conv1D(64, 3, activation="relu", padding="causal"),
    tf.keras.layers.Conv1D(32, 3, activation="relu", padding="causal"),
    # Average over the time axis to get one feature vector per sequence.
    tf.keras.layers.GlobalAveragePooling1D(),
    # Single linear output: the log-volatility target.
    tf.keras.layers.Dense(1)
])

cnn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    # Huber loss is quadratic for residuals below `delta` and linear above it.
    loss=tf.keras.losses.Huber(delta=0.05)
)

cnn.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    # Keras takes the validation set from the END of the training arrays
    # (before shuffling), so validation stays chronologically after training.
    validation_split=0.2,
    # Stop once validation loss has not improved for 8 epochs and roll the
    # weights back to the best epoch.
    callbacks=[EarlyStopping(monitor="val_loss", patience=8,
                             restore_best_weights=True)],
    verbose=1
)

In [None]:
# Predict on the held-out sequences; the model returns an (n, 1) array, so it
# is flattened to a 1-D vector.
pred = cnn.predict(X_test).flatten()

# The target is a log-volatility, so exponentiate both series before scoring.
actual_vol, pred_vol = np.exp(y_test), np.exp(pred)

# Root-mean-squared error on the exponentiated scale, displayed as the cell
# output.
rmse = np.sqrt(mean_squared_error(actual_vol, pred_vol))
rmse

In [None]:
# Overlay actual and predicted volatility for the held-out test sequences.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(actual_vol, label="Actual")
ax.plot(pred_vol, label="Predicted (CNN)")
# The original title contained a mis-decoded dash ("â€”"); use a real one.
ax.set_title("BTC — Sentiment-Aware Volatility Forecast (CNN)")
# Axis labels so the figure can be read on its own.
ax.set_xlabel("Test sample")
ax.set_ylabel("5-day realized volatility")
ax.legend()
plt.show()