In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping

  if not hasattr(np, "object"):


In [2]:
nifty_df = pd.read_csv(
    "../data/processed/nifty_sentiment_aligned.csv",
    parse_dates=["date"]
)

nifty_df.head()

Unnamed: 0,date,open,high,low,close,volume,return,asset,finbert_score,vader_score,finbert_lag_1,vader_lag_1,finbert_lag_2,vader_lag_2,finbert_lag_3,vader_lag_3,finbert_lag_5,vader_lag_5
0,2024-10-10,25067.050781,25134.050781,24979.400391,24998.449219,261400,0.00066,NIFTY,0.111314,0.369538,0.385222,0.301867,0.827009,0.2561,0.143285,0.290683,0.397322,0.00398
1,2024-10-11,24985.300781,25028.650391,24920.050781,24964.25,210500,-0.001369,NIFTY,0.464328,0.3854,0.111314,0.369538,0.385222,0.301867,0.827009,0.2561,0.277918,-0.022869
2,2024-10-14,25023.449219,25159.75,25017.5,25127.949219,206400,0.006536,NIFTY,0.3112,0.3697,0.464328,0.3854,0.111314,0.369538,0.385222,0.301867,0.143285,0.290683
3,2024-10-15,25186.300781,25212.050781,25008.150391,25057.349609,257200,-0.002814,NIFTY,0.221171,0.242862,0.3112,0.3697,0.464328,0.3854,0.111314,0.369538,0.827009,0.2561
4,2024-10-16,25008.550781,25093.400391,24908.449219,24971.300781,226800,-0.00344,NIFTY,0.489884,0.01658,0.221171,0.242862,0.3112,0.3697,0.464328,0.3854,0.385222,0.301867


In [3]:
nifty_df["volatility"] = nifty_df["return"].rolling(10).std().shift(-1)
nifty_df["log_vol"] = np.log(nifty_df["volatility"] + 1e-6)

nifty_df["abs_return"] = np.abs(nifty_df["return"])
nifty_df["sq_return"] = nifty_df["return"] ** 2

nifty_df["vol_5"] = nifty_df["return"].rolling(5).std()
nifty_df["vol_22"] = nifty_df["return"].rolling(22).std()
nifty_df["vol_60"] = nifty_df["return"].rolling(60).std()

nifty_df["sent_vol"] = nifty_df["finbert_score"] * nifty_df["vol_5"]

nifty_df = nifty_df.dropna().reset_index(drop=True)

In [4]:
features = [
    "finbert_score", "vader_score",
    "abs_return", "sq_return",
    "vol_5", "vol_22", "vol_60",
    "sent_vol"
]

X = nifty_df[features].values
y = nifty_df["log_vol"].values

In [5]:
y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(y.reshape(-1,1)).flatten()

ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by StandardScaler.

In [None]:
WINDOW = 30

def make_sequences(X, y, window):
    Xs, ys = [], []
    for i in range(window, len(X)):
        Xs.append(X[i-window:i])
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

Xs, ys = make_sequences(X, y_scaled, WINDOW)

In [None]:
split = int(0.8 * len(Xs))
X_train, X_test = Xs[:split], Xs[split:]
y_train, y_test = ys[:split], ys[split:]

scaler = StandardScaler()
X_train = scaler.fit_transform(
    X_train.reshape(-1, X_train.shape[-1])
).reshape(X_train.shape)

X_test = scaler.transform(
    X_test.reshape(-1, X_test.shape[-1])
).reshape(X_test.shape)

In [None]:
cnn = tf.keras.Sequential([
    tf.keras.layers.Conv1D(64, 3, activation="relu", input_shape=X_train.shape[1:]),
    tf.keras.layers.Conv1D(32, 3, activation="relu"),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(1)
])

cnn.compile(
    optimizer="adam",
    loss=tf.keras.losses.Huber(delta=0.01)
)

cnn.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[EarlyStopping(patience=10, restore_best_weights=True)],
    verbose=1
)

In [None]:
cnn_pred = cnn.predict(X_test).flatten()
cnn_pred = y_scaler.inverse_transform(cnn_pred.reshape(-1,1)).flatten()

actual_vol = np.exp(y_scaler.inverse_transform(y_test.reshape(-1,1)).flatten())
pred_vol = np.exp(cnn_pred)

rmse = np.sqrt(mean_squared_error(actual_vol, pred_vol))
rmse

In [None]:
plt.figure(figsize=(10,4))
plt.plot(actual_vol, label="Actual")
plt.plot(pred_vol, label="Predicted (CNN)")
plt.legend()
plt.title("NIFTY â€” Sentiment-Aware Volatility Forecast")
plt.show()