In [18]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np

from sklearn.linear_model import HuberRegressor, LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler

from sklearn.metrics import r2_score

In [19]:
df = pd.read_csv('VIX and SPX.csv')

df_vix = df[['VIX Index', 'VIX Index.1']].copy()
df_vix = df_vix.rename(columns = {'VIX Index' : 'Time', 'VIX Index.1': 'VIX'})
df_vix['Time'] = pd.to_datetime(df_vix['Time'])
df_vix = df_vix.set_index('Time')

df_spx = df[['SPX Index', 'SPX Index.1']].copy()
df_spx = df_spx.rename(columns = {'SPX Index' : 'Time', 'SPX Index.1': 'SPX'})
df_spx["SPX_Var"] = df_spx["SPX"].rolling(window = 21, min_periods = 2).std()
df_spx['Time'] = pd.to_datetime(df_spx['Time'])
df_spx = df_spx.set_index('Time')

In [20]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_vix.index,
        y=df_vix["VIX"].diff(),
        mode="lines+markers",
        name="VIX"
    )
)
fig.add_trace(
    go.Scatter(
        x=df_spx.index,
        y=df_spx["SPX_Var"].diff(),
        mode="lines+markers",
        name="SPX"
    )
)

fig.update_layout(
    title="Évolution du VIX vs SPX",
    xaxis_title="Date",
    yaxis_title="Valeur VIX et SPX",
    template="plotly_white"
)

fig.show()


The VIX is the 30-day expected volatility of the SP500 index, more precisely the square root of a 30-day expected realized variance of the index.

It is calculated as a weighted average of out-of-the-money call and put options on the S&P 500:
$$
\text{VIX}^2 
= \frac{2 e^{r \tau}}{\tau} 
\left( 
\int_{0}^{F} \frac{P(K)}{K^2} \, dK 
+ 
\int_{F}^{\infty} \frac{C(K)}{K^2} \, dK 
\right)
$$



In [21]:
df_ = df_spx.join(df_vix, how="left")

In [22]:
df_ = df_.dropna()

In [23]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_.index,
        y=df_["VIX"].diff(),
        mode="lines+markers",
        name="VIX"
    )
)
fig.add_trace(
    go.Scatter(
        x=df_.index,
        y=df_["SPX_Var"].diff(),
        mode="lines+markers",
        name="SPX"
    )
)

fig.update_layout(
    title="Évolution du VIX vs SPX",
    xaxis_title="Date",
    yaxis_title="Valeur VIX et SPX",
    template="plotly_white"
)

fig.show()


fitter un model et regarder le temps de retour a la moyenne

In [29]:
x = df_[df_['SPX_Var'] > df_['SPX_Var'].rolling(window = 10, min_periods = 1).median()]['VIX'].pct_change().fillna(0).to_numpy(dtype=float)
y = df_[df_['SPX_Var'] > df_['SPX_Var'].rolling(window = 10, min_periods = 1).median()]['SPX_Var'].pct_change().fillna(0).to_numpy(dtype=float)


max_lag = 60
lags = np.arange(-max_lag, max_lag + 1)

# corr( x_{t-lag}, y_t ) ; lag>0 => VIX lead
ccf = []
for L in lags:
    if L > 0:
        a = x[:-L]; b = y[L:]
    elif L < 0:
        k = -L
        a = x[k:]; b = y[:-k]
    else:
        a = x; b = y
    ccf.append(np.corrcoef(a, b)[0, 1])
ccf = np.array(ccf)

# meilleur lag
best_idx = int(np.nanargmax(ccf))
best_lag = int(lags[best_idx])
best_corr = float(ccf[best_idx])

# --- FIG 1: corrélation vs lag (go.Scatter) ---
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=lags, y=ccf, mode='lines+markers', name='corr'))
fig1.add_hline(y=0, line_width=1)
fig1.add_vline(x=0, line_width=1)
fig1.update_layout(
    title=f"Lead–Lag VIX → SPX_Var (lag>0: VIX lead) | max @ lag={best_lag} (ρ={best_corr:.3f})",
    xaxis_title="Lag (périodes)",
    yaxis_title="Corrélation",
    template="plotly_white"
)
fig1.show()

# --- FIG 2: scatter au meilleur lag (go.Scatter) ---
if best_lag > 0:
    Xa = x[:-best_lag]; Ya = y[best_lag:]; xl = f"VIX décalé de {best_lag}"
elif best_lag < 0:
    k = -best_lag
    Xa = x[k:]; Ya = y[:-k]; xl = "VIX"
else:
    Xa = x; Ya = y; xl = "VIX (pas de décalage)"

fig2 = go.Figure()
fig2.add_trace(go.Scatter(x=Xa, y=Ya, mode='markers', name='points'))
fig2.update_layout(
    title=f"Scatter au meilleur lag ({best_lag}) | ρ={np.corrcoef(Xa, Ya)[0,1]:.3f}",
    xaxis_title=xl,
    yaxis_title="SPX_Var",
    template="plotly_white"
)
fig2.show()


In [48]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# --- Séries de base (variations) ---
vix_ch = df_['VIX'].pct_change()
spxvar_ch = df_['SPX_Var'].pct_change()

# Finite masks (pas de NaN/Inf) pour CHAQUE série, séparément
finite_x = vix_ch.replace([np.inf, -np.inf], np.nan).notna().to_numpy()
finite_y = spxvar_ch.replace([np.inf, -np.inf], np.nan).notna().to_numpy()

# Numpy arrays bruts (pas de masque global ici)
x = vix_ch.to_numpy(dtype=float)      # VIX_t
y = spxvar_ch.to_numpy(dtype=float)   # SPXVar_t

# ========= COND PERSONNALISABLE (évaluée à la date t seulement) =========
# Exemple demandé : niveau de SPX_Var > médiane roulante 3j (causale)
cond_series = df_['SPX_Var'] < df_['SPX_Var'].rolling(3, min_periods=3).median()

# Autres exemples possibles :
# cond_series = spxvar_ch > spxvar_ch.rolling(3, min_periods=3).median()  # sur la variation
# cond_series = df_['VIX'] > df_['VIX'].rolling(5, min_periods=5).median() # sur VIX
# cond_series = ((df_['SPX_Var'] > df_['SPX_Var'].rolling(3, min_periods=3).median())
#                & (df_['VIX'] > df_['VIX'].rolling(5, min_periods=5).median()))

cond = cond_series.fillna(False).to_numpy()  # bool à l’instant t
# =========================================================================

max_lag = 60
lags = np.arange(-max_lag, max_lag + 1, dtype=int)

ccf = np.full(lags.shape, np.nan, dtype=float)
n_obs = np.zeros(lags.shape, dtype=int)

# Convention : corr( x_t , y_{t+L} )
# L > 0 : x "lead" y (on compare y à L jours dans le futur)
for i, L in enumerate(lags):
    if L > 0:
        # paires (x_t, y_{t+L}) pour t = 0..N-L-1
        valid_t = finite_x[:-L] & finite_y[L:] & cond[:-L]  # cond évaluée à t uniquement
        a = x[:-L][valid_t]
        b = y[L:][valid_t]
    elif L < 0:
        k = -L
        # paires (x_t, y_{t-L}) == (x_{t+k}, y_t) pour t = 0..N-k-1
        # on choisit l’ancre sur x_{t+k} -> cond évaluée à ces dates-là (indices k:)
        valid_t = finite_x[k:] & finite_y[:-k] & cond[k:]
        a = x[k:][valid_t]
        b = y[:-k][valid_t]
    else:
        valid_t = finite_x & finite_y & cond
        a = x[valid_t]
        b = y[valid_t]

    n = min(len(a), len(b))
    n_obs[i] = n
    if n > 1:
        c = np.corrcoef(a, b)[0, 1]
        if np.isfinite(c):
            ccf[i] = c

# Meilleur lag avec seuil d’observations
min_n = 10
valid = (n_obs >= min_n) & np.isfinite(ccf)
if np.any(valid):
    local_idx = np.nanargmax(ccf[valid])
    global_idx = np.where(valid)[0][local_idx]
    best_lag = int(lags[global_idx])
    best_corr = float(ccf[global_idx])
    best_n = int(n_obs[global_idx])
    title = f"Lead–Lag VIX → SPX_Var | max @ lag={best_lag} (ρ={best_corr:.3f}, n={best_n})"
else:
    title = "Lead–Lag VIX → SPX_Var | aucun lag valide (n < 10 ou corr=NaN)"

# --- Figure ---
fig = go.Figure()
fig.add_trace(go.Scatter(x=lags, y=ccf, mode='lines+markers', name="Corrélation", yaxis="y1"))
fig.add_trace(go.Bar(x=lags, y=n_obs, name="Nb obs", yaxis="y2", opacity=0.3))
fig.update_layout(
    title=title,
    xaxis=dict(title="Lag (jours)"),
    yaxis=dict(title="Corrélation", range=[-1, 1]),
    yaxis2=dict(title="Nb obs", overlaying="y", side="right"),
    template="plotly_white",
    bargap=0.05
)
fig.add_hline(y=0, line_width=1, line_color="black")
fig.add_vline(x=0, line_width=1, line_color="black")
fig.show()


In [None]:
df_['corr'] = df_["SPX"].pct_change().shift(-1).rolling(window = 40, min_periods = 2).corr(df_["VIX"].pct_change())

import numpy as np

# séries de base
x = df_["SPX"].pct_change().shift(-1).to_numpy(dtype=float)  # r_SPX_{t+1}
y = df_["VIX"].pct_change().to_numpy(dtype=float)            # ΔVIX_t

W = 40  # window
corr_trim80 = np.full(len(df_), np.nan, dtype=float)

for i in range(W - 1, len(df_)):
    # fenêtre courante
    a = x[i - W + 1 : i + 1]
    b = y[i - W + 1 : i + 1]

    # drop NaN/inf s’il y en a (sécurité)
    m = np.isfinite(a) & np.isfinite(b)
    a = a[m]; b = b[m]
    if a.size < 2:
        continue

    # seuils 80e percentile sur |a| et |b|
    qa = np.quantile(np.abs(a), 0.80)
    qb = np.quantile(np.abs(b), 0.80)

    # garde seulement les points “petite amplitude” dans CHAQUE série
    m2 = (np.abs(a) <= qa) & (np.abs(b) <= qb)
    a2 = a[m2]; b2 = b[m2]
    if a2.size >= 2:
        corr_trim80[i] = np.corrcoef(a2, b2)[0, 1]

df_["corr_trim80"] = corr_trim80


In [56]:
fig2 = go.Figure()
fig2.add_trace(go.Scatter(y=df_["corr_trim80"], x=df_.index, mode='markers', name='points'))
fig2.update_layout(
    title=f"Scatter au meilleur lag ({best_lag}) | ρ={np.corrcoef(Xa, Ya)[0,1]:.3f}",
    xaxis_title=xl,
    yaxis_title="SPX_Var",
    template="plotly_white"
)
fig2.show()


In [57]:
n = int(len(df_) * 0.7)

df_train, df_test = df_[:n], df_[n:]

X_train = df_train["VIX"].pct_change().fillna(0).values.reshape(-1, 1)
y_train = df_train["SPX"].pct_change().shift(-1).fillna(0).values

X_train, y_train = X_train, y_train

X_test = df_test["VIX"].pct_change().fillna(0).values.reshape(-1, 1)
y_test = df_test["SPX"].pct_change().shift(-1).fillna(0).values

model = Pipeline([
    ("scaler", RobustScaler()),
    ("model", HuberRegressor())
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
print("Test R²:", r2)

Test R²: 0.002706699377706512


In [58]:
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df_test.index, y=y_test,
    mode="lines", name="Valeurs vraies"
))
fig.add_trace(go.Scatter(
    x=df_test.index, y=y_pred,
    mode="lines", name="Prédictions"
))
fig.update_layout(
    title=f"Comparaison série temporelle (R²={r2_score(y_test, y_pred):.3f})",
    xaxis_title="Temps",
    yaxis_title="SPX return t+1",
    template="plotly_white"
)
fig.show()