In [1]:
# Day 5.4 – Permutation Sanity Check
# Purpose:
# Verify whether the absence of return predictability
# is structural or an artifact of temporal alignment.
#
# This notebook performs diagnostic analysis only.
# No model, feature, or validation logic is modified.


In [2]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd

# Put the directory two levels above the current working directory on the
# import path (presumably the repository root that contains the
# `return_predictability` package — TODO confirm the expected layout).
# NOTE(review): this depends on the working directory the kernel was started in.
project_root = Path().resolve().parent.parent

# Guard the insertion so that re-running this cell does not stack
# duplicate entries onto sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))


In [3]:
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import r2_score

In [4]:
from return_predictability.src.load_data import load_data



In [5]:
# Read "tsla.csv" through the project's loader and keep the "price"
# column as the working series for the rest of the notebook.
df = load_data("tsla.csv")
price = df["price"]

# Preview of the first rows (rendered as the cell output).
price.head()

Date
2010-06-29    1.59267
2010-06-30    1.58867
2010-07-01    1.46400
2010-07-02    1.28000
2010-07-06    1.07400
Name: price, dtype: float64

In [6]:
# Log returns: first difference of the log price series.
# The first element is NaN because it has no earlier price to difference against.
returns = np.log(price).diff()

# Cell output: the type of `returns`. Only the last expression of a cell is
# displayed, so no other bare expression is kept here.
type(returns)

pandas.core.series.Series

In [7]:
# Feature matrix: the return observed k rows earlier, for k in (1, 5).
# shift(k) moves values forward along the index, so the first rows of each
# lag column are NaN.
X = pd.DataFrame({f"lag_{k}": returns.shift(k) for k in (1, 5)})

# Target: the same-row return, copied so that `returns` itself stays untouched.
y = returns.copy()

X.head()

Unnamed: 0_level_0,lag_1,lag_5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-06-29,,
2010-06-30,,
2010-07-01,-0.002515,
2010-07-02,-0.081725,
2010-07-06,-0.134312,


In [8]:
# Keep only the rows where every lag feature is present, and apply the very
# same boolean mask to the target so X and y stay aligned row for row.
mask = X.notna().all(axis="columns")
X_clean = X.loc[mask]
y_clean = y.loc[mask]

In [9]:
# Shapes of the filtered features and target; the row counts should agree.
(X_clean.shape, y_clean.shape)


((3887, 2), (3887,))

In [10]:
# Expanding-window walk-forward evaluation of a linear regression on the
# two-lag features: at step t the model is fitted on rows [0, t) and asked
# to predict row t only.
#
# NOTE(review): the loop starts at t=1, so the earliest fits use fewer
# training rows than there are feature columns. Those early predictions are
# kept in `predictions` and therefore enter the R^2 computed afterwards.
predictions = []
actuals = []

n_obs = len(y_clean)
for t in range(1, n_obs):
    # Training window: everything strictly before position t.
    X_train, y_train = X_clean.iloc[:t], y_clean.iloc[:t]

    # Single-row test slice; slicing t:t+1 keeps X_test two-dimensional.
    X_test = X_clean.iloc[t:t + 1]
    y_test = y_clean.iloc[t]

    # A fresh estimator is fitted from scratch at every step.
    model = LinearRegression().fit(X_train, y_train)

    y_pred = model.predict(X_test)
    predictions.append(float(y_pred[0]))
    actuals.append(float(y_test))

In [11]:
# Out-of-sample R^2 pooled over all walk-forward predictions on the
# unpermuted targets.
r2 = r2_score(y_true=actuals, y_pred=predictions)
print(f"Walk-Forward R^2 with Clean Targets: {r2:.6f}")

Walk-Forward R^2 with Clean Targets: -0.014559


In [12]:
# Quick sanity check: one prediction per actual, followed by the first
# five values of each list.
print(len(predictions) == len(actuals))
for values in (predictions, actuals):
    print(values[:5])

True
[0.09990577494050412, 0.03770336995625995, -0.05166324439617888, 0.03603266158229525, 0.10773112291384776]
[-0.003442344190972807, -0.0203170699439377, 0.06196355206706522, 0.08958593367123063, 0.002514470851039452]


In [13]:
# Extended feature matrix: lagged returns at 1, 5, 10 and 20 rows back.
X_ext = pd.DataFrame({f"lag_{k}": returns.shift(k) for k in (1, 5, 10, 20)})

# Drop the leading rows where any lag is still undefined, and filter the
# target with the same mask so both stay aligned.
mask_ext = X_ext.notna().all(axis="columns")
X_ext_clean = X_ext.loc[mask_ext]
y_ext_clean = y.loc[mask_ext]

In [14]:
# Same expanding-window walk-forward loop as the two-lag baseline, run on
# the four-lag feature matrix.
#
# NOTE(review): the loop starts at t=1, so the earliest fits use fewer
# training rows than the four feature columns. Those early predictions are
# part of the R^2 reported at the end of this cell.
predictions_ext = []
actuals_ext = []

n_obs_ext = len(y_ext_clean)
for t in range(1, n_obs_ext):
    # Training window: everything strictly before position t.
    X_train, y_train = X_ext_clean.iloc[:t], y_ext_clean.iloc[:t]

    # Single-row test slice; slicing t:t+1 keeps X_test two-dimensional.
    X_test = X_ext_clean.iloc[t:t + 1]
    y_test = y_ext_clean.iloc[t]

    # A fresh estimator is fitted from scratch at every step.
    model = LinearRegression().fit(X_train, y_train)

    y_pred = model.predict(X_test)
    predictions_ext.append(float(y_pred[0]))
    actuals_ext.append(float(y_test))

# Pooled out-of-sample R^2 for the extended-lag specification.
r2_ext = r2_score(y_true=actuals_ext, y_pred=predictions_ext)
print(f"Walk-Forward R^2 with Extended Lags: {r2_ext:.6f}")


Walk-Forward R^2 with Extended Lags: -0.861549


In [15]:
# Under walk-forward validation,
# a linear model trained on multiple lag horizons
# failed to produce positive out-of-sample R²
# (about -0.015 with two lags and -0.862 with four lags).

In [16]:
# next, we perform a permutation sanity check
# to verify whether this lack of predictability
# is structural or an artifact of temporal alignment.

In [17]:
# Seeded generator so the shuffle is reproducible across runs.
rng = np.random.default_rng(seed=42)

# Shuffled copy of the target: same index and name as y_clean, but with the
# values in a random order, which breaks any link between row t's features
# and row t's target.
y_permuted = pd.Series(
    rng.permutation(y_clean.to_numpy()),
    index=y_clean.index,
    name=y_clean.name,
)


In [18]:
# The permutation must shuffle values only. Checking the index alone cannot
# fail, because y_permuted was created from y_clean's own index, so also
# verify that the values are a genuine rearrangement of the originals.
assert (y_clean.index == y_permuted.index).all()

# Same multiset of returns before and after the shuffle.
assert np.array_equal(np.sort(y_permuted.to_numpy()), np.sort(y_clean.to_numpy()))

# The ordering actually changed; otherwise the check below would just
# repeat the unpermuted run.
assert not np.array_equal(y_permuted.to_numpy(), y_clean.to_numpy())

In [19]:
# Walk-forward loop identical in shape to the two-lag baseline, except that
# the model is trained and scored against the shuffled target. The features
# are still the real lagged returns in X_clean.
#
# NOTE(review): as in the baseline, the loop starts at t=1, so the earliest
# fits use fewer training rows than there are feature columns.
predictions_permuted = []
actuals_permuted = []

n_obs = len(y_clean)
for t in range(1, n_obs):
    # Training window: real features, shuffled targets, rows [0, t).
    X_train, y_train = X_clean.iloc[:t], y_permuted.iloc[:t]

    # Single-row test slice; slicing t:t+1 keeps X_test two-dimensional.
    X_test = X_clean.iloc[t:t + 1]
    y_test = y_permuted.iloc[t]

    # A fresh estimator is fitted from scratch at every step.
    model = LinearRegression().fit(X_train, y_train)

    y_pred = model.predict(X_test)
    predictions_permuted.append(float(y_pred[0]))
    actuals_permuted.append(float(y_test))

In [20]:
# Pooled out-of-sample R^2 for the permuted-target run.
# Stored under its own name (matching r2_ext / r2_ridge) so that the
# baseline score in `r2` is not overwritten and both remain available
# for comparison.
r2_permuted = r2_score(actuals_permuted, predictions_permuted)
print(f"Walk-Forward R^2 with Permuted Targets: {r2_permuted:.6f}")

Walk-Forward R^2 with Permuted Targets: -0.007152


In [21]:

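# When the target variable was permuted,
# performance remained at a similar level
# (R² of about -0.007, versus -0.015 on the true targets).
# The model therefore does no better on real returns than on shuffled ones,
# indicating that the observed lack of predictability
# is not a pipeline artifact,
# but rather a consequence of the underlying problem structure.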


In [24]:
# Expanding-window walk-forward loop on the four-lag features, using an
# L2-penalised linear model (Ridge) instead of plain LinearRegression.
# `Ridge` is imported together with the other scikit-learn names in the
# notebook's import cell rather than in the middle of the analysis.
#
# NOTE(review): the features are not standardised anywhere in this notebook,
# so alpha=1.0 acts on raw return magnitudes; the penalty may be strong
# relative to the data scale — TODO confirm this is the intended strength.
# NOTE(review): the loop starts at t=1, so the earliest fits use fewer
# training rows than there are feature columns.
predictions_ridge_ext = []
actuals_ridge_ext = []

for t in range(1, len(y_ext_clean)):
    # Training window: everything strictly before position t.
    X_train = X_ext_clean.iloc[:t]
    y_train = y_ext_clean.iloc[:t]

    # Single-row test slice; slicing t:t+1 keeps X_test two-dimensional.
    X_test = X_ext_clean.iloc[t:t+1]
    y_test = y_ext_clean.iloc[t]

    # A fresh estimator is fitted from scratch at every step.
    model = Ridge(alpha=1.0)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    predictions_ridge_ext.append(float(y_pred[0]))
    actuals_ridge_ext.append(float(y_test))



In [25]:
# Pooled out-of-sample R^2 for the Ridge walk-forward run.
r2_ridge = r2_score(y_true=actuals_ridge_ext, y_pred=predictions_ridge_ext)
print(f"Walk-Forward R^2 with Ridge: {r2_ridge:.6f}")


Walk-Forward R^2 with Ridge: -0.004456


In [None]:
# Even under L2 regularization and extended lag specifications,
# the linear model failed to achieve positive out-of-sample R².
# This indicates that regularization does not recover
# any stable linear return predictability in this setting.
