In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Seed NumPy's global random number generator so that anything drawing from
# np.random is repeatable from one run of the notebook to the next.
np.random.seed(0)

In [None]:
backtest_date = "2015-01-01"

# Load the raw series and index them by observation date.
Z_raw = pd.read_csv("data/Z_raw.csv", index_col="Date")
Z_raw.index = pd.to_datetime(Z_raw.index)

# Split chronologically at backtest_date.
# Label slices (Z_raw[:backtest_date] / Z_raw[backtest_date:]) are inclusive
# at BOTH ends, so a row dated exactly backtest_date would end up in the
# training/validation frame and in the holdout frame at the same time.
# Boolean masks keep the two frames disjoint; the boundary row belongs to the
# holdout period. The timestamp reuses the index's timezone (None when the
# index is timezone-naive) so the comparison is always well defined.
# .copy() turns each split into an independent frame, so later column
# assignments neither write through to Z_raw nor trigger SettingWithCopy
# warnings.
in_holdout = Z_raw.index >= pd.Timestamp(backtest_date, tz=Z_raw.index.tz)
Z = Z_raw.loc[~in_holdout].copy()  # training and validation data
Z_holdout = Z_raw.loc[in_holdout].copy()  # test/holdout data

# Replace every series by its decile index (0..9). The bin edges are estimated
# on Z only and then reused unchanged for Z_holdout.
for col in Z.columns:
    #No holdout data is used in computing the deciles
    col_arr, col_bins = pd.qcut(Z[col], q=10, labels=range(10), retbins=True)
    Z.loc[:, col] = pd.to_numeric(col_arr).astype(int)

    def map_to_quantile(val):
        """Return the decile index (0..9) of `val` under the current column's bins.

        `val` may be a scalar or an array. pd.qcut labels a value i when it
        lies in (col_bins[i], col_bins[i + 1]]; counting how many interior
        edges are strictly below the value (searchsorted with side="left")
        reproduces exactly that rule. Values below the lowest edge map to 0
        and values above the highest edge map to 9, because only the nine
        interior edges are searched.

        The earlier nearest-upper-edge lookup disagreed with this rule: with
        edges [0, 10, 20] the value 11 was assigned decile 0 instead of 1.
        """
        return np.searchsorted(col_bins[1:-1], val, side="left")

    # Vectorized over the whole holdout column instead of a per-element apply.
    Z_holdout.loc[:, col] = map_to_quantile(Z_holdout[col].to_numpy())

# Split the rows of Z into a training set and a validation set, holding out
# 20% of the rows for validation; random_state=0 makes the split repeatable.
Z_train, Z_val = train_test_split(Z, test_size=0.2, random_state=0)

In [None]:
# Pairwise Pearson correlations between the columns of Z, i.e. over the
# training and validation periods only, rounded to two decimals for display.
Z.corr(method="pearson").round(decimals=2)

In [None]:
# Draw the decile path of every series over the full sample (training,
# validation and holdout rows), with a vertical line at backtest_date.
display_names = {
    "vix": "Volatility",
    "unemployment": "Unemployment",
    "inflation": "Inflation",
    "mort30us": "Mortgage",
}
deciles_all = pd.concat([Z, Z_holdout]).rename(columns=display_names)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
deciles_all.plot(ax=ax, fontsize="large")

ax.set_xlabel("Date", fontsize="x-large")
ax.set_ylabel("Decile", fontsize="x-large")

# Values are stored as 0..9; label the ticks 1..10 for readability.
ax.set_yticks(range(10))
ax.set_yticklabels(range(1, 11))

ax.axvline(backtest_date, color="black")
fig.tight_layout()
plt.show()

# Average L1 distance between consecutive rows of Z. diff() leaves the first
# row as NaN, which dropna() removes before the row-wise norms are taken.
step_changes = Z.diff().dropna()
l1_step_sizes = step_changes.apply(lambda row: np.linalg.norm(row, ord=1), axis=1)
print("The average value of ||z_(t+1)-z_(t)||_1 = {:.2f}.".format(l1_step_sizes.mean()))