In [1]:
import numpy as np
import os
import math
import pandas as pd
from datetime import date as pdate
from sklearn import linear_model
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import statistics

In [2]:
# demo of rank reduction of a correlation matrix

filepath = "./data/sofr_hist_curves2.csv" 

df_data = pd.read_csv(filepath, index_col = ["Tenor"]).T 
corr_matrix = df_data.corr()

egval, egvec = np.linalg.eig(corr_matrix)

egval[0], egvec[0]

(23.24993421972503,
 array([-1.85015760e-01, -5.01612288e-01, -6.54240592e-01, -2.17516006e-01,
         4.32883406e-01, -2.00905550e-01,  1.61403996e-02,  9.01732099e-02,
         3.85052105e-02,  1.59034313e-02, -2.23328962e-02,  2.65428895e-03,
        -2.03718221e-02,  2.50960065e-03,  2.12911553e-03,  5.36264180e-04,
         1.13497929e-05,  4.92228838e-05,  3.73190377e-06,  3.63584925e-05,
         3.63554845e-05,  4.11688081e-05,  1.57374189e-05, -3.01937152e-05]))

In [3]:
# target rank m
m=5
df_eigenvec = pd.DataFrame(egvec[:][:,0:m])     # E 
df_eigenval_sqrt = pd.DataFrame(np.sqrt(np.diag(egval[0:m])).tolist())  # sqrt(lambda)
factor_loadings = pd.DataFrame(np.dot(df_eigenvec, df_eigenval_sqrt).tolist()) # H hat matrix
norms = pd.DataFrame(np.dot(factor_loadings, factor_loadings.T).tolist()) # A matrix


In [4]:
factor_loadings

Unnamed: 0,0,1,2,3,4
0,-0.892112,-0.400177,-0.207369,-0.021021,0.023079
1,-0.924961,-0.377148,-0.039036,0.002977,-0.023037
2,-0.958426,-0.281362,0.036496,0.022951,-0.019005
3,-0.977645,-0.198232,0.065243,0.023697,-0.006247
4,-0.987686,-0.135669,0.075445,0.017507,0.00274
5,-0.993448,-0.085677,0.07374,0.01277,0.009302
6,-0.996544,-0.046524,0.066766,0.00887,0.013136
7,-0.997945,-0.017583,0.059598,0.00408,0.014206
8,-0.998504,0.004075,0.052613,-0.001146,0.013287
9,-0.998668,0.021067,0.044992,-0.006002,0.011066


In [5]:

rescaling = np.diag(np.reciprocal(np.sqrt(np.diag(norms.to_numpy()))))  # C matrix for scaling
factor_loadings_rescaled = pd.DataFrame(np.dot(rescaling, np.dot(df_eigenvec, df_eigenval_sqrt).tolist()))  # H hat scaled = sqrt(C) * E hat * sqrt(lambda hat)
corr_matrix_approx = pd.DataFrame(np.dot(factor_loadings_rescaled, factor_loadings_rescaled.T).tolist())    # A hat - new rank reduced correlation
#print(corr_matrix_approx)
#print(corr_matrix)
print(pd.DataFrame(corr_matrix_approx.to_numpy() - corr_matrix.to_numpy()))

              0             1             2             3             4   \
0  -1.110223e-16  1.284163e-04  1.715903e-05  7.170244e-06  1.324598e-05   
1   1.284163e-04  2.220446e-16  1.236069e-04  1.783065e-04  1.623170e-04   
2   1.715903e-05  1.236069e-04  2.220446e-16  3.259080e-05  6.186431e-05   
3   7.170244e-06  1.783065e-04  3.259080e-05 -1.110223e-16  2.270853e-05   
4   1.324598e-05  1.623170e-04  6.186431e-05  2.270853e-05  2.220446e-16   
5   9.913496e-06  1.148350e-04  4.787281e-05  3.439154e-05  1.143674e-05   
6   1.830759e-05  7.846176e-05  3.687038e-05  4.918908e-05  4.956358e-05   
7   2.871049e-05  5.034875e-05  3.382575e-05  6.195076e-05  7.253711e-05   
8   3.639897e-05  3.186090e-05  3.416434e-05  6.933597e-05  7.924926e-05   
9   4.266864e-05  2.967482e-05  3.662873e-05  7.338912e-05  8.193189e-05   
10  3.890977e-05  3.656113e-05  3.055719e-05  6.507816e-05  7.656684e-05   
11  3.250226e-05  4.997505e-05  2.426106e-05  5.413912e-05  6.898865e-05   
12  2.487314

In [6]:
corr_matrix_approx

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,1.0,0.983675,0.959154,0.93736,0.919506,0.905231,0.893938,0.885218,0.878587,0.873568,...,0.8604,0.85844,0.856348,0.854063,0.851534,0.848721,0.832819,0.821843,0.813578,0.804594
1,0.983675,1.0,0.991792,0.976812,0.961882,0.948242,0.936508,0.927128,0.919755,0.913836,...,0.895672,0.892794,0.889783,0.886601,0.88322,0.879616,0.860518,0.84685,0.836872,0.827482
2,0.959154,0.991792,1.0,0.995865,0.987948,0.979094,0.970624,0.963432,0.957517,0.95255,...,0.935817,0.93308,0.930245,0.927281,0.924174,0.920903,0.903786,0.891097,0.881995,0.873967
3,0.93736,0.976812,0.995865,1.0,0.997881,0.993324,0.988014,0.983059,0.978738,0.974936,...,0.960979,0.958607,0.956152,0.953593,0.950919,0.948116,0.933412,0.922133,0.91409,0.907258
4,0.919506,0.961882,0.987948,0.997881,1.0,0.9987,0.995858,0.992694,0.989687,0.986881,...,0.97557,0.973556,0.971463,0.969276,0.966988,0.964589,0.951889,0.94183,0.934634,0.928662
5,0.905231,0.948242,0.979094,0.993324,0.9987,1.0,0.999189,0.997522,0.995631,0.993698,...,0.984886,0.983227,0.981494,0.979673,0.97776,0.975747,0.964917,0.956045,0.949651,0.944408
6,0.893938,0.936508,0.970624,0.988014,0.995858,0.999189,1.0,0.999542,0.998568,0.997364,...,0.990749,0.98941,0.987999,0.986505,0.984924,0.983249,0.97403,0.966204,0.960499,0.955839
7,0.885218,0.927128,0.963432,0.983059,0.992694,0.997522,0.999542,1.0,0.999727,0.99909,...,0.994235,0.993151,0.991993,0.990752,0.989426,0.988007,0.979997,0.972962,0.967754,0.963494
8,0.878587,0.919755,0.957517,0.978738,0.989687,0.995631,0.998568,0.999727,1.0,0.999812,...,0.996387,0.995509,0.994553,0.993512,0.992383,0.991162,0.984067,0.977633,0.972782,0.968794
9,0.873568,0.913836,0.95255,0.974936,0.986881,0.993698,0.997364,0.99909,0.999812,1.0,...,0.997814,0.997115,0.996333,0.995463,0.994503,0.993447,0.987111,0.981181,0.976615,0.972826


In [7]:
# eigen vectors in one matrix
E_hat = np.array(
    [[0.5703, 0.2977, -.1687],
     [0.4716, 0.6229, 0.3192],
     [0.512, -0.3821, -.6639],
     [0.4361, -.6143, .6549]]
)
# eigen values in one matrix
lambda_hat = np.diag(np.array([2.8, .8889, .3078]))
lambda_hat

array([[2.8   , 0.    , 0.    ],
       [0.    , 0.8889, 0.    ],
       [0.    , 0.    , 0.3078]])

In [8]:
# compute the factor loadings or new H or Principal Components (PC)
H_hat = E_hat @ np.sqrt(lambda_hat)
H_hat @ H_hat.T     # display the resultant correlation matrix

array([[0.99821676, 0.90133025, 0.75094224, 0.49981622],
       [0.90133025, 0.99899681, 0.39929058, 0.30006976],
       [0.75094224, 0.39929058, 0.99944988, 0.70001137],
       [0.49981622, 0.30006976, 0.70001137, 0.99996586]])

In [15]:
# Notice above does not have 1 for the variances whereas the data is standardised e.g. X_1 ~ N(0,1)
# so it should be 1. Perform the following to normalise the corr matrix
norms = H_hat @ H_hat.T
# find scaling matrix
rescaling = np.diag(np.reciprocal(np.sqrt(np.diag(norms))))
# scale H_hat and finc correlation matrix again
H_hat_scaled = rescaling @ H_hat
H_hat_scaled @ H_hat_scaled.T

array([[1.        , 0.90258782, 0.75181952, 0.500271  ],
       [0.90258782, 1.        , 0.39960094, 0.30022551],
       [0.75181952, 0.39960094, 1.        , 0.70021595],
       [0.500271  , 0.30022551, 0.70021595, 1.        ]])

In [20]:
rescaling

array([[1.00089281, 0.        , 0.        , 0.        ],
       [0.        , 1.00050197, 0.        , 0.        ],
       [0.        , 0.        , 1.00027517, 0.        ],
       [0.        , 0.        , 0.        , 1.00001707]])

In [24]:
np.diag(np.sqrt(np.reciprocal(norms)))

array([1.00089281, 1.00050197, 1.00027517, 1.00001707])

---
# Lect7 pg 14 exercise

In [37]:
# eigen vectors in one matrix
E_hat = np.array(
    [[0.5703, 0.2977, -.1687],
     [0.4716, 0.6229, 0.3192],
     [0.512, -0.3821, -.6639],
     [0.4361, -.6143, .6549]]
)
# eigen values in one matrix
lambda_hat = np.diag(np.array([2.8, .8889, .3078]))
lambda_hat

array([[2.8   , 0.    , 0.    ],
       [0.    , 0.8889, 0.    ],
       [0.    , 0.    , 0.3078]])

In [38]:
# compute the factor loadings or new H or Principal Components (PC)
H_hat = E_hat @ np.sqrt(lambda_hat)
norms = H_hat @ H_hat.T     # display the resultant correlation matrix

In [43]:
norms

array([[0.99821676, 0.90133025, 0.75094224, 0.49981622],
       [0.90133025, 0.99899681, 0.39929058, 0.30006976],
       [0.75094224, 0.39929058, 0.99944988, 0.70001137],
       [0.49981622, 0.30006976, 0.70001137, 0.99996586]])

In [44]:
C_sqrt = np.diag(np.diag(np.reciprocal(np.sqrt(norms))))
C_sqrt

array([[1.00089281, 0.        , 0.        , 0.        ],
       [0.        , 1.00050197, 0.        , 0.        ],
       [0.        , 0.        , 1.00027517, 0.        ],
       [0.        , 0.        , 0.        , 1.00001707]])

In [None]:

H_hat_scaled = C_sqrt @ E_hat @ np.sqrt(lambda_hat)
H_hat_scaled

array([[ 0.95514643,  0.2809266 , -0.09367786],
       [ 0.78953386,  0.58757422,  0.17718018],
       [ 0.85697562, -0.36034872, -0.36843126],
       [ 0.72974733, -0.5791811 ,  0.36334293]])

In [42]:
H_hat_scaled @ H_hat_scaled.T

array([[1.        , 0.90258782, 0.75181952, 0.500271  ],
       [0.90258782, 1.        , 0.39960094, 0.30022551],
       [0.75181952, 0.39960094, 1.        , 0.70021595],
       [0.500271  , 0.30022551, 0.70021595, 1.        ]])