# RMT_Spain – Random Matrix Theory Analysis (Spanish Stock Market)

This notebook performs a minimal Random Matrix Theory (RMT) analysis of the
Spanish stock market around the COVID-19 crisis using **log-return data**
stored in `data/processed/log_ret_stocks_spain.xlsx`.

It corresponds to the analysis underlying the article:

Domínguez-Monterroza, A., Jiménez-Martín, A., & Mateos-Caballero, A. (2025).  
*Correlation Structure of the Spanish Stock Market Around COVID-19 Using Random Matrix Theory.*  
Computational Economics, 66, 4543–4558.  
https://doi.org/10.1007/s10614-024-10820-0

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Notebook-wide matplotlib defaults: 8x5 inch figures, grid lines switched off.
plt.rcParams.update({
    'figure.figsize': (8, 5),
    'axes.grid': False,
})

In [None]:
# Path to the log-return workbook (relative to the notebook's working directory)
data_path = '../data/processed/log_ret_stocks_spain.xlsx'

# Load the spreadsheet into a DataFrame
df = pd.read_excel(data_path)

# A 'Date' column is mandatory: it becomes the time index below
if 'Date' not in df.columns:
    raise ValueError("El archivo debe tener una columna 'Date' con fechas.")

# Parse the dates, promote them to the index and order the rows chronologically
df = (
    df.assign(Date=pd.to_datetime(df['Date']))
      .set_index('Date')
      .sort_index()
)

# Every remaining column header is treated as a ticker
tickers = list(df.columns)

print('Número de activos:', len(tickers))
print('Tickers:', tickers)

# Independent copy used by the rest of the notebook
log_returns = df.copy()
log_returns.head()

In [None]:
# Analysis windows keyed by period label; each value is a (start, end) pair of
# ISO date strings that slice_period feeds to label-based .loc slicing.
# NOTE(review): 'during_covid' ends on 2021-01-02 while 'post_covid' starts on
# 2021-03-03, so roughly two months belong to no sub-period — confirm intended.
PERIODS = dict(
    pre_covid=('2018-01-02', '2020-01-31'),
    during_covid=('2020-02-03', '2021-01-02'),
    post_covid=('2021-03-03', '2023-03-17'),
    full=('2018-01-02', '2023-03-17'),
)
PERIODS

In [None]:
def slice_period(df, start, end):
    """Return the rows of ``df`` whose index labels fall between ``start`` and ``end``.

    The selection is a label-based ``.loc`` slice, so both endpoints are
    included when present in the index.

    Parameters
    ----------
    df : object supporting ``.loc`` label slicing (e.g. a pandas DataFrame)
    start, end : slice bounds passed straight to ``.loc``

    Returns
    -------
    The sliced object produced by ``df.loc[start:end]``.
    """
    window = slice(start, end)
    return df.loc[window]

def marchenko_pastur_bounds(q, sigma2=1.0):
    """Return the lower and upper edges of the Marčenko–Pastur spectrum.

    The edges are computed as ``sigma2 * (1 -/+ sqrt(1/q))**2``. Because the
    formula uses ``1/q``, ``q`` is expected to be the ratio of observations to
    variables (T/N), not N/T.

    Parameters
    ----------
    q : float
        Aspect ratio; must be strictly positive.
    sigma2 : float, default 1.0
        Variance scale multiplying both edges.

    Returns
    -------
    tuple
        ``(lam_min, lam_max)``.

    Raises
    ------
    ValueError
        If ``q <= 0``.
    """
    if q <= 0:
        raise ValueError("q debe ser positivo.")
    spread = np.sqrt(1 / q)
    lower_edge = sigma2 * (1 - spread) ** 2
    upper_edge = sigma2 * (1 + spread) ** 2
    return lower_edge, upper_edge

In [None]:
# Pearson correlation matrix of the log-returns for each analysis window,
# stored by period label for reuse in the cells below.
corr_mats = {}

for name, window in PERIODS.items():
    sub = slice_period(log_returns, *window)
    corr_mats[name] = sub.corr()
    print(f"{name}: {corr_mats[name].shape}")

In [None]:
# Compare the empirical eigenvalue range of each period's correlation matrix
# with the Marčenko–Pastur (MP) edges expected for pure noise.
rmt_summary = []

for name, (start, end) in PERIODS.items():
    sub = slice_period(log_returns, start, end)
    T, N = sub.shape  # T observations (rows), N assets (columns)

    # marchenko_pastur_bounds evaluates (1 ± sqrt(1/q))**2 and the MP edges of
    # a correlation matrix are (1 ± sqrt(N/T))**2, so q must be T/N.
    # (Passing N/T would yield edges of (1 ± sqrt(T/N))**2, far too wide.)
    q = T / N

    corr = corr_mats[name].values

    # eigh returns ascending eigenvalues; reorder to descending so that index 0
    # is the largest eigenvalue and its eigenvector.
    eigvals, eigvecs = np.linalg.eigh(corr)
    idx = eigvals.argsort()[::-1]
    eigvals = eigvals[idx]
    eigvecs = eigvecs[:, idx]

    lam_min_mp, lam_max_mp = marchenko_pastur_bounds(q)

    rmt_summary.append({
        'period': name,
        'N_assets': N,
        'T_samples': T,
        'q': q,
        'lambda_min_emp': eigvals.min(),
        'lambda_max_emp': eigvals.max(),
        'lambda_min_mp': lam_min_mp,
        'lambda_max_mp': lam_max_mp,
    })

pd.DataFrame(rmt_summary)

In [None]:
# 2x2 grid of correlation heatmaps, one panel per period, sharing one colorbar.
# constrained_layout reserves space for the shared colorbar; tight_layout does
# not support a colorbar attached to several Axes (it warns and may let the
# colorbar overlap the panels), so it is not called here.
fig, axes = plt.subplots(2, 2, figsize=(12, 10), constrained_layout=True)
axes = axes.ravel()

titles = {
    'pre_covid': 'Pre-COVID-19',
    'during_covid': 'During COVID-19',
    'post_covid': 'Post-COVID-19',
    'full': 'Full period'
}

for ax, (name, title) in zip(axes, titles.items()):
    c = corr_mats[name]
    # Fixed [-1, 1] colour range so the four panels are directly comparable
    im = ax.imshow(c, vmin=-1, vmax=1, cmap='coolwarm')
    ax.set_title(title)
    ax.set_xticks([])
    ax.set_yticks([])

# All panels share vmin/vmax, so the last image handle describes every panel
fig.colorbar(im, ax=axes.tolist(), shrink=0.6)
plt.show()

In [None]:
# Empirical eigenvalue histogram of one period against the Marčenko–Pastur
# (MP) density expected for a pure-noise correlation matrix.
period_to_plot = 'during_covid'

start, end = PERIODS[period_to_plot]
sub = slice_period(log_returns, start, end)
T, N = sub.shape  # T observations (rows), N assets (columns)

# Both marchenko_pastur_bounds and the density below are written in terms of
# 1/q, i.e. they expect q = T/N. Using N/T here would place the edges at
# (1 ± sqrt(T/N))**2 and mis-scale the density.
q = T / N

corr = corr_mats[period_to_plot].values
# eigvalsh returns the eigenvalues of a symmetric matrix in ascending order
eigvals = np.linalg.eigvalsh(corr)

lam_min_mp, lam_max_mp = marchenko_pastur_bounds(q)

plt.figure(figsize=(7,4))
plt.hist(eigvals, bins=20, density=True, alpha=0.5, label='Empirical eigenvalues')

# MP density (unit variance): rho(lam) = q * sqrt((lam+ - lam)(lam - lam-)) / (2*pi*lam)
# inside [lam-, lam+], and zero outside.
lam_grid = np.linspace(eigvals.min(), eigvals.max(), 500)
rho = np.zeros_like(lam_grid)
mask = (lam_grid >= lam_min_mp) & (lam_grid <= lam_max_mp)
rho[mask] = q * np.sqrt(
    (lam_max_mp - lam_grid[mask]) * (lam_grid[mask] - lam_min_mp)
) / (2 * np.pi * lam_grid[mask])

plt.plot(lam_grid, rho, lw=2, label='Marčenko–Pastur')
plt.axvline(lam_min_mp, color='k', ls='--', lw=1)
plt.axvline(lam_max_mp, color='k', ls='--', lw=1)

plt.title(f'Eigenvalue distribution – {period_to_plot}')
plt.xlabel('Eigenvalue')
plt.ylabel('Density')
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of the components of the eigenvector attached to the largest
# eigenvalue of one period's correlation matrix.
period_to_inspect = 'pre_covid'

corr_frame = corr_mats[period_to_inspect]
corr = corr_frame.values
# Take the labels from the matrix itself so they always match its dimension
labels = corr_frame.columns.tolist()

# eigh returns ascending eigenvalues; reorder to descending
eigvals, eigvecs = np.linalg.eigh(corr)
idx = eigvals.argsort()[::-1]
eigvals = eigvals[idx]
eigvecs = eigvecs[:, idx]

v1 = eigvecs[:, 0]
# The sign of an eigenvector is arbitrary and may differ between LAPACK
# builds; orient it so its components sum to a non-negative value, which
# keeps the figure reproducible.
if v1.sum() < 0:
    v1 = -v1

x = np.arange(len(labels))
plt.figure(figsize=(10,4))
plt.bar(x, v1)
plt.xticks(x, labels, rotation=90, fontsize=6)
plt.title(f'Eigenvector of largest eigenvalue – {period_to_inspect}')
plt.ylabel('Component')
plt.tight_layout()
plt.show()