# TDA_MILA – Minimal Pipeline (MILA Topological Data Analysis)

Minimal notebook for reproducing the TDA analysis used in:

**Domínguez Monterroza, A., Mateos Caballero, A., Jiménez-Martín, A. (2023).**  
*Topological Data Analysis to Characterize Fluctuations in the Latin American Integrated Market.*  
Applied Computer Sciences in Engineering (WEA 2023), CCIS 1928, Springer.  
https://doi.org/10.1007/978-3-031-46739-4_18


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ripser import ripser
from persim import PersistenceLandscape
from pathlib import Path

# Default size applied to every matplotlib figure that does not set its own.
plt.rcParams['figure.figsize'] = (8,4)

# Project layout, anchored at the parent of the current working directory.
ROOT = Path('..').resolve()
DATA = ROOT.joinpath('data')
PROC = DATA.joinpath('processed')
RES = ROOT.joinpath('results')
PERS = RES.joinpath('persistence')
LAND = RES.joinpath('landscapes')

# Only the output directories are created here; DATA and PROC are left untouched.
for d in (RES, PERS, LAND):
    d.mkdir(parents=True, exist_ok=True)

In [None]:
# The price table is read from the processed-data folder.
prices_path = PROC.joinpath('mila_prices.parquet')
print('Loading prices from:', prices_path)

# Load the parquet file into a DataFrame and preview its first rows.
prices = pd.read_parquet(prices_path)
prices.head()

In [None]:
# Log-returns: natural log of each price divided by the price one row earlier.
# The first row has no predecessor (NaN) and is removed by dropna(), as is any
# other row containing a missing value.
log_ret = np.log(prices.div(prices.shift(1))).dropna()
log_ret.head()

## Sliding windows

In [None]:
def sliding_windows(df, size=50, step=1):
    """Split *df* into overlapping, fixed-length windows of consecutive rows.

    Args:
        df: pandas object supporting ``len()``, ``.iloc`` slicing and ``.index``.
        size: Number of consecutive rows in each window (must be >= 1).
        step: Row offset between the starts of successive windows (must be >= 1).

    Returns:
        A list of ``(index, window)`` tuples in positional order, where
        ``window`` is ``df.iloc[i:i + size]`` and ``index`` is that slice's
        index. Only complete windows are produced, so the list is empty when
        ``df`` has fewer than ``size`` rows.

    Raises:
        ValueError: If ``size`` or ``step`` is smaller than 1.
    """
    if size < 1 or step < 1:
        raise ValueError(
            f"size and step must be >= 1, got size={size}, step={step}"
        )
    # The last valid start position is len(df) - size, hence the +1 on the
    # exclusive upper bound of the range.
    starts = range(0, len(df) - size + 1, step)
    slices = (df.iloc[i:i + size] for i in starts)
    return [(sub.index, sub) for sub in slices]

# Build every complete 50-row window of the log-returns, advancing one row at a time.
windows = sliding_windows(log_ret, size=50, step=1)
# Number of windows produced (displayed as the cell output).
len(windows)

## Compute persistence diagrams for H1

In [None]:
# One H1 persistence diagram per window, stored in window order.
H1_list = []

for k, (_dates, window_df) in enumerate(windows):
    # The window's values are passed to ripser with maxdim=1; the 'dgms' entry
    # of the result is indexed by homology dimension.
    diagrams = ripser(window_df.values, maxdim=1)['dgms']
    # Fall back to an empty (0, 2) array when no dimension-1 diagram is present.
    if len(diagrams) > 1:
        H1 = diagrams[1]
    else:
        H1 = np.empty((0, 2))
    H1_list.append(H1)
    # Each diagram is also written to its own .npy file under PERS.
    np.save(PERS / f'H1_window_{k}.npy', H1)

len(H1_list)

## Persistence Landscapes (L1 and L2 norms)

In [None]:
# Summarise each window's H1 diagram by the values of p_norm(1) and p_norm(2) of a
# landscape object built from it, and store one row per window.
# NOTE(review): confirm that the installed persim exports `PersistenceLandscape` with
# this constructor signature and a `p_norm` method — verify against the persim
# landscapes documentation before relying on this cell.
records = []

# `dfw` is unpacked but not used in this loop; only the window's dates are recorded.
for k, (dates, dfw) in enumerate(windows):
    H1 = H1_list[k]
    if H1.shape[0] == 0:
        # Empty diagram: no landscape is built and both norms are recorded as zero.
        L1 = 0.0
        L2 = 0.0
    else:
        pl = PersistenceLandscape(dgms=[H1])
        L1 = pl.p_norm(1)
        L2 = pl.p_norm(2)

    # One record per window, tagged with the first and last index label of the window.
    records.append({
        'window': k,
        'start_date': dates[0],
        'end_date': dates[-1],
        'L1_norm': float(L1),
        'L2_norm': float(L2)
    })

# Collect the records in a DataFrame, write it to parquet under LAND, and preview it.
norms = pd.DataFrame(records)
norms.to_parquet(LAND / 'mila_landscape_norms.parquet')
norms.head()

## Plot norms over time

In [None]:
# Each window is placed on the time axis at its end date.
t = pd.to_datetime(norms['end_date'])
L1 = norms['L1_norm']
L2 = norms['L2_norm']

# Draw both norm series on a single figure, L1 first and then L2.
plt.figure(figsize=(10,4))
for curve, curve_label in ((L1, 'L1'), (L2, 'L2')):
    plt.plot(t, curve, label=curve_label)
plt.legend()
plt.title('Persistence Landscape Norms (H1)')
plt.tight_layout()
plt.show()

## Example: Persistence Landscape for a selected window

In [None]:
# Plot the landscape curves of a single window, chosen by its position in H1_list.
example_window = 0
H1 = H1_list[example_window]

if H1.shape[0] > 0:
    # NOTE(review): confirm that persim exports `PersistenceLandscape` and that the
    # object can be called on an array of x values — verify against the persim docs.
    pl = PersistenceLandscape(dgms=[H1])
    # 500 evenly spaced x values from the smallest entry of column 0 to the largest
    # entry of column 1 of the diagram (presumably birth and death — TODO confirm).
    xs = np.linspace(H1[:,0].min(), H1[:,1].max(), 500)
    ys = pl(xs)

    plt.figure(figsize=(10,4))
    # Assumes `ys` is 2-D with one row per curve and one column per x value — TODO confirm.
    for i in range(ys.shape[0]):
        plt.plot(xs, ys[i,:])
    plt.title(f'Persistence Landscape – Window {example_window}')
    plt.tight_layout()
    plt.show()
else:
    # The selected window has an empty H1 diagram, so there is nothing to draw.
    print('No H1 features for this window.')