In [None]:
"""
Controllo di stazionarietà (ADF test) per ciascuna serie log‑prezzo prima di procedere alla matrice di cointegrazione.
Per ogni ticker stampa:
    ‑ stat ADF e p‑value sui livelli (log‑prezzi)
    ‑ stat ADF e p‑value sulle prime differenze (variazioni percentuali approx.)

Workflow:
1. Scarica gli Adj Close da Yahoo (yfinance)
2. Converte in log‑prezzi
3. Verifica che le serie siano I(1) (radice unitaria sui livelli, stazionarie sulle differenze)
4. Calcola la matrice di cointegrazione Engle‑Granger per tutte le coppie ordinate
"""

import itertools
import warnings
from typing import List

import numpy as np
import pandas as pd
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

warnings.filterwarnings("ignore", category=FutureWarning)

###################################################
### FUNZIONI
####################################################

def download_log_prices(tickers: List[str], start: str, end: str) -> pd.DataFrame:
    """Download 'Adj Close' prices with yfinance and return their natural logs.

    Args:
        tickers: Yahoo Finance symbols to download.
        start: Start date, passed through to ``yf.download``.
        end: End date, passed through to ``yf.download``.

    Returns:
        DataFrame of log-prices (columns = tickers, index = dates). Rows in
        which every ticker is missing are dropped. Columns of the requested
        tickers come first, in the order given in *tickers*; any other column
        returned by the download is kept after them.
    """
    raw = yf.download(tickers, start=start, end=end, auto_adjust=False, progress=False)
    px = raw["Adj Close"].dropna(how="all")

    requested = [tickers] if isinstance(tickers, str) else list(dict.fromkeys(tickers))

    # Depending on the yfinance version, a single-ticker download may come back
    # with flat columns, in which case selecting "Adj Close" yields a Series.
    # Wrap it so callers can always index the result by ticker.
    if isinstance(px, pd.Series):
        px = px.to_frame(name=requested[0])

    # The download does not promise to keep the requested ticker order, so
    # restore it here; nothing is dropped, unknown columns are appended.
    present = [tkr for tkr in requested if tkr in px.columns]
    others = [col for col in px.columns if col not in present]
    px = px[present + others]

    return np.log(px)


def adf_levels_and_diffs(series: pd.Series) -> dict:
    """Run the ADF test on *series* in levels and in first differences.

    Both tests use a constant-only regression (``regression="c"``). Missing
    values are dropped from the levels before testing; the first differences
    are computed on the series as given and then cleaned of missing values.

    Returns:
        Dict with the ADF statistic and p-value for the levels
        (``ADF_prezzi``, ``ADF_pValue_prezzi``) and for the first differences
        (``ADF_rendimenti``, ``ADF_pValue_rendimenti``), rounded to 4 decimals.
    """
    levels = series.dropna()
    first_diff = series.diff().dropna()

    # adfuller returns (statistic, p-value, ...); only the first two are used.
    level_outcome = adfuller(levels, regression="c")
    diff_outcome = adfuller(first_diff, regression="c")

    report = {}
    report["ADF_prezzi"] = round(level_outcome[0], 4)
    report["ADF_pValue_prezzi"] = round(level_outcome[1], 4)
    report["ADF_rendimenti"] = round(diff_outcome[0], 4)
    report["ADF_pValue_rendimenti"] = round(diff_outcome[1], 4)
    return report


def engle_granger(y: pd.Series, x: pd.Series):
    """Regress *y* on *x* by OLS (with intercept) and run an ADF test on the residuals.

    Args:
        y: Dependent series.
        x: Explanatory series, aligned with *y*.

    Returns:
        Tuple ``(beta, adf_stat, p_val, crit_vals)`` where ``beta`` is the OLS
        slope on *x*, ``adf_stat`` and ``p_val`` come from ``adfuller`` on the
        residuals, and ``crit_vals`` is the dict of critical values returned
        by ``adfuller`` (keys ``"1%"``, ``"5%"``, ``"10%"``).
    """
    x_const = sm.add_constant(x)
    model = sm.OLS(y, x_const).fit()
    resid = model.resid.dropna()

    # NOTE(review): the p-value and critical values below are adfuller's
    # standard unit-root ones; residuals of an estimated regression formally
    # call for Engle-Granger critical values (see statsmodels' `coint`).
    adf_stat, p_val, _, _, crit_vals, _ = adfuller(resid, maxlag=1, regression="c")

    # The fitted parameters are labelled ("const", <name of x>), so `params[1]`
    # on the Series is a deprecated positional fallback. Take the slope by
    # position explicitly; this also works if params is a plain array.
    beta = np.asarray(model.params)[1]
    return beta, adf_stat, p_val, crit_vals


def full_cointegration_matrix(tickers: List[str], start: str, end: str) -> pd.DataFrame:
    """Run the Engle-Granger procedure on every ordered pair of tickers.

    Args:
        tickers: Symbols to download and test.
        start: Start date forwarded to ``download_log_prices``.
        end: End date forwarded to ``download_log_prices``.

    Returns:
        DataFrame with one row per ordered pair (Y regressed on X) holding the
        slope, the ADF statistic and p-value on the residuals, and the 1%/5%/10%
        critical values, sorted by ascending p-value. Pairs with fewer than 30
        common observations are skipped; if no pair qualifies, an empty frame
        with the same columns is returned.
    """
    logprices = download_log_prices(tickers, start, end)

    columns = [
        "Y_ticker",
        "X_ticker",
        "Beta",
        "ADF_stat_eps",
        "p_value_eps",
        "crit_1%",
        "crit_5%",
        "crit_10%",
    ]

    # Select columns by ticker label, never by position: the downloaded frame
    # is not guaranteed to keep the order of `tickers`, so positional access
    # would attribute results to the wrong pair.
    available = [tkr for tkr in dict.fromkeys(tickers) if tkr in logprices.columns]

    results = []
    for y_tkr, x_tkr in itertools.permutations(available, 2):  # N x (N-1) ordered pairs
        # Keep only the dates on which both series are observed.
        pair = logprices[[y_tkr, x_tkr]].dropna()
        if len(pair) < 30:
            continue  # not enough data

        beta, adf_stat, p_val, crit = engle_granger(pair[y_tkr], pair[x_tkr])

        results.append(
            {
                "Y_ticker": y_tkr,
                "X_ticker": x_tkr,
                "Beta": round(beta, 4),
                "ADF_stat_eps": round(adf_stat, 4),
                "p_value_eps": round(p_val, 4),
                "crit_1%": crit["1%"],
                "crit_5%": crit["5%"],
                "crit_10%": crit["10%"],
            }
        )

    # Explicit columns keep sort_values from raising KeyError when `results`
    # is empty.
    table = pd.DataFrame(results, columns=columns)
    return table.sort_values("p_value_eps").reset_index(drop=True)


def stazionarieta_report(tickers: List[str], start: str, end: str) -> pd.DataFrame:
    """Build a per-ticker table of ADF results on levels and first differences.

    Downloads the log-prices for *tickers* between *start* and *end*, runs
    ``adf_levels_and_diffs`` on each ticker's column, and returns one row per
    ticker with the ticker name followed by the four ADF fields.
    """
    log_px = download_log_prices(tickers, start, end)

    ordered_columns = [
        "Ticker",
        "ADF_prezzi",
        "ADF_pValue_prezzi",
        "ADF_rendimenti",
        "ADF_pValue_rendimenti",
    ]

    records = [
        {**adf_levels_and_diffs(log_px[symbol]), "Ticker": symbol}
        for symbol in tickers
    ]

    # Column selection puts "Ticker" first regardless of dict insertion order.
    return pd.DataFrame(records)[ordered_columns]

###################################################
### INPUT
####################################################


if __name__ == "__main__":
    # Sample window and universe for the analysis.
    START_DATE = "2021-01-01"
    END_DATE = "2024-12-31"
    TICKERS = [
        "AAPL",
        "MSFT",
        "AMZN",
        "NVDA",
        "ENEL.MI",
        "INTC",
        "RIOT",
        "BBD",
    ]

    ###################################################
    ### OUTPUT
    ####################################################

    # 1) Stationarity report for each individual asset
    adf_table = stazionarieta_report(TICKERS, START_DATE, END_DATE)
    print(
        "\n=== TEST ADF SUI SINGOLI ASSET ===",
        adf_table.to_string(index=False),
        sep="\n",
    )

    # 2) Cointegration matrix over all ordered pairs
    matrix = full_cointegration_matrix(TICKERS, START_DATE, END_DATE)
    print(
        "\n=== ENGLE-GRANGER ===",
        matrix.to_string(index=False),
        sep="\n",
    )

    # (optional) Save the results
    # adf_table.to_csv("adf_unitroot_report.csv", index=False)
    # matrix.to_csv("cointegration_matrix.csv", index=False)



=== TEST ADF SUI SINGOLI ASSET ===
 Ticker  ADF_prezzi  ADF_pValue_prezzi  ADF_rendimenti  ADF_pValue_rendimenti
   AAPL     -0.8643             0.7995        -19.5151                    0.0
   MSFT     -1.3575             0.6025        -20.3803                    0.0
   AMZN     -1.0247             0.7441        -31.4617                    0.0
   NVDA      0.0438             0.9620        -32.1010                    0.0
ENEL.MI     -1.3230             0.6186        -18.1907                    0.0
   INTC     -0.7464             0.8344        -30.1917                    0.0
   RIOT     -1.8890             0.3373        -20.7682                    0.0
    BBD     -1.9981             0.2874        -32.2603                    0.0

=== ENGLE-GRANGER ===
Y_ticker X_ticker    Beta  ADF_stat_eps  p_value_eps   crit_1%   crit_5%  crit_10%
    INTC     RIOT  0.2753       -3.9333       0.0018 -3.436887 -2.864426 -2.568307
    RIOT     INTC  3.3997       -3.6056       0.0057 -3.436887 -2.864426 