# Estimate covariance matrix of financial time series (FTS)

In [1]:
import re
import sys
import warnings
import numpy as np # type: ignore
import pandas as pd # type: ignore

sys.path.append('../modules')
import estimate_market_factors as emf # type: ignore

# Notebook-wide display and warning settings.
# Silence every Python warning raised from here on.
warnings.filterwarnings("ignore")
# Turn off pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None
# Do not truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
#pd.options.display.max_rows = 277

## Global variables

In [2]:
# Relative paths to the project folders used by this notebook.
input_path_raw = "../input_files/raw_data"  # not referenced in the cells shown here
input_path_processed = "../input_files/processed_data"  # location of df_all_markets_<date>.csv
input_path_data_dictionary = "../input_files/data_dictionary"  # location of tracy_widom.csv and onatski.csv
log_path = "../logs"  # forwarded to emf.get_market_efficiency as log_path
output_path = "../output_files"  # destination of the CSV files written per market
# Date tag of the processed input file; dashes are stripped when file names are built.
input_generation_date = "2024-08-21"
# Default file-name stem; the per-market loop overwrites it from the market ticker.
stock_index_name = "gdaxi"

# Rolling-window parameters:
# time_delta is the window length in calendar days (added to each initial date),
# time_step is the stride, in distinct dates, between consecutive window starts.
time_delta = 240
time_step = 20

## Load data and drop duplicates by market ticker, date and symbol

In [3]:
# Read the processed all-markets file for the configured generation date and
# keep only the first row of every (market_ticker, date, symbol) combination
df_stock_index = pd.read_csv(
    "{}/df_all_markets_{}.csv".format(
        input_path_processed,
        re.sub("-", "", input_generation_date)
    ),
    low_memory = False
).drop_duplicates(
    subset = ["market_ticker", "date", "symbol"],
    keep = "first",
    ignore_index = True
)

# Parse the dates; values that cannot be parsed become NaT
df_stock_index["date"] = pd.to_datetime(df_stock_index["date"], errors = "coerce")

## Rolling window analysis

In [4]:
def get_market_list(df_stock_index, time_delta, time_step):
    """Build the rolling-window date ranges for one market.

    The distinct dates found in ``df_stock_index`` are sorted, every
    ``time_step``-th one is kept as the start of a window, and each window
    ends ``time_delta`` calendar days after its start.

    Args:
        df_stock_index: DataFrame with a datetime ``date`` column.
        time_delta: Window length in calendar days.
        time_step: Stride, counted in distinct dates, between the starts of
            consecutive windows.

    Returns:
        DataFrame with the string columns ``initial_date`` and
        ``final_date``, one row per window, ordered by start date.
    """
    # Distinct, sorted dates. Missing dates (NaT) are discarded. Working on
    # the dates directly avoids relying on the name of the counter column
    # that value_counts().reset_index() generates, which differs between
    # pandas versions.
    unique_dates = (
        df_stock_index["date"]
            .dropna()
            .drop_duplicates()
            .sort_values()
            .reset_index(drop = True)
    )

    initial_dates = unique_dates.iloc[::time_step].reset_index(drop = True)
    final_dates = initial_dates + pd.DateOffset(days = time_delta)

    market_args_list = pd.DataFrame(
        {
            "initial_date" : initial_dates.astype(str),
            "final_date" : final_dates.astype(str)
        }
    )

    return market_args_list

In [5]:
%%time
#time_step = 480

def _tag_market_columns(df, market_id):
    """Write the market identifiers into ``df`` as its leading columns.

    Args:
        df: DataFrame to modify in place.
        market_id: Mapping of column name to the constant value to store; the
            columns end up at positions 0, 1, ... following the mapping order.

    Returns:
        The same ``df`` object.
    """
    for position, (column_name, value) in enumerate(market_id.items()):
        df[column_name] = value
        df.insert(position, column_name, df.pop(column_name))
    return df


# Loop-invariant inputs: read the reference tables and build the date tag once
# instead of re-reading both CSV files twice for every window
df_tracy_widom = pd.read_csv("{}/tracy_widom.csv".format(input_path_data_dictionary), low_memory = False)
df_onatski = pd.read_csv("{}/onatski.csv".format(input_path_data_dictionary), low_memory = False)
generation_tag = re.sub("-", "", input_generation_date)

for market_ticker in ["^GDAXI"]:#df_stock_index["market_ticker"].unique():
    # Local parameters per market (the row filter is computed once per market)
    market_mask = df_stock_index["market_ticker"] == market_ticker
    df_market = df_stock_index[market_mask]
    # Raw string: "\^" is not a valid escape sequence in a normal string literal
    stock_index_name = re.sub(r"(\^)|(=X)", "", market_ticker.lower())
    market_name = df_market["market_name"].unique()[0]
    market_country = df_market["market_country"].unique()[0]
    market_type = df_market["market_type"].unique()[0]
    market_id = {
        "market_ticker" : market_ticker,
        "market_name" : market_name,
        "market_country" : market_country,
        "market_type" : market_type
    }
    print("----------------------------------- {} -----------------------------------\n".format(market_ticker))
    print(
        "- Filename: {}\n- Name: {}\n- Country: {}\n- Type: {}\n".format(
            stock_index_name,
            market_name,
            market_country,
            market_type
        )
    )

    # Loop of dates per market
    market_args_list = get_market_list(
        df_stock_index = df_market,
        time_delta = time_delta,
        time_step = time_step
    )

    # Estimate Market Efficiency
    df_normalized, df_normalized_eigenvalues = [], []
    df_residuals, df_residuals_eigenvalues = [], []

    # One entry per analysed series:
    # (input column, infix of the log file names, result list, eigenvalue list)
    series_specs = [
        ("z_score_log_return", "", df_normalized, df_normalized_eigenvalues),                # Normalized returns
        ("z_score_zlr_no_market", "residuals_", df_residuals, df_residuals_eigenvalues)      # Normalized residuals
    ]

    for initial_date, final_date in zip(
        market_args_list["initial_date"].values,
        market_args_list["final_date"].values
    ):
        for column_, log_infix, frames, frames_eigenvalues in series_specs:
            try:
                df_aux = emf.get_market_efficiency(
                    # Fresh filtered frame and fresh table copies on every call,
                    # so the callee always receives unmodified inputs
                    df = df_stock_index[market_mask],
                    column_ = column_,
                    min_bins = 10,
                    precision = 12,
                    log_path = log_path,
                    log_filename = "log_rolling_window_{}{}".format(log_infix, stock_index_name),
                    log_filename_entropy = "log_entropy_rolling_{}{}".format(log_infix, stock_index_name),
                    verbose = 1,
                    tqdm_bar = True,
                    market_args_list = (initial_date, final_date),
                    bouchaud_filter = False,
                    n = int(df_stock_index.shape[0] * 2),
                    df_tracy_widom = df_tracy_widom.copy(),
                    alphas = [0.01, 0.05, 0.10],
                    k_max = 8,
                    df_onatski = df_onatski.copy(),
                    levels = [1, 2, 5, 10]
                )
                df_window, df_window_eigenvalues = df_aux[0], df_aux[1]
            except Exception as error:
                # Best effort: a failing window is reported (with its cause) and
                # skipped. KeyboardInterrupt is no longer swallowed.
                print(
                    "Not done: {} - {} ({}: {})".format(
                        initial_date,
                        final_date,
                        type(error).__name__,
                        error
                    )
                )
            else:
                frames.append(df_window)
                frames_eigenvalues.append(df_window_eigenvalues)
                print("Finished: {} - {}".format(initial_date, final_date))

    # pd.concat raises on an empty list: skip the market when no window succeeded
    if not (df_normalized and df_residuals):
        print("No window could be estimated for {} - nothing saved\n".format(market_ticker))
        continue

    # Merge final dataframe per market and place the Market ID columns first
    df_normalized = _tag_market_columns(pd.concat(df_normalized), market_id)
    df_normalized_eigenvalues = _tag_market_columns(pd.concat(df_normalized_eigenvalues), market_id)

    df_residuals = _tag_market_columns(pd.concat(df_residuals), market_id)
    df_residuals_eigenvalues = _tag_market_columns(pd.concat(df_residuals_eigenvalues), market_id)

    # Local Saving of files
    temp = "{}/df_{}".format(output_path, stock_index_name)
    filename = "{}_rolling_window_{}.csv".format(temp, generation_tag)
    filename_e = "{}_eigenvalues_evolution_{}.csv".format(temp, generation_tag)
    filename_r = "{}_rolling_window_residuals_{}.csv".format(temp, generation_tag)
    filename_re = "{}_eigenvalues_evolution_residuals_{}.csv".format(temp, generation_tag)

    df_normalized.to_csv(filename, index = False)
    df_residuals.to_csv(filename_r, index = False)
    df_normalized_eigenvalues.to_csv(filename_e, index = False)
    df_residuals_eigenvalues.to_csv(filename_re, index = False)

    print("----------------------------------- DONE - {} -----------------------------------\n".format(market_ticker))



----------------------------------- ^GDAXI -----------------------------------

- Filename: gdaxi
- Name: DAX Performance Index
- Country: Germany
- Type: Developed



100%|████████████████████| 378/378 [00:02<00:00, 157.16it/s]


Finished: 2000-01-04 - 2000-08-31


100%|████████████████████| 378/378 [00:02<00:00, 139.57it/s]


Finished: 2000-01-04 - 2000-08-31


100%|████████████████████| 496/496 [00:02<00:00, 166.92it/s]


Finished: 2001-11-20 - 2002-07-18


100%|████████████████████| 496/496 [00:03<00:00, 152.74it/s]


Finished: 2001-11-20 - 2002-07-18


100%|████████████████████| 496/496 [00:03<00:00, 157.69it/s]


Finished: 2003-10-14 - 2004-06-10


100%|████████████████████| 496/496 [00:03<00:00, 160.08it/s]


Finished: 2003-10-14 - 2004-06-10


100%|████████████████████| 528/528 [00:04<00:00, 125.31it/s]


Finished: 2005-08-31 - 2006-04-28


100%|█████████████████████| 528/528 [00:05<00:00, 89.38it/s]


Finished: 2005-08-31 - 2006-04-28


100%|████████████████████| 595/595 [00:05<00:00, 110.75it/s]


Finished: 2007-07-19 - 2008-03-15


100%|████████████████████| 595/595 [00:05<00:00, 108.77it/s]


Finished: 2007-07-19 - 2008-03-15


100%|████████████████████| 595/595 [00:05<00:00, 115.56it/s]


Finished: 2009-06-12 - 2010-02-07


100%|████████████████████| 595/595 [00:04<00:00, 126.48it/s]


Finished: 2009-06-12 - 2010-02-07


100%|████████████████████| 630/630 [00:05<00:00, 110.23it/s]


Finished: 2011-04-29 - 2011-12-25


100%|████████████████████| 630/630 [00:05<00:00, 121.85it/s]


Finished: 2011-04-29 - 2011-12-25


100%|████████████████████| 630/630 [00:05<00:00, 123.84it/s]


Finished: 2013-03-18 - 2013-11-13


100%|████████████████████| 630/630 [00:05<00:00, 109.00it/s]


Finished: 2013-03-18 - 2013-11-13


100%|████████████████████| 703/703 [00:07<00:00, 100.00it/s]


Finished: 2015-02-12 - 2015-10-10


100%|█████████████████████| 703/703 [00:07<00:00, 95.91it/s]


Finished: 2015-02-12 - 2015-10-10


100%|████████████████████| 741/741 [00:07<00:00, 104.83it/s]


Finished: 2017-01-03 - 2017-08-31


100%|████████████████████| 741/741 [00:07<00:00, 100.73it/s]


Finished: 2017-01-03 - 2017-08-31


100%|█████████████████████| 820/820 [00:08<00:00, 97.39it/s]


Finished: 2018-11-26 - 2019-07-24


100%|████████████████████| 820/820 [00:07<00:00, 103.66it/s]


Finished: 2018-11-26 - 2019-07-24


100%|█████████████████████| 861/861 [00:09<00:00, 92.57it/s]


Finished: 2020-10-23 - 2021-06-20


100%|████████████████████| 861/861 [00:08<00:00, 100.20it/s]


Finished: 2020-10-23 - 2021-06-20


100%|█████████████████████| 946/946 [00:10<00:00, 93.91it/s]


Finished: 2022-09-12 - 2023-05-10


100%|████████████████████| 946/946 [00:09<00:00, 100.15it/s]


Finished: 2022-09-12 - 2023-05-10


0it [00:00, ?it/s]


No done: 2024-07-29 - 2025-03-26


0it [00:00, ?it/s]

No done: 2024-07-29 - 2025-03-26
----------------------------------- DONE - ^GDAXI -----------------------------------

CPU times: total: 40.3 s
Wall time: 3min 7s



