# Estimate optimal window

In [1]:
import os
import re
import sys
import json
import warnings
import subprocess
import numpy as np
import pandas as pd
import yfinance as yf
import datetime as dt

from tqdm import tqdm
from scipy.stats import norm
from functools import partial
from scipy.optimize import curve_fit
from multiprocessing import Pool, cpu_count

# Put the project-local modules directory on the import path, then load the
# helper module that the rest of the notebook calls as `eow`.
sys.path.append("../modules")
import estimate_optimal_window as eow

# Notebook-wide settings: silence every warning, switch off pandas'
# chained-assignment check, and show all columns when a frame is displayed.
warnings.simplefilter("ignore")
pd.set_option("mode.chained_assignment", None)
pd.set_option("display.max_columns", None)

## Global variables

In [2]:
# Date tag (YYYY-MM-DD) of the input files used by this run; later cells strip
# the dashes to build file names.
input_generation_date = "2023-04-11"

# Input locations, all relative to the notebook's working directory.
input_path_raw = "../input_files/raw_data"
input_path_processed = "../input_files/processed_data"
input_path_data_dictionary = "../input_files/data_dictionary"

# Output and log locations.
output_path = "../output_files"
log_path = "../logs"

## Load time series

In [3]:
# Compact date tag (dashes removed) shared by both input file names.
date_tag = re.sub("-", "", input_generation_date)

# Processed time-series files for the two asset groups.
stock_index_csv = "{}/df_stock_index_{}.csv".format(input_path_processed, date_tag)
currency_csv = "{}/df_currency_{}.csv".format(input_path_processed, date_tag)

# low_memory=False makes pandas parse each file in one pass instead of chunks.
df_stock_indexes = pd.read_csv(stock_index_csv, low_memory=False)
df_currencies = pd.read_csv(currency_csv, low_memory=False)

## Mean and variance evolution

The theoretical time evolution of the mean $\mu(t)$ and the variance $\Upsilon(t)$ is given by:
$$\mu(t)=r_{x}+c_{1}t+b\ln{(1+t)}$$
$$\Upsilon(t)=c_{2}t-c_{1}b t\ln{(1+t)}-r_{x}b\ln{(1+t)}-b^{2}\ln^{2}{(1+t)}$$

In [4]:
# Log file name for this run, tagged with the input date without dashes.
stock_index_log_filename = "log_stock_index_optimal_window_{}".format(
    re.sub("-", "", input_generation_date)
)

# Estimate the mean/variance parameters for the stock index series.
# NOTE: the recorded run of this cell took about 7 minutes (see progress bar).
df_stock_indexes_parameters = eow.estimate_mean_variance_parameters(
    df_fts=df_stock_indexes,
    drift_params_degree=0,
    minimal_points=20,
    p_norm=1,
    log_path=log_path,
    log_filename=stock_index_log_filename,
    verbose=1,
    tqdm_bar=True,
)

# Last expression of the cell: render the resulting frame.
df_stock_indexes_parameters

100%|███████████████████| 6457/6457 [07:09<00:00, 15.04it/s]


Unnamed: 0,symbol,window_size,drift_degree,step,time_series,p_norm,cumulant_1_mean,error_cumulant_1_mean,tfs_param_mean,error_tfs_param_mean,drift_coefficient_0_mean,error_drift_coefficient_0_mean,average_error_mean,rsquared_mean,cumulant_1_variance,error_cumulant_1_variance,cumulant_2_variance,error_cumulant_2_variance,tfs_param_variance,error_tfs_param_variance,drift_coefficient_1_variance,error_drift_coefficient_1_variance,average_error_variance,rsquared_variance
3,^GSPC,1,0,23930,log-return,1,2.245223e-08,4.014338e-10,-0.000065,0.000003,0.000439,0.000021,0.000103,0.193479,0.022549,385.845139,-3.384678e-07,2.391239e-09,-0.000001,0.023302,61.250405,1.048073e+06,0.000028,0.758148
4,^GSPC,1,0,23930,absolute log-return,1,-2.925093e-07,3.129147e-09,0.000067,0.000022,0.012331,0.000166,0.001266,0.583784,0.006516,36.206166,-1.605242e-07,1.410092e-09,-0.000002,0.012322,19.604996,1.089355e+05,0.000013,0.759949
5,^GSPC,1,0,23930,log-return volatility,1,-3.874940e-05,3.932296e-07,0.064746,0.002721,3.180370,0.020816,0.153650,0.513841,0.694326,2572.866596,-3.430981e-03,3.242344e-05,-0.000445,1.649886,2163.627764,8.017414e+06,0.284835,0.769874
6,^GSPC,2,0,23930,log-return,1,2.236592e-08,5.689221e-10,-0.000064,0.000004,0.000435,0.000030,0.000103,0.190993,0.020979,166.355539,-3.376793e-07,3.085307e-09,-0.000001,0.011575,57.050515,4.523941e+05,0.000028,0.757156
7,^GSPC,2,0,23930,absolute log-return,1,-2.945122e-07,4.414484e-09,0.000086,0.000030,0.012180,0.000233,0.001269,0.582614,0.006521,48.650404,-1.600671e-07,1.950906e-09,-0.000002,0.016483,19.650173,1.466089e+05,0.000013,0.758788
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19366,^MERV,322,0,6440,absolute log-return,1,-8.901244e-07,1.394873e-07,0.002335,0.000147,0.000263,0.000877,0.000690,0.942449,0.080946,34302.362654,-4.527429e-06,2.927241e-06,-0.000006,2.652515,30.414500,1.288868e+07,0.000300,0.291790
19367,^MERV,322,0,6440,log-return volatility,1,-8.133468e-06,9.297464e-07,0.020728,0.000980,0.004667,0.005846,0.004257,0.966363,0.080107,,-5.968873e-06,1.997064e-06,-0.000008,,107.630113,,0.000300,0.807616
19368,^MERV,323,0,6460,log-return,1,1.791667e-07,2.771313e-08,-0.000027,0.000029,0.000018,0.000175,0.000110,0.787222,0.153486,64243.383056,-4.663276e-06,2.590124e-06,-0.000003,1.416910,69.083449,2.891565e+07,0.000311,0.305392
19369,^MERV,323,0,6460,absolute log-return,1,-8.895594e-07,1.396125e-07,0.002335,0.000148,0.000269,0.000881,0.000699,0.942017,0.092163,109048.857496,-4.506730e-06,3.471767e-06,-0.000005,6.471316,34.807320,4.118438e+07,0.000300,0.291357


In [5]:
# Log file name for this run, tagged with the input date without dashes.
currency_log_filename = "log_currency_optimal_window_{}".format(
    re.sub("-", "", input_generation_date)
)

# Estimate the mean/variance parameters for the currency series, using the
# same settings as the stock index run.
# NOTE: the recorded run of this cell took about 7 minutes (see progress bar).
df_currencies_parameters = eow.estimate_mean_variance_parameters(
    df_fts=df_currencies,
    drift_params_degree=0,
    minimal_points=20,
    p_norm=1,
    log_path=log_path,
    log_filename=currency_log_filename,
    verbose=1,
    tqdm_bar=True,
)

# Last expression of the cell: render the resulting frame.
df_currencies_parameters

100%|███████████████████| 5553/5553 [07:17<00:00, 12.70it/s]


Unnamed: 0,symbol,window_size,drift_degree,step,time_series,p_norm,cumulant_1_mean,error_cumulant_1_mean,tfs_param_mean,error_tfs_param_mean,drift_coefficient_0_mean,error_drift_coefficient_0_mean,average_error_mean,rsquared_mean,cumulant_1_variance,error_cumulant_1_variance,cumulant_2_variance,error_cumulant_2_variance,tfs_param_variance,error_tfs_param_variance,drift_coefficient_1_variance,error_drift_coefficient_1_variance,average_error_variance,rsquared_variance
18,KWDUSD=X,8,0,5008,log-return,1,-3.677057e-08,1.614940e-09,0.000041,0.000003,-0.000198,0.000016,0.000017,0.532390,0.000016,5.978251e-07,1.853049e-07,1.510754e-08,1.229898e-03,0.000113,-0.005113,6.080665e-04,0.000004,0.554080
19,KWDUSD=X,8,0,5008,absolute log-return,1,-4.408026e-07,2.066446e-08,0.000487,0.000033,-0.000194,0.000209,0.000234,0.499745,0.000015,4.943234e-07,1.483369e-07,1.059856e-08,1.082724e-03,0.000090,-0.003818,5.394374e-04,0.000003,0.608862
20,KWDUSD=X,8,0,5008,log-return volatility,1,-9.525802e-04,2.751674e-05,1.027829,0.044328,5.897836,0.278904,0.329156,0.728830,0.018568,4.687064e-04,3.027160e-01,2.042709e-02,1.757200e+00,0.106825,-8.028548,5.333336e-01,5.099014,0.574865
24,KWDUSD=X,10,0,5010,log-return,1,-2.236479e-08,1.988466e-09,0.000010,0.000003,0.000004,0.000020,0.000017,0.425829,0.000016,6.742748e-07,1.801397e-07,1.635886e-08,1.206558e-03,0.000122,-0.005071,6.506449e-04,0.000004,0.551134
25,KWDUSD=X,10,0,5010,absolute log-return,1,-4.404344e-07,2.283106e-08,0.000474,0.000036,-0.000089,0.000227,0.000237,0.511138,0.000015,5.331646e-07,1.464758e-07,1.137506e-08,1.077432e-03,0.000095,-0.003827,5.661971e-04,0.000003,0.611100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16654,EURCOP=X,230,0,4600,absolute log-return,1,-4.389819e-07,1.024633e-07,0.001085,0.000080,-0.000137,0.000452,0.000309,0.933940,0.006276,5.823044e+03,7.147155e-08,1.919574e-07,1.426799e-06,1.323786,-5.467700,5.072924e+06,0.000004,0.859286
16655,EURCOP=X,230,0,4600,log-return volatility,1,-5.972767e-04,6.138464e-05,1.118638,0.048013,0.200176,0.270734,0.178057,0.973950,1.074801,1.434540e+05,1.308075e-02,2.999979e-02,1.478544e-03,197.464370,-919.249764,1.226923e+08,0.715801,0.882019
16656,EURCOP=X,231,0,4620,log-return,1,5.377486e-08,1.363668e-08,-0.000024,0.000011,0.000042,0.000061,0.000042,0.472570,0.004432,7.064669e+02,1.659854e-07,3.272351e-07,4.490840e-06,0.715800,-2.669836,4.255412e+05,0.000008,0.850674
16657,EURCOP=X,231,0,4620,absolute log-return,1,-4.425273e-07,9.824694e-08,0.001085,0.000077,-0.000128,0.000436,0.000298,0.934961,0.009362,,6.120289e-08,,8.338493e-07,,-9.780151,,0.000004,0.869152


## Optimal window summary

In [6]:
# Derive the optimal-window table for stock indexes from the per-window
# parameter estimates computed earlier.
df_optimal_stock_indexes = eow.estimate_optimal_window(
    df_fts_parameters=df_stock_indexes_parameters,
)

# Last expression of the cell: render the resulting frame.
df_optimal_stock_indexes

Unnamed: 0,symbol,window_size_mean,drift_degree,step_mean,time_series,p_norm,cumulant_1_mean,error_cumulant_1_mean,tfs_param_mean,error_tfs_param_mean,drift_coefficient_0_mean,error_drift_coefficient_0_mean,average_error_mean,rsquared_mean,window_size_variance,step_variance,cumulant_1_variance,error_cumulant_1_variance,cumulant_2_variance,error_cumulant_2_variance,tfs_param_variance,error_tfs_param_variance,drift_coefficient_1_variance,error_drift_coefficient_1_variance,average_error_variance,rsquared_variance
0,IMOEX.ME,123,0,2460,absolute log-return,1,-1.717904e-06,2.684662e-07,0.001517282,0.00012,0.000325,0.000612,0.000497,0.907432,111,2442,0.02714175,11201.11,-8.115617e-07,9.742583e-07,-3.712614e-06,1.532152,6.946133,2866574.0,2.1e-05,0.577444
1,IMOEX.ME,8,0,2480,log-return,1,-3.544343e-07,4.261258e-08,0.0005581424,3e-05,-0.003191,0.00016,0.000121,0.650278,123,2460,0.05282074,27142.21,-1.102459e-06,6.895034e-07,-2.502339e-06,1.285838,19.059649,9793880.0,2.4e-05,0.644114
2,IMOEX.ME,119,0,2380,log-return volatility,1,-0.0002892756,3.907213e-05,0.2630394,0.01697,0.079809,0.086006,0.064805,0.938111,123,2460,-0.5097047,34748.1,-0.002109315,0.002676373,0.0004899923,33.436524,-236.948024,16152910.0,0.03848,0.819728
3,^BVSP,1,0,7411,absolute log-return,1,-1.011179e-06,1.881608e-08,-0.002070356,4.1e-05,0.039779,0.000264,0.000937,0.850639,230,7360,-1.317591e-05,6.021091e-06,-5.281883e-07,1.737408e-07,0.004064467,0.000565,-0.053807,0.001781117,9e-06,0.99
4,^BVSP,1,0,7411,log-return,1,5.47185e-07,1.650222e-08,-0.004462956,3.6e-05,0.036736,0.000232,0.000523,0.841297,312,7176,-5.585791e-06,9.108442e-06,-4.032812e-07,7.175861e-07,0.008477506,0.00113,-0.0802,0.00132954,2.4e-05,0.98402
5,^BVSP,365,0,7300,log-return volatility,1,-0.0002399905,3.462545e-05,0.4336593,0.040838,0.200997,0.248198,0.1732,0.862723,312,7176,0.0002120042,0.0002391716,0.001248785,0.001245408,0.4866365,0.034176,-4.098489,0.06947303,0.042369,0.991708
6,^DJI,378,0,7560,absolute log-return,1,6.606349e-08,6.244535e-08,0.0008368163,7.6e-05,-0.000183,0.000464,0.000354,0.936601,385,7700,1.143715e-05,1.189843e-06,2.50898e-07,8.554131e-08,0.002162751,0.000578,-0.010988,0.00319737,4e-06,0.951458
7,^DJI,342,0,7866,log-return,1,-4.834087e-08,1.018046e-08,7.067917e-05,1.3e-05,-1.7e-05,8.3e-05,6.5e-05,0.588923,385,7700,1.624704e-05,1.406977e-06,5.000889e-07,1.440715e-07,0.003056728,0.000689,-0.015921,0.003723005,7e-06,0.954437
8,^DJI,393,0,7860,log-return volatility,1,-0.0001506933,2.084005e-05,0.6999839,0.026263,0.120298,0.161371,0.099873,0.981404,385,7700,0.002984243,0.0003066388,0.0202247,0.005327632,0.6691451,0.116407,-3.903542,0.5570037,0.239825,0.964292
9,^FCHI,418,0,8360,absolute log-return,1,-2.648135e-07,5.815541e-08,0.001302701,7.7e-05,0.000201,0.00048,0.000329,0.954456,392,8232,1.253519e-05,8.486109e-07,5.788856e-07,6.620714e-08,0.004436325,0.000228,-0.026332,0.00109465,3e-06,0.968224


In [7]:
# Derive the optimal-window table for currencies from the per-window
# parameter estimates computed earlier.
df_optimal_currencies = eow.estimate_optimal_window(
    df_fts_parameters=df_currencies_parameters,
)

# Last expression of the cell: render the resulting frame.
df_optimal_currencies

Unnamed: 0,symbol,window_size_mean,drift_degree,step_mean,time_series,p_norm,cumulant_1_mean,error_cumulant_1_mean,tfs_param_mean,error_tfs_param_mean,drift_coefficient_0_mean,error_drift_coefficient_0_mean,average_error_mean,rsquared_mean,window_size_variance,step_variance,cumulant_1_variance,error_cumulant_1_variance,cumulant_2_variance,error_cumulant_2_variance,tfs_param_variance,error_tfs_param_variance,drift_coefficient_1_variance,error_drift_coefficient_1_variance,average_error_variance,rsquared_variance
0,AUD=X,158,0,4266,absolute log-return,1,-2.928644e-06,3.415233e-07,0.001794,0.000272,0.002008,0.001548,0.001173,0.747264,210,4200,0.000271,1.461066e-05,2.254713e-04,1.591824e-05,0.082857,0.001741,-0.460110,6.638243e-03,0.000375,0.994320
1,AUD=X,197,0,4334,log-return,1,9.807118e-08,1.351824e-08,-0.000039,0.000010,-0.000016,0.000058,0.000045,0.738510,210,4200,0.000273,1.484622e-05,2.285568e-04,1.628531e-05,0.083421,0.001769,-0.463297,6.744251e-03,0.000383,0.994218
2,AUD=X,152,0,4256,log-return volatility,1,-2.136440e-05,1.556217e-06,0.029283,0.001249,0.008047,0.007119,0.005991,0.956404,210,4200,0.000523,3.206074e-05,8.796900e-04,6.934882e-05,0.166954,0.003763,-0.945141,1.395763e-02,0.001521,0.994376
3,BHDUSD=X,201,0,5025,absolute log-return,1,-1.966854e-06,1.271394e-07,0.004505,0.000234,-0.023263,0.001485,0.000214,0.959455,208,4992,0.036792,6.526197e+04,-1.575880e-06,4.908769e-06,-0.000005,9.546168,6.782822,1.203140e+07,0.000133,0.687609
4,BHDUSD=X,203,0,5075,log-return,1,1.888013e-09,2.579377e-09,-0.000014,0.000005,0.000109,0.000029,0.000004,0.633526,208,4992,0.033045,2.820848e+04,-1.273712e-06,2.223209e-06,-0.000005,4.242639,6.186942,5.281302e+06,0.000135,0.687094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,THB=X,156,0,4992,log-return,1,3.139528e-08,1.098149e-08,-0.000030,0.000011,0.000065,0.000062,0.000036,0.238445,225,4950,0.000024,8.292542e-07,6.864715e-07,8.100900e-08,0.003050,0.000274,-0.013293,1.435428e-03,0.000002,0.975179
80,THB=X,246,0,4920,log-return volatility,1,-4.917524e-04,7.059995e-05,1.551570,0.057504,0.141483,0.324839,0.227477,0.983879,225,4950,0.016206,6.802096e-04,3.497995e-01,4.372070e-02,2.253753,0.199900,-10.451013,9.932411e-01,1.243699,0.975322
81,ZAR=X,251,0,5020,absolute log-return,1,-9.376079e-07,1.656441e-07,0.001345,0.000140,0.000613,0.000802,0.000497,0.840824,251,5020,0.000015,3.582111e-06,4.925784e-07,2.091589e-07,0.003340,0.000657,-0.019655,2.533576e-03,0.000006,0.795208
82,ZAR=X,234,0,4914,log-return,1,2.796507e-08,1.051241e-08,0.000015,0.000009,-0.000029,0.000051,0.000038,0.635286,251,5020,0.000020,4.368003e-06,1.030416e-06,3.565101e-07,0.005208,0.000718,-0.031235,2.690091e-03,0.000010,0.865724


## Save the estimated parameters and optimal windows to avoid reprocessing

In [8]:
# Compact date tag (dashes removed) shared by both output file names.
date_tag = re.sub("-", "", input_generation_date)

# Persist the full per-window parameter tables next to the processed inputs;
# the DataFrame index is not written.
stock_index_parameters_csv = "{}/df_stock_index_parameters_{}.csv".format(input_path_processed, date_tag)
currency_parameters_csv = "{}/df_currency_parameters_{}.csv".format(input_path_processed, date_tag)

df_stock_indexes_parameters.to_csv(stock_index_parameters_csv, index=False)
df_currencies_parameters.to_csv(currency_parameters_csv, index=False)

In [9]:
# Compact date tag (dashes removed) shared by both output file names.
date_tag = re.sub("-", "", input_generation_date)

# Persist the optimal-window tables next to the processed inputs; the
# DataFrame index is not written.
optimal_stock_index_csv = "{}/df_optimal_window_stock_index_{}.csv".format(input_path_processed, date_tag)
optimal_currency_csv = "{}/df_optimal_window_currency_{}.csv".format(input_path_processed, date_tag)

df_optimal_stock_indexes.to_csv(optimal_stock_index_csv, index=False)
df_optimal_currencies.to_csv(optimal_currency_csv, index=False)