In [112]:
import pandas as pd
import pyarrow.parquet as pq
import numpy as np
import matplotlib.pyplot as plt
import wget
import dask
import os
from tqdm import tqdm
import glob
import os

from numpy import linalg as LA
import math
import sklearn.preprocessing
import bahc
import pyRMT

%matplotlib inline

## Download k-line spot data by every minute from binance

In [2]:
url_template = r"https://data.binance.vision/data/spot/daily/klines/{0}/1h/{0}-1h-{1}.zip"
dir_template = r"data/spot/daily/klines/{0}/1h/{0}-1h-{1}.zip"


# @dask.delayed
def download_one_date(url, path):
    try:
        tmp = wget.download(url, out=path)
        return True
    except:
        # print("{} download failed".format(url))
        return False

def download_all_dates(token_pair: str, dates, path):
    first_meet = False
    for each in dates:
        res = download_one_date(url_template.format(token_pair, str(each.date())), path)
        if first_meet == False and res == True:
            first_meet = True
        if first_meet == True and res == False:
            break
    # promises = [download_one_date(url_template.format(token_pair, str(each.date())), path) for each in dates]
    # alldata=dask.compute(promises) 
    
def get_asset_pairs(x):
    with open("asset_pairs.txt", "r") as f:
        names = f.read()
    names = names.replace("\t", "")
    names = names.replace("\n", "")
    names = names.split("/")
    x_names = list(filter(lambda each: each.endswith(x), names))
    print("x: {} results length: {}".format(x, len(x_names)))
    return x_names

@dask.delayed
def main_download(pair, dates):
    path = "data/spot/daily/klines/{0}/1h".format(pair)
    if not os.path.exists(path):
        os.makedirs(path)
    download_all_dates(pair, dates, path)
    

In [4]:
USDT_pairs = get_asset_pairs("USDT")
BUSD_pairs = get_asset_pairs("BUSD")

x: USDT results length: 394
x: BUSD results length: 350


In [5]:
USDT_pairs = ["BTCUSDT"]
dates = pd.date_range(start="2021-03-01",end="2023-01-15")
promises = [main_download(each, dates) for each in USDT_pairs]
dask.compute(promises)
# for i in tqdm(range(len(USDT_pairs))):
#     main_download(USDT_pairs[i], dates)

([None],)

In [5]:
dates = pd.date_range(start="2021-03-01",end="2023-01-15")
print(len(list(dates)))

686


## Data loading & pre-processing

In [12]:
@dask.delayed
def process_raw(pair, path):
    names = [
        "Open time",
        "Open",
        "High",
        "Low",
        "Close",
        "Volume",
        "Close time",
        "Quote asset volume",
        "Number of trades",
        "Taker buy base asset volume",
        "Taker buy quote asset volume",
        "Ignore",
    ]
    asset_data = pd.read_csv(path, names=names, header=None)
    # btcdata = pd.read_csv(dir_template.format(pair, date), names=names, header=None)
    asset_data["time"] = pd.to_datetime(asset_data["Open time"], unit='ms')
    asset_data[pair] = asset_data["Close"]
    date_indexed = asset_data.set_index("time")
    date_indexed.drop([
        "Open time",
        "Open",
        "High",
        "Low",
        "Close",
        "Volume",
        "Close time",
        "Quote asset volume",
        "Number of trades",
        "Taker buy base asset volume",
        "Taker buy quote asset volume",
        "Ignore",
        ], axis=1, inplace=True)
    # date_indexed.drop('Close time', axis=1, inplace=True)
    
    # date_indexed.drop('time', axis=1, inplace=True)

    # date_indexed["s"] = (date_indexed["isBuyerMaker"].astype(int)-0.5)*(-2)
    # date_indexed["mid"] = date_indexed["price"]
    return date_indexed


def load_one_pair(pair):
    files = glob.glob("data/spot/daily/klines/{}/1h/*".format(pair))
    files = [each for each in files if "(" not in each]
    if len(files) == 0:
        print(f"{pair} is empty, no files found")
        return False, None
    tasks = [process_raw(pair, each) for each in files]
    p_data_arr = dask.compute(tasks)
    result = pd.concat(p_data_arr[0])
    return True, result

def merge_assets(pd_arr, col: str):
    assets_close_matrix = pd_arr[0]
    for each in pd_arr[1:]:
        assets_close_matrix = assets_close_matrix.merge(each, how="outer", on=col)
        # assets_close_matrix = assets_close_matrix.join(each)
        # print(assets_close_matrix.shape)
        if assets_close_matrix.shape[0] > 16464:
            print(f"{each.columns} wrong rows: {assets_close_matrix.shape[0]}")
            assert True==False
    return assets_close_matrix

In [13]:
def main_load_and_merge_all_assets():
    existing_pairs = os.listdir("data/spot/daily/klines")
    print("Number of pairs: ", len(existing_pairs))
    chunk_sz = 10
    subsets = [existing_pairs[i:i + chunk_sz] for i in range(0, len(existing_pairs), chunk_sz)]
    # print(sum([len(each) for each in subsets]))
    for i in tqdm(range(len(subsets))):
        chunk = subsets[i]
        tmp_assets_arr = list()
        for each in chunk:
            success, tmp_asset = load_one_pair(each)
            if success:
                if tmp_asset.shape[0] > 16464:
                    print(f"{each} wrong rows: {tmp_asset.shape[0]}")
                tmp_assets_arr.append(tmp_asset)
        # tmp_assets_arr = [load_one_pair(each) for each in chunk]
        tmp_merge_result = merge_assets(tmp_assets_arr, "time")
        tmp_merge_result.to_pickle(f"data/clean/mk_{i}.pkl")
        
    
def inspect_assets_shape():
    existing_pairs = os.listdir("data/spot/daily/klines")
    print("Number of pairs: ", len(existing_pairs))
    # assets_pd_arr = [load_one_pair(each) for each in existing_pairs]
    for pair in existing_pairs:
        success, tmp_asset = load_one_pair(pair)
        assert tmp_asset.shape[0] <= 16464, f"{pair} wrong rows: {tmp_asset.shape[0]}"

res = main_load_and_merge_all_assets()
# inspect_assets_shape()

  0%|          | 0/40 [00:00<?, ?it/s]

Number of pairs:  394


 38%|███▊      | 15/40 [17:17<38:00, 91.22s/it]

GALUSDT is empty, no files found


100%|██████████| 40/40 [43:10<00:00, 64.77s/it]


In [8]:
def merge_all_chunks(arr_id):
    df_arr = [pd.read_pickle(f"data/clean/mk_{i}.pkl") for i in arr_id]
    # for each in df_arr:
    #     print(each.shape)
    res = merge_assets(df_arr, "time")
    return res

In [14]:
res = merge_all_chunks([i for i in range(40)])
res.to_pickle("data/clean/whole_usdt_merge.pkl")
print(res.shape)

## MVP functions

In [75]:
def eigenvalue_clipping(lambdas,v,lambda_plus):
    N=len(lambdas)
    
    # _s stands for _structure below
    sum_lambdas_gt_lambda_plus=np.sum(lambdas[lambdas>lambda_plus])
    
    sel_bulk=lambdas<=lambda_plus                     # these eigenvalues come from the seemingly random bulk
    N_bulk=np.sum(sel_bulk)
    sum_lambda_bulk=np.sum(lambdas[sel_bulk])        
    delta=sum_lambda_bulk/N_bulk                      # delta is their average, so as to conserver the trace of C
    
    lambdas_clean=lambdas
    lambdas_clean[lambdas_clean<=lambda_plus]=delta
    
    
    C_clean=np.zeros((N, N))
    v_m=np.matrix(v)
    
    for i in range(N-1):
        C_clean=C_clean+lambdas_clean[i] * np.dot(v_m[i,].T,v_m[i,]) 
        
    np.fill_diagonal(C_clean,1)
            
    return C_clean    


def solution_eig(C_asset):
    C_corr = C_asset.corr()
    l_e, V_e = LA.eig(C_corr)
    T, N = C_asset.shape
    q = N/T
    lambda_plus = (1+np.sqrt(q))**2
    
    C_clipped=eigenvalue_clipping(l_e,V_e,lambda_plus)
    return C_clipped

def weights_GVM(Sigma):
    Sigma_inv=LA.inv(Sigma)
    w_GVM=Sigma_inv.sum(axis=1)/Sigma_inv.sum()
    return w_GVM
    

## Data preprocessing

In [26]:
all_data = pd.read_pickle("data/clean/whole_usdt_merge.pkl")
all_data = all_data.reset_index()
all_data = all_data.drop(["time"], axis=1)

In [40]:
all_data

Unnamed: 0,1INCHDOWNUSDT,1INCHUPUSDT,1INCHUSDT,AAVEDOWNUSDT,AAVEUPUSDT,AAVEUSDT,ACAUSDT,ACHUSDT,ACMUSDT,ADADOWNUSDT,...,XVSUSDT,YFIDOWNUSDT,YFIIUSDT,YFIUPUSDT,YFIUSDT,YGGUSDT,ZECUSDT,ZENUSDT,ZILUSDT,ZRXUSDT
0,10.07,9.87,5.8799,1.327380,110.463,438.825,,,12.062,2.250567,...,85.699,0.001495,3044.99,8.890,50826.78,,246.49,96.655,0.20656,2.1644
1,10.21,9.73,5.8589,1.367000,108.459,434.066,,,11.980,2.230800,...,84.578,0.001558,2989.17,8.530,50035.84,,247.08,97.583,0.20509,2.1828
2,9.92,10.00,5.9137,1.320024,111.310,439.611,,,12.067,2.127842,...,83.166,0.001554,3000.41,8.550,50046.70,,248.45,97.487,0.20619,2.1800
3,9.97,9.93,5.9084,1.301529,112.411,443.585,,,12.025,2.241671,...,81.401,0.001595,2954.89,8.301,49476.40,,244.61,95.743,0.20330,2.1540
4,9.10,10.63,6.1123,1.232953,117.000,452.599,,,11.988,2.224815,...,82.750,0.001548,3000.85,8.642,50269.64,,246.76,97.111,0.20582,2.1780
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16447,,,,,,80.100,0.1340,0.00926,2.947,0.003657,...,5.030,,1355.00,,7092.00,0.2532,45.30,10.480,0.02575,0.2025
16448,,,,,,79.900,0.1324,0.00926,2.994,0.003627,...,5.100,,1357.30,,7106.00,0.2592,45.40,10.470,0.02584,0.2033
16449,,,,,,79.700,0.1315,0.00920,3.115,0.003646,...,5.050,,1357.10,,7068.00,0.2581,45.30,10.480,0.02579,0.2037
16450,,,,,,79.900,0.1312,0.00914,3.090,0.003607,...,5.050,,1352.20,,7086.00,0.2573,45.20,10.530,0.02595,0.2037


In [38]:
log_ret_all_data = np.log(all_data).diff()

In [39]:
log_ret_all_data

Unnamed: 0,1INCHDOWNUSDT,1INCHUPUSDT,1INCHUSDT,AAVEDOWNUSDT,AAVEUPUSDT,AAVEUSDT,ACAUSDT,ACHUSDT,ACMUSDT,ADADOWNUSDT,...,XVSUSDT,YFIDOWNUSDT,YFIIUSDT,YFIUPUSDT,YFIUSDT,YGGUSDT,ZECUSDT,ZENUSDT,ZILUSDT,ZRXUSDT
0,,,,,,,,,,,...,,,,,,,,,,
1,0.013807,-0.014286,-0.003578,0.029411,-0.018308,-0.010904,,,-0.006821,-0.008822,...,-0.013167,0.041277,-0.018502,-0.041338,-0.015684,,0.002391,0.009555,-0.007142,0.008465
2,-0.028815,0.027371,0.009310,-0.034969,0.025947,0.012694,,,0.007236,-0.047252,...,-0.016836,-0.002571,0.003753,0.002342,0.000217,,0.005529,-0.000984,0.005349,-0.001284
3,0.005028,-0.007025,-0.000897,-0.014110,0.009843,0.008999,,,-0.003487,0.052113,...,-0.021451,0.026041,-0.015288,-0.029555,-0.011461,,-0.015577,-0.018052,-0.014115,-0.011998
4,-0.091306,0.068120,0.033928,-0.054128,0.040012,0.020117,,,-0.003082,-0.007548,...,0.016436,-0.029910,0.015434,0.040258,0.015906,,0.008751,0.014187,0.012319,0.011080
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16447,,,,,,-0.014870,0.016554,-0.002157,0.041218,0.004934,...,0.001990,,0.001182,,0.008070,-0.010998,0.002210,-0.011385,-0.002715,-0.002466
16448,,,,,,-0.002500,-0.012012,0.000000,0.015823,-0.008237,...,0.013821,,0.001696,,0.001972,0.023420,0.002205,-0.000955,0.003489,0.003943
16449,,,,,,-0.002506,-0.006821,-0.006501,0.039619,0.005225,...,-0.009852,,-0.000147,,-0.005362,-0.004253,-0.002205,0.000955,-0.001937,0.001966
16450,,,,,,0.002506,-0.002284,-0.006543,-0.008058,-0.010754,...,0.000000,,-0.003617,,0.002543,-0.003104,-0.002210,0.004760,0.006185,0.000000


In [52]:
t0 = 8000
t1 = 16000
X_raw = log_ret_all_data.iloc[t0:t1].dropna(axis=1)
X_raw.shape

(8000, 253)

In [91]:
X_raw.corr()

Unnamed: 0,AAVEUSDT,ACAUSDT,ACHUSDT,ACMUSDT,ADADOWNUSDT,ADAUPUSDT,ADAUSDT,ADXUSDT,AIONUSDT,AKROUSDT,...,XTZUSDT,XVGUSDT,XVSUSDT,YFIIUSDT,YFIUSDT,YGGUSDT,ZECUSDT,ZENUSDT,ZILUSDT,ZRXUSDT
AAVEUSDT,1.000000,0.465521,0.363003,0.327156,-0.708693,0.729831,0.736551,0.435246,0.456637,0.324599,...,0.726704,0.512552,0.486918,0.338476,0.671246,0.556852,0.671601,0.739018,0.611431,0.660354
ACAUSDT,0.465521,1.000000,0.254878,0.231939,-0.477591,0.503788,0.498507,0.295856,0.324452,0.256360,...,0.475516,0.376905,0.350151,0.227443,0.416938,0.386248,0.426991,0.491965,0.400706,0.440867
ACHUSDT,0.363003,0.254878,1.000000,0.182490,-0.352001,0.366492,0.366688,0.250333,0.254680,0.188118,...,0.366507,0.288903,0.269732,0.165734,0.334636,0.292232,0.343007,0.363811,0.309452,0.345196
ACMUSDT,0.327156,0.231939,0.182490,1.000000,-0.294462,0.334394,0.325068,0.216291,0.225981,0.170151,...,0.316936,0.242755,0.231858,0.171604,0.298179,0.276605,0.292125,0.333239,0.272907,0.297963
ADADOWNUSDT,-0.708693,-0.477591,-0.352001,-0.294462,1.000000,-0.946106,-0.970946,-0.429676,-0.448752,-0.313265,...,-0.716178,-0.513560,-0.477608,-0.328551,-0.641666,-0.547134,-0.651060,-0.710417,-0.596555,-0.636893
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YGGUSDT,0.556852,0.386248,0.292232,0.276605,-0.547134,0.572281,0.572376,0.339285,0.371548,0.268202,...,0.556322,0.409893,0.407018,0.245597,0.506064,1.000000,0.509585,0.569424,0.480456,0.517748
ZECUSDT,0.671601,0.426991,0.343007,0.292125,-0.651060,0.669974,0.675180,0.396619,0.413297,0.307359,...,0.686419,0.492024,0.442127,0.312196,0.621167,0.509585,1.000000,0.750314,0.578014,0.624160
ZENUSDT,0.739018,0.491965,0.363811,0.333239,-0.710417,0.743331,0.746207,0.432758,0.472546,0.354821,...,0.748615,0.546561,0.497591,0.353123,0.672551,0.569424,0.750314,1.000000,0.642299,0.696396
ZILUSDT,0.611431,0.400706,0.309452,0.272907,-0.596555,0.621830,0.625048,0.369601,0.391592,0.292011,...,0.628758,0.437435,0.410750,0.278101,0.553083,0.480456,0.578014,0.642299,1.000000,0.582886


In [96]:
all_data_length = X_raw.shape[0]
T = 500
N = 253

step_range = 4500
step_size = 5

In [115]:
no_arr = {"in": [], "out": []}
eig_arr = {"in": [], "out": []}
bahc_arr = {"in": [], "out": []}
nls_arr = {"in": [], "out": []}


for i in tqdm(range(0, step_range, step_size)):
    # |train|    test     |
    # |--T--|------T------|
    C_asset_in = X_raw.iloc[i:i+T]
    C_asset_out = X_raw.iloc[i+T:i+6*T]
    
    # covariance matrix
    C_in_cov = C_asset_in.cov().values
    C_out_cov = C_asset_out.cov().values
    
    # 1. no correlation cleaning
    C_corr = C_asset_in.corr()
    # print(type(C_corr))
    w_no = weights_GVM(C_corr)
    
    # print(type(w_no))
    
    # 2. eigenvalue clipping
    C_clipped_in = solution_eig(C_asset_in)
    w_eig = weights_GVM(C_clipped_in)
    
    # print(type(w_eig))
    
    # 3. cleaning with bahc
    X_centered = sklearn.preprocessing.StandardScaler(with_mean=True,
                           with_std=False).fit_transform(C_asset_in.values)  # column-wise!'
    Sigma_BAHC=bahc.filterCovariance(X_centered.T)
    w_GBM_BAHC=weights_GVM(Sigma_BAHC)
    
    # 4. cleaning with nls
    Sigma_NLS=pyRMT.optimalShrinkage(X_centered)
    w_GBM_NLS=weights_GVM(Sigma_NLS)
    
    # print(type(w_GBM_BAHC))
    # print(type(w_GBM_BAHC), type(Sigma_BAHC), type(W_eig))
    
    # in sample risk
    sigma_in_no = w_no.T@(C_in_cov@w_no)
    sigma_in_eig= (w_eig.T@(C_in_cov@w_eig)).item(0)
    sigma_in_BAHC= w_GBM_BAHC@(C_in_cov@w_GBM_BAHC)
    sigma_in_NLS= w_GBM_NLS@(C_in_cov@w_GBM_NLS)
    
    print(type(sigma_in_eig))
    
    # out sample risk
    sigma_out_no= w_no.T@(C_out_cov@w_no)
    sigma_out_eig= (w_eig.T@(C_out_cov@w_eig)).item(0)
    sigma_out_BAHC= w_GBM_BAHC@(C_out_cov@w_GBM_BAHC)
    sigma_out_NLS= w_GBM_NLS@(C_out_cov@w_GBM_NLS)
    
    
    no_arr["in"].append(sigma_in_no)
    eig_arr["in"].append(sigma_in_eig)
    bahc_arr["in"].append(sigma_in_BAHC)
    nls_arr["in"].append(sigma_in_NLS)
    
    no_arr["out"].append(sigma_out_no)
    eig_arr["out"].append(sigma_out_eig)
    bahc_arr["out"].append(sigma_out_BAHC)
    nls_arr["out"].append(sigma_out_NLS)
    
#     print(f"""i: {i}, no: {sigma_in_no}, {sigma_out_no} | 
#           eig: {sigma_in_eig}, {sigma_out_eig} | bahc: {sigma_in_BAHC}, {sigma_out_BAHC}
#           nls: {sigma_in_NLS}, {sigma_out_NLS}""")
    print(f"""i: {i}, no: {sigma_out_no/sigma_in_no-1} | 
          eig: {sigma_out_eig/sigma_in_eig-1} | bahc: {sigma_out_BAHC/sigma_in_BAHC-1}
          nls: {sigma_out_NLS/sigma_in_NLS-1}""")
    
    # break

  0%|          | 1/900 [00:06<1:37:08,  6.48s/it]

<class 'float'>
i: 0, no: 0.6382268419169692 | 
          eig: 0.8327981754906262 | bahc: 46.28264429505714
          nls: 0.7981524009029008


  0%|          | 2/900 [00:11<1:29:55,  6.01s/it]

<class 'float'>
i: 5, no: 0.555150474594001 | 
          eig: 0.7778374639752483 | bahc: 43.96680545948683
          nls: 0.8116459986694253


  0%|          | 3/900 [00:16<1:24:48,  5.67s/it]

<class 'float'>
i: 10, no: 0.5818718267225054 | 
          eig: 0.8554579827519648 | bahc: 44.22709537414996
          nls: 0.8105522188984486


  0%|          | 4/900 [00:21<1:21:00,  5.42s/it]

<class 'float'>
i: 15, no: 0.6102850628946981 | 
          eig: 0.5016880951419067 | bahc: 43.38687234999912
          nls: 0.8153885545868078


  1%|          | 5/900 [00:25<1:18:24,  5.26s/it]

<class 'float'>
i: 20, no: 0.5365221180901645 | 
          eig: -0.21919664310330766 | bahc: 46.33888095256961
          nls: 0.8230508031241006


  1%|          | 6/900 [00:31<1:17:41,  5.21s/it]

<class 'float'>
i: 25, no: 0.4365980735100412 | 
          eig: 0.799505452532767 | bahc: 50.50403329713656
          nls: 0.8041689503238463


  1%|          | 6/900 [00:33<1:22:03,  5.51s/it]


KeyboardInterrupt: 

In [123]:
@dask.delayed
def run_one_iteration(i):
    # |train|    test     |
    # |--T--|------T------|
    C_asset_in = X_raw.iloc[i:i+T]
    C_asset_out = X_raw.iloc[i+T:i+6*T]

    # covariance matrix
    C_in_cov = C_asset_in.cov().values
    C_out_cov = C_asset_out.cov().values

    # 1. no correlation cleaning
    C_corr = C_asset_in.corr()
    # print(type(C_corr))
    w_no = weights_GVM(C_corr)

    # print(type(w_no))

    # 2. eigenvalue clipping
    C_clipped_in = solution_eig(C_asset_in)
    w_eig = weights_GVM(C_clipped_in)

    # print(type(w_eig))

    # 3. cleaning with bahc
    X_centered = sklearn.preprocessing.StandardScaler(with_mean=True,
                            with_std=False).fit_transform(C_asset_in.values)  # column-wise!'
    Sigma_BAHC=bahc.filterCovariance(X_centered.T)
    w_GBM_BAHC=weights_GVM(Sigma_BAHC)

    # 4. cleaning with nls
    Sigma_NLS=pyRMT.optimalShrinkage(X_centered)
    w_GBM_NLS=weights_GVM(Sigma_NLS)

    # print(type(w_GBM_BAHC))
    # print(type(w_GBM_BAHC), type(Sigma_BAHC), type(W_eig))

    # in sample risk
    sigma_in_no = w_no.T@(C_in_cov@w_no)
    sigma_in_eig= (w_eig.T@(C_in_cov@w_eig)).item(0)
    sigma_in_BAHC= w_GBM_BAHC@(C_in_cov@w_GBM_BAHC)
    sigma_in_NLS= w_GBM_NLS@(C_in_cov@w_GBM_NLS)

    # print(type(sigma_in_eig))

    # out sample risk
    sigma_out_no= w_no.T@(C_out_cov@w_no)
    sigma_out_eig= (w_eig.T@(C_out_cov@w_eig)).item(0)
    sigma_out_BAHC= w_GBM_BAHC@(C_out_cov@w_GBM_BAHC)
    sigma_out_NLS= w_GBM_NLS@(C_out_cov@w_GBM_NLS)


    return sigma_in_no, sigma_in_eig, sigma_in_BAHC, sigma_in_NLS, \
        sigma_out_no, sigma_out_eig, sigma_out_BAHC, sigma_out_NLS 

In [124]:
no_arr = {"in": [], "out": []}
eig_arr = {"in": [], "out": []}
bahc_arr = {"in": [], "out": []}
nls_arr = {"in": [], "out": []}

step_range = 4500

promises = [run_one_iteration(i) for i in range(0, step_range, step_size)]
res = dask.compute(promises)

for each in res[0]:
    sigma_in_no, sigma_in_eig, sigma_in_BAHC, sigma_in_NLS, \
        sigma_out_no, sigma_out_eig, sigma_out_BAHC, sigma_out_NLS = each
    no_arr["in"].append(sigma_in_no)
    eig_arr["in"].append(sigma_in_eig)
    bahc_arr["in"].append(sigma_in_BAHC)
    nls_arr["in"].append(sigma_in_NLS)
    
    no_arr["out"].append(sigma_out_no)
    eig_arr["out"].append(sigma_out_eig)
    bahc_arr["out"].append(sigma_out_BAHC)
    nls_arr["out"].append(sigma_out_NLS)
    
res_df = pd.DataFrame()
res_df["no_arr_in"] = no_arr["in"]
res_df["eig_arr_in"] = eig_arr["in"]
res_df["bahc_arr_in"] = bahc_arr["in"]
res_df["nls_arr_in"] = nls_arr["in"]

res_df["no_arr_out"] = no_arr["out"]
res_df["eig_arr_out"] = eig_arr["out"]
res_df["bahc_arr_out"] = bahc_arr["out"]
res_df["nls_arr_out"] = nls_arr["out"]

res_df.to_pickle("in_out_risk.pkl")

print(res_df)

     no_arr_in  eig_arr_in   bahc_arr_in  nls_arr_in  no_arr_out  eig_arr_out  \
0     0.000008    0.000073  9.820557e-09    0.000083    0.000013     0.000134   
1     0.000009    0.000088  9.807227e-09    0.000082    0.000014     0.000157   
2     0.000009    0.000069  9.889239e-09    0.000082    0.000014     0.000127   
3     0.000009    0.000080  1.131399e-08    0.000082    0.000014     0.000120   
4     0.000010    0.000203  9.829093e-09    0.000082    0.000015     0.000158   
..         ...         ...           ...         ...         ...          ...   
895   0.000007    0.000044  1.392583e-08    0.000056    0.000006     0.000038   
896   0.000007    0.000051  1.406467e-08    0.000055    0.000006     0.000063   
897   0.000007    0.000042  1.392951e-08    0.000055    0.000006     0.000049   
898   0.000007    0.000062  1.433721e-08    0.000056    0.000006     0.000054   
899   0.000007    0.000048  1.414776e-08    0.000055    0.000006     0.000047   

     bahc_arr_out  nls_arr_

In [116]:
@dask.delayed
def fun(a):
    return a + 1

promises = [fun(a) for a in range(10)]
res = dask.compute(promises)
print(res)


# 0800 330800

([1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
