In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import dcor as dc
from scipy.cluster.hierarchy import linkage, cut_tree, dendrogram
from scipy.spatial.distance import squareform
from scipy.stats import norm
from math import sqrt,  tanh, ceil, log, cos, pi, sin
import seaborn as sns

In [None]:
import os

# The wrapping helpers live in a source folder next to the notebook folder,
# not in an installed package. Temporarily switch the working directory so the
# module can be imported, and always switch back afterwards.
# The path is built with os.path.join so it works on any OS, not only with
# Windows backslash separators.
original_dir = os.getcwd()
os.chdir(os.path.join('..', 'src', 'robustOptimPack', 'wrapping'))
try:
    # Star import kept on purpose: later cells call the helpers by bare name
    # (e.g. pairwise_dcor, wrapped_dcor, wrapped_covariance_correlation).
    from wrapping_funcs import *
finally:
    # Restore the working directory even when the import fails, so a failed
    # run does not leave the kernel in the wrong folder.
    os.chdir(original_dir)

In [None]:
# --- Monte Carlo design -----------------------------------------------------
n_reps = 50                      # replications per sample size
n_obs_vec = [100, 250, 500]      # sample sizes (rows of the simulated data)

# Result container: (sample size, replication, method); one column per
# similarity measure in the order cor, dcor, wrapped cor, wrapped dcor.
res_mat_noout = np.ones(shape=(len(n_obs_vec), n_reps, 4))

# --- Return distribution ----------------------------------------------------
# Candidate means / volatilities are scaled by the time step
# (means by delta_t, volatilities by its square root).
delta_t = 1 / 360
mean_vec = delta_t * np.array([
    -0.45, -0.25, -0.2, -0.15, -0.1, -0.05, 0.05, 0.08,
    0.1, 0.15, 0.2, 0.21, 0.25, 0.3, 0.45,
])
sd_vec = np.sqrt(delta_t) * np.array([
    0.5, 0.2, 0.3, 0.25, 0.3, 0.2, 0.25, 0.23,
    0.2, 0.4, 0.2, 0.25, 0.3, 0.15, 0.5,
])
sd_white_noise = sqrt(0.01)

# --- Cluster structure ------------------------------------------------------
n_clusts = 24                    # number of clusters the tree is cut into
n_assets = 48                    # total number of simulated assets (columns)
link_method = 'average'          # linkage criterion passed to scipy's linkage
# Size of every group: one group of 6, one of 5, two of 4, two of 3,
# five of 2 and thirteen singletons.
assets_in_groups = np.repeat([6, 5, 4, 3, 2, 1], repeats=[1, 1, 2, 2, 5, 13])

# --- Contamination ----------------------------------------------------------
out_fraction = 0.4               # share of observations turned into outliers
out_dist = 128                   # multiplier applied to the maximum when placing outliers

#### Basic simulation without outliers 

In [None]:
# Fresh result array for the outlier-free run, indexed as
# (sample size, replication, method); re-created here so re-running this
# section starts from a clean container.
res_mat_noout = np.ones(shape=(len(n_obs_vec), n_reps, 4))

In [None]:
 # Baseline experiment: simulate grouped asset returns without contamination,
 # cluster them hierarchically under four similarity measures and store, per
 # (sample size, replication, method), the share of assets whose cluster label
 # equals the ground-truth label.

 # Ground-truth label of every asset, derived from the configured group sizes
 # (group k contributes assets_in_groups[k] consecutive columns).
 true_labels = np.repeat(np.arange(len(assets_in_groups)), assets_in_groups)

 # Similarity estimators in result-column order: cor, dcor, wrapped cor,
 # wrapped dcor. Each maps the (n_obs, n_assets) data to a square matrix.
 # NOTE(review): index [1] is presumably the correlation matrix returned by
 # wrapped_covariance_correlation -- confirm against wrapping_funcs.
 similarity_estimators = (
     lambda returns: np.corrcoef(returns.T),
     pairwise_dcor,
     lambda returns: wrapped_covariance_correlation(returns)[1],
     wrapped_dcor,
 )

 for cur_nobs, n_obs in enumerate(n_obs_vec):
     for cur_rep in range(n_reps):
         # Seed by replication index so every run is reproducible.
         np.random.seed(cur_rep)

         group_blocks = []
         for group_ind in assets_in_groups:
             # One common standard-normal driver per group.
             y_l_t = np.random.normal(size=n_obs)
             data_group_ind = np.zeros((n_obs, group_ind))

             for asset_ind in range(group_ind):
                 # Each asset is an affine transform of the group driver with
                 # a randomly drawn mean and volatility.
                 asset_i_mean = np.random.choice(mean_vec, size=1)
                 asset_i_vol = np.random.choice(sd_vec, size=1)
                 data_group_ind[:, asset_ind] = asset_i_mean + np.multiply(y_l_t, asset_i_vol)

             group_blocks.append(data_group_ind)

         # Assemble all groups side by side in one step instead of growing a
         # matrix column block by column block.
         dat_sim = np.column_stack(group_blocks)

         for method_ind, estimate_similarity in enumerate(similarity_estimators):
             similarity = estimate_similarity(dat_sim)
             # Map similarity s to the dissimilarity (1 - s) / 2; rounding
             # removes floating-point asymmetry before squareform's checks.
             dissimilarity = squareform(np.round((1 - similarity) / 2, 6))
             tree = linkage(dissimilarity, method=link_method)
             labels = cut_tree(tree, n_clusters=n_clusts).flatten()
             # Positional label comparison; assumes cut_tree numbers clusters
             # in order of first appearance -- TODO confirm.
             res_mat_noout[cur_nobs][cur_rep][method_ind] = np.mean(labels == true_labels)


In [None]:
# Average score per method (columns: cor, dcor, wrapped cor, wrapped dcor)
# for every sample size. The label is taken from n_obs_vec so it always
# matches the sample size that was actually simulated.
for n_label, rep_scores in zip(n_obs_vec, res_mat_noout):
    print("n = {}: ".format(n_label), np.mean(rep_scores, axis=0))

### Simulation with comonotonic setting and rowwise outliers 

In [None]:
# Rowwise contamination touches all 48 columns; out_dim enters the outlier
# magnitude through the 1 / sqrt(out_dim) scaling used in the loop below.
out_dim = 48
# Result array indexed as (sample size, replication, method).
res_mat_out_rowwise = np.ones(shape=(len(n_obs_vec), n_reps, 4))

In [None]:
 # Rowwise (casewise) contamination experiment: simulate grouped asset
 # returns, overwrite a fraction of complete rows with one large constant,
 # then cluster under four similarity measures and store the share of assets
 # whose cluster label equals the ground-truth label.

 # Ground-truth label of every asset, derived from the configured group sizes.
 true_labels = np.repeat(np.arange(len(assets_in_groups)), assets_in_groups)

 # Similarity estimators in result-column order: cor, dcor, wrapped cor,
 # wrapped dcor.
 # NOTE(review): index [1] is presumably the correlation matrix returned by
 # wrapped_covariance_correlation -- confirm against wrapping_funcs.
 similarity_estimators = (
     lambda returns: np.corrcoef(returns.T),
     pairwise_dcor,
     lambda returns: wrapped_covariance_correlation(returns)[1],
     wrapped_dcor,
 )

 for cur_nobs, n_obs in enumerate(n_obs_vec):
     # Number of rows to contaminate for this sample size.
     n_outliers = round(out_fraction * n_obs)

     for cur_rep in range(n_reps):
         # Seed by replication index so every run is reproducible.
         np.random.seed(cur_rep)

         group_blocks = []
         for group_ind in assets_in_groups:
             # One common standard-normal driver per group.
             y_l_t = np.random.normal(size=n_obs)
             data_group_ind = np.zeros((n_obs, group_ind))

             for asset_ind in range(group_ind):
                 asset_i_mean = np.random.choice(mean_vec, size=1)
                 asset_i_vol = np.random.choice(sd_vec, size=1)
                 data_group_ind[:, asset_ind] = asset_i_mean + np.multiply(y_l_t, asset_i_vol)

             group_blocks.append(data_group_ind)

         dat_sim = np.column_stack(group_blocks)

         # Casewise outliers: draw the rows WITHOUT replacement so exactly
         # out_fraction of the observations are contaminated (sampling with
         # replacement produced duplicates and hence fewer outlying rows).
         outliers_ind = np.random.choice(n_obs, size=n_outliers, replace=False)
         # The right-hand side is evaluated on the clean data before any row
         # is overwritten.
         dat_sim[outliers_ind, :] = (np.max(dat_sim) * out_dist) / sqrt(out_dim)

         for method_ind, estimate_similarity in enumerate(similarity_estimators):
             similarity = estimate_similarity(dat_sim)
             # Map similarity s to the dissimilarity (1 - s) / 2; rounding
             # removes floating-point asymmetry before squareform's checks.
             dissimilarity = squareform(np.round((1 - similarity) / 2, 6))
             tree = linkage(dissimilarity, method=link_method)
             labels = cut_tree(tree, n_clusters=n_clusts).flatten()
             # Positional label comparison; assumes cut_tree numbers clusters
             # in order of first appearance -- TODO confirm.
             res_mat_out_rowwise[cur_nobs][cur_rep][method_ind] = np.mean(labels == true_labels)

In [None]:
# Average score per method (columns: cor, dcor, wrapped cor, wrapped dcor)
# for every sample size. The label is taken from n_obs_vec so it always
# matches the sample size that was actually simulated.
for n_label, rep_scores in zip(n_obs_vec, res_mat_out_rowwise):
    print("n = {}: ".format(n_label), np.mean(rep_scores, axis=0))

#### cellwise outliers on 16 columns 

In [None]:
# Number of columns that receive cellwise outliers in this scenario; it also
# enters the outlier magnitude through the 1 / sqrt(out_dim) scaling.
out_dim = 16
# Result array indexed as (sample size, replication, method).
res_mat_out_16 = np.ones(shape=(len(n_obs_vec), n_reps, 4))

In [None]:
# Cellwise contamination experiment (out_dim contaminated columns): simulate
# grouped asset returns, plant outliers in selected columns, then cluster
# under four similarity measures and store the share of assets whose cluster
# label equals the ground-truth label.

# Ground-truth label of every asset, derived from the configured group sizes.
true_labels = np.repeat(np.arange(len(assets_in_groups)), assets_in_groups)

# Similarity estimators in result-column order: cor, dcor, wrapped cor,
# wrapped dcor.
# NOTE(review): index [1] is presumably the correlation matrix returned by
# wrapped_covariance_correlation -- confirm against wrapping_funcs.
similarity_estimators = (
    lambda returns: np.corrcoef(returns.T),
    pairwise_dcor,
    lambda returns: wrapped_covariance_correlation(returns)[1],
    wrapped_dcor,
)

for cur_nobs, n_obs in enumerate(n_obs_vec):
    # Number of contaminated observations per affected column / per row step.
    n_outliers = round(out_fraction * n_obs)

    for cur_rep in range(n_reps):
        # Seed by replication index so every run is reproducible.
        np.random.seed(cur_rep)

        # Columns that receive outliers, drawn WITHOUT replacement so exactly
        # out_dim distinct columns are contaminated (requires
        # out_dim <= n_assets; sampling with replacement hit fewer columns).
        out_cols = np.sort(np.random.choice(n_assets, size=out_dim, replace=False))
        out_col_counter = 0  # running column index across all groups

        group_blocks = []
        for group_ind in assets_in_groups:
            # One common standard-normal driver per group.
            y_l_t = np.random.normal(size=n_obs)
            data_group_ind = np.zeros((n_obs, group_ind))

            for asset_ind in range(group_ind):
                asset_i_mean = np.random.choice(mean_vec, size=1)
                asset_i_vol = np.random.choice(sd_vec, size=1)
                data_group_ind[:, asset_ind] = asset_i_mean + np.multiply(y_l_t, asset_i_vol)

                if out_col_counter in out_cols:
                    # Distinct rows, so exactly out_fraction of this column is
                    # replaced; the magnitude uses the clean column maximum.
                    outliers_ind = np.random.choice(n_obs, size=n_outliers, replace=False)
                    data_group_ind[outliers_ind, asset_ind] = (
                        np.max(data_group_ind[:, asset_ind]) * out_dist
                    ) / sqrt(out_dim)
                out_col_counter += 1

            group_blocks.append(data_group_ind)

        dat_sim = np.column_stack(group_blocks)

        # NOTE(review): this additionally overwrites complete rows (casewise
        # outliers) on top of the cellwise contamination above. It mirrors the
        # rowwise experiment and may be a copy-paste leftover -- confirm that
        # the combination is intended for the cellwise scenarios.
        outliers_ind = np.random.choice(n_obs, size=n_outliers, replace=False)
        dat_sim[outliers_ind, :] = (np.max(dat_sim) * out_dist) / sqrt(out_dim)

        for method_ind, estimate_similarity in enumerate(similarity_estimators):
            similarity = estimate_similarity(dat_sim)
            # Map similarity s to the dissimilarity (1 - s) / 2; rounding
            # removes floating-point asymmetry before squareform's checks.
            dissimilarity = squareform(np.round((1 - similarity) / 2, 6))
            tree = linkage(dissimilarity, method=link_method)
            labels = cut_tree(tree, n_clusters=n_clusts).flatten()
            # Positional label comparison; assumes cut_tree numbers clusters
            # in order of first appearance -- TODO confirm.
            res_mat_out_16[cur_nobs][cur_rep][method_ind] = np.mean(labels == true_labels)

In [None]:
# Average score per method (columns: cor, dcor, wrapped cor, wrapped dcor)
# for every sample size. Labels and row indices come from n_obs_vec instead
# of being hard-coded, so they cannot drift from the configuration.
for n_label, rep_scores in zip(n_obs_vec, res_mat_out_16):
    print("n = {}: ".format(n_label), np.mean(rep_scores, axis=0))

#### cellwise outliers on 32 columns 

In [None]:
# Number of columns that receive cellwise outliers in this scenario; it also
# enters the outlier magnitude through the 1 / sqrt(out_dim) scaling.
out_dim = 32
# Result array indexed as (sample size, replication, method).
res_mat_out_32 = np.ones(shape=(len(n_obs_vec), n_reps, 4))

In [None]:
# Cellwise contamination experiment (out_dim contaminated columns): simulate
# grouped asset returns, plant outliers in selected columns, then cluster
# under four similarity measures and store the share of assets whose cluster
# label equals the ground-truth label.

# Ground-truth label of every asset, derived from the configured group sizes.
true_labels = np.repeat(np.arange(len(assets_in_groups)), assets_in_groups)

# Similarity estimators in result-column order: cor, dcor, wrapped cor,
# wrapped dcor.
# NOTE(review): index [1] is presumably the correlation matrix returned by
# wrapped_covariance_correlation -- confirm against wrapping_funcs.
similarity_estimators = (
    lambda returns: np.corrcoef(returns.T),
    pairwise_dcor,
    lambda returns: wrapped_covariance_correlation(returns)[1],
    wrapped_dcor,
)

for cur_nobs, n_obs in enumerate(n_obs_vec):
    # Number of contaminated observations per affected column / per row step.
    n_outliers = round(out_fraction * n_obs)

    for cur_rep in range(n_reps):
        # Seed by replication index so every run is reproducible.
        np.random.seed(cur_rep)

        # Columns that receive outliers, drawn WITHOUT replacement so exactly
        # out_dim distinct columns are contaminated (requires
        # out_dim <= n_assets; sampling with replacement hit fewer columns).
        out_cols = np.sort(np.random.choice(n_assets, size=out_dim, replace=False))
        out_col_counter = 0  # running column index across all groups

        group_blocks = []
        for group_ind in assets_in_groups:
            # One common standard-normal driver per group.
            y_l_t = np.random.normal(size=n_obs)
            data_group_ind = np.zeros((n_obs, group_ind))

            for asset_ind in range(group_ind):
                asset_i_mean = np.random.choice(mean_vec, size=1)
                asset_i_vol = np.random.choice(sd_vec, size=1)
                data_group_ind[:, asset_ind] = asset_i_mean + np.multiply(y_l_t, asset_i_vol)

                if out_col_counter in out_cols:
                    # Distinct rows, so exactly out_fraction of this column is
                    # replaced; the magnitude uses the clean column maximum.
                    outliers_ind = np.random.choice(n_obs, size=n_outliers, replace=False)
                    data_group_ind[outliers_ind, asset_ind] = (
                        np.max(data_group_ind[:, asset_ind]) * out_dist
                    ) / sqrt(out_dim)
                out_col_counter += 1

            group_blocks.append(data_group_ind)

        dat_sim = np.column_stack(group_blocks)

        # NOTE(review): this additionally overwrites complete rows (casewise
        # outliers) on top of the cellwise contamination above. It mirrors the
        # rowwise experiment and may be a copy-paste leftover -- confirm that
        # the combination is intended for the cellwise scenarios.
        outliers_ind = np.random.choice(n_obs, size=n_outliers, replace=False)
        dat_sim[outliers_ind, :] = (np.max(dat_sim) * out_dist) / sqrt(out_dim)

        for method_ind, estimate_similarity in enumerate(similarity_estimators):
            similarity = estimate_similarity(dat_sim)
            # Map similarity s to the dissimilarity (1 - s) / 2; rounding
            # removes floating-point asymmetry before squareform's checks.
            dissimilarity = squareform(np.round((1 - similarity) / 2, 6))
            tree = linkage(dissimilarity, method=link_method)
            labels = cut_tree(tree, n_clusters=n_clusts).flatten()
            # Positional label comparison; assumes cut_tree numbers clusters
            # in order of first appearance -- TODO confirm.
            res_mat_out_32[cur_nobs][cur_rep][method_ind] = np.mean(labels == true_labels)

In [None]:
# Average score per method (columns: cor, dcor, wrapped cor, wrapped dcor)
# for every sample size. Labels and row indices come from n_obs_vec instead
# of being hard-coded, so they cannot drift from the configuration.
for n_label, rep_scores in zip(n_obs_vec, res_mat_out_32):
    print("n = {}: ".format(n_label), np.mean(rep_scores, axis=0))

#### cellwise outliers on 48 columns 

In [None]:
# Number of columns that receive cellwise outliers in this scenario; it also
# enters the outlier magnitude through the 1 / sqrt(out_dim) scaling.
out_dim = 48
# Result array indexed as (sample size, replication, method).
res_mat_out_48 = np.ones(shape=(len(n_obs_vec), n_reps, 4))

In [None]:
# Cellwise contamination experiment (out_dim contaminated columns): simulate
# grouped asset returns, plant outliers in selected columns, then cluster
# under four similarity measures and store the share of assets whose cluster
# label equals the ground-truth label.

# Ground-truth label of every asset, derived from the configured group sizes.
true_labels = np.repeat(np.arange(len(assets_in_groups)), assets_in_groups)

# Similarity estimators in result-column order: cor, dcor, wrapped cor,
# wrapped dcor.
# NOTE(review): index [1] is presumably the correlation matrix returned by
# wrapped_covariance_correlation -- confirm against wrapping_funcs.
similarity_estimators = (
    lambda returns: np.corrcoef(returns.T),
    pairwise_dcor,
    lambda returns: wrapped_covariance_correlation(returns)[1],
    wrapped_dcor,
)

for cur_nobs, n_obs in enumerate(n_obs_vec):
    # Number of contaminated observations per affected column / per row step.
    n_outliers = round(out_fraction * n_obs)

    for cur_rep in range(n_reps):
        # Seed by replication index so every run is reproducible.
        np.random.seed(cur_rep)

        # Columns that receive outliers, drawn WITHOUT replacement so exactly
        # out_dim distinct columns are contaminated (requires
        # out_dim <= n_assets; sampling with replacement hit fewer columns).
        out_cols = np.sort(np.random.choice(n_assets, size=out_dim, replace=False))
        out_col_counter = 0  # running column index across all groups

        group_blocks = []
        for group_ind in assets_in_groups:
            # One common standard-normal driver per group.
            y_l_t = np.random.normal(size=n_obs)
            data_group_ind = np.zeros((n_obs, group_ind))

            for asset_ind in range(group_ind):
                asset_i_mean = np.random.choice(mean_vec, size=1)
                asset_i_vol = np.random.choice(sd_vec, size=1)
                data_group_ind[:, asset_ind] = asset_i_mean + np.multiply(y_l_t, asset_i_vol)

                if out_col_counter in out_cols:
                    # Distinct rows, so exactly out_fraction of this column is
                    # replaced; the magnitude uses the clean column maximum.
                    outliers_ind = np.random.choice(n_obs, size=n_outliers, replace=False)
                    data_group_ind[outliers_ind, asset_ind] = (
                        np.max(data_group_ind[:, asset_ind]) * out_dist
                    ) / sqrt(out_dim)
                out_col_counter += 1

            group_blocks.append(data_group_ind)

        dat_sim = np.column_stack(group_blocks)

        # NOTE(review): this additionally overwrites complete rows (casewise
        # outliers) on top of the cellwise contamination above. It mirrors the
        # rowwise experiment and may be a copy-paste leftover -- confirm that
        # the combination is intended for the cellwise scenarios.
        outliers_ind = np.random.choice(n_obs, size=n_outliers, replace=False)
        dat_sim[outliers_ind, :] = (np.max(dat_sim) * out_dist) / sqrt(out_dim)

        for method_ind, estimate_similarity in enumerate(similarity_estimators):
            similarity = estimate_similarity(dat_sim)
            # Map similarity s to the dissimilarity (1 - s) / 2; rounding
            # removes floating-point asymmetry before squareform's checks.
            dissimilarity = squareform(np.round((1 - similarity) / 2, 6))
            tree = linkage(dissimilarity, method=link_method)
            labels = cut_tree(tree, n_clusters=n_clusts).flatten()
            # Positional label comparison; assumes cut_tree numbers clusters
            # in order of first appearance -- TODO confirm.
            res_mat_out_48[cur_nobs][cur_rep][method_ind] = np.mean(labels == true_labels)

In [None]:
# Average score per method (columns: cor, dcor, wrapped cor, wrapped dcor)
# for every sample size. Labels and row indices come from n_obs_vec instead
# of being hard-coded, so they cannot drift from the configuration.
for n_label, rep_scores in zip(n_obs_vec, res_mat_out_48):
    print("n = {}: ".format(n_label), np.mean(rep_scores, axis=0))