In [148]:
import pandas as pd
# Show the pandas version in use; the saved output below records the
# version that produced the results in this notebook.
pd.__version__

'0.23.0'

# Introduction

## Description

The calculations for [erlang matrix implementation efficiencies](https://nbviewer.jupyter.org/urls/github.com/flmath/matrix_implementations_in_erlang/raw/master/jupyter/main.ipynb), with the [methodology](https://nbviewer.jupyter.org/urls/github.com/flmath/empirical-growth-testing/raw/master/Empirical_growth_testing.ipynb) described in my other [repo](https://github.com/flmath/empirical-growth-testing).

In [149]:
from urllib.request import urlopen
import re
import json

# Download the raw benchmark results and parse every record line into a dict.
# Record lines start with "{" and use bare (unquoted) keys, so they are turned
# into JSON by inserting a double quote after "{", before ":" and after ", ".
data = []
# The `with` block guarantees the HTTP response is closed, even when decoding
# or JSON parsing raises.
with urlopen(
        "https://github.com/flmath/matrix_implementations_in_erlang/raw/master/examples/results"
) as response:
    for line in response:
        decoded = line.decode('utf-8')
        # startswith() is safe on an empty string, unlike indexing decoded[0].
        if decoded.startswith("{"):
            # Plain string replacement: none of the patterns needs a regex.
            decoded = decoded.replace('{', '{"')
            decoded = decoded.replace(':', '":')
            decoded = decoded.replace(', ', ', "')
            data.append(json.loads(decoded))

LoadedTable = pd.DataFrame.from_dict(data, orient='columns')
LoadedTable.head()

Unnamed: 0,ExecutionTime,Height,Operation,Representation,Runs,Width
0,1078,100,get_value,matrix_as_bit_map,100,100
1,348,100,get_value,matrix_as_ext_bit_map,100,100
2,21,100,get_value,matrix_as_digraph,100,100
3,19,100,get_value,matrix_as_ets_bin,100,100
4,13,100,get_value,matrix_as_map,100,100


Let's process the tables the same way as in [empirical growth testing](https://nbviewer.jupyter.org/github/flmath/empirical-growth-testing/blob/master/Empirical_growth_testing.ipynb).

In [150]:
# Keep only the columns the analysis below uses.
LoadedTable = LoadedTable.loc[
    :,
    ['ExecutionTime', 'Height', 'Operation', 'Representation']
]

In [151]:
# Largest ExecutionTime per (Operation, Representation) pair, stored in a
# single column named 'Max'.
# NOTE(review): despite its name, MinTable holds maxima.
MinTable = (
    LoadedTable
    .groupby(['Operation', 'Representation'])['ExecutionTime']
    .max()
    .to_frame('Max')
)

In [152]:
# Pivot the measurements: one row per (Operation, Representation) and one
# column per Height.
SortedTable = (
    LoadedTable
    .set_index(['Operation', 'Representation', 'Height'])
    .unstack(2)
)
# Column labels ('ExecutionTime', '100') ... ('ExecutionTime', '1000'); the
# reindex below arranges the columns in exactly this order.
IndexTuples = [('ExecutionTime', str(height)) for height in range(100, 1001, 100)]
SortedTable = SortedTable.reindex(columns=pd.MultiIndex.from_tuples(IndexTuples))
SortedTable.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime
Unnamed: 0_level_1,Unnamed: 1_level_1,100,200,300,400,500,600,700,800,900,1000
Operation,Representation,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
cols_sums,matrix_as_array,149107,698647,1673644,3328258,5315118,7969973,10883773,14856932,19837491,32788776
cols_sums,matrix_as_array_of_arrays,163495,846333,1965130,3481540,5531289,8061123,11006039,15028130,18675651,22708192


Create the fixed legend:

In [153]:
idx = pd.IndexSlice
# All rows of the 'cols_sums' operation, every column.
ColsSums = SortedTable.loc[idx['cols_sums', :], idx[:]]
# Pair each representation label (second index level) with its position.
list(enumerate(ColsSums.index.levels[1]))

[(0, 'matrix_as_array'),
 (1, 'matrix_as_array_of_arrays'),
 (2, 'matrix_as_big_tuple'),
 (3, 'matrix_as_bit_map'),
 (4, 'matrix_as_dict'),
 (5, 'matrix_as_digraph'),
 (6, 'matrix_as_ets'),
 (7, 'matrix_as_ets_bin'),
 (8, 'matrix_as_ets_list'),
 (9, 'matrix_as_ext_bit_map'),
 (10, 'matrix_as_gb_tree'),
 (11, 'matrix_as_list_map'),
 (12, 'matrix_as_list_of_lists'),
 (13, 'matrix_as_map'),
 (14, 'matrix_as_sofs'),
 (15, 'matrix_as_tuple_of_tuples')]

In [154]:
# One sub-table per benchmarked operation (selected on the first index level).
ColsSums, RowsSums, SetValue, GetValue = (
    SortedTable.loc[idx[operation, :], idx[:]]
    for operation in ('cols_sums', 'rows_sums', 'set_value', 'get_value')
)

## Tools preparation

Now I will use the [methodology](https://nbviewer.jupyter.org/urls/github.com/flmath/empirical-growth-testing/raw/master/Empirical_growth_testing.ipynb) to find which of the implementations are the most promising:

In [155]:
import numpy as np
import scipy as sc

Because our series is quite short, we will reduce the number of sample points used for [differentiation](http://web.media.mit.edu/~crtaylor/calculator.html), to obtain a longer series for the derivative of $g$.

In [156]:
def df_1_small(f,h):
    """Estimate the first derivative of ``f`` with a 3-point forward stencil.

    Parameters
    ----------
    f : array-like of numbers
        Sampled values; converted with ``np.float64``.
    h : number
        Sample spacing. It is converted with ``np.float64`` but the stencil is
        always evaluated with a step of 1, so the result is a derivative per
        sample index.
        NOTE(review): confirm that ignoring ``h`` is intentional.

    Returns
    -------
    list
        One estimate per window start ``0 .. f.size - 4``, i.e. ``f.size - 3``
        values; empty when ``f`` has three or fewer samples.
        NOTE(review): the last complete 3-sample window is not evaluated.
    """
    samples = np.float64(f)
    h = np.float64(h)  # conversion kept for parity; the value is not used below

    def forward_difference(window, step):
        return ((-3*window[0]+4*window[1]-1*window[2])/(2*1.0*step))

    estimates = []
    for start in range(samples.size - 3):
        estimates.append(forward_difference(samples[start:start+3], 1))
    return estimates
def d_g(f,h):
    """Return ``df_1_small`` applied to the natural logarithm of ``f``.

    ``f`` is passed through ``np.log`` first; ``h`` is forwarded unchanged.
    """
    log_values = np.log(f)
    return df_1_small(log_values, h)

In [157]:
import statsmodels.tsa.stattools as stat
import statsmodels.api as sm
import scipy.stats as sps

In [158]:
# Preview the first rows of the column-sum timings.
ColsSums.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime,ExecutionTime
Unnamed: 0_level_1,Unnamed: 1_level_1,100,200,300,400,500,600,700,800,900,1000
Operation,Representation,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
cols_sums,matrix_as_array,149107,698647,1673644,3328258,5315118,7969973,10883773,14856932,19837491,32788776
cols_sums,matrix_as_array_of_arrays,163495,846333,1965130,3481540,5531289,8061123,11006039,15028130,18675651,22708192
cols_sums,matrix_as_big_tuple,32050,92253,319090,366064,625388,863448,1234684,2388208,3653456,11325482
cols_sums,matrix_as_bit_map,3810040,28384618,53035475,106308550,365437658,226659932,349774361,475854253,589586379,2484937761
cols_sums,matrix_as_dict,236984,1311307,4352125,9008138,15995373,22315833,31890347,42940168,53118754,67434534


## Columns sums

In [159]:
# Collapse each row of timings into a single array stored in column 'f'.
Statistic = ColsSums.agg(
    lambda row: pd.Series({'f': row.values}),
    axis=1
)

In [160]:
# g = log(f), computed element-wise for every row.
Statistic['g'] = Statistic.aggregate(
    lambda row: np.log(row['f']),
    axis=1
)

In [161]:
# dg = finite-difference estimate of the derivative of log(f).
# 'f' is selected by label: the former positional `x.values[0]` only worked
# while 'f' happened to be the first column of Statistic.
Statistic['dg'] = Statistic.aggregate(lambda row: d_g(row['f'], 100), axis=1)

In [162]:
# Spearman rank correlation (coefficient, p-value) between the sample
# position 0..len(dg)-1 and dg.
Statistic[['spearmanr_r', 'spearmanr_pval']] = (
    Statistic
    .aggregate(lambda row: sps.spearmanr(range(len(row['dg'])), row['dg']), axis=1)
    .apply(pd.Series)
)

In [163]:
# KPSS test on dg with regression='c'; keep the first two returned values
# (statistic and p-value).
# NOTE(review): the saved output shows the same kpss_r / kpss_pval for every
# row, so the test is presumably degenerate on series this short - confirm
# and consider setting the lag count explicitly.
Statistic[['kpss_r', 'kpss_pval']] = (
    Statistic
    .apply(lambda row: stat.kpss(row['dg'], regression='c')[0:2], axis=1)
    .apply(pd.Series)
)

In [164]:
# Disabled on purpose: the Augmented Dickey-Fuller test raises on these short
# series (see the ValueError recorded at the end of this cell).
#Statistic[['adf_r', 'adf_pval']] =\
#Statistic.apply(\
#lambda x: stat.adfuller(x['dg'], maxlag=None, regression='c', autolag='AIC')[0:2], axis=1).apply(pd.Series)

#ValueError: ('maxlag should be < nobs', 'occurred at index (cols_sums, matrix_as_array)')

The augmented Dickey–Fuller test needs more than 7 values to work properly.

In [165]:
# Linear regression of dg against its sample position (shifted by 1e-6);
# keep the first four returned values.
Statistic[['reg_slope', 'reg_intercept', 'reg_rv', 'reg_pval']] = (
    Statistic
    .aggregate(
        lambda row: sps.linregress(np.arange(len(row['dg'])) + 0.000001, row['dg'])[0:4],
        axis=1)
    .apply(pd.Series)
)

In [166]:
# Linear regression of g against log(position + 1.01), i.e. a log-log fit;
# keep the first four returned values.
Statistic[['deg_slope', 'deg_intercept', 'deg_rv', 'deg_pval']] = (
    Statistic
    .aggregate(
        lambda row: sps.linregress(np.log(np.arange(len(row['g'])) + 1.01), row['g'])[0:4],
        axis=1)
    .apply(pd.Series)
)

In [167]:
# Round the fitted log-log slope up to the next integer.
Statistic['deg_int_slope'] = np.ceil(Statistic['deg_slope'])

In [168]:
# Euclidean norm of the last four dg values of every row.
Statistic['dg_end'] = Statistic.aggregate(
    lambda row: np.linalg.norm(row['dg'][-4:]),
    axis=1
)

In [169]:
# Display the scalar summary columns.
Statistic[[
    'deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'kpss_r', 'kpss_pval',
    'reg_slope', 'reg_pval', 'deg_slope', 'deg_pval', 'dg_end'
]]

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,kpss_r,kpss_pval,reg_slope,reg_pval,deg_slope,deg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
cols_sums,matrix_as_array,3.0,-0.964286,0.000454,0.571429,0.025579,-0.226003,0.009217,2.271832,1.235562e-11,0.80923
cols_sums,matrix_as_array_of_arrays,3.0,-0.964286,0.000454,0.571429,0.025579,-0.236036,0.020884,2.130276,3.985739e-13,0.805691
cols_sums,matrix_as_big_tuple,3.0,-0.392857,0.383317,0.571429,0.025579,-0.120378,0.341448,2.293873,4.931583e-06,1.073191
cols_sums,matrix_as_bit_map,3.0,-0.678571,0.09375,0.571429,0.025579,-0.30646,0.202703,2.443228,1.751444e-06,2.369779
cols_sums,matrix_as_dict,3.0,-0.892857,0.006807,0.571429,0.025579,-0.266445,0.003776,2.476448,3.062645e-11,0.922407
cols_sums,matrix_as_digraph,3.0,-0.75,0.052181,0.571429,0.025579,-0.280718,0.0454,2.379524,8.360162e-10,1.374765
cols_sums,matrix_as_ets,4.0,-1.0,0.0,0.571429,0.025579,-0.367776,0.002108,3.607034,1.264318e-11,1.251653
cols_sums,matrix_as_ets_bin,2.0,-0.535714,0.215217,0.571429,0.025579,-0.434525,0.058998,1.900358,2.748773e-05,1.60539
cols_sums,matrix_as_ets_list,4.0,-1.0,0.0,0.571429,0.025579,-0.370429,0.00219,3.615191,2.611982e-11,1.245559
cols_sums,matrix_as_ext_bit_map,3.0,-0.678571,0.09375,0.571429,0.025579,-0.28674,0.134504,2.454971,1.839272e-07,2.242111


Let's limit our results to representations for which the derivative of $g$ is monotonically decreasing.

In [170]:
# Rows whose dg has a significant (p < 0.05) negative rank correlation.
# Both conditions are combined into ONE boolean row mask with `&`. The former
# tuple of two masks only behaved like a logical AND because the row index
# happens to be a two-level MultiIndex (pandas treats such a tuple as one
# indexer per index level).
Statistic.loc[
    (Statistic['spearmanr_pval'] < 0.05) & (Statistic['spearmanr_r'] < 0.0),
    ['deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'reg_slope', 'reg_pval', 'dg_end']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,reg_slope,reg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cols_sums,matrix_as_array,3.0,-0.964286,0.000454,-0.226003,0.009217,0.80923
cols_sums,matrix_as_array_of_arrays,3.0,-0.964286,0.000454,-0.236036,0.020884,0.805691
cols_sums,matrix_as_dict,3.0,-0.892857,0.006807,-0.266445,0.003776,0.922407
cols_sums,matrix_as_ets,4.0,-1.0,0.0,-0.367776,0.002108,1.251653
cols_sums,matrix_as_ets_list,4.0,-1.0,0.0,-0.370429,0.00219,1.245559
cols_sums,matrix_as_gb_tree,3.0,-1.0,0.0,-0.226942,0.010558,0.804812
cols_sums,matrix_as_list_map,3.0,-1.0,0.0,-0.266079,0.00395,0.860612
cols_sums,matrix_as_sofs,3.0,-1.0,0.0,-0.277363,0.000769,0.786182


And check whether the derivative of $g$ gets close to $0$.

In [171]:
# Rows whose trailing dg norm is below 0.01.
Statistic.loc[
    Statistic['dg_end'] < 0.01,
    ['deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'reg_slope', 'reg_pval', 'dg_end']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,reg_slope,reg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


We have a few candidates for less than exponential growth (which is impressive since matrices grow in a quadratic manner and we need to traverse all elements). The maximal slope suggests that we should try up to the $5$th derivative, to check if we can get a polynomial.

The $f$ has 10 data points and we need the $5$th derivative, so we need a stencil with at least 6 sample points.

In [172]:
def df_1(f,h):
    """Estimate the first derivative of ``f`` with a 6-point forward stencil.

    Parameters
    ----------
    f : array-like of numbers
        Sampled values; converted with ``np.float64``.
    h : number
        Sample spacing. It is converted with ``np.float64`` but the stencil is
        always evaluated with a step of 1.
        NOTE(review): confirm that ignoring ``h`` is intentional.

    Returns
    -------
    list
        ``f.size - 6`` estimates (window starts ``0 .. f.size - 7``); empty
        when ``f`` has six or fewer samples.
        NOTE(review): the last complete 6-sample window is not evaluated.
    """
    samples = np.float64(f)
    h = np.float64(h)  # conversion kept for parity; the value is not used below

    def stencil(w, step):
        return ((-137*w[0]+300*w[1]-300*w[2]+200*w[3]-75*w[4]+12*w[5])/(60*1.0*step))

    estimates = []
    for start in range(samples.size - 6):
        estimates.append(stencil(samples[start:start+6], 1))
    return estimates

In [173]:
def df_2(f,h):
    """Estimate the second derivative of ``f`` with a 6-point forward stencil.

    ``f`` is converted with ``np.float64``. ``h`` is converted as well, but the
    stencil is always evaluated with a step of 1 (NOTE(review): confirm that
    ignoring ``h`` is intentional).

    Returns a list of ``f.size - 6`` estimates (window starts
    ``0 .. f.size - 7``); empty when ``f`` has six or fewer samples.
    """
    samples = np.float64(f)
    h = np.float64(h)  # conversion kept for parity; the value is not used below

    def stencil(w, step):
        return ((45*w[0]-154*w[1]+214*w[2]-156*w[3]+61*w[4]-10*w[5])/(12*1.0*np.power(step,2)))

    estimates = []
    for start in range(samples.size - 6):
        estimates.append(stencil(samples[start:start+6], 1))
    return estimates

In [174]:
def df_3(f,h):
    """Estimate the third derivative of ``f`` with a 6-point forward stencil.

    ``f`` is converted with ``np.float64``. ``h`` is converted as well, but the
    stencil is always evaluated with a step of 1 (NOTE(review): confirm that
    ignoring ``h`` is intentional).

    Returns a list of ``f.size - 6`` estimates (window starts
    ``0 .. f.size - 7``); empty when ``f`` has six or fewer samples.
    """
    samples = np.float64(f)
    h = np.float64(h)  # conversion kept for parity; the value is not used below

    def stencil(w, step):
        return ((-17*w[0]+71*w[1]-118*w[2]+98*w[3]-41*w[4]+7*w[5])/(4*1.0*np.power(step,3)))

    estimates = []
    for start in range(samples.size - 6):
        estimates.append(stencil(samples[start:start+6], 1))
    return estimates

In [175]:
def df_4(f,h):
    """Estimate the fourth derivative of ``f`` with a 6-point forward stencil.

    ``f`` is converted with ``np.float64``. ``h`` is converted as well, but the
    stencil is always evaluated with a step of 1 (NOTE(review): confirm that
    ignoring ``h`` is intentional).

    Returns a list of ``f.size - 6`` estimates (window starts
    ``0 .. f.size - 7``); empty when ``f`` has six or fewer samples.
    """
    samples = np.float64(f)
    h = np.float64(h)  # conversion kept for parity; the value is not used below

    def stencil(w, step):
        return ((3*w[0]-14*w[1]+26*w[2]-24*w[3]+11*w[4]-2*w[5])/(1*1.0*np.power(step,4)))

    estimates = []
    for start in range(samples.size - 6):
        estimates.append(stencil(samples[start:start+6], 1))
    return estimates

In [176]:
def df_5(f,h):
    """Estimate the fifth derivative of ``f`` with a 6-point forward stencil.

    ``f`` is converted with ``np.float64``. ``h`` is converted as well, but the
    stencil is always evaluated with a step of 1 (NOTE(review): confirm that
    ignoring ``h`` is intentional).

    Returns a list of ``f.size - 6`` estimates (window starts
    ``0 .. f.size - 7``); empty when ``f`` has six or fewer samples.
    """
    samples = np.float64(f)
    h = np.float64(h)  # conversion kept for parity; the value is not used below

    def stencil(w, step):
        return ((-1*w[0]+5*w[1]-10*w[2]+10*w[3]-5*w[4]+1*w[5])/(1*1.0*np.power(step,5)))

    estimates = []
    for start in range(samples.size - 6):
        estimates.append(stencil(samples[start:start+6], 1))
    return estimates

In [177]:
# Stencil estimates of the 1st..5th derivative of f, one list per row.
for _column, _stencil in (('df_1', df_1), ('df_2', df_2), ('df_3', df_3),
                          ('df_4', df_4), ('df_5', df_5)):
    Statistic[_column] = Statistic.aggregate(
        lambda row, _stencil=_stencil: _stencil(row['f'], 100), axis=1)

In [178]:
# Element-wise ratios of consecutive derivative estimates:
# df_45 = df_5 / df_4, df_34 = df_4 / df_3, df_23 = df_3 / df_2.
for _ratio, _upper, _lower in (('df_45', 'df_5', 'df_4'),
                               ('df_34', 'df_4', 'df_3'),
                               ('df_23', 'df_3', 'df_2')):
    Statistic[_ratio] = Statistic.apply(
        lambda row, _upper=_upper, _lower=_lower: np.divide(row[_upper], row[_lower]),
        axis=1)

In [179]:
# Euclidean norm of the df_4 / df_3 ratios, per representation.
Statistic.aggregate(
    lambda row: np.linalg.norm(row['df_34']),
    axis=1
)

Operation  Representation           
cols_sums  matrix_as_array               1.857831
           matrix_as_array_of_arrays     1.994941
           matrix_as_big_tuple           1.680135
           matrix_as_bit_map             1.902769
           matrix_as_dict                1.855826
           matrix_as_digraph             1.825195
           matrix_as_ets                 1.905938
           matrix_as_ets_bin             1.834722
           matrix_as_ets_list            1.730902
           matrix_as_ext_bit_map         1.905516
           matrix_as_gb_tree             1.772036
           matrix_as_list_map            1.660134
           matrix_as_list_of_lists       2.027784
           matrix_as_map                 2.250369
           matrix_as_sofs               39.678741
           matrix_as_tuple_of_tuples     1.938516
dtype: float64

In [180]:
# Euclidean norm of the df_5 / df_4 ratios, per representation.
Statistic.aggregate(
    lambda row: np.linalg.norm(row['df_45']),
    axis=1
)

Operation  Representation           
cols_sums  matrix_as_array              0.807952
           matrix_as_array_of_arrays    0.843554
           matrix_as_big_tuple          0.930799
           matrix_as_bit_map            0.818151
           matrix_as_dict               0.800525
           matrix_as_digraph            0.789680
           matrix_as_ets                0.814233
           matrix_as_ets_bin            0.800354
           matrix_as_ets_list           0.912585
           matrix_as_ext_bit_map        0.823196
           matrix_as_gb_tree            0.791692
           matrix_as_list_map           4.470228
           matrix_as_list_of_lists      0.876517
           matrix_as_map                0.877562
           matrix_as_sofs               0.974298
           matrix_as_tuple_of_tuples    0.808146
dtype: float64

Nothing close to $0$ here. We shouldn't expect any other result anyway, since we actually calculate $n$ sums of $n$ elements.

I would consider the matrix_as_ets_list and matrix_as_ets representations as the winners here.

## Rows sums

In [181]:
# Collapse each row of row-sum timings into a single array stored in column 'f'.
Statistic = RowsSums.agg(
    lambda row: pd.Series({'f': row.values}),
    axis=1
)

In [182]:
# Growth statistics for the row-sum timings.
# g = log(f); dg = finite-difference estimate of the derivative of log(f).
Statistic['g'] = Statistic.aggregate(lambda row: np.log(row['f']), axis=1)
# 'f' is selected by label; the former positional `x.values[0]` depended on
# 'f' being the first column.
Statistic['dg'] = Statistic.aggregate(lambda row: d_g(row['f'], 100), axis=1)
# Spearman rank correlation between sample position and dg.
Statistic[['spearmanr_r', 'spearmanr_pval']] = (
    Statistic
    .aggregate(lambda row: sps.spearmanr(range(len(row['dg'])), row['dg']), axis=1)
    .apply(pd.Series)
)
# KPSS statistic and p-value of dg.
Statistic[['kpss_r', 'kpss_pval']] = (
    Statistic
    .apply(lambda row: stat.kpss(row['dg'], regression='c')[0:2], axis=1)
    .apply(pd.Series)
)
# Linear regression of dg against its sample position (shifted by 1e-6).
Statistic[['reg_slope', 'reg_intercept', 'reg_rv', 'reg_pval']] = (
    Statistic
    .aggregate(
        lambda row: sps.linregress(np.array(range(len(row['dg']))) + 0.000001, row['dg'])[0:4],
        axis=1)
    .apply(pd.Series)
)
# Log-log regression of g against log(position + 1.01).
Statistic[['deg_slope', 'deg_intercept', 'deg_rv', 'deg_pval']] = (
    Statistic
    .aggregate(
        lambda row: sps.linregress(np.log(np.array(range(len(row['g']))) + 1.01), row['g'])[0:4],
        axis=1)
    .apply(pd.Series)
)
Statistic['deg_int_slope'] = np.ceil(Statistic['deg_slope'])
# Euclidean norm of the last four dg values.
Statistic['dg_end'] = Statistic.aggregate(lambda row: np.linalg.norm(row['dg'][-4:]), axis=1)
# Rows with a significant (p < 0.05) negative rank correlation. The two
# conditions are combined with `&` into one row mask; the former tuple of two
# masks only acted as a logical AND because the row index has two levels.
Statistic.loc[
    (Statistic['spearmanr_pval'] < 0.05) & (Statistic['spearmanr_r'] < 0.0),
    ['deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'reg_slope', 'reg_pval', 'dg_end']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,reg_slope,reg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
rows_sums,matrix_as_array_of_arrays,2.0,-0.964286,0.000454,-0.160786,0.003852,0.765682
rows_sums,matrix_as_dict,3.0,-0.821429,0.023449,-0.240744,0.011572,1.111041
rows_sums,matrix_as_digraph,3.0,-0.821429,0.023449,-0.251721,0.068895,1.610392
rows_sums,matrix_as_ets,4.0,-1.0,0.0,-0.358523,0.002351,1.227026
rows_sums,matrix_as_ets_list,4.0,-1.0,0.0,-0.369579,0.002646,1.255817
rows_sums,matrix_as_gb_tree,3.0,-0.964286,0.000454,-0.247141,0.012199,0.900924
rows_sums,matrix_as_list_map,3.0,-1.0,0.0,-0.256175,0.011264,0.863774
rows_sums,matrix_as_tuple_of_tuples,2.0,-1.0,0.0,-0.200711,0.009798,0.762857


In [183]:
# Rows whose trailing dg norm is below 0.01.
Statistic.loc[
    Statistic['dg_end'] < 0.01,
    ['deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'reg_slope', 'reg_pval', 'dg_end']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,reg_slope,reg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


In [184]:
# Stencil estimates of the 1st..5th derivative of f, one list per row.
for _column, _stencil in (('df_1', df_1), ('df_2', df_2), ('df_3', df_3),
                          ('df_4', df_4), ('df_5', df_5)):
    Statistic[_column] = Statistic.aggregate(
        lambda row, _stencil=_stencil: _stencil(row['f'], 100), axis=1)

In [185]:
# Element-wise ratios of consecutive derivative estimates:
# df_45 = df_5 / df_4, df_34 = df_4 / df_3, df_23 = df_3 / df_2.
for _ratio, _upper, _lower in (('df_45', 'df_5', 'df_4'),
                               ('df_34', 'df_4', 'df_3'),
                               ('df_23', 'df_3', 'df_2')):
    Statistic[_ratio] = Statistic.apply(
        lambda row, _upper=_upper, _lower=_lower: np.divide(row[_upper], row[_lower]),
        axis=1)

In [186]:
# Euclidean norm of the df_4 / df_3 ratios, per representation.
Statistic.aggregate(
    lambda row: np.linalg.norm(row['df_34']),
    axis=1
)

Operation  Representation           
rows_sums  matrix_as_array              2.613959
           matrix_as_array_of_arrays    2.028645
           matrix_as_big_tuple          1.952124
           matrix_as_bit_map            2.319883
           matrix_as_dict               1.778336
           matrix_as_digraph            2.805228
           matrix_as_ets                2.002838
           matrix_as_ets_bin            1.767660
           matrix_as_ets_list           1.693608
           matrix_as_ext_bit_map        1.842619
           matrix_as_gb_tree            2.123571
           matrix_as_list_map           2.223922
           matrix_as_list_of_lists      2.046762
           matrix_as_map                2.122551
           matrix_as_sofs               1.917065
           matrix_as_tuple_of_tuples    1.660016
dtype: float64

In [187]:
# Euclidean norm of the df_5 / df_4 ratios, per representation.
Statistic.aggregate(
    lambda row: np.linalg.norm(row['df_45']),
    axis=1
)

Operation  Representation           
rows_sums  matrix_as_array              0.985994
           matrix_as_array_of_arrays    0.897879
           matrix_as_big_tuple          0.855578
           matrix_as_bit_map            0.943127
           matrix_as_dict               0.758392
           matrix_as_digraph            0.903289
           matrix_as_ets                0.847879
           matrix_as_ets_bin            0.776313
           matrix_as_ets_list           0.887177
           matrix_as_ext_bit_map        0.792262
           matrix_as_gb_tree            0.882869
           matrix_as_list_map           0.816029
           matrix_as_list_of_lists      0.892832
           matrix_as_map                0.911937
           matrix_as_sofs               0.804490
           matrix_as_tuple_of_tuples    0.742855
dtype: float64

Nothing close to $0$ here.
I would consider matrix_as_ets_list and matrix_as_ets as the winners here, again.

## Get value

In [188]:
# Collapse each row of get_value timings into a single array stored in column 'f'.
Statistic = GetValue.agg(
    lambda row: pd.Series({'f': row.values}),
    axis=1
)

In [189]:
# Growth statistics for the get_value timings.
# g = log(f); dg = finite-difference estimate of the derivative of log(f).
Statistic['g'] = Statistic.aggregate(lambda row: np.log(row['f']), axis=1)
# 'f' is selected by label; the former positional `x.values[0]` depended on
# 'f' being the first column.
Statistic['dg'] = Statistic.aggregate(lambda row: d_g(row['f'], 100), axis=1)
# Spearman rank correlation between sample position and dg.
Statistic[['spearmanr_r', 'spearmanr_pval']] = (
    Statistic
    .aggregate(lambda row: sps.spearmanr(range(len(row['dg'])), row['dg']), axis=1)
    .apply(pd.Series)
)
# KPSS statistic and p-value of dg.
Statistic[['kpss_r', 'kpss_pval']] = (
    Statistic
    .apply(lambda row: stat.kpss(row['dg'], regression='c')[0:2], axis=1)
    .apply(pd.Series)
)
# Linear regression of dg against its sample position (shifted by 1e-6).
Statistic[['reg_slope', 'reg_intercept', 'reg_rv', 'reg_pval']] = (
    Statistic
    .aggregate(
        lambda row: sps.linregress(np.array(range(len(row['dg']))) + 0.000001, row['dg'])[0:4],
        axis=1)
    .apply(pd.Series)
)
# Log-log regression of g against log(position + 1.01).
Statistic[['deg_slope', 'deg_intercept', 'deg_rv', 'deg_pval']] = (
    Statistic
    .aggregate(
        lambda row: sps.linregress(np.log(np.array(range(len(row['g']))) + 1.01), row['g'])[0:4],
        axis=1)
    .apply(pd.Series)
)
Statistic['deg_int_slope'] = np.ceil(Statistic['deg_slope'])
# Euclidean norm of the last four dg values.
Statistic['dg_end'] = Statistic.aggregate(lambda row: np.linalg.norm(row['dg'][-4:]), axis=1)
# Rows with a significant (p < 0.05) negative rank correlation. The two
# conditions are combined with `&` into one row mask; the former tuple of two
# masks only acted as a logical AND because the row index has two levels.
Statistic.loc[
    (Statistic['spearmanr_pval'] < 0.05) & (Statistic['spearmanr_r'] < 0.0),
    ['deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'reg_slope', 'reg_pval', 'dg_end']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,reg_slope,reg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
get_value,matrix_as_list_of_lists,1.0,-0.928571,0.002519,-0.098575,0.006139,0.368922


In [190]:
# Rows whose trailing dg norm is below 0.01.
Statistic.loc[
    Statistic['dg_end'] < 0.01,
    ['deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'reg_slope', 'reg_pval', 'dg_end']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,reg_slope,reg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
get_value,matrix_as_tuple_of_tuples,1.0,-0.133631,0.775162,-0.071913,0.451968,2.220446e-16


In [191]:
# Stencil estimates of the 1st..5th derivative of f, one list per row.
for _column, _stencil in (('df_1', df_1), ('df_2', df_2), ('df_3', df_3),
                          ('df_4', df_4), ('df_5', df_5)):
    Statistic[_column] = Statistic.aggregate(
        lambda row, _stencil=_stencil: _stencil(row['f'], 100), axis=1)

In [192]:
# Element-wise ratios of consecutive derivative estimates:
# df_45 = df_5 / df_4, df_34 = df_4 / df_3, df_23 = df_3 / df_2.
for _ratio, _upper, _lower in (('df_45', 'df_5', 'df_4'),
                               ('df_34', 'df_4', 'df_3'),
                               ('df_23', 'df_3', 'df_2')):
    Statistic[_ratio] = Statistic.apply(
        lambda row, _upper=_upper, _lower=_lower: np.divide(row[_upper], row[_lower]),
        axis=1)

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until


In [193]:
# Euclidean norm of the df_4 / df_3 ratios, per representation.
Statistic.aggregate(
    lambda row: np.linalg.norm(row['df_34']),
    axis=1
)

Operation  Representation           
get_value  matrix_as_array              10.117708
           matrix_as_array_of_arrays     1.719737
           matrix_as_big_tuple           1.823091
           matrix_as_bit_map             1.519721
           matrix_as_dict               18.303486
           matrix_as_digraph             1.918997
           matrix_as_ets                 1.804186
           matrix_as_ets_bin             2.234789
           matrix_as_ets_list            1.859150
           matrix_as_ext_bit_map         1.841844
           matrix_as_gb_tree            23.732689
           matrix_as_list_map            1.811863
           matrix_as_list_of_lists       1.847315
           matrix_as_map                 1.880854
           matrix_as_sofs                2.002354
           matrix_as_tuple_of_tuples          NaN
dtype: float64

In [194]:
# Euclidean norm of the df_5 / df_4 ratios, per representation.
Statistic.aggregate(
    lambda row: np.linalg.norm(row['df_45']),
    axis=1
)

Operation  Representation           
get_value  matrix_as_array              1.015371
           matrix_as_array_of_arrays    0.754457
           matrix_as_big_tuple          0.793391
           matrix_as_bit_map            0.685537
           matrix_as_dict               0.906626
           matrix_as_digraph            0.836670
           matrix_as_ets                0.791102
           matrix_as_ets_bin            0.799002
           matrix_as_ets_list           0.816405
           matrix_as_ext_bit_map        0.806759
           matrix_as_gb_tree            0.807273
           matrix_as_list_map           0.796159
           matrix_as_list_of_lists      0.796688
           matrix_as_map                0.730900
           matrix_as_sofs               0.868134
           matrix_as_tuple_of_tuples         NaN
dtype: float64

In [195]:
# Element-wise ratio df_12 = df_2 / df_1.
Statistic['df_12'] = Statistic.apply(
    lambda row: np.divide(row['df_2'], row['df_1']),
    axis=1
)

  """Entry point for launching an IPython kernel.


In [196]:
# Euclidean norm of the df_2 / df_1 ratios, per representation.
Statistic.aggregate(
    lambda row: np.linalg.norm(row['df_12']),
    axis=1
)

Operation  Representation           
get_value  matrix_as_array                4.516163
           matrix_as_array_of_arrays      6.685351
           matrix_as_big_tuple            6.815066
           matrix_as_bit_map              5.471452
           matrix_as_dict                 5.022303
           matrix_as_digraph              7.152847
           matrix_as_ets                 11.244136
           matrix_as_ets_bin              5.731388
           matrix_as_ets_list             6.817641
           matrix_as_ext_bit_map          6.840521
           matrix_as_gb_tree              4.117129
           matrix_as_list_map             6.398131
           matrix_as_list_of_lists        3.183019
           matrix_as_map                101.902053
           matrix_as_sofs                12.579066
           matrix_as_tuple_of_tuples           NaN
dtype: float64

Let's check matrix_as_tuple_of_tuples and matrix_as_list_of_lists:

In [197]:
# Raw get_value timings of matrix_as_tuple_of_tuples.
Statistic.loc[('get_value', 'matrix_as_tuple_of_tuples'), 'f']

array([4, 7, 5, 5, 5, 5, 5, 5, 5, 5])

In [198]:
# First-derivative estimates for matrix_as_tuple_of_tuples.
Statistic.loc[('get_value', 'matrix_as_tuple_of_tuples'), 'df_1']

[12.283333333333333, -4.566666666666666, 0.0, 0.0]

In [199]:
# Raw get_value timings of matrix_as_list_of_lists.
Statistic.loc[('get_value', 'matrix_as_list_of_lists'), 'f']

array([ 114,  215,  303,  418,  525,  625,  731,  797,  973, 1136])

So N/A values appear because matrix_as_tuple_of_tuples stabilizes very quickly. The small number of distinct values spoils the derivative analysis, and there is no way (or reason) to limit its results to the last values.
The matrix_as_list_of_lists implementation also shows nicely slowing growth.

## Set value

In [203]:
# Collapse each row of set_value timings into a single array stored in column 'f'.
Statistic = SetValue.agg(
    lambda row: pd.Series({'f': row.values}),
    axis=1
)

In [204]:
# Growth statistics for the set_value timings.
# g = log(f); dg = finite-difference estimate of the derivative of log(f).
Statistic['g'] = Statistic.aggregate(lambda row: np.log(row['f']), axis=1)
# 'f' is selected by label; the former positional `x.values[0]` depended on
# 'f' being the first column.
Statistic['dg'] = Statistic.aggregate(lambda row: d_g(row['f'], 100), axis=1)
# Spearman rank correlation between sample position and dg.
Statistic[['spearmanr_r', 'spearmanr_pval']] = (
    Statistic
    .aggregate(lambda row: sps.spearmanr(range(len(row['dg'])), row['dg']), axis=1)
    .apply(pd.Series)
)
# KPSS statistic and p-value of dg.
Statistic[['kpss_r', 'kpss_pval']] = (
    Statistic
    .apply(lambda row: stat.kpss(row['dg'], regression='c')[0:2], axis=1)
    .apply(pd.Series)
)
# Linear regression of dg against its sample position (shifted by 1e-6).
Statistic[['reg_slope', 'reg_intercept', 'reg_rv', 'reg_pval']] = (
    Statistic
    .aggregate(
        lambda row: sps.linregress(np.array(range(len(row['dg']))) + 0.000001, row['dg'])[0:4],
        axis=1)
    .apply(pd.Series)
)
# Log-log regression of g against log(position + 1.01).
Statistic[['deg_slope', 'deg_intercept', 'deg_rv', 'deg_pval']] = (
    Statistic
    .aggregate(
        lambda row: sps.linregress(np.log(np.array(range(len(row['g']))) + 1.01), row['g'])[0:4],
        axis=1)
    .apply(pd.Series)
)
Statistic['deg_int_slope'] = np.ceil(Statistic['deg_slope'])
# Euclidean norm of the last four dg values.
Statistic['dg_end'] = Statistic.aggregate(lambda row: np.linalg.norm(row['dg'][-4:]), axis=1)
# Rows with a significant (p < 0.05) negative rank correlation. The two
# conditions are combined with `&` into one row mask; the former tuple of two
# masks only acted as a logical AND because the row index has two levels.
Statistic.loc[
    (Statistic['spearmanr_pval'] < 0.05) & (Statistic['spearmanr_r'] < 0.0),
    ['deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'reg_slope', 'reg_pval', 'dg_end']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,reg_slope,reg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


In [205]:
# Rows whose trailing dg norm is below 0.01.
Statistic.loc[
    Statistic['dg_end'] < 0.01,
    ['deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'reg_slope', 'reg_pval', 'dg_end']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,reg_slope,reg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


Setting value looks tragic.

In [213]:
# Summary columns ordered by ascending Spearman p-value.
(
    Statistic[['deg_int_slope', 'spearmanr_r', 'spearmanr_pval', 'reg_slope', 'reg_pval', 'dg_end']]
    .sort_values('spearmanr_pval')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,deg_int_slope,spearmanr_r,spearmanr_pval,reg_slope,reg_pval,dg_end
Operation,Representation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
set_value,matrix_as_array,-0.0,0.607143,0.148231,0.092753,0.172308,0.413688
set_value,matrix_as_dict,2.0,-0.428571,0.337368,-0.142133,0.425155,1.496104
set_value,matrix_as_sofs,3.0,-0.392857,0.383317,-0.213769,0.190192,0.633382
set_value,matrix_as_big_tuple,3.0,-0.321429,0.482072,-0.22084,0.532566,3.065729
set_value,matrix_as_digraph,1.0,-0.321429,0.482072,-0.034509,0.566728,0.364183
set_value,matrix_as_tuple_of_tuples,2.0,0.321429,0.482072,0.085289,0.42431,1.239495
set_value,matrix_as_bit_map,1.0,0.285714,0.534509,0.039611,0.659023,0.976291
set_value,matrix_as_ets_bin,1.0,-0.285714,0.534509,-0.012436,0.686126,0.221836
set_value,matrix_as_list_map,1.0,-0.178571,0.701658,-0.022697,0.888929,1.519999
set_value,matrix_as_ets,1.0,-0.14825,0.75108,0.000124,0.990252,0.109434


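The matrix_as_array has the most pronounced monotonic trend (lowest p-value), although its rank correlation is positive rather than negative and is not significant at the 0.05 level; matrix_as_dict and matrix_as_sofs can be considered.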

## Conclusion

Aside from considering the implementations that perform best on the provided data, we should consider the ones that give hope for efficiency with data of a bigger size. From the above projections we should consider:
- For rows and columns sums: the matrix_as_ets_list and matrix_as_ets representations.
- For the get_value function, matrix_as_tuple_of_tuples is the clear winner, but matrix_as_list_of_lists should also be considered.
- For setting values the results are not so clear, but matrix_as_array is recommended. Also matrix_as_dict and matrix_as_sofs should be considered.

In case other implementations show better results, it is good to check with the above tables that they don't show too big a growth.