In [137]:
import wiggum as wg
import pandas as pd
import numpy as np
import timeit
import seaborn as sns
import warnings
from itertools import chain
warnings.filterwarnings('ignore')

In [138]:
final=pd.DataFrame()

pearson = wg.All_Pearson()
pearson.get_trend_vars(labeled_df)
pearson.regression_vars
    
pearson_2 = wg.trends.All_Pearson_V2()
pearson_2.get_trend_vars(labeled_df)
pearson_2.regression_vars

objs=['pearson', 'pearson_2']
prereqs='from __main__ import labeled_df, pearson, pearson_2, objs, commands'
commands=["{}.compute_correlation_table(labeled_df.df, 'agg_trend')",
          'labeled_df.get_subgroup_trends_1lev([{}])',
         "{}.get_trends(labeled_df.df, 'agg_trend')",
         "{}.get_trends(labeled_df.df, 'sub_trend')"]

def test_scalability():
    times=[]
    for command in commands:
        for pearson_obj in objs:

            statement=command.format(pearson_obj)
            print(statement)
            t=timeit.repeat(statement, prereqs, repeat=10, number=100)
            times.append(t)
            print(t)
    timed = [ele for lis in times for ele in lis]
    return timed

def create_df(N, num_dep_indep, number_cluster, num_splitby):
    timings=test_scalability()
    function_names=['cct', 'get_subgroup_trends', 'get_trends (agg)', 'get_trends (sub)']
    result_df=pd.DataFrame(columns=['times', 'data', 'function'])
    # setting function column
    function=[]
    for ele in function_names:
        function.extend([ele]*20)
    # setting version column
    version=[['v1']*10, ['v2']*10]*len(function_names)
    version=list(chain.from_iterable(version))
    # setting trial column
    trial=list(range(10))*(len(function_names)*2)

    data_name='synthetic {}'.format(N)
            
    result_df['times']=timings
    result_df['function']=function
    result_df['data']=str(data_name)
    result_df['trial']=trial
    result_df['version']=version
    result_df['size']=N
    result_df['dep_indep_vars']=num_dep_indep
    result_df['number_cluster']=number_cluster
    result_df['num_splitby']=num_splitby
    
    return result_df


In [139]:
import mlsim
from mlsim import sp_plot

In [140]:
np.random.seed(20210627)


r_clusters =     [-.8, .5, .4, .7, -.6, .5, .4,-.9]  # magnitude correlation coefficient of clusters
cluster_spread = [ .3,-.2, .1,-.4, .2, -.1, .2,.4] # pearson correlation of means
p_sp_clusters =  [  1,  0,  0,  0,  1,   0,  0,  1] # portion of clusters with SP  #1 if r neg 0 if rpos
cluster_size =   [2,  3]#
domain_range = [0, 20, 0, 20] # of all data

In [141]:
n_list = [1000, 10000, 100000]
number_cluster_list=[2, 4, 8, 16, 32]
num_dep_indep_list = [4, 8, 16]
num_splitby_list = [4, 8, 16]

In [142]:
# generate data
for N in n_list:
    for number_cluster in number_cluster_list:
        for num_dep_indep in num_dep_indep_list:
            for num_splitby in num_splitby_list:
                k = [number_cluster]*40
                n_view = len(k)
                p_clusters = [[1/k_i]*k_i for k_i in k]
                many_sp_df = mlsim.geometric_indep_views_gmm_sp(n_view,r_clusters,cluster_size,cluster_spread,p_sp_clusters,
                            domain_range,k,N,p_clusters)
                # print(many_sp_df.shape)
                labeled_df = wg.LabeledDataFrame(many_sp_df)
                
                n_view = int(len(labeled_df.df.columns) / 3)

                # set dependent and independent for some xi, ignore for the rest
                dep_indep_list = np.random.choice(n_view*2, num_dep_indep)
                #print(dep_indep_list)
                roles = {'x'+str(i+1):['ignore'] if i not in dep_indep_list else ['independent','dependent'] 
                            for i in range(n_view*2)}

                # vars without 'x' in them are splitbys
                splitby_var_list = [cn for cn in labeled_df.df.columns if not('x' in cn)]
                # set splitby for some variable, ignore for the rest
                splitby_list = np.random.choice(splitby_var_list, num_splitby)
                roles.update( {c:['splitby'] if c in splitby_list else ['ignore'] for c in splitby_var_list})    

                count_list = []

                var_types = {'x'+str(i+1):'continuous' for i in range(n_view*2)}
                var_types.update( {c:'categorical' for c in splitby_var_list})
                weighting = {}

                labeled_df.set_counts(count_list)
                labeled_df.set_roles(roles)
                labeled_df.set_var_types(var_types)
                labeled_df.meta_df
                
                print('N:', N)
                print('Number of clusters:', number_cluster)
                print('Number of dep indep vars:', num_dep_indep)
                print('Number of sliptby vars:', num_splitby)
                
                result_df=create_df(N, num_dep_indep, number_cluster, num_splitby)
                final=pd.concat([final, result_df])
                del labeled_df



N: 1000
Number of clusters: 2
Number of dep indep vars: 4
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.566995200002566, 0.2165091999922879, 0.20319349999772385, 0.20875729998806491, 0.21480720001272857, 0.222394800017355, 0.22383730000001378, 0.19635539999580942, 0.2010669000155758, 0.20063189999200404]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.28550149998045526, 0.19937129999743775, 0.22220349998679012, 0.2146358999889344, 0.21811119999620132, 0.19825759998639114, 0.20364039999549277, 0.1981095999944955, 0.19491619998007081, 0.19959709999966435]
labeled_df.get_subgroup_trends_1lev([pearson])
[5.589007100003073, 1.4831459000124596, 1.4892809999873862, 1.7226495000068098, 1.9532680999836884, 1.9016441999992821, 1.88191690001986, 1.8364791999920271, 1.8476949000032619, 1.9009309000102803]
labeled_df.get_subgroup_trends_1lev([pearson_2])
[1.845817099994747, 1.7345376999874134, 1.6139312000013888, 1.5013978000206407, 1.

[0.7115555000200402, 0.7038545999967027, 0.739082399988547, 0.6905733000021428, 0.6715320000075735, 0.6712874999793712, 0.6533663999871351, 0.6618131000141148, 0.6709965000045486, 0.7074118000164162]
N: 1000
Number of clusters: 2
Number of dep indep vars: 8
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.35692330001620576, 0.3549387000093702, 0.3360240000183694, 0.3432322999869939, 0.3583436999761034, 0.350791599979857, 0.334201200021198, 0.35264629998710006, 0.33735390001675114, 0.4240302999969572]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.27568749999045394, 0.1924730000027921, 0.19754369999282062, 0.19420949998311698, 0.19803150001098402, 0.20955110000795685, 0.1930041000014171, 0.21114469997701235, 0.19487479998497292, 0.19862849998753518]
labeled_df.get_subgroup_trends_1lev([pearson])
[1.9303304000059143, 1.562308699998539, 1.5367285000102129, 1.6949454999994487, 1.8382840000267606, 1.8032339000201318, 1.8186147999

[0.7133442999911495, 0.8289999999979045, 0.7529656999977306, 0.9017011999967508, 0.7023737999843433, 0.6909029000089504, 0.6750555000035092, 0.6837040999962483, 0.6998066999949515, 0.7404862999974284]
N: 1000
Number of clusters: 2
Number of dep indep vars: 16
Number of sliptby vars: 16
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.6667466999788303, 0.6488727999967523, 0.6481489000143483, 0.6611764999979641, 0.6527668999915477, 0.642320800019661, 0.651817799982382, 0.6467692000151146, 0.6458509000076447, 0.6440326999872923]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.19473389998893254, 0.2133466999803204, 0.20202589998370968, 0.20654849999118596, 0.23247450002236292, 0.20020679998560809, 0.1995965999958571, 0.20996520001790486, 0.3018271999899298, 0.34078849997604266]
labeled_df.get_subgroup_trends_1lev([pearson])
[5.0062637999944855, 1.7420108999940567, 1.9159725999925286, 2.2091592000215314, 2.226989599992521, 1.939357299997937, 1.650336599996

[1.5148661999846809, 1.5041642999858595, 1.4542944999993779, 0.9329136000014842, 0.8762860000133514, 0.8486619000032078, 0.8784282000269741, 0.9384213000012096, 0.9532270000199787, 0.9359468999900855]
N: 1000
Number of clusters: 4
Number of dep indep vars: 8
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.38968130000284873, 0.2570871999778319, 0.4103578999929596, 0.33622440000181086, 0.27955340000335127, 0.3009399000147823, 0.25134350001462735, 0.2544748999935109, 0.252232400001958, 0.19914550002431497]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.2674482000002172, 0.32366059999912977, 0.23893960000714287, 0.2777228999766521, 0.24365200000465848, 0.19740090001141652, 0.21158240002114326, 0.21804549999069422, 0.20248849998461083, 0.20962719997623935]
labeled_df.get_subgroup_trends_1lev([pearson])
[2.5581872000184376, 2.352010300004622, 2.3827385999902617, 2.3211939000175335, 2.4219183000095654, 2.078535299981013, 2.0027040

[0.651328799984185, 0.6468740999989677, 0.6408306000230368, 0.6336806000035722, 0.6569677000225056, 0.6395760000159498, 0.6400949000089895, 0.650557300017681, 0.6358697999967262, 0.6362853000173345]
N: 1000
Number of clusters: 4
Number of dep indep vars: 16
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.7974885000148788, 0.7715099999913946, 0.7708439999842085, 0.7622985999914818, 0.7775917999970261, 0.7701993999944534, 0.7607575999863911, 0.7845910000032745, 0.7759270999813452, 0.7705856999964453]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.1890590999973938, 0.19980060000671074, 0.18895559999509715, 0.19079839999903925, 0.19979159999638796, 0.18620039999950677, 0.19089850000455044, 0.18814389998442493, 0.19169619999593124, 0.19709079997846857]
labeled_df.get_subgroup_trends_1lev([pearson])
[3.669489300023997, 1.5161463000113145, 1.553043299994897, 1.522091899998486, 1.5334107000089716, 1.535364200011827, 1.5391531999921

[0.665406999993138, 0.6601899999950547, 0.6455891999939922, 0.6489860999863595, 0.6360014000092633, 0.6518027999845799, 0.6545404999924358, 0.6437207999988459, 0.6454651999811176, 0.6336003000033088]
N: 1000
Number of clusters: 8
Number of dep indep vars: 4
Number of sliptby vars: 16
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.19351479999022558, 0.20014060000539757, 0.20435849999194033, 0.1903106999816373, 0.19369339998229407, 0.1892443000106141, 0.19393400000990368, 0.21876350001548417, 0.18813490000320598, 0.1896374000061769]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.19716460001654923, 0.2011097999929916, 0.19200030001229607, 0.21100910002132878, 0.18890899998950772, 0.20552449999377131, 0.22606070002075285, 0.19669370001065545, 0.20258169999578968, 0.18800090000149794]
labeled_df.get_subgroup_trends_1lev([pearson])
[2.1062664999917615, 1.499937000015052, 1.4984663000213914, 1.483583199995337, 1.6840392000158317, 1.7998211000231095, 1.800

[0.6531897000095341, 0.6302965999930166, 0.6245496999763418, 0.6401698999979999, 0.6959158000245225, 0.6692989000002854, 0.6739152999944054, 0.6416257000237238, 0.7165633999975398, 0.7241040000226349]
N: 1000
Number of clusters: 8
Number of dep indep vars: 16
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.3420419000030961, 0.32916199997998774, 0.34070890001021326, 0.3281749000016134, 0.33516750001581386, 0.3345654000004288, 0.33100040000863373, 0.3419908999931067, 0.3284840999986045, 0.34202700000605546]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.1876402999914717, 0.20241719999467023, 0.18750170001294464, 0.1990996999957133, 0.1948938999848906, 0.18811099999584258, 0.19764860000577755, 0.1878895999980159, 0.18877909998991527, 0.2002020999789238]
labeled_df.get_subgroup_trends_1lev([pearson])
[3.3353644000017084, 1.5156363999994937, 1.5170359000039753, 1.5286450000130571, 1.5251953999977559, 1.5214003000000957, 1.527434

[0.6666762999957427, 0.6734123000060208, 0.7017119000083767, 0.7092866000020877, 0.6897075999877416, 0.6938444999977946, 0.6972040999971796, 0.7058327000122517, 0.6798886000178754, 0.6967712999903597]
N: 1000
Number of clusters: 16
Number of dep indep vars: 4
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.24406629998702556, 0.21150640002451837, 0.2001170000003185, 0.2129277999920305, 0.23083339998265728, 0.23996289999922737, 0.22815660000196658, 0.2593232000072021, 0.2697017999889795, 0.2642262000008486]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.2602931000001263, 0.2544268000056036, 0.26061560001107864, 0.2546371999778785, 0.27404409999144264, 0.282094700000016, 0.24049210001248866, 0.26741760000004433, 0.24546060001011938, 0.2563908000010997]
labeled_df.get_subgroup_trends_1lev([pearson])
[2.5090026000107173, 1.8040372999967076, 1.5940524000034202, 1.4917033000092488, 1.5763307999877725, 1.5220340999949258, 1.4993558

[0.7463632999861147, 0.7038844000198878, 0.756319700012682, 0.7517253000114579, 0.7541627000027802, 0.5979252000106499, 0.5889364000177011, 0.5857956999971066, 0.685474099998828, 0.603000000002794]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[0.6511109000130091, 0.6431778999976814, 0.6387323999952059, 0.6375594999990426, 0.6300301000010222, 0.6489893000107259, 0.6509738999884576, 0.6310244000051171, 0.6535142000066116, 0.6310077999951318]
N: 1000
Number of clusters: 16
Number of dep indep vars: 8
Number of sliptby vars: 16
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.3536846000060905, 0.3382020999852102, 0.3561164000129793, 0.347312800004147, 0.34669639999629, 0.3275275000196416, 0.34251979997497983, 0.3277578999986872, 0.3323736999882385, 0.33759640000062063]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.19064250000519678, 0.2020695999963209, 0.1909464000200387, 0.20247300001210533, 0.19990710000274703, 0.19228630000725389, 0.1891639000095

[1.1627444000041578, 1.1431111000129022, 1.1563931000127923, 1.1736256000003777, 1.1520135999890044, 1.139780299999984, 1.1439319000055548, 1.1440276999783237, 1.1423315999854822, 1.134802300017327]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[0.6509325999941211, 0.6825069999904372, 0.6496630000183359, 0.6366764999984298, 0.6484967999858782, 0.6397752999910153, 0.6332970000221394, 0.6361326000187546, 0.6312256999954116, 0.6223989999853075]
N: 1000
Number of clusters: 32
Number of dep indep vars: 4
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.8540398000041023, 0.8391689999843948, 0.8548394999816082, 0.8406285000091884, 0.8448600999836344, 0.8494475000188686, 0.9597270999802276, 0.9620231000008062, 0.8410667999996804, 0.8478371000092011]
pearson_2.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.18684270000085235, 0.20977549999952316, 0.1892048000008799, 0.19133600001805462, 0.22700039998744614, 0.25507059998926707, 0.23390200000

[0.6448509000183549, 0.6405813999881502, 0.6302607000106946, 0.6290434000256937, 0.632922899996629, 0.6399087000172585, 0.6272671999759041, 0.6370781000005081, 0.6398882000066806, 0.6198629999998957]
pearson.get_trends(labeled_df.df, 'sub_trend')
[0.5892068000102881, 0.5768234000133816, 0.6166899999952875, 0.5746505999995861, 0.5691974000073969, 0.5762281999923289, 0.5632113000028767, 0.5771436999784783, 0.5626474999880884, 0.5781457000121009]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[0.6365399999776855, 0.6485133999958634, 0.6344415000057779, 0.6189784999878611, 0.6322045000270009, 0.6291684999887366, 0.6320716999762226, 0.631947299989406, 0.6439373999892268, 0.6993613000086043]
N: 1000
Number of clusters: 32
Number of dep indep vars: 8
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.33941139999660663, 0.3276649999897927, 0.3393579999974463, 0.32457279998925515, 0.3229779000103008, 0.34284550001029857, 0.33521719998680055, 0.324407500

[0.6415266999974847, 0.642070000001695, 0.6132393000007141, 0.6292594999831636, 0.6270867999992333, 0.8100243999797385, 0.7232553999929223, 0.6313394000171684, 0.6165520000213291, 0.6328934000048321]
pearson.get_trends(labeled_df.df, 'sub_trend')
[1.1503679000015836, 1.1234433999925386, 1.2218493999971543, 1.1307572000077926, 1.1272904000070412, 1.1330226999998558, 1.1153178999957163, 1.1175704000052065, 1.1474369999777991, 1.1069917999848258]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[0.6396081000275444, 0.6279635999817401, 0.6212404000107199, 0.6282068000000436, 0.6246725000091828, 0.6134399000147823, 0.6271845000155736, 0.6212431000021752, 0.6257610000029672, 0.6250877999991644]
N: 1000
Number of clusters: 32
Number of dep indep vars: 16
Number of sliptby vars: 16
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.8482833000016399, 0.8265117999981157, 0.8612463000172284, 0.8390681000018958, 0.8233917000179645, 0.8434134999988601, 0.8408205000159796, 0.8365467999

[1.3228368999843951, 1.30553039998631, 1.328228800004581, 1.3125763999996707, 1.340999300009571, 1.3189515999983996, 1.3234230000234675, 1.3041240999882575, 1.3134573000133969, 1.2948651000042446]
pearson.get_trends(labeled_df.df, 'sub_trend')
[0.5762697999889497, 0.570985300000757, 0.5668723999988288, 0.5720175000024028, 0.572842200024752, 0.5866939999978058, 0.5608507999859285, 0.5708664999983739, 0.5638463000068441, 0.5741666000103578]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.306348899990553, 1.313358999992488, 1.31022290000692, 1.32926519998, 1.308158200001344, 1.3317662999907043, 1.3898354999837466, 1.308812899980694, 1.3802344000141602, 1.491933200013591]
N: 10000
Number of clusters: 2
Number of dep indep vars: 8
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.3605260000040289, 0.3343702999991365, 0.33968089998234063, 0.34031879997928627, 0.33201569999800995, 0.3395844000042416, 0.3378777999896556, 0.3365912999724969, 0.334628

[1.414146900002379, 1.5042964999738615, 1.4117540000006557, 1.4067294000124093, 1.400679599988507, 1.419869400007883, 1.3919571999867912, 1.397316500020679, 1.399715000006836, 1.404849100013962]
pearson.get_trends(labeled_df.df, 'sub_trend')
[2.0120029999816325, 2.0052759000100195, 2.0190992999996524, 2.0118234000110533, 2.0064080000156537, 2.019333700009156, 2.034830399992643, 2.004594400001224, 1.9976730999769643, 2.064546900015557]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.491706200002227, 1.31213679999928, 1.350453799997922, 1.3277600000146776, 1.3293284999963362, 1.3137318000080995, 1.3248763000010513, 1.405637400021078, 1.344967799988808, 1.3160973999765702]
N: 10000
Number of clusters: 2
Number of dep indep vars: 16
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[1.7260189999942668, 1.69133430000511, 1.654899099987233, 1.6389059000066482, 1.6580938999832142, 1.8106324000109453, 1.6559289999713656, 1.6703330999880563, 1.642384000

[1.3575582999910694, 1.3429587999999058, 1.3305388999870047, 1.341216800006805, 1.3391891000210308, 1.319997999991756, 1.3313431000278797, 1.356211300007999, 1.3264829000108875, 1.3130628000071738]
pearson.get_trends(labeled_df.df, 'sub_trend')
[0.5222543000127189, 0.5086586000106763, 0.5171424000000115, 0.5054554000089411, 0.521434299997054, 0.5187598000047728, 0.5364539999864064, 0.5318743999814615, 0.6705498000083026, 0.6191015000222251]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.7046774000045843, 1.8386829000082798, 1.7925243999925442, 1.8229479000146966, 1.8221218999824487, 1.7466845999879297, 1.6512865999829955, 1.3302955999970436, 1.4332401000137907, 1.346252799994545]
N: 10000
Number of clusters: 4
Number of dep indep vars: 4
Number of sliptby vars: 16
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.30803350001224317, 0.28444769998895936, 0.2842348999984097, 0.27761660001124255, 0.2860872999881394, 0.2794974999851547, 0.27526330002001487, 0.27221809999

[1.3168618000054266, 1.3020813000039198, 1.324315600009868, 1.299056899995776, 1.3052650000026915, 1.3052432999829762, 1.3167865999857895, 1.3030029000074137, 1.3173858000081964, 1.3034309000067879]
pearson.get_trends(labeled_df.df, 'sub_trend')
[0.9292935999983456, 0.9245419999933802, 0.9132343000092078, 0.9255538000143133, 0.9217816999880597, 0.9207762999867555, 0.9391191999893636, 0.9066442999755964, 0.9306422999943607, 0.9198374999978114]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.3231353999872226, 1.3058900000178255, 1.320154899993213, 1.3052619000081904, 1.3053989000036381, 1.3090866999991704, 1.3044104000146035, 1.3000599000079092, 1.2940316000021994, 1.2994354000256862]
N: 10000
Number of clusters: 4
Number of dep indep vars: 16
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.7025578999891877, 0.6742045999853872, 0.6654277000052389, 0.6679911999963224, 0.6695158999937121, 0.6772305000049528, 0.6653891000023577, 0.6723925999831

[1.424399200011976, 1.3300097000028472, 1.3259736999752931, 1.3213816999923438, 1.3321457999991253, 1.3216277999745216, 1.3305703999940306, 1.3130796999903396, 1.3126694000093266, 1.3404547999962233]
pearson.get_trends(labeled_df.df, 'sub_trend')
[0.5736184999987017, 0.5763452999817673, 0.5699979999917559, 0.5760387000045739, 0.5633719000034034, 0.5715644000156317, 0.5777384999964852, 0.5798351000121329, 0.574683200014988, 0.5727057000040077]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.3507511999923736, 1.350112700019963, 1.3221523000102025, 1.3177245999977458, 1.312922899989644, 1.322561200009659, 1.324177399976179, 1.3053727000078652, 1.3104204999981448, 1.3434812999912538]
N: 10000
Number of clusters: 8
Number of dep indep vars: 4
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.3599841999821365, 0.3368396000005305, 0.34085330000380054, 0.3384063999983482, 0.33512070000870153, 0.3369269999966491, 0.3358458000002429, 0.335938400006853

[1.3146906000038143, 1.3064058999880217, 1.3068552000040654, 1.3162351999781094, 1.3003679999965243, 1.30638789999648, 1.3031932000012603, 1.3087147999904118, 1.295293400005903, 1.438165300001856]
pearson.get_trends(labeled_df.df, 'sub_trend')
[0.9316436999943107, 0.9992957999929786, 0.919727000000421, 0.9225583999941591, 0.9245813000015914, 0.9032947999949101, 0.9063998000056017, 0.910895699984394, 0.9163718999770936, 0.9201227999874391]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.3188128000183497, 1.3026297999895178, 1.3107859999872744, 1.2897971000056714, 1.2990511999814771, 1.2962447000027169, 1.3255001000070479, 1.329909599997336, 1.312912299996242, 1.296993800002383]
N: 10000
Number of clusters: 8
Number of dep indep vars: 8
Number of sliptby vars: 16
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.6950330999970902, 0.6820337000244763, 0.7131970999762416, 0.6727007999725174, 0.6892128000035882, 0.6814822999876924, 0.6735937999910675, 0.6758476000104565, 0

[1.3024240999948233, 1.2986416000057943, 1.2969479000021238, 1.320218599983491, 1.2998386999825016, 1.3097705000254791, 1.30082209999091, 1.3127076999808196, 1.2994368999789003, 1.2912953999766614]
pearson.get_trends(labeled_df.df, 'sub_trend')
[1.9239455999922939, 1.9160028000187594, 1.9315471999871079, 1.9939003000035882, 2.0834259999974165, 1.9277136999880895, 1.951785099983681, 2.0589059999911115, 1.9104558000108227, 1.9064631999935955]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.3136200000008103, 1.3141703000001144, 1.2998769000114407, 1.3311196000140626, 1.3065217999974266, 1.3170306000101846, 1.3054788000008557, 1.320509699988179, 1.3064305000007153, 1.2980391000164673]
N: 10000
Number of clusters: 16
Number of dep indep vars: 4
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[1.799398300005123, 1.6377275999984704, 1.7387388999923132, 1.622969699994428, 1.629960399994161, 1.623291199997766, 1.6363100000016857, 1.6335939999844413, 1

[1.3733584999863524, 1.3385772000183351, 1.3312374000088312, 1.3347571000049356, 1.320901099999901, 1.3764073999773245, 1.335700600000564, 1.3756953999982215, 1.521764200006146, 1.830733799986774]
pearson.get_trends(labeled_df.df, 'sub_trend')
[1.143925299984403, 1.2088793000148144, 1.100991699990118, 1.0534048000117764, 1.1155653999885544, 1.0863512000069022, 0.9525145000079647, 0.9460822000110056, 0.9433695999905467, 0.987905599991791]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.3553785999829415, 1.3163735000125598, 1.3518871999986004, 1.325375400017947, 1.3140806999872439, 1.3471565999789163, 1.3254080999759026, 1.3261428999830969, 1.3153527999820653, 1.313757900003111]
N: 10000
Number of clusters: 16
Number of dep indep vars: 8
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.6988589000247885, 0.6779760000063106, 0.7072836000006646, 0.680575799982762, 0.6740380999981426, 0.680274999991525, 0.6781241999997292, 0.6714059000078123, 0.6

[1.3146593999990728, 1.3054039999842644, 1.354952100024093, 1.3354634999996051, 1.3152285999967717, 1.3189297999779228, 1.5849900000030175, 1.3196154999895953, 1.3308877000235952, 1.3312237999925856]
pearson.get_trends(labeled_df.df, 'sub_trend')
[2.2426801999972668, 2.146463599987328, 2.141183200001251, 2.1481831000128295, 2.188000199996168, 2.213258000003407, 2.189034700015327, 2.138503700000001, 2.1460702000185847, 2.1675812999892514]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.3295561999839265, 1.3190227000159211, 1.3202359999995679, 1.3049887999950442, 1.2901933000248391, 1.3106835999933537, 1.3133573999803048, 1.3076618999766652, 1.3255052999884356, 1.3017106999759562]
N: 10000
Number of clusters: 16
Number of dep indep vars: 16
Number of sliptby vars: 16
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[1.8790756999806035, 1.827365600009216, 1.851629899989348, 1.8420730000070762, 1.8376537000003736, 1.8328637000231538, 1.8401875000272412, 1.8418223000189755,

[1.310811300005298, 1.3028821000189055, 1.3078628000221215, 1.3141658999957144, 1.2953009000048041, 1.3081968999758828, 1.3297300999984145, 1.3138338999997359, 1.2977728999976534, 1.2995256999856792]
pearson.get_trends(labeled_df.df, 'sub_trend')
[0.5718619000108447, 0.5671382000145968, 0.5734434000041801, 0.5665975000010803, 0.5664866000006441, 0.5666766999929678, 0.5686209999839775, 0.5549401000025682, 0.5675628999888431, 0.5665933999989647]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.327457500010496, 1.307125499995891, 1.3039099000161514, 1.3088764999993145, 1.2970505999983288, 1.2966302999993786, 1.298858800000744, 1.3095912999997381, 1.301148500002455, 1.3029144000029191]
N: 10000
Number of clusters: 32
Number of dep indep vars: 8
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[0.5197833999991417, 0.44350890000350773, 0.3384336000017356, 0.3473346999962814, 0.3307959999947343, 0.3366717000026256, 0.3467663000046741, 0.33373749998281

[1.3390739999886137, 1.3082406999892555, 1.3230809999804478, 1.3257410999794956, 1.2864326999988407, 1.3050129000039306, 1.3032494000217412, 1.3755908999883104, 1.5261309999914374, 1.685622199991485]
pearson.get_trends(labeled_df.df, 'sub_trend')
[2.8234947999881115, 2.909090000001015, 2.9083562000014354, 2.463724299974274, 2.3872812000045087, 2.378595199988922, 2.3959654000063892, 2.411803900002269, 2.3758455000061076, 2.375836099992739]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[1.3351114999968559, 1.3032322999788448, 1.3038468000013381, 1.3204943000164349, 1.3221238999976777, 1.304718299972592, 1.3252637999830768, 1.308534099982353, 1.2999483000021428, 1.3128774000215344]
N: 10000
Number of clusters: 32
Number of dep indep vars: 16
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[2.3020057000103407, 2.080075299978489, 2.101128400012385, 2.0724363999906927, 2.100365899997996, 2.1101720000151545, 2.0787062000017613, 2.1158006000041496, 2.

[9.705696599994553, 9.615179900021758, 9.651845799991861, 9.545974399981787, 9.551180999987992, 9.540527500008466, 9.523737199982861, 9.568160299997544, 9.611551699985284, 10.966852700017625]
pearson.get_trends(labeled_df.df, 'sub_trend')
[2.2331913000089116, 2.041606299986597, 1.8797097000060603, 1.9321881999785546, 1.884581199992681, 1.8609927000070456, 1.8807096000236925, 1.8834814000001643, 1.877148200001102, 1.872253700013971]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[9.652665600006003, 9.821068899997044, 9.681280100019649, 9.653602600010345, 9.6169280000031, 9.631036899983883, 9.587903000006918, 9.59675860000425, 9.669816800014814, 10.359418700012611]
N: 100000
Number of clusters: 2
Number of dep indep vars: 4
Number of sliptby vars: 16
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[1.8289237999997567, 1.6096450999903027, 1.6202913000015542, 1.6470860000117682, 1.6114022000110708, 1.6272549999994226, 1.6121203000075184, 1.6159124000114389, 1.63804780002101

[9.687988999998197, 9.63065629999619, 9.848360000003595, 9.885525100020459, 9.606044999993173, 9.630507800000487, 9.652531600004295, 9.632868799992139, 9.618083999986993, 9.630997400003253]
pearson.get_trends(labeled_df.df, 'sub_trend')
[4.155829499999527, 4.599186000006739, 4.755308899999363, 4.710988900013035, 4.290333199984161, 4.153304700012086, 4.1431928999954835, 4.176964400016004, 4.160909999976866, 4.146678699995391]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[9.674402400007239, 9.666293099988252, 9.655379400006495, 9.65874040001654, 9.740533900010632, 9.625164000026416, 9.886860600003274, 9.627999300020747, 10.177525000006426, 10.348236299993005]
N: 100000
Number of clusters: 2
Number of dep indep vars: 16
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[4.360942999977851, 4.209847499994794, 3.994912900001509, 3.928649100009352, 3.9425610000034794, 3.914842199999839, 4.127092600014294, 4.406993099983083, 4.414985399984289, 4.285858

[11.779276799992658, 11.731696000002557, 11.498851399985142, 11.510388800001238, 11.641771499998868, 14.226740400015842, 12.058628800004954, 11.43392350000795, 11.990399000002071, 11.469758399995044]
pearson.get_trends(labeled_df.df, 'sub_trend')
[2.613428799988469, 2.312427700002445, 2.2379016999911983, 2.0686879999993835, 2.065242999990005, 2.201423299993621, 2.0437191000091843, 2.668166999996174, 2.4761572999996133, 2.6987888000148814]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[11.989957799989497, 12.028659300005529, 12.002979400014738, 13.96540119999554, 12.52450410000165, 11.441790500015486, 11.515269600000465, 11.437701499991817, 11.394755699991947, 11.431067099998472]
N: 100000
Number of clusters: 4
Number of dep indep vars: 4
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[1.9789092999999411, 1.732053099985933, 1.684647500020219, 1.7067900999973062, 1.7249431000091136, 1.7149070000159554, 1.7148890999960713, 1.72879270001431, 1.69

[9.564297299977625, 9.585250099975383, 9.59977109997999, 9.628072799998336, 9.620064400020055, 10.428806400013855, 9.710564900015015, 9.603164600004675, 9.575764200009871, 9.712278999999398]
pearson.get_trends(labeled_df.df, 'sub_trend')
[5.42140780002228, 5.35073539998848, 5.346823799976846, 5.338321400020504, 5.384567400004016, 5.335889100009808, 5.3977743000141345, 5.346842500002822, 5.324860199994873, 5.349865299998783]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[9.574471400002949, 9.733400600001914, 10.531559700000798, 9.614212899992708, 9.620059000008041, 9.564568100002361, 9.619832299998961, 9.567686800000956, 9.577921699994477, 9.583995299995877]
N: 100000
Number of clusters: 4
Number of dep indep vars: 8
Number of sliptby vars: 16
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[5.224461600009818, 5.044980399979977, 5.069145700021181, 5.07015539999702, 5.044662099971902, 5.207271799998125, 5.177650400000857, 5.08124569998472, 5.062161999987438, 5.0332085000

[10.52814110001782, 9.699455099995248, 9.697664800012717, 10.559943400003249, 9.671676900004968, 9.59120090000215, 9.695162599993637, 9.685841899976367, 9.67632080000476, 9.664717599982396]
pearson.get_trends(labeled_df.df, 'sub_trend')
[24.629340000014054, 24.57563459998346, 26.074259900022298, 24.98183569998946, 24.427290299994638, 24.353927000018302, 24.67018700001063, 24.44077709998237, 24.366365400026552, 24.345550700003514]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[9.59572169999592, 9.574371900002006, 9.88236349998624, 9.595545399992261, 10.079664299992146, 10.137719999998808, 9.578136700001778, 9.582389000017429, 9.5749555999937, 9.58442970001488]
N: 100000
Number of clusters: 8
Number of dep indep vars: 4
Number of sliptby vars: 4
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[24.70062859999598, 24.03981109999586, 24.04344109998783, 24.325327900005504, 25.52416010000161, 24.25704759999644, 23.988482800021302, 24.091346700006397, 24.494535999983782, 24.22

[9.601970500021707, 9.601805899990723, 9.611677400011104, 9.605099999986123, 9.592429099982837, 9.611773999989964, 9.687349800020456, 9.63271689999965, 9.950556100026006, 10.25679090002086]
pearson.get_trends(labeled_df.df, 'sub_trend')
[5.471709700010251, 5.3797125999990385, 5.395829199987929, 5.405977799993707, 5.365530200011563, 5.398664899985306, 5.3869128999940585, 5.4098117999965325, 5.387274399981834, 5.38002250000136]
pearson_2.get_trends(labeled_df.df, 'sub_trend')
[9.658558800001629, 9.79589589999523, 9.630379499983974, 9.976401199994143, 9.924187700002221, 10.33908390000579, 10.067475699994247, 9.668588800006546, 9.741643199988175, 9.693758400011575]
N: 100000
Number of clusters: 8
Number of dep indep vars: 8
Number of sliptby vars: 8
pearson.compute_correlation_table(labeled_df.df, 'agg_trend')
[5.345002299989574, 5.1485966999898665, 5.174809800024377, 5.134234800003469, 5.14334839998628, 5.168255999975372, 5.169867199991131, 5.137314299994614, 5.147459400002845, 5.15653119

KeyboardInterrupt: 

In [143]:
final

Unnamed: 0,times,data,function,trial,version,size,dep_indep_vars,number_cluster,num_splitby
0,0.566995,synthetic 1000,cct,0,v1,1000,4,2,4
1,0.216509,synthetic 1000,cct,1,v1,1000,4,2,4
2,0.203193,synthetic 1000,cct,2,v1,1000,4,2,4
3,0.208757,synthetic 1000,cct,3,v1,1000,4,2,4
4,0.214807,synthetic 1000,cct,4,v1,1000,4,2,4
...,...,...,...,...,...,...,...,...,...
75,10.204769,synthetic 100000,get_trends (sub),5,v2,100000,8,8,8
76,9.585520,synthetic 100000,get_trends (sub),6,v2,100000,8,8,8
77,9.562629,synthetic 100000,get_trends (sub),7,v2,100000,8,8,8
78,9.530497,synthetic 100000,get_trends (sub),8,v2,100000,8,8,8


In [147]:
final.to_csv('wiggum/data')

In [134]:
print('hello world')

hello world
