In [1]:
from numpy import linalg as LA
import numpy as np
import pandas as pd
import math
import networkx as nx
import community


def compute_C_minus_C0(lambdas,v,lambda_plus):
    """Rebuild the part of a matrix carried by eigenvalues above a threshold.

    Accumulates ``lambdas[i] * v_i * v_i^T`` over every eigenpair whose
    eigenvalue is strictly greater than ``lambda_plus``. The last eigenpair
    (index ``N-1``) is never included.

    Parameters
    ----------
    lambdas : sequence of float
        Eigenvalues, one per eigenvector. Because the last entry is always
        skipped, with the ascending order returned by ``numpy.linalg.eigh``
        the largest eigenvalue is excluded (presumably the "market mode" --
        TODO confirm this is intended).
    v : array-like, shape (N, N)
        Eigenvectors stored as COLUMNS: ``v[:, i]`` belongs to ``lambdas[i]``.
        This is the layout returned by ``numpy.linalg.eigh``.
    lambda_plus : float
        Threshold; only eigenvalues strictly above it contribute.

    Returns
    -------
    N x N matrix. It is a ``numpy.matrix`` as soon as one term has been
    added, and a plain all-zero ``numpy.ndarray`` when no eigenvalue passes
    the threshold.
    """
    N=len(lambdas)
    C_clean=np.zeros((N, N))

    # np.matrix keeps the 2-D shape of a sliced column, so u * u.T below is an
    # N x N outer product (and the return type stays what callers already get).
    v_m=np.matrix(v)
    for i in range(N-1):
        if lambdas[i]>lambda_plus:
            u=v_m[:,i]   # i-th eigenvector as an N x 1 column (not row i)
            C_clean=C_clean+lambdas[i] * np.dot(u,u.T)
    return C_clean
    
    
def LouvainCorrelationClustering(R):   # R is a matrix of return
    """Cluster the columns of ``R`` with Louvain on a filtered correlation matrix.

    Steps:
      1. compute the correlation matrix of ``R`` (``R.corr()``, so ``R`` must
         be a pandas DataFrame with one column per series);
      2. eigendecompose it and keep only the eigen-components selected by
         ``compute_C_minus_C0`` with the threshold
         ``lambda_plus = (1 + sqrt(N/T))**2`` (the upper edge of the
         Marchenko-Pastur spectrum for ``N`` columns and ``T`` rows);
      3. use the absolute value of the filtered matrix as a weighted
         adjacency matrix and run Louvain community detection on it.

    Parameters
    ----------
    R : pandas.DataFrame, shape (T, N)
        Rows are observations, columns are the series to cluster.

    Returns
    -------
    pandas.DataFrame
        One row per graph node (integer position of the column in ``R``),
        with a single column (labelled ``0``) holding the community id.

    Notes
    -----
    No random state is passed to ``best_partition``, so the partition may
    differ from one run to the next.
    """
    T, N = R.shape[0], R.shape[1]

    q=N*1./T
    lambda_plus=(1.+np.sqrt(q))**2

    C=R.corr()
    lambdas, v = LA.eigh(C)

    # C_s: "_s" stands for "structure", i.e. the non-random part of C.
    C_s=compute_C_minus_C0(lambdas,v,lambda_plus)
    # Edge weights must be non-negative; np.asarray strips a possible
    # np.matrix wrapper so the graph builder gets a plain 2-D ndarray.
    C_s=np.abs(np.asarray(C_s))

    # from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the
    # equivalent constructor available since NetworkX 2.0.
    mygraph= nx.from_numpy_array(C_s)
    partition = community.community_louvain.best_partition(mygraph)

    DF=pd.DataFrame.from_dict(partition,orient="index")
    return DF




In [2]:
from rpy2.robjects.packages import importr, isinstalled

utils = importr('utils')

# Install the R package only when it is missing, so re-running the notebook
# does not download and rebuild data.table every time.
if not isinstalled("data.table"):
    utils.install_packages("data.table")
 


rpy2.rinterface.NULL

In [3]:
import rpy2.robjects as robjects

# Handle on the embedded R interpreter.
r = robjects.r

# Evaluate the R script inside the embedded R session; `newfunc` keeps
# whatever R's source() returned.
# NOTE(review): data.table is not imported on the Python side here;
# presumably the R script loads it itself -- confirm.
newfunc = r["source"]("libClusteringGiadaMarsiliFast.R")

# Fetch the `aggregateClusters` object from R's global environment so it can
# be called from Python.
aggregateClusters = robjects.globalenv["aggregateClusters"]


In [4]:
import numpy as np

import feather   # pip install feather-format

# Configuration for this cell: input file and the block of rows to analyse.
RETURNS_PATH = "data/clean/us_equities_logreturns.feather"
ROW_START = 12000   # first row kept (inclusive, positional)
ROW_STOP = 14000    # first row NOT kept (exclusive, positional)

# Full table as stored on disk.
DF = feather.read_dataframe(RETURNS_PATH)

# Positional slice of ROW_STOP - ROW_START rows used by the following cells.
# NOTE(review): presumably rows are time-ordered observations -- confirm.
DF_cut = DF.iloc[ROW_START:ROW_STOP]

In [5]:
# Boolean mask over the columns: True where the column holds at least one
# missing value inside the selected rows.
sel = DF_cut.isna().any(axis=0)

# Keep only the columns that are complete over the window.
DF_cut = DF_cut.drop(columns=sel.index[sel])

In [6]:
import rpy2.robjects.numpy2ri

# Switch on rpy2's numpy converter so numpy arrays can be passed directly
# to R functions.
rpy2.robjects.numpy2ri.activate()

# Correlation matrix of the retained columns, handed to the R routine as a
# bare numpy array (labels are not passed along).
C = DF_cut.corr()
myres = aggregateClusters(C.to_numpy())

In [7]:
# Display the whole nested R list returned by aggregateClusters.
# The rendered output is long; index into `myres` to inspect a single part.
myres


0,1,2,3,4,5,6,7,8
Lc,ListVector with 49 elements.  1  FloatVector with 1 elements.  inf  3  FloatVector with 1 elements.  inf  10  FloatVector with 1 elements.  inf  ...  ...  260  FloatVector with 1 elements.  inf  264  FloatVector with 1 elements.  0.000000  265  FloatVector with 1 elements.  0.000000,,,,,,,
1,FloatVector with 1 elements.  inf,,,,,,,
inf,,,,,,,,
3,FloatVector with 1 elements.  inf,,,,,,,
inf,,,,,,,,
10,FloatVector with 1 elements.  inf,,,,,,,
inf,,,,,,,,
...,...,,,,,,,
260,FloatVector with 1 elements.  inf,,,,,,,
inf,,,,,,,,

0,1
1,FloatVector with 1 elements.  inf
inf,
3,FloatVector with 1 elements.  inf
inf,
10,FloatVector with 1 elements.  inf
inf,
...,...
260,FloatVector with 1 elements.  inf
inf,
264,FloatVector with 1 elements.  0.000000

0
inf

0
inf

0
inf

0
inf

0
0.0

0
0.0

0,1
'260','272'

0,1
1,IntVector with 1 elements.  1
1,
2,IntVector with 1 elements.  1
1,
3,IntVector with 1 elements.  3
3,
...,...
284,IntVector with 1 elements.  73
73,
285,IntVector with 1 elements.  3

0
1

0
1

0
3

0
73

0
3

0
1

0,1,2,3,4,5,6,7,8
1,IntVector with 71 elements.  1  2  27  61  ...  175  286  163  85,,,,,,,
1,2,27.0,61.0,...,175.0,286.0,163.0,85.0
3,IntVector with 87 elements.  3  8  51  99  ...  156  278  29  42,,,,,,,
3,8,51.0,99.0,...,156.0,278.0,29.0,42.0
10,IntVector with 19 elements.  10  96  12  50  ...  104  90  78  49,,,,,,,
10,96,12.0,50.0,...,104.0,90.0,78.0,49.0
...,...,,,,,,,
260,IntVector with 3 elements.  260  275  272,,,,,,,
260,275,272.0,,,,,,
264,IntVector with 1 elements.  264,,,,,,,

0,1,2,3,4,5,6,7,8
1,2,27,61,...,175,286,163,85

0,1,2,3,4,5,6,7,8
3,8,51,99,...,156,278,29,42

0,1,2,3,4,5,6,7,8
10,96,12,50,...,104,90,78,49

0,1,2
260,275,272

0
264

0
265

0,1
1,FloatVector with 1 elements.  0.000000
0.000000,
3,FloatVector with 1 elements.  0.000000
0.000000,
10,FloatVector with 1 elements.  0.000000
0.000000,
...,...
260,FloatVector with 1 elements.  0.000000
0.000000,
264,FloatVector with 1 elements.  1.000000

0
0.0

0
0.0

0
0.0

0
0.0

0
1.0

0
1.0

0,1
1,FloatVector with 1 elements.  71.000000
71.000000,
3,FloatVector with 1 elements.  87.000000
87.000000,
10,FloatVector with 1 elements.  19.000000
19.000000,
...,...
260,FloatVector with 1 elements.  3.000000
3.000000,
264,FloatVector with 1 elements.  1.000000

0
71.0

0
87.0

0
19.0

0
3.0

0
1.0

0
1.0


In [10]:
# myres[3] is an R list (the R counterpart of a dictionary) that holds the
# membership of each cluster, one cluster at a time.
#
# There are 49 clusters (with the chosen parameters), which is far more than
# Louvain yields when applied once.

# Members of the first cluster.
# NOTE(review): the values look like 1-based R indices of the columns of the
# correlation matrix -- confirm before using them to index Python objects.
np.array(myres[3][0])

array([  1,   2,  27,  61, 242,  32,  87, 117,  70,  72, 172, 112, 127,
       132, 217, 207,   6,   7,   9,  55,  18,  17,  20,  80, 257, 151,
        75,  58,  57, 116,  59,  22, 142, 174,  31,  44, 159, 162, 200,
       196,  37,  68, 118, 101, 125, 157, 194,  81, 191, 184, 188,  56,
       183, 212, 124, 136,  93,  48, 100,  91, 119, 141, 206, 248, 103,
        95,  43, 175, 286, 163,  85], dtype=int32)