In [None]:
from numpy import linalg as LA
import numpy as np
import pandas as pd
import math
import networkx as nx
import community

def compute_C_minus_C0(lambdas,v,lambda_plus,removeMarketMode=True):
    """Rebuild a matrix from its eigenpairs whose eigenvalue exceeds a threshold.

    Sums ``lambda_i * v_i v_i^T`` over every eigenpair with
    ``lambda_i > lambda_plus`` (strict inequality).  When ``removeMarketMode``
    is true, the eigenpair with the largest eigenvalue is left out of the sum
    even though it is above the threshold.

    Parameters
    ----------
    lambdas : array-like, shape (N,)
        Eigenvalues, in any order (they are sorted internally).
    v : array-like, shape (N, N)
        Eigenvectors stored column-wise: ``v[:, i]`` belongs to ``lambdas[i]``.
    lambda_plus : float
        Threshold; only eigenvalues strictly above it contribute.
    removeMarketMode : bool, default True
        If true, drop the eigenpair with the largest eigenvalue.

    Returns
    -------
    numpy.ndarray, shape (N, N)
        The filtered matrix; all zeros when no eigenpair is retained.
    """
    lambdas = np.asarray(lambdas)
    v = np.asarray(v)
    N = len(lambdas)

    # Ascending sort: the largest eigenvalue ends up in the LAST position,
    # so that is the entry to discard when the market mode must be removed.
    order = np.argsort(lambdas)
    lambdas, v = lambdas[order], v[:, order]

    keep = lambdas > lambda_plus
    if removeMarketMode and N > 0:
        keep[-1] = False

    # (V * lambda) @ V.T equals the sum of lambda_i * outer(v_i, v_i) over the
    # retained columns; with nothing retained it is an (N, N) block of zeros.
    kept_vectors = v[:, keep]
    return (kept_vectors * lambdas[keep]) @ kept_vectors.T
    
    
def LouvainCorrelationClustering(R, random_state=None):
    """Cluster the columns of ``R`` with Louvain on a filtered correlation matrix.

    Steps: correlation matrix of the columns of ``R`` -> eigendecomposition ->
    keep only the eigen-components selected by ``compute_C_minus_C0`` with the
    threshold ``(1 + sqrt(N/T))**2`` -> weighted graph whose adjacency matrix
    is the absolute value of that filtered matrix -> Louvain best partition.

    Parameters
    ----------
    R : pandas.DataFrame, shape (T, N)
        One column per object to cluster, one row per observation
        (``R.corr()`` is used, so the columns are what gets correlated).
    random_state : optional
        Forwarded to ``best_partition`` when given, to make the otherwise
        stochastic Louvain result reproducible.  When left as ``None`` the
        call is made exactly as before, without that keyword.

    Returns
    -------
    DF : pandas.DataFrame
        Built from ``partition`` with its keys as the index (single column 0).
    mygraph : networkx.Graph
        Graph built from the absolute filtered matrix.
    partition : dict
        The mapping returned by ``best_partition``.
    """
    T, N = R.shape

    # Upper edge of the Marchenko-Pastur spectrum for aspect ratio q = N/T.
    q = N * 1. / T
    lambda_plus = (1. + np.sqrt(q)) ** 2

    C = R.corr()
    lambdas, v = LA.eigh(C)

    C_s = compute_C_minus_C0(lambdas, v, lambda_plus)

    # np.asarray guards against the helper handing back an np.matrix.
    adjacency = np.abs(np.asarray(C_s))
    mygraph = nx.from_numpy_array(adjacency)

    # Only pass the seed when one was requested, so the default call is
    # unchanged for installations whose best_partition lacks the keyword.
    louvain_kwargs = {} if random_state is None else {"random_state": random_state}
    partition = community.community_louvain.best_partition(mygraph, **louvain_kwargs)

    DF = pd.DataFrame.from_dict(partition, orient="index")
    return DF, mygraph, partition


In [None]:
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import yfinance as yf

# Build `all_data`: one column per ticker, holding the 'Close' column of that
# ticker's yfinance download for 2000-01-01 .. 2023-01-01.
# NOTE(review): `tickers` is not defined in the cells shown here; it has to
# exist in the kernel before this cell runs.
all_data = pd.DataFrame()
for ticker in tickers:
    try:
        # One request per ticker; progress=False silences the progress bar.
        data = yf.download(ticker, start="2000-01-01", end="2023-01-01",progress=False)
        # Column assignment: pandas aligns the new column on all_data's
        # existing index once the frame has one.
        all_data[ticker] = data['Close']
    except Exception as e:
        # Best effort: report the failure and carry on with the next ticker.
        print(f"Failed to retrieve data for {ticker}. Error: {e}")

# Quick visual check of the first rows.
print(all_data.head())

In [None]:
# Keep only the columns of `all_data` that have no missing value at all.
data_subset = all_data.dropna(axis=1)

# Row-to-row log-returns. Row 0 is NaN by construction; it is never used
# because the first window below starts at row 1.
r = np.log(data_subset).diff()

# Window length (in rows) = one third of the number of columns, rounded down.
T = int(np.floor(r.shape[1] / 3))

# ret[k] is the gross return (1 + simple return) booked on row T + k of `r`;
# the first entry is 1 because no position exists before the first window.
ret = [1]
for t in range(T + 1, r.shape[0]):
    # The T rows that precede row t.
    R = r.iloc[t - T:t]

    # Transposed so that the clustered objects (columns) are the T rows of
    # the window: every row of the window gets a cluster label.
    df, G, partition = LouvainCorrelationClustering(R.T)

    # Cluster of the most recent row of the window (label-based lookup, so it
    # does not depend on the row order of `df`).
    pre_state = df.loc[T - 1, 0]
    same_state_rows = df[df[0] == pre_state].index.tolist()

    # Rows that FOLLOWED an earlier row of the same cluster. The latest row
    # (T - 1) has no successor inside the window, so it is left out.
    next_rows = [x + 1 for x in same_state_rows if x + 1 < T]

    # Per-column mean return over those follow-up rows; its sign is the position.
    # With no earlier row in the same cluster the mean is NaN: stay flat (0)
    # instead of letting NaN leak into the performance series.
    ar = R.iloc[next_rows].mean(axis=0)
    pos = np.nan_to_num(np.sign(ar.values))

    # Equal-weight average of position * simple return on row t, as a gross return.
    ret.append(np.dot(pos, np.exp(r.iloc[t].values) - 1) / len(pos) + 1)

# Explicit copy: new columns are added below and must not touch (or warn about) `r`.
df_chopped = r.iloc[T:].copy()
df_chopped['rolling_ret'] = ret
df_chopped['cumulative_perf'] = df_chopped['rolling_ret'].cumprod()

ax = df_chopped['cumulative_perf'].plot()
ax.set_xlabel('time')
ax.set_ylabel('USD')
ax.set_title('Cumulative_return')