In [1]:
# Import system requirements
import sys, os

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import time

# Use the NetworkX graph package to build the (signed) correlation networks
import networkx as nx

In [12]:
# Import data
#
# The three arrays are read through explicit paths instead of os.chdir():
# changing the process-wide working directory makes every later relative path
# depend on which cells have already been run.

dataset = 'sp100' # values: 'asset_class','ftse','sectors','sp100'

DATA_DIR = os.path.join("..", "data_modified") # folder holding the <dataset>_*.npy files

corr_tensor = np.load(os.path.join(DATA_DIR, '%s_corr.npy' % (dataset))) #list of correlation matrices for each date
dates = np.load(os.path.join(DATA_DIR, '%s_dates.npy' % (dataset))) #list of timestamps
nodes = np.load(os.path.join(DATA_DIR, '%s_nodes.npy' % (dataset))) #list of tickers

# Later cells slice corr_tensor[k, :, :] and treat each slice as a 2-D matrix,
# so fail here with a clear message if the file has a different layout.
if corr_tensor.ndim != 3:
    raise ValueError("expected a 3-D correlation tensor, got %d dimension(s)" % corr_tensor.ndim)

num_examples = corr_tensor.shape[0] #number of dates
dim = corr_tensor.shape[1] #number of assets


In [3]:
# Helper Functions

def make_graph(corr_mat, node_labels, graph_type):
    """Build an undirected NetworkX graph from a correlation matrix.

    Only the entries above the diagonal (i < j) are read, so every unordered
    pair of nodes is looked at once and no self-loops are created.

    Parameters
    ----------
    corr_mat : square 2-D array
        Must expose ``.shape`` and ``[i, j]`` indexing (e.g. a numpy array).
    node_labels : sequence
        One label per row/column of ``corr_mat``; used as the node names.
    graph_type : {'signed', 'corr', 'uncorr'}
        'signed' : edge for every entry < 0 (attribute ``sign=-1``) or
                   > 0 (attribute ``sign=1``).
        'corr'   : edge for every entry that is not equal to 0.
        'uncorr' : edge for every entry that is exactly 0.

    Returns
    -------
    (G, density)
        ``G`` is the ``nx.Graph``; ``density`` is 2*E / (N*(N-1)), or 0.0 when
        the graph has fewer than two nodes (the ratio is undefined there).

    Raises
    ------
    ValueError
        If the number of labels differs from the matrix dimension, if the
        matrix is not square, or if ``graph_type`` is not one of the three
        supported values.
    """
    dim = corr_mat.shape[0]

    if not dim == len(node_labels):
        raise ValueError('number node labels not = corr matrix dimensions')

    if tuple(corr_mat.shape) != (dim, dim):
        raise ValueError('corr matrix must be square, got shape %s' % (tuple(corr_mat.shape),))

    # An unrecognised type used to fall through every branch and silently
    # return a graph without edges.
    if graph_type not in ('signed', 'corr', 'uncorr'):
        raise ValueError("graph_type must be 'signed', 'corr' or 'uncorr', got %r" % (graph_type,))

    G = nx.Graph()
    G.add_nodes_from(node_labels)

    for i in range(dim):
        for j in range(i+1, dim):
            entry = corr_mat[i, j]
            if graph_type == 'signed':
                if entry < 0:
                    G.add_edge(node_labels[i], node_labels[j], sign=-1)
                elif entry > 0:
                    G.add_edge(node_labels[i], node_labels[j], sign=1)
            elif graph_type == 'corr':
                if entry != 0.000:
                    G.add_edge(node_labels[i], node_labels[j])
            else:  # 'uncorr'
                if entry == 0.000:
                    G.add_edge(node_labels[i], node_labels[j])

    num_nodes = G.number_of_nodes()
    if num_nodes < 2:
        # N*(N-1) would be 0: avoid the ZeroDivisionError.
        density = 0.0
    else:
        density = (2.0*G.number_of_edges())/(num_nodes*(num_nodes - 1))

    return G, density

def get_max_deg(G):
    """Return the largest node degree found in ``G``.

    ``G.degree()`` is iterated as ``(node, degree)`` pairs. The maximum is
    taken directly (no sorting is needed for that), and a graph without any
    nodes yields 0 instead of the ``ValueError`` that ``max()`` raises on an
    empty sequence.
    """
    return max((deg for _node, deg in G.degree()), default=0)

def clique_size(clique):
    """Return how many members ``clique`` has (any object supporting ``len``)."""
    member_count = len(clique)
    return member_count

In [4]:
# Report the size of the loaded data: number of dates and number of assets.
shape_summary = "num examples: %d, matrix dim: %d" % (num_examples, dim)
print(shape_summary)

num examples: 42, matrix dim: 90


In [5]:
# Build one example graph from the correlation matrix in the middle of the date range.
VIS_THRESHOLD = 0.9 #arbitrary threshold, for visualization purposes
mid_idx = int(num_examples/2)
corr_mat = corr_tensor[mid_idx, :, :].copy() #take the correlation matrix for a specific date (for visualization)
corr_mat[np.abs(corr_mat) < VIS_THRESHOLD] = 0 #zero every entry strictly between -0.9 and 0.9
G, density = make_graph(corr_mat, nodes, 'corr')

In [7]:
# Run classical max clique algm
#
# For every threshold and every sampled date: zero the correlations lying
# strictly between -threshold and +threshold, build the 'corr' graph, run
# NetworkX's approximation max_clique on it and record the clique, its size,
# the graph density, the date, the threshold and the elapsed time.
from networkx.algorithms.approximation.clique import max_clique as class_max_clique

DATE_STRIDE = 5 #only every 5th date is used, just to speed up this for-loop; not necessary to have every month

clique_array = []
clique_size_array = []
date_array = []
density_array = []
threshold_array = []
time_array = []

count = 0 #count to keep track of progress when running
for threshold in np.arange(0.1, 1, 0.1):
    for step in range(1, int(num_examples/DATE_STRIDE)):
        date_idx = step*DATE_STRIDE

        corr_mat = corr_tensor[date_idx, :, :].copy()
        corr_mat[(corr_mat > -1*threshold) & (corr_mat < threshold)] = 0

        G, density = make_graph(corr_mat, nodes, 'corr')

        count += 1
        if count % 10 == 0: print("count: %d" % (count))

        # time.clock() was removed in Python 3.8. It used to sit inside the
        # try-block, so on a current interpreter the AttributeError was
        # swallowed by `except Exception` and no result was ever recorded.
        # perf_counter() is taken outside the try so that only failures of the
        # clique search itself are caught.
        t = time.perf_counter()
        try:
            max_clique = class_max_clique(G)
        except Exception as err:
            print(err)
            print("Error on matrix %d with threshold %f" % (date_idx, threshold))
        else:
            elapsed = time.perf_counter() - t
            clique_array.append(max_clique)
            clique_size_array.append(len(max_clique))
            time_array.append(elapsed)
            density_array.append(density)
            date_array.append(dates[date_idx])
            threshold_array.append(threshold)



count: 10
count: 20
count: 30
count: 40
count: 50
count: 60


In [8]:
# The result lists are filled together, so they must all have the same length.
result_lengths = {len(column) for column in (clique_array, date_array, density_array,
                                             threshold_array, clique_size_array, time_array)}
assert len(result_lengths) == 1

In [10]:
# Create Pandas DataFrame for class results and write it to the results folder.
# The output path is spelled out instead of calling os.chdir(), so running this
# cell no longer changes the working directory for the rest of the notebook.
RESULT_DIR = os.path.join("..", "result_files")

class_results = pd.DataFrame(data={"date": date_array, "threshold": threshold_array, "density": density_array,
                                   "max_clique": clique_array, "clique_size": clique_size_array,
                                   "computation_time": time_array})
class_results.to_csv(os.path.join(RESULT_DIR, "maxclique_class_%s_res.csv" % (dataset)))