In [25]:
import sys, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import time

### Use the NetworkX graph library to create a signed network
import networkx as nx

In [26]:
# Which dataset to analyse; accepted values: 'asset_class', 'ftse', 'sectors', 'sp100'.
dataset = 'sp100'

# The .npy files below are opened by bare file name, so switch into the data
# directory first (this changes the working directory for the whole kernel).
os.chdir("../data_modified")

# Load the correlation tensor, its dates and its node labels in one pass.
corr_tensor, dates, nodes = (
    np.load(file_pattern % (dataset))
    for file_pattern in ('%s_corr.npy', '%s_dates.npy', '%s_nodes.npy')
)

# Axis 0 of the tensor indexes dates, axis 1 indexes assets.
num_examples, dim = corr_tensor.shape[0], corr_tensor.shape[1]

In [27]:
# Helper Functions

def make_graph(corr_mat, node_labels, graph_type):
    """Build an undirected graph from a correlation matrix.

    Only the strict upper triangle of ``corr_mat`` is read, so every unordered
    pair of nodes is examined exactly once and the diagonal is ignored.

    Parameters
    ----------
    corr_mat : 2-D array
        Square matrix indexed as ``corr_mat[i, j]``; its first dimension must
        equal ``len(node_labels)``.
    node_labels : sequence
        One hashable label per matrix row; used as the graph's node names.
    graph_type : {'signed', 'corr', 'uncorr'}
        'signed' -- edge with attribute ``sign=1`` for positive entries and
                    ``sign=-1`` for negative entries; zero entries get no edge.
        'corr'   -- plain edge wherever the entry is non-zero.
        'uncorr' -- plain edge wherever the entry is exactly zero.

    Returns
    -------
    (G, density)
        ``G`` is the ``nx.Graph``; ``density`` is ``2*E / (N*(N-1))``, or
        ``0.0`` when the graph has fewer than two nodes (where that formula
        is undefined).

    Raises
    ------
    ValueError
        If the number of labels does not match the matrix dimension, or if
        ``graph_type`` is not one of the supported values.
    """
    dim = corr_mat.shape[0]

    # Validate everything up front so no half-built graph is produced.
    if not dim == len(node_labels):
        raise ValueError('number node labels not = corr matrix dimensions')

    # Pick the rule that maps a matrix entry to edge attributes
    # (a dict, possibly empty) or to None when the pair gets no edge.
    if graph_type == 'signed':
        def edge_attrs(weight):
            if weight < 0:
                return {'sign': -1}
            if weight > 0:
                return {'sign': 1}
            return None
    elif graph_type == 'corr':
        def edge_attrs(weight):
            return {} if weight != 0 else None
    elif graph_type == 'uncorr':
        def edge_attrs(weight):
            return {} if weight == 0 else None
    else:
        # Previously an unknown type silently produced an edgeless graph.
        raise ValueError(
            "unknown graph_type %r; expected 'signed', 'corr' or 'uncorr'" % (graph_type,)
        )

    G = nx.Graph()
    G.add_nodes_from(node_labels)

    for i in range(dim):
        for j in range(i + 1, dim):
            attrs = edge_attrs(corr_mat[i, j])
            if attrs is not None:
                G.add_edge(node_labels[i], node_labels[j], **attrs)

    # Use the graph's own node count (duplicate labels collapse into one node).
    n_nodes = G.number_of_nodes()
    if n_nodes < 2:
        # No node pairs exist, so the density formula would divide by zero.
        density = 0.0
    else:
        density = (2*G.number_of_edges())/(n_nodes*(n_nodes - 1))

    return G, density

In [28]:
# Sanity check on the loaded tensor: number of dates and number of assets.
print("num examples: %d, matrix dim: %d" % (num_examples, dim))

num examples: 42, matrix dim: 90


In [29]:
# Take the matrix at the middle date index as a working copy, so the
# thresholding below does not modify corr_tensor itself.
corr_mat = corr_tensor[num_examples // 2, :, :].copy()

# Zero every entry strictly inside (-0.2, 0.2); such pairs get no edge in the 'corr' graph.
corr_mat[np.abs(corr_mat) < 0.2] = 0

G, density = make_graph(corr_mat, nodes, 'corr')

In [21]:
# Run the classical approximate maximum-independent-set routine on thresholded
# correlation graphs, sweeping the threshold over 0.1 .. 0.9 and using every
# 5th correlation matrix (indices 5, 10, ...).
#
# Imported from the approximation package namespace rather than a specific
# submodule. NOTE(review): the submodule that hosts this function appears to
# differ between NetworkX releases -- confirm against the installed version.
from networkx.algorithms.approximation import maximum_independent_set as mis

# Parallel result lists: one entry per successful (threshold, matrix) run.
indset_array = []
set_size_array = []
date_array = []
density_array = []
time_array = []
threshold_array = []


count = 0
for i in np.arange(0.1, 1, 0.1):
    for j in range(1, int(num_examples/5)):

        # Work on a copy and zero out weak correlations (|corr| < i) so that
        # those pairs produce no edge in the 'corr' graph.
        corr_mat = corr_tensor[j*5, :, :].copy()
        corr_mat[(corr_mat > -1*i) & (corr_mat < i)] = 0

        G, density = make_graph(corr_mat, nodes, 'corr')

        count += 1
        if count % 10 == 0: print("count: %d" % (count))

        try:
            # time.clock() was removed in Python 3.8; because of the broad
            # except below, calling it made every iteration fail and left
            # all result lists empty. perf_counter() is the replacement
            # for timing short intervals.
            t = time.perf_counter()
            max_ind_set = mis(G)
            run_time = time.perf_counter() - t
            set_size = len(max_ind_set)

        except Exception as err:
            # Best effort: report the failing case and continue the sweep.
            print(err)
            print("Error on matrix %d with threshold %f" % (j*5, i))

        else:
            indset_array.append(max_ind_set)
            set_size_array.append(set_size)
            density_array.append(density)
            time_array.append(run_time)
            date_array.append(dates[j*5])
            threshold_array.append(i)

count: 10
count: 20
count: 30
count: 40
count: 50
count: 60


In [22]:
# All result lists become DataFrame columns, so they must have equal length.
# threshold_array is included here because it is written as a column too.
assert (
    len(indset_array) == len(date_array) == len(density_array)
    == len(set_size_array) == len(time_array) == len(threshold_array)
), "result lists have mismatched lengths"

In [24]:
# The CSV is written by bare file name, so move into the results directory
# first (this changes the working directory for the whole kernel).
os.chdir("../result_files")

# Assemble the classical-solver results, one row per run, and save them
# under a file name that carries the dataset identifier.
pd.DataFrame(
    data={
        "date": date_array,
        "threshold": threshold_array,
        "density": density_array,
        "max_ind_set": indset_array,
        "set_size": set_size_array,
        "computation_time": time_array,
    }
).to_csv("indset_class_%s_res.csv" % (dataset))