In [1]:
import datetime
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from sklearn import cluster, covariance, manifold
from sklearn.cluster import DBSCAN, AffinityPropagation

In [2]:
# Instrument identifiers, grouped by base currency. Each entry is also the
# stem of the CSV file that holds that instrument's quotes.
symbols = [
    # AUD
    'AUDCAD1', 'AUDCHF1', 'AUDJPY1', 'AUDNZD1', 'AUDUSD1',
    # CAD
    'CADCHF1', 'CADJPY1',
    # CHF
    'CHFJPY1',
    # EUR
    'EURAUD1', 'EURCAD1', 'EURCHF1', 'EURDKK1', 'EURGBP1', 'EURHKD1',
    'EURJPY1', 'EURNOK1', 'EURNZD1', 'EURPLN1', 'EURSEK1', 'EURTRY1',
    'EURUSD1',
    # GBP
    'GBPAUD1', 'GBPCAD1', 'GBPCHF1', 'GBPJPY1', 'GBPNZD1', 'GBPSGD1',
    'GBPUSD1',
    # HKD
    'HKDJPY1',
    # NOK
    'NOKJPY1', 'NOKSEK1',
    # NZD
    'NZDCAD1', 'NZDCHF1', 'NZDJPY1', 'NZDSGD1', 'NZDUSD1',
    # SGD
    'SGDJPY1',
    # USD
    'USDCAD1', 'USDCHF1', 'USDCNH1', 'USDDKK1', 'USDHKD1', 'USDJPY1',
    'USDMXN1', 'USDNOK1', 'USDPLN1', 'USDRUB1', 'USDSEK1', 'USDSGD1',
    'USDTRY1',
]

# Array form of the symbol list so it can be indexed with boolean masks
# (e.g. one mask per cluster label).
names = np.array(symbols)

# One DataFrame per symbol, in the same order as `symbols`. Column names are
# supplied explicitly, so the first row of every file is read as data.
quotes = [
    pd.read_csv(
        'Data/' + symbol + '.csv',
        names=['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume'],
    )
    for symbol in symbols
]

def calc_r(data):
    """Return the absolute log-ratios of consecutive values in ``data``.

    For every adjacent pair ``(a, b)`` the result holds ``ln(|b / a|)``; the
    sign of the ratio is discarded before taking the logarithm.

    Parameters
    ----------
    data : sequence of numbers
        Any iterable of numeric values (list, NumPy array, pandas Series).
        Values are taken in iteration order, so a pandas Series whose index
        does not start at 0 (e.g. a slice) is handled positionally.

    Returns
    -------
    list of float
        ``len(data) - 1`` values; an empty list when ``data`` has fewer than
        two elements.

    Raises
    ------
    ValueError
        From ``math.log`` when a ratio is exactly zero.
    """
    # Materialise the values once so pairing is positional rather than
    # label-based (``series[i]`` looks up the *label* i on a pandas Series).
    values = list(data)
    return [
        math.log(math.fabs(following / current))
        for current, following in zip(values, values[1:])
    ]

# Number of leading rows taken from every quote file.
N_SAMPLES = 2047

# Open and close prices as 2-D float arrays: one row per symbol, one column
# per bar. The arrays are deliberately not called `open`/`close` so the
# builtin `open` stays usable for the rest of the kernel session, and the
# builtin `float` replaces the `np.float` alias that NumPy 1.24 removed.
open_prices = np.array(
    [q.Open.iloc[:N_SAMPLES].to_numpy() for q in quotes], dtype=float)
close_prices = np.array(
    [q.Close.iloc[:N_SAMPLES].to_numpy() for q in quotes], dtype=float)

# Per-bar price change (close minus open) for every symbol.
variation = close_prices - open_prices

#variation = np.array([calc_r(q.Close[0:1024]) for q in quotes]).astype(np.float)

#%matplotlib qt
#plt.plot(variation[1])


In [11]:
# Estimate a sparse inverse-covariance (graph) model of the symbols.
# scikit-learn 0.20 renamed GraphLassoCV to GraphicalLassoCV and later removed
# the old name, so prefer the new class and fall back on older installs.
edge_model_cls = getattr(covariance, 'GraphicalLassoCV', None)
if edge_model_cls is None:
    edge_model_cls = covariance.GraphLassoCV
edge_model = edge_model_cls()

# Samples in rows, symbols in columns; scale every symbol to unit standard
# deviation so the estimator works on comparable series.
X = variation.copy().T
column_std = X.std(axis=0)
constant_columns = column_std == 0
if constant_columns.any():
    # Dividing by a zero standard deviation would fill the column with
    # inf/NaN; fail early and say which symbols are responsible.
    raise ValueError(
        'Cannot standardize constant series: '
        + ', '.join(names[constant_columns]))
X /= column_std

edge_model.fit(X)

# Cluster the symbols on the estimated covariance using mean shift
# (affinity propagation and DBSCAN were tried here as alternatives).
# The cluster centres are not used; a real name is used instead of `_`
# so IPython's last-output variable is left alone.
cluster_centers, labels = cluster.mean_shift(edge_model.covariance_)
print(labels)

# Highest cluster index, i.e. the number of clusters minus one.
n_labels = labels.max()

for i in range(n_labels + 1):
    print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0]
Cluster 1: AUDCAD1, AUDCHF1, AUDJPY1, AUDNZD1, AUDUSD1, CADCHF1, CADJPY1, CHFJPY1, EURAUD1, EURCAD1, EURCHF1, EURDKK1, EURGBP1, EURHKD1, EURJPY1, EURNOK1, EURNZD1, EURPLN1, EURSEK1, EURTRY1, EURUSD1, GBPAUD1, GBPCAD1, GBPCHF1, GBPJPY1, GBPNZD1, GBPSGD1, GBPUSD1, HKDJPY1, NOKJPY1, NOKSEK1, NZDCAD1, NZDCHF1, NZDJPY1, NZDSGD1, NZDUSD1, SGDJPY1, USDCAD1, USDCHF1, USDCNH1, USDDKK1, USDHKD1, USDJPY1, USDMXN1, USDNOK1, USDPLN1, USDRUB1, USDSEK1, USDSGD1, USDTRY1


In [4]:
# Find a 2-D layout for the symbols with locally linear embedding.
# The dense eigen-solver keeps the result reproducible: arpack starts from
# random vectors that are not under our control.
lle_options = dict(n_neighbors=6, n_components=2, eigen_solver='dense')
node_position_model = manifold.LocallyLinearEmbedding(**lle_options)

# X.T has one row per symbol, so the fit returns one 2-D point per symbol;
# transpose so that embedding[0] holds the x and embedding[1] the y values.
symbol_coords = node_position_model.fit_transform(X.T)
embedding = symbol_coords.T

In [5]:
%matplotlib qt
plt.figure(1, facecolor='w', figsize=(10, 8))
plt.clf()
ax = plt.axes([0., 0., 1., 1.])
plt.axis('off')

# Display a graph of the partial correlations
partial_correlations = edge_model.precision_.copy()
print(partial_correlations)

d = 1 / np.sqrt(np.diag(partial_correlations))

partial_correlations *= d
partial_correlations *= d[:, np.newaxis]
non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

# Plot the nodes using the coordinates of our embedding
plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
            cmap=plt.cm.spectral)

# Plot the edges
start_idx, end_idx = np.where(non_zero)
#a sequence of (*line0*, *line1*, *line2*), where::
#            linen = (x0, y0), (x1, y1), ... (xm, ym)
segments = [[embedding[:, start], embedding[:, stop]]
            for start, stop in zip(start_idx, end_idx)]
values = np.abs(partial_correlations[non_zero])

#lc = LineCollection(segments,
#                    zorder=0, cmap=plt.cm.hot_r,
#                    norm=plt.Normalize(0, .7 * values.max()))
#lc.set_array(values)
#lc.set_linewidths(15 * values)
#ax.add_collection(lc)

# Add a label to each node. The challenge here is that we want to
# position the labels to avoid overlap with other labels
for index, (name, label, (x, y)) in enumerate(
        zip(names, labels, embedding.T)):

    dx = x - embedding[0]
    dx[index] = 1
    dy = y - embedding[1]
    dy[index] = 1
    this_dx = dx[np.argmin(np.abs(dy))]
    this_dy = dy[np.argmin(np.abs(dx))]
    if this_dx > 0:
        horizontalalignment = 'left'
        x = x + .002
    else:
        horizontalalignment = 'right'
        x = x - .002
    if this_dy > 0:
        verticalalignment = 'bottom'
        y = y + .002
    else:
        verticalalignment = 'top'
        y = y - .002
    plt.text(x, y, name, size=10,
             horizontalalignment=horizontalalignment,
             verticalalignment=verticalalignment,
             bbox=dict(facecolor='w',
                       edgecolor=plt.cm.spectral(label / float(n_labels)),
                       alpha=.6))

plt.xlim(embedding[0].min() - .15 * embedding[0].ptp(),
         embedding[0].max() + .10 * embedding[0].ptp(),)
plt.ylim(embedding[1].min() - .03 * embedding[1].ptp(),
         embedding[1].max() + .03 * embedding[1].ptp())

plt.show()

[[ 1.         0.         0.        ..., -0.         0.        -0.       ]
 [ 0.         1.0000183 -0.        ...,  0.        -0.         0.       ]
 [ 0.        -0.         1.        ...,  0.         0.        -0.       ]
 ..., 
 [-0.         0.         0.        ...,  1.        -0.         0.       ]
 [ 0.        -0.         0.        ..., -0.         1.         0.       ]
 [-0.         0.        -0.        ...,  0.         0.         1.       ]]
