# Affinity Propagation Model

__Affinity Propagation__ is a clustering algorithm that doesn't require us to specify the number of clusters beforehand. It finds representatives of clusters, called exemplars, using a technique called message passing. We start by specifying the similarity measure that we want it to consider. It simultaneously considers all training data points as potential exemplars, and then passes messages between the data points until a set of exemplars emerges.

In [10]:
import datetime
import json

import numpy as np
import matplotlib.pyplot as plt
from sklearn import covariance, cluster
from matplotlib.finance import quotes_historical_yahoo_ochl as quotes_yahoo
# from mpl_finance import quotes_historical_yahoo_ochl as quotes_yahoo

# Input file containing company symbols
input_file = 'company_symbol_mapping.json'

# Load the company symbol map. JSON text is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default.
with open(input_file, 'r', encoding='utf-8') as f:
    company_symbols_map = json.load(f)

# Split the mapping's (key, value) pairs into two parallel arrays:
# the keys become `symbols` and the values become `names`.
symbols, names = np.array(list(company_symbols_map.items())).T

# Load the historical stock quotes, one request per symbol.
# NOTE(review): quotes_yahoo fetches data over the network; presumably the
# URLError recorded with this cell was raised here -- confirm.
start_date = datetime.datetime(2003, 7, 3)
end_date = datetime.datetime(2007, 5, 4)
quotes = [quotes_yahoo(symbol, start_date, end_date, asobject=True) for symbol in symbols]

# Extract opening and closing quotes.
# np.float was only an alias of the builtin float and was removed in
# NumPy 1.24; np.float64 is the same dtype and works on every NumPy version.
opening_quotes = np.array([quote.open for quote in quotes]).astype(np.float64)
closing_quotes = np.array([quote.close for quote in quotes]).astype(np.float64)

# Compute difference between opening and closing quotes
quotes_diff = closing_quotes - opening_quotes

# Normalize the data: transpose a copy so that each symbol becomes a column,
# then divide every column by its own standard deviation.
# NOTE(review): a column with zero standard deviation would yield inf/NaN
# here -- confirm this cannot happen for the chosen symbols.
X = quotes_diff.copy().T
X /= X.std(axis=0)

# Create a graph model.
# scikit-learn 0.20 renamed GraphLassoCV to GraphicalLassoCV and 0.22 removed
# the old name, so prefer the new name and fall back to the old one only on
# releases that predate the rename.
try:
    graph_lasso_cv = covariance.GraphicalLassoCV
except AttributeError:
    graph_lasso_cv = covariance.GraphLassoCV
edge_model = graph_lasso_cv()

# Train the model and cluster on the estimated covariance. NumPy's
# "invalid value" floating-point warnings are silenced for both steps.
with np.errstate(invalid='ignore'):
    edge_model.fit(X)

    # Build clustering model using Affinity Propagation Model;
    # only the per-sample labels are kept.
    _, labels = cluster.affinity_propagation(edge_model.covariance_)

# Largest label value; labels 0..num_labels are iterated below
num_labels = labels.max()

# Print the results of clustering (cluster numbers are shown 1-based)
for i in range(num_labels + 1):
    print('Cluster {} ==> {}'.format(i + 1, names[labels == i]))


URLError: <urlopen error [Errno 8] nodename nor servname provided, or not known>