In [117]:
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.cluster import SpectralClustering
from pprint import pprint
import copy

In [109]:
# Matrix that the cells below slice and cluster.
# NOTE(review): read_csv treats the first CSV row as column headers by
# default; confirm Kmm.csv really has a header row, otherwise the first data
# row is consumed as column names.
Kmm = pd.read_csv("Kmm.csv")

# Order matters: sub_matrix() uses each state's position in this list to
# locate its block of P consecutive rows/columns in Kmm.
STATES = [
    'Alaska', 'Alabama', 'Arkansas', 'Arizona',
    'California', 'Colorado', 'Connecticut', 'District of Columbia',
    'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Iowa', 'Idaho',
    'Illinois', 'Indiana', 'Kansas'
]
# Number of states (17). Derived from the list so the two cannot drift apart.
N = len(STATES)
# Number of consecutive rows/columns of Kmm that belong to each state.
P = 15

In [110]:
def sub_matrix(k, states):
    """Return the square sub-block of ``k`` that belongs to ``states``.

    Each state owns ``P`` consecutive rows/columns of ``k``, located by the
    state's position in the module-level ``STATES`` list. Rows and columns of
    the result follow the order in which ``states`` is iterated.

    Args:
        k: 2-D array indexed as ``k[rows, :][:, cols]``.
        states: iterable of state names, each present in ``STATES``.

    Returns:
        The rows and columns of ``k`` selected for the requested states.

    Raises:
        ValueError: if a name in ``states`` is not in ``STATES``.
    """
    # Flatten every requested state's P-wide index range into one list.
    selected = [
        offset
        for position in (STATES.index(name) for name in states)
        for offset in range(position * P, (position + 1) * P)
    ]

    # Pick the rows first, then the same indices as columns.
    picked_rows = k[selected, :]
    return picked_rows[:, selected]

In [164]:
def get_clusters(clustering, keys):
    """Assign each key the most common label among its ``P`` points.

    ``clustering.labels_`` is read as consecutive chunks of ``P`` labels, one
    chunk per entry of ``keys`` and in the same order. The label that occurs
    most often inside a chunk becomes that key's cluster.

    Args:
        clustering: object exposing a ``labels_`` sequence.
        keys: ordered sequence of names, one per chunk of ``P`` labels.

    Returns:
        dict mapping each key to its majority label.
    """
    point_labels = clustering.labels_

    def majority(position):
        # Vote over the P labels that belong to the key at this position.
        votes = Counter(point_labels[position * P:(position + 1) * P])
        return votes.most_common(1)[0][0]

    return {name: majority(position) for position, name in enumerate(keys)}

In [171]:
def cluster(prev_cluster):
    """Recursively bisect a group of states, printing every split.

    The rows/columns of ``Kmm`` belonging to the states in ``prev_cluster``
    are clustered into two groups with ``SpectralClustering``. The resulting
    state -> label mapping is pretty-printed, and each label's group is then
    split again. Recursion stops when a group has at most one state or when
    every state lands in the same cluster.

    Args:
        prev_cluster: dict whose keys are the state names to split; the
            values are not read.

    Returns:
        None. The splits are only printed.
    """
    if len(prev_cluster) <= 1:
        return

    members = list(prev_cluster)

    # NOTE(review): affinity='nearest_neighbors' builds a k-NN graph from the
    # rows of the sub-matrix. If Kmm is already a similarity/kernel matrix,
    # affinity='precomputed' may be what is intended; confirm.
    spectral = SpectralClustering(
        n_clusters=2,
        affinity='nearest_neighbors',
        assign_labels='kmeans',
        n_neighbors=P,
        random_state=123,
    )
    spectral.fit(sub_matrix(Kmm.values, members))

    assignment = get_clusters(spectral, members)
    pprint(assignment)

    distinct_labels = set(assignment.values())
    if len(distinct_labels) == 1:
        # Every state fell into the same cluster, so nothing is left to split.
        return

    for label in distinct_labels:
        cluster({
            state: assigned
            for state, assigned in assignment.items()
            if assigned == label
        })

In [172]:
# Start with every state in one group; cluster() only reads the keys.
cluster(dict.fromkeys(STATES, 0))



{'Alabama': 0,
 'Alaska': 0,
 'Arizona': 0,
 'Arkansas': 0,
 'California': 0,
 'Colorado': 0,
 'Connecticut': 0,
 'Delaware': 0,
 'District of Columbia': 0,
 'Florida': 1,
 'Georgia': 1,
 'Hawaii': 1,
 'Idaho': 1,
 'Illinois': 1,
 'Indiana': 1,
 'Iowa': 1,
 'Kansas': 1}
{'Alabama': 0,
 'Alaska': 0,
 'Arizona': 0,
 'Arkansas': 0,
 'California': 1,
 'Colorado': 1,
 'Connecticut': 1,
 'Delaware': 1,
 'District of Columbia': 1}
{'Alabama': 1, 'Alaska': 1, 'Arizona': 0, 'Arkansas': 0}
{'Arizona': 0, 'Arkansas': 0}
{'Alabama': 0, 'Alaska': 1}
{'California': 0,
 'Colorado': 0,
 'Connecticut': 0,
 'Delaware': 0,
 'District of Columbia': 0}
{'Florida': 0,
 'Georgia': 0,
 'Hawaii': 0,
 'Idaho': 0,
 'Illinois': 1,
 'Indiana': 0,
 'Iowa': 0,
 'Kansas': 0}
{'Florida': 0,
 'Georgia': 0,
 'Hawaii': 0,
 'Idaho': 0,
 'Indiana': 1,
 'Iowa': 0,
 'Kansas': 0}
{'Florida': 0, 'Georgia': 0, 'Hawaii': 1, 'Idaho': 0, 'Iowa': 0, 'Kansas': 0}
{'Florida': 1, 'Georgia': 0, 'Idaho': 0, 'Iowa': 0, 'Kansas': 1}
{'Geo