In [1]:
import pickle

from matplotlib import pyplot as plt
import networkx as nx

from koe.graph_utils import extract_graph_properties, networkx_stats
from koe.model_utils import get_or_error, natural_order, get_labels_by_sids, exclude_no_labels
from koe.models import DataMatrix, Database, AudioFile
from koe.sequence_utils import songs_to_syl_seqs
from koe.ts_utils import bytes_to_ndarray
from koe.ts_utils import get_rawdata_from_binary
from koe.utils import triu2mat, mat2triu
from root.models import User

import plotly
from plotly.offline import download_plotlyjs, init_notebook_mode,  iplot, plot
# Switch plotly to notebook (inline) rendering so that iplot() output appears in cell outputs.
# NOTE(review): per the plotly.offline docs, connected=True loads plotly.js from a CDN rather than
# embedding it in the notebook, so rendering presumably needs network access -- confirm.
init_notebook_mode(connected=True)

import numpy as np
from scipy.cluster.hierarchy import cut_tree, linkage
from scipy.spatial import distance


def load(filename='/tmp/class_clustering.pkl'):
    """
    Load a previously saved class-clustering result from a pickle file.

    The pickle must contain a dict with the keys read below. The DataMatrix itself is not stored
    in the pickle; only its id ('dmid') is, and the object is fetched again from the database.

    :param filename: path of the pickle file to read. Defaults to the location that used to be
                     hard-coded here, so existing ``load()`` calls behave as before.
    :return: the tuple (tree, sorted_order, distmat, class_distmat, class_dist_triu, dm, sids,
             coordinates, classes_info, unique_labels, enum_labels)
    :raises KeyError: if the pickled dict lacks one of the expected keys
    :raises OSError: if the file cannot be opened
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point `filename` at files this project wrote itself.
    with open(filename, 'rb') as f:
        saved = pickle.load(f)

    # All remaining work happens after the file handle is closed.
    tree = saved['tree']
    sorted_order = saved['sorted_order']
    distmat = saved['distmat']
    class_distmat = saved['class_distmat']
    class_dist_triu = saved['class_dist_triu']
    sids = saved['sids']
    coordinates = saved['coordinates']
    classes_info = saved['classes_info']
    dmid = saved['dmid']
    unique_labels = saved['unique_labels']
    enum_labels = saved['enum_labels']

    # NOTE(review): presumably a Django model manager; .get() is expected to raise if no
    # DataMatrix with this id exists -- confirm against koe.models.
    dm = DataMatrix.objects.get(id=dmid)

    return (
        tree, sorted_order, distmat, class_distmat, class_dist_triu, dm, sids, coordinates,
        classes_info, unique_labels, enum_labels,
    )

# Unpack the cached clustering artefacts into notebook-level names used by the cells below.
(
    tree, sorted_order, distmat, class_distmat, class_dist_triu, dm, sids, coordinates, classes_info,
    unique_labels, enum_labels,
) = load()

In [2]:
# Class index -> label, where the index is the label's position in unique_labels.
enum2label = dict(enumerate(unique_labels))

# `tree` is fed to cut_tree below, i.e. it is used as a linkage matrix; its third column is taken
# as the merge heights. The tree is cut at every decile (10%..90%) of those heights, which gives
# one column of cluster assignments per cutoff.
decile_levels = [10, 20, 30, 40, 50, 60, 70, 80, 90]
heights = tree[:, 2]
cutoffs = np.percentile(heights, decile_levels)
clusters = cut_tree(tree, height=cutoffs)

In [3]:
def merge_labels(clusters, classes_info, sids, enum2label):
    """
    Re-map syllables and labels from their original classes to merged clusters.

    Each original class is assigned to exactly one cluster. Every syllable of a class is mapped to
    that cluster, and every cluster gets a label built by joining the labels of the classes it
    absorbed (in class-index order) with ' -&- '.

    Example: syllable 123456 belongs to class 1 and syllable 234567 to class 2. If classes 1 and 2
    both fall into cluster #3, both syllables are mapped to cluster #3 (reported base 1, i.e. 4).

    :param clusters: sequence of N cluster indices (base 0), one per original class; element #i is
                     the cluster that class #i now belongs to. With 10 classes and
                     clusters = [0 0 1 3 1 2 2 0 1 3]:
                         classes #0, #1, #7 -> cluster #0
                         classes #2, #4, #8 -> cluster #1
                         classes #5, #6     -> cluster #2
                         classes #3, #9     -> cluster #3
    :param classes_info: map from original class index to the list of positions (into `sids`) of
                         the syllables in that class, e.g. {0: [1, 4, 6, 9, 31], 1: [2, 17, 5, 22], ...}
    :param sids: sequence of syllable ids (sid in database), indexed by the positions stored in
                 `classes_info`
    :param enum2label: map from original class index to the label given in the database
    :return: (sid_to_cluster_base_1, merged_enum2label_base1) where
             sid_to_cluster_base_1 maps a syllable id to its cluster index (base 1), and
             merged_enum2label_base1 maps a cluster index (base 1) to the joined label of its
             constituent classes
    """
    sid_to_cluster_base_1 = {}
    labels_by_cluster = {}

    for class_idx, cluster_idx in enumerate(clusters):
        cluster_key = cluster_idx + 1  # results are reported with base-1 cluster indices

        # Every syllable of this class inherits the cluster its class was merged into.
        member_positions = classes_info[class_idx]
        sid_to_cluster_base_1.update((sids[position], cluster_key) for position in member_positions)

        # Collect the class labels per cluster; they are joined once all classes are seen.
        class_label = enum2label[class_idx]
        labels_by_cluster.setdefault(cluster_key, []).append(class_label)

    merged_enum2label_base1 = {
        cluster_key: ' -&- '.join(class_labels)
        for cluster_key, class_labels in labels_by_cluster.items()
    }

    return sid_to_cluster_base_1, merged_enum2label_base1


def display_graph(nodes, edges, labels, title='My Graph', iterations=3000, seed=None):
    """
    Lay out an undirected graph with a spring layout and render it inline with plotly.

    :param nodes: iterable of (hashable) node ids
    :param edges: iterable of edges in any form accepted by networkx ``Graph.add_edges_from``.
                  The layout is asked to use the edge attribute 'strength' as the spring weight.
    :param labels: hover texts for the markers; labels[i] belongs to the i-th entry of `nodes`
    :param title: figure title (defaults to the previously hard-coded 'My Graph')
    :param iterations: number of spring-layout iterations (defaults to the previously
                       hard-coded 3000)
    :param seed: optional seed forwarded to ``nx.spring_layout`` for a reproducible layout.
                 When None (default) no seed is passed, exactly as before.
    :return: None; the figure is displayed through ``iplot``
    """
    # `nodes` is iterated twice (graph construction and marker ordering), so materialise it once.
    nodes = list(nodes)

    graph = nx.Graph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)

    layout_kwargs = dict(weight='strength', iterations=iterations)
    if seed is not None:
        # Only forwarded on request so the default call is identical to the original one.
        layout_kwargs['seed'] = seed
    pos = nx.spring_layout(graph, **layout_kwargs)

    # Marker positions are looked up by node id, in the order of `nodes`, so marker i always
    # carries labels[i]. (Indexing pos with 0..len(pos)-1 only worked for node ids that happen to
    # be exactly 0..n-1 and raised KeyError otherwise.) Nodes that appear only in `edges` are
    # appended afterwards so they still get a marker; they have no hover label.
    listed = set(nodes)
    ordered_nodes = nodes + [node for node in graph.nodes() if node not in listed]

    node_x = [pos[node][0] for node in ordered_nodes]
    node_y = [pos[node][1] for node in ordered_nodes]

    trace_nodes = dict(
        type='scatter',
        x=node_x,
        y=node_y,
        mode='markers',
        marker=dict(size=28, color='rgb(0,240,0)'),
        text=labels,
        hoverinfo='text',
    )

    # All edges go into a single line trace; a None entry breaks the line between segments.
    edge_x = []
    edge_y = []
    for source, target in graph.edges():
        edge_x.extend([pos[source][0], pos[target][0], None])
        edge_y.extend([pos[source][1], pos[target][1], None])

    trace_edges = dict(
        type='scatter',
        mode='lines',
        x=edge_x,
        y=edge_y,
        line=dict(width=1, color='rgb(25,25,25)'),
        hoverinfo='none',
    )

    # Hide axis line, zero line, grid, tick labels and axis title: coordinates carry no meaning.
    axis = dict(
        showline=False,
        zeroline=False,
        showgrid=False,
        showticklabels=False,
        title='',
    )
    layout = dict(
        title=title,
        font=dict(family='Balto'),
        width=600,
        height=600,
        autosize=False,
        showlegend=False,
        xaxis=axis,
        yaxis=axis,
        margin=dict(l=40, r=40, b=85, t=100, pad=0),
        hovermode='closest',
        plot_bgcolor='#efecea',  # background colour
    )

    fig = dict(data=[trace_nodes, trace_edges], layout=layout)
    iplot(fig)


# The set of songs does not depend on the cutoff, so the query is built once instead of once per
# iteration. NOTE(review): presumably a lazy Django queryset -- confirm against koe.models.
songs = AudioFile.objects.filter(database=dm.database)

# `clusters` holds one column of cluster assignments per cutoff; draw one graph per column.
for i in range(len(cutoffs)):
    clustering = clusters[:, i]
    sid_to_cluster, merged_enum2label = merge_labels(clustering, classes_info, sids, enum2label)

    song_sequences = songs_to_syl_seqs(songs, sid_to_cluster, merged_enum2label)
    edges, node_dict = extract_graph_properties(song_sequences, merged_enum2label)

    # Undirected and directed graphs over the same edges. Nothing in this cell reads them; they
    # are kept because they stay available as notebook globals after the loop, e.g. as inputs for
    # networkx_stats(graph, digraph, node_dict).
    graph = nx.Graph()
    graph.add_edges_from(edges)

    digraph = nx.DiGraph()
    digraph.add_edges_from(edges)

    # Sort node ids so that labels[i] is the label of nodes[i].
    nodes = sorted(node_dict.keys())
    labels = [node_dict[x] for x in nodes]

    display_graph(nodes, edges, labels)


Mean of empty slice.


invalid value encountered in double_scalars


Degrees of freedom <= 0 for slice


invalid value encountered in true_divide


invalid value encountered in double_scalars

