In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline

%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

from koe.model_utils import get_or_error
from koe.models import Database
from koe.sequence_utils import get_sequences
from koe.storage_utils import get_sids_tids
from root.models import User
from koe.cluster_analysis_utils import SimpleNameMerger, NameMerger, get_syllable_labels
from koe.sequence_utils import songs_to_syl_seqs
from koe.sequence_utils import calc_class_ajacency, calc_class_dist_by_adjacency

from nltk import ngrams

from koe.models import Segment, AudioFile
from root.models import ExtraAttr, ExtraAttrValue
from collections import Counter

In [None]:
# Analysis parameters: whose annotations to read and at which label granularity.
annotator_name = 'wesley'
label_level = 'label'

# Resolve the database record, the ids stored for it, and the annotating user.
database = get_or_error(Database, {'name': 'Bellbird_TMI'})
sids, tids = get_sids_tids(database)
annotator = get_or_error(User, {'username__iexact': annotator_name})

# NOTE(review): presumably `label_arr` lists the distinct labels and
# `syl_label_enum_arr` holds, per entry of `sids`, an integer index into it —
# confirm against get_syllable_labels.
label_arr, syl_label_enum_arr = get_syllable_labels(annotator, label_level, sids)

# Position in `label_arr` -> label, and id -> enumerated label (paired in order).
enum2label = dict(enumerate(label_arr))
sid2enumlabel = dict(zip(sids, syl_label_enum_arr))

# Directed (symmetric=False) adjacency counts, without circular wrap-around.
adjacency_mat, classes_info = calc_class_ajacency(
    database,
    syl_label_enum_arr,
    enum2label,
    sid2enumlabel,
    symmetric=False,
    count_circular=False,
)

# Full (not upper-triangular) class-to-class distance matrix.
distmat = calc_class_dist_by_adjacency(
    adjacency_mat,
    syl_label_enum_arr,
    return_triu=False,
)

In [None]:
# How many times each enumerated label occurs.
counter = Counter(syl_label_enum_arr)
nlabels = len(counter)

# Occurrence count for enums 0 .. nlabels-1, in that order (a missing enum counts as 0).
frequencies = np.array([counter[enum] for enum in range(nlabels)])

# Forward normalisation: every row i is divided by the frequency of class i.
# Backward normalisation: every column j is divided by the frequency of class j.
# NOTE(review): assumes adjacency_mat is (nlabels, nlabels) — confirm.
adjacency_mat_fw_norm = adjacency_mat / frequencies.reshape(-1, 1)
adjacency_mat_bw_norm = adjacency_mat / frequencies

# Side-by-side feature matrix: forward columns followed by backward columns.
coordinates = np.hstack((adjacency_mat_fw_norm, adjacency_mat_bw_norm))

# df_norm = pd.DataFrame(distmat, columns=label_arr, index=label_arr)
# df_norm.style.background_gradient(cmap='Blues')

In [None]:
import plotly.plotly as py
import plotly.figure_factory as ff
from scipy.cluster import hierarchy

import numpy as np


def plot_spectrogram(mat, name, pdf=None, labels=None):
    """Hierarchically cluster ``mat`` and draw the result as a dendrogram.

    Despite its name, this function draws a dendrogram, not a spectrogram; the
    name is kept so that existing calls keep working.

    Parameters
    ----------
    mat : array-like
        Input passed straight to ``scipy.cluster.hierarchy.linkage`` (either a
        2-D array of observation vectors, one per row, or a condensed distance
        matrix).
    name : str
        Title shown above the dendrogram.
    pdf : PdfPages, optional
        When given, the figure is appended to this PDF and then closed.
        When omitted, the figure is shown interactively instead.
    labels : sequence, optional
        Leaf labels for the dendrogram. Defaults to the notebook-level
        ``label_arr`` so that existing three-argument calls behave as before.
    """
    if labels is None:
        labels = label_arr

    # Average-linkage agglomerative clustering of the input.
    Z = hierarchy.linkage(mat, 'average')

    fig, ax = plt.subplots(figsize=(18, 18))
    ax.set_title(name)

    # Draw on our own axes explicitly rather than on whatever axes are current.
    hierarchy.dendrogram(
        Z,
        labels=labels,
        leaf_font_size=12.,
        orientation='right',
        ax=ax,
    )

    # Compare with None so the decision does not depend on the object's truthiness.
    if pdf is not None:
        pdf.savefig(fig)
        # Close this specific figure so figures do not pile up across calls.
        plt.close(fig)
    else:
        plt.show()

# Matrices to cluster, keyed by the title printed on each PDF page.
# NOTE(review): the backward entry's title also reads "A->B" — confirm this is
# the intended wording and not a copy-paste of the forward title.
mats = {
    'Using adjacency matrix (forward direction A->B)': adjacency_mat_fw_norm,
    'Using adjacency matrix (backward direction A->B)': adjacency_mat_bw_norm,
    'Using both adjacency matrices': coordinates,
}

# The context manager closes (and thereby finalises) the PDF even if one of the
# plots raises, instead of leaving a half-written file behind.
with PdfPages('dendrogram.pdf') as pdf:
    for name, mat in mats.items():
        plot_spectrogram(mat, name, pdf)

In [None]:
# Plotly dendrogram built from the forward-normalised adjacency matrix.
dendro_fw = ff.create_dendrogram(
    adjacency_mat_fw_norm,
    orientation='left',
    labels=label_arr,
    color_threshold=2,
)

# Enlarge the canvas; the default size is overridden with a tall 1000x1700 layout.
dendro_fw['layout'].update(dict(width=1000, height=1700))

py.iplot(dendro_fw, filename='adjacency_mat_fw_norm')

In [None]:
# This cell used `Z` and `df`, neither of which exists at notebook level (`Z` is
# only a local inside plot_spectrogram; no `df` is assigned in the cells above),
# so it raised NameError after Restart & Run All. The linkage is now computed
# here and the leaves are labelled with `label_arr`.
# NOTE(review): the matrix originally clustered here cannot be recovered from
# the notebook; the combined forward+backward `coordinates` with average
# linkage is assumed — confirm.
Z = hierarchy.linkage(coordinates, 'average')
hierarchy.dendrogram(Z, leaf_rotation=90, leaf_font_size=8, labels=label_arr);