# Code to Reproduce our ISMIR 2019 Paper

## How to Run

Install the following packages. An anaconda release should have everything.
1. ipywidgets -- https://ipywidgets.readthedocs.io/en/stable/
1. matplotlib
1. numpy
1. pandas

Then execute the cells of this notebook. In the end, you will have a widget to explore the disruption graph. The auxiliary code is in the `code` folder.

## Plots

The plots show the top 10 positive and negative disruptive nodes. Each plot is a KDE on the posterior.

## Files

The `cache` folder will contain csvs to explore with R or Python for each option of the widget.

## Preamble Code

The next few cells simply set up the environment. Just execute them.

In [1]:
# -*- coding: utf8

import sys
# Put ../code/ on the import path so the modules imported in the next cell
# (amutils, disrupt) can be found from this notebook.
sys.path.append('../code/')

In [2]:
from amutils import build_graph
from amutils import build_reverse_index
from amutils import load_am_json_data

from disrupt import compute_disruption

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

In [3]:
# Notebook-wide matplotlib defaults: a large canvas, 16pt text for labels,
# titles, legends and ticks, and slightly thicker lines.
plt.rcParams.update({
    'figure.figsize': (18, 10),
    'axes.labelsize': 16,
    'axes.titlesize': 16,
    'legend.fontsize': 16,
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'lines.linewidth': 2,
})

In [4]:
plt.ion()

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so prefer the new name when this
# matplotlib ships it and fall back to the original one on older versions.
_colorblind_style = ('seaborn-v0_8-colorblind'
                     if 'seaborn-v0_8-colorblind' in plt.style.available
                     else 'seaborn-colorblind')
plt.style.use(_colorblind_style)
# Overrides the figure size configured in the previous cell.
plt.rcParams['figure.figsize']  = (12, 8)

In [5]:
def despine(ax=None):
    """Strip an axes down to its bottom spine.

    Hides the right, top and left spines, pins the ticks to the left and
    bottom sides, and finally hides the whole y axis.

    Parameters
    ----------
    ax : matplotlib axes, optional
        Axes to modify in place. When omitted, ``plt.gca()`` is used.
    """
    target = plt.gca() if ax is None else ax

    # Everything except the bottom spine is switched off.
    for side in ('right', 'top', 'left'):
        target.spines[side].set_visible(False)

    # Tick placement is set before the y axis itself is hidden.
    target.yaxis.set_ticks_position('left')
    target.xaxis.set_ticks_position('bottom')
    target.axes.get_yaxis().set_visible(False)

In [6]:
# Raw dataset (presumably the AllMusic dump -- confirm in amutils). rank_nodes
# indexes it by node id and reads each entry's 'name'.
json_data = load_am_json_data()
# Three lookups keyed by decade, genre and style. rank_nodes turns the value
# stored under a key into a set of node ids to restrict the analysis.
decades, genres, styles = build_reverse_index(json_data)

In [7]:
# Create the CSV cache directory together with its parent 'cache' folder.
# exist_ok=True makes re-running the cell a no-op and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs('cache/disrupt-artists-csvs/', exist_ok=True)

## Bayesian Disruption

The constant below defines our prior. In Python, posterior disruption is computed as:

```python
# with ni, nj and nk defined
import numpy as np
D = np.random.dirichlet([PRIOR + ni,
                         PRIOR + nj,
                         PRIOR + nk], size=10000)
disruption_per_sample = D[:, 0] - D[:, 1]
```

In [8]:
# Pseudo-count added to each of ni, nj and nk when building the Dirichlet
# parameters for the posterior samples.
PRIOR = 10

In [9]:
def rank_nodes(decade=None, genre=None, style=None,
               min_in=1, min_out=1, restrictive=False):
    """Compute disruption and its sampled posterior for a subset of nodes.

    Parameters
    ----------
    decade, genre, style : key or None
        Keys into the module-level ``decades``, ``genres`` and ``styles``
        indexes. Every filter that is not ``None`` contributes a set of node
        ids; the sets are intersected. With all three ``None`` no filtering
        is applied.
    min_in, min_out : int
        Forwarded to ``compute_disruption``.
    restrictive : bool
        Forwarded to ``build_graph``.

    Returns
    -------
    disrupt : pandas.DataFrame
        Output of ``compute_disruption`` with NaN rows dropped, plus a
        ``name`` column (looked up in ``json_data``) and a ``confidence``
        column.
    posteriors : pandas.DataFrame
        One row per node holding 10000 posterior samples of
        ``p_i - p_j`` (integer-labelled columns) plus ``name`` and
        ``confidence``.

    Notes
    -----
    ``confidence`` is the fraction of posterior samples whose sign agrees
    with the point-estimate disruption (negative side when it is <= 0).
    Sampling uses the global NumPy random state, so results vary between
    calls unless the caller seeds it.
    """
    # Intersect the id sets of every filter that was actually supplied.
    to_use = None
    for index, key in ((decades, decade), (genres, genre), (styles, style)):
        if key is None:
            continue
        matching = set(index[key])
        to_use = matching if to_use is None else matching.intersection(to_use)

    print('Computing disruption!')
    G = build_graph(json_data, to_use, restrictive=restrictive)
    disrupt = compute_disruption(G, min_in, min_out)
    disrupt = disrupt.dropna()
    cols = ['ni', 'nj', 'nk', 'disruption']

    diffs = []
    confidences = []
    for ni, nj, nk, disruption in disrupt[cols].values:
        D = np.random.dirichlet([PRIOR + ni,
                                 PRIOR + nj,
                                 PRIOR + nk], size=10000)
        # Posterior disruption per sample; computed once and reused below.
        diff = D[:, 0] - D[:, 1]
        if disruption <= 0:
            confidence = (diff < 0).mean()
        else:
            confidence = (diff > 0).mean()
        confidences.append(confidence)
        diffs.append(diff)

    disrupt['name'] = [json_data[id_]['name'] for id_ in disrupt.index]
    disrupt['confidence'] = confidences
    posteriors = pd.DataFrame(diffs, index=disrupt.index)
    posteriors['name'] = disrupt['name']
    posteriors['confidence'] = confidences

    # `is not None` rather than truthiness: an empty filter set means that
    # nothing matched, so the result must be empty instead of unfiltered.
    if to_use is not None:
        disrupt = disrupt.loc[disrupt.index.isin(to_use)]
        posteriors = posteriors.loc[posteriors.index.isin(to_use)]

    return disrupt, posteriors

## Nice Little Widget

Below we have our widget. The options are:

1. initial_decade -> compute disruption for artists which began releasing work in this decade.
1. genre -> compute disruption for artists with this genre (first on the allmusic list).
1. style -> same as genre
1. min_in -> consider only nodes with this in_degree  (they can still be influenced by nodes that do not match this criterion)
1. min_out -> consider only nodes with this out_degree (they can still be influenced by nodes that do not match this criterion)
1. confidence -> filter out artists whose Bayesian posterior confidence (the share of posterior samples that agree with the sign of the estimated disruption) is not above this value.
1. restrictive -> not used in the paper, but this may be used to consider only a subgraph. That is, compute disruption of 1940s Jazz artists considering that only these artists exist. Usually fails on small graphs. Most useful for genres: Subset jazz, or subset pop/rock.

In [10]:
from ipywidgets import interact_manual

# Dropdown choices; the leading None means "do not filter on this field".
decade_options = [None] + sorted(decades.keys())
genre_options = [None] + sorted(genres.keys())
style_options = [None] + sorted(styles.keys())


def _split_samples(posterior, index):
    """Return (samples, names) for the rows of `posterior` listed in `index`.

    `samples` keeps only the posterior draws: the 'name' and 'confidence'
    bookkeeping columns are removed so they are not fed to the KDE.
    """
    rows = posterior.loc[index]
    names = rows['name']
    samples = rows.drop(columns=['name', 'confidence'])
    return samples, names


def _plot_posterior_kdes(samples, names, empty_message):
    """Draw one KDE subplot per row of `samples`, or print `empty_message`."""
    frame = pd.DataFrame(samples.T.values, columns=names)
    if frame.shape[1] == 0:
        print(empty_message)
        return

    axes = frame.plot.kde(subplots=True, color='magenta')
    for ax in axes:
        despine(ax)
        leg = ax.legend(loc='upper left', frameon=False)
        # The attribute was renamed to `legend_handles` in matplotlib 3.7
        # and the old spelling was later removed; support both.
        handles = getattr(leg, 'legend_handles', None)
        if handles is None:
            handles = leg.legendHandles
        # Keep the artist name in the legend but hide the line marker.
        for item in handles:
            item.set_visible(False)
        ax.set_xlim((-1, 1))
        ax.set_xlabel('Posterior Disruption')
    plt.tight_layout()
    plt.show()
    plt.close()


@interact_manual
def interactive_rank(initial_decade=decade_options,
                     genre=genre_options,
                     style=style_options,
                     min_in=[1, 2, 3, 4, 5],
                     min_out=[1, 2, 3, 4, 5],
                     confidence=0.95,
                     restrictive=False):
    """Rank nodes for the selected options, cache a CSV and plot posteriors.

    The full (unfiltered by confidence) disruption table is written to
    ``cache/disrupt-artists-csvs/<options>.disrupt``. Nodes whose confidence
    is not above `confidence` are then discarded, and the posterior KDEs of
    the 10 most positive and 10 most negative remaining nodes are plotted.
    """
    disruption, posterior = rank_nodes(initial_decade, genre, style,
                                       min_in, min_out, restrictive)
    name = 'decade-{}-genre-{}-style-{}-min_in-{}-min_out-{}-restrictive-{}'
    name = name.format(initial_decade, genre, style, min_in, min_out, restrictive)
    # Genre/style labels may contain '/', which is not valid in a file name.
    name = name.replace('/', '-')

    disruption.to_csv('cache/disrupt-artists-csvs/' + name + '.disrupt')

    disruption = disruption[disruption['confidence'] > confidence]
    posterior = posterior[posterior['confidence'] > confidence]

    positive = disruption[disruption['disruption'] > 0]
    negative = disruption[disruption['disruption'] < 0]
    top = positive.nlargest(10, ['disruption'])
    bottom = negative.nsmallest(10, ['disruption'])

    top_samples, names_top = _split_samples(posterior, top.index)
    bottom_samples, names_bottom = _split_samples(posterior, bottom.index)

    _plot_posterior_kdes(top_samples, names_top, 'No positive disruption')
    _plot_posterior_kdes(bottom_samples, names_bottom, 'No negative disruption')

interactive(children=(Dropdown(description='initial_decade', options=(None, 1890, 1900, 1910, 1920, 1930, 1940…