In [16]:
import sys
sys.path.append('../py-code/')
import os
import gzip as gz
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import networkx as nx
from difflib import get_close_matches
from statsmodels.distributions.empirical_distribution import *
import scipy
from scipy.stats import norm
import scipy.stats as ss
import disrupt as dr
import amutils as at

In [17]:
def init_matplotlib():
    """Configure the global matplotlib defaults used by this notebook.

    Sets a 12pt size for axis labels, titles, legend and tick labels, a line
    width of 3 and a serif font family, then activates the
    'tableau-colorblind10' style sheet. All changes go to the process-wide
    ``plt.rcParams``; nothing is returned.
    """
    text_size = 12
    plt.rcParams.update({
        'axes.labelsize': text_size,
        'axes.titlesize': text_size,
        'legend.fontsize': text_size,
        'xtick.labelsize': text_size,
        'ytick.labelsize': text_size,
        'lines.linewidth': 3,
        'font.family': 'serif',
    })
    # Applied last, exactly as before, so the style sheet is layered on top
    # of the settings above.
    plt.style.use('tableau-colorblind10')
def despine(ax=None):
    """Remove the top and right spines from an axes.

    Parameters
    ----------
    ax : matplotlib axes, optional
        Axes to modify in place. When omitted, the current axes returned by
        ``plt.gca()`` is used.

    Returns
    -------
    None
    """
    target = plt.gca() if ax is None else ax
    # Hide the right and top borders of the plotting area.
    for side in ('right', 'top'):
        target.spines[side].set_visible(False)
    # Keep tick marks only on the two spines that remain visible.
    target.yaxis.set_ticks_position('left')
    target.xaxis.set_ticks_position('bottom')

In [18]:
# Apply the notebook-wide matplotlib settings (font sizes, line width, style sheet).
init_matplotlib()

In [19]:
# One container per dataset: each centrality metric maps to its own dict,
# which the loading cell fills with a 'df' entry.
metric_names = ('pagerank', 'disruption', 'in_degree')
allmusic = {metric: {} for metric in metric_names}
whosampled = {metric: {} for metric in metric_names}

In [20]:
# Read every centrality table from disk and store it under the 'df' key of
# the matching dataset/metric entry. Files are read in the listed order.
centrality_sources = [
    (allmusic, 'disruption', '../centrality-csvs/allmusic_disruption.csv'),
    (allmusic, 'pagerank', '../centrality-csvs/allmusic_pageranks.csv'),
    (allmusic, 'in_degree', '../centrality-csvs/allmusic_in_degree.csv'),
    (whosampled, 'disruption', '../centrality-csvs/whosampled_disruption_per_year.csv'),
    (whosampled, 'pagerank', '../centrality-csvs/whosampled_pageranks_per_year.csv'),
    (whosampled, 'in_degree', '../centrality-csvs/whosampled_in_degree_per_year.csv'),
]
for dataset, metric, csv_path in centrality_sources:
    dataset[metric]['df'] = pd.read_csv(csv_path)

In [21]:
# Promote the 'Unnamed: 0' column of every centrality table to its row index
# (presumably the row labels saved alongside the data -- confirm against the
# code that wrote the CSVs).
for data in [allmusic, whosampled]:
    for key in list(data.keys()):
        table = data[key]['df']
        # Guard makes the cell idempotent: after the first run the column is
        # gone, and re-running used to raise KeyError.
        if 'Unnamed: 0' in table.columns:
            # set_index moves the column into the index in one step;
            # rename_axis(None) keeps the index unnamed, as it was when it
            # was assigned from a plain list.
            data[key]['df'] = table.set_index('Unnamed: 0').rename_axis(None)

<br><h3>Correlation</h3><br>
<h4>WhoSampled</h4><br>

In [55]:
# 2019 pagerank scores with the entries equal to the smallest score removed
# (presumably the baseline value shared by nodes nobody links to -- confirm).
pagerank_2019 = whosampled['pagerank']['df']['2019']
# Series.min() is vectorised and skips NaN. The builtin min() iterates in
# Python and returns an order-dependent result when NaN is present, in which
# case the != filter below would silently remove nothing.
menor = pagerank_2019.min()
data_pagerank = pagerank_2019[pagerank_2019 != menor]

In [56]:
# Pearson correlation (the pandas default, spelled out here) between the
# filtered 2019 pagerank and the unfiltered 2019 in-degree.
data_pagerank.corr(whosampled['in_degree']['df']['2019'], method='pearson')

0.8599706205415361

In [57]:
# Collect the 2019 WhoSampled centralities into one frame with one ROW per
# metric; the cells below transpose it before correlating.
pagerank_2019 = whosampled['pagerank']['df']['2019']
in_degree_2019 = whosampled['in_degree']['df']['2019']
# Series.min() is vectorised and skips NaN. The builtin min() returns an
# order-dependent result when NaN is present, which would turn the pagerank
# filter below into a no-op.
minimum = pagerank_2019.min()
centralities = pd.DataFrame(
    [
        # Drop entries sitting at the minimum pagerank / zero in-degree
        # (presumably nodes that nobody links to -- confirm).
        pagerank_2019[pagerank_2019 != minimum],
        in_degree_2019[in_degree_2019 != 0],
        whosampled['disruption']['df']['2019'],
    ],
    index=['pagerank', 'in_degree', 'disruption'],
)

In [58]:
# Spearman rank correlation between the metrics; transpose first so each
# metric becomes a column, which is what DataFrame.corr compares.
centralities.transpose().corr(method='spearman')

Unnamed: 0,pagerank,in_degree,disruption
pagerank,1.0,0.793085,-0.323814
in_degree,0.793085,1.0,-0.29899
disruption,-0.323814,-0.29899,1.0


In [59]:
# Kendall tau correlation between the metrics; transpose first so each
# metric becomes a column, which is what DataFrame.corr compares.
centralities.transpose().corr(method='kendall')

Unnamed: 0,pagerank,in_degree,disruption
pagerank,1.0,0.631293,-0.229536
in_degree,0.631293,1.0,-0.217303
disruption,-0.229536,-0.217303,1.0


<br><h4>AllMusic</h4><br>

In [61]:
# Collect the 2010 AllMusic centralities into one frame with one ROW per
# metric; the cells below transpose it before correlating.
pagerank_2010 = allmusic['pagerank']['df']['2010']
in_degree_2010 = allmusic['in_degree']['df']['2010']
# Series.min() is vectorised and skips NaN. The builtin min() returns an
# order-dependent result when NaN is present, which would turn the pagerank
# filter below into a no-op.
minimum = pagerank_2010.min()
centralities = pd.DataFrame(
    [
        # Drop entries sitting at the minimum pagerank / zero in-degree
        # (presumably nodes that nobody links to -- confirm).
        pagerank_2010[pagerank_2010 != minimum],
        in_degree_2010[in_degree_2010 != 0],
        allmusic['disruption']['df']['2010'],
    ],
    index=['pagerank', 'in_degree', 'disruption'],
)

In [62]:
# Spearman rank correlation between the metrics; transpose first so each
# metric becomes a column, which is what DataFrame.corr compares.
centralities.transpose().corr(method='spearman')

Unnamed: 0,pagerank,in_degree,disruption
pagerank,1.0,0.824165,0.036697
in_degree,0.824165,1.0,-0.005446
disruption,0.036697,-0.005446,1.0


In [63]:
# Kendall tau correlation between the metrics; transpose first so each
# metric becomes a column, which is what DataFrame.corr compares.
centralities.transpose().corr(method='kendall')

Unnamed: 0,pagerank,in_degree,disruption
pagerank,1.0,0.687308,0.026614
in_degree,0.687308,1.0,-0.005194
disruption,0.026614,-0.005194,1.0
