In [1]:
import networkx as nx
import rdflib
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
from rdflib import Graph as Graph
from networkx import Graph as NXGraph
import matplotlib.pyplot as plt
import statistics
import collections
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON
from math import factorial

## KG exploration

In [2]:
# Load the Linked Jazz knowledge graph from its Turtle serialisation.
lj = Graph()
lj.parse(source="resources/linked_jazz.ttl", format="turtle")

# Project the RDF graph onto a NetworkX directed graph for the network analysis below.
lj_G = rdflib_to_networkx_digraph(lj)

List predicates in the KG

In [3]:
# Distinct predicates used in the KG (iterating an rdflib Graph yields its triples).
{predicate for _subject, predicate, _object in lj}

{rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#acquaintanceOf'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#closeFriendOf'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#collaboratedWith'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#hasMet'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#knowsOf'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#mentorOf'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#performedWith'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#sharedMusicGroup'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#wasInfluencedBy'),
 rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
 rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label')}

Build the weighted graph

In [4]:
# Score contributed by each relation type to an edge's weight. Predicates that
# are commented out (or otherwise missing from the map) contribute 0.
WEIGHTS_MAP = {
    'http://www.semanticweb.org/KEProject/MusicalInfluences#sharedMusicGroup': 0.9,
    #'http://www.semanticweb.org/KEProject/MusicalInfluences#knowsOf': 0.2,
    #'http://www.semanticweb.org/KEProject/MusicalInfluences#wasInfluencedBy': 0.2,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#collaboratedWith': 0.8,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#performedWith': 0.8,
    #'http://www.semanticweb.org/KEProject/MusicalInfluences#acquaintanceOf': 0.6,
    #'http://www.semanticweb.org/KEProject/MusicalInfluences#hasMet': 0.3,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#mentorOf': 0.95,
    #'http://www.semanticweb.org/KEProject/MusicalInfluences#closeFriendOf': 0.7
}

# Overwrite every edge's 'weight' with the summed score of the triples it carries.
for _source, _target, edge_data in lj_G.edges(data=True):
    total = 0
    for _subj, predicate, _obj in edge_data['triples']:
        total += WEIGHTS_MAP.get(str(predicate), 0)
    edge_data['weight'] = total

Compute the eigenvector centrality of every node, using the edge weights.

In [5]:
# Eigenvector centrality of every node of lj_G, taking the per-edge 'weight'
# attribute into account. The result is used below as a node -> score mapping.
ec = nx.eigenvector_centrality(lj_G, weight="weight")

Print the top 10 most influential artists. The label is queried directly from the LinkedJazz KG through a SPARQL query.

In [6]:
def get_semlab_label(iri):
  """
  Look up the label of an entity on the SemLab SPARQL endpoint.

  This is a best-effort lookup: when the request fails, or the response does
  not contain a label, the input value is returned unchanged.

  Args:
      iri: IRI of the entity. It is interpolated between angle brackets
          into the query text.

  Returns:
      The label reported by the endpoint, or `iri` itself when no label
      could be retrieved.
  """
  sparql = SPARQLWrapper("https://query.semlab.io/proxy/wdqs/bigdata/namespace/wdq/sparql")
  sparql.setReturnFormat(JSON)
  # NOTE(review): ?projectLabel is never bound in this query, so the ORDER BY
  # clause looks like a leftover. It is kept so the request stays unchanged.
  sparql.setQuery("""
  SELECT DISTINCT ?item ?itemLabel WHERE {
    BIND (<%s> AS ?item) .
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    
  }
  ORDER BY ?projectLabel
  """ % iri)
  
  try:
    ret = sparql.queryAndConvert()
    label = ret["results"]["bindings"][0]["itemLabel"]["value"]
  except Exception:
    # Fall back to the raw IRI on any lookup failure. Exception (instead of a
    # bare except) lets KeyboardInterrupt / SystemExit propagate, so a long
    # series of lookups can still be interrupted.
    label = iri
  return label

# Rank nodes by descending eigenvector centrality and report the ten highest.
top_ranked = sorted(ec.items(), key=lambda entry: entry[1], reverse=True)[:10]
for node, score in top_ranked:
    label = get_semlab_label(str(node))
    print(f"{score} - {label}")

0.45276791139384936 - Larry Ridley
0.40985774499549793 - James Spaulding
0.38409447906418076 - Freddie Hubbard
0.3734989346249061 - Harold Mabern
0.21618225871409388 - Lee Morgan
0.17980527171344582 - Orville O'Brien
0.1785105560085451 - Daniel "Big Black" Rey
0.1550288950430747 - Paul Parker
0.1550288950430747 - Walt Miller
0.1550288950430747 - Al Plank


## Adjacency matrix experiments

In [7]:
%pip install git+https://github.com/n28div/hyperhanfpy

Collecting git+https://github.com/n28div/hyperhanfpy
  Cloning https://github.com/n28div/hyperhanfpy to /tmp/pip-req-build-ebh4__5b
  Running command git clone --filter=blob:none --quiet https://github.com/n28div/hyperhanfpy /tmp/pip-req-build-ebh4__5b
  Resolved https://github.com/n28div/hyperhanfpy to commit 880742f9ae600fa7058dd237938870af866fd146
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: hyperhanfpy
  Building wheel for hyperhanfpy (pyproject.toml) ... [?25ldone
[?25h  Created wheel for hyperhanfpy: filename=hyperhanfpy-0.0.1-py3-none-any.whl size=7515 sha256=4dfda7456e9cca1f7171a6dad4178604582bbc79e2f2766c8a3a402a75ab534e
  Stored in directory: /tmp/pip-ephem-wheel-cache-rj2bzoxq/wheels/7c/5a/f0/22dedd0a4928778e0b9af96fd09f3253dd05f9fb239bbeaf9a
Successfully built hyperhanfpy
Installing collected packages: hyperhanfpy


In [8]:
from math import factorial
from hyperhanfpy import HyperANF

def truncated_graph_power_exp(graph: nx.Graph, degree: int) -> np.ndarray:
  """
  Compute a truncated, exponential-style weighted sum of graph powers.

  Starting from the adjacency matrix of `graph`, the adjacency matrix of each
  graph power `HyperANF(graph).power(i)`, for i in [1, degree], is added with
  coefficient e^i / i!.

  Args:
      graph (nx.Graph): Input NetworkX graph.
      degree (int): Highest graph power included in the sum (inclusive).

  Returns:
      np.ndarray: Dense 2-D array holding the weighted sum of adjacency matrices.
  """
  hhanf = HyperANF(graph)
  # .toarray() always yields a plain ndarray. .todense() returns np.matrix for
  # scipy sparse *matrix* inputs, which would break 1-D row indexing downstream.
  adj = nx.adjacency_matrix(graph, dtype=np.float64).toarray()

  # for each i in [1, degree] (note: degree is inclusive)
  for i in range(1, degree + 1):
    # adjacency matrix of the i-th graph power
    # NOTE(review): assumes the graph returned by power() lists its nodes in
    # the same order as `graph`, so that rows/columns line up - TODO confirm.
    pw = hhanf.power(i)
    pw = nx.adjacency_matrix(pw, dtype=np.float64).toarray()

    # coefficient of the i-th term, i.e. e^i / i!
    # NOTE(review): the textbook Taylor coefficient of exp(A) is 1 / i!; the
    # extra e^i factor is kept unchanged - confirm it is intended.
    coeff = (np.e**i) / factorial(i)
    adj += coeff * pw

  return adj

In [9]:
# Weighted sum of graph powers, truncated at the second power.
lj_G_exp = truncated_graph_power_exp(graph=lj_G, degree=2)

How much did Miles Davis influence Herbie Hancock? What about Count Basie?

Generally, the influence of Miles Davis should be much higher, since Hancock mentions him as one of his main inspirations.

In [10]:
# Build the node list once instead of re-materialising it for every lookup.
# Positions in this list are used as row/column indices into lj_G_exp.
lj_nodes = list(lj_G.nodes())
miles_davis_idx = lj_nodes.index(rdflib.URIRef("http://base.semlab.io/entity/Q41"))
count_basie_idx = lj_nodes.index(rdflib.URIRef("http://base.semlab.io/entity/Q72"))
herbie_hancock_idx = lj_nodes.index(rdflib.URIRef("http://base.semlab.io/entity/Q493"))

In [11]:
# Row of the matrix belonging to Herbie Hancock; each column is another node.
cp_row = lj_G_exp[herbie_hancock_idx]
miles_davis_score = cp_row[miles_davis_idx]
print(f"Miles Davis influence in Herbie Hancock: {miles_davis_score}")

Miles Davis influence in Herbie Hancock: 12.828072308183707


In [12]:
# Same row, read at Count Basie's column for comparison.
count_basie_score = cp_row[count_basie_idx]
print(f"Count Basie influence in Herbie Hancock: {count_basie_score}")

Count Basie influence in Herbie Hancock: 3.6945280494653248


The 20 artists with the highest influence scores for Herbie Hancock

In [13]:
# Materialise the node list once rather than rebuilding it on every iteration.
ranked_nodes = list(lj_G.nodes)
# Indices of the 20 largest entries of cp_row, highest first.
for k in np.argsort(cp_row)[::-1][:20]:
  print(f"{cp_row[k]}: {get_semlab_label(str(ranked_nodes[k]))}")

12.990232620612938: George Coleman
12.828072308183707: Ron Carter
12.828072308183707: Miles Davis
12.828072308183707: Chick Corea
9.85344684541647: Teo Macero
9.481618662570565: Oscar Peterson
9.481618662570565: Donald Byrd
6.321079108380378: Gil Evans
3.6945280494653248: Benny Green
3.6945280494653248: Ahmad Jamal
3.6945280494653248: Don Goldberg
3.6945280494653248: Lester Young
3.6945280494653248: Larry Ridley
3.6945280494653248: Otto Hardwick
3.6945280494653248: Ella Fitzgerald
3.6945280494653248: Harold Mabern
3.6945280494653248: Chico Hamilton
3.6945280494653248: Cat Anderson
3.6945280494653248: Dizzy Gillespie
3.6945280494653248: George Coleman
