In [51]:
import networkx as nx
import rdflib
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
from rdflib import Graph as Graph
from networkx import Graph as NXGraph
import matplotlib.pyplot as plt
import statistics
import collections
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON
from math import factorial

## KG exploration

In [33]:
lj = Graph()
lj.parse("resources/linked_jazz.ttl", format='turtle')

lj_G = rdflib_to_networkx_graph(lj)

List predicates in the KG

In [28]:
set(lj.predicates())

{rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#acquaintanceOf'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#closeFriendOf'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#collaboratedWith'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#hasMet'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#knowsOf'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#mentorOf'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#performedWith'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#sharedMusicGroup'),
 rdflib.term.URIRef('http://www.semanticweb.org/KEProject/MusicalInfluences#wasInfluencedBy'),
 rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
 rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label')}

Build the weighted graph

In [29]:
kg_nodes = len(lj)

predicates_frequency = {
  p: len(list(lj.triples((None, p, None))))
  for p in lj.predicates()
}

In [34]:
WEIGHTS_MAP = {
    'http://www.semanticweb.org/KEProject/MusicalInfluences#sharedMusicGroup': 0.9,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#knowsOf': 0.2,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#wasInfluencedBy': 0.2,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#collaboratedWith': 0.8,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#performedWith': 0.8,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#acquaintanceOf': 0.6,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#hasMet': 0.3,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#mentorOf': 0.95,
    'http://www.semanticweb.org/KEProject/MusicalInfluences#closeFriendOf': 0.7
}

for i, j in lj_G.edges:
    lj_G[i][j]['weight'] = 0
    for s, p, o in lj_G[i][j]['triples']:
        lj_G[i][j]['weight'] += WEIGHTS_MAP.get(str(p), 0)

Compute the centrality due to the eigenvectors

In [35]:
ec = nx.eigenvector_centrality(lj_G, weight="weight")

Print the top 10 most influential artists. The label is queried directly from the LinkedJazz KG through a SPARQL query.

In [36]:
def get_semlab_label(iri):
  sparql = SPARQLWrapper("https://query.semlab.io/proxy/wdqs/bigdata/namespace/wdq/sparql")
  sparql.setReturnFormat(JSON)
  sparql.setQuery("""
  SELECT DISTINCT ?item ?itemLabel WHERE {
    BIND (<%s> AS ?item) .
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    
  }
  ORDER BY ?projectLabel
  """ % iri)
  
  try:
    ret = sparql.queryAndConvert()
    label = ret["results"]["bindings"][0]["itemLabel"]["value"]
  except:
    label = iri
  return label

for k in sorted(ec, key=ec.get, reverse=True)[:10]:
    label = get_semlab_label(str(k))
    print(f"{ec[k]} - {label}")

0.43078161725884534 - Count Basie
0.2783203387338041 - Buddy DeFranco
0.24635594241730585 - Roy Eldridge
0.23090382684773114 - Lionel Hampton
0.2281363963700805 - Louis Armstrong
0.2248402606105156 - Oscar Peterson
0.14187427149761 - Clark Terry
0.14155396981004958 - Dizzy Gillespie
0.13915167874605938 - Joe Williams
0.13484259210755245 - Sam Rivers


## Adjacency matrix experiments

In [113]:
from math import factorial

def truncated_graph_power_exp(graph: nx.Graph, degree: int) -> np.array:
  """
  Compute the trucated matrix power exponential using Taylor series expansion.

  Args:
      graph (nx.Graph): Input NetworkX grpah.
      degree (int): Tatlor degree of the polynomial

  Returns:
      np.array: Adjacency matrix
  """
  adj = nx.adjacency_matrix(graph, dtype=np.float64, weight=None).todense()
  
  # for each range in [1, degree] (note: degree is inclusive)
  for i in range(1, degree + 1):
    # compute matrix power
    pw = nx.power(lj_G, i)
    pw = nx.adjacency_matrix(pw, dtype=np.float64, weight=None).todense()
    
    # compute coefficient as taylor series expansion
    # i.e. e^i / i!
    coeff = (np.e**i) / factorial(i)
    adj += coeff + pw
  
  return adj

In [114]:
lj_G_exp = truncated_graph_power_exp(lj_G, 3)

How much did Miles Davis influence Herbie Hancock? What about Count Basie?

Generally, the influence of Miles Davis should be much higher, since Hancock mention him as one of his main inspirations.

In [130]:
miles_davis_idx = list(lj_G.nodes()).index(rdflib.URIRef("http://base.semlab.io/entity/Q41"))
count_basie_idx = list(lj_G.nodes()).index(rdflib.URIRef("http://base.semlab.io/entity/Q72"))
herbie_hancock_idx = list(lj_G.nodes()).index(rdflib.URIRef("http://base.semlab.io/entity/Q493"))

In [131]:
cp_row = lj_G_exp[herbie_hancock_idx, :]
print(f"Miles Davis influence in Herbie Hancock: {cp_row[miles_davis_idx]}")

Miles Davis influence in Herbie Hancock: 13.760399365122312


In [134]:
print(f"Count Basie influence in Herbie Hancock: {cp_row[count_basie_idx]}")

Count Basie influence in Herbie Hancock: 11.760399365122312


Most influential artists to Herbie Hancock

In [136]:
for k in np.argsort(cp_row)[::-1][:20]:
  print(f"{cp_row[k]}: {get_semlab_label(list(lj_G.nodes)[k])}")

13.760399365122312: Myra Hess
13.760399365122312: Herbie Hancock
13.760399365122312: George Schick
13.760399365122312: Gil Evans
13.760399365122312: Chick Corea
13.760399365122312: Philly Joe Jones
13.760399365122312: Donald Byrd
13.760399365122312: Miles Davis
13.760399365122312: Teo Macero
13.760399365122312: Gayle Moran
13.760399365122312: Delfeayo Marsalis
13.760399365122312: Tony Williams
13.760399365122312: Michael Greene
13.760399365122312: Buster Williams
13.760399365122312: George Shearing
13.760399365122312: http://www.semanticweb.org/KEProject/MusicalInfluences#MusicArtist
13.760399365122312: Don Goldberg
13.760399365122312: Oscar Peterson
13.760399365122312: Ron Carter
13.760399365122312: George Coleman
