In [1]:
from shared import *
import pandas as pd
from rdflib import Graph, Literal, BNode, Namespace, URIRef
from rdflib.namespace import RDF, OWL, RDFS
from urllib.parse import quote_plus
from pathlib import Path

In [2]:
df = read_songs("../source-data/hooktheory-songs.csv")
df

Unnamed: 0,artist,song,chordAbs,chordRel,link,keyTonic,keyScale
0,'Til Tuesday,Voices Carry,"[G, G/D, G, G/D, G, G/D, G, Bb, Bb/F, Bb, Bb/F...","[I, I64, I, I64, I, I64, I, IV, IV64, IV, IV64...",/theorytab/view/til-tuesday/voices-carry,G,mixolydian
1,070 Shake,Guilty Conscience,"[G, Em7, Cmaj9, Am11, D]","[I, vi7, IV9, ii11, V]",/theorytab/view/070-shake/guilty-conscience,G,major
2,100 gecs,Hand Crushed By A Mallet,"[C#m, F#m, A, G#m, C#m, F#m, A, G#m]","[i, iv, VI, v, i, iv, VI, v]",/theorytab/view/100-gecs/hand-crushed-by-a-mallet,C#,minor
3,100 gecs,Money Machine,"[D#m, B, F#, A#m, D#m, B, F#, A#m, D#m, B, F#,...","[vi, IV, I, iii, vi, IV, I, iii, vi, IV, I, ii...",/theorytab/view/100-gecs/money-machine,F#,major
4,10cc,I'm Not In Love,"[A, Am, G#m, G#/B#, C#m, C#m/B, A, B, E, A/E, ...","[IV, iv, iii, V6, vi, vi42, IV, V, I, IV64, i7...",/theorytab/view/10cc/im-not-in-love,E,major
...,...,...,...,...,...,...,...
10484,twenty one pilots,My Blood,"[Bbm, Ab, Db, Fm, Eb, Ab, Bbm, Ab, Db, Fm, Eb,...","[iv, III, VI, i, VII, III, iv, III, VI, i, VII...",/theorytab/view/twenty-one-pilots/my-blood,F,minor
10485,twenty one pilots,Nico And The Niners,"[Dm, Dm, Dm, Dm, Am, Am, Am, Am, G, G, F, F, D...","[iv, iv, iv, iv, i, i, i, i, VII, VII, VI, VI,...",/theorytab/view/twenty-one-pilots/nico-and-the...,A,minor
10486,twenty one pilots,Pet Cheetah,"[Eb, Gm, F, Dm, Eb, Gm, F, Dm]","[VI, i, VII, v, VI, i, VII, v]",/theorytab/view/twenty-one-pilots/pet-cheetah,G,minor
10487,vistlip,-OZONE-,"[Dbmaj7, Dbmaj7, Dbmaj7, Eb7/Db, Ab, Eb7/G, Fm...","[IV7, IV7, IV7, V42, I, V65, vi7, Vsus4, Isus4...",/theorytab/view/vistlip/-ozone-,Ab,major


In [3]:
df[df.artist == "Edith Piaf"]

Unnamed: 0,artist,song,chordAbs,chordRel,link,keyTonic,keyScale
2578,Edith Piaf,La vie en rose,"[G, Bm, Em7, Am, D7/A, Am7, D7, E7sus2, D7, Eo...","[I, iii, vi7, ii, V43, ii7, V7, vi7sus2, V7, v...",/theorytab/view/edith-piaf/la-vie-en-rose,G,major
2579,Edith Piaf,Milord,"[D, G, D, E, A, D, G, D, E, A]","[I, IV, I, V, V, I, IV, I, V, V]",/theorytab/view/edith-piaf/milord,D,major
2580,Edith Piaf,Non Je ne regrette rien,"[G, D7/F#, G, C, C+, Am/C, D9, D7(b9), D7, Bm,...","[I, V65, I, IV, IV+(#5), ii6, V9, V7(b9), V7, ...",/theorytab/view/edith-piaf/non-je-ne-regrette-...,G,major
2581,Edith Piaf,Padam Padam,"[Bbm, Bbm/Db, Bbm, Bbm/Db, Bbm, Bbm/Db, F, F/C...","[i, i6, i, i6, i, i6, V, V64, V, V64, V, V64, ...",/theorytab/view/edith-piaf/padam-padam,Bb,minor
2582,Edith Piaf,Sous Le Ciel De Paris,"[Am, Dm, E, Am, Dm, Am, Am, Dm, E, Am, Dm, G, ...","[vi, ii, V, vi, ii, vi, vi, ii, V, vi, ii, V, ...",/theorytab/view/edith-piaf/sous-le-ciel-de-paris,C,major


In [4]:
from unidecode import unidecode

In [5]:
unidecode("Такунович in café")

'Takunovich in cafe'

In [6]:
def name_to_iri_name(name) -> str:
    """Turn a free-form artist/song name into an IRI-friendly local name.

    The name is transliterated with ``unidecode``, parentheses become word
    separators, double quotes and carets are dropped, and the remaining
    whitespace-separated words are joined with underscores.

    Transliteration runs *before* the character stripping: ``unidecode`` can
    itself emit the characters being stripped (e.g. curly quotes become ``"``
    and the copyright sign becomes ``(c)``), so stripping first would let them
    leak into the resulting IRI. For input that is already ASCII the result is
    the same as with the previous ordering.

    Args:
        name: Human-readable name (string).

    Returns:
        The underscore-joined name; an empty string if no word is left.
    """
    ascii_name = unidecode(name)
    cleaned = (
        ascii_name.replace("(", " ")
        .replace(")", " ")
        .replace('"', "")
        .replace("^", "")
    )
    # split() without arguments also collapses runs of whitespace and trims the ends.
    return "_".join(cleaned.split())

In [8]:
g = Graph()
g.parse("../ontologies/chord_progressions.owl", format="turtle")

<Graph identifier=Nbc3e2961b71d44819e9291bfadf8a660 (<class 'rdflib.graph.Graph'>)>

In [9]:
g.bind("chp", chp)

In [10]:
def artist_name_to_node(g, artist_name):
    """Register an artist individual in graph ``g`` and return its node.

    The node IRI is built in the ``chp`` namespace from the normalized artist
    name; the node is typed as ``chp.Artist`` and ``owl:NamedIndividual`` and
    carries the raw name via ``chp.hasName``.
    """
    artist_node = URIRef(chp[name_to_iri_name(artist_name)])
    statements = (
        (RDF.type, chp.Artist),
        (RDF.type, OWL.NamedIndividual),
        (chp.hasName, Literal(artist_name)),
    )
    for predicate, value in statements:
        g.add((artist_node, predicate, value))
    return artist_node

def get_song_name(artist_name, song_name):
    """Build the IRI local name of a song from its artist and title.

    Artist and title are glued with an underscore and then normalized with
    ``name_to_iri_name``.
    """
    combined = "_".join((artist_name, song_name))
    return name_to_iri_name(combined)

def song_name_to_node(g, author, song_iri_frag, song_raw_name):
    """Register a composition individual in graph ``g`` and return its node.

    Args:
        g: Graph receiving the triples.
        author: Node of the artist, linked through ``chp.hasAuthor``.
        song_iri_frag: Local name used to build the song IRI in ``chp``.
        song_raw_name: Original title, stored through ``chp.hasName``.
    """
    composition = URIRef(chp[song_iri_frag])
    triples = [
        (composition, RDF.type, chp.Composition),
        (composition, RDF.type, OWL.NamedIndividual),
        (composition, chp.hasAuthor, author),
        (composition, chp.hasName, Literal(song_raw_name)),
    ]
    for triple in triples:
        g.add(triple)
    return composition

In [11]:
# Populate graph `g` with every Hooktheory song: one Artist per group, and per
# song a Composition with its Key and a chain of ChordProgressionEntry nodes.
total_artists = len(df.artist.unique())
for i, (artist, songs) in enumerate(df.sort_values(["artist", "song"]).groupby("artist")):
    # Progress report every 100 artists.
    if i % 100 == 0:
        print(f"{i}/{total_artists}")
#     if i > 20:
#         break

    author = artist_name_to_node(g, artist)

    for row in songs.itertuples():
        # "#" is swapped for "s" in the tonic's IRI name (presumably because
        # "#" is not usable inside the IRI fragment — confirm against `chp`).
        tonic_name = row.keyTonic.replace("#", "s")
        scale_name = row.keyScale
        song_name = get_song_name(artist, row.song)

        # Note / Scale / Key individuals are shared between songs; the same
        # triples are simply re-added for every song that uses them.
        tonic_note = URIRef(chp[tonic_name])
        g.add((tonic_note, RDF.type, chp.Note))
        g.add((tonic_note, RDF.type, OWL.NamedIndividual))
        g.add((tonic_note, RDFS.label, Literal(row.keyTonic)))

        scale = URIRef(chp[scale_name])
        g.add((scale, RDF.type, chp.Scale))
        g.add((scale, RDF.type, OWL.NamedIndividual))
        g.add((scale, RDFS.label, Literal(row.keyScale)))

        # A key is identified by the "<tonic>_<scale>" pair.
        key = URIRef(chp[tonic_name + "_" + scale_name])
        g.add((key, RDF.type, chp.Key))
        g.add((key, RDF.type, OWL.NamedIndividual))
        g.add((key, chp.hasTonicNote, tonic_note))
        g.add((key, chp.hasScale, scale))

        song = song_name_to_node(g, author, song_name, row.song)
        g.add((song, chp.hasKey, key))

        # Every song gets a single progression, named "<song>_Progression1".
        progression_name = f"{song_name}_Progression1"
        last_entry = None
        # NOTE: this `i` shadows the artist counter of the outer loop. That is
        # harmless here because the outer `i` is only read at the top of each
        # outer iteration, where enumerate() has just re-assigned it.
        # zip() stops at the shorter of the two chord lists.
        for i, (chord_abs_str, chord_rel_str) in enumerate(zip(row.chordAbs, row.chordRel)):
            # Progression positions are 1-based.
            n = i + 1
            entry = URIRef(chp[f"{progression_name}_{n}"])
            g.add((entry, RDF.type, chp.ChordProgressionEntry))
            g.add((entry, RDF.type, OWL.NamedIndividual))
            g.add((entry, chp.hasProgressionPosition, Literal(n)))

            # Each entry is attached directly to the song.
            g.add((song, chp.hasChordProgression, entry))

            # Chord symbols are percent-encoded (quote_plus) to form the IRI
            # name; the raw symbol is kept as representation and label.
            chord_abs = URIRef(chp[quote_plus(chord_abs_str)])
            g.add((chord_abs, RDF.type, chp.AbsoluteChord))
            g.add((chord_abs, RDF.type, OWL.NamedIndividual))
            g.add((chord_abs, chp.hasHookTheoryRepresentation, Literal(chord_abs_str)))
            g.add((chord_abs, RDFS.label, Literal(chord_abs_str)))
            g.add((entry, chp.hasAbsoluteChord, chord_abs))

            chord_rel = URIRef(chp[quote_plus(chord_rel_str)])
            g.add((chord_rel, RDF.type, chp.RelativeChord))
            g.add((chord_rel, RDF.type, OWL.NamedIndividual))
            g.add((chord_rel, chp.hasHookTheoryRepresentation, Literal(chord_rel_str)))
            g.add((chord_rel, RDFS.label, Literal(chord_rel_str)))
            g.add((entry, chp.hasRelativeChord, chord_rel))

            # Link consecutive entries so the progression can be walked in order.
            if last_entry:
                g.add((last_entry, chp.hasNextEntry, entry))
            last_entry = entry
            

0/2944
100/2944
200/2944
300/2944
400/2944
500/2944
600/2944
700/2944
800/2944
900/2944
1000/2944
1100/2944
1200/2944
1300/2944
1400/2944
1500/2944
1600/2944
1700/2944
1800/2944
1900/2944
2000/2944
2100/2944
2200/2944
2300/2944
2400/2944
2500/2944
2600/2944
2700/2944
2800/2944
2900/2944


In [12]:
len(g)

2176852

In [13]:
POPULATED_OWL_FILENAME = "../ontologies/chord-progressions-populated-ht-full.owl"
g.serialize(destination=POPULATED_OWL_FILENAME, format="turtle")

<Graph identifier=Nbc3e2961b71d44819e9291bfadf8a660 (<class 'rdflib.graph.Graph'>)>

In [70]:
# rg = Graph(store="Oxigraph")
# rg.parse(POPULATED_OWL_FILENAME, format="ttl")
# len(rg)

7192

In [71]:
# def do_sparql_query(graph, query):
#     if isinstance(query, Path):
#         with query.open("r") as f:
#             q_str = f.read()
#     else:
#         q_str = str(query)
#         
#     res = graph.query(q_str)
#     
#     def map_value(v):
#         if isinstance(v, URIRef):
#             vs = str(v) 
#             return vs[vs.rfind("#") + 1:]
#         
#         return v.toPython()
#     
#     return pd.DataFrame([
#         {str(var): map_value(getattr(row, str(var))) for var in res.vars}
#         for row in res
#     ])

In [76]:
# do_sparql_query(rg, Path("queries/1.sparql"))

Unnamed: 0,abs_chord_repr,composition
0,D,38_Special_If_I'd_Been_the_One
1,D,2pac_Changes
2,D,070_Shake_Guilty_Conscience


### Querying Wikidata

In [14]:
from qwikidata.sparql import return_sparql_query_results

wdfn = "../source-data/wikidata-songs.csv"

def sparql_results_to_df(res):
    """Flatten a SPARQL JSON result into a DataFrame of plain values.

    Each entry of ``res["results"]["bindings"]`` becomes one row; for every
    bound variable only its ``"value"`` field is kept. Variables that are not
    bound in a given row end up as missing values in that row.
    """
    records = []
    for binding in res["results"]["bindings"]:
        record = {}
        for column, cell in binding.items():
            record[column] = cell["value"]
        records.append(record)
    return pd.DataFrame(records)

def get_artist_songs_from_wikidata(artist):
    """Query Wikidata for the songs associated with an artist.

    The artist is matched by its exact English ``rdfs:label``. A song is
    returned when the artist is its performer (P175) or its composer (P86);
    publication date (P577) and genre (P136) are optional. The selected
    variables are ``song``, ``songLabel``, ``artistLabel``, ``genreLabel`` and
    ``publicationDate``.

    Args:
        artist: Artist name exactly as it should appear as an English label.

    Returns:
        DataFrame built by ``sparql_results_to_df`` from the query result.

    Raises:
        Whatever ``return_sparql_query_results`` raises on network or
        response-decoding problems.
    """
    # The label is spliced into a double-quoted SPARQL string literal, so it
    # must be escaped; otherwise a name containing '"' or '\' produces a
    # malformed query. Backslashes go first so that the escapes added for the
    # other characters are not escaped again.
    escaped_artist = (
        artist.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return sparql_results_to_df(return_sparql_query_results("""
    SELECT DISTINCT ?song ?songLabel ?artistLabel ?genreLabel ?publicationDate WHERE {
    #   FILTER (?song = wd:Q21172725)
      ?artist rdfs:label "<$ARTIST_LABEL$>"@en.
      # we need either composer (classical music, for example) or performer (pop) and it's hard to tell what exactly, so include both.
      {?song wdt:P175 ?artist.} UNION {?song wdt:P86 ?artist.}
      ?song p:P31/ps:P31/(p:P279|ps:P279)* wd:Q105543609.
      OPTIONAL { ?song wdt:P577 ?publicationDate }
      OPTIONAL { ?song wdt:P136 ?genre }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    """.replace("<$ARTIST_LABEL$>", escaped_artist)))

get_artist_songs_from_wikidata("Adele").head(5)

Unnamed: 0,song,songLabel,artistLabel,genreLabel,publicationDate
0,http://www.wikidata.org/entity/Q326790,Skyfall,Adele,orchestral pop,
1,http://www.wikidata.org/entity/Q57786072,Best For Last,Adele,,
2,http://www.wikidata.org/entity/Q1107851,Cold Shoulder,Adele,soul music,2008-04-21T00:00:00Z
3,http://www.wikidata.org/entity/Q57797127,First Love,Adele,,
4,http://www.wikidata.org/entity/Q170165,Rolling in the Deep,Adele,power metal,2011-01-01T00:00:00Z


In [131]:
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
import time

# Progress counter shared by the worker threads; kept in a dict so that `job`
# can update it in place without a `global` declaration.
done = {"value": 0}
# Lock taken by `job` around each increment of done["value"].
cl = Lock()
# Distinct Hooktheory artist names; `job` is run once for each of them.
artists = df.artist.unique()

def job(artist):
    """Fetch the Wikidata songs of one artist, retrying until it succeeds.

    Prints the current progress, then calls ``get_artist_songs_from_wikidata``
    in a loop: any exception is reported and the call is repeated. The worker
    sleeps 5 seconds after every attempt, the successful one included. Once a
    result is available the shared ``done`` counter is incremented under the
    ``cl`` lock and the result is returned.
    """
    print(f"Done {done['value']} / {len(artists)}, doing {artist}")
    songs = None
    while True:
        try:
            songs = get_artist_songs_from_wikidata(artist)
        except Exception as e:
            print(f"{e.__class__.__name__}: {e} @ {artist}, retrying...")
        # Pause after each attempt, whether it failed or not.
        time.sleep(5)
        if songs is not None:
            break
    with cl:
        done["value"] += 1
    return songs

# Run `job` for every artist on a pool of 4 worker threads. Executor.map yields
# the results in the order of `artists`; pd.concat stacks the per-artist frames
# into one table (each frame keeps its own 0-based index).
with ThreadPoolExecutor(4) as tp:
    wdf = pd.concat(tp.map(job, artists))

Done 0 / 2944, doing 'Til Tuesday
Done 0 / 2944, doing 070 Shake
Done 0 / 2944, doing 100 gecs
Done 0 / 2944, doing 10cc
Done 1 / 2944, doing 112
Done 2 / 2944, doing 2 UnlimitedDone 3 / 2944, doing 20syl

Done 4 / 2944, doing 24kGoldn
Done 5 / 2944, doing 2Pac
Done 6 / 2944, doing 2pac
Done 7 / 2944, doing 3 Doors Down
Done 8 / 2944, doing 30 Seconds To Mars
Done 9 / 2944, doing 311
Done 10 / 2944, doing 38 Special
Done 11 / 2944, doing 3LAU
Done 12 / 2944, doing 4 Non Blondes
Done 13 / 2944, doing 4 Strings
Done 14 / 2944, doing 4minute
Done 15 / 2944, doing 5 Seconds Of Summer
Done 16 / 2944, doing 50 Cent
Done 17 / 2944, doing 65Daysofstatic
Done 18 / 2944, doing 808 State
Done 19 / 2944, doing 98 Degrees
Done 20 / 2944, doing 9th Wonder
Done 21 / 2944, doing A Boogie Wit Da Hoodie
Done 22 / 2944, doing A Great Big World
Done 23 / 2944, doing A Perfect Circle
Done 24 / 2944, doing A Sunny Day in Glasgow
Done 25 / 2944, doing A Tribe Called Quest
Done 26 / 2944, doing A-ha
Done 27 /

Done 238 / 2944, doing Barbra Streisand
Done 239 / 2944, doing Barenaked Ladies
Done 240 / 2944, doing Barry Manilow
Done 241 / 2944, doing Basement Jaxx
Done 242 / 2944, doing Basshunter
Done 243 / 2944, doing Bastille
Done 244 / 2944, doing Basto
Done 245 / 2944, doing Bat For Lashes
Done 246 / 2944, doing Baths
Done 247 / 2944, doing Bayside
Done 248 / 2944, doing Beach Bunny
Done 249 / 2944, doing Beach Fossils
Done 250 / 2944, doing Beach House
Done 251 / 2944, doing Bear McCreary
Done 252 / 2944, doing Beast
Done 253 / 2944, doing Bebe Rexha
Done 254 / 2944, doing Beck
Done 255 / 2944, doing Becky G
Done 256 / 2944, doing Bee Gees
Done 257 / 2944, doing Bela Bartok
Done 258 / 2944, doing Belchior
Done 259 / 2944, doing Belinda Carlisle
Done 260 / 2944, doing Bellini
Done 261 / 2944, doing Ben Folds
Done 262 / 2944, doing Ben Folds Five
Done 263 / 2944, doing Ben Harper
Done 264 / 2944, doing Ben Lee
Done 265 / 2944, doing Ben Prunty
Done 266 / 2944, doing Ben Rector
Done 267 / 29

Done 473 / 2944, doing Change
Done 474 / 2944, doing Charles Gounod
Done 475 / 2944, doing Charles Wright - The Watts 103rd Street Rhythm Ban
Done 476 / 2944, doing Charles-Valentin Alkan
Done 477 / 2944, doing Charley Patton
Done 478 / 2944, doing Charli XCX
Done 479 / 2944, doing Charlie Parker
Done 480 / 2944, doing Charlie Puth
Done 481 / 2944, doing Charlotte Day Wilson
Done 482 / 2944, doing Charlotte Gainsbourg
Done 483 / 2944, doing Charly Bliss
Done 484 / 2944, doing Chase Rice
Done 485 / 2944, doing Cheap Trick
Done 486 / 2944, doing Chef'Special
Done 487 / 2944, doing Cher
Done 488 / 2944, doing Cher Lloyd
Done 489 / 2944, doing Cheryl Cole
Done 490 / 2944, doing Chester See
Done 491 / 2944, doing Chet Faker
Done 492 / 2944, doing Chevelle
Done 493 / 2944, doing Chic
Done 494 / 2944, doing Chicago
Done 495 / 2944, doing Chico Buarque
Done 496 / 2944, doing Chiddy Bang
Done 497 / 2944, doing Chief Keef
Done 498 / 2944, doing Childish Gambino
Done 499 / 2944, doing Chipzel
Don

Done 705 / 2944, doing Dirty Loops
Done 706 / 2944, doing Dirty Projectors
Done 707 / 2944, doing Dirty South
Done 708 / 2944, doing Dirtyloud
Done 709 / 2944, doing Disasterpeace
Done 710 / 2944, doing Disclosure
Done 711 / 2944, doing Dispatch
Done 712 / 2944, doing Disturbed
Done 713 / 2944, doing Divine
Done 714 / 2944, doing Dixie Chicks
Done 715 / 2944, doing Dizzy Wright
Done 716 / 2944, doing Dj Sammy
Done 717 / 2944, doing Django Django
Done 718 / 2944, doing Dmitri Shostakovich
Done 719 / 2944, doing Doja Cat
Done 720 / 2944, doing Dolly Parton
Done 721 / 2944, doing Dominic Fike
Done 722 / 2944, doing Don Diablo
Done 723 / 2944, doing Don Henley
Done 724 / 2944, doing Don McLean
Done 725 / 2944, doing Donna Lewis
Done 726 / 2944, doing Donna Summer
Done 727 / 2944, doing Donovan
Done 728 / 2944, doing Doss
Done 729 / 2944, doing Dover
Done 730 / 2944, doing Doves
Done 731 / 2944, doing Dragonette
Done 732 / 2944, doing Dragonforce
Done 733 / 2944, doing Drake
Done 734 / 2944

Done 937 / 2944, doing Franz Schubert
Done 938 / 2944, doing Freddie Hubbard
Done 939 / 2944, doing Frederic Chopin
Done 940 / 2944, doing Free
Done 941 / 2944, doing Freezepop
Done 942 / 2944, doing French Montana
Done 943 / 2944, doing Friendly Fires
Done 944 / 2944, doing Frou Frou
Done 945 / 2944, doing Fuel
Done 946 / 2944, doing Fun
Done 947 / 2944, doing Fusq
Done 948 / 2944, doing Future
Done 949 / 2944, doing Future Islands
Done 950 / 2944, doing G-Eazy
Done 951 / 2944, doing GLOC-9
Done 952 / 2944, doing Gabbie Hanna
Done 953 / 2944, doing Gabby Barrett
Done 954 / 2944, doing Gabriel Faure
Done 955 / 2944, doing Gabriel Rios
Done 956 / 2944, doing Gabriel Yared
Done 957 / 2944, doing Gabrielle Aplin
Done 958 / 2944, doing Gackt
Done 959 / 2944, doing Gal Costa
Done 960 / 2944, doing Galantis
Done 961 / 2944, doing Galavant
Done 962 / 2944, doing Galaxie 500
Done 963 / 2944, doing Game Freak
Done 964 / 2944, doing Gamma ray
Done 965 / 2944, doing Gammer
Done 966 / 2944, doing 

Done 1170 / 2944, doing J Dilla
Done 1171 / 2944, doing JAHKOY
Done 1172 / 2944, doing JAM Project
Done 1173 / 2944, doing JJ Lin
Done 1174 / 2944, doing JJ Project
Done 1175 / 2944, doing JJAMZ
Done 1176 / 2944, doing JP Cooper
Done 1177 / 2944, doing JT Music
Done 1178 / 2944, doing Jace Everett
Done 1179 / 2944, doing Jack Conte
Done 1180 / 2944, doing Jack Garratt
Done 1181 / 2944, doing Jack Harlow
Done 1182 / 2944, doing Jack Johnson
Done 1183 / 2944, doing Jack Stauber
Done 1184 / 2944, doing Jack U
Done 1185 / 2944, doing Jack Wall
Done 1186 / 2944, doing Jack White
Done 1187 / 2944, doing JackLNDN
Done 1188 / 2944, doing Jackson 5
Done 1189 / 2944, doing Jackson Browne
Done 1190 / 2944, doing Jai Paul
Done 1191 / 2944, doing Jain
Done 1192 / 2944, doing Jake Bugg
Done 1193 / 2944, doing Jake Kaufman
Done 1194 / 2944, doing Jakob Liedholm
Done 1195 / 2944, doing James Arthur
Done 1196 / 2944, doing James Bay
Done 1197 / 2944, doing James Blake
Done 1198 / 2944, doing James Blun

Done 1398 / 2944, doing Kavinsky
Done 1399 / 2944, doing Keane
Done 1400 / 2944, doing Kehlani
Done 1401 / 2944, doing Keith Moon
Done 1402 / 2944, doing Keith Sweat
Done 1403 / 2944, doing Kelela
Done 1404 / 2944, doing Kelis
Done 1405 / 2944, doing Kelly Bailey
Done 1406 / 2944, doing Kelly Clarkson
Done 1407 / 2944, doing Kelly Rowland
Done 1408 / 2944, doing Kelsea Ballerini
Done 1409 / 2944, doing Ken Ashcorp
Done 1410 / 2944, doing Ken Hirai
Done 1411 / 2944, doing Kendrick Lamar
Done 1412 / 2944, doing Kenji Kawai
Done 1413 / 2944, doing Kenny Chesney
Done 1414 / 2944, doing Kenny Loggins
Done 1415 / 2944, doing Kenny Rogers
Done 1416 / 2944, doing Kent
Done 1417 / 2944, doing Keri Hilson
Done 1418 / 2944, doing Kero Kero Bonito
Done 1419 / 2944, doing Kesha
Done 1420 / 2944, doing Kev Brown
Done 1421 / 2944, doing Kevin Penkin
Done 1422 / 2944, doing Khalid
Done 1423 / 2944, doing Kiana Lede - Ur Best Friend
Done 1424 / 2944, doing Kid Cudi
Done 1425 / 2944, doing Kid Rock
Done

Done 1630 / 2944, doing Madness
Done 1631 / 2944, doing Madonna
Done 1632 / 2944, doing Madvillain
Done 1633 / 2944, doing Magdalena Bay
Done 1634 / 2944, doing Magic Man
Done 1635 / 2944, doing Maisie Peters
Done 1636 / 2944, doing Major Lazer
Done 1637 / 2944, doing Mako
Done 1638 / 2944, doing Mallu Magalhaes
Done 1639 / 2944, doing Maluma
Done 1640 / 2944, doing Manchester Orchestra
Done 1641 / 2944, doing Mandy Moore
Done 1642 / 2944, doing Manfred Mann
Done 1643 / 2944, doing Manian
Done 1644 / 2944, doing Manic Street Preachers
Done 1645 / 2944, doing Manowar
Done 1646 / 2944, doing Manuel
Done 1647 / 2944, doing Maor Levi
Done 1648 / 2944, doing Marc Anthony
Done 1649 / 2944, doing Marc Cohn
Done 1650 / 2944, doing Marcus Warner
Done 1651 / 2944, doing Marek Grechuta
Done 1652 / 2944, doing Maren Morris
Done 1653 / 2944, doing Maria Bethania
Done 1654 / 2944, doing Mariah Carey
Done 1655 / 2944, doing Maribou State
Done 1656 / 2944, doing Marillion
Done 1657 / 2944, doing Maril

Done 1857 / 2944, doing Nena
Done 1858 / 2944, doing Neneh Cherry
Done 1859 / 2944, doing Neon Indian
Done 1860 / 2944, doing Neon Trees
Done 1861 / 2944, doing Nerf Herder
Done 1862 / 2944, doing Nero
Done 1863 / 2944, doing Netsky
Done 1864 / 2944, doing Neutral Milk Hotel
Done 1865 / 2944, doing New Found Glory
Done 1866 / 2944, doing New Kids On The Block
Done 1867 / 2944, doing New Kids on the Block
Done 1868 / 2944, doing New Order
Done 1869 / 2944, doing New Politics
Done 1870 / 2944, doing New Radicals
Done 1871 / 2944, doing Niall Horan
Done 1872 / 2944, doing Niccolo Paganini
Done 1873 / 2944, doing Nick Cave and the Bad Seeds
Done 1874 / 2944, doing Nick Drake
Done 1875 / 2944, doing Nick Jonas
Done 1876 / 2944, doing Nickelback
Done 1877 / 2944, doing Nicki Minaj
Done 1878 / 2944, doing Nicky Romero
Done 1879 / 2944, doing Nico
Done 1880 / 2944, doing Nico Touches the Walls
Done 1881 / 2944, doing Nicola Conte 
Done 1882 / 2944, doing Nicolay
Done 1883 / 2944, doing Nicole 

Done 2083 / 2944, doing Rammstein
Done 2084 / 2944, doing Rancid
Done 2085 / 2944, doing Randy Crawford
Done 2086 / 2944, doing Randy Newman
Done 2087 / 2944, doing Rank 1
Done 2088 / 2944, doing Rascal Flatts
Done 2089 / 2944, doing Ratatat
Done 2090 / 2944, doing Rauw Alejandro
Done 2091 / 2944, doing Ray Charles
Done 2092 / 2944, doing Ray Conniff
Done 2093 / 2944, doing Razihel
Done 2094 / 2944, doing Real Estate
Done 2095 / 2944, doing Real McCoy
Done 2096 / 2944, doing Really Slow Motion
Done 2097 / 2944, doing Reba McEntire
Done 2098 / 2944, doing Rebecca Black
Done 2099 / 2944, doing Red Hot Chili Peppers
Done 2100 / 2944, doing Red Velvet
Done 2101 / 2944, doing Red Vox
Done 2102 / 2944, doing Rednex
Done 2103 / 2944, doing Reel Big Fish
Done 2104 / 2944, doing Rejjie Snow
Done 2105 / 2944, doing Reks
Done 2106 / 2944, doing Relient K
Done 2107 / 2944, doing Remioromen
Done 2108 / 2944, doing Rex Orange County
Done 2109 / 2944, doing Rezonate
Done 2110 / 2944, doing Rhapsody
D

Done 2315 / 2944, doing Smallpools
Done 2316 / 2944, doing Smash Mouth
Done 2317 / 2944, doing Snail's House
Done 2318 / 2944, doing Snakehips
Done 2319 / 2944, doing Snakeships
Done 2320 / 2944, doing Snarky Puppy
Done 2321 / 2944, doing Snoh Aalegra
Done 2322 / 2944, doing Snook
Done 2323 / 2944, doing Snoop Dogg
Done 2324 / 2944, doing Snow Patrol
Done 2325 / 2944, doing Social Distortion
Done 2326 / 2944, doing Sofa Surfers
Done 2327 / 2944, doing Soft Cell
Done 2328 / 2944, doing Softengine
Done 2329 / 2944, doing Soilwork
Done 2330 / 2944, doing Solange
Done 2331 / 2944, doing Solar Fields
Done 2332 / 2944, doing Solarstone
Done 2333 / 2944, doing Sondre Lerche
Done 2334 / 2944, doing Sonic Youth
Done 2335 / 2944, doing Sonique
Done 2336 / 2944, doing Sonny Alven
Done 2337 / 2944, doing Sons of Kemet
Done 2338 / 2944, doing Soul Asylum
Done 2339 / 2944, doing SoulEye
Done 2340 / 2944, doing Souls of Mischief
Done 2341 / 2944, doing Soundgarden
Done 2342 / 2944, doing Space Dimens

Done 2536 / 2944, doing The Gap Band
Done 2537 / 2944, doing The Gaslight Anthem
Done 2538 / 2944, doing The Glitch Mob
Done 2539 / 2944, doing The Go-Go's
Done 2540 / 2944, doing The Gregory Brothers
Done 2541 / 2944, doing The Griswolds
Done 2542 / 2944, doing The Guess Who
Done 2543 / 2944, doing The Head and The Heart
Done 2544 / 2944, doing The Heavy
Done 2545 / 2944, doing The Hollies
Done 2546 / 2944, doing The Honeysticks
Done 2547 / 2944, doing The Horrible Crowes
Done 2548 / 2944, doing The Hotelier
Done 2549 / 2944, doing The Human League
Done 2550 / 2944, doing The Hush Sound
Done 2551 / 2944, doing The Ink Spots
Done 2552 / 2944, doing The Internet
Done 2553 / 2944, doing The Isley Brothers
Done 2554 / 2944, doing The Jam
Done 2555 / 2944, doing The Japanese House
Done 2556 / 2944, doing The Kid LAROI
Done 2557 / 2944, doing The Kid Laroi
Done 2558 / 2944, doing The Killers
Done 2559 / 2944, doing The Kills
Done 2560 / 2944, doing The Kinks
Done 2561 / 2944, doing The Knac

Done 2753 / 2944, doing Two Door CInema Club
Done 2754 / 2944, doing Two Door Cinema Club
Done 2755 / 2944, doing Two Feet
Done 2756 / 2944, doing Two Steps From Hell
Done 2757 / 2944, doing Two Steps from Hell
Done 2758 / 2944, doing TwoThirds
Done 2759 / 2944, doing TyDi
Done 2760 / 2944, doing Tycho
Done 2761 / 2944, doing Tyler Bates
Done 2762 / 2944, doing Tyler Shaw
Done 2763 / 2944, doing Tyr
Done 2764 / 2944, doing U2
Done 2765 / 2944, doing UB40
Done 2766 / 2944, doing UFO
Done 2767 / 2944, doing UNKLE
Done 2768 / 2944, doing Uffie
Done 2769 / 2944, doing Ultravox
Done 2770 / 2944, doing Underscores
Done 2771 / 2944, doing Underworld
Done 2772 / 2944, doing Unknown Brain
Done 2773 / 2944, doing Unknown Mortal Orchestra
Done 2774 / 2944, doing Unwound
Done 2775 / 2944, doing Unwritten Law
Done 2776 / 2944, doing Usher
Done 2777 / 2944, doing VULFPECK
Done 2778 / 2944, doing Valve
Done 2779 / 2944, doing Vampire Weekend
Done 2780 / 2944, doing Van Halen
Done 2781 / 2944, doing V

In [132]:
wdf

Unnamed: 0,song,publicationDate,songLabel,artistLabel,genreLabel
0,http://www.wikidata.org/entity/Q99126904,2019-05-29T00:00:00Z,money machine,100 gecs,
1,http://www.wikidata.org/entity/Q93160306,2020-02-24T00:00:00Z,Ringtone (Remix),100 gecs,glitch hop
0,http://www.wikidata.org/entity/Q2525537,,From Rochdale to Ocho Rios,10cc,rock music
1,http://www.wikidata.org/entity/Q1823414,,Flying Junk,10cc,
2,http://www.wikidata.org/entity/Q14942376,,Rock 'n' Roll Lullaby,10cc,rock music
...,...,...,...,...,...
13,http://www.wikidata.org/entity/Q25212174,2016-05-27T00:00:00Z,Snowcone,deadmau5,
0,http://www.wikidata.org/entity/Q86742896,2020-02-26T00:00:00Z,Q86742896,fripSide,J-pop
1,http://www.wikidata.org/entity/Q86742896,2020-02-26T00:00:00Z,Q86742896,fripSide,anisong
0,http://www.wikidata.org/entity/Q85814726,2018-11-21T00:00:00Z,We Fell in Love in October,girl in red,


In [133]:
# wdf.to_csv(wdfn, index=False)

In [15]:
wdf = pd.read_csv(wdfn)
wdf

Unnamed: 0,song,publicationDate,songLabel,artistLabel,genreLabel
0,http://www.wikidata.org/entity/Q99126904,2019-05-29T00:00:00Z,money machine,100 gecs,
1,http://www.wikidata.org/entity/Q93160306,2020-02-24T00:00:00Z,Ringtone (Remix),100 gecs,glitch hop
2,http://www.wikidata.org/entity/Q2525537,,From Rochdale to Ocho Rios,10cc,rock music
3,http://www.wikidata.org/entity/Q1823414,,Flying Junk,10cc,
4,http://www.wikidata.org/entity/Q14942376,,Rock 'n' Roll Lullaby,10cc,rock music
...,...,...,...,...,...
25962,http://www.wikidata.org/entity/Q25212174,2016-05-27T00:00:00Z,Snowcone,deadmau5,
25963,http://www.wikidata.org/entity/Q86742896,2020-02-26T00:00:00Z,Q86742896,fripSide,J-pop
25964,http://www.wikidata.org/entity/Q86742896,2020-02-26T00:00:00Z,Q86742896,fripSide,anisong
25965,http://www.wikidata.org/entity/Q85814726,2018-11-21T00:00:00Z,We Fell in Love in October,girl in red,


In [16]:
def map_genres(tdf):
    """Collapse the genre labels of one song into a list of distinct genres.

    Missing values are dropped before de-duplicating, so a group that mixes
    missing and real labels yields only the real ones, regardless of which
    comes first, and an empty group no longer raises.

    Args:
        tdf: Series holding the ``genreLabel`` values of a single song.

    Returns:
        The distinct non-missing labels in order of first appearance, or
        ``None`` when the song has no genre at all.
    """
    genres = tdf.dropna().unique().tolist()
    return genres or None
    
# Collapse the Wikidata result (one row per song/genre combination) into one
# row per song: first label, artist and date; genres gathered by map_genres.
awdf = wdf.groupby("song").aggregate({
    "songLabel": "first",
    "artistLabel": "first",
    "genreLabel": map_genres,
    "publicationDate": "first",
})
awdf = awdf.reset_index()
# filter songs without valid label
# (a label equal to the tail of the entity IRI, e.g. "Q86742896", is just the
# entity id). The explicit .copy() makes the filtered frame independent, so
# the column assignment below does not trigger SettingWithCopyWarning.
awdf = awdf[~awdf.apply(lambda row: row.song.endswith(row.songLabel), axis=1)].copy()
# Unparseable or out-of-range dates become NaT instead of raising.
awdf["publicationDate"] = pd.to_datetime(awdf["publicationDate"], errors="coerce")
awdf

Unnamed: 0,song,songLabel,artistLabel,genreLabel,publicationDate
0,http://www.wikidata.org/entity/Q1001245,Fight from the Inside,Queen,,1977-01-01 00:00:00+00:00
1,http://www.wikidata.org/entity/Q1001441,"Serenade no. 3 in D, ""Andretter""",Wolfgang Amadeus Mozart,,1773-07-01 00:00:00+00:00
2,http://www.wikidata.org/entity/Q100144704,Lazy Day,John Cale,,2020-10-06 00:00:00+00:00
3,http://www.wikidata.org/entity/Q1001904,Flaming,Pink Floyd,[psychedelic rock],1967-08-04 00:00:00+00:00
4,http://www.wikidata.org/entity/Q1001928,Flash,Brian May,[rock music],1980-11-24 00:00:00+00:00
...,...,...,...,...,...
22804,http://www.wikidata.org/entity/Q99947370,Bet You Wanna,Blackpink,"[rap, K-pop]",2020-10-02 00:00:00+00:00
22805,http://www.wikidata.org/entity/Q99954395,Der traurige Mönch,Franz Liszt,,NaT
22806,http://www.wikidata.org/entity/Q999593,Unwritten,Natasha Bedingfield,[pop music],2004-11-29 00:00:00+00:00
22807,http://www.wikidata.org/entity/Q99980407,FRANCHISE,Travis Scott,,2020-09-25 00:00:00+00:00


In [17]:
awdf.describe()

  """Entry point for launching an IPython kernel.


Unnamed: 0,song,songLabel,artistLabel,genreLabel,publicationDate
count,21362,21362,21362,11619,14651
unique,21362,19193,1675,1076,3905
top,http://www.wikidata.org/entity/Q7063774,Symphony No. 1,Johann Sebastian Bach,[rock music],1967-01-01 00:00:00+00:00
freq,1,16,555,1234,260
first,,,,,1683-01-01 00:00:00+00:00
last,,,,,2030-12-05 00:00:00+00:00


In [18]:
awdf.genreLabel.explode().value_counts().head(50)

rock music                1287
pop music                 1158
country music              537
pop rock                   433
alternative rock           391
hip hop music              343
hard rock                  343
synth-pop                  292
folk rock                  287
progressive rock           259
contemporary R&B           232
dance-pop                  202
popular music              188
rock and roll              177
heavy metal                173
soul music                 171
rhythm and blues           159
soft rock                  154
psychedelic rock           153
jazz                       151
electropop                 123
new wave                   120
blues rock                 117
protest song               112
indie rock                  89
opera                       87
art rock                    86
progressive metal           80
bebop                       78
blues                       78
country rock                78
grunge                      78
house mu

In [19]:
# analyze matching with Hooktheory dataset
def match_sets(s1: set, s2: set, s1name: str = "s1", s2name: str = "s2"):
    """Summarize how two sets overlap.

    Returns a Series of five counts, labelled with the given names: the size
    of each set, the size of their intersection, and the sizes of the two
    one-sided differences.
    """
    shared = s1 & s2
    only_first = s1 - s2
    only_second = s2 - s1

    counts = {}
    counts[format(s1name)] = len(s1)
    counts[format(s2name)] = len(s2)
    counts["{} & {}".format(s1name, s2name)] = len(shared)
    counts["{} - {}".format(s1name, s2name)] = len(only_first)
    counts["{} - {}".format(s2name, s1name)] = len(only_second)
    return pd.Series(counts)
    
ht_artists = set(map(name_to_iri_name, df.artist.unique()))
wd_artists = set(map(name_to_iri_name, wdf.artistLabel.unique()))
match_sets(ht_artists, wd_artists, "ht", "wd")

ht         2939
wd         1705
ht & wd    1705
ht - wd    1234
wd - ht       0
dtype: int64

In [21]:
wg = Graph()
wg.parse("../ontologies/chord_progressions.owl", format="turtle")
wg.bind("chp", chp)

In [22]:
# Populate graph `wg` with the Wikidata songs: one Artist per group, and per
# song a Composition with optional publication date and genres.
wd_songs_by_artist = awdf.sort_values(["artistLabel", "song"]).groupby("artistLabel")
for i, (artist, songs) in enumerate(wd_songs_by_artist):
    author = artist_name_to_node(wg, artist)
    for row in songs.itertuples():
        label = row.songLabel
        # The label is percent-encoded before it becomes part of the IRI name;
        # the raw label is what gets stored as the song's name.
        song_iri_frag = get_song_name(artist, quote_plus(label))
        song = song_name_to_node(wg, author, song_iri_frag, label)
        if pd.notna(row.publicationDate):
            wg.add((song, chp.hasPublicationDate, Literal(row.publicationDate)))
        # A missing genre list (None) contributes no triples.
        for genre in row.genreLabel or ():
            wg.add((song, chp.hasGenre, Literal(genre)))

In [23]:
len(wg)

117001

In [24]:
wg.serialize(destination="../ontologies/chord-progressions-populated-wikidata.owl", format="turtle")

<Graph identifier=N0c186de8bee546bf8f2836f88662cfa8 (<class 'rdflib.graph.Graph'>)>

In [25]:
from rdflib.void import generateVoID

In [31]:
# Destination file of the merged graph.
vfn = "../ontologies/chord-progressions-populated-full.owl"
# NOTE(review): this empty graph only matters for the commented-out parse
# below; as written it is replaced by the union two statements later.
vg = Graph()

# vg.parse(vfn, format="turtle")
# Merge the two graphs built earlier in the notebook (`g` and `wg`).
vg = g | wg 

vg.bind("chp", chp)
vg.bind("chpv", chpv)

In [32]:
ovg = generateVoID(vg, dataset=chpv.Dataset)[0]

In [33]:
ovfn = vfn[:-4] + "-void.owl"
ovg.bind("chp", chp)
ovg.bind("chpv", chpv)
ovg.serialize(ovfn, format="turtle")

<Graph identifier=N720a864091854082a6fc6273caafd917 (<class 'rdflib.graph.Graph'>)>

In [34]:
vg.serialize(vfn, format="turtle")

<Graph identifier=N7f8ffedebb3a4124821e37ec008a2d50 (<class 'rdflib.graph.Graph'>)>