forked from mhollingshead/billboard-hot-100
-
Notifications
You must be signed in to change notification settings - Fork 0
/
h100json2rdf.py
executable file
·61 lines (46 loc) · 1.93 KB
/
h100json2rdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
# Create Turtle RDF from the Billboard Hot 100 JSON data at
# https://github.com/mhollingshead/billboard-hot-100.
# run as
# ./h100json2rdf.py ../all.json
import json
import sys
import urllib.parse
# Prefix declarations written once at the top of the Turtle output.
_PREFIX_LINES = (
    '@prefix h1: <http://rdfdata.org/hot100#> .',
    '@prefix schema: <http://schema.org/> .',
    '@prefix dc: <http://purl.org/dc/elements/1.1/> .',
    '@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .',
    '@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .',
)

# The four characters that may not appear unescaped inside a double-quoted
# Turtle string literal: backslash, double quote, line feed, carriage return.
_TURTLE_ESCAPES = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
})

# Characters dropped from an IRI local name after percent-encoding.
_IRI_STRIP = str.maketrans('', '', ' &/."\'')


def _turtle_string(text):
    """Return *text* escaped for use inside a double-quoted Turtle literal."""
    return text.translate(_TURTLE_ESCAPES)


def _local_name(text):
    """Return the local part of the h1: IRI minted for *text*.

    The name is percent-encoded, encoded spaces (%20) are removed, and any
    remaining character listed in _IRI_STRIP is dropped. Because the
    stripping runs after percent-encoding, in practice only '/' and '.'
    are still present to be removed; the other listed characters have
    already become %XX sequences and are kept in that form.
    """
    # NOTE(review): double quotes are backslash-escaped *before* encoding, so
    # a '"' in a name contributes %5C%22 rather than %22. That is how this
    # script has always minted its IRIs; it is kept so that resource names
    # stay identical across runs. Confirm before changing.
    legacy = text.replace('"', '\\"')
    encoded = urllib.parse.quote(legacy).replace('%20', '')
    return encoded.translate(_IRI_STRIP)


def _recording_lines(chart_date, recording):
    """Yield the Turtle lines for one chart entry.

    *recording* must provide the keys "artist", "song" and "this_week";
    *chart_date* is written as the lexical form of an xsd:date literal.
    """
    artist = recording["artist"]
    song = recording["song"]
    position = recording["this_week"]

    artist_iri = 'h1:' + _local_name(artist)
    # ID of song is artist + song because two different songs can have the
    # same title, e.g. Taylor Swift's and Bananarama's "Cruel Summer".
    song_iri = 'h1:' + _local_name(artist + song)

    yield f'{artist_iri} a h1:MusicalArtist ; '
    yield f' rdfs:label "{_turtle_string(artist)}"@en .\n'
    yield f'{song_iri} a schema:MusicRecording;'
    yield f' schema:byArtist {artist_iri};'
    yield f' dc:title "{_turtle_string(song)}";'
    # The chart position is attached to the h1:charted triple inside a
    # {| ... |} block (RDF-star annotation syntax). The opening brace is kept
    # outside the f-string so it does not need doubling.
    yield f' h1:charted "{chart_date}"^^xsd:date ' + '{| '
    yield f' h1:position {position}'
    yield '|}.\n'


def turtle_lines(charts):
    """Yield every output line (without its trailing newline) for *charts*.

    *charts* is the decoded JSON document: an iterable of weeks, each a
    mapping with a "date" value and a "data" list of recordings. The prefix
    declarations and one blank line come first, then eight lines per
    recording. A line that itself ends in a newline produces a blank
    separator line once printed.
    """
    yield from _PREFIX_LINES
    yield ''
    for week in charts:
        chart_date = week["date"]
        for recording in week["data"]:
            yield from _recording_lines(chart_date, recording)


def main(argv=None):
    """Convert the JSON file named in *argv* to Turtle on standard output.

    *argv* defaults to sys.argv; element 1 is the input filename.

    Returns the process exit status: 0 on success, 1 when no input filename
    was given.

    Raises OSError if the file cannot be opened, json.JSONDecodeError if it
    is not valid JSON, and KeyError if an expected key is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Report on stderr so the message never ends up in a redirected
        # Turtle file, and signal failure to the calling shell.
        print("Enter an input filename as an argument.", file=sys.stderr)
        return 1

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(argv[1], encoding="utf-8") as fp:
        charts = json.load(fp)

    for line in turtle_lines(charts):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())