# IPRO construction

In [1]:
import owlready2 as owl
import types
import json
import re
from pprint import pprint as pp
from num2words import num2words
import networkx as nx
from unidecode import unidecode



## Load base Ontology
The base ontology (`ipro.owl`) was handcrafted; it is loaded here together with the ontologies it imports.

In [2]:
# Load the base ontology and pick up the ontologies reachable through its
# imports: the first import of `ipro`, then the first and second import of
# that one.
ipro = owl.get_ontology("file://ipro.owl").load()
list_onto = ipro.imported_ontologies[0]
bag_onto = list_onto.imported_ontologies[0]
sequence_onto = list_onto.imported_ontologies[1]

# Print the classes and properties declared by each ontology, in load order.
for onto in (ipro, list_onto, bag_onto, sequence_onto):
    print('#####', onto)
    print(list(onto.classes()))
    print(list(onto.properties()), '\n')

# Namespace handle for the W3C time vocabulary used for durations later on.
# NOTE: this name would shadow the standard-library `time` module if that
# module were ever imported in the same session.
time = ipro.get_namespace('http://www.w3.org/2006/time')
print(time)
print(time.DurationDescription)

##### get_ontology("http://ipro-semantic.com#")
[ipro.Release, ipro.Album, time.DurationDescription, ipro.Band, ipro.Bass, ipro.StringsPlucked, ipro.Bassist, ipro.Musician, ipro.BrassInstrument, ipro.MusicalInstrument, ipro.Discography, ipro.Drummer, ipro.Drums, ipro.PercussionInstrument, ipro.Effects, ipro.Genre, ipro.Guitar, ipro.Guitarist, ipro.Keyboard, ipro.Keyboardist, ipro.Organ, ipro.Participation, ipro.Percussions, ipro.Piano, ipro.ProductionRole, ipro.Singer, ipro.Strings, ipro.StringsDrawn, ipro.Synthesizer, ipro.Track, ipro.Vocals, ipro.WindInstrument]
[time.minutes, time.seconds, ipro.ofAlbum, time.hasDuration, 1.1.creator, 1.1.date, 1.1.description, 1.1.title, vann.preferredNamespacePrefix, vann.preferredNamespaceUri] 

##### get_ontology("http://www.ontologydesignpatterns.org/cp/owl/list.owl#")
[list.List, list.ListItem]
[list.lastItemOf, list.hasLastItem, list.firstItemOf, list.nextItem, list.previousItem, list.hasFirstItem] 

##### get_ontology("http://www.ontologydesi

### Helper function to generate identifiers

In [3]:
def str2identifier(s: str) -> str:
    """Turn an arbitrary label into an ASCII identifier.

    The text is transliterated with ``unidecode``; every character other than
    ASCII letters, digits, apostrophes, hyphens, underscores and spaces is
    dropped; each run of apostrophes, hyphens, underscores and spaces is then
    collapsed into a single underscore.  Leading and trailing underscores are
    stripped from the result.

    Underscores take part in the collapsing step so that inputs such as
    ``"a _b"`` or ``"a__b"`` yield ``"a_b"`` instead of failing a sanity
    assertion.  Inputs that never produced a double underscore give exactly
    the same identifier as before.
    """
    ascii_text = unidecode(s)
    kept = re.sub(r"[^a-zA-Z0-9'\-_ ]", '', ascii_text)
    return re.sub(r"['\-_ ]+", '_', kept).strip('_')

## Insert bands and discographies

In [4]:
# Source data. The Wikidata result drives the loop below; the Discogs and
# Progarchives dumps are looked up through the IDs each Wikidata entry carries.
# The files are read explicitly as UTF-8 (like the instrument files further
# down) so parsing does not depend on the platform's default encoding.
with open('wikidata/artists.json', 'r', encoding='utf-8') as f:
    wikidata_artists = json.load(f)

with open('discogs/artists.json', 'r', encoding='utf-8') as f:
    tmp = json.load(f)
    # index the artist records by their Discogs ID
    discogs_artists = {e['discogsID']: e['artist'] for e in tmp}

with open('progarchives/biographies.json', 'r', encoding='utf-8') as f:
    progarchives_bios = json.load(f)

with ipro:
    # --- external identifiers and image links (usable on any individual) ---
    class wikidataIri(owl.Thing >> str, owl.FunctionalProperty, owl.InverseFunctionalProperty):
        label = [owl.locstr('Wikidata IRI', lang = 'en')]
        comment = [owl.locstr('Link to Wikidata IRI', lang = 'en')]
    class discogsId(owl.Thing >> str, owl.FunctionalProperty, owl.InverseFunctionalProperty):
        label = [owl.locstr('Discogs ID', lang = 'en')]
        comment = [owl.locstr('Link to Discogs ID', lang = 'en')]
    class progarchivesId(owl.Thing >> str, owl.FunctionalProperty, owl.InverseFunctionalProperty):
        label = [owl.locstr('Progarchives ID', lang = 'en')]
        comment = [owl.locstr('Link to Progarchives ID', lang = 'en')]
    class imageUrl(owl.Thing >> str):
        label = [owl.locstr('Image URL', lang = 'en')]
        comment = [owl.locstr('Link to image URL', lang = 'en')]

    # --- band-specific properties ---
    class activeSinceYear(ipro.Band >> int, owl.FunctionalProperty):
        label = [owl.locstr('Active since year', lang = 'en')]
        comment = [owl.locstr('Indicates the year since the band was active', lang = 'en')]
    class discography(ipro.Band >> ipro.Discography, owl.FunctionalProperty):
        label = [owl.locstr('Discography', lang = 'en')]
        comment = [owl.locstr('Link to the discography of the band', lang = 'en')]
    class discographyOf(ipro.Discography >> ipro.Band, owl.FunctionalProperty):
        inverse_property = ipro.discography
        label = [owl.locstr('Discography of', lang = 'en')]
        comment = [owl.locstr('Link to the band that has the discography', lang = 'en')]

    # One Band individual (plus its Discography) per Wikidata artist.
    for artist in wikidata_artists:
        band_identifier = str2identifier(artist['itemLabel']['value'])

        band = ipro.Band(band_identifier)
        band.wikidataIri = artist['item']['value']
        band.discogsId = artist['discogsId']['value']
        band.progarchivesId = artist['progarchivesId']['value']
        # biography text comes from Progarchives, display name from Discogs
        band.comment = [owl.locstr(progarchives_bios[band.progarchivesId], lang = 'en')]
        band.label = [owl.locstr(discogs_artists[band.discogsId]['name'], lang = 'en')]
        # 'inizio' is split on '-' and its first component is stored as the year
        band.activeSinceYear = int(artist['inizio']['value'].split('-')[0])
        # only the first Discogs image is kept
        band.imageUrl.append(discogs_artists[band.discogsId]['images'][0]['uri'])

        band.discography = ipro.Discography(band_identifier + '_discography')
        band.discography.label = [owl.locstr(f'Discography of {band.label[0]}', lang = 'en')]
        band.discography.comment = [owl.locstr(f'Discography of Italian progressive rock band {band.label[0]}', lang = 'en')]

print(list(ipro.Band.instances())[:10])

[ipro.Acqua_Fragile, ipro.Alphataurus, ipro.Analogy, ipro.Area, ipro.Arti_e_Mestieri, ipro.Banco_del_Mutuo_Soccorso, ipro.Blocco_Mentale, ipro.Cervello, ipro.De_De_Lind, ipro.Formula_3]


In [5]:
# Show everything stored for the Acqua Fragile band individual
acqua_fragile = ipro.Acqua_Fragile
print(acqua_fragile)

# annotations: the value list followed by the language tag of its first entry
for annotation in (acqua_fragile.comment, acqua_fragile.label):
    print(annotation)
    print(annotation[0].lang)

# identifiers, start year, images and linked discography
for prop_name in ('wikidataIri', 'discogsId', 'progarchivesId',
                  'activeSinceYear', 'imageUrl', 'discography'):
    print(getattr(acqua_fragile, prop_name))

ipro.Acqua_Fragile
['Founded in Parma, Italy in 1971 - Disbanded in 1975 - Reformed 2004-2006 and in 2017 as "Acqua Fragile Project".\nThey are perhaps best known for the band that were to supply PFM\'s English singing vocalist Bernado Lanzetti, making his debut with them on Chocolate Kings.\nLanzetti with guitarist Gino Campanini and drummer Piero Canavera had played together in Gli Immortali. Joined on keyboards by Maurizio Mori and bassist Franz Dondi, formerly of I Moschettieri, who released a single in 1967, they shortly changed their name to ACQUA FRAGILE.\nIt was to be two years before their eponymous debut album saw the light of day, due to difficulty in finding a record company that would allow them to release it with English sung lyrics. Musically they bore a resemblance to GENESIS and GENTLE GIANT, with harmony vocals not unlike CROSBY, STILLS, NASH AND YOUNG, no doubt influenced by the time Lanzetti spent in the USA. Lanzetti\'s vocals have a similar feel to Roger Chapman o

## Insert genres

In [6]:
# Genre descriptions keyed by genre name, and the matching DBpedia IRIs.
# Read explicitly as UTF-8 so parsing does not depend on the platform default.
with open('discogs/genres.json', 'r', encoding='utf-8') as f:
    genres = json.load(f)

with open('dbpedia/genres2dbpedia_iri.json', 'r', encoding='utf-8') as f:
    genres2dbpedia_iri = json.load(f)

# `influences` and `hasSubgenre` are directed relations, so the edge list has
# to be loaded as a directed graph.  The default (undirected) Graph does not
# preserve which endpoint was the source and merges reciprocal edges.
G = nx.read_edgelist('dbpedia/genre.edgelist', create_using=nx.DiGraph)

with ipro:
    class hasSubgenre(ipro.Genre >> ipro.Genre):
        label = [owl.locstr('Has subgenre', lang = 'en')]
        comment = [owl.locstr('Link to subgenre', lang = 'en')]
    class influences(ipro.Genre >> ipro.Genre, owl.TransitiveProperty):
        label = [owl.locstr('Influences', lang = 'en')]
        comment = [owl.locstr('Link to influenced genre', lang = 'en')]
    class dbpediaIri(ipro.Genre >> str, owl.FunctionalProperty, owl.InverseFunctionalProperty):
        label = [owl.locstr('DBpedia IRI', lang = 'en')]
        comment = [owl.locstr('Link to DBpedia IRI', lang = 'en')]

    # One Genre individual per entry; '+' in the raw key stands for a space.
    for gen, comment in genres.items():
        genre_name = gen.replace('+',' ').title()
        genre = ipro.Genre(str2identifier(genre_name))
        genre.label = [owl.locstr(genre_name, lang = 'en')]
        genre.comment = [owl.locstr(comment, lang = 'en')]
        genre.dbpediaIri = genres2dbpedia_iri[gen]

    # Link genres following the edge direction: source --property--> target.
    for i,j,prop in G.edges.data('property'):
        i_name = str2identifier(i.replace('+',' ').title())
        j_name = str2identifier(j.replace('+',' ').title())
        if prop == 'influences':
            ipro[i_name].influences.append(ipro[j_name])
        elif prop == 'hasSubgenre':
            ipro[i_name].hasSubgenre.append(ipro[j_name])
        else:
            raise ValueError(f'Unknown property {prop}')


## Manual fixes

In [7]:
#################################
#         MANUAL FIXES          #
#################################

# with open('discogs/releases.json', 'r') as f:
#     discogs_id2releases = {e['discogsID']: e['releases'] for e in json.load(f)}

# with open('discogs/release_details.json', 'r') as f:
#     discogs_id2release_details = {e['releaseID']: e['master'] for e in json.load(f)}

# with open('progarchives/albums.json', 'r') as f:
#     progarchives_id2albums = json.load(f)

# with open('progarchives/map2discogs_id.json', 'r') as f:
#     progarchives2discogs_id = json.load(f)

# for band in ipro.Band.instances():
#     for progarchives_album in progarchives_id2albums[band.progarchivesId]:
#         if progarchives_album['progarchivesId'] in progarchives2discogs_id:
#             continue
#         try:
#             discogs_album = [rel for rel in discogs_id2releases[band.discogsId] if str2identifier(progarchives_album['title']) in str2identifier(rel['title'])][0]
#             progarchives2discogs_id[progarchives_album['progarchivesId']] = discogs_album['id']
#         except IndexError:
#             progarchives2discogs_id[progarchives_album['progarchivesId']] = f'FIX ##################### {band.label[0]} - {progarchives_album["title"]}'

# with open('progarchives/map2discogs_id.json', 'w+') as f:
#     json.dump(progarchives2discogs_id, f)

## Insert instruments

In [8]:
# instrument name -> IPRO class name; keys are lower-cased on load
with open('progarchives/instruments.json', 'r', encoding='utf-8') as fh:
    raw_categories = json.load(fh)
    progarchives_instruments = {name.lower(): cls_name for name, cls_name in raw_categories.items()}

# instrument name -> descriptive comment; keys are lower-cased the same way
with open('progarchives/instruments_comment.json', 'r', encoding='utf-8') as fh:
    raw_comments = json.load(fh)
    progarchives_instruments_comment = {name.lower(): text for name, text in raw_comments.items()}

with ipro:
    for raw_label, description in progarchives_instruments_comment.items():
        class_name = progarchives_instruments[raw_label]
        display_label = raw_label.title()

        # production roles and instruments get different identifier suffixes
        if class_name == 'ProductionRole':
            suffix = '_role'
        else:
            suffix = '_instrument'

        individual = ipro[class_name](str2identifier(display_label + suffix))
        individual.label = [owl.locstr(display_label, lang = 'en')]
        individual.comment = [owl.locstr(description, lang = 'en')]

    # preview: at most the first eleven MusicalInstrument individuals
    for individual in list(ipro.MusicalInstrument.instances())[:11]:
        print(individual)

ipro.Backing_Vocals_instrument
ipro.Vocals_instrument
ipro.Minimoog_Synthesizer_instrument
ipro.Arp_2600_Synthesizer_instrument
ipro.Arp_Solina_Synth_instrument
ipro.Moog_Synthesizer_instrument
ipro.Ems_Synthesizer_instrument
ipro.Vcs_Synthesizer_instrument
ipro.Arp_Synthesizer_instrument
ipro.Eminent_Synthesizer_instrument
ipro.Synthesizer_instrument


In [9]:
# Source data for releases, albums, tracks and line-ups.  Files are read
# explicitly as UTF-8 (like the instrument files) so parsing does not depend
# on the platform's default encoding.
with open('progarchives/map2discogs_id.json', 'r', encoding='utf-8') as f:
    progarchives2discogs_id = json.load(f)

with open('discogs/release_details.json', 'r', encoding='utf-8') as f:
    tmp = json.load(f)
    # index the master-release records by Discogs release ID
    discogs_releases = {e['releaseID']: e['master'] for e in tmp}
    del tmp

with open('progarchives/albums.json', 'r', encoding='utf-8') as f:
    progarchives_id2albums = json.load(f)

with open('progarchives/tracks.json', 'r', encoding='utf-8') as f:
    progarchives_albumid2tracks = json.load(f)

with open('progarchives/members.json', 'r', encoding='utf-8') as f:
    progarchives_members = json.load(f)

with ipro:
    # --- discography <-> release ---
    class containsRelease(ipro.Discography >> ipro.Release, bag_onto.hasItem):
        label = [owl.locstr('Contains release', lang = 'en')]
        comment = [owl.locstr('Link to release of discography', lang = 'en')]
    class releaseOf(owl.ObjectProperty):
        inverse_property = ipro.containsRelease
        label = [owl.locstr('Release of', lang = 'en')]
        comment = [owl.locstr('Link to discography', lang = 'en')]
    class previousRelease(ipro.Release >> ipro.Release, list_onto.previousItem, owl.FunctionalProperty):
        label = [owl.locstr('Previous release', lang = 'en')]
        comment = [owl.locstr('Link to previous release of discography', lang = 'en')]
    class nextRelease(ipro.Release >> ipro.Release, list_onto.nextItem, owl.FunctionalProperty):
        inverse_property = ipro.previousRelease
        label = [owl.locstr('Next release', lang = 'en')]
        comment = [owl.locstr('Link to next release of discography', lang = 'en')]
    class releaseYear(ipro.Release >> int, owl.FunctionalProperty):
        label = [owl.locstr('Release year', lang = 'en')]
        comment = [owl.locstr('Year of publishment of the release', lang = 'en')]

    # --- album -> genre ---
    class hasGenre(ipro.Album >> ipro.Genre):
        label = [owl.locstr('Has genre', lang = 'en')]
        comment = [owl.locstr('Link to a genre of the album', lang = 'en')]

    # --- album <-> track ---
    class containsTrack(ipro.Album >> ipro.Track, bag_onto.hasItem):
        label = [owl.locstr('Contains track', lang = 'en')]
        comment = [owl.locstr('Link to a track of the album', lang = 'en')]
    class trackOf(owl.ObjectProperty):
        inverse_property = ipro.containsTrack
        label = [owl.locstr('Track of', lang = 'en')]
        comment = [owl.locstr('Link to album', lang = 'en')]
    class previousTrack(ipro.Track >> ipro.Track, list_onto.previousItem, owl.FunctionalProperty):
        label = [owl.locstr('Previous track', lang = 'en')]
        comment = [owl.locstr('Link to previous track of the album', lang = 'en')]
    class nextTrack(ipro.Track >> ipro.Track, list_onto.nextItem, owl.FunctionalProperty):
        inverse_property = ipro.previousTrack
        label = [owl.locstr('Next track', lang = 'en')]
        comment = [owl.locstr('Link to next track of the album', lang = 'en')]
    class trackNumber(ipro.Track >> int, owl.FunctionalProperty):
        label = [owl.locstr('Track number', lang = 'en')]
        comment = [owl.locstr('Track number in the album', lang = 'en')]

    # --- musician participation in an album ---
    class participatedIn(ipro.Musician >> ipro.Participation):
        label = [owl.locstr('Participated in', lang = 'en')]
        comment = [owl.locstr('Link to a participation of the musician', lang = 'en')]
    class withInstrument(ipro.Participation >> ipro.MusicalInstrument):
        label = [owl.locstr('With instrument', lang = 'en')]
        comment = [owl.locstr('Link to an instrument of the participation', lang = 'en')]
    class withProductionRole(ipro.Participation >> ipro.ProductionRole):
        label = [owl.locstr('With production role', lang = 'en')]
        comment = [owl.locstr('Link to a production role of the participation', lang = 'en')]
    class inAlbum(ipro.Participation >> ipro.Album, owl.FunctionalProperty):
        label = [owl.locstr('In album', lang = 'en')]
        comment = [owl.locstr('Link to the album of the participation', lang = 'en')]

    # --- band <-> musician ---
    class hasMusician(ipro.Band >> ipro.Musician):
        label = [owl.locstr('Has musician', lang = 'en')]
        comment = [owl.locstr('Link to a musician of the band', lang = 'en')]
    class musicianOf(ipro.Musician >> ipro.Band):
        inverse_property = ipro.hasMusician
        label = [owl.locstr('Musician of', lang = 'en')]
        comment = [owl.locstr('Link to a band of the musician', lang = 'en')]

    ipro.Discography.equivalent_to.append(ipro.containsRelease.some(ipro.Release))

    for band in ipro.Band.instances():
        prev_release = None
        for release_idx, rel in enumerate(progarchives_id2albums[band.progarchivesId]):
            discogs_id = progarchives2discogs_id[rel['progarchivesId']]
            discogs_release = discogs_releases[discogs_id]
            album_name = discogs_release['title']
            ordinal = num2words(release_idx + 1, ordinal=True)

            # insert release info and chain it after the previous release
            release = ipro.Release(str2identifier(album_name) + '_release')
            release.label = [owl.locstr(album_name + ' studio release', lang = 'en')]
            release.comment = [owl.locstr(f'{release.label[0]} is the {ordinal} release of {band.label[0]}, published in {rel["year"]}', lang = 'en')]
            release.releaseYear = rel['year']
            band.discography.containsRelease.append(release)
            if prev_release:
                assert prev_release.releaseYear <= release.releaseYear, "Previous album release year is greater than current album release year"
                prev_release.nextRelease = release
            prev_release = release

            # insert album info
            album = ipro.Album(str2identifier(album_name) + '_album')
            album.label = [owl.locstr(album_name, lang = 'en')]
            album.comment = [owl.locstr(f'{album.label[0]} is the {ordinal} album of {band.label[0]} released in {rel["year"]}', lang = 'en')]
            album.progarchivesId = rel['progarchivesId']
            album.discogsId = str(discogs_id)
            image_url = discogs_release['images'][0]['uri']
            if image_url:
                album.imageUrl = [image_url]
            release.ofAlbum = album

            # insert album genres (Discogs genres followed by styles)
            album_genres = discogs_release['genres'] + discogs_release['styles']
            for g in album_genres:
                g_name = str2identifier(g.title())
                genre = ipro[g_name]
                if not genre:
                    # report the unknown genre and skip it rather than
                    # attaching a missing entity to the album
                    print(g_name)
                    continue
                album.hasGenre.append(genre)

            # insert album tracks
            album_tracks = progarchives_albumid2tracks[album.progarchivesId]
            prev_track = None
            for track_idx, tr in enumerate(album_tracks['tracks']):
                # tr[0] is the title, tr[1] the duration as 'minutes:seconds'
                base_name = str2identifier(tr[0]) + '_track'
                track_name = base_name
                counter = 1
                # identical titles get a numeric suffix to keep identifiers unique
                while ipro[track_name]:
                    track_name = f'{base_name}{counter}'
                    counter += 1
                track = ipro.Track(track_name)

                track.label = [owl.locstr(tr[0], lang = 'en')]
                track.comment = [owl.locstr(f'{track.label[0]} is the {num2words(track_idx + 1, ordinal=True)} track of {album.label[0]} album', lang = 'en')]

                # The duration individual is derived from the unique track
                # identifier so that same-titled tracks do not share (and
                # overwrite) a single DurationDescription.
                duration_track = time.DurationDescription(track_name + '_duration')
                track.hasDuration.append(duration_track)
                duration_track.label = [owl.locstr(f'{track.label[0]} track duration', lang = 'en')]
                duration_track.comment = [owl.locstr(f'Duration of {track.label[0]} track', lang = 'en')]
                track_time = tr[1].split(':')
                duration_track.minutes.append(int(track_time[0]))
                duration_track.seconds.append(int(track_time[1]))

                track.trackNumber = track_idx + 1
                album.containsTrack.append(track)

                if prev_track:
                    assert prev_track.trackNumber < track.trackNumber, "Previous track number is greater than current track number"
                    prev_track.nextTrack = track
                prev_track = track

            # insert album duration -- once per album, not once per track
            duration_album = time.DurationDescription(str2identifier(album_name) + '_album_duration')
            album.hasDuration.append(duration_album)
            duration_album.label = [owl.locstr(f'{album.label[0]} album duration', lang = 'en')]
            duration_album.comment = [owl.locstr(f'Duration of {album.label[0]} album', lang = 'en')]
            total_time = album_tracks['total_time'].split(':')
            duration_album.minutes.append(int(total_time[0]))
            duration_album.seconds.append(int(total_time[1]))

            # insert musicians -- once per album, not once per track
            for musician_label, instruments in progarchives_members[band.progarchivesId][album.progarchivesId].items():
                musician_id = str2identifier(musician_label)
                musician = ipro[musician_id]
                if not musician:
                    musician = ipro.Musician(musician_id)
                    musician.label = [owl.locstr(musician_label, lang = 'en')]

                # a musician appearing on several albums is linked only once
                if musician not in band.hasMusician:
                    band.hasMusician.append(musician)

                participation = ipro.Participation(musician.get_name() + '_' + album.get_name().replace('_album','') +'_participation')
                participation.label = [owl.locstr(f'{musician.label[0]}\'s participation to {album.label[0]}', lang = 'en')]
                participation.comment = [owl.locstr(f'{musician.label[0]}\'s participation to the album {album.label[0]} of the band {band.label[0]}', lang = 'en')]
                musician.participatedIn.append(participation)
                participation.inAlbum = album

                # Each listed entry is resolved either as an instrument
                # individual or as a production-role individual.
                as_instruments = [ipro[str2identifier(entry.title()) + '_instrument'] for entry in instruments]
                as_roles = [ipro[str2identifier(entry.title()) + '_role'] for entry in instruments]
                participation.withInstrument = [found for found in as_instruments if found]
                participation.withProductionRole = [found for found in as_roles if found]
                assert len(instruments) == (len(participation.withInstrument) + len(participation.withProductionRole)), "Instrument count is not equal to participation count\n"

## Insert comment for musician

In [10]:
with ipro:
    # Give every musician a comment listing the bands they are linked to
    # through `musicianOf`.
    for person in ipro.Musician.instances():
        band_labels = []
        for member_band in person.musicianOf:
            band_labels.append(member_band.label[0])
        assert band_labels, "Musician is not in any band"
        joined_bands = ", ".join(band_labels)
        person.comment = [owl.locstr(f'{person.label[0]} is a musician, part of {joined_bands}', lang = 'en')]

## Other components

In [11]:
with ipro:
    # Shortcut properties: a musician plays an instrument (or holds a role)
    # when one of their participations is linked to it.
    class playsInstrument(owl.ObjectProperty):
        property_chain = [owl.PropertyChain([ipro.participatedIn, ipro.withInstrument])]
    class playsRole(owl.ObjectProperty):
        property_chain = [owl.PropertyChain([ipro.participatedIn, ipro.withProductionRole])]

    # Defined classes: a musician is classified by the kind of instrument
    # used in at least one of their participations.

    # definite class for Keyboardist
    ipro.Keyboardist.equivalent_to.append(ipro.participatedIn.some(ipro.withInstrument.some(ipro.Keyboard)))

    # definite class for Drummer
    ipro.Drummer.equivalent_to.append(ipro.participatedIn.some(ipro.withInstrument.some(ipro.PercussionInstrument)))

    # definite class for Guitarist
    ipro.Guitarist.equivalent_to.append(ipro.participatedIn.some(ipro.withInstrument.some(ipro.Guitar)))

    # definite class for Bassist
    ipro.Bassist.equivalent_to.append(ipro.participatedIn.some(ipro.withInstrument.some(ipro.Bass)))

    # definite class for Singer
    # (the base ontology names the class `Vocals`; there is no `ipro.Vocal`)
    ipro.Singer.equivalent_to.append(ipro.participatedIn.some(ipro.withInstrument.some(ipro.Vocals)))

    # definite class for Band
    ipro.Band.equivalent_to.append(ipro.discography.exactly(1,ipro.Discography))

    # dul = owl.get_ontology('http://www.ontologydesignpatterns.org/ont/dul/DUL.owl').load()
    # ipro.imported_ontologies.append(dul)

    # ipro.Release.is_a.append(dul.InformationObject)
    # ipro.Album.is_a.append(dul.ObjectAggregate)
    # ipro.Band.is_a.append(dul.Group)
    # ipro.Musician.is_a.append(dul.Agent)
    # ipro.MusicalInstrument.is_a.append(dul.DesignedArtifact)
    # ipro.Discography.is_a.append(dul.ObjectAggregate)
    # ipro.Genre.is_a.append(dul.Collection)
    # ipro.Participation.is_a.append(dul.Action)
    # ipro.ProductionRole.is_a.append(dul.Agent)
    # ipro.Track.is_a.append(dul.InformationObject)

## END: save ontology

In [12]:
# Write the ontology built in the cells above to 'ipro_generated.owl'
# (a relative path, so the file lands in the current working directory).
ipro.save('ipro_generated.owl')