# IPRO construction

In [1]:
import owlready2 as owl
import types
import json
import re
from pprint import pprint as pp
from num2words import num2words
from unidecode import unidecode



## Load base Ontology
The base ontology (`ipro.owl`) is handcrafted; it is loaded below together with the ontologies it imports.

In [2]:
def _print_ontology_summary(onto):
    """Print a '#####' header for `onto`, then its classes and its properties."""
    print('#####', onto)
    print(list(onto.classes()))
    print(list(onto.properties()),'\n')


# Base ontology, read from the local file ipro.owl.
ipro = owl.get_ontology("file://ipro.owl").load()
_print_ontology_summary(ipro)

# First ontology imported by ipro.
list_onto = ipro.imported_ontologies[0]
_print_ontology_summary(list_onto)

# First and second ontologies imported by `list_onto`.
bag_onto = list_onto.imported_ontologies[0]
_print_ontology_summary(bag_onto)

sequence_onto = list_onto.imported_ontologies[1]
_print_ontology_summary(sequence_onto)

##### get_ontology("http://www.semanticweb.org/ipro#")
[ipro.Release, ipro.Album, ipro.Band, ipro.Bass, ipro.MusicalInstrument, ipro.Bassist, ipro.Musician, ipro.Discography, ipro.Drummer, ipro.Genre, ipro.Guitar, ipro.Guitarist, ipro.Keyboard, ipro.Keyboardist, ipro.Organ, ipro.Participation, ipro.Percussions, ipro.Piano, ipro.Singer, ipro.Song, ipro.Synthesizer, ipro.Vocals]
[ipro.ofAlbum] 

##### get_ontology("http://www.ontologydesignpatterns.org/cp/owl/list.owl#")
[list.List, list.ListItem]
[list.lastItemOf, list.hasLastItem, list.firstItemOf, list.nextItem, list.previousItem, list.hasFirstItem] 

##### get_ontology("http://www.ontologydesignpatterns.org/cp/owl/bag.owl#")
[bag.Item, bag.Bag]
[bag.size, bag.itemContent, bag.itemOf, bag.hasItem] 

##### get_ontology("http://www.ontologydesignpatterns.org/cp/owl/sequence.owl#")
[owl.Thing]
[sequence.directlyFollows, sequence.directlyPrecedes, sequence.follows, sequence.precedes, rdf-schema.comment, rdf-schema.label, rdf-schema.isDefi

### Helper function to generate identifiers

In [3]:
def str2identifier(s):
    """Turn a free-text label into an ASCII identifier with '_' separators.

    The text is transliterated with ``unidecode``; every character other than
    ASCII letters, digits, apostrophes, hyphens, underscores and spaces is
    dropped; each run of separators (apostrophe, hyphen, underscore, space)
    becomes a single underscore; leading/trailing underscores are stripped.

    Underscores belong to the separator class, so input such as ``'a_ b'``
    yields ``'a_b'`` instead of producing a doubled underscore.

    Args:
        s: The label to convert.

    Returns:
        The identifier string; empty when ``s`` contains no letters or digits.
    """
    ascii_text = unidecode(s)
    kept = re.sub(r"[^a-zA-Z0-9'\-_ ]", '', ascii_text)
    # '_' has to be collapsed as well, otherwise "a_ b" would become "a__b".
    return re.sub(r"['\-_ ]+", '_', kept).strip('_')

## Insert bands and discographies

In [4]:
# Load the three per-source artist dumps.
with open('wikidata/artists.json', 'r') as f:
    wikidata_artists = json.load(f)

with open('discogs/artists.json', 'r') as f:
    # Index the Discogs records by their 'discogsID' field (no temporary left behind).
    discogs_artists = {e['discogsID']: e['artist'] for e in json.load(f)}

with open('progarchives/biographies.json', 'r') as f:
    progarchives_bios = json.load(f)

with ipro:
    # External identifiers: string-valued, functional and inverse-functional.
    class wikidataIri(owl.Thing >> str, owl.FunctionalProperty, owl.InverseFunctionalProperty): pass
    class discogsId(owl.Thing >> str, owl.FunctionalProperty, owl.InverseFunctionalProperty): pass
    class progarchivesId(owl.Thing >> str, owl.FunctionalProperty, owl.InverseFunctionalProperty): pass
    # Not functional: an individual may carry several image URLs.
    class imageUrl(owl.Thing >> str): pass

    class activeSinceYear(ipro.Band >> int, owl.FunctionalProperty): pass
    class discography(ipro.Band >> ipro.Discography, owl.FunctionalProperty): pass
    class discographyOf(ipro.Discography >> ipro.Band, owl.FunctionalProperty):
        inverse_property = ipro.discography

    # One Band individual (plus its Discography) per Wikidata artist record.
    for artist in wikidata_artists:
        band_identifier = str2identifier(artist['itemLabel']['value'])
        band = ipro.Band(band_identifier)
        band.wikidataIri = artist['item']['value']
        band.discogsId = artist['discogsId']['value']
        band.progarchivesId = artist['progarchivesId']['value']
        band.comment = [owl.locstr(progarchives_bios[band.progarchivesId], lang = 'en')]

        discogs_artist = discogs_artists[band.discogsId]
        band.label = [owl.locstr(discogs_artist['name'], lang = 'en')]
        # Keep only the part before the first '-' of the 'inizio' value
        # (presumably the year of a date string; verify against the dump).
        band.activeSinceYear = int(artist['inizio']['value'].split('-')[0])

        # Assign instead of append so re-running the cell does not duplicate the
        # URL, and tolerate artists that have no image.
        images = discogs_artist.get('images') or []
        if images and images[0].get('uri'):
            band.imageUrl = [images[0]['uri']]

        band.discography = ipro.Discography(band_identifier + '_discography')
        band.discography.label = [owl.locstr(f'Discography of {band.label[0]}', lang = 'en')]
        band.discography.comment = [owl.locstr(f'Discography of Italian progressive rock band {band.label[0]}', lang = 'en')]

print(list(ipro.Band.instances())[:10])

[ipro.Acqua_Fragile, ipro.Alphataurus, ipro.Analogy, ipro.Area, ipro.Arti_e_Mestieri, ipro.Banco_del_Mutuo_Soccorso, ipro.Blocco_Mentale, ipro.Cervello, ipro.De_De_Lind, ipro.Formula_3]


In [5]:
# Show everything stored on the Acqua Fragile band individual
acqua_fragile = ipro.Acqua_Fragile
print(acqua_fragile)

# comment annotation, then the language tag of its first entry
band_comment = acqua_fragile.comment
print(band_comment)
print(band_comment[0].lang)

# label annotation, then the language tag of its first entry
band_label = acqua_fragile.label
print(band_label)
print(band_label[0].lang)

# external identifiers
print(acqua_fragile.wikidataIri)
print(acqua_fragile.discogsId)
print(acqua_fragile.progarchivesId)

# remaining data/object properties
print(acqua_fragile.activeSinceYear)
print(acqua_fragile.imageUrl)
print(acqua_fragile.discography)

ipro.Acqua_Fragile
['Founded in Parma, Italy in 1971 - Disbanded in 1975 - Reformed 2004-2006 and in 2017 as "Acqua Fragile Project".\nThey are perhaps best known for the band that were to supply PFM\'s English singing vocalist Bernado Lanzetti, making his debut with them on Chocolate Kings.\nLanzetti with guitarist Gino Campanini and drummer Piero Canavera had played together in Gli Immortali. Joined on keyboards by Maurizio Mori and bassist Franz Dondi, formerly of I Moschettieri, who released a single in 1967, they shortly changed their name to ACQUA FRAGILE.\nIt was to be two years before their eponymous debut album saw the light of day, due to difficulty in finding a record company that would allow them to release it with English sung lyrics. Musically they bore a resemblance to GENESIS and GENTLE GIANT, with harmony vocals not unlike CROSBY, STILLS, NASH AND YOUNG, no doubt influenced by the time Lanzetti spent in the USA. Lanzetti\'s vocals have a similar feel to Roger Chapman o

## Insert genres

In [6]:
# Mapping of genre key -> descriptive text, read from the Discogs dump
with open('discogs/genres.json', 'r') as f:
    genres = json.load(f)

with ipro:
    # Create one Genre individual per entry, labelled and commented in English
    for raw_name, description in genres.items():
        # '+' in a key stands in for a space; the result is title-cased
        display_name = ' '.join(raw_name.split('+')).title()
        genre_individual = ipro.Genre(str2identifier(display_name))
        genre_individual.label = [owl.locstr(display_name, lang = 'en')]
        genre_individual.comment = [owl.locstr(description, lang = 'en')]

## Insert albums

In [7]:
#################################
#         MANUAL FIXES          #
#################################

# The code below is intentionally left commented out. When enabled it extends
# 'progarchives/map2discogs_id.json' (the file the next cell reads):
#   * for every band, each ProgArchives album that is not yet in the map is
#     matched to the first Discogs release of that band whose title, after
#     str2identifier, contains the str2identifier form of the album title;
#   * when no release matches, a 'FIX #####...' placeholder string naming the
#     band and album is stored instead, to be replaced by hand;
#   * the updated map is then written back to the same JSON file.
# Note: discogs_id2release_details is loaded but not used by this snippet.

# with open('discogs/releases.json', 'r') as f:
#     discogs_id2releases = {e['discogsID']: e['releases'] for e in json.load(f)}

# with open('discogs/release_details.json', 'r') as f:
#     discogs_id2release_details = {e['releaseID']: e['master'] for e in json.load(f)}

# with open('progarchives/albums.json', 'r') as f:
#     progarchives_id2albums = json.load(f)

# with open('progarchives/map2discogs_id.json', 'r') as f:
#     progarchives2discogs_id = json.load(f)

# for band in ipro.Band.instances():
#     for progarchives_album in progarchives_id2albums[band.progarchivesId]:
#         if progarchives_album['progarchivesId'] in progarchives2discogs_id:
#             continue
#         try:
#             discogs_album = [rel for rel in discogs_id2releases[band.discogsId] if str2identifier(progarchives_album['title']) in str2identifier(rel['title'])][0]
#             progarchives2discogs_id[progarchives_album['progarchivesId']] = discogs_album['id']
#         except IndexError:
#             progarchives2discogs_id[progarchives_album['progarchivesId']] = f'FIX ##################### {band.label[0]} - {progarchives_album["title"]}'

# with open('progarchives/map2discogs_id.json', 'w+') as f:
#     json.dump(progarchives2discogs_id, f)

In [8]:
# ProgArchives album id -> Discogs release id (curated mapping file).
with open('progarchives/map2discogs_id.json', 'r') as f:
    progarchives2discogs_id = json.load(f)

# Discogs release id -> the record stored under its 'master' key.
with open('discogs/release_details.json', 'r') as f:
    tmp = json.load(f)
    discogs_releases = {e['releaseID']: e['master'] for e in tmp}
    del tmp

# ProgArchives band id -> list of that band's albums.
with open('progarchives/albums.json', 'r') as f:
    progarchives_id2albums = json.load(f)

with ipro:
    # A discography is a bag of releases; releases form a linked list.
    class containsRelease(ipro.Discography >> ipro.Release, bag_onto.hasItem): pass
    class releaseOf(owl.ObjectProperty):
        inverse_property = ipro.containsRelease
    class previousRelease(ipro.Release >> ipro.Release, list_onto.previousItem, owl.FunctionalProperty): pass
    class nextRelease(ipro.Release >> ipro.Release, list_onto.nextItem, owl.FunctionalProperty):
        inverse_property = ipro.previousRelease
    class releaseYear(ipro.Release >> int, owl.FunctionalProperty): pass

    class hasGenre(ipro.Album >> ipro.Genre): pass

    ipro.Discography.equivalent_to.append(ipro.containsRelease.some(ipro.Release))

    for band in ipro.Band.instances():
        prev_release = None
        for i, rel in enumerate(progarchives_id2albums[band.progarchivesId]):
            # Resolve the Discogs record once instead of repeating the nested lookup.
            discogs_id = progarchives2discogs_id[rel['progarchivesId']]
            details = discogs_releases[discogs_id]
            album_name = details['title']
            ordinal = num2words(i+1, ordinal=True)

            release = ipro.Release(str2identifier(album_name) + '_release')
            release.label = [owl.locstr(album_name + ' studio release', lang = 'en')]
            release.comment = [owl.locstr(f'{release.label[0]} is the {ordinal} release of {band.label[0]}, published in {rel["year"]}', lang = 'en')]
            release.releaseYear = rel['year']

            band.discography.containsRelease.append(release)

            album = ipro.Album(str2identifier(album_name) + '_album')
            album.label = [owl.locstr(album_name, lang = 'en')]
            album.comment = [owl.locstr(f'{album.label[0]} is the {ordinal} album of {band.label[0]} released in {rel["year"]}', lang = 'en')]
            album.progarchivesId = rel['progarchivesId']
            album.discogsId = str(discogs_id)

            # Check the image list before indexing it, so a release without
            # images is skipped rather than raising.
            images = details.get('images') or []
            image_url = images[0].get('uri') if images else None
            if image_url:
                album.imageUrl = [image_url]

            # Genres and styles are both mapped to Genre individuals. A separate
            # name is used so the `genres` dict loaded earlier is not overwritten.
            release_genres = (details.get('genres') or []) + (details.get('styles') or [])
            for genre_label in release_genres:
                g_name = str2identifier(genre_label.title())
                genre = ipro[g_name]
                if genre is None:
                    # Unknown genre: report its identifier and skip it instead
                    # of appending None to the property.
                    print(g_name)
                    continue
                album.hasGenre.append(genre)

            release.ofAlbum = album

            # Chain releases in the order they appear in the ProgArchives list.
            if prev_release:
                assert prev_release.releaseYear <= release.releaseYear, "Previous album release year is greater than current album release year"
                prev_release.nextRelease = release

            prev_release = release

# owl.sync_reasoner_hermit(ipro)


In [9]:
# Write the populated ontology to ipro_generated.owl (relative path)
ipro.save(file='ipro_generated.owl')