In [15]:
import pandas as pd
import numpy as np
from rdflib import ConjunctiveGraph, URIRef, BNode, RDF, Literal
from rdflib.namespace import XSD
import validators

In [16]:
def isfloat(num):
    """Return True when ``num`` can be converted with ``float()``, else False.

    Parameters
    ----------
    num : object
        Candidate value, typically a string read from a CSV cell.

    Returns
    -------
    bool
        True if ``float(num)`` succeeds. Note that ``float()`` also accepts
        the strings ``'nan'`` and ``'inf'``, so those count as floats here.
    """
    try:
        float(num)
    except (TypeError, ValueError, OverflowError):
        # TypeError: non-string/non-numeric input such as None or a list.
        # ValueError: a string that is not a float literal.
        # OverflowError: an int too large to be represented as a float.
        return False
    return True


In [17]:
# Load the wine-region graph CSV; the loop further down reads its
# 'subject', 'predicate' and 'object' columns row by row.
df_regions = pd.read_csv("./datasets/wineRegionGraph.csv")

In [18]:
# Preview the first ten rows of the region table.
df_regions.head(10)

Unnamed: 0,subject,predicate,object
0,https://w3id.org/winecatalogue#Bullas,https://w3id.org/winecatalogue#wineProduced,17623
1,https://w3id.org/winecatalogue#Alicante,http://dbpedia.org/ontology/wikiPageID,11882604
2,https://w3id.org/winecatalogue#Tacoronte_Acentejo,http://dbpedia.org/ontology/wikiPageID,14227564
3,https://w3id.org/winecatalogue#Alella,http://www.w3.org/2003/01/geo/wgs84_pos#long,2.29583
4,https://w3id.org/winecatalogue#Valle_de_la_Oro...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://dbpedia.org/resource/Category:WineRegion
5,https://w3id.org/winecatalogue#Terra_Alta,http://dbpedia.org/property/year,1972
6,https://w3id.org/winecatalogue#Almansa,http://dbpedia.org/ontology/wikiPageID,11466715
7,https://w3id.org/winecatalogue#Priorat,http://dbpedia.org/property/officialName,Denominació d'Origen Qualificada Priorat / Den...
8,https://w3id.org/winecatalogue#La_Palma,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://dbpedia.org/resource/Category:WineRegion
9,https://w3id.org/winecatalogue#Montsant,https://w3id.org/winecatalogue#id,http://dbpedia.org/resource/Montsant_DO


In [19]:
# Build the catalogue graph from the region table: every CSV row becomes one
# quad (subject, predicate, object, graph) in the named graph BASE_URI.
BASE_URI = "https://w3id.org/winecatalogue#"
g = ConjunctiveGraph()
graph = URIRef(BASE_URI)
RDF_TYPE_IRI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

for index, row in df_regions.iterrows():
    # A row with a missing cell cannot form a triple; an empty CSV cell is read
    # as a float NaN, which has no string methods such as .isdigit().
    if row[['subject', 'predicate', 'object']].isna().any():
        continue

    # Coerce to str so the checks below also work if pandas parsed the column
    # as numeric rather than as text.
    object_text = str(row['object'])

    if validators.url(object_text):
        if row['predicate'] == RDF_TYPE_IRI:
            # Only rdf:type objects are kept as resources ...
            object_node = URIRef(object_text)
        else:
            # ... every other URL-valued object is stored as an xsd:anyURI literal.
            object_node = Literal(object_text, datatype=XSD.anyURI)
    elif object_text.isdigit():
        object_node = Literal(object_text, datatype=XSD.integer)
    elif isfloat(object_text):
        object_node = Literal(object_text, datatype=XSD.float)
    else:
        # Free text: plain literal without a datatype.
        object_node = Literal(object_text)

    g.add((URIRef(row['subject']), URIRef(row['predicate']), object_node, graph))

In [20]:
# Load the raw Spanish wines dataset.
df_wines = pd.read_csv("./datasets/wines_SPA.csv")

In [21]:
# (rows, columns) of the wine table as loaded, before any cleaning.
df_wines.shape

(7500, 11)

In [22]:
# Replace every missing value with the sentinel -1, then keep only the first
# row for each (winery, wine, year, price) combination.
dedup_keys = ['winery', 'wine', 'year', 'price']
df_wines = df_wines.fillna(-1).drop_duplicates(subset=dedup_keys, keep='first')

In [23]:
# Display the wine table after filling missing values and dropping duplicates.
df_wines

Unnamed: 0,winery,wine,year,rating,num_reviews,country,region,price,type,body,acidity
0,Teso La Monja,Tinto,2013,4.9,58,Espana,Toro,995.00,Toro Red,5.0,3.0
1,Artadi,Vina El Pison,2018,4.9,31,Espana,Vino de Espana,313.50,Tempranillo,4.0,2.0
2,Vega Sicilia,Unico,2009,4.8,1793,Espana,Ribera del Duero,324.95,Ribera Del Duero Red,5.0,3.0
3,Vega Sicilia,Unico,1999,4.8,1705,Espana,Ribera del Duero,692.96,Ribera Del Duero Red,5.0,3.0
4,Vega Sicilia,Unico,1996,4.8,1309,Espana,Ribera del Duero,778.06,Ribera Del Duero Red,5.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...
2020,Conreria d'Scala Dei,Les Brugueres,2018,4.2,390,Espana,Priorato,16.76,Priorat Red,-1.0,-1.0
2021,Mustiguillo,Finca Terrerazo,2017,4.2,390,Espana,El Terrerazo,24.45,Red,-1.0,-1.0
2022,Matarromera,Gran Reserva,2011,4.2,389,Espana,Ribera del Duero,64.50,Ribera Del Duero Red,-1.0,-1.0
2023,Sei Solo,Preludio,2016,4.2,388,Espana,Ribera del Duero,31.63,Ribera Del Duero Red,-1.0,-1.0


In [24]:
# Map every wine row to RDF quads in the catalogue graph, then serialise the
# whole graph `g` to RDF/XML.
graph = URIRef(BASE_URI)
HAS_NAME = URIRef(BASE_URI + 'hasName')
# String forms of a missing vintage. 'N.V.' and 'nan' are the dataset markers;
# '-1' / '-1.0' are what a fillna(-1) pass leaves in place of a missing year,
# and must not be emitted as an xsd:integer year.
MISSING_YEAR_MARKERS = ('N.V.', 'nan', '-1', '-1.0')


def _catalogue_uri(value, suffix=''):
    """Build a catalogue URI from a cell value, turning spaces into underscores."""
    return URIRef(BASE_URI + str(value).replace(' ', '_') + suffix)


for index, row in df_wines.iterrows():
    # Build each resource URI once per row instead of once per triple.
    winery_uri = _catalogue_uri(row['winery'])
    # The wine identifier is the wine name followed directly by its year.
    # NOTE(review): the winery is not part of this URI, so equally named wines
    # of the same year from different wineries share one node, and no triple
    # links a wine to its winery - confirm this is intended.
    wine_uri = _catalogue_uri(row['wine'], str(row['year']))
    country_uri = _catalogue_uri(row['country'])
    region_uri = _catalogue_uri(row['region'])
    type_uri = _catalogue_uri(row['type'])

    # Mapping wineries
    g.add((winery_uri, RDF.type, URIRef("http://schema.org/Winery"), graph))
    g.add((winery_uri, HAS_NAME, Literal(row['winery'], datatype=XSD.string), graph))

    # Mapping wine - type, name and vintage
    g.add((wine_uri, RDF.type, URIRef("https://w3id.org/winecatalogue#Wine"), graph))
    g.add((wine_uri, HAS_NAME, Literal(row['wine'], datatype=XSD.string), graph))
    if str(row['year']) not in MISSING_YEAR_MARKERS:
        g.add((wine_uri, URIRef('http://dbpedia.org/property/year'),
               Literal(row['year'], datatype=XSD.integer), graph))

    # Mapping wine - scores
    # NOTE(review): 'body', 'acidity' and 'type' may also hold the -1 fill
    # value; they are still emitted as-is - confirm whether they should be skipped.
    g.add((wine_uri, URIRef(BASE_URI + 'rating'), Literal(row['rating'], datatype=XSD.float), graph))
    g.add((wine_uri, URIRef(BASE_URI + 'numReviews'), Literal(row['num_reviews'], datatype=XSD.integer), graph))
    g.add((wine_uri, URIRef(BASE_URI + 'bodyScore'), Literal(row['body'], datatype=XSD.float), graph))
    g.add((wine_uri, URIRef(BASE_URI + 'acidity'), Literal(row['acidity'], datatype=XSD.float), graph))

    # Mapping with country
    g.add((wine_uri, URIRef('http://dbpedia.org/ontology/location'), country_uri, graph))
    g.add((country_uri, RDF.type, URIRef(BASE_URI + 'Country'), graph))
    g.add((country_uri, HAS_NAME, Literal(row['country'], datatype=XSD.string), graph))

    # Mapping with region
    g.add((wine_uri, URIRef(BASE_URI + 'hasRegion'), region_uri, graph))

    # Mapping price: a blank node carrying the unit (euro) and the numeric value
    price_Bn = BNode()
    g.add((wine_uri, URIRef(BASE_URI + 'hasPrice'), price_Bn, graph))
    g.add((price_Bn,
           URIRef('https://saref.etsi.org/core/unitOfMeasure'),
           URIRef('https://saref.etsi.org/core/euro'), graph))
    g.add((price_Bn,
           URIRef('https://w3id.org/winecatalogue#hasValue'),
           Literal(row['price'], datatype=XSD.float), graph))

    # Mapping with the wine type
    g.add((wine_uri, URIRef(BASE_URI + 'hasWineType'), type_uri, graph))
    g.add((type_uri, HAS_NAME, Literal(row['type'], datatype=XSD.string), graph))

# Write the graph built so far (region quads plus the wine quads) as RDF/XML.
g.serialize('wines_regions.rdf', format='xml', base='https://w3id.org/winecatalogue')

<Graph identifier=N5080e70b93504f6eb86bb2b452146293 (<class 'rdflib.graph.ConjunctiveGraph'>)>