# Reading MonaLIA RDF Data
To run SPARQL queries locally over HTTP, I installed the Apache Jena Fuseki service, started it with the 20 GB memory option, and uploaded the Joconde files:
    export_rdf_2018-03-21.ttl
    reprskos.rdf
    domnskos.rdf
    skos.rdf
    
The dataset can be managed from http://localhost:3030   

https://stackoverflow.com/questions/13897712/where-do-i-test-my-queries-for-my-rdf-written-in-sparql
http://jena.apache.org/documentation/serving_data/index.html#download-fuseki1

All credit for the RDF-data-to-pandas-DataFrame conversion goes to Ted Lawless:
https://lawlesst.github.io/notebook/sparql-dataframe.html

In [1]:
import os
import sys
import numpy as np
import pandas as pd

import json
from SPARQLWrapper import SPARQLWrapper, JSON, N3, XML

In [3]:
### Make the MonaLIA module, which sits in the notebook's working directory, importable ###
import importlib

working_dir = os.getcwd()
if working_dir not in sys.path:
    sys.path.append(working_dir)

import MonaLIA

# Reload so that edits to MonaLIA.py are picked up without restarting the kernel;
# as the last expression of the cell, the reloaded module is also displayed.
importlib.reload(MonaLIA)

<module 'MonaLIA' from 'C:\\Users\\abobashe\\Documents\\MonaLIA\\Python Scripts\\MonaLIA.py'>

### Read the query string from the file

The file can be developed in CORESE

In [12]:
# Load the SPARQL query text from the .rq file. The context manager closes the
# file handle even if reading or decoding fails.
with open('C:/Users/abobashe/Documents/MonaLIA/Queries/MonaLIA.DOMN Category Search.Images.rq', mode='rt', encoding='utf-8') as query_file:
    qs = query_file.read()

# print() rather than a bare expression so the query is shown with its line breaks.
print(qs)

#MonaLIA.DOMN Category Search.Images.rq
# Get the count of the images per domain 

prefix skos: <http://www.w3.org/2004/02/skos/core#> 
prefix jcl: <http://jocondelab.iri-research.org/ns/jocondelab/>
prefix dc: <http://purl.org/dc/elements/1.1/> 

select ?domain_label ?parentDomain_label  ?imagePath ?noticeReference	
where {
	
	?paintingDomain a jcl:Term.
	?paintingDomain skos:prefLabel "peinture"@fr.
	?paintingDomain skos:inScheme ?domainSchema.
	?domainSchema dc:identifier "DOMN".
  
  	?parentDomain skos:narrower ?paintingDomain.
  	?parentDomain skos:prefLabel ?parentDomain_label.

	?domain skos:broader ?parentDomain.
	?domain skos:prefLabel ?domain_label.

	?notice jcl:noticeDomnTerm ?domain.
               ?notice jcl:noticeHasImage true.
	?notice jcl:noticeImage [ jcl:noticeImageIsMain true ; jcl:noticeImagePath ?imagePath].
	?notice jcl:noticeRef ?noticeReference.	
}
order by ?parentDomain_label ?domain_label



#### Overwrite the query string if needed

In [12]:
# Inline replacement for the query read from file. It selects, in this order,
# the sub-category label, the parent-category label, the main image path and
# the notice reference of every notice that has an image and is tagged
# (jcl:noticeReprTerm) with a term at or below a direct child of the top
# categories "âge et sexe" or "animal". The `#select` line inside the string
# is a commented-out SPARQL variant that counts notices per category.
qs = '''
prefix skos: <http://www.w3.org/2004/02/skos/core#> 
prefix jcl: <http://jocondelab.iri-research.org/ns/jocondelab/>

#select ?parentCategory_label  ?subCategory_label   (count(?noticeReference) as ?c)  where {
select ?subCategory_label ?parentCategory_label ?imagePath ?noticeReference where { 

VALUES (?topCategory_label)  { ("âge et sexe"@fr)   ("animal"@fr)   } .

?topCategory a jcl:Term.
?topCategory skos:prefLabel ?topCategory_label .
?parentCategory skos:broader ?topCategory.
?parentCategory  skos:prefLabel ?parentCategory_label.

?subCategory skos:broader* ?parentCategory.
?subCategory  skos:prefLabel ?subCategory_label.

?notice jcl:noticeReprTerm ?subCategory.
?notice jcl:noticeHasImage true.               
?notice jcl:noticeImage [ jcl:noticeImageIsMain true ; jcl:noticeImagePath ?imagePath].
?notice jcl:noticeRef ?noticeReference.	
}
order by ?parentCategory_label ?subCategory_label
'''

### Specify local service

In [41]:
# SPARQL query endpoint of the local service (presumably the "Joconde" dataset
# on the Fuseki server described at the top of the notebook; confirm).
wds = "http://localhost:3030/Joconde/query"

### Run the query

In [14]:
# Run `qs` against the endpoint `wds` and load the result into a DataFrame via
# the project helper (implemented in MonaLIA.py, not shown in this notebook).
image_set_df = MonaLIA.sparql_service_to_dataframe(wds, qs)

#### Give the short column names

In [15]:
# Columns are renamed by position, so the order here must match the SELECT
# clause of `qs`: label, parent label, image path, notice reference.
col_names = ['category','parent', 'imagePath', 'ref' ]
image_set_df.columns = col_names
print(image_set_df.shape)
image_set_df.head()

(242430, 4)


Unnamed: 0,category,parent,imagePath,ref
0,art mobilier,domaine par support de conservation,/arc/0006/m500145_0000711_p.jpg,50010004390
1,art mobilier,domaine par support de conservation,/arc/0010/m500145_0016928_p.jpg,50010005721
2,art mobilier,domaine par support de conservation,/arc/0007/m500145_0002121_p.jpg,50010004447
3,art mobilier,domaine par support de conservation,/arc/0009/m500145_0007336_p.jpg,50010008271
4,art mobilier,domaine par support de conservation,/0055/m500145_0013990_p.jpg,50010008370


#### Display image count per category

In [16]:
# Number of result rows per (parent, category) pair; reset_index() turns the
# group keys back into columns and leaves the count column labelled 0.
image_set_df.groupby(['parent','category']).size().reset_index()

Unnamed: 0,parent,category,0
0,domaine par support de conservation,art mobilier,906
1,domaine par support de conservation,dessin,155334
2,domaine par support de conservation,enluminure,69
3,domaine par support de conservation,estampe,24237
4,domaine par support de conservation,imprimé,1019
5,domaine par support de conservation,manuscrit,990
6,domaine par support de conservation,miniature,1820
7,domaine par support de conservation,peinture,31901
8,domaine par support de conservation,photographie,15492
9,domaine par support de conservation,plumasserie,4


In [17]:
# Persist the DataFrame with IPython's %store so that another notebook/kernel
# can restore it with `%store -r image_set_df`.
%store image_set_df

Stored 'image_set_df' (DataFrame)


#### Display duplicates 
Images with more than one category assigned to them.

In [None]:
# Tabulate how many result rows carry each notice reference (single count
# column "ci"), then keep only the references that appear more than once.
c = pd.crosstab(columns="ci", index=image_set_df.ref)
c.loc[c.ci > 1]

### Scrapbook

In [34]:
# Lookup of the notices behind two specific reference numbers. It has its own
# name so that it is not silently discarded by the assignment to `qs1` below
# (the rdf prefix is now declared once instead of twice).
qs1_notice_by_ref = '''
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix skos: <http://www.w3.org/2004/02/skos/core#>
prefix jcl: <http://jocondelab.iri-research.org/ns/jocondelab/>
prefix dc: <http://purl.org/dc/elements/1.1/>

select ?x ?v where {
  VALUES ?v {"000PE000001" "00000102827"}.
  ?x jcl:noticeRef ?v.
}
'''

# Generic probe actually sent by the scratch cells: any ten triples.
qs1 = '''select * where { ?o ?p ?s } limit 10'''

In [35]:
# Alternative endpoint on port 8080 used by the scratch cells below.
wds1 = "http://localhost:8080/sparql/"

In [43]:
# Keep the scratch result under its own name: assigning it to `image_set_df`
# would replace the main image set built (and %store'd) earlier in the notebook
# with this 10-row probe result.
scratch_df = MonaLIA.sparql_service_to_dataframe(wds1, qs1)
scratch_df

Unnamed: 0,o,s,p
0,http://www.inria.fr/2015/humans#Man,http://anny.ad.inria.fr:8080/img/male.png,http://ns.inria.fr/sparql-template/icon
1,http://www.inria.fr/2015/humans#Person,http://anny.ad.inria.fr:8080/img/person.png,http://ns.inria.fr/sparql-template/icon
2,http://www.inria.fr/2015/humans#Lecturer,http://anny.ad.inria.fr:8080/img/professor.jpg,http://ns.inria.fr/sparql-template/icon
3,http://www.inria.fr/2015/humans#Researcher,http://anny.ad.inria.fr:8080/img/professor.jpg,http://ns.inria.fr/sparql-template/icon
4,http://www.inria.fr/2015/humans#Woman,http://anny.ad.inria.fr:8080/img/female.png,http://ns.inria.fr/sparql-template/icon
5,http://www.inria.fr/2015/humans-instances#John,37,http://www.inria.fr/2015/humans#age
6,http://www.inria.fr/2015/humans-instances#Mark,14,http://www.inria.fr/2015/humans#age
7,http://www.inria.fr/2015/humans-instances#Gaston,102,http://www.inria.fr/2015/humans#age
8,http://www.inria.fr/2015/humans-instances#Flora,95,http://www.inria.fr/2015/humans#age
9,http://www.inria.fr/2015/humans-instances#Pierre,71,http://www.inria.fr/2015/humans#age


In [37]:
# Scratch experiment: call SPARQLWrapper directly against `wds1`, asking for a
# JSON result, instead of going through MonaLIA.sparql_service_to_dataframe.
# NOTE(review): `xml` is imported here but not referenced in this cell.
import xml
sparql = SPARQLWrapper(wds1, returnFormat=JSON)
sparql.setQuery(qs1)
#sparql.setMethod(POST )
result = sparql.query()

# The lines below are disabled alternatives for decoding/inspecting the response.
#processed_results = json.load(result.response)
#cols = processed_results['head']['vars']

#result.convert()

#result.response.read()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [16]:
# Debugging aid: show the Accept header the wrapper builds for the current
# return format (private SPARQLWrapper helper).
sparql._getAcceptHeader()

'application/sparql-results+json,application/json,text/javascript,application/javascript'

In [9]:
# Debugging aid: IPython `??` introspection, shows the source of SPARQLWrapper.
SPARQLWrapper??

In [6]:
# Variant of the direct SPARQLWrapper experiment: the scratch query `qs1` is
# sent with HTTP POST to the endpoint stored in `wds` (not `wds1`).
from SPARQLWrapper import SPARQLWrapper, JSON, XML, TURTLE, TSV, POSTDIRECTLY , POST
sparql = SPARQLWrapper(wds)
sparql.setQuery(qs1)
sparql.setReturnFormat(JSON)
sparql.setMethod(POST )
result = sparql.query()

# Decoding attempts, kept disabled:
#processed_results = json.load(result.response)
#cols = processed_results['head']['vars']

#result.response.read()

# Show the Accept header built by the wrapper (private helper) as the cell output.
sparql._getAcceptHeader()

'application/sparql-results+json,application/json,text/javascript,application/javascript'

In [None]:
image_set_df.head()

In [31]:
import urllib.parse
import urllib.request

# Query the endpoint with a plain HTTP GET, bypassing SPARQLWrapper.
# `import urllib` alone does not load the `urllib.request` submodule, so both
# submodules are imported explicitly. The query is percent-encoded in full
# (braces, `?`, `*` as well as spaces) instead of only replacing spaces.
tutorial_endpoint = 'http://localhost:8080/tutorial/myserver'
tutorial_query = 'select * where {?x ?p ?y}limit 10'
url = tutorial_endpoint + '?' + urllib.parse.urlencode(
    {'query': tutorial_query}, quote_via=urllib.parse.quote
)
request = urllib.request.Request(url)
#request.add_header("Accept", "application/sparql-results+json")

# Close the HTTP response deterministically; the raw bytes are the cell output.
with urllib.request.urlopen(request) as response:
    response_body = response.read()
response_body

b'<?xml version="1.0" ?>\r\n<sparql xmlns=\'http://www.w3.org/2005/sparql-results#\'>\r\n<head>\r\n<variable name=\'x\'/>\r\n<variable name=\'y\'/>\r\n<variable name=\'p\'/>\r\n</head>\r\n<results>\r\n<result>\r\n<binding name=\'x\'><bnode>_:b639</bnode></binding>\r\n<binding name=\'y\'><literal datatype=\'http://www.w3.org/2001/XMLSchema#integer\'>1641</literal></binding>\r\n<binding name=\'p\'><uri>http://jocondelab.iri-research.org/ns/jocondelab/YearInfoEnd</uri></binding>\r\n</result>\r\n<result>\r\n<binding name=\'x\'><bnode>_:b655</bnode></binding>\r\n<binding name=\'y\'><literal datatype=\'http://www.w3.org/2001/XMLSchema#integer\'>1757</literal></binding>\r\n<binding name=\'p\'><uri>http://jocondelab.iri-research.org/ns/jocondelab/YearInfoEnd</uri></binding>\r\n</result>\r\n<result>\r\n<binding name=\'x\'><bnode>_:b680</bnode></binding>\r\n<binding name=\'y\'><literal datatype=\'http://www.w3.org/2001/XMLSchema#integer\'>1844</literal></binding>\r\n<binding name=\'p\'><uri>http

In [32]:
request.headers

{}

#### Give the short column names