# DataTable_RDF_Data_Cube_development

This Notebook steps through the development of a method to convert a UKDS DataTable .tab file to a RDF Data Cube file.

## Initial setup

### Import packages

In [86]:
import os, ukds
import pandas as pd
import urllib

### Set filepaths

This sets a filepath to an example data dictionary on a local file system, in this case the 'uktus15_household_ukda_data_dictionary.rtf' file.

In [3]:
base_dir=os.path.join(*[os.pardir]*4,r'_Data\United_Kingdom_Time_Use_Survey_2014-2015\UKDA-8128-tab')
dt_fp=os.path.join(base_dir,r'tab\uktus15_household.tab')
dd_fp=os.path.join(base_dir,r'mrdoc\allissue\uktus15_household_ukda_data_dictionary.rtf')

### Create DataTable

A ukds.DataTable instance is created and the .tab file is read into it.

In [56]:
dt=ukds.DataTable(dt_fp,dd_fp)

### Print first five rows

In [5]:
dt.tab.head()

Unnamed: 0,serial,strata,psu,HhOut,hh_wt,IMonth,IYear,DM014,DM016,DM510,...,Relate10_P1,Relate10_P2,Relate10_P3,Relate10_P4,Relate10_P5,Relate10_P6,Relate10_P7,Relate10_P8,Relate10_P9,Relate10_P10
0,11010903,-2,-2,598,,9,2014,0,0,0,...,-2,-2.0,,,,,,,,
1,11010904,-2,-2,598,,9,2014,0,0,0,...,-2,-2.0,,,,,,,,
2,11010906,-2,-2,598,,10,2014,0,0,0,...,-2,-2.0,-2.0,,,,,,,
3,11010907,-2,-2,598,,9,2014,1,1,0,...,-2,-2.0,-2.0,,,,,,,
4,11010908,-2,-2,598,,9,2014,0,0,0,...,-2,,,,,,,,,


## Discussion

### Aim


### Sample call

Sample code could look like:

```python
result = dt.to_data_cube() # dt is a DataTable instance
```

## Developing the method

### inputs

In [14]:
prefix='ukds8128'
dataset_name='uktus15_household'

### prefixes

```turtle
@prefix qb:    <http://purl.org/linked-data/cube#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix ukds8128: <http://purl.org/berg/ukds8128/> .
@prefix ukds8128-code: <http://purl.org/berg/ukds8128/code#> .
@prefix ukds8128-measure: <http://purl.org/berg/ukds8128/measure#> .
```

In [16]:
def get_data_cube_prefixes(prefix):
    """Returns the prefixes for the Data Cube rdf
    """

    st="""
@prefix qb:    <http://purl.org/linked-data/cube#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix %(prefix)s: <http://purl.org/berg/%(prefix)s/> .
@prefix %(prefix)s-code: <http://purl.org/berg/%(prefix)s/code#> .
@prefix %(prefix)s-measure: <http://purl.org/berg/%(prefix)s/measure#> .

""" % {'prefix':prefix}
    return st
    
dt.get_data_cube_prefixes=get_data_cube_prefixes
print(dt.get_data_cube_prefixes(prefix))


@prefix qb:    <http://purl.org/linked-data/cube#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix ukds8128: <http://purl.org/berg/ukds8128/> .
@prefix ukds8128-code: <http://purl.org/berg/ukds8128/code#> .
@prefix ukds8128-measure: <http://purl.org/berg/ukds8128/measure#> .




### dataset

```turtle
    ukds8128:uktus15_household a qb:DataSet;
        qb:structure ukds8128:uktus15_household-dsd .
```

In [17]:
def get_data_cube_dataset(prefix,dataset_name):
    """Return the qb:DataSet
    """
    
    l=[
        '%s:%s a qb:DataSet' % (prefix,dataset_name),
        'qb:structure %s:%s-dsd' % (prefix,dataset_name),
    ]
    st=' ;\n\t'.join(l) + ' .\n\n'
    return st
dt.get_data_cube_dataset=get_data_cube_dataset
print(dt.get_data_cube_dataset(prefix,dataset_name))

ukds8128:uktus15_household a qb:DataSet ;
	qb:structure ukds8128:uktus15_household-dsd .




### get_data_cube_data_structure_definition

In [18]:
def get_data_cube_data_structure_definition(prefix,dataset_name):
    """Returns a RDF Turtle string of the qb:DataStructureDefinition using the Data Cube and skos vocabulary
    
    Arguments:
        - self: the DataDictionary instance
        - prefix (str): the prefix to use for the data dictionary uris   
        - dataset_name (str): the name of the dataset
    
    """
    
    l=['%s:%s-dsd a qb:DataStructureDefinition' % (prefix,dataset_name)]
        
    for i,variable in enumerate(dt.datadictionary.get_variable_names()):
        l.append('qb:component [ qb:measure %s-measure:%s; qb:order %s ]' % (prefix,variable,i+1))
        
    st=' ;\n\t'.join(l) + ' .\n\n'
    
    return st

dt.get_data_cube_data_structure_definition=get_data_cube_data_structure_definition
print(dt.get_data_cube_data_structure_definition(prefix,dataset_name))

ukds8128:uktus15_household-dsd a qb:DataStructureDefinition ;
	qb:component [ qb:measure ukds8128-measure:serial; qb:order 1 ] ;
	qb:component [ qb:measure ukds8128-measure:strata; qb:order 2 ] ;
	qb:component [ qb:measure ukds8128-measure:psu; qb:order 3 ] ;
	qb:component [ qb:measure ukds8128-measure:HhOut; qb:order 4 ] ;
	qb:component [ qb:measure ukds8128-measure:hh_wt; qb:order 5 ] ;
	qb:component [ qb:measure ukds8128-measure:IMonth; qb:order 6 ] ;
	qb:component [ qb:measure ukds8128-measure:IYear; qb:order 7 ] ;
	qb:component [ qb:measure ukds8128-measure:DM014; qb:order 8 ] ;
	qb:component [ qb:measure ukds8128-measure:DM016; qb:order 9 ] ;
	qb:component [ qb:measure ukds8128-measure:DM510; qb:order 10 ] ;
	qb:component [ qb:measure ukds8128-measure:DM1115; qb:order 11 ] ;
	qb:component [ qb:measure ukds8128-measure:DM1619; qb:order 12 ] ;
	qb:component [ qb:measure ukds8128-measure:NumAdult; qb:order 13 ] ;
	qb:component [ qb:measure ukds8128-measure:NumChild; qb:order 14 ] ;


### get_data_cube_measure_properties

In [20]:
def get_data_cube_measure_properties(prefix):
    """Returns a RDF Turtle string of the qb:MeaureProperty using the Data Cube and skos vocabulary
    
    Arguments:
        - self: the DataDictionary instance
        - variable (str): the variable to convert to RDF
        - prefix (str): the prefix to use for the data dictionary uris   
    
    """
    st=""
    
    for d in dt.datadictionary.variable_list:

        variable=d['variable']
        
        if d['value_labels']:
            x=', qb:CodedProperty'
        else:
            x=''

        l=[
            'ukds8128-measure:%s a rdf:Property, qb:MeasureProperty%s' % (variable,x) ,
            'rdfs:label "serial"@en' ,
            'rdfs:subPropertyOf sdmx-measure:obsValue',
        ]

        if d['value_labels']:
            l.append('qb:CodeList %s-code:%s' % (prefix,variable))
            l.append('rdfs:range %s-code:%s' % (prefix,variable[0].upper()+variable[1:]))
        else:
            if d['variable_type']=='numeric':
                l.append('rdfs:range xsd:decimal')

        st+=' ;\n\t'.join(l) + ' .\n\n'
    
    return st

dt.get_data_cube_measure_properties=get_data_cube_measure_properties
print(dt.get_data_cube_measure_properties(prefix))

ukds8128-measure:serial a rdf:Property, qb:MeasureProperty ;
	rdfs:label "serial"@en ;
	rdfs:subPropertyOf sdmx-measure:obsValue ;
	rdfs:range xsd:decimal .

ukds8128-measure:strata a rdf:Property, qb:MeasureProperty, qb:CodedProperty ;
	rdfs:label "serial"@en ;
	rdfs:subPropertyOf sdmx-measure:obsValue ;
	qb:CodeList ukds8128-code:strata ;
	rdfs:range ukds8128-code:Strata .

ukds8128-measure:psu a rdf:Property, qb:MeasureProperty, qb:CodedProperty ;
	rdfs:label "serial"@en ;
	rdfs:subPropertyOf sdmx-measure:obsValue ;
	qb:CodeList ukds8128-code:psu ;
	rdfs:range ukds8128-code:Psu .

ukds8128-measure:HhOut a rdf:Property, qb:MeasureProperty, qb:CodedProperty ;
	rdfs:label "serial"@en ;
	rdfs:subPropertyOf sdmx-measure:obsValue ;
	qb:CodeList ukds8128-code:HhOut ;
	rdfs:range ukds8128-code:HhOut .

ukds8128-measure:hh_wt a rdf:Property, qb:MeasureProperty ;
	rdfs:label "serial"@en ;
	rdfs:subPropertyOf sdmx-measure:obsValue ;
	rdfs:range xsd:decimal .

ukds8128-measure:IMonth a rdf:Prop

### get_data_cube_codelist

In [149]:
def get_data_cube_codelist(prefix):
    """Returns a RDF Turtle string of the codelist using the Data Cube and skos vocabulary
    
    Arguments:
        - prefix (str): the prefix to use for the data dictionary uris
    
    """
    
    st=""
    
    for d in dt.datadictionary.variable_list:
        
        if not d['value_labels']: continue

        variable=d['variable']
        
        #
        if not variable=="strata": continue
        
        variable_lower=d['variable'][0].lower() + d['variable'][1:]
        variable_upper=d['variable'][0].upper() + d['variable'][1:]

        variable_label=d['variable_label']
        
        value_labels={k:v for k,v in d['value_labels'].items()}
        if not value_labels: continue
        
        #print(value_labels)
               
        #add additional value_labels if needed        
        for value in dt.tab[variable].unique(): 
            if value:
                try:
                    v=float(value)
                    if not v in value_labels.keys():
                        value_labels[v]=v
                except ValueError: 
                    value_labels[value]=value #'"%s"@en' %value
    
        # ConceptScheme
        l=[]
        l+=[
            '%s-code:%s a skos:ConceptScheme' % (prefix,variable_lower),
            'skos:prefLabel "%s"@en' % variable,
            'rdfs:label "%s"@en' % variable,
            'skos:notation "%s"' % variable,
            'skos:note "%s"@en' % variable_label,
            'rdfs:seeAlso %s-code:%s' % (prefix,variable_upper)
            ]
        for value,label in value_labels.items():
            l.append('skos:hasTopConcept %s:%s_code_%s' % (prefix,variable_lower,urllib.parse.quote(str(value))))
        st+=' ;\n\t'.join(l) + ' .\n\n'

        # Class
        l=[
            '%s-code:%s a rdfs:Class, owl:Class ' % (prefix,variable_upper),
            'rdfs:subClassOf skos:Concept ',
            'rdfs:label "%s"@en ' % variable,
            'rdfs:comment "%s"@en ' % variable_label,
            'rdfs:seeAlso %s-code:%s ' % (prefix,variable_lower),
        ]
        st+=' ;\n\t'.join(l) + ' .\n\n'

        # Codes
        for value,label in value_labels.items():
            l=[
                '%s-code:%s_code_%s a skos:Concept, %s-code:%s' % (prefix,variable_lower,urllib.parse.quote(str(value)),
                                                                   prefix,variable_upper),
                'skos:topConceptOf %s-code:%s' % (prefix,variable_lower),
                'skos:prefLabel %s' % (label if isinstance(label,float) else '"%s"@en' % label),
                'skos:notation %s' % (value if isinstance(value,float) else '"%s"' % value),
                'skos:inScheme %s-code:%s'  % (prefix,variable_lower),
            ]
            st+=' ;\n\t'.join(l) + ' .\n\n'
            
        break
            
    return st
    
dt.get_data_cube_codelist=get_data_cube_codelist
print(dt.get_data_cube_codelist(prefix))

ukds8128-code:strata a skos:ConceptScheme ;
	skos:prefLabel "strata"@en ;
	rdfs:label "strata"@en ;
	skos:notation "strata" ;
	skos:note "Strata"@en ;
	rdfs:seeAlso ukds8128-code:Strata ;
	skos:hasTopConcept ukds8128:strata_code_-2.0 ;
	skos:hasTopConcept ukds8128:strata_code_110.0 ;
	skos:hasTopConcept ukds8128:strata_code_117.0 ;
	skos:hasTopConcept ukds8128:strata_code_104.0 ;
	skos:hasTopConcept ukds8128:strata_code_112.0 ;
	skos:hasTopConcept ukds8128:strata_code_115.0 ;
	skos:hasTopConcept ukds8128:strata_code_128.0 ;
	skos:hasTopConcept ukds8128:strata_code_123.0 ;
	skos:hasTopConcept ukds8128:strata_code_373.0 ;
	skos:hasTopConcept ukds8128:strata_code_119.0 ;
	skos:hasTopConcept ukds8128:strata_code_107.0 ;
	skos:hasTopConcept ukds8128:strata_code_106.0 ;
	skos:hasTopConcept ukds8128:strata_code_121.0 ;
	skos:hasTopConcept ukds8128:strata_code_103.0 ;
	skos:hasTopConcept ukds8128:strata_code_111.0 ;
	skos:hasTopConcept ukds8128:strata_code_122.0 ;
	skos:hasTopConcept ukds8128:

In [85]:
import urllib
urllib.parse.quote('00:30:00')

'00%3A30%3A00'

### get_data_cube_observations

In [153]:
def get_data_cube_observations(prefix,dataset_name):
    """Returns a RDF Turtle string of the codelist using the Data Cube and skos vocabulary
    
    Arguments:
        - prefix (str): the prefix to use for the data dictionary uris
    
    """
    
    st=""
    
    has_code_list={x['variable']:True if x['value_labels'] else False for x in dt.datadictionary.get_variable_list()}
    #print(has_code_list)
    
    for index,row in dt.tab.iterrows():
        
        l=[
            '%s:%s_obs%s a qb:Observation' % (prefix,dataset_name,index),
            'qb:dataSet %s:%s' % (prefix,dataset_name),
        ]
        
        for variable,value in row.iteritems():
            if value:
                if has_code_list[variable]:
                    try:
                        code=urllib.parse.quote(str(float(value)))
                    except ValueError:
                        code=urllib.parse.quote(str(value))
                    l.append('%s-measure:%s %s-code:%s_code_%s' % (prefix,variable,prefix,variable,code))
                else:
                    try:
                        v=float(value)
                        l.append('%s-measure:%s %s' % (prefix,variable,value))
                    except ValueError:
                        l.append('%s-measure:%s "%s"' % (prefix,variable,value))
    
        st+=' ;\n\t'.join(l) + ' .\n\n'
        
        break
    
    return st
    
dt.get_data_cube_observations=get_data_cube_observations
print(dt.get_data_cube_observations(prefix,dataset_name))

ukds8128:uktus15_household_obs0 a qb:Observation ;
	qb:dataSet ukds8128:uktus15_household ;
	ukds8128-measure:serial 11010903 ;
	ukds8128-measure:strata ukds8128-code:strata_code_-2.0 ;
	ukds8128-measure:psu ukds8128-code:psu_code_-2.0 ;
	ukds8128-measure:HhOut ukds8128-code:HhOut_code_598.0 ;
	ukds8128-measure:IMonth ukds8128-code:IMonth_code_9.0 ;
	ukds8128-measure:IYear ukds8128-code:IYear_code_2014.0 ;
	ukds8128-measure:DM014 ukds8128-code:DM014_code_0.0 ;
	ukds8128-measure:DM016 ukds8128-code:DM016_code_0.0 ;
	ukds8128-measure:DM510 ukds8128-code:DM510_code_0.0 ;
	ukds8128-measure:DM1115 ukds8128-code:DM1115_code_0.0 ;
	ukds8128-measure:DM1619 ukds8128-code:DM1619_code_0.0 ;
	ukds8128-measure:NumAdult ukds8128-code:NumAdult_code_2.0 ;
	ukds8128-measure:NumChild ukds8128-code:NumChild_code_0.0 ;
	ukds8128-measure:NumSSex ukds8128-code:NumSSex_code_0.0 ;
	ukds8128-measure:NumCPart ukds8128-code:NumCPart_code_0.0 ;
	ukds8128-measure:NumMPart ukds8128-code:NumMPart_code_2.0 ;
	ukds812

### Imports

In [6]:
import rdflib
from rdflib.namespace import RDF

### Set up input variables

In [8]:
g=rdflib.Graph() # an empty graph
dd_prefix='o8128' # the prefix for the Data Dictionary uri
dd_uri=r'http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/' # the Data Dictionary uri

### Set up namespaces and bind them to the graph

In [9]:
dd_namespace=rdflib.Namespace(dd_uri)
g.bind(dd_prefix,dd_namespace)

### Function to add .tab data to the rdflib graph

In [18]:
def add_row_data(graph,data_dict):
    """Adds the data to the graph
    
    Arguments:
        - graph (rdflib.Graph):
        - data_dict (dict): a dictionary of the row data {variable:value}
    
    """
    
    a=rdflib.BNode()
    
    for k,v in data_dict.items():
        graph.add((a,dd_namespace[k],rdflib.Literal(v)))
        
    return graph

In [19]:
g.update("DELETE WHERE { ?s ?p ?o }")
g=add_row_data(g,dt.tab.loc[0].to_dict())
print(g.serialize(format='ttl').decode())

@prefix o8128: <http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] o8128:Accom "1" ;
    o8128:BenOth "2" ;
    o8128:CCPersNo "-1" ;
    o8128:Cable "2" ;
    o8128:CableNum "-1" ;
    o8128:Comp "1" ;
    o8128:CompNum "1" ;
    o8128:DM014 "0" ;
    o8128:DM016 "0" ;
    o8128:DM1115 "0" ;
    o8128:DM1619 "0" ;
    o8128:DM510 "0" ;
    o8128:DMSex_P1 "1" ;
    o8128:DMSex_P10 "" ;
    o8128:DMSex_P2 "2" ;
    o8128:DMSex_P3 "" ;
    o8128:DMSex_P4 "" ;
    o8128:DMSex_P5 "" ;
    o8128:DMSex_P6 "" ;
    o8128:DMSex_P7 "" ;
    o8128:DMSex_P8 "" ;
    o8128:DMSex_P9 "" ;
    o8128:DVAge_P1 "80" ;
    o8128:DVAge_P10 "" ;
    o8128:DVAge_P2 "71" ;
    o8128:DVAge_P3 "" ;
    o8128:DVAge_P4 "" ;
    o8128:DVAge_P5 "" ;
    o8128:DVAge_P6 "" ;
    o8128:DVA

## Putting it all together

### Final method for the DataDictionary class

In [30]:
import rdflib
from rdflib.namespace import RDF

def to_rdf(self,graph,prefix,uri):
    """Places the DataTable data in an rdflib Graph.
    
    Arguments:
        - graph (rdflib.Graph): a graph to place the data in
        - prefix (str): a prefix for the Data Dictionary ontology (used to describe the variables)
        - uri (str): a uri for the Data Dictionary ontology (used to describe the variables)
    
    Returns:
        - (rdflib.Graph): the input graph with the DataTable data inserted into it.
        
    """
    
    dd_namespace=rdflib.Namespace(dd_uri)
    graph.bind(dd_prefix,dd_namespace)
    
    for index,row in self.tab.iterrows():
        
        data_dict=row.to_dict()
        
        a=rdflib.BNode()
    
        for k,v in data_dict.items():
            graph.add((a,dd_namespace[k],rdflib.Literal(v)))
            
    return graph

In [31]:
kls=ukds.DataTable
kls.to_rdf=to_rdf
dt=kls()
dt.read_tab(dt_fp)
g=rdflib.Graph()
g=dt.to_rdf(graph=g,prefix='o8128',uri='http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/')
len(g)

1585555

In [32]:
g.serialize('tab.ttl',format='ttl')

## to_ttl

In [28]:
l=list(range(10))
a=iter(l)
while True:
    try:
        print(next(a))
    except StopIteration:
        break

0
1
2
3
4
5
6
7
8
9


In [49]:
def to_ttl(self,filename,prefix,uri):
    """Places the DataTable data in an rdflib Graph.
    
    Creates multiple files if size > 30 MB
    
    Arguments:
        - filename (str): the name of the output .ttl file - NO EXTENSION
        - prefix (str): a prefix for the Data Dictionary ontology (used to describe the variables)
        - uri (str): a uri for the Data Dictionary ontology (used to describe the variables)
    
    Returns:
        - (rdflib.Graph): the input graph with the DataTable data inserted into it.
        
    """
    file_index=0
    i=self.tab.iterrows()
    
    while True:
        
        with open(filename+'_'+str(file_index)+'.ttl','w',encoding="UTF-8") as file:
            file.write('@prefix %s: <%s> .\n' % (prefix,uri))
            file.write('\n')
    
            while True: 

                try:
                    index,row = next(i)
                except StopIteration:
                    return
                
                data_dict=row.to_dict()

                l=[]
                for k,v in data_dict.items():
                    l.append('%s:%s "%s"' % (prefix,k,v))

                file.write('[] %s . \n\n' % ' ;\n\t'.join(l))
    
                if index%500==0:
                    filesize_mb=os.path.getsize(filename+'_'+str(file_index)+'.ttl')/(1024*1024.0)
                    if filesize_mb>40:
                        file_index+=1
                        break

In [50]:
kls=ukds.DataTable
kls.to_ttl=to_ttl
dt=kls()
dt.read_tab(dt_fp)
dt.to_ttl('tab',prefix='o8128',uri='http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/')
with open('tab_0.ttl','r') as file:
    print(file.read()[:10000])

@prefix o8128: <http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/> .

[] o8128:serial "11010903" ;
	o8128:strata "-2" ;
	o8128:psu "-2" ;
	o8128:HhOut "598" ;
	o8128:hh_wt "" ;
	o8128:IMonth "9" ;
	o8128:IYear "2014" ;
	o8128:DM014 "0" ;
	o8128:DM016 "0" ;
	o8128:DM510 "0" ;
	o8128:DM1115 "0" ;
	o8128:DM1619 "0" ;
	o8128:NumAdult "2" ;
	o8128:NumChild "0" ;
	o8128:NumSSex "0" ;
	o8128:NumCPart "0" ;
	o8128:NumMPart "2" ;
	o8128:NumCivP "0" ;
	o8128:DVHsize "2" ;
	o8128:Relsize "1" ;
	o8128:SelPer "1" ;
	o8128:CCPersNo "-1" ;
	o8128:Accom "1" ;
	o8128:Hhldr1 "1" ;
	o8128:Hhldr2 "1" ;
	o8128:Hhldr3 "0" ;
	o8128:Hhldr4 "0" ;
	o8128:Hhldr5 "0" ;
	o8128:Hhldr6 "0" ;
	o8128:Hhldr7 "0" ;
	o8128:Hhldr8 "0" ;
	o8128:Hhldr9 "0" ;
	o8128:Hhldr10 "0" ;
	o8128:HiHNum "1" ;
	o8128:Tenure "1" ;
	o8128:NumRooms "8" ;
	o8128:TVSet "1" ;
	o8128:TVSetNum "1" ;
	o8128:Cable "2" ;
	o8128:CableNum "-1" ;
	o8128:Games "2" ;
	o8128:GamesNum "-1" ;
	o8128:Land "1" ;
	o8128:LandNum "1" ;
	o8128:Mob "1" ;
	o

### Some SPARQL queries

### Which predicates are used?

This takes a minute to run

In [50]:
from pandas.io.json import json_normalize
import json
query="""
PREFIX o8128: <http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/>

SELECT DISTINCT ?p
WHERE
    {
        ?s ?p ?o .
    }
#LIMIT 100

"""
df=json_normalize(json.loads(g.query(query).serialize(format='json'))['results']['bindings'])
df['p.value']=df['p.value'].str.replace(r'http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/','o8128:')
df

Unnamed: 0,p.type,p.value
0,uri,o8128:Mob
1,uri,o8128:PaidTim6
2,uri,o8128:Relate6_P1
3,uri,o8128:Relate1_P1
4,uri,o8128:Help19
5,uri,o8128:NPaidT11
6,uri,o8128:NPaidDa3
7,uri,o8128:PaidHrs5
8,uri,o8128:Hhldr8
9,uri,o8128:Relate7_P2


### Number of adults in household

In [40]:

df

Unnamed: 0,o.type,o.value,p.type,p.value,s.type,s.value
0,literal,1.0,uri,o8128:Mob,bnode,N63bea323102f42be8b0509544f75e399
1,literal,-1.0,uri,o8128:PaidTim6,bnode,N7eaf5455902540068d1c796e22305daf
2,literal,-2.0,uri,o8128:Relate6_P1,bnode,N0b65372d14384d75857850d48322fa29
3,literal,0.0,uri,o8128:Relate1_P1,bnode,N0ebb075fb672496a9d9f7fd58d3addfe
4,literal,0.0,uri,o8128:Help19,bnode,N237febd49c2f4fd49564bd2e70002d2b
5,literal,-1.0,uri,o8128:NPaidT11,bnode,Nf3d9f65c36f6493ca259644982cdff55
6,literal,-1.0,uri,o8128:NPaidDa3,bnode,N789d5c23056641e792e1a6d9cae587f1
7,literal,-1.0,uri,o8128:PaidHrs5,bnode,N3f9956db66f54476bd2732006d96ba9a
8,literal,0.0,uri,o8128:Hhldr8,bnode,Nf27a3eaa51c0469a99fedf500ea6e4b6
9,literal,,uri,o8128:Relate7_P2,bnode,Nbf54717b93f1439eb5cc4924e89e28a1


In [44]:
df.dtypes

o.type     object
o.value    object
p.type     object
p.value    object
s.type     object
s.value    object
dtype: object

In [45]:
for x in g.query(query):
    print(x)

(rdflib.term.URIRef('http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/Mob'), rdflib.term.BNode('N63bea323102f42be8b0509544f75e399'), rdflib.term.Literal('1'))
(rdflib.term.URIRef('http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/PaidTim6'), rdflib.term.BNode('N7eaf5455902540068d1c796e22305daf'), rdflib.term.Literal('-1'))
(rdflib.term.URIRef('http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/Relate6_P1'), rdflib.term.BNode('N0b65372d14384d75857850d48322fa29'), rdflib.term.Literal('-2'))
(rdflib.term.URIRef('http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/Relate1_P1'), rdflib.term.BNode('N0ebb075fb672496a9d9f7fd58d3addfe'), rdflib.term.Literal('0'))
(rdflib.term.URIRef('http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/Help19'), rdflib.term.BNode('N237febd49c2f4fd49564bd2e70002d2b'), rdflib.term.Literal('0'))
(rdflib.term.URIRef('http://purl.org/berg/ontology/10.5255/UKDA-SN-8128-1/NPaidT11'), rdflib.term.BNode('Nf3d9f65c36f6493ca259644982cdff55'), rdflib.term.Literal