In [34]:
# coding: utf-8

# Converting AHRC Data to RDF
# 
# 2016.12.03

# 1. Find each entity in the JSON file and assign it a variable name in Python

# Use safeJSON to import the file and convert it to a Python object (I originally used json but found that it was difficult to deal with values that do not exist - safeJSON resolves this by replacing any nonexistent values with SafeNone).

from datetime import datetime
from pprint import pprint

import rdflib
from rdflib import Graph, Literal, BNode, Namespace, RDF, URIRef, XSD
from rdflib.namespace import DC, FOAF, SKOS

import safeJSON

# Define the namespaces whose terms are used as predicates and classes below.
# (These lines create rdflib Namespace objects; nothing is imported here.)
# Note that `ahproject` ends in '#' and is used for properties, whereas the
# resource and output-class URIs are built inside the main loop from a
# separate base URI that ends in '/'.
ahproject = Namespace('http://data.open.ac.uk/meta/ontology/ahproject#')
# Supplies the `status` property attached to projects.
dataopen = Namespace('http://data.open.ac.uk/meta/ontology/')
# Supplies the `category` property used for the grant category.
doap = Namespace('http://usefulinc.com/ns/doap#')
# Supplies `hasURL` for publications.
fabio = Namespace('http://purl.org/spar/fabio/') 
# Supplies `hasOutput` and `hasPostalAddressLine`.
frapo = Namespace('http://purl.org/cerif/frapo/') 
# Supplies the `category` property used for the fund type.
gr = Namespace('http://purl.org/goodrelations/v1#')
# Supplies the Organization class plus classification / siteAddress.
org = Namespace('http://www.w3.org/ns/org#')
# Supplies `isbn` for publications.
prism = Namespace('http://prismstandard.org/namespaces/basic/2.0/')  
# Supplies the Project / Funding / FundingBody classes and the properties
# that link them (funds, provides, grantNumber, startDate, endDate, ...).
projectfunding = Namespace("http://vocab.ox.ac.uk/projectfunding#")
# Supplies the Address class and the organisation-name / address properties.
vcard = Namespace('http://www.w3.org/2006/vcard/ns#')
# Supplies `hasCollaborator`.
vivo = Namespace('http://vivoweb.org/ontology/core#')

# A single graph accumulates the triples from every input file; it is
# serialised once, after the main loop has finished.
g = Graph()

for x in range(1, 5440):
    # The counter is rebound to its string form, which is the numeric part
    # of the input file name (json_files/file<N>.json).
    x = str(x)
    json_file = 'json_files/file%s.json' % x

    # Parse the file with safeJSON so that look-ups of absent keys yield a
    # SafeNone placeholder rather than raising.
    with open(json_file) as data_file:
        data = safeJSON.load(data_file)

# Project URI
# Identify the element for the project's ID (projectOverview.projectComposition.project.id), and assign it to the variable project_id:

    project_id = data['projectOverview']['projectComposition']['project']['id']

    # Base URI from which every resource URI in this script is built.
    ahproject_base_uri = 'http://data.open.ac.uk/meta/ontology/ahproject/'

    # A file without a project ID contributes nothing: all remaining work in
    # this loop iteration is nested under this check.
    if type(project_id) is not safeJSON.SafeNoneClass:
        project_uri = ''.join([ahproject_base_uri, 'project/', project_id])

# Terms directly linked to Project

# Title
# Identify the element for the project's title (projectOverview.projectComposition.project.title), and assign it to the variable project_title:

        # Every field read here sits under
        # projectOverview.projectComposition.project, so look that up once.
        project_record = data['projectOverview']['projectComposition']['project']

        # Title (project.title)
        project_title = project_record['title']

        # Status (project.status)
        project_status = project_record['status']

        # Abstract (project.abstractText)
        project_abstract = project_record['abstractText']

        # URL (project.url)
        project_url = project_record['url']

        # Potential impact (project.potentialImpactText)
        project_potential_impact = project_record['potentialImpactText']

        # Subject and topic keywords: both are lists that are walked later,
        # when the triples are generated.
        research_topics = project_record['researchTopic']
        research_subjects = project_record['researchSubject']

# We can then iterate through these lists in a for loop and convert the results to RDF, as shown in section 2, below.

# Fund and related terms

# Fund
# Construct a URI for 'fund':

        # The fund is identified by a fragment on the project's own URI.
        fund_uri = project_uri + '#fund'

        # Fund type (project.fund.type)
        fund_type = data['projectOverview']['projectComposition']['project']['fund']['type']

        # Grant reference (project.grantReference)
        grant_reference = data['projectOverview']['projectComposition']['project']['grantReference']

        # Grant category (project.grantCategory)
        grant_category = data['projectOverview']['projectComposition']['project']['grantCategory']

        # Start date (project.fund.start), kept both as the raw string and as
        # a datetime object. `datetime` is imported once at the top of the
        # file rather than on every loop iteration.
        # NOTE(review): strptime raises if the value is absent or not in
        # YYYY-MM-DD form, and unlike the other fields these two dates are
        # not guarded - confirm every record carries both.
        fund_start = data['projectOverview']['projectComposition']['project']['fund']['start']
        fund_start_datetime = datetime.strptime(fund_start, '%Y-%m-%d')

        # End date (project.fund.end), handled the same way.
        fund_end = data['projectOverview']['projectComposition']['project']['fund']['end']
        fund_end_datetime = datetime.strptime(fund_end, '%Y-%m-%d')

# Funder and related terms

# Funder
# Construct a URI for 'funder':
# Identify the element for the project's funder ID (projectOverview.projectComposition.project.fund.funder.id), and assign it to the variable funder_id:

        # All funder details sit under project.fund.funder.
        funder_record = data['projectOverview']['projectComposition']['project']['fund']['funder']

        # Funder ID, and the organisation URI derived from it.
        funder_id = funder_record['id']
        funder_uri = ''.join([ahproject_base_uri, 'organisation/', funder_id])

        # Funder name (project.fund.funder.name)
        funder_name = funder_record['name']

        # Funder URL (project.fund.funder.url)
        funder_url = funder_record['url']

# Lead Research Organisation and related terms

# Lead Research Organisation
# Construct a URI for 'Lead Research Organisation', by extracting the ID and using the AHProject base URI:

        # All lead-organisation details sit under
        # projectOverview.projectComposition.leadResearchOrganisation.
        lead_org_record = data['projectOverview']['projectComposition']['leadResearchOrganisation']

        # Organisation ID, and the organisation URI derived from it.
        lead_research_org_id = lead_org_record['id']
        lead_research_org_uri = ''.join([ahproject_base_uri, 'organisation/', lead_research_org_id])

        # Name (leadResearchOrganisation.name)
        lead_research_org_name = lead_org_record['name']

        # Department (leadResearchOrganisation.department)
        lead_research_org_dept = lead_org_record['department']

        # Organisation type (leadResearchOrganisation.typeInd)
        lead_research_org_type = lead_org_record['typeInd']

        # Home page (leadResearchOrganisation.url)
        lead_research_org_url = lead_org_record['url']

        # The address is identified by a fragment on the organisation's URI.
        lead_research_org_address_uri = lead_research_org_uri + '#address'

# Address Lines 1-5
# Identify the elements for lines 1-5 of the lead research organisation's address (projectOverview.projectComposition.leadResearchOrganisation.address.line1 to projectOverview.projectComposition.leadResearchOrganisation.address.line5), and assign them to the variables lead_research_org_address_line1 to lead_research_org_address_line5:

        # All address parts sit under leadResearchOrganisation.address.
        lead_org_address = data['projectOverview']['projectComposition']['leadResearchOrganisation']['address']

        lead_research_org_address_line1 = lead_org_address['line1']
        lead_research_org_address_line2 = lead_org_address['line2']
        lead_research_org_address_line3 = lead_org_address['line3']
        lead_research_org_address_line4 = lead_org_address['line4']
        lead_research_org_address_line5 = lead_org_address['line5']

        # Concatenate the address lines that exist, separated by ', '.
        # Joining only the present lines gives a separator between each
        # consecutive pair and none at either end; the result is an empty
        # string when no line exists.
        lead_research_org_address_lines = ', '.join(
            address_line
            for address_line in (
                lead_research_org_address_line1,
                lead_research_org_address_line2,
                lead_research_org_address_line3,
                lead_research_org_address_line4,
                lead_research_org_address_line5,
            )
            if type(address_line) is not safeJSON.SafeNoneClass
        )

        # Line breaks embedded in the original text become commas as well.
        lead_research_org_address_lines = lead_research_org_address_lines.replace("\r\n", ", ")

        # Postcode (leadResearchOrganisation.address.postCode)
        lead_research_org_postcode = lead_org_address['postCode']

        # Region (leadResearchOrganisation.address.region)
        lead_research_org_region = lead_org_address['region']

        # Country (leadResearchOrganisation.address.country)
        lead_research_org_country = lead_org_address['country']

# Person and related terms

# Define the 'people' object:

        # The people attached to the project (projectComposition.personRole).
        people = data['projectOverview']['projectComposition']['personRole']

        # Identify the role list of each person and, within it, the name of
        # each role. The role loop is nested inside the person loop so that it
        # always walks the roles of the person currently being visited; run
        # after the person loop instead, it would see only the last person's
        # roles and would hit an undefined (or stale) `person_role` whenever
        # `people` is empty.
        for person in people:
            person_role = person['role']
            for role in person_role:
                person_role_name = role['name']

# We can then iterate through this object in a for loop and convert the results to RDF, as shown in section 2, below.

# Publication and related terms
# Define the 'publications' object:

        # The three collections below are only looked up here; each one is
        # walked later, when the triples are generated.
        composition = data['projectOverview']['projectComposition']

        # Publications (project.publication). Per the original author's note,
        # this is always a safeJSON.SafeList, and is '[]' when the project
        # has no publications.
        publications = composition['project']['publication']

        # Collaborating organisations (projectComposition.collaborator).
        collaborators = composition['collaborator']

        # Outputs (project.output), keyed by output category.
        outputs = composition['project']['output']

# We can then iterate through these objects in a for loop and convert the results to RDF, as shown in section 2, below.

# 2. Use RDFLib to construct relationships between variables from the JSON file and external ontologies

#Convert the URI variables defined above into URI references:
        # Wrap the URI strings built earlier as rdflib URI references.
        project = URIRef(project_uri)
        fund = URIRef(fund_uri)
        funder = URIRef(funder_uri)
        lead_research_org = URIRef(lead_research_org_uri)
        lead_research_org_address = URIRef(lead_research_org_address_uri)

        # Terms directly linked to the project. The type triple is always
        # written; each descriptive property is written only when the source
        # file actually holds a value for it.
        # NOTE(review): `abstract` is a DCMI Terms property rather than part
        # of the DC 1.1 element set that `DC` names - confirm this predicate
        # URI is the intended one.
        g.add((project, RDF.type, projectfunding.Project))
        for predicate, value in (
            (FOAF.homepage, project_url),
            (dataopen.status, project_status),
            (DC.title, project_title),
            (DC.abstract, project_abstract),
            (ahproject.potentialImpact, project_potential_impact),
        ):
            if type(value) is not safeJSON.SafeNoneClass:
                g.add((project, predicate, Literal(value, datatype=XSD.string)))

#Subject and Topic keywords
        # Topics and subjects are handled identically apart from the path
        # segment of their URI: each becomes a SKOS concept that the project
        # points to via dc:subject, labelled with its text when present.
        # Entries without an ID are skipped.
        for concept_entries, path_segment in (
            (research_topics, 'topic/'),
            (research_subjects, 'subject/'),
        ):
            for concept_entry in concept_entries:
                concept_id = concept_entry['id']
                concept_text = concept_entry['text']
                if type(concept_id) is safeJSON.SafeNoneClass:
                    continue
                concept_uri = URIRef(ahproject_base_uri + path_segment + concept_id)
                g.add((concept_uri, RDF.type, SKOS.Concept))
                g.add((project, DC.subject, concept_uri))
                if type(concept_text) is not safeJSON.SafeNoneClass:
                    g.add((concept_uri, SKOS.prefLabel, Literal(concept_text, datatype=XSD.string)))

#Fund and related terms
        # Fund and related terms: the fund is typed and linked to the project
        # it funds.
        g.add((fund, RDF.type, projectfunding.Funding))
        g.add((fund, projectfunding.funds, project))

        # Optional descriptive values. The grant reference is guarded in the
        # same way as every other optional field, so that a missing value is
        # skipped instead of being written out as a literal.
        for predicate, value in (
            (projectfunding.grantNumber, grant_reference),
            (gr.category, fund_type),
            (doap.category, grant_category),
        ):
            if type(value) is not safeJSON.SafeNoneClass:
                g.add((fund, predicate, Literal(value, datatype=XSD.string)))

        # Both dates were parsed into datetime objects earlier in the loop.
        g.add((fund, projectfunding.startDate, Literal(fund_start_datetime, datatype=XSD.dateTime)))
        g.add((fund, projectfunding.endDate, Literal(fund_end_datetime, datatype=XSD.dateTime)))

#Funder and related terms
        # Funder and related terms: the funder is typed as a funding body and
        # linked to the fund it provides; name and home page are written only
        # when present.
        g.add((funder, RDF.type, projectfunding.FundingBody))
        g.add((funder, projectfunding.provides, fund))
        for predicate, value in (
            (vcard.hasOrganizationName, funder_name),
            (FOAF.homepage, funder_url),
        ):
            if type(value) is not safeJSON.SafeNoneClass:
                g.add((funder, predicate, Literal(value, datatype=XSD.string)))

#Lead Research Organisation and related terms
        # Lead research organisation and related terms.
        g.add((lead_research_org, RDF.type, org.Organization))
        # NOTE(review): the W3C ORG vocabulary spells its property
        # `org:headOf` (lower-case h) and uses it to link a person to an
        # organisation - confirm `org.HeadOf` is the intended predicate.
        g.add((lead_research_org, org.HeadOf, project))
        for predicate, value in (
            (vcard.hasOrganizationName, lead_research_org_name),
            (vcard.hasOrganizationUnit, lead_research_org_dept),
            (org.classification, lead_research_org_type),
            (FOAF.homepage, lead_research_org_url),
        ):
            if type(value) is not safeJSON.SafeNoneClass:
                g.add((lead_research_org, predicate, Literal(value, datatype=XSD.string)))

        # The address node is always created and attached to the organisation.
        g.add((lead_research_org, org.siteAddress, lead_research_org_address))
        g.add((lead_research_org_address, RDF.type, vcard.Address))

        # The concatenated address lines are always a string, never SafeNone,
        # so test for emptiness: when the source has no address lines, no
        # empty postal-address-line literal is written (the collaborator
        # addresses already behave this way).
        if lead_research_org_address_lines:
            g.add((
                lead_research_org_address,
                frapo.hasPostalAddressLine,
                Literal(lead_research_org_address_lines, datatype=XSD.string),
            ))
        for predicate, value in (
            (vcard.hasPostalCode, lead_research_org_postcode),
            (vcard.region, lead_research_org_region),
            (vcard.hasCountryName, lead_research_org_country),
        ):
            if type(value) is not safeJSON.SafeNoneClass:
                g.add((lead_research_org_address, predicate, Literal(value, datatype=XSD.string)))

#People
        # One FOAF person per entry that has an ID; entries without one are
        # skipped entirely.
        for person in people:
            person_id = person['id']
            person_firstname = person['firstName']
            person_surname = person['surname']
            person_url = person['url']
            person_role = person['role']
            if type(person_id) is safeJSON.SafeNoneClass:
                continue

            person_uri = URIRef(ahproject_base_uri + 'person/' + person_id)

            # Each named role links the project to the person: the
            # PRINCIPAL_INVESTIGATOR role maps to hasPrincipalInvestigator,
            # any other role name maps to hasCoInvestigator.
            for role in person_role:
                person_role_name = role['name']
                if type(person_role_name) is safeJSON.SafeNoneClass:
                    continue
                if person_role_name == 'PRINCIPAL_INVESTIGATOR':
                    investigator_link = projectfunding.hasPrincipalInvestigator
                else:
                    investigator_link = projectfunding.hasCoInvestigator
                g.add((project, investigator_link, person_uri))

            g.add((person_uri, RDF.type, FOAF.Person))
            for predicate, value in (
                (FOAF.givenName, person_firstname),
                (FOAF.familyName, person_surname),
                (FOAF.homepage, person_url),
            ):
                if type(value) is not safeJSON.SafeNoneClass:
                    g.add((person_uri, predicate, Literal(value, datatype=XSD.string)))

#Publications
        # One bibliographic resource per publication that has an ID, attached
        # to the project as an output.
        # NOTE(review): BibliographicResource, isPartOf and issued are DCMI
        # Terms names rather than members of the DC 1.1 element set that `DC`
        # names - confirm these URIs are the intended ones.
        for publication in publications:
            publication_id = publication['id']
            publication_title = publication['title']
            publication_url = publication['url']
            publication_parent = publication['parentPublicationTitle']
            publication_isbn = publication['isbn']
            publication_date_str = publication['date']
            if type(publication_id) is safeJSON.SafeNoneClass:
                continue

            publication_uri = URIRef(ahproject_base_uri + 'publication/' + publication_id)
            g.add((project, frapo.hasOutput, publication_uri))
            g.add((publication_uri, RDF.type, DC.BibliographicResource))

            # Plain string properties, written only when present.
            for predicate, value in (
                (DC.title, publication_title),
                (fabio.hasURL, publication_url),
                (DC.isPartOf, publication_parent),
                (prism.isbn, publication_isbn),
            ):
                if type(value) is not safeJSON.SafeNoneClass:
                    g.add((publication_uri, predicate, Literal(value, datatype=XSD.string)))

            # The date arrives as a YYYY-MM-DD string and is stored as a
            # dateTime literal.
            if type(publication_date_str) is not safeJSON.SafeNoneClass:
                publication_date = datetime.strptime(publication_date_str, '%Y-%m-%d')
                g.add((publication_uri, DC.issued, Literal(publication_date, datatype=XSD.dateTime)))
        
#Collaborating Organisations
        # One organisation per collaborator that has an ID, linked from the
        # project via vivo:hasCollaborator.
        for collab_org in collaborators:
            collab_org_id = collab_org['id']
            collab_org_name = collab_org['name']
            collab_org_url = collab_org['url']
            collab_org_address = collab_org['address']
            collab_org_address_line_values = [
                collab_org_address['line1'],
                collab_org_address['line2'],
                collab_org_address['line3'],
                collab_org_address['line4'],
                collab_org_address['line5'],
            ]
            collab_org_postcode = collab_org_address['postCode']
            collab_org_region = collab_org_address['region']
            collab_org_country = collab_org_address['country']
            if type(collab_org_id) is safeJSON.SafeNoneClass:
                continue

            collab_org_uri_string = ahproject_base_uri + 'organisation/' + collab_org_id
            collab_org_uri = URIRef(collab_org_uri_string)
            g.add((project, vivo.hasCollaborator, collab_org_uri))
            g.add((collab_org_uri, RDF.type, org.Organization))
            for predicate, value in (
                (vcard.hasOrganizationName, collab_org_name),
                (FOAF.homepage, collab_org_url),
            ):
                if type(value) is not safeJSON.SafeNoneClass:
                    g.add((collab_org_uri, predicate, Literal(value, datatype=XSD.string)))

            # Work out which parts of the address actually exist.
            present_address_lines = [
                address_line
                for address_line in collab_org_address_line_values
                if type(address_line) is not safeJSON.SafeNoneClass
            ]
            present_locality_values = [
                (predicate, value)
                for predicate, value in (
                    (vcard.hasPostalCode, collab_org_postcode),
                    (vcard.region, collab_org_region),
                    (vcard.hasCountryName, collab_org_country),
                )
                if type(value) is not safeJSON.SafeNoneClass
            ]

            # An address node is created only when at least one part exists.
            if not present_address_lines and not present_locality_values:
                continue

            collab_org_address_uri = URIRef(collab_org_uri_string + '#address')
            g.add((collab_org_uri, org.siteAddress, collab_org_address_uri))
            g.add((collab_org_address_uri, RDF.type, vcard.Address))

            if present_address_lines:
                # Join the existing lines with ', ' and turn embedded line
                # breaks into commas as well.
                collab_org_address_lines = ', '.join(present_address_lines).replace("\r\n", ", ")
                g.add((
                    collab_org_address_uri,
                    frapo.hasPostalAddressLine,
                    Literal(collab_org_address_lines, datatype=XSD.string),
                ))
            for predicate, value in present_locality_values:
                g.add((collab_org_address_uri, predicate, Literal(value, datatype=XSD.string)))

#Outputs
        # JSON field -> (predicate, literal datatype) for the optional
        # properties of an output, in the order they are written.
        # NOTE(review): `ahproject.Impact` is capitalised unlike the other
        # ahproject properties, and `available` is a DCMI Terms name rather
        # than a DC 1.1 element - confirm both URIs are intended.
        output_field_map = (
            ('description', DC.description, XSD.string),
            ('title', DC.title, XSD.string),
            ('impact', ahproject.Impact, XSD.string),
            ('url', FOAF.homepage, XSD.string),
            ('type', DC.type, XSD.string),
            ('sector', ahproject.sector, XSD.string),
            ('geographicReach', ahproject.geographicReach, XSD.string),
            ('yearFirstProvided', DC.available, XSD.integer),
        )

        for output_category in outputs:
            # keyFindingsOutput is rendered in the JSON as a dictionary rather
            # than a list (presumably because only one key findings output is
            # permitted), so it cannot be walked like the other categories
            # and is skipped here.
            if output_category == 'keyFindingsOutput':
                continue

            output_list = outputs[output_category]
            for output in output_list:
                output_id = output['id']
                if type(output_id) is safeJSON.SafeNoneClass:
                    continue

                output_uri = URIRef(ahproject_base_uri + 'output/' + output_id)
                g.add((project, frapo.hasOutput, output_uri))

                # The output's class is the category name with its first
                # letter upper-cased, appended to the base URI.
                output_class = output_category[0].upper() + output_category[1:]
                output_class_uri = URIRef(ahproject_base_uri + output_class)
                g.add((output_uri, RDF.type, output_class_uri))

                for field_name, predicate, literal_type in output_field_map:
                    field_value = output[field_name]
                    if type(field_value) is not safeJSON.SafeNoneClass:
                        g.add((output_uri, predicate, Literal(field_value, datatype=literal_type)))

# Iterate over triples in store and print them out.
# print("--- printing raw triples ---")
#for s, p, o in g:
    #print((s, p, o))

# Write the output to a Turtle file:

    #turtle_file = 'turtle_files/file' + x + '.ttl'
    
# Serialize the complete graph to one Turtle file.
# The graph is serialized before the file is opened, so a serialization
# error does not leave a truncated output file behind.
turtle_data = g.serialize(format='turtle')

# Depending on the rdflib version, serialize() returns bytes or str;
# normalise to UTF-8 bytes so the binary write works in both cases.
if not isinstance(turtle_data, bytes):
    turtle_data = turtle_data.encode('utf-8')

# The with-statement closes the file even if the write fails.
with open('AHRCDataToRDF_AllFiles_V1_20161203.ttl', 'wb') as turtle_out:
    turtle_out.write(turtle_data)

#print(g.serialize(format='turtle'))

In [None]:
# Distinct titles of every resource typed as projectfunding:Project.
project_titles_query = """PREFIX dc: <http://purl.org/dc/elements/1.1/>
       PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
       PREFIX projectfunding: <http://vocab.ox.ac.uk/projectfunding#>
       SELECT DISTINCT ?project_title
       WHERE {
          ?project a projectfunding:Project .
          ?project dc:title ?project_title .
       }"""
qres = g.query(project_titles_query)

# Rows are printed exactly as rdflib returns them, with no extra formatting.
for row in qres:
    print(row)

In [36]:
# Pair each project title with the title of every output of that project
# that is typed as ahproject:ResearchDatabaseAndModelOutput.
database_outputs_query = """PREFIX dc: <http://purl.org/dc/elements/1.1/>
       PREFIX frapo: <http://purl.org/cerif/frapo/>
       PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
       PREFIX ahproject: <http://data.open.ac.uk/meta/ontology/ahproject/>
       SELECT DISTINCT ?project_title ?output_title
       WHERE {
          ?project dc:title ?project_title .
          ?project frapo:hasOutput ?output .
          ?output rdf:type ahproject:ResearchDatabaseAndModelOutput .
          ?output dc:title ?output_title .
       }"""
qres = g.query(database_outputs_query)

# Each result row holds the two selected variables in SELECT order.
for project_title, output_title in qres:
    print("Project: %s; Output: %s" % (project_title, output_title))

Project: Reconfiguring the Canon of Twentieth-Century Russian Poetry, 1991-2008; Output: Russian poetry database
Project: Mixing It: Diversity in Second World War Britain; Output: 15 Oral History interviews
Project: Human Experiments under National Socialism: Victims, Perpetrators and Post-War Trials; Output: Victims of Coerced Experiments and Research under National Socialism
Project: Records of Early English Drama, Middlesex/Westminster: Eight Theatres north of the Thames; Output: Records of Early English Drama, Middlesex/Westminster
Project: Mysticism, Myth and 'Celtic' Nationalism: A Case Study of Cornwall; Output: Archive of 60 oral history interviews housed at the Cornish Audio Visual Archive
Project: Megachurches and Social Engagement in London; Output: Jesus House Dataset
Project: A Pilot Historical Thesaurus of Scots; Output: Pilot Historical Thesaurus of Scots
Project: Design and innovation in the British Empire: a historical consideration of the innovation ecosystem; Output:

In [37]:
# Pair each project title with the title of every output of that project
# that is typed as ahproject:SoftwareAndTechnicalProductOutput.
software_outputs_query = """PREFIX dc: <http://purl.org/dc/elements/1.1/>
       PREFIX frapo: <http://purl.org/cerif/frapo/>
       PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
       PREFIX ahproject: <http://data.open.ac.uk/meta/ontology/ahproject/>
       SELECT DISTINCT ?project_title ?output_title
       WHERE {
          ?project dc:title ?project_title .
          ?project frapo:hasOutput ?output .
          ?output rdf:type ahproject:SoftwareAndTechnicalProductOutput .
          ?output dc:title ?output_title .
       }"""
qres = g.query(software_outputs_query)

# Each result row holds the two selected variables in SELECT order.
for project_title, output_title in qres:
    print("Project: %s; Output: %s" % (project_title, output_title))

Project: Crowd- and Community-fuelled Archaeological Research; Output: Forum software (Discourse fork)
Project: Creating a Web-Based Platform for English Language Teaching and Learning; Output: Englicious
Project: Extending the Englicious Platform for Primary English; Output: Englicious
Project: Stepping stones to the Neolithic? Islands, maritime connectivity and the 'western seaways' of Britain, 5000-3500 BC; Output: Google Earth sea level maps
Project: The Creative Exchange; Output: NewsDrop: a toolkit for supporting community journalism of parliamentary debates
Project: Empowering Data Citizens; Output: MobileMiner (Funf branch)
Project: The Creative Exchange; Output: Physical Playlist: Prototype
Project: Live Coding Network; Output: Threnoscope
Project: Live Coding Network; Output: Tidal
Project: Digital Music Lab - Analysing Big Music Data; Output: DML Research Information and Result Management System
Project: Semantic Technologies Enhancing Links and Linked data for Archaeologica

In [47]:
# Projects whose subject is labelled "Classics" and that have an output typed
# as ahproject:ResearchDatabaseAndModelOutput, together with the start and end
# dates of each funding record that funds the project.
#
# The xsd prefix is declared explicitly because the query uses xsd:string in
# the typed literal; without the declaration the query is only valid when the
# query engine happens to pre-bind xsd.
qres = g.query(
    """PREFIX dc: <http://purl.org/dc/elements/1.1/>
       PREFIX frapo: <http://purl.org/cerif/frapo/>
       PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
       PREFIX ahproject: <http://data.open.ac.uk/meta/ontology/ahproject/>
       PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
       PREFIX projectfunding: <http://vocab.ox.ac.uk/projectfunding#>
       PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
       SELECT DISTINCT ?project_title ?output_title ?start ?end
       WHERE {
          ?project dc:title ?project_title .
          ?project dc:subject ?subject .
          ?subject skos:prefLabel "Classics"^^xsd:string .
          ?project frapo:hasOutput ?output .
          ?output rdf:type ahproject:ResearchDatabaseAndModelOutput .
          ?output dc:title ?output_title .
          ?fund projectfunding:funds ?project .
          ?fund projectfunding:startDate ?start .
          ?fund projectfunding:endDate ?end .
       }""")

# Each row carries the four selected variables in SELECT order.
for row in qres:
    print("Project: %s; Output: %s; Start: %s; End: %s" % row)

Project: Hestia2: reading texts spatially; Output: A database has been compiled from the digital text of Herodotus; Start: 2013-07-01T00:00:00; End: 2014-06-30T00:00:00
Project: Network, Relation, Flow: Imaginations of Space in Herodotus' History; Output: A database has been compiled from the digital text of Herodotus; Start: 2008-09-01T00:00:00; End: 2009-07-01T00:00:00
Project: Poetry by Numbers, Then and Now: Metre, Mathematics, Machines and Manufacture; Output: Database of archival sources and mechanics; Start: 2014-12-31T00:00:00; End: 2016-06-30T00:00:00
Project: Network, Relation, Flow: Imaginations of Space in Herodotus' History; Output: The Perseus digital text of Herodotus was converted from TEI P4 to P5 and was subject to intense data cleaning; Start: 2009-07-01T00:00:00; End: 2010-07-31T00:00:00
Project: Online Corpus of the Inscriptions of Ancient North Arabia; Output: OCIANA (Database); Start: 2013-10-01T00:00:00; End: 2017-03-31T00:00:00
Project: Imaging Papyri at Oxford