In [45]:
import pandas as pd

In [1]:
from rdflib import Graph, Literal, RDF, URIRef
from rdflib.namespace import XSD

# Creating an RDF graph
g = Graph()

# Namespace definitions
base_uri = "http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#"
ns = URIRef(base_uri)

# Custom namespaces for properties and classes
PAPER = URIRef(base_uri + "Paper")
AUTHOR = URIRef(base_uri + "Author")
YEAR = URIRef(base_uri + "Year")


Creating from paper_details.csv to paper_details.ttl

In [7]:
import csv

# Assuming the CSV file is loaded as `paper_details.csv`
with open('data/papers_details.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # Create a URI for each paper based on its ID
        paper_uri = URIRef(ns + row['paperId'])
        
        # Add RDF types and literals for properties
        g.add((paper_uri, RDF.type, PAPER))
        g.add((paper_uri, URIRef(ns + "title"), Literal(row['title'], datatype=XSD.string)))
        g.add((paper_uri, URIRef(ns + "abstract"), Literal(row['abstract'], datatype=XSD.string)))
        g.add((paper_uri, URIRef(ns + "year"), Literal(row['year'], datatype=XSD.gYear)))
        g.add((paper_uri, URIRef(ns + "keywords"), Literal(row['keywords'], datatype=XSD.string)))
        g.add((paper_uri, URIRef(ns + "doi"), Literal(row['doi'], datatype=XSD.string)))

        # Additional properties would be added here based on availability

# Serialize graph to Turtle format and save
g.serialize(destination='turtle/papers_details.ttl', format='turtle')
print("Serialized and saved the Turtle file.")



Serialized and saved the Turtle file.


Testing paper_details.ttl 

In [21]:
from rdflib import Graph, URIRef

# Load the Turtle file into an RDFLib graph
g = Graph()
g.parse("turtle/papers_details.ttl", format="turtle")

# Namespace of your ontology
base_uri = "http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#"
PAPER = URIRef(base_uri + "Paper")

# Test 1: Check if papers are loaded correctly
papers = list(g.subjects(RDF.type, PAPER))
print(f"Total papers loaded: {len(papers)}")

# Test 2: Print titles and years of papers to verify data
for paper in papers:
    title = g.value(paper, URIRef(base_uri + "title"))
    year = g.value(paper, URIRef(base_uri + "year"))
    print(f"Title: {title}, Year: {year}")

# Optional: Run a SPARQL query to retrieve specific data
query = """
SELECT ?title ?year WHERE {
    ?paper a <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper>.
    ?paper <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#title> ?title.
    ?paper <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#year> ?year.
}
ORDER BY ?year
LIMIT 10
"""
for row in g.query(query):
    print(f"Title: {row.title}, Year: {row.year}")


Total papers loaded: 174
Title: Signature File Methods for Indexing Object-Oriented Database Systems, Year: 1992
Title: Enhance Student Learning with PowerPoint Games: Using Twenty Questions to Promote Active Learning in Managerial Accounting, Year: 2011
Title: Early Detection of Aphid Infestation and Insect-Plant Interaction Assessment in Wheat Using a Low-Cost Electronic Nose (E-Nose), Near-Infrared Spectroscopy and Machine Learning Modeling, Year: 2021
Title: O-Minimal Hybrid Reachability Games, Year: 2009
Title: Advanced Techniques for Content-Based Management of Multimedia Databases, Year: 2005
Title: Computational Trust Models and Machine Learning, Year: 2014
Title: Combining Machine Learning with Knowledge Engineering to detect Fake News in Social Networks - A Survey, Year: 2022
Title: Applying Machine Learning Techniques to Improve Linux Process Scheduling, Year: 2005
Title: Serious Games in Surgical Medical Education: A Virtual Emergency Department as a Tool for Teaching Clini

In [105]:
def escape_literal(text):
    """ Escapes quotes and controls characters in a text string for Turtle output. """
    if pd.isna(text):
        return ""
    # Escape backslashes first, then quotes, and replace newlines and carriage returns
    text = text.replace('\\', '\\\\').replace('"', '\\"').replace('\n', ' ').replace('\r', ' ')
    return text

# Load the CSV file
df = pd.read_csv('data/papers_details_enriched.csv')

# Define the base namespace for your papers
base_ns = "http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#"

# Open a file to write the Turtle content
with open('turtle/papers_details_enriched.ttl', 'w') as ttl_file:
    ttl_file.write('@prefix ns1: <{}> .\n'.format(base_ns))
    ttl_file.write('@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\n')
    
    # Iterate over the DataFrame rows
    for index, row in df.iterrows():
        paper_id = 'Paper_' + str(row['paperId'])
        title = escape_literal(row['title'])
        abstract = escape_literal(row['abstract'])
        year = str(int(row['year'])) if pd.notna(row['year']) else ""
        keywords = escape_literal(row['keywords'])
        doi = escape_literal(row['doi'])

        # Create an entry for each paper with its details
        ttl_file.write('ns1:{} a ns1:Paper ;\n'.format(paper_id))
        ttl_file.write('    ns1:title "{}"^^xsd:string ;\n'.format(title))
        ttl_file.write('    ns1:abstract "{}"^^xsd:string ;\n'.format(abstract))
        ttl_file.write('    ns1:year "{}"^^xsd:gYear ;\n'.format(year))
        ttl_file.write('    ns1:keywords "{}"^^xsd:string ;\n'.format(keywords))
        ttl_file.write('    ns1:doi "{}"^^xsd:string .\n\n'.format(doi))

print("Turtle file has been created with sanitized literals.")


Turtle file has been created with sanitized literals.


In [103]:
import re

def escape_literals(input_line):
    # This pattern matches text between quotes
    pattern = re.compile(r'"(.*?)(?<!\\)"', re.DOTALL)
    
    def replace_func(match):
        # Replace internal quotes with escaped quotes and escape newlines within the string
        escaped_string = match.group(1).replace('"', '\\"').replace('\n', ' ')
        # Ensure we do not double escape already escaped quotes
        escaped_string = re.sub(r'\\\\"', r'\\"', escaped_string)
        return f"\"{escaped_string}\""
    
    return pattern.sub(replace_func, input_line)

input_file_path = 'turtle/papers_details_enriched.ttl'
output_file_path = 'papers_details_enriched_fixed.ttl'

with open(input_file_path, 'r', encoding='utf-8') as infile, open(output_file_path, 'w', encoding='utf-8') as outfile:
    for line in infile:
        corrected_line = escape_literals(line)
        outfile.write(corrected_line)

print(f"Corrected file written to {output_file_path}")


Corrected file written to papers_details_enriched_fixed.ttl


Converting authors.csv --> authors.ttl 

In [11]:
import csv
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, FOAF

# Define the namespaces
ex = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
schema = Namespace("http://schema.org/")

# Create a graph
g = Graph()

# Bind the namespaces
g.bind("ex", ex)
g.bind("foaf", FOAF)

# Function to add authors to the graph
def add_author(author_id, name, affiliation, email):
    author_uri = ex[f"Author_{author_id}"]
    g.add((author_uri, RDF.type, ex.Author))
    g.add((author_uri, FOAF.name, Literal(name)))
    g.add((author_uri, schema.email, Literal(email)))
    g.add((author_uri, ex.affiliated_with, Literal(affiliation)))

# Read from the CSV file
with open('data/authors.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        add_author(row['authorId'], row['name'], row['affiliations'], row['email'])

# Serialize the graph to Turtle format
g.serialize(destination='turtle/authors.ttl', format='turtle')
print("Conversion to Turtle completed.")



Conversion to Turtle completed.


In [14]:
import csv
import re

def clean_affiliation_name(name):
    # Remove unwanted characters and encode spaces
    return re.sub(r'[^a-zA-Z0-9\s]', '', name).replace(' ', '_')

# Open the CSV file and read it
with open('data/affiliations.csv', 'r', encoding='utf-8') as csv_file:
    reader = csv.DictReader(csv_file)
    affiliations = list(reader)

# Open the Turtle file for writing
with open('turtle/affiliations.ttl', 'w', encoding='utf-8') as ttl_file:
    ttl_file.write("@prefix ap: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#> .\n")
    ttl_file.write("@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n")
    ttl_file.write("@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n")
    ttl_file.write("@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n")
    ttl_file.write("@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\n")
    
    # Iterate through each row and write data in Turtle format
    for affiliation in affiliations:
        cleaned_name = clean_affiliation_name(affiliation['name'])
        uri = f"ap:Affiliation_{cleaned_name}"
        address = affiliation['address'].replace('\n', ', ')
        
        ttl_file.write(f"{uri} rdf:type ap:Affiliation .\n")
        ttl_file.write(f"{uri} ap:affiliation_name \"{affiliation['name']}\"^^xsd:string .\n")
        ttl_file.write(f"{uri} ap:type \"{affiliation['type']}\"^^xsd:string .\n")
        ttl_file.write(f"{uri} ap:address \"{address}\"^^xsd:string .\n")
        ttl_file.write(f"{uri} ap:affiliation_email \"{affiliation['email']}\"^^xsd:string .\n")
        ttl_file.write(f"{uri} ap:phone_number \"{affiliation['phone_number']}\"^^xsd:string .\n")
        ttl_file.write("\n")


In [52]:
import pandas as pd
import re

# Load the CSV file
df = pd.read_csv('data/affiliated_with.csv')

# Function to convert affiliation names to a safe IRI format
def affiliation_to_iri(name):
    # Remove or replace invalid characters for IRI
    name = re.sub(r'[^\w\s]', '', name)  # Removes any non-alphanumeric characters
    name = name.replace(' ', '_')  # Replace spaces with underscores
    return f'ex:Affiliation_{name}'

# Prefixes for the Turtle file
prefixes = """
@prefix ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

"""

# Open a file to write the Turtle content
with open('turtle/affiliated_with.ttl', 'w') as ttl_file:
    ttl_file.write(prefixes)
    
    # Iterate through the DataFrame and write each Turtle statement
    for index, row in df.iterrows():
        author_iri = f"ex:Author_{row['authorId']}"
        affiliation_iri = affiliation_to_iri(row['affiliation'])
        ttl_file.write(f"{author_iri} ex:affiliated_with {affiliation_iri} .\n")

print("Turtle file has been created.")


Turtle file has been created.


In [24]:
# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

# Create a new graph
g = Graph()

# Bind prefixes
g.bind("ex", EX)
g.bind("foaf", FOAF)

# Read CSV data
import csv
with open('data/journals.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create a new journal resource
        journal = EX[f"Journal_{row['ss_venue_id']}"]
        
        # Add properties to the journal
        g.add((journal, RDF.type, EX.Journal))
        g.add((journal, EX.name, Literal(row['name'])))
        g.add((journal, EX.issn, Literal(row['issn'])))
        g.add((journal, FOAF.homepage, URIRef(row['url'])))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/journals.ttl', format='turtle')


<Graph identifier=N17b68c34565148108b03a4eaedad4b08 (<class 'rdflib.graph.Graph'>)>

In [27]:
import rdflib
from rdflib import Graph, Literal, RDF, URIRef, Namespace

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

# Create a new graph
g = Graph()

# Bind prefixes
g.bind("ex", EX)
g.bind("foaf", FOAF)

# Read CSV data
import csv
with open('data/journals_enriched.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create a new journal resource
        journal = EX[f"Journal_{row['ss_venue_id']}"]
        
        # Add properties to the journal
        g.add((journal, RDF.type, EX.Journal))
        g.add((journal, EX.name, Literal(row['name'])))
        g.add((journal, FOAF.homepage, URIRef(row['url'])))
        g.add((journal, EX.year, Literal(row['year'], datatype=rdflib.XSD.integer)))
        g.add((journal, EX.volume, Literal(row['volume'], datatype=rdflib.XSD.integer)))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/journals_enriched.ttl', format='turtle')


<Graph identifier=N09a9ad2604d94417be794e57698901d3 (<class 'rdflib.graph.Graph'>)>

In [29]:
import rdflib
from rdflib import Graph, Literal, RDF, URIRef, Namespace

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

# Create a new graph
g = Graph()

# Bind prefixes
g.bind("ex", EX)
g.bind("foaf", FOAF)

# Read CSV data
import csv
with open('data/conferences.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create a new conference resource
        conference = EX[f"Conference_{row['ss_venue_id']}"]
        
        # Add properties to the conference
        g.add((conference, RDF.type, EX.Conference))
        g.add((conference, EX.name, Literal(row['name'])))
        if row['url']:  # Ensure there is a URL before adding
            g.add((conference, FOAF.homepage, URIRef(row['url'])))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/conferences.ttl', format='turtle')


<Graph identifier=Nd1e553ddca604758b4289957e3fd5cee (<class 'rdflib.graph.Graph'>)>

In [31]:
import rdflib
from rdflib import Graph, Literal, RDF, URIRef, Namespace

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

# Create a new graph
g = Graph()

# Bind prefixes
g.bind("ex", EX)
g.bind("foaf", FOAF)

# Read CSV data
import csv
with open('data/conferences_enriched.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create a new conference resource
        conference = EX[f"Conference_{row['ss_venue_id']}"]
        
        # Add properties to the conference
        g.add((conference, RDF.type, EX.Conference))
        g.add((conference, EX.name, Literal(row['name'])))
        if row['url']:  # Ensure there is a URL before adding
            g.add((conference, FOAF.homepage, URIRef(row['url'])))
        g.add((conference, EX.city, Literal(row['city'])))
        g.add((conference, EX.year, Literal(row['year'], datatype=rdflib.XSD.gYear)))
        g.add((conference, EX.edition, Literal(int(row['edition']))))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/conferences_enriched.ttl', format='turtle')


<Graph identifier=N33b6d35781364f17810c30aee0241750 (<class 'rdflib.graph.Graph'>)>

In [33]:
import rdflib
from rdflib import Graph, Literal, RDF, URIRef, Namespace

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
XSD = Namespace("http://www.w3.org/2001/XMLSchema#")

# Create a new graph
g = Graph()

# Bind prefixes
g.bind("ex", EX)
g.bind("xsd", XSD)

# Read CSV data
import csv
with open('data/reviews.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create a new review resource
        review = EX[f"Review_{row['review_id']}"]
        
        # Add properties to the review
        g.add((review, RDF.type, EX.Review))
        g.add((review, EX.decision, Literal(row['decision'])))
        g.add((review, EX.date, Literal(row['date'], datatype=XSD.date)))
        g.add((review, EX.abstract, Literal(row['abstract'])))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/reviews.ttl', format='turtle')


<Graph identifier=Nd104cc9b20c3459a9f3bca18a8c49faa (<class 'rdflib.graph.Graph'>)>

In [35]:
import rdflib
from rdflib import Graph, RDF, URIRef, Namespace

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")

# Create a new graph
g = Graph()

# Bind prefix
g.bind("ex", EX)

# Read CSV data
import csv
with open('data/written_by.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create relationships
        paper = EX[f"Paper_{row['paperId']}"]
        author = EX[f"Author_{row['authorId']}"]
        
        # Add "written by" relationship
        g.add((paper, EX.written_by, author))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/written_by.ttl', format='turtle')


<Graph identifier=Na747425b959646698d6e72bd98cbe5dd (<class 'rdflib.graph.Graph'>)>

In [50]:
import rdflib
from rdflib import Graph, RDF, URIRef, Namespace

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")

# Create a new graph
g = Graph()

# Bind prefix
g.bind("ex", EX)

# Read CSV data
import csv
with open('data/written_by_enriched.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create relationships
        paper = EX[f"Paper_{row['paperId']}"]
        author = EX[f"Author_{row['authorId']}"]
        
        # Add "written by" relationship
        g.add((paper, EX.written_by, author))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/written_by_enriched.ttl', format='turtle')


<Graph identifier=N3175b32670d74f318f1247cd95d2bf6b (<class 'rdflib.graph.Graph'>)>

In [37]:
import rdflib
from rdflib import Graph, RDF, URIRef, Namespace, Literal

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")

# Create a new graph
g = Graph()

# Bind prefix
g.bind("ex", EX)

# Read CSV data
import csv
with open('data/citations.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create citation relationships
        citing_paper = EX[f"Paper_{row['paperId']}"]
        cited_paper = EX[f"Paper_{row['referenceId']}"]
        year = Literal(row['year'], datatype=rdflib.XSD.integer)
        
        # Add citation relationship
        g.add((citing_paper, EX.cites_to, cited_paper))
        g.add((citing_paper, EX.citation_year, year))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/citations.ttl', format='turtle')


<Graph identifier=N82596b213ab1483a8c065039bbb0b5fe (<class 'rdflib.graph.Graph'>)>

In [40]:
import rdflib
from rdflib import Graph, RDF, URIRef, Namespace, Literal

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")

# Create a new graph
g = Graph()

# Bind prefix
g.bind("ex", EX)

# Read CSV data
import csv
with open('data/published_in_enriched.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create URIs for paper and venue
        paper = EX[f"Paper_{row['paper_id']}"]
        venue = EX[f"Venue_{row['ss_venue_id']}"]
        year = Literal(row['year'], datatype=rdflib.XSD.integer)
        
        # Add triples to the graph
        g.add((paper, EX.published_in, venue))
        g.add((paper, EX.publish_year, year))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/published_in_enriched.ttl', format='turtle')


<Graph identifier=Nd135c2c7ad8b4f899543e250c94fd9b2 (<class 'rdflib.graph.Graph'>)>

In [52]:
import rdflib
from rdflib import Graph, RDF, URIRef, Namespace, Literal

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")

# Create a new graph
g = Graph()

# Bind prefix
g.bind("ex", EX)

# Read CSV data
import csv
with open('data/published_in_enriched_v2.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create URIs for paper and venue
        paper = EX[f"Paper_{row['paper_id']}"]
        venue = EX[f"Venue_{row['ss_venue_id']}"]
        year = Literal(row['year'], datatype=rdflib.XSD.integer)
        
        # Add triples to the graph
        g.add((paper, EX.published_in, venue))
        g.add((paper, EX.publish_year, year))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/published_in_enriched_v2.ttl', format='turtle')


<Graph identifier=Nd6fcd5029f3b463397c48533e0008eb1 (<class 'rdflib.graph.Graph'>)>

In [42]:
import rdflib
from rdflib import Graph, RDF, URIRef, Namespace, Literal

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")

# Create a new graph
g = Graph()

# Bind prefix
g.bind("ex", EX)

# Read CSV data
import csv
with open('data/published_in.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create URIs for paper and venue
        paper = EX[f"Paper_{row['paper_id']}"]
        venue = EX[f"Venue_{row['ss_venue_id']}"]
        
        # Add triples to the graph
        g.add((paper, EX.published_in, venue))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/published_in.ttl', format='turtle')


<Graph identifier=N355f42db72c747159da90d1edd252f9c (<class 'rdflib.graph.Graph'>)>

In [45]:
import rdflib
from rdflib import Graph, RDF, URIRef, Namespace, Literal

# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")

# Create a new graph
g = Graph()

# Bind prefix
g.bind("ex", EX)

# Read CSV data
import csv
with open('data/reviewed_by.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create URIs for review and author
        review = EX[f"Review_{row['review_id']}"]
        author = EX[f"Author_{row['author_id']}"]
        
        # Add triples to the graph
        g.add((review, EX.reviewed_by, author))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/reviewed_by.ttl', format='turtle')


<Graph identifier=Nfb05bc38f45f450fa99efc09c46c62b9 (<class 'rdflib.graph.Graph'>)>

In [48]:
# Define namespaces
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")

# Create a new graph
g = Graph()

# Bind prefix
g.bind("ex", EX)

# Read CSV data
import csv
with open('data/review_on.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create URIs for review and paper
        review = EX[f"Review_{row['review_id']}"]
        paper = EX[f"Paper_{row['paper_id']}"]
        
        # Add triples to the graph
        g.add((review, EX.reviewed_on, paper))

# Serialize the graph in Turtle format
g.serialize(destination='turtle/review_on.ttl', format='turtle')


<Graph identifier=Nccbf3e20e2f64864a2e7b103d02ec329 (<class 'rdflib.graph.Graph'>)>

In [64]:
import rdflib
from urllib.parse import quote

# Function to correct IRIs in the graph
def correct_iri(graph):
    corrected_graph = rdflib.Graph()
    for s, p, o in graph:
        new_s = rdflib.URIRef(quote(str(s), safe=":/#"))
        new_o = o
        if isinstance(o, rdflib.URIRef):
            new_o = rdflib.URIRef(quote(str(o), safe=":/#"))
        corrected_graph.add((new_s, p, new_o))
    return corrected_graph

# Load your TTL file
g = rdflib.Graph()
try:
    g.parse("turtle/affiliations.ttl", format="ttl")
    corrected_g = correct_iri(g)
    # Save the corrected graph
    corrected_g.serialize("corrected_affiliated_with.ttl", format="ttl")
    print("Corrected TTL has been saved.")
except Exception as e:
    print(f"Error loading or correcting TTL: {e}")

Corrected TTL has been saved.


Now lets connect to out GraphDb, with some queries we can check if the abox is uploaded and connected sucesfully

In [1]:
pip install SPARQLWrapper

Collecting SPARQLWrapper
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Installing collected packages: SPARQLWrapper
Successfully installed SPARQLWrapper-2.0.0
Note: you may need to restart the kernel to use updated packages.


Lets check if written_by_enriched and authours are connected. If we can reach, papers with the authour names we can verify it.

In [75]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Configure the SPARQL endpoint
sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")

# Define the query
query = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?paper ?authorName
WHERE {
  ?paper ex:written_by ?author .
  ?author foaf:name ?authorName .
}
LIMIT 15
"""

# Set the query and the return format
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute the query and process results
results = sparql.query().convert()
for result in results["results"]["bindings"]:
    print(f'Paper: {result["paper"]["value"]}, Author Name: {result["authorName"]["value"]}')


Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_00227f73b6547b43d9f6c22e0136ab1fadb7036c, Author Name: M. Preti
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_00227f73b6547b43d9f6c22e0136ab1fadb7036c-1, Author Name: M. Preti
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_00227f73b6547b43d9f6c22e0136ab1fadb7036c-2, Author Name: M. Preti
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_00227f73b6547b43d9f6c22e0136ab1fadb7036c-3, Author Name: M. Preti
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_00227f73b6547b43d9f6c22e0136ab1fadb7036c-4, Author Name: M. Preti
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_00227f73b6547b43d9f6c22e0136ab1fadb7036c-5, Author Name: M. Preti
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_00227f73b6547b43d9f6c22e0136ab1fadb7036c-6, Author Na

Lets check if authours are correctly linked to their respective institutions

In [76]:
def query_authors_and_affiliations():
    # Set up the SPARQL connection to your GraphDB instance
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Define your SPARQL query
    query = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX ap: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

    SELECT ?authorName ?affiliationName
    WHERE {
      ?author ex:affiliated_with ?affiliation .
      ?author foaf:name ?authorName .
      ?affiliation ap:affiliation_name ?affiliationName .
    }
    LIMIT 100
    """
    
    # Set the query to the SPARQL connection and specify the return format as JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    # Execute the query and get the results
    results = sparql.query().convert()
    
    # Print the results
    for result in results["results"]["bindings"]:
        print(f'Author Name: {result["authorName"]["value"]}, Affiliation Name: {result["affiliationName"]["value"]}')

# Run the function to query GraphDB
query_authors_and_affiliations()


Author Name: Yulyani Arifin, Affiliation Name: University of Washington
Author Name: Claudia Gonzalez Viejo, Affiliation Name: University of Washington
Author Name: M. Holanda, Affiliation Name: University of Washington
Author Name: S. Naqvi, Affiliation Name: University of Washington
Author Name: M. Mazzara, Affiliation Name: University of Washington
Author Name: P. H. Seo, Affiliation Name: University of Washington
Author Name: Dan Suciu, Affiliation Name: University of Washington
Author Name: S. Fuentes, Affiliation Name: University of Washington
Author Name: Emilio Molina, Affiliation Name: University of Washington
Author Name: J. Schmidhuber, Affiliation Name: University of Washington
Author Name: W. Kew, Affiliation Name: University of Washington
Author Name: A. Gionis, Affiliation Name: University of Washington
Author Name: B. Landfeldt, Affiliation Name: University of Washington
Author Name: A. Barto, Affiliation Name: University of Washington
Author Name: E. Bertino, Affiliati

Lets check authours and papers

In [84]:
def query_papers_with_authors():
    # Set up the SPARQL connection to your GraphDB instance
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Define your SPARQL query
    query = """
    PREFIX ns1: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>

    SELECT ?paperTitle ?authorName
    WHERE {
      ?paper ns1:title ?paperTitle.
      ?paper ns1:written_by ?author.
      ?author foaf:name ?authorName.
    }
    """
    
    # Set the query to the SPARQL connection and specify the return format as JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    # Execute the query and get the results
    results = sparql.query().convert()
    
    # Print the results
    print("Papers with Their Authors:")
    for result in results["results"]["bindings"]:
        print(f'Paper Title: {result["paperTitle"]["value"]}, Author Name: {result["authorName"]["value"]}')

# Run the function to query GraphDB
query_papers_with_authors()


Papers with Their Authors:
Paper Title: The Game of Triangles, Author Name: M. Preti
Paper Title: The Game of Triangles-1, Author Name: M. Preti
Paper Title: The Game of Triangles-2, Author Name: M. Preti
Paper Title: The Game of Triangles-3, Author Name: M. Preti
Paper Title: The Game of Triangles-4, Author Name: M. Preti
Paper Title: The Game of Triangles-5, Author Name: M. Preti
Paper Title: The Game of Triangles-6, Author Name: M. Preti
Paper Title: The Game of Triangles-7, Author Name: M. Preti
Paper Title: The Game of Triangles-8, Author Name: M. Preti
Paper Title: Bounds on the Price of stability of Undirected Network Design Games with Three Players, Author Name: Robert Bove
Paper Title: Bounds on the Price of stability of Undirected Network Design Games with Three Players-1, Author Name: Robert Bove
Paper Title: Bounds on the Price of stability of Undirected Network Design Games with Three Players-2, Author Name: Robert Bove
Paper Title: Bounds on the Price of stability of Undi

Lets check reviews-reviewed_by-authours

In [78]:
def query_graphdb():
    # Set up the SPARQL connection to your GraphDB instance
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Define your SPARQL query
    query = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    SELECT ?review ?abstract ?date ?decision ?author
    WHERE {
      ?review a ex:Review;
              ex:abstract ?abstract;
              ex:date ?date;
              ex:decision ?decision.
      ?review ex:reviewed_by ?author.
    }
    LIMIT 20
    """
    
    # Set the query to the SPARQL connection and specify the return format as JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    # Execute the query and get the results
    results = sparql.query().convert()
    
    # Print the results
    for result in results["results"]["bindings"]:
        print(f'Review: {result["review"]["value"]}, Abstract: {result["abstract"]["value"]}, '
              f'Date: {result["date"]["value"]}, Decision: {result["decision"]["value"]}, '
              f'Author: {result["author"]["value"]}')

# Run the function to query GraphDB
query_graphdb()


Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_000d4802-8894-4fb0-9a4e-3d5b305a1fe0, Abstract: Strong sometimes soldier large offer letter. State be us well. Town expert issue opportunity choose., Date: 2020-06-29, Decision: Accepted, Author: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_11063801
Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_000d4802-8894-4fb0-9a4e-3d5b305a1fe0, Abstract: Strong sometimes soldier large offer letter. State be us well. Town expert issue opportunity choose., Date: 2020-06-29, Decision: Accepted, Author: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_1716140
Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_000d4802-8894-4fb0-9a4e-3d5b305a1fe0, Abstract: Strong sometimes soldier large offer letter. State be us well. Town expert issue opportunity choose., Date: 2020-06-29, Decision: Accepted, Author: http:/

In [79]:
from SPARQLWrapper import SPARQLWrapper, JSON

def query_reviews_on_papers_with_authors():
    # Set up the SPARQL connection to your GraphDB instance
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Define your SPARQL query
    query = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>

    SELECT ?review ?abstract ?decision ?paper ?authorName
    WHERE {
      ?review a ex:Review;
              ex:abstract ?abstract;
              ex:decision ?decision;
              ex:reviewed_on ?paper.
      ?paper ex:written_by ?author.
      ?author foaf:name ?authorName.
    }
    LIMIT 20
    """
    
    # Set the query to the SPARQL connection and specify the return format as JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    # Execute the query and get the results
    results = sparql.query().convert()
    
    # Print the results
    for result in results["results"]["bindings"]:
        print(f'Review: {result["review"]["value"]}, Abstract: {result["abstract"]["value"]}, '
              f'Decision: {result["decision"]["value"]}, Paper: {result["paper"]["value"]}, '
              f'Author Name: {result["authorName"]["value"]}')

# Run the function to query GraphDB
query_reviews_on_papers_with_authors()


Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_000d4802-8894-4fb0-9a4e-3d5b305a1fe0, Abstract: Strong sometimes soldier large offer letter. State be us well. Town expert issue opportunity choose., Decision: Accepted, Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_0091346f761e451761f48eadd304bd3fb8a4126e, Author Name: E. Gordon
Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_000d4802-8894-4fb0-9a4e-3d5b305a1fe0, Abstract: Strong sometimes soldier large offer letter. State be us well. Town expert issue opportunity choose., Decision: Accepted, Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_0091346f761e451761f48eadd304bd3fb8a4126e, Author Name: L. Konopka
Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_00be8000-ff8c-41d7-b8ad-eb645761a796, Abstract: Police a themselves make guy. Western paper chance management training move teac

Lets check if papers_details_enriched is sucesfully imported to our database.

In [80]:
from SPARQLWrapper import SPARQLWrapper, JSON

def query_paper_details():
    # Set up the SPARQL connection to your GraphDB instance
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Define your SPARQL query
    query = """
    PREFIX ns1: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

    SELECT DISTINCT ?paper ?abstract ?doi ?keywords ?title ?year
    WHERE {
      ?paper a ns1:Paper;
             ns1:abstract ?abstract;
             ns1:doi ?doi;
             ns1:keywords ?keywords;
             ns1:title ?title;
             ns1:year ?year.
    }
    ORDER BY ?paper
    """
    
    # Set the query to the SPARQL connection and specify the return format as JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    # Execute the query and get the results
    results = sparql.query().convert()
    
    # Print the results
    print("Paper Details:")
    for result in results["results"]["bindings"]:
        print(f'Paper: {result["paper"]["value"]}, Abstract: {result["abstract"]["value"]}, '
              f'DOI: {result["doi"]["value"]}, Keywords: {result["keywords"]["value"]}, '
              f'Title: {result["title"]["value"]}, Year: {result["year"]["value"]}')

# Run the function to query GraphDB
query_paper_details()


Paper Details:
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#0000d36f99f61d6768e7c21c46ce4350b41a6a89, Abstract: , DOI: , Keywords: , Title: Signature File Methods for Indexing Object-Oriented Database Systems, Year: 1992
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#0000d36f99f61d6768e7c21c46ce4350b41a6a89-1, Abstract: , DOI: , Keywords: , Title: Signature File Methods for Indexing Object-Oriented Database Systems-1, Year: 1992
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#0000d36f99f61d6768e7c21c46ce4350b41a6a89-2, Abstract: , DOI: , Keywords: , Title: Signature File Methods for Indexing Object-Oriented Database Systems-2, Year: 1992
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#0000d36f99f61d6768e7c21c46ce4350b41a6a89-3, Abstract: , DOI: , Keywords: , Title: Signature File Methods for Indexing Object-Oriented Database Systems-3, Year: 1992
Paper: http://www.semanticweb.org/koc

Lets check if citations are sucesfully imported.

In [81]:
def query_citations():
    # Set up the SPARQL connection to your GraphDB instance
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Define your SPARQL query
    query = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

    SELECT ?paper ?citesTo ?citationYear
    WHERE {
      ?paper ex:cites_to ?citesTo.
      OPTIONAL { ?paper ex:citation_year ?citationYear. }
    }
    LIMIT 20
    """
    
    # Set the query to the SPARQL connection and specify the return format as JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    # Execute the query and get the results
    results = sparql.query().convert()
    
    # Print the results
    print("Citation Details:")
    for result in results["results"]["bindings"]:
        paper = result["paper"]["value"]
        citesTo = result["citesTo"]["value"]
        citationYear = result.get("citationYear", {}).get("value", "Not provided")
        print(f'Paper: {paper}, Cites To: {citesTo}, Citation Year: {citationYear}')

# Run the function to query GraphDB
query_citations()


Citation Details:
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_0000d36f99f61d6768e7c21c46ce4350b41a6a89-1, Cites To: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_0022282d20f3c507fba576062c6c58c02dd4c8f7, Citation Year: 2020
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_0000d36f99f61d6768e7c21c46ce4350b41a6a89-1, Cites To: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_0022282d20f3c507fba576062c6c58c02dd4c8f7, Citation Year: 2021
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_0000d36f99f61d6768e7c21c46ce4350b41a6a89-1, Cites To: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_007098d098b83163f76d17b75fb47f2eec43fd2d, Citation Year: 2020
Paper: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper_0000d36f99f61d6768e7c21c46ce4350b41a6a89-1, Cites To: http://www.semanticweb.org/kocak/ontologies/2024

In [83]:
from SPARQLWrapper import SPARQLWrapper, JSON

def query_citation_titles():
    # Set up the SPARQL connection to your GraphDB instance
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Define your SPARQL query
    query = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX ns1: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

    SELECT ?citingPaperTitle ?citedPaperTitle
    WHERE {
      ?citingPaper ex:cites_to ?citedPaper.
      ?citingPaper ns1:title ?citingPaperTitle.
      ?citedPaper ns1:title ?citedPaperTitle.
    }
    LIMIT 20
    """
    
    # Set the query to the SPARQL connection and specify the return format as JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    # Execute the query and get the results
    results = sparql.query().convert()
    
    # Print the results
    print("Citation Titles:")
    for result in results["results"]["bindings"]:
        print(f'Citing Paper Title: {result["citingPaperTitle"]["value"]}, Cited Paper Title: {result["citedPaperTitle"]["value"]}')

# Run the function to query GraphDB
query_citation_titles()


Citation Titles:
Citing Paper Title: Signature File Methods for Indexing Object-Oriented Database Systems-1, Cited Paper Title: 2005 Special Issue: Beyond emotion archetypes: Databases for emotion modelling using neural networks
Citing Paper Title: Signature File Methods for Indexing Object-Oriented Database Systems-1, Cited Paper Title: Ontology searching and browsing at the Rat Genome Database
Citing Paper Title: Signature File Methods for Indexing Object-Oriented Database Systems-1, Cited Paper Title: A Classification and Comparison of Main Memory Database Recovery Techniques
Citing Paper Title: Signature File Methods for Indexing Object-Oriented Database Systems-2, Cited Paper Title: The Impact of Monitoring in Infinitely Repeated Games: Perfect, Public, and Private
Citing Paper Title: Signature File Methods for Indexing Object-Oriented Database Systems-3, Cited Paper Title: miRLocator: Machine Learning-Based Prediction of Mature MicroRNAs within Plant Pre-miRNA Sequences
Citing Pa

Now lets check papers-review_on-reviews

In [85]:
def query_reviews_with_paper_details():
    # Set up the SPARQL connection to your GraphDB instance
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Define your SPARQL query
    query = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX ns1: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

    SELECT ?review ?reviewAbstract ?reviewDecision ?reviewDate ?paperTitle ?paperAbstract
    WHERE {
      ?review a ex:Review;
              ex:abstract ?reviewAbstract;
              ex:date ?reviewDate;
              ex:decision ?reviewDecision;
              ex:reviewed_on ?paper.
      
      ?paper ns1:title ?paperTitle;
             ns1:abstract ?paperAbstract.
    }
    """
    
    # Set the query to the SPARQL connection and specify the return format as JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    # Execute the query and get the results
    results = sparql.query().convert()
    
    # Print the results
    print("Review Details with Linked Paper Information:")
    for result in results["results"]["bindings"]:
        print(f'Review Abstract: {result["reviewAbstract"]["value"]}, Decision: {result["reviewDecision"]["value"]}, Date: {result["reviewDate"]["value"]}')
        print(f'Paper Title: {result["paperTitle"]["value"]}, Paper Abstract: {result["paperAbstract"]["value"]}\n')

# Run the function to query GraphDB
query_reviews_with_paper_details()


Review Details with Linked Paper Information:
Review Abstract: Strong sometimes soldier large offer letter. State be us well. Town expert issue opportunity choose., Decision: Accepted, Date: 2020-06-29
Paper Title: EEG Databases in Research and Clinical Practice: Current Status and Future Directions, Paper Abstract: The influence of genetics and the environment, identibing reliable pre-morbid markers, expanding syndrome profiles, building quantitative models of brain function: the current interest in databases is motivated by such scientific imperatives. It is also fueled by the improved availability of the necessary technology, and by a number of high-profile initiatives. In particular, the Human Brain Project has seeded a plethora of distributed databases across scale and species, helped develop the field of Neuroinformatics, and provided a major impetus to the development of the analytical, biocomputational modeling, simulation tools and the interoperability that will link a federat

In [86]:
from SPARQLWrapper import SPARQLWrapper, JSON

def query_review_details():
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Query for reviews and their authors
    query_authors = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>

    SELECT ?review ?abstract ?decision ?date ?authorName
    WHERE {
      ?review a ex:Review;
              ex:abstract ?abstract;
              ex:date ?date;
              ex:decision ?decision;
              ex:reviewed_by ?author.
      ?author foaf:name ?authorName.
    }
    """
    
    # Query for reviews and the papers they reviewed
    query_papers = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX ns1: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

    SELECT ?review ?abstract ?decision ?date ?paperTitle
    WHERE {
      ?review a ex:Review;
              ex:abstract ?abstract;
              ex:date ?date;
              ex:decision ?decision;
              ex:reviewed_on ?paper.
      ?paper ns1:title ?paperTitle.
    }
    """
    
    # Execute and print results for authors
    sparql.setQuery(query_authors)
    sparql.setReturnFormat(JSON)
    results_authors = sparql.query().convert()
    print("Reviews with Authors:")
    for result in results_authors["results"]["bindings"]:
        print(f'Review: {result["review"]["value"]}, Abstract: {result["abstract"]["value"]}, Decision: {result["decision"]["value"]}, Date: {result["date"]["value"]}, Author: {result["authorName"]["value"]}')
    
    # Execute and print results for papers
    sparql.setQuery(query_papers)
    sparql.setReturnFormat(JSON)
    results_papers = sparql.query().convert()
    print("\nReviews with Papers:")
    for result in results_papers["results"]["bindings"]:
        print(f'Review: {result["review"]["value"]}, Abstract: {result["abstract"]["value"]}, Decision: {result["decision"]["value"]}, Date: {result["date"]["value"]}, Paper Title: {result["paperTitle"]["value"]}')

# Run the function to query GraphDB
query_review_details()


Reviews with Authors:
Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_000d4802-8894-4fb0-9a4e-3d5b305a1fe0, Abstract: Strong sometimes soldier large offer letter. State be us well. Town expert issue opportunity choose., Decision: Accepted, Date: 2020-06-29, Author: Barry Liu
Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_000d4802-8894-4fb0-9a4e-3d5b305a1fe0, Abstract: Strong sometimes soldier large offer letter. State be us well. Town expert issue opportunity choose., Decision: Accepted, Date: 2020-06-29, Author: M. Yun
Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_000d4802-8894-4fb0-9a4e-3d5b305a1fe0, Abstract: Strong sometimes soldier large offer letter. State be us well. Town expert issue opportunity choose., Decision: Accepted, Date: 2020-06-29, Author: V. Lempitsky
Review: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Review_000d4802-8894-4fb0-9a4e-3d5b305a1fe0

Lets check journals

In [88]:
from SPARQLWrapper import SPARQLWrapper, JSON

def query_journals_and_linked_papers():
    sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/lab2")
    
    # Query for all journal details
    query_journals = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>

    SELECT ?journal ?name ?volume ?year ?homepage
    WHERE {
      ?journal a ex:Journal;
               ex:name ?name;
               ex:volume ?volume;
               ex:year ?year;
               foaf:homepage ?homepage.
    }
    """
    
    # Query for linking journals with papers
    query_journal_papers = """
    PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
    PREFIX ns1: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

    SELECT ?paperTitle ?journalName ?journalVolume ?journalYear
    WHERE {
      ?journal a ex:Journal;
               ex:name ?journalName;
               ex:volume ?journalVolume;
               ex:year ?journalYear.
      ?paper ns1:title ?paperTitle;
             ex:published_in ?journal.
    }
    """
    
    # Execute and print results for journals
    sparql.setQuery(query_journals)
    sparql.setReturnFormat(JSON)
    results_journals = sparql.query().convert()
    print("Journal Details:")
    for result in results_journals["results"]["bindings"]:
        print(f'Journal: {result["name"]["value"]}, Volume: {result["volume"]["value"]}, Year: {result["year"]["value"]}, Homepage: {result["homepage"]["value"]}')

    # Execute and print results for journal-linked papers
    sparql.setQuery(query_journal_papers)
    sparql.setReturnFormat(JSON)
    results_papers = sparql.query().convert()
    print("\nPapers and Their Journals:")
    for result in results_papers["results"]["bindings"]:
        print(f'Paper Title: {result["paperTitle"]["value"]}, Journal: {result["journalName"]["value"]}, Volume: {result["journalVolume"]["value"]}, Year: {result["journalYear"]["value"]}')

# Run the function to query GraphDB
query_journals_and_linked_papers()


Journal Details:
Journal: Journal of Network and Systems Management, Volume: 3, Year: 1977, Homepage: https://link.springer.com/journal/10922
Journal: International Journal of Financial Studies, Volume: 1, Year: 2004, Homepage: http://www.e-helvetica.nb.admin.ch/directAccess?callnumber=bel-281817
Journal: Molecular Informatics, Volume: 3, Year: 1992, Homepage: http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1868-1751
Journal: Molecular Informatics, Volume: 1, Year: 2007, Homepage: http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1868-1751
Journal: Clinical Chemistry, Volume: 3, Year: 1996, Homepage: http://www.clinchem.org/
Journal: Clinical Chemistry, Volume: 4, Year: 2001, Homepage: http://www.clinchem.org/
Journal: Molecular Informatics, Volume: 2, Year: 2000, Homepage: http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1868-1751
Journal: Journal of Chemical Information and Modeling, Volume: 7, Year: 1975, Homepage: http://pubs.acs.org/jcim
Journal: International Journal 

## B3. Create the final ontology

In [7]:
from rdflib import Graph

# List of Turtle files to combine
ttl_files = [
    "turtle/journals_enriched.ttl",
    "turtle/conferences_enriched.ttl",
    "turtle/papers_details_enriched.ttl",
    "turtle/citations.ttl",
    "turtle/review_on.ttl",
    "turtle/reviews.ttl",
    "turtle/reviewed_by.ttl",
    "turtle/published_in_enriched_v2.ttl",
    "turtle/affiliated_with.ttl",
    "turtle/affiliations.ttl",
    "turtle/written_by_enriched.ttl",
    "turtle/authors.ttl"
]

# Create an empty Graph
combined_graph = Graph()

# Load each file into the graph
for file_name in ttl_files:
    combined_graph.parse(file_name, format="ttl")

# Serialize and save the combined graph to an RDF/XML file
combined_graph.serialize(destination="combined_graph.rdf", format="xml")

print("All TTL files have been combined and saved as 'combined_graph.rdf'")


All TTL files have been combined and saved as 'combined_graph.rdf'


In [8]:
from rdflib import Graph, URIRef, RDF

# Load TBOX and ABOX
tbox = Graph().parse("p2-2.rdf", format='xml')
abox = Graph().parse("combined_graph.rdf", format='xml')

# Link instances to classes
for s, p, o in abox.triples((None, RDF.type, None)):
    class_uri = URIRef(f"http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#{o}")
    abox.set((s, RDF.type, class_uri))

# Merge TBOX and ABOX
combined_graph = tbox + abox
combined_graph.serialize(destination='final_ontology.rdf', format='pretty-xml')


<Graph identifier=N2d70701615aa4ae3a712c4379f064150 (<class 'rdflib.graph.Graph'>)>

In [9]:
from rdflib import Graph, URIRef
from urllib.parse import urlparse

def is_valid_iri(iri):
    parsed = urlparse(iri)
    return all([parsed.scheme, parsed.netloc, parsed.path])

g = Graph()
g.parse("final_ontology.rdf", format="xml")

for s, p, o in g:
    for term in [s, p, o]:
        if isinstance(term, URIRef) and not is_valid_iri(str(term)):
            print(f"Invalid IRI found: {term}")


In [113]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Establish the connection to the SPARQL endpoint
sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/bacaksiz")

# Queries
queries = {
    "4Q1": "SELECT ?author WHERE { ?author a <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author> }",
    "4Q2": "SELECT DISTINCT ?property WHERE { ?property <http://www.w3.org/2000/01/rdf-schema#domain> <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author> }",
    "4Q3": "SELECT DISTINCT ?property WHERE { ?property <http://www.w3.org/2000/01/rdf-schema#domain> ?domain . FILTER(?domain IN (<http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Conference>, <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Journal>)) }",
    "4Q4": """
        SELECT ?paper ?title WHERE { 
            ?paper a <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Paper> ;
                   <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#written_by> <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#SampleAuthor> ;
                   <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#published_in> ?proceeding .
            ?proceeding a <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Proceeding> ;
                        <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#conference_name> ?conferenceName .
            FILTER(CONTAINS(LCASE(STR(?conferenceName)), "database"))
        }
        """
}

# Execute each query and save the results to a text file
for key, query in queries.items():
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    with open(f"{key}.txt", "w") as file:
        for result in results["results"]["bindings"]:
            file.write(f"{result}\n")

print("All queries have been executed and results are saved.")


All queries have been executed and results are saved.


In [117]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Configuration
endpoint_url = "http://onur-MacBook-Pro.local:7200/repositories/bacaksiz"

# Initialize the SPARQL wrapper with your endpoint
sparql = SPARQLWrapper(endpoint_url)

def run_query(query):
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

# Queries to fetch statistics
prefixes = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
"""

queries = {
    "Number of Classes": prefixes + "SELECT (COUNT(DISTINCT ?class) AS ?count) WHERE {[] a ?class.}",
    "Number of Properties": prefixes + "SELECT (COUNT(DISTINCT ?property) AS ?count) WHERE {[] ?property [].}",
    "Number of Instances for Main Classes": prefixes + """
        SELECT ?class (COUNT(?instance) AS ?count)
        WHERE {
            ?instance a ?class.
            FILTER(?class IN (ex:Paper, ex:Author, ex:Affiliation, ex:Review, ex:Citation))
        }
        GROUP BY ?class
    """,
    "Total Number of Triples": prefixes + "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o. }"
}

# Run queries and print results
for description, query in queries.items():
    results = run_query(query)
    print(description)
    for result in results["results"]["bindings"]:
        class_label = result['class']['value'].split('#')[-1] if 'class' in result else 'Total'
        print(f"  {class_label}: {result['count']['value']}")
    print()


Number of Classes
  Total: 18

Number of Properties
  Total: 37

Number of Instances for Main Classes
  Paper: 1566
  Author: 518
  Affiliation: 32
  Review: 180

Total Number of Triples
  Total: 30218



In [123]:
import csv
from SPARQLWrapper import SPARQLWrapper, JSON

# Configure the endpoint
endpoint_url = "http://onur-MacBook-Pro.local:7200/repositories/bacaksiz"
sparql = SPARQLWrapper(endpoint_url)

# Define the SPARQL query
query = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?author ?name
WHERE {
  ?author a ex:Author .
  ?author foaf:name ?name .
}
"""

# Set the query and format
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute the query and process results
results = sparql.query().convert()

# Open a file for writing
with open('4.1.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Author URI', 'Name'])  # Writing the headers

    for result in results["results"]["bindings"]:
        author = result["author"]["value"]
        name = result["name"]["value"]
        writer.writerow([author, name])  # Writing each row

print("Results have been saved to '4.1.csv'.")


Results have been saved to '4.1.csv'.


2. Find all properties whose domain is Author.

In [124]:
import csv
from SPARQLWrapper import SPARQLWrapper, JSON

# Configure the endpoint
endpoint_url = "http://onur-MacBook-Pro.local:7200/repositories/bacaksiz"
sparql = SPARQLWrapper(endpoint_url)

# Define the SPARQL query
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

SELECT DISTINCT ?property
WHERE {
  ?property rdfs:domain ex:Author .
}
"""

# Set the query and format
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute the query and process results
results = sparql.query().convert()

# Open a file for writing
with open('4.2.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Property'])  # Writing the headers

    for result in results["results"]["bindings"]:
        property = result["property"]["value"]
        writer.writerow([property])  # Writing each row

print("Results have been saved to '4.2.csv'.")


Results have been saved to '4.2.csv'.


3. Find all properties whose domain is either Conference or Journal.

In [5]:
import csv
import SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON

# Continue from the existing imports and setup
# Define the SPARQL query
query = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

SELECT DISTINCT ?property
WHERE {
  { ?property rdfs:domain ex:Proceeding }
}
"""

# Set the query and format
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute the query and process results
results = sparql.query().convert()

# Open a file for writing
with open('4.3.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Property'])  # Writing the headers

    for result in results["results"]["bindings"]:
        property = result["property"]["value"]
        writer.writerow([property])  # Writing each row

print("Results have been saved to '4.3.csv'.")


NameError: name 'sparql' is not defined

4. Find all the papers written by a given author that were published in database conferences. (Jim Brown)

In [127]:
# Continue from the existing imports and setup
# Define the SPARQL query
query = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT DISTINCT ?paper ?title
WHERE {
  ?paper ex:written_by ?author .
  ?author foaf:name "Jim Brown" .
  ?paper ex:published_in ?conference .
  ?conference a ex:Conference .
  ?conference ex:name ?confName .
  FILTER CONTAINS(LCASE(str(?confName)), "database")
}
"""

# Set the query and format
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute the query and process results
results = sparql.query().convert()

# Open a file for writing
with open('4.4.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Paper URI', 'Title'])  # Writing the headers

    for result in results["results"]["bindings"]:
        paper = result["paper"]["value"]
        title = result["title"]["value"]
        writer.writerow([paper, title])  # Writing each row

print("Results have been saved to '4.4.csv'.")


Results have been saved to '4.4.csv'.


- 4.5 Authors with Most Publications Across Different Conferences and Journals
- 4.6 Trend Analysis: Growth in Paper Publications Over Years by Topic

In [130]:
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

# Initialize the SPARQL endpoint
sparql = SPARQLWrapper("http://onur-MacBook-Pro.local:7200/repositories/bacaksiz")

# Define the SPARQL queries
query1 = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?authorName (COUNT(?paper) AS ?totalPapers)
WHERE {
  ?paper ex:written_by ?author .
  ?author foaf:name ?authorName .
}
GROUP BY ?authorName
ORDER BY DESC(?totalPapers)
LIMIT 10

"""

query2 = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?year (SAMPLE(?keyword) AS ?sampleKeyword) (COUNT(?paper) AS ?totalPapers)
WHERE {
  ?paper ex:year ?year ;
         ex:keywords ?keywords .
  BIND(STRAFTER(?keywords, ",") AS ?keyword)  # Assuming keywords are separated by commas
}
GROUP BY ?year ?keyword
ORDER BY ?year

"""

def run_query(query):
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return pd.json_normalize(results['results']['bindings'])

# Execute the queries and save the results
df1 = run_query(query1)
df2 = run_query(query2)

# Print the results
print("Query 1 Results:")
print(df1)
print("\nQuery 2 Results:")
print(df2)

# Save the results to CSV
df1.to_csv('4.5.csv', index=False)
df2.to_csv('4.6.csv', index=False)


Query 1 Results:
  authorName.type  authorName.value                      totalPapers.datatype  \
0         literal     A. Bensoussan  http://www.w3.org/2001/XMLSchema#integer   
1         literal            S. Yam  http://www.w3.org/2001/XMLSchema#integer   
2         literal  Bradford W. Mott  http://www.w3.org/2001/XMLSchema#integer   
3         literal        N. Vieille  http://www.w3.org/2001/XMLSchema#integer   
4         literal   James C. Lester  http://www.w3.org/2001/XMLSchema#integer   
5         literal            Yu Bao  http://www.w3.org/2001/XMLSchema#integer   
6         literal       Guosun Zeng  http://www.w3.org/2001/XMLSchema#integer   
7         literal     Doina Caragea  http://www.w3.org/2001/XMLSchema#integer   
8         literal        Xinming Ou  http://www.w3.org/2001/XMLSchema#integer   
9         literal          Su Zhang  http://www.w3.org/2001/XMLSchema#integer   

  totalPapers.type totalPapers.value  
0          literal                18  
1          li