In [1]:
pip install rdflib



In [2]:
import pandas as pd
import random
import csv
import os
from itertools import islice
import urllib.parse
from rdflib import Graph
from rdflib import Literal
from rdflib import Namespace
from rdflib.namespace import RDF, RDFS, FOAF, XSD, URIRef

In [3]:
# The single in-memory graph that every loader cell below adds triples to.
g = Graph()

# Base namespace under which all class, property and instance IRIs are minted;
# it is serialized with the "abox" prefix.
Abox = Namespace("http://SDM_LAB2.org/abox/")
g.bind("abox", Abox)

In [4]:
# Folder holding the exported CSV files.
# NOTE(review): looks like a Colab-mounted Google Drive path; confirm the drive
# is mounted before this cell runs.
csv_dir = '/content/drive/MyDrive/SDM_lab2/data/CSVs/'

# Keep only the directory entries whose name ends with ".csv".
csv_files = list(filter(lambda name: name.endswith('.csv'), os.listdir(csv_dir)))

# One DataFrame per CSV file, keyed by the file name without its extension
# and with dashes turned into underscores.
dataframes = {}
for csv_file in csv_files:
    df_name = os.path.splitext(csv_file)[0]
    df_name = df_name.replace('-', '_')
    file_path = os.path.join(csv_dir, csv_file)
    dataframes[df_name] = pd.read_csv(file_path)

# List the tables that were loaded.
for df_name in dataframes:
    print(df_name)

paper_belong_to_proceeding
paper
conference
paper_presented_in_conference
paper_cite_paper
scopus_500
sample
journal
journal_in_year
authors
author_write
paper_belong_to_journal
conference_detail
author_review
paper_in_year
paper_has_keywords
year
proceeding
proceeding_in_year
keywords


# Load Classes and Properties

## Load Class ResearchPaper

In [5]:
dataframes['paper'].head(3)

Unnamed: 0,title,abstract,pages,DOI,link,year
0,Challenges posed by hijacked journals in Scopus,This study presents and explains the phenomeno...,395-422,10.1002/asi.24855,https://www.scopus.com/inward/record.uri?eid=2...,2023
1,Normalizing Large Scale Sensor-Based MWD Data:...,In the context of geo-infrastructures and spec...,-,10.3390/s24041209,https://www.scopus.com/inward/record.uri?eid=2...,2022
2,Instant learning based on deep neural network ...,Biometric-based identity verification systems ...,32099-32122,10.1007/s11042-023-16751-6,https://www.scopus.com/inward/record.uri?eid=2...,2024


In [6]:
ResearchPaper = Abox.ResearchPaper

# CSV column -> property used to attach that column's value to the paper node.
paper_properties = {
    column: Abox[column]
    for column in ("title", "abstract", "pages", "DOI", "link", "year")
}

def process_papers(df):
    """Add one ResearchPaper node per row of ``df`` to the global graph ``g``.

    The node IRI is ``ResearchPaper_<DOI>`` in the Abox namespace. Every column
    listed in ``paper_properties`` whose value is not missing is attached to
    the node as a literal.
    """
    for _, row in df.iterrows():
        subject = URIRef(Abox["ResearchPaper_" + str(row['DOI'])])
        g.add((subject, RDF.type, ResearchPaper))
        for column, predicate in paper_properties.items():
            value = row[column]
            if pd.isna(value):
                # Missing cell: emit no triple for this property.
                continue
            g.add((subject, predicate, Literal(value)))

process_papers(dataframes['paper'])

## Load Property Cites

In [7]:
dataframes['paper_cite_paper'].head(3)

Unnamed: 0,start_id,end_id
0,10.1002/asi.24855,10.4230/LIPIcs.ICDT.2024.23
1,10.1002/asi.24855,10.1109/SSI58917.2023.10387755
2,10.1002/asi.24855,10.4230/LIPIcs.ICDT.2023.14


In [8]:
Cites = Abox.Cites

# Column map kept for reference; the loader below does not read it.
Cites_properties = {column: Abox[column] for column in ("start_id", "end_id")}

def process_paper(df):
    """Add a ``Cites`` edge from each ``start_id`` paper to its ``end_id`` paper.

    Both ends use the ``ResearchPaper_<id>`` IRI scheme. No rdf:type triples
    are added here.
    NOTE(review): cited papers that are absent from the ``paper`` table would
    end up without a type; confirm whether that is intended.
    """
    for _, row in df.iterrows():
        citing = URIRef(Abox["ResearchPaper_" + str(row['start_id'])])
        cited = URIRef(Abox["ResearchPaper_" + str(row['end_id'])])
        g.add((citing, Cites, cited))

process_paper(dataframes['paper_cite_paper'])

## Load Class Topics

In [9]:
dataframes['keywords'].head(3)

Unnamed: 0,keywords
0,filtering process
1,measurement while drilling (MWD)
2,normalizing index


In [10]:
Topics = Abox.Topics  # class IRI for topic nodes

# CSV column -> property used to attach that column's value to the topic node.
Topics_properties = dict(keywords=Abox.topics)

def process_topics(df):
    """Add one Topics node per row of ``df`` to the global graph ``g``.

    The node IRI is the percent-encoded string ``Topics_<keywords>`` in the
    Abox namespace; the keyword text itself is attached as a literal when it
    is not missing.
    """
    for _, row in df.iterrows():
        # Percent-encode so spaces and punctuation are legal inside the IRI.
        local_name = urllib.parse.quote("Topics_" + str(row['keywords']))
        topic_node = URIRef(Abox[local_name])
        g.add((topic_node, RDF.type, Topics))
        for column, predicate in Topics_properties.items():
            value = row[column]
            if pd.isna(value):
                continue
            g.add((topic_node, predicate, Literal(value)))

process_topics(dataframes['keywords'])

## Load Property HasTopics

In [11]:
dataframes['paper_has_keywords'].head(3)

Unnamed: 0,paper_id,keywords
0,10.3390/s24041209,filtering process
1,10.3390/s24041209,measurement while drilling (MWD)
2,10.3390/s24041209,normalizing index


In [12]:
HasTopics = Abox.HasTopics

# Column map kept for reference; the loader below does not read it.
HasTopics_properties = {column: Abox[column] for column in ("paper_id", "keywords")}

def process_paper(df):
    """Add a ``HasTopics`` edge from each paper to each of its keywords.

    The subject is ``ResearchPaper_<paper_id>``; the object is the
    percent-encoded ``Topics_<keywords>`` IRI.
    NOTE: this rebinds the name ``process_paper`` already defined by the Cites
    loader earlier in the notebook.
    """
    for _, row in df.iterrows():
        paper_node = URIRef(Abox["ResearchPaper_" + str(row['paper_id'])])
        # Percent-encode so spaces and punctuation are legal inside the IRI.
        topic_name = urllib.parse.quote("Topics_" + str(row['keywords']))
        topic_node = URIRef(Abox[topic_name])
        g.add((paper_node, HasTopics, topic_node))

process_paper(dataframes['paper_has_keywords'])

## Load Class SubmittedPaper

In [13]:
dataframes['paper'].head(3)

Unnamed: 0,title,abstract,pages,DOI,link,year
0,Challenges posed by hijacked journals in Scopus,This study presents and explains the phenomeno...,395-422,10.1002/asi.24855,https://www.scopus.com/inward/record.uri?eid=2...,2023
1,Normalizing Large Scale Sensor-Based MWD Data:...,In the context of geo-infrastructures and spec...,-,10.3390/s24041209,https://www.scopus.com/inward/record.uri?eid=2...,2022
2,Instant learning based on deep neural network ...,Biometric-based identity verification systems ...,32099-32122,10.1007/s11042-023-16751-6,https://www.scopus.com/inward/record.uri?eid=2...,2024


In [14]:
SubmittedPaper = Abox.SubmittedPaper

# CSV column -> property used to attach that column's value to the node.
paper_properties = {
    column: Abox[column]
    for column in ("title", "abstract", "pages", "DOI", "link", "year")
}

def SubmittedPaper_process(df):
    """Add one SubmittedPaper node per row of ``df`` to the global graph ``g``.

    The node IRI is ``SubmittedPaper_<DOI>`` in the Abox namespace. Every
    column listed in ``paper_properties`` whose value is not missing is
    attached to the node as a literal.
    """
    for _, row in df.iterrows():
        subject = URIRef(Abox["SubmittedPaper_" + str(row['DOI'])])
        g.add((subject, RDF.type, SubmittedPaper))
        for column, predicate in paper_properties.items():
            value = row[column]
            if pd.isna(value):
                # Missing cell: emit no triple for this property.
                continue
            g.add((subject, predicate, Literal(value)))

SubmittedPaper_process(dataframes['paper'])

## Load Property Submitted

In [15]:
dataframes['paper'].head(3)

Unnamed: 0,title,abstract,pages,DOI,link,year
0,Challenges posed by hijacked journals in Scopus,This study presents and explains the phenomeno...,395-422,10.1002/asi.24855,https://www.scopus.com/inward/record.uri?eid=2...,2023
1,Normalizing Large Scale Sensor-Based MWD Data:...,In the context of geo-infrastructures and spec...,-,10.3390/s24041209,https://www.scopus.com/inward/record.uri?eid=2...,2022
2,Instant learning based on deep neural network ...,Biometric-based identity verification systems ...,32099-32122,10.1007/s11042-023-16751-6,https://www.scopus.com/inward/record.uri?eid=2...,2024


In [16]:
Submitted = Abox.Submitted

# Column map kept for reference; the loader below does not read it.
Submitted_properties = dict(DOI=Abox.paper_id)

def Submitted_process(df):
    """Link every ResearchPaper node to the SubmittedPaper node with the same DOI.

    Adds ``ResearchPaper_<DOI> Submitted SubmittedPaper_<DOI>`` for each row.
    """
    for _, row in df.iterrows():
        doi = str(row['DOI'])
        published_node = URIRef(Abox["ResearchPaper_" + doi])
        submitted_node = URIRef(Abox["SubmittedPaper_" + doi])
        g.add((published_node, Submitted, submitted_node))

Submitted_process(dataframes['paper'])

## Load Class Person

In [17]:
# Person individuals are not loaded explicitly: they should be generated automatically according to the TBox.

## Load Class Author

In [18]:
dataframes['authors'].head(3)

Unnamed: 0,author_id,author_name,author_affiliation
0,57219571200,Abalkina A.,"Freie Universität Berlin, Berlin, Germany"
1,35726817600,Abbaszadeh Shahri A.,"Johan Lundberg AB, Uppsala, 754 50, Sweden, D..."
2,58897160900,Shan C.,"Division of Rock Engineering, Tyrens, Stockho..."


In [19]:
Author = Abox.Author

# CSV column -> property used to attach that column's value to the author node.
# Author columns map to author-specific properties, not to the paper
# properties title/abstract/pages.
Author_properties = {
    "author_id": Abox.author_id,
    "author_name": Abox.author_name,
    "author_affiliation": Abox.author_affiliation
}

def Author_process(df):
    """Add one Author node per row of ``df`` to the global graph ``g``.

    The node IRI is ``Author_<author_id>`` in the Abox namespace. Every column
    listed in ``Author_properties`` whose value is not missing is attached to
    the node as a literal.
    """
    for _, record in df.iterrows():
        Author_uri = URIRef(Abox["Author_" + str(record['author_id'])])
        g.add((Author_uri, RDF.type, Author))
        for key, prop in Author_properties.items():
            if pd.notna(record[key]):
                g.add((Author_uri, prop, Literal(record[key])))

Author_process(dataframes['authors'])

## Load Property Write

In [20]:
dataframes['author_write'].head(3)

Unnamed: 0,author_id,paper_id,corresponding
0,57219571200,10.1002/asi.24855,True
1,35726817600,10.3390/s24041209,True
2,58897160900,10.3390/s24041209,False


In [21]:
Write = Abox.Write

# Column map kept for reference; the loader below does not read it.
Write_properties = {
    column: Abox[column]
    for column in ("author_id", "paper_id", "corresponding")
}

def Write_process(df):
    """Add a ``Write`` edge from each author to each paper they wrote.

    The subject is ``Author_<author_id>`` and the object is
    ``ResearchPaper_<paper_id>``, one edge per row of ``df``.
    """
    for _, row in df.iterrows():
        author_node = URIRef(Abox["Author_" + str(row['author_id'])])
        paper_node = URIRef(Abox["ResearchPaper_" + str(row['paper_id'])])
        g.add((author_node, Write, paper_node))

Write_process(dataframes['author_write'])

## Load Property CorrespondingAuthor

In [22]:
dataframes['author_write'].head(3)

Unnamed: 0,author_id,paper_id,corresponding
0,57219571200,10.1002/asi.24855,True
1,35726817600,10.3390/s24041209,True
2,58897160900,10.3390/s24041209,False


In [23]:
CorrespondingAuthor = Abox.CorrespondingAuthor

# Column map kept for reference; the loader below does not read it.
Write_properties = {
    "author_id": Abox.author_id,
    "paper_id": Abox.paper_id,
    "corresponding": Abox.corresponding
}

def CorrespondingAuthor_process(df):
    """Add a ``CorrespondingAuthor`` edge for rows flagged as corresponding.

    The subject is ``Author_<author_id>`` and the object is
    ``ResearchPaper_<paper_id>``. Rows whose ``corresponding`` value is missing
    or false produce no triple.
    """
    for _, record in df.iterrows():
        is_corresponding = record['corresponding']
        # NaN is truthy in Python, so a missing flag must be rejected explicitly
        # or it would be asserted as a corresponding author.
        if pd.isna(is_corresponding) or not is_corresponding:
            continue

        Author_id_uri = URIRef(Abox["Author_" + str(record['author_id'])])
        ResearchPaper_id_uri = URIRef(Abox["ResearchPaper_" + str(record['paper_id'])])
        g.add((Author_id_uri, CorrespondingAuthor, ResearchPaper_id_uri))

CorrespondingAuthor_process(dataframes['author_write'])

## Load Class Reviewer

In [24]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [25]:
Reviewer = Abox.Reviewer

# CSV column -> property used to attach that column's value to the reviewer node.
Reviewer_properties = {
    "paper_id": Abox.paper_id,
    "reviewer_id": Abox.reviewer_id,
    "review_content": Abox.review_content,
    "accept_possibility": Abox.accept_possibility
}

def Reviewer_process(df):
    """Add one Reviewer node per distinct ``reviewer_id`` to the global graph ``g``.

    The node IRI is ``Reviewer_<reviewer_id>`` in the Abox namespace and it is
    typed with the ``Reviewer`` class. Every column listed in
    ``Reviewer_properties`` whose value is not missing is attached as a literal.

    NOTE(review): ``df`` has one row per review, so a reviewer with several
    reviews accumulates several paper_id / review_content / accept_possibility
    values on the same node; confirm this is wanted.
    """
    for _, record in df.iterrows():
        Reviewer_uri = URIRef(Abox["Reviewer_" + str(record['reviewer_id'])])
        # Type the node as Reviewer (the class this cell loads), not Author.
        g.add((Reviewer_uri, RDF.type, Reviewer))
        for key, prop in Reviewer_properties.items():
            if pd.notna(record[key]):
                g.add((Reviewer_uri, prop, Literal(record[key])))

Reviewer_process(dataframes['author_review'])

## Load Property WriteReview

In [26]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [27]:
WriteReview = Abox.WriteReview

# Column map kept for reference; the loader below does not read it.
WriteReview_properties = {
    column: Abox[column]
    for column in ("paper_id", "reviewer_id", "review_content", "accept_possibility")
}

def WriteReview_process(df):
    """Add a ``WriteReview`` edge from each reviewer to the paper they reviewed.

    The subject is ``Reviewer_<reviewer_id>`` and the object is
    ``SubmittedPaper_<paper_id>``, one edge per row of ``df``.
    """
    for _, row in df.iterrows():
        reviewer_node = URIRef(Abox["Reviewer_" + str(row['reviewer_id'])])
        submission_node = URIRef(Abox["SubmittedPaper_" + str(row['paper_id'])])
        g.add((reviewer_node, WriteReview, submission_node))

WriteReview_process(dataframes['author_review'])

## Load Class ReviewText

In [28]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [29]:
ReviewText = Abox.ReviewText

# CSV column -> property used to attach that column's value to the review node.
ReviewText_properties = {
    "paper_id": Abox.paper_id,
    "reviewer_id": Abox.reviewer_id,
    "review_content": Abox.review_content,
    "accept_possibility": Abox.accept_possibility
}

def ReviewText_process(df):
    """Add one ReviewText node per row of ``df`` to the global graph ``g``.

    The node IRI is the percent-encoded string ``ReviewText_<review_content>``
    in the Abox namespace and it is typed with the ``ReviewText`` class. Every
    column listed in ``ReviewText_properties`` whose value is not missing is
    attached as a literal.

    NOTE(review): the IRI is derived from the review text only, so two reviews
    with identical text share one node. The ReviewOf and AcceptPossibility
    loaders use the same scheme, so it is kept unchanged here.
    """
    for _, record in df.iterrows():
        # A distinct local name keeps the module-level ReviewText class IRI
        # visible inside this function.
        review_key = urllib.parse.quote("ReviewText_" + str(record['review_content']))
        ReviewText_uri = URIRef(Abox[review_key])
        # Type the node as ReviewText (the class this cell loads), not Author.
        g.add((ReviewText_uri, RDF.type, ReviewText))
        for key, prop in ReviewText_properties.items():
            if pd.notna(record[key]):
                g.add((ReviewText_uri, prop, Literal(record[key])))

ReviewText_process(dataframes['author_review'])

## Load Property ReviewOf

In [30]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [31]:
ReviewOf = Abox.ReviewOf

# Column map kept for reference; the loader below does not read it.
ReviewOf_properties = {
    column: Abox[column]
    for column in ("paper_id", "reviewer_id", "review_content", "accept_possibility")
}

def ReviewOf_process(df):
    """Add a ``ReviewOf`` edge from each review text to the paper it reviews.

    The subject is the percent-encoded ``ReviewText_<review_content>`` IRI and
    the object is ``SubmittedPaper_<paper_id>``, one edge per row of ``df``.
    """
    for _, row in df.iterrows():
        # Percent-encode so spaces and punctuation are legal inside the IRI.
        review_name = urllib.parse.quote("ReviewText_" + str(row['review_content']))
        review_node = URIRef(Abox[review_name])
        submission_node = URIRef(Abox["SubmittedPaper_" + str(row['paper_id'])])
        g.add((review_node, ReviewOf, submission_node))

ReviewOf_process(dataframes['author_review'])

## Load Property AcceptPossibility

In [32]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [33]:
AcceptPossibility = Abox.AcceptPossibility

# Column map kept for reference; the loader below does not read it.
# (This reassigns the ReviewOf_properties name with the same content.)
ReviewOf_properties = {
    column: Abox[column]
    for column in ("paper_id", "reviewer_id", "review_content", "accept_possibility")
}

def AcceptPossibility_process(df):
    """Add an ``AcceptPossibility`` edge from each review text to its score node.

    The subject is the percent-encoded ``ReviewText_<review_content>`` IRI and
    the object is the IRI ``AcceptPossibility_<accept_possibility>``.
    NOTE(review): the score is minted as an IRI rather than a literal; confirm
    against the TBox that AcceptPossibility is meant to be an object property.
    """
    for _, row in df.iterrows():
        # Percent-encode so spaces and punctuation are legal inside the IRI.
        review_name = urllib.parse.quote("ReviewText_" + str(row['review_content']))
        review_node = URIRef(Abox[review_name])
        score_node = URIRef(Abox["AcceptPossibility_" + str(row['accept_possibility'])])
        g.add((review_node, AcceptPossibility, score_node))

AcceptPossibility_process(dataframes['author_review'])

## Load Class Journal

In [34]:
dataframes['journal'].head(3)

Unnamed: 0,journal_name
0,VLDB Journal
1,"Circuits, Systems, and Signal Processing"
2,Computer Methods and Programs in Biomedicine


In [35]:
Journal = Abox.Journal

# CSV column -> property used to attach that column's value to the journal node.
Journal_properties = {
    "journal_name": Abox.journal_name
}

def Journal_process(df):
    """Add one Journal node per row of ``df`` to the global graph ``g``.

    The node IRI is the percent-encoded string ``Journal_<journal_name>`` in
    the Abox namespace and it is typed with the ``Journal`` class. Every column
    listed in ``Journal_properties`` whose value is not missing is attached as
    a literal.
    """
    for _, record in df.iterrows():
        # Percent-encode so spaces and punctuation are legal inside the IRI.
        journal_key = urllib.parse.quote("Journal_" + str(record['journal_name']))
        # Build the IRI from this row's journal name. Indexing Abox with the
        # global ReviewText class IRI gave every journal the same node.
        Journal_uri = URIRef(Abox[journal_key])
        # Type the node as Journal (the class this cell loads), not Author.
        g.add((Journal_uri, RDF.type, Journal))
        for key, prop in Journal_properties.items():
            if pd.notna(record[key]):
                g.add((Journal_uri, prop, Literal(record[key])))

Journal_process(dataframes['journal'])

## Load Class Volumes (Not Finished)

In [36]:
dataframes['journal'].head(3)

Unnamed: 0,journal_name
0,VLDB Journal
1,"Circuits, Systems, and Signal Processing"
2,Computer Methods and Programs in Biomedicine


## Load Property OfJournal (Not Finished)

## Load Class Proceedings

In [37]:
dataframes['proceeding'].head(3)

Unnamed: 0,proceeding_name,city
0,"19th Machine Translation Summit, MT Summit 2023",Macau
1,7th International Conference on Big Data and I...,Beijing
2,2023 5th International Conference on Artificia...,Dalian


In [38]:
Proceedings = Abox.Proceedings

# CSV column -> property used to attach that column's value to the node.
properties = {
    "proceeding_name": Abox.proceeding_name,
    "city": Abox.city
}

def process(df):
    """Add one Proceedings node per row of ``df`` to the global graph ``g``.

    The node IRI is the percent-encoded string
    ``Proceedings_<proceeding_name>`` in the Abox namespace. Every column
    listed in ``properties`` whose value is not missing is attached as a
    literal.
    """
    for _, record in df.iterrows():
        # Percent-encode so spaces and punctuation are legal inside the IRI.
        proceeding_key = urllib.parse.quote("Proceedings_" + str(record['proceeding_name']))
        # Build the IRI from this row's proceeding name. Indexing Abox with the
        # global ReviewText class IRI gave every proceeding the same node.
        proceeding_uri = URIRef(Abox[proceeding_key])
        g.add((proceeding_uri, RDF.type, Proceedings))
        for key, prop in properties.items():
            if pd.notna(record[key]):
                g.add((proceeding_uri, prop, Literal(record[key])))

process(dataframes['proceeding'])

## Load Class Conference

In [39]:
dataframes['conference_detail'].head(3)

Unnamed: 0,name,year,city,proceeding_name
0,International Conference on Integration of Com...,2024,Pune,2023 International Conference on Integration o...
1,"International Conference on Electronics, Commu...",2023,Fukuoka,"6th International Conference on Electronics, C..."
2,IEEE International Conference on Next Generati...,2021,Vellore,2023 IEEE International Conference on Next Gen...


In [40]:
Conference = Abox.Conference

# CSV column -> property used to attach that column's value to the node.
properties = {
    "name": Abox.name,
    "year": Abox.year,
    "city": Abox.city,
    "proceeding_name": Abox.proceeding_name
}

def process(df):
    """Add one Conference node per row of ``df`` to the global graph ``g``.

    The node IRI is the percent-encoded string ``Conference_<name>`` in the
    Abox namespace. Every column listed in ``properties`` whose value is not
    missing is attached as a literal.
    """
    for _, record in df.iterrows():
        # Percent-encode so spaces and punctuation are legal inside the IRI.
        conference_key = urllib.parse.quote("Conference_" + str(record['name']))
        # Build the IRI from this row's conference name. Indexing Abox with the
        # global ReviewText class IRI gave every conference the same node.
        conference_uri = URIRef(Abox[conference_key])
        g.add((conference_uri, RDF.type, Conference))
        for key, prop in properties.items():
            if pd.notna(record[key]):
                g.add((conference_uri, prop, Literal(record[key])))

process(dataframes['conference_detail'])

## Load Class Workshop (Not Finished)

## Load Class RegularConference (Not Finished)

## Load Property WorkshopIn (Not Finished)

## Load Property ConIn (Not Finished)

## Load Property IsInProceeding

In [41]:
dataframes['paper_belong_to_proceeding'].head(3)

Unnamed: 0,start_id,end_id
0,10.1145/3623509.3633352,ACM International Conference Proceeding Series
1,10.1051/e3sconf/202448204001,E3S Web of Conferences
2,10.1145/3636243.3636263,ACM International Conference Proceeding Series


In [42]:
IsInProceeding = Abox.IsInProceeding

# Column map kept for reference; the loader below does not read it.
properties = {
    "start_id": Abox.start_id,
    "end_id": Abox.end_id
}

def process(df):
    """Add an ``IsInProceeding`` edge from each paper to its proceeding.

    The subject is ``ResearchPaper_<start_id>``, built exactly like the IRIs
    minted when paper nodes are created (no percent-encoding), so the edge
    attaches to the existing paper node. The object is the percent-encoded
    ``Proceedings_<end_id>`` IRI, the scheme used for Proceedings nodes.
    """
    for _, record in df.iterrows():
        paper_uri = URIRef(Abox["ResearchPaper_" + str(record['start_id'])])
        # end_id holds a proceeding name, so the target uses the Proceedings_
        # prefix rather than Conference_.
        proceeding_key = urllib.parse.quote("Proceedings_" + str(record['end_id']))
        proceeding_uri = URIRef(Abox[proceeding_key])

        g.add((paper_uri, IsInProceeding, proceeding_uri))

process(dataframes['paper_belong_to_proceeding'])

## Load Property IsInJournal

In [43]:
dataframes['paper_belong_to_journal'].head(3)

Unnamed: 0,start_id,end_id
0,10.1002/asi.24855,Journal of the Association for Information Sci...
1,10.3390/s24041209,Sensors
2,10.1007/s11042-023-16751-6,Multimedia Tools and Applications


In [44]:
IsInJournal = Abox.IsInJournal

# Column map kept for reference; the loader below does not read it.
properties = {
    "start_id": Abox.start_id,
    "end_id": Abox.end_id
}

def process(df):
    """Add an ``IsInJournal`` edge from each paper to its journal volume node.

    The subject is ``ResearchPaper_<start_id>``, built exactly like the IRIs
    minted when paper nodes are created (no percent-encoding), so the edge
    attaches to the existing paper node. The object is the percent-encoded
    ``Volumes_<end_id>`` IRI.

    NOTE(review): end_id looks like a journal name and no ``Volumes_`` nodes
    are created in this notebook (the Volumes loader is marked not finished);
    confirm whether the target should be ``Journal_<name>`` instead.
    """
    for _, record in df.iterrows():
        paper_uri = URIRef(Abox["ResearchPaper_" + str(record['start_id'])])
        # Percent-encode so spaces and punctuation are legal inside the IRI.
        volume_key = urllib.parse.quote("Volumes_" + str(record['end_id']))
        volume_uri = URIRef(Abox[volume_key])

        g.add((paper_uri, IsInJournal, volume_uri))

process(dataframes['paper_belong_to_journal'])

# Export graph

In [45]:
# Smoke test: print at most five of the triples yielded by iterating the graph.
for stmt in islice(iter(g), 5):
    print(stmt)

(rdflib.term.URIRef('http://SDM_LAB2.org/abox/Author_58897001100'), rdflib.term.URIRef('http://SDM_LAB2.org/abox/Write'), rdflib.term.URIRef('http://SDM_LAB2.org/abox/ResearchPaper_10.3233/JIFS-232918'))
(rdflib.term.URIRef('http://SDM_LAB2.org/abox/ResearchPaper_10.1109/THMS.2023.3329536'), rdflib.term.URIRef('http://SDM_LAB2.org/abox/Cites'), rdflib.term.URIRef('http://SDM_LAB2.org/abox/ResearchPaper_10.1016/j.bspc.2023.105791'))
(rdflib.term.URIRef('http://SDM_LAB2.org/abox/ReviewText_More%20face%20tax%20final.'), rdflib.term.URIRef('http://SDM_LAB2.org/abox/reviewer_id'), rdflib.term.Literal('57201801048', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
(rdflib.term.URIRef('http://SDM_LAB2.org/abox/ReviewText_Land%20read%20black%20oil%20future%20grow%20build%20yeah.'), rdflib.term.URIRef('http://SDM_LAB2.org/abox/paper_id'), rdflib.term.Literal('10.3390/s24041259'))
(rdflib.term.URIRef('http://SDM_LAB2.org/abox/ResearchPaper_10.1109/CIPAE60493.2023.00081')

In [46]:
# Write the whole graph to output.ttl (relative to the working directory) in Turtle syntax.
g.serialize(format='turtle', destination='output.ttl')

<Graph identifier=N5c7f411be85c46f1aa628776549fba8f (<class 'rdflib.graph.Graph'>)>