In [1]:
# import libraries
from rdflib import Graph,Literal,RDF,URIRef
from rdflib.namespace import FOAF,XSD,RDFS

from rdflib import Namespace
import pandas as pd
import string
import random
import numpy as np
import os
# ignore the warning
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Single in-memory graph that every following cell adds its triples to.
g=Graph()
# Namespace used for all classes and properties referenced below (base IRI http://sdm_lab/).
sdm_lab = Namespace("http://sdm_lab/")

In [3]:
# Maps every ASCII punctuation character and the space to an underscore.
# Built once so the per-row calls below do not rebuild it.
_URL_UNSAFE_TABLE = str.maketrans({char: "_" for char in string.punctuation + " "})


def URLparse(url:str):
    """Turn an identifier into a token that can be appended to a URI.

    Every character in ``string.punctuation`` (quotes included) and every
    space is replaced by an underscore; all other characters are kept.

    Args:
        url: Identifier to sanitise. Values that are not strings (for
            example integer ids read from a CSV column) are converted with
            ``str()`` first instead of failing on a missing ``replace``.

    Returns:
        The sanitised string.
    """
    return str(url).translate(_URL_UNSAFE_TABLE)
def save_rdf_file(g,filename,rdf_format='ttl'):
    """Serialize graph ``g`` to the file ``<filename>.<rdf_format>``.

    ``rdf_format`` doubles as the file extension and as the ``format``
    argument passed to ``g.serialize``; it defaults to ``'ttl'``.
    """
    target_path = ".".join((filename, rdf_format))
    g.serialize(target_path, format=rdf_format)

# Read all CSV files into DataFrames and load them into the graph

In [4]:
# Base directory for the CSV inputs: the working directory of the kernel.
# Every read_csv call below appends "/ABox_data/<file>.csv" to it.
directory=os.getcwd()

### Paper

In [5]:
# Load the paper records and create one node per paper.
df_paper=pd.read_csv(directory+"/ABox_data/papers.csv")

# Values of the `type` column that are recognised, and the class each maps to.
# A paper whose type is not listed here gets no rdf:type triple.
paper_classes = {
    'FullPaper': sdm_lab.FullPaper,
    'DemoPaper': sdm_lab.DemoPaper,
    'ShortPaper': sdm_lab.ShortPaper,
    'Poster': sdm_lab.Poster,
}

for _, paper in df_paper.iterrows():
    # Title and year are both stored as plain string literals.
    paper_title = Literal(str(paper['title']))
    paper_year = Literal(str(paper['year']))
    paper_class = paper_classes.get(str(paper['type']))
    # create ABox
    paper_node = URIRef(f"http://sdm_lab/{URLparse(paper['id'])}")
    if paper_class is not None:
        g.add((paper_node, RDF.type, paper_class))
    # Each attribute triple is added exactly once per paper.
    g.add((paper_node, sdm_lab.title, paper_title))
    g.add((paper_node, sdm_lab.year, paper_year))
df_paper.head(2)

Unnamed: 0,id,title,year,type,publication
0,53e9978ab7602d9701f45b8e,Correspondence,1874,DemoPaper,0
1,53e9978db7602d9701f4cccd,Miscellany.,1903,DemoPaper,0


### Area

In [6]:
# Load the keywords and create one Keyword node per row.
df_area = pd.read_csv(directory + "/ABox_data/keyword.csv")
for _, keyword in df_area.iterrows():
    keyword_id, keyword_text = keyword['id'], Literal(str(keyword['words']))
    # The id is interpolated as-is here (it is not passed through URLparse).
    keyword_node = URIRef(f"http://sdm_lab/{keyword_id}")
    for predicate, value in (
        (RDF.type, sdm_lab.Keyword),
        (sdm_lab.word, keyword_text),
    ):
        g.add((keyword_node, predicate, value))
df_area.head(2)

Unnamed: 0,id,words
0,1,technology
1,2,education


### Author

In [7]:
# Load the authors and create one Author node per row, labelled with the name.
df_authors = pd.read_csv(directory + "/ABox_data/authors.csv")
for _, author in df_authors.iterrows():
    author_key = author['author_id']
    name_literal = Literal(str(author['author_name']))
    author_node = URIRef(f"http://sdm_lab/{URLparse(author_key)}")
    for predicate, value in (
        (RDF.type, sdm_lab.Author),
        (sdm_lab.name, name_literal),
    ):
        g.add((author_node, predicate, value))
df_authors.head(2)

Unnamed: 0,author_id,author_name
0,53f45728dabfaec09f209538,Peijuan Wang
1,5601754345cedb3395e59457,Jiahua Zhang


### Author-writes-paper

In [8]:
# Link every author to the papers they wrote (author --writes--> paper).
df_author_wrote_paper = pd.read_csv(directory + "/ABox_data/author_writes_paper.csv")
for _, authorship in df_author_wrote_paper.iterrows():
    author_key, paper_key = authorship['author_id'], authorship['paper_id']
    author_node = URIRef(f"http://sdm_lab/{URLparse(author_key)}")
    paper_node = URIRef(f"http://sdm_lab/{URLparse(paper_key)}")
    authorship_triple = (author_node, sdm_lab.writes, paper_node)
    g.add(authorship_triple)
df_author_wrote_paper.head(2)

Unnamed: 0,author_id,paper_id
0,53f45728dabfaec09f209538,53e99784b7602d9701f3e133
1,5601754345cedb3395e59457,53e99784b7602d9701f3e133


### Conference-relatedTo-Keyword

In [9]:
# Link conferences to their keywords (conference --cRelatedTo--> keyword).
df_conf_area = pd.read_csv(directory + "/ABox_data/conf_area.csv")
for _, conf_link in df_conf_area.iterrows():
    conf_key, area_key = conf_link['conf_id'], conf_link['area_id']
    conference_node = URIRef(f"http://sdm_lab/{URLparse(conf_key)}")
    # Keyword ids are interpolated as-is, matching how the keyword nodes are built.
    keyword_node = URIRef(f"http://sdm_lab/{area_key}")
    relation_triple = (conference_node, sdm_lab.cRelatedTo, keyword_node)
    g.add(relation_triple)
df_conf_area.head(2)

Unnamed: 0,conf_id,area_id
0,53e1815b20f7dfbc07e8b8e9,26
1,53a730a620f7420be8d00a51,2


### Conference

In [10]:
# Load the conferences; the value 'WorkGroup' is replaced by 'ExpertGroup'
# everywhere in the frame before the nodes are typed.
df_conferences = (
    pd.read_csv(directory + "/ABox_data/conferences.csv")
    .replace('WorkGroup', 'ExpertGroup')
)

# Recognised conference types and the class each one maps to; rows with any
# other type value get no rdf:type triple.
conference_classes = {
    'Symposium': sdm_lab.Symposium,
    'RegularConference': sdm_lab.RegularConference,
    'ExpertGroup': sdm_lab.ExpertGroup,
    'Workshop': sdm_lab.Workshop,
}

for _, conference in df_conferences.iterrows():
    conf_key, conf_kind = conference['conf_id'], conference['type']
    conference_node = URIRef(f"http://sdm_lab/{URLparse(conf_key)}")
    conference_class = conference_classes.get(conf_kind)
    if conference_class is not None:
        g.add((conference_node, RDF.type, conference_class))
df_conferences.head(20)

Unnamed: 0,conf_id,type
0,53e1815b20f7dfbc07e8b8e9,Symposium
1,53a730a620f7420be8d00a51,Symposium
2,555036f37cea80f95416924b,RegularConference
3,555036dd7cea80f954161b94,ExpertGroup
4,53e1851520f7dfbc07e8d228,RegularConference
5,53a7326c20f7420be8d9588a,ExpertGroup
6,53a7306120f7420be8ced937,ExpertGroup
7,53a731e220f7420be8d68c8f,RegularConference
8,57d0879d0a3ac5db49914281,Symposium
9,53a72dbe20f7420be8c72ca2,RegularConference


### Conference-has-Proceeding

In [11]:
# Link conferences to their proceedings (conference --has--> proceeding).
df_has_proceedings = pd.read_csv(directory + "/ABox_data/has_proceedings.csv")
for _, proceedings_link in df_has_proceedings.iterrows():
    conf_key, edition_key = proceedings_link['conf_id'], proceedings_link['edition_id']
    # The edition id identifies the proceeding node.
    proceeding_node = URIRef(f"http://sdm_lab/{URLparse(edition_key)}")
    conference_node = URIRef(f"http://sdm_lab/{URLparse(conf_key)}")
    has_triple = (conference_node, sdm_lab.has, proceeding_node)
    g.add(has_triple)
df_has_proceedings.head(2)

Unnamed: 0,conf_id,edition_id
0,55f8e082c35f4f6dfd416033,20iw32vi
1,53a730f520f7420be8d175f9,50hu76py


### Journal-contains-Volume

In [12]:
# Link journals to their volumes (journal --contains--> volume).
df_has_volume = pd.read_csv(directory + "/ABox_data/contains_volume.csv")
for _, volume_link in df_has_volume.iterrows():
    journal_key, edition_key = volume_link['journal_id'], volume_link['edition_id']
    # The edition id identifies the volume node.
    volume_node = URIRef(f"http://sdm_lab/{URLparse(edition_key)}")
    journal_node = URIRef(f"http://sdm_lab/{URLparse(journal_key)}")
    contains_triple = (journal_node, sdm_lab.contains, volume_node)
    g.add(contains_triple)
df_has_volume.head(2)

Unnamed: 0,journal_id,edition_id
0,53a726b420f7420be8b7ec67,02ul10br
1,5390b03920f70186a0ed659b,14id46ro


### Conference-handledBy-Chair

In [13]:
# Link conferences to the chair handling them (conference --cHandledBy--> chair).
df_is_chair_of = pd.read_csv(directory + "/ABox_data/cHandledBy.csv")
for _, chair_link in df_is_chair_of.iterrows():
    chair_key, conf_key = chair_link['chair_id'], chair_link['conf_id']
    conference_node = URIRef(f"http://sdm_lab/{URLparse(conf_key)}")
    chair_node = URIRef(f"http://sdm_lab/{URLparse(chair_key)}")
    handled_triple = (conference_node, sdm_lab.cHandledBy, chair_node)
    g.add(handled_triple)
df_is_chair_of.head(2)

Unnamed: 0,chair_id,conf_id
0,HAN169iwb,53e1815b20f7dfbc07e8b8e9
1,ABY251oez,53a730a620f7420be8d00a51


### Journal-relatedTo-Keyword

In [14]:
# Link journals to their keywords (journal --jRelatedTo--> keyword).
df_journal_area = pd.read_csv(directory + "/ABox_data/journal_area.csv")
for _, journal_link in df_journal_area.iterrows():
    journal_key, area_key = journal_link['journal_id'], journal_link['area_id']
    journal_node = URIRef(f"http://sdm_lab/{URLparse(journal_key)}")
    # Keyword ids are interpolated as-is, matching how the keyword nodes are built.
    keyword_node = URIRef(f"http://sdm_lab/{area_key}")
    relation_triple = (journal_node, sdm_lab.jRelatedTo, keyword_node)
    g.add(relation_triple)
df_journal_area.head(2)

Unnamed: 0,journal_id,area_id
0,539ffcf2831432abcb63c645,26
1,53e1824d20f7dfbc07e8bf54,2


### Paper-relatedTo-Keyword

In [15]:
# Link papers to their keywords (paper --pRelatedTo--> keyword).
df_paper_area = pd.read_csv(directory + "/ABox_data/paper_area.csv")
for _, paper_link in df_paper_area.iterrows():
    paper_key, area_key = paper_link['paper_id'], paper_link['area_id']
    paper_node = URIRef(f"http://sdm_lab/{URLparse(paper_key)}")
    # Keyword ids are interpolated as-is, matching how the keyword nodes are built.
    keyword_node = URIRef(f"http://sdm_lab/{area_key}")
    relation_triple = (paper_node, sdm_lab.pRelatedTo, keyword_node)
    g.add(relation_triple)
df_paper_area.head(2)

Unnamed: 0,paper_id,area_id
0,53e9979bb7602d9701f63a92,1
1,53e9978ab7602d9701f4a046,1


### Reviews

In [16]:
# Load the reviews. Each row yields a review node carrying its text and
# decision, linked to the reviewed paper and to the reviewer who performed it.
df_reviews=pd.read_csv(directory+"/ABox_data/reviews.csv")
for _, review in df_reviews.iterrows():
    reviews_review_id = review['review_id']
    reviews_paper_id = review['paper_id']
    reviews_decision = Literal(str(review['decision']))
    reviews_review_text = Literal(str(review['review_text']))
    reviews_reviewer_id = review['author_id']
    # create ABox
    paper_node = URIRef(f"http://sdm_lab/{URLparse(reviews_paper_id)}")
    # Review and reviewer ids go through URLparse like every other id in this
    # notebook. The reviewer is an author id, and author nodes are built with
    # URLparse, so an unsanitised id containing punctuation would otherwise
    # point at a different node. str() keeps non-string ids from raising.
    review_node = URIRef(f"http://sdm_lab/{URLparse(str(reviews_review_id))}")
    reviewer_node = URIRef(f"http://sdm_lab/{URLparse(str(reviews_reviewer_id))}")
    g.add((review_node, sdm_lab.reviewText, reviews_review_text))
    g.add((review_node, sdm_lab.reviewDecision, reviews_decision))
    g.add((paper_node, sdm_lab.isUnderReview, review_node))
    g.add((reviewer_node, sdm_lab.performs, review_node))
df_reviews.head(2)

Unnamed: 0,review_id,paper_id,decision,review_text,author_id
0,H273nzb124sxg,53e9978ab7602d9701f45b8e,rejected,I did not like it,53f55f6ddabfae53def8045b
1,A366nxt216qsg,53e9978db7602d9701f4cccd,rejected,I did not like it,53f445a3dabfaee0d9bb0103


### Journal

In [17]:
# Load the journals and type each one as a Journal node.
df_journals = pd.read_csv(directory + "/ABox_data/journals.csv")
for _, journal in df_journals.iterrows():
    journal_key = journal['journal_id']
    journal_node = URIRef(f"http://sdm_lab/{URLparse(journal_key)}")
    typing_triple = (journal_node, RDF.type, sdm_lab.Journal)
    g.add(typing_triple)
df_journals.head(2)

Unnamed: 0,journal_id
0,539ffcf2831432abcb63c645
1,53e1824d20f7dfbc07e8bf54


### Journal-handledBy-Editor

In [18]:
# Link journals to the editor handling them (journal --jHandledBy--> editor).
df_is_editor_of = pd.read_csv(directory + "/ABox_data/jHandledBy.csv")
for _, editor_link in df_is_editor_of.iterrows():
    editor_key, journal_key = editor_link['editor_id'], editor_link['journal_id']
    journal_node = URIRef(f"http://sdm_lab/{URLparse(journal_key)}")
    editor_node = URIRef(f"http://sdm_lab/{URLparse(editor_key)}")
    handled_triple = (journal_node, sdm_lab.jHandledBy, editor_node)
    g.add(handled_triple)
df_is_editor_of.head(2)

Unnamed: 0,editor_id,journal_id
0,DEL416hsw,539ffcf2831432abcb63c645
1,OMI943ydf,53e1824d20f7dfbc07e8bf54


### Paper-isSubmittedTo-Venue

In [19]:
# Edition -> publication links. This cell only loads the table; no triples are
# created here. The frame is joined with the proceedings and volume tables in
# the following cell to derive the venue of each paper.
df_has_publication=pd.read_csv(directory+"/ABox_data/has_publication.csv")
df_has_publication.head(2)

Unnamed: 0,edition_id,publication_id
0,77xf41sa,53e9978db7602d9701f50657
1,82il87me,53e9979bb7602d9701f63a92


In [20]:
# Property: paper_node isSubmittedTo venue_node
# has_publication maps an edition to a paper, while has_proceedings and
# contains_volume map an edition to its conference or journal. Joining on
# edition_id therefore gives the venue of each paper.
df_conference_publications = pd.merge(
    df_has_publication, df_has_proceedings, on='edition_id', how='right'
).rename(columns={'conf_id': 'venue_id'})
df_journal_publications = pd.merge(
    df_has_publication, df_has_volume, on='edition_id', how='right'
).rename(columns={'journal_id': 'venue_id'})

# The right joins keep editions that have no paper; drop those rows.
df_publication_venue = pd.concat(
    [df_conference_publications, df_journal_publications]
).dropna(subset=['publication_id'])

for _, submission in df_publication_venue.iterrows():
    publication_venue_publication_id = submission['publication_id']
    publication_venue_venue_id = submission['venue_id']
    # abox
    paper_node = URIRef(f"http://sdm_lab/{URLparse(publication_venue_publication_id)}")
    venue_node = URIRef(f"http://sdm_lab/{URLparse(publication_venue_venue_id)}")
    g.add((paper_node, sdm_lab.isSubmittedTo, venue_node))
df_publication_venue.head(2)

Unnamed: 0,edition_id,publication_id,venue_id
0,20iw32vi,53e9979bb7602d9701f66d80,55f8e082c35f4f6dfd416033
2,09np14nm,53e9979bb7602d9701f67b74,5736ae01d39c4f40a797354b


### Responsible-assigns-reviewer

In [21]:
# Hard-coded assignments: the i-th responsible id assigns the i-th reviewer id.
list_chair_id = [
    'VZN926rmo',
    'JMY431sue',
    'KYK994nja',
    'VZN926rmo',
    'LMI499xkv',
]
list_reviewer_id = [
    '53f8078adabfae8faa4d3418',
    '53f4309ddabfaeb1a7bb434b',
    '54869ab9dabfae8a11fb33bb',
    '53f8078adabfae8faa4d3418',
    '53f45f55dabfaee0d9c13d70',
]
df_responsible_assgin_reviewer = pd.DataFrame({
    'responsible_id': list_chair_id,
    'reviewer': list_reviewer_id,
})
for _, assignment in df_responsible_assgin_reviewer.iterrows():
    responsible_key, reviewer_key = assignment['responsible_id'], assignment['reviewer']
    responsible_node = URIRef(f"http://sdm_lab/{URLparse(responsible_key)}")
    reviewer_node = URIRef(f"http://sdm_lab/{URLparse(reviewer_key)}")
    assignment_triple = (responsible_node, sdm_lab.assigns, reviewer_node)
    g.add(assignment_triple)
df_responsible_assgin_reviewer.head(2)

Unnamed: 0,responsible_id,reviewer
0,VZN926rmo,53f8078adabfae8faa4d3418
1,JMY431sue,53f4309ddabfaeb1a7bb434b


### Save ABox into file

In [22]:
# Write the populated graph to "ABox.nt" using the 'nt' serialization format.
save_rdf_file(g, filename="ABox", rdf_format="nt")