In [1]:
pip install rdflib



In [2]:
import pandas as pd
import random
import csv
import os
import urllib.parse
from rdflib import Graph
from rdflib import Namespace
from rdflib import Literal
from rdflib.namespace import RDF, RDFS, FOAF, XSD, URIRef

In [3]:
Abox = Namespace("http://SDM_LAB2.org/abox/")

g = Graph()
g.bind("abox", Abox)

# Define URIRefs
ResearchPaper = URIRef(Abox["ResearchPaper"])
SubmittedPaper = URIRef(Abox["SubmittedPaper"])
Author = URIRef(Abox["Author"])
Person = URIRef(Abox["Person"])
Reviewer = URIRef(Abox["Reviewer"])
Volumes = URIRef(Abox["Volumes"])
Proceedings = URIRef(Abox["Proceedings"])
Topics = URIRef(Abox["Topics"])
Workshop = URIRef(Abox["Workshop"])
RegularConference = URIRef(Abox["RegularConference"])
Conference = URIRef(Abox["Conference"])
Venue = URIRef(Abox["Venue"])
Journal = URIRef(Abox["Journal"])

# Read CSV
csv_files = [
    "abstracts-sample.csv",
    "affiliated-to.csv",
    "authors-sample.csv",
    "belongs-to.csv",
    "categoriesRelations.csv",
    "citations-sample.csv",
    "cited-by.csv",
    "companies.csv",
    "conferences.csv",
    "is-from.csv",
    "journals.csv",
    "keywords.csv",
    "paper-ids-sample.csv",
    "papers-processed.csv",
    "papers-sample.csv",
    "publication-venues-sample.csv",
    "published-in.csv",
    "related-to.csv",
    "reviewed-by.csv",
    "reviews.csv",
    "universities.csv",
    "volume-from.csv",
    "withAbstracts.csv",
    "written-by.csv"
]

In [4]:
csv_dir = '/content/drive/MyDrive/SDM_lab2/data/CSVs/'

csv_files = [f for f in os.listdir(csv_dir) if f.endswith('.csv')]

dataframes = {}
for csv_file in csv_files:
    file_path = os.path.join(csv_dir, csv_file)
    df_name = os.path.splitext(csv_file)[0].replace('-', '_')
    dataframes[df_name] = pd.read_csv(file_path)

for df_name in dataframes.keys():
    print(df_name)

paper_belong_to_proceeding
paper
conference
paper_presented_in_conference
paper_cite_paper
scopus_500
sample
journal
journal_in_year
authors
author_write
paper_belong_to_journal
conference_detail
author_review
paper_in_year
paper_has_keywords
year
proceeding
proceeding_in_year
keywords


# Load Class and Properties

## Load Class Paper

In [5]:
dataframes['paper'].head(3)

Unnamed: 0,title,abstract,pages,DOI,link,year
0,Challenges posed by hijacked journals in Scopus,This study presents and explains the phenomeno...,395-422,10.1002/asi.24855,https://www.scopus.com/inward/record.uri?eid=2...,2023
1,Normalizing Large Scale Sensor-Based MWD Data:...,In the context of geo-infrastructures and spec...,-,10.3390/s24041209,https://www.scopus.com/inward/record.uri?eid=2...,2022
2,Instant learning based on deep neural network ...,Biometric-based identity verification systems ...,32099-32122,10.1007/s11042-023-16751-6,https://www.scopus.com/inward/record.uri?eid=2...,2024


In [6]:
ResearchPaper = Abox.ResearchPaper

paper_properties = {
    "title": Abox.title,
    "abstract": Abox.abstract,
    "pages": Abox.pages,
    "DOI": Abox.DOI,
    "link": Abox.link,
    "year": Abox.year
}

def process_papers(df):
    for _, record in df.iterrows():
        paper_uri = URIRef(Abox["ResearchPaper_" + str(record['DOI'])])
        g.add((paper_uri, RDF.type, ResearchPaper))
        for key, prop in paper_properties.items():
            if pd.notna(record[key]):
                g.add((paper_uri, prop, Literal(record[key])))

process_papers(dataframes['paper'])

## Load Property of Paper

In [7]:
dataframes['authors'].head(3)

Unnamed: 0,author_id,author_name,author_affiliation
0,57219571200,Abalkina A.,"Freie Universität Berlin, Berlin, Germany"
1,35726817600,Abbaszadeh Shahri A.,"Johan Lundberg AB, Uppsala, 754 50, Sweden, D..."
2,58897160900,Shan C.,"Division of Rock Engineering, Tyrens, Stockho..."


In [8]:
Paper = Abox.Paper

paper_properties = {
    "author_id": Abox.author_id,
    "author_name": Abox.author_name,
    "author_affiliation": Abox.author_affiliation
}

def process_paper(df):
    for _, record in df.iterrows():
        author_uri = URIRef(Abox[str(record['author_id'])])
        g.add((author_uri, RDF.type, Author))
        for key, prop in paper_properties.items():
            if pd.notna(record[key]):  # Check not Null
                g.add((author_uri, prop, Literal(record[key])))

process_paper(dataframes['authors'])

## Load Class Topics

In [9]:
dataframes['keywords'].head(3)

Unnamed: 0,keywords
0,filtering process
1,measurement while drilling (MWD)
2,normalizing index


In [10]:
topics = Abox.Topics  # Name in Abox

topics_properties = {
    "keywords": Abox.topics
}

def process_topics(df):
    for _, record in df.iterrows():
        keywords = urllib.parse.quote(str(record['keywords']))  # URL Code
        keywords_uri = URIRef(Abox[keywords])
        g.add((keywords_uri, RDF.type, Topics))  #
        for key, prop in topics_properties.items():
            if pd.notna(record[key]):
                g.add((keywords_uri, prop, Literal(record[key])))

process_topics(dataframes['keywords'])

## Author-Write->ResearchPaper

In [11]:
dataframes['author_write'].head(3)

Unnamed: 0,author_id,paper_id,corresponding
0,57219571200,10.1002/asi.24855,True
1,35726817600,10.3390/s24041209,True
2,58897160900,10.3390/s24041209,False


In [12]:
Write = Abox.Write

g.add((Abox.Author, Abox.Write, Abox.ResearchPaper))
def process_authors_papers(df):
    for _, record in df.iterrows():
        author_uri = URIRef(Abox[str(record['author_id'])])
        paper_uri = URIRef(Abox[str(record['paper_id'])])
        g.add((author_uri, Write, paper_uri))

process_authors_papers(dataframes['author_write'])