In [66]:
pip install rdflib

Note: you may need to restart the kernel to use updated packages.


In [67]:
import pandas as pd
import numpy as np
import random
import csv
import os
from itertools import islice
import urllib.parse
from rdflib import Graph
from rdflib import Literal
from rdflib import Namespace
from rdflib.namespace import RDF, RDFS, FOAF, XSD, URIRef

In [68]:
URI = Namespace("http://SDM_lab_2.org/")
g = Graph()
g.bind("lab", URI)

In [69]:
# csv_dir = '/content/drive/MyDrive/SDM_lab2/data/CSVs/'
csv_dir = '/Users/zzy13/Desktop/Classes_at_UPC/SDM_Semantic_data_management/Lab_2/Lab_doc/data/CSVs/' 

csv_files = [f for f in os.listdir(csv_dir) if f.endswith('.csv')]

dataframes = {}
for csv_file in csv_files:
    file_path = os.path.join(csv_dir, csv_file)
    df_name = os.path.splitext(csv_file)[0].replace('-', '_')
    dataframes[df_name] = pd.read_csv(file_path)

for df_name in dataframes.keys():
    print(df_name)

paper_cite_paper
journal_in_year
proceeding_in_year
scopus_500
authors
paper_in_year
journal
sample
paper_has_keywords
author_write
year
paper_belong_to_journal
conference
proceeding
paper
paper_belong_to_proceeding
keywords
paper_presented_in_conference
conference_detail
author_review


# Load Class and Properties

## Load Class ResearchPaper

In [70]:
dataframes['paper'].head(3)

Unnamed: 0,title,abstract,pages,DOI,link,year
0,Challenges posed by hijacked journals in Scopus,This study presents and explains the phenomeno...,395-422,10.1002/asi.24855,https://www.scopus.com/inward/record.uri?eid=2...,2023
1,Normalizing Large Scale Sensor-Based MWD Data:...,In the context of geo-infrastructures and spec...,-,10.3390/s24041209,https://www.scopus.com/inward/record.uri?eid=2...,2022
2,Instant learning based on deep neural network ...,Biometric-based identity verification systems ...,32099-32122,10.1007/s11042-023-16751-6,https://www.scopus.com/inward/record.uri?eid=2...,2024


In [71]:
ResearchPaper = URI.ResearchPaper

paper_properties = {
    "title": URI.PaperTitle,
    "abstract": URI.PaperAbstract,
    "pages": URI.pages,
    "DOI": URI.DOI,
    "link": URI.link,
    "year": URI.year
}

def process_papers(df):
    for _, record in df.iterrows():
        paper_uri = URIRef(URI["ResearchPaper_" + str(record['DOI'])])
        g.add((paper_uri, RDF.type, ResearchPaper))
        for key, prop in paper_properties.items():
            if pd.notna(record[key]):
                g.add((paper_uri, prop, Literal(record[key])))

process_papers(dataframes['paper'])

## Load Property of Cites

In [72]:
dataframes['paper_cite_paper'].head(3)

Unnamed: 0,start_id,end_id
0,10.1002/asi.24855,10.4230/LIPIcs.ICDT.2024.23
1,10.1002/asi.24855,10.1109/SSI58917.2023.10387755
2,10.1002/asi.24855,10.4230/LIPIcs.ICDT.2023.14


In [73]:
Cites = URI.Cites

Cites_properties = {
    "start_id": URI.start_id,
    "end_id": URI.end_id
}

def process_paper(df):
    for _, record in df.iterrows():
        # Create URIs for the start and end papers
        start_paper_uri = URIRef(URI["ResearchPaper_" + str(record['start_id'])])
        end_paper_uri = URIRef(URI["ResearchPaper_" + str(record['end_id'])])

        # Add the type triples for both papers to the graph
        # g.add((start_paper_uri, RDF.type, ResearchPaper)) #Already Added Before
        # g.add((end_paper_uri, RDF.type, ResearchPaper))

        # Add the citation relationship to the graph
        g.add((start_paper_uri, Cites, end_paper_uri))

process_paper(dataframes['paper_cite_paper'])

## Load Class Topics

In [74]:
dataframes['keywords'].head(3)

Unnamed: 0,keywords
0,filtering process
1,measurement while drilling (MWD)
2,normalizing index


In [75]:
Topics = URI.Topics  # Name in URI

Topics_properties = {
    "keywords": URI.Topics
}

def process_topics(df):
    for _, record in df.iterrows():
        keywords = urllib.parse.quote("Topics_" + str(record['keywords']))  # URL Code
        keywords_uri = URIRef(URI[keywords])
        g.add((keywords_uri, RDF.type, Topics))  #
        for key, prop in Topics_properties.items():
            if pd.notna(record[key]):
                g.add((keywords_uri, prop, Literal(record[key])))

process_topics(dataframes['keywords'])

## Load Property HasTopics

In [76]:
dataframes['paper_has_keywords'].head(3)

Unnamed: 0,paper_id,keywords
0,10.3390/s24041209,filtering process
1,10.3390/s24041209,measurement while drilling (MWD)
2,10.3390/s24041209,normalizing index


In [77]:
HasTopics = URI.HasTopics

HasTopics_properties = {
    "paper_id": URI.paper_id,
    "keywords": URI.keywords
}

def process_paper(df):
    for _, record in df.iterrows():
        # Create URIs
        paper_id_uri = URIRef(URI["ResearchPaper_" + str(record['paper_id'])])
        keywords = urllib.parse.quote("Topics_" + str(record['keywords']))  # URL Code
        keywords_uri = URIRef(URI[keywords])

        # Add the relationship to the graph
        g.add((paper_id_uri, HasTopics, keywords_uri))

process_paper(dataframes['paper_has_keywords'])

## Load Class SubmittedPaper

In [78]:
dataframes['paper'].head(3)

Unnamed: 0,title,abstract,pages,DOI,link,year
0,Challenges posed by hijacked journals in Scopus,This study presents and explains the phenomeno...,395-422,10.1002/asi.24855,https://www.scopus.com/inward/record.uri?eid=2...,2023
1,Normalizing Large Scale Sensor-Based MWD Data:...,In the context of geo-infrastructures and spec...,-,10.3390/s24041209,https://www.scopus.com/inward/record.uri?eid=2...,2022
2,Instant learning based on deep neural network ...,Biometric-based identity verification systems ...,32099-32122,10.1007/s11042-023-16751-6,https://www.scopus.com/inward/record.uri?eid=2...,2024


In [79]:
SubmittedPaper = URI.SubmittedPaper

paper_properties = {
    "title": URI.PaperTitle,
    "abstract": URI.PaperAbstract,
    "pages": URI.Pages,
    "DOI": URI.DOI,
    "link": URI.Link,
    "year": URI.Year
}

def SubmittedPaper_process(df):
    for _, record in df.iterrows():
        paper_uri = URIRef(URI["SubmittedPaper_" + str(record['DOI'])])
        g.add((paper_uri, RDF.type, SubmittedPaper))
        for key, prop in paper_properties.items():
            if pd.notna(record[key]):
                g.add((paper_uri, prop, Literal(record[key])))

SubmittedPaper_process(dataframes['paper'])

## Load Property Submitted

In [80]:
dataframes['paper'].head(3)

Unnamed: 0,title,abstract,pages,DOI,link,year
0,Challenges posed by hijacked journals in Scopus,This study presents and explains the phenomeno...,395-422,10.1002/asi.24855,https://www.scopus.com/inward/record.uri?eid=2...,2023
1,Normalizing Large Scale Sensor-Based MWD Data:...,In the context of geo-infrastructures and spec...,-,10.3390/s24041209,https://www.scopus.com/inward/record.uri?eid=2...,2022
2,Instant learning based on deep neural network ...,Biometric-based identity verification systems ...,32099-32122,10.1007/s11042-023-16751-6,https://www.scopus.com/inward/record.uri?eid=2...,2024


In [81]:
Submitted = URI.Submitted

Submitted_properties = {
    "DOI": URI.paper_id
}

def Submitted_process(df):
    for _, record in df.iterrows():
        # Create URIs
        paper_id_uri = URIRef(URI["ResearchPaper_" + str(record['DOI'])])
        submitted_paper_id_uri = URIRef(URI["SubmittedPaper_" + str(record['DOI'])])

        # Add the relationship to the graph
        g.add((paper_id_uri, Submitted, submitted_paper_id_uri))

Submitted_process(dataframes['paper'])

## Load Class Person

In [82]:
# The RDF should Automate Generate Person Accourding to Tbox

## Load Class Author

In [83]:
dataframes['authors'].head(3)

Unnamed: 0,author_id,author_name,author_affiliation
0,57219571200,Abalkina A.,"Freie Universität Berlin, Berlin, Germany"
1,35726817600,Abbaszadeh Shahri A.,"Johan Lundberg AB, Uppsala, 754 50, Sweden, D..."
2,58897160900,Shan C.,"Division of Rock Engineering, Tyrens, Stockho..."


In [84]:
Author = URI.Author

Author_properties = {
    "author_id": URI.Author,
    "author_name": URI.AuthorName
    # "author_affiliation": URI.pages
}

def Author_process(df):
    for _, record in df.iterrows():
        Author_uri = URIRef(URI["Author_" + str(record['author_id'])])
        g.add((Author_uri, RDF.type, Author))
        for key, prop in Author_properties.items():
            if pd.notna(record[key]):
                g.add((Author_uri, prop, Literal(record[key])))

Author_process(dataframes['authors'])

## Load Property Write

In [85]:
dataframes['author_write'].head(3)

Unnamed: 0,author_id,paper_id,corresponding
0,57219571200,10.1002/asi.24855,True
1,35726817600,10.3390/s24041209,True
2,58897160900,10.3390/s24041209,False


In [86]:
Write = URI.Write

Write_properties = {
    "author_id": URI.Author,
    "paper_id": URI.paper_id,
    "corresponding": URI.corresponding
}

def Write_process(df):
    for _, record in df.iterrows():
        # Create URIs
        Author_id_uri = URIRef(URI["Author_" + str(record['author_id'])])
        ResearchPaper_id_uri = URIRef(URI["ResearchPaper_" + str(record['paper_id'])])

        # Add the relationship to the graph
        g.add((Author_id_uri, Write, ResearchPaper_id_uri))

Write_process(dataframes['author_write'])

## Load Property CorrespondingAuthor

In [87]:
dataframes['author_write'].head(3)

Unnamed: 0,author_id,paper_id,corresponding
0,57219571200,10.1002/asi.24855,True
1,35726817600,10.3390/s24041209,True
2,58897160900,10.3390/s24041209,False


In [88]:
CorrespondingAuthor = URI.CorrespondingAuthor

Write_properties = {
    "author_id": URI.Author,
    "paper_id": URI.paper_id,
    "corresponding": URI.corresponding
}

def CorrespondingAuthor_process(df):
    for _, record in df.iterrows():
        # Create URIs
        Author_id_uri = URIRef(URI["Author_" + str(record['author_id'])])
        ResearchPaper_id_uri = URIRef(URI["ResearchPaper_" + str(record['paper_id'])])

        # If the corresponding field is True, add the CorrespondingAuthor relationship
        if record['corresponding']:
            g.add((Author_id_uri, CorrespondingAuthor, ResearchPaper_id_uri))

CorrespondingAuthor_process(dataframes['author_write'])

## Load Class Reviewer

In [89]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [90]:
Reviewer = URI.Reviewer

Reviewer_properties = {
    "paper_id": URI.ResearchPaper,
    "reviewer_id": URI.Reviewer,
    "review_content": URI.ReviewText,
    "accept_possibility": URI.AcceptPossibility
}

def Reviewer_process(df):
    for _, record in df.iterrows():
        Reviewer_uri = URIRef(URI["Reviewer_" + str(record['reviewer_id'])])
        g.add((Reviewer_uri, RDF.type, Author))
        for key, prop in Reviewer_properties.items():
            if pd.notna(record[key]):
                g.add((Reviewer_uri, prop, Literal(record[key])))

Reviewer_process(dataframes['author_review'])

## Load Property WriteReview

In [91]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [92]:
WriteReview = URI.WriteReview

WriteReview_properties = {
    "paper_id": URI.paper_id,
    "reviewer_id": URI.reviewer_id,
    "review_content": URI.review_content,
    "accept_possibility": URI.accept_possibility
}

def WriteReview_process(df):
    for _, record in df.iterrows():
        # Create URIs
        Reviewer_id_uri = URIRef(URI["Reviewer_" + str(record['reviewer_id'])])
        SubmittedPaper_id_uri = URIRef(URI["SubmittedPaper_" + str(record['paper_id'])])

        # Add the relationship to the graph
        g.add((Reviewer_id_uri, WriteReview, SubmittedPaper_id_uri))

WriteReview_process(dataframes['author_review'])

## Load Class ReviewText

In [93]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [94]:
ReviewText = URI.ReviewText

ReviewText_properties = {
    "paper_id": URI.ResearchPaper,
    "reviewer_id": URI.Reviewer,
    "review_content": URI.ReviewText,
    "accept_possibility": URI.AcceptPossibility
}

def ReviewText_process(df):
    for _, record in df.iterrows():
        ReviewText = urllib.parse.quote("ReviewText_" + str(record['review_content']))  # URL Code
        ReviewText_uri = URIRef(URI[ReviewText])
        g.add((ReviewText_uri, RDF.type, Author))
        for key, prop in ReviewText_properties.items():
            if pd.notna(record[key]):
                g.add((ReviewText_uri, prop, Literal(record[key])))

ReviewText_process(dataframes['author_review'])

## Load Property ReviewOf

In [95]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [96]:
ReviewOf = URI.ReviewOf

ReviewOf_properties = {
    "paper_id": URI.paper_id,
    "reviewer_id": URI.reviewer_id,
    "review_content": URI.review_content,
    "accept_possibility": URI.accept_possibility
}

def ReviewOf_process(df):
    for _, record in df.iterrows():
        # Create URIs
        ReviewText = urllib.parse.quote("ReviewText_" + str(record['review_content']))  # URL Code
        ReviewText_uri = URIRef(URI[ReviewText])
        SubmittedPaper_id_uri = URIRef(URI["SubmittedPaper_" + str(record['paper_id'])])

        # Add the relationship to the graph
        g.add((ReviewText_uri, ReviewOf, SubmittedPaper_id_uri))

ReviewOf_process(dataframes['author_review'])

## Load Property AcceptPossibility

In [97]:
dataframes['author_review'].head(3)

Unnamed: 0,paper_id,reviewer_id,review_content,accept_possibility
0,10.1002/asi.24855,55445000700,Next certain tend million country compare.,0.254872
1,10.1002/asi.24855,57946022500,Hold thus safe difficult focus.,0.466417
2,10.1002/asi.24855,25921550000,Drug fund sure political social.,0.122583


In [98]:
AcceptPossibility = URI.AcceptPossibility

ReviewOf_properties = {
    "paper_id": URI.paper_id,
    "reviewer_id": URI.reviewer_id,
    "review_content": URI.review_content,
    "accept_possibility": URI.accept_possibility
}

def AcceptPossibility_process(df):
    for _, record in df.iterrows():
        # Create URIs
        ReviewText = urllib.parse.quote("ReviewText_" + str(record['review_content']))  # URL Code
        ReviewText_uri = URIRef(URI[ReviewText])
        AcceptPossibility_id_uri = URIRef(URI["AcceptPossibility_" + str(record['accept_possibility'])])

        # Add the relationship to the graph
        g.add((ReviewText_uri, AcceptPossibility, AcceptPossibility_id_uri))

AcceptPossibility_process(dataframes['author_review'])

## Load Class Journal

In [99]:
dataframes['journal'].head(3)

Unnamed: 0,journal_name
0,VLDB Journal
1,"Circuits, Systems, and Signal Processing"
2,Computer Methods and Programs in Biomedicine


In [100]:
Journal = URI.Journal

Journal_properties = {
    "journal_name": URI.journal_name
}

def Journal_process(df):
    for _, record in df.iterrows():
        Journal = urllib.parse.quote("Journal_" + str(record['journal_name']))  # URL Code
        Journal_uri = URIRef(URI[ReviewText])
        g.add((Journal_uri, RDF.type, Author))
        for key, prop in Journal_properties.items():
            if pd.notna(record[key]):
                g.add((Journal_uri, prop, Literal(record[key])))

Journal_process(dataframes['journal'])

## Load Class Volumes

In [101]:
dataframes['journal_in_year'].head(3)

Unnamed: 0,journal_name,year
0,Journal of the Association for Information Sci...,2023
1,Sensors,2022
2,Multimedia Tools and Applications,2024


In [102]:
dataframes['paper'].head(3)

Unnamed: 0,title,abstract,pages,DOI,link,year
0,Challenges posed by hijacked journals in Scopus,This study presents and explains the phenomeno...,395-422,10.1002/asi.24855,https://www.scopus.com/inward/record.uri?eid=2...,2023
1,Normalizing Large Scale Sensor-Based MWD Data:...,In the context of geo-infrastructures and spec...,-,10.3390/s24041209,https://www.scopus.com/inward/record.uri?eid=2...,2022
2,Instant learning based on deep neural network ...,Biometric-based identity verification systems ...,32099-32122,10.1007/s11042-023-16751-6,https://www.scopus.com/inward/record.uri?eid=2...,2024


In [103]:
dataframes['paper_belong_to_journal'].head(3)

Unnamed: 0,start_id,end_id
0,10.1002/asi.24855,Journal of the Association for Information Sci...
1,10.3390/s24041209,Sensors
2,10.1007/s11042-023-16751-6,Multimedia Tools and Applications


In [104]:
# Combine 'journal_name' and 'year'
dataframes['journal_in_year']['volume'] = dataframes['journal_in_year'].apply(lambda row: f"{row['journal_name']} ({row['year']})", axis=1)

volume = dataframes['journal_in_year'][['volume', 'year']]
volume_in_journal = dataframes['journal_in_year'][['volume', 'journal_name', 'year']]

volume.head(3)

Unnamed: 0,volume,year
0,Journal of the Association for Information Sci...,2023
1,Sensors (2022),2022
2,Multimedia Tools and Applications (2024),2024


In [105]:
Volumes = URI.Volumes

properties = {
    "volume": URI.volume,
    "year": URI.year
}

def process(df):
    for _, record in df.iterrows():
        Journal = urllib.parse.quote("Volumes_" + str(record['volume']))  # URL Code
        Journal_uri = URIRef(URI[ReviewText])
        g.add((Journal_uri, RDF.type, Volumes))
        for key, prop in properties.items():
            if pd.notna(record[key]):
                g.add((Journal_uri, prop, Literal(record[key])))

process(volume)

## Load Property OfJournal

In [106]:
volume_in_journal.head(3)

Unnamed: 0,volume,journal_name,year
0,Journal of the Association for Information Sci...,Journal of the Association for Information Sci...,2023
1,Sensors (2022),Sensors,2022
2,Multimedia Tools and Applications (2024),Multimedia Tools and Applications,2024


In [107]:
OfJournal = URI.OfJournal

properties = {
    "journal_name": URI.journal_name,
    "volume": URI.volume
}

def process(df):
    for _, record in df.iterrows():
        # Create URIs
        firstText = urllib.parse.quote("Volumes_" + str(record['volume']))  # URL Code
        firstText_uri = URIRef(URI[firstText])
        secondText = urllib.parse.quote("Journal_" + str(record['journal_name']))  # URL Code
        secondText_uri = URIRef(URI[secondText])

        # Add the relationship to the graph
        g.add((firstText_uri, OfJournal, secondText_uri))

process(volume_in_journal)

## Load Class Proceedings

In [108]:
dataframes['proceeding'].head(3)

Unnamed: 0,proceeding_name,city
0,"19th Machine Translation Summit, MT Summit 2023",Macau
1,7th International Conference on Big Data and I...,Beijing
2,2023 5th International Conference on Artificia...,Dalian


In [109]:
Proceedings = URI.Proceedings

properties = {
    "proceeding_name": URI.proceeding_name,
    "city": URI.city
}

def process(df):
    for _, record in df.iterrows():
        Journal = urllib.parse.quote("Proceedings_" + str(record['proceeding_name']))  # URL Code
        Journal_uri = URIRef(URI[ReviewText])
        g.add((Journal_uri, RDF.type, Proceedings))
        for key, prop in properties.items():
            if pd.notna(record[key]):
                g.add((Journal_uri, prop, Literal(record[key])))

process(dataframes['proceeding'])

## Load Class Conference

In [110]:
dataframes['conference_detail'].head(3)

Unnamed: 0,name,year,city,proceeding_name
0,International Conference on Integration of Com...,2024,Pune,2023 International Conference on Integration o...
1,"International Conference on Electronics, Commu...",2023,Fukuoka,"6th International Conference on Electronics, C..."
2,IEEE International Conference on Next Generati...,2021,Vellore,2023 IEEE International Conference on Next Gen...


In [111]:
Conference = URI.Conference

properties = {
    "name": URI.name,
    "year": URI.year,
    "city": URI.city,
    "proceeding_name": URI.proceeding_name
}

def process(df):
    for _, record in df.iterrows():
        Journal = urllib.parse.quote("Conference_" + str(record['name']))  # URL Code
        Journal_uri = URIRef(URI[ReviewText])
        g.add((Journal_uri, RDF.type, Conference))
        for key, prop in properties.items():
            if pd.notna(record[key]):
                g.add((Journal_uri, prop, Literal(record[key])))

process(dataframes['conference_detail'])

## Load Class Workshop

In [112]:
dataframes['conference_detail']['IsWorkshop'] = np.random.choice([True, False], size=len(dataframes['conference_detail']))

In [113]:
dataframes['conference_detail'].head(5)

Unnamed: 0,name,year,city,proceeding_name,IsWorkshop
0,International Conference on Integration of Com...,2024,Pune,2023 International Conference on Integration o...,False
1,"International Conference on Electronics, Commu...",2023,Fukuoka,"6th International Conference on Electronics, C...",False
2,IEEE International Conference on Next Generati...,2021,Vellore,2023 IEEE International Conference on Next Gen...,True
3,"International Conference on Database Theory, ICDT",2019,Ioannina,22th International Conference on Database Theo...,True
4,"International Conference on Database Theory, ICDT",2020,Ioannina,23th International Conference on Database Theo...,True


In [114]:
Workshop = URI.Workshop

properties = {
    "name": URI.name,
    "year": URI.year,
    "city": URI.city,
    "proceeding_name": URI.proceeding_name,
    "IsWorkshop": URI.IsWorkshop
}

def process(df):
    for _, record in df.iterrows():
      if record['IsWorkshop']:
        Journal = urllib.parse.quote("Workshop_" + str(record['name']))  # URL Code
        Journal_uri = URIRef(URI[ReviewText])
        g.add((Journal_uri, RDF.type, Workshop))
        for key, prop in properties.items():
            if pd.notna(record[key]):
                g.add((Journal_uri, prop, Literal(record[key])))

process(dataframes['conference_detail'])

## Load Class RegularConference

In [115]:
dataframes['conference_detail'].head(5)

Unnamed: 0,name,year,city,proceeding_name,IsWorkshop
0,International Conference on Integration of Com...,2024,Pune,2023 International Conference on Integration o...,False
1,"International Conference on Electronics, Commu...",2023,Fukuoka,"6th International Conference on Electronics, C...",False
2,IEEE International Conference on Next Generati...,2021,Vellore,2023 IEEE International Conference on Next Gen...,True
3,"International Conference on Database Theory, ICDT",2019,Ioannina,22th International Conference on Database Theo...,True
4,"International Conference on Database Theory, ICDT",2020,Ioannina,23th International Conference on Database Theo...,True


In [116]:
RegularConference = URI.RegularConference

properties = {
    "name": URI.name,
    "year": URI.year,
    "city": URI.city,
    "proceeding_name": URI.proceeding_name,
    "IsWorkshop": URI.IsWorkshop
}

def process(df):
    for _, record in df.iterrows():
      if not record['IsWorkshop']:
        Journal = urllib.parse.quote("RegularConference_" + str(record['name']))  # URL Code
        Journal_uri = URIRef(URI[ReviewText])
        g.add((Journal_uri, RDF.type, RegularConference))
        for key, prop in properties.items():
            if pd.notna(record[key]):
                g.add((Journal_uri, prop, Literal(record[key])))

process(dataframes['conference_detail'])

## Load Property WorkshopIn

In [117]:
conference_detail = dataframes['conference_detail']
paper_presented_in_conference = dataframes['paper_presented_in_conference']

# Remove leading and trailing spaces and convert the string to lowercase to ensure a match
conference_detail['proceeding_name'] = conference_detail['proceeding_name'].str.strip().str.lower()
paper_presented_in_conference['end_id'] = paper_presented_in_conference['end_id'].str.strip().str.lower()

# Create New Column to Save IsWorkshop
paper_presented_in_conference['IsWorkshop'] = False

# Check for inclusion relationships and update the IsWorkshop column
for i, end_id in paper_presented_in_conference['end_id'].items():
    match = conference_detail[conference_detail['proceeding_name'].str.contains(end_id, na=False)]
    if not match.empty:
        paper_presented_in_conference.at[i, 'IsWorkshop'] = match['IsWorkshop'].values[0]
        paper_presented_in_conference.at[i, 'proceeding_name'] = match['proceeding_name'].values[0]

# Select Columns Needed
paper_presented_in_workshop = paper_presented_in_conference[['start_id', 'end_id', 'proceeding_name', 'IsWorkshop']]

paper_presented_in_workshop.head(5)

  paper_presented_in_conference.at[i, 'proceeding_name'] = match['proceeding_name'].values[0]
  match = conference_detail[conference_detail['proceeding_name'].str.contains(end_id, na=False)]


Unnamed: 0,start_id,end_id,proceeding_name,IsWorkshop
0,10.1109/ICICIS56802.2023.10430275,international conference on integration of com...,2023 international conference on integration o...,False
1,10.1145/3592307.3592335,"international conference on electronics, commu...","6th international conference on electronics, c...",False
2,10.1109/NEleX59773.2023.10421720,ieee international conference on next generati...,2023 ieee international conference on next gen...,True
3,10.4230/LIPIcs.ICDT.2023.14,"international conference on database theory, icdt",22th international conference on database theo...,True
4,10.4230/LIPIcs.ICDT.2023.7,"international conference on database theory, icdt",22th international conference on database theo...,True


In [118]:
WorkshopIn = URI.WorkshopIn

properties = {
    "start_id": URI.start_id,
    "end_id": URI.end_id,
    "IsWorkshop": URI.IsWorkshop,
    "proceeding_name": URI.proceeding_name
}

def process(df):
    for _, record in df.iterrows():
      if record['IsWorkshop']:
        # Create URIs
        firstText = urllib.parse.quote("Workshop_" + str(record['end_id']))  # URL Code
        firstText_uri = URIRef(URI[firstText])
        secondText = urllib.parse.quote("Proceedings_" + str(record['proceeding_name']))  # URL Code
        secondText_uri = URIRef(URI[secondText])

        # Add the relationship to the graph
        g.add((firstText_uri, WorkshopIn, secondText_uri))

process(paper_presented_in_workshop)

## Load Property ConIn

In [119]:
ConIn = URI.ConIn

properties = {
    "start_id": URI.start_id,
    "end_id": URI.end_id,
    "IsWorkshop": URI.IsWorkshop,
    "proceeding_name": URI.proceeding_name
}

def process(df):
    for _, record in df.iterrows():
      if not record['IsWorkshop']:
        # Create URIs
        firstText = urllib.parse.quote("Workshop_" + str(record['end_id']))  # URL Code
        firstText_uri = URIRef(URI[firstText])
        secondText = urllib.parse.quote("Proceedings_" + str(record['proceeding_name']))  # URL Code
        secondText_uri = URIRef(URI[secondText])

        # Add the relationship to the graph
        g.add((firstText_uri, ConIn, secondText_uri))

process(paper_presented_in_workshop)

## Load Property IsInProceeding

In [120]:
dataframes['paper_belong_to_proceeding'].head(3)

Unnamed: 0,start_id,end_id
0,10.1145/3623509.3633352,ACM International Conference Proceeding Series
1,10.1051/e3sconf/202448204001,E3S Web of Conferences
2,10.1145/3636243.3636263,ACM International Conference Proceeding Series


In [121]:
IsInProceeding = URI.IsInProceeding

properties = {
    "start_id": URI.start_id,
    "end_id": URI.end_id
}

def process(df):
    for _, record in df.iterrows():
        # Create URIs
        firstText = urllib.parse.quote("ResearchPaper_" + str(record['start_id']))  # URL Code
        firstText_uri = URIRef(URI[firstText])
        secondText = urllib.parse.quote("Conference_" + str(record['end_id']))  # URL Code
        secondText_uri = URIRef(URI[secondText])

        # Add the relationship to the graph
        g.add((firstText_uri, IsInProceeding, secondText_uri))

process(dataframes['paper_belong_to_proceeding'])

## Load Property IsInJournal

In [122]:
merged_df = pd.merge(
    dataframes['paper'],
    dataframes['paper_belong_to_journal'],
    left_on='DOI',
    right_on='start_id',
    how='inner'
)

# Create 'volume' column，Combine 'end_id' and 'year' column
merged_df['volume'] = merged_df.apply(lambda row: f"{row['end_id']} ({row['year']})", axis=1)

# Select columns
paper_belong_to_volume = merged_df[['DOI', 'volume']]

paper_belong_to_volume.head(3)

Unnamed: 0,DOI,volume
0,10.1002/asi.24855,Journal of the Association for Information Sci...
1,10.3390/s24041209,Sensors (2022)
2,10.1007/s11042-023-16751-6,Multimedia Tools and Applications (2024)


In [123]:
IsInJournal = URI.IsInJournal

properties = {
    "DOI": URI.DOI,
    "volume": URI.volume
}

def process(df):
    for _, record in df.iterrows():
        # Create URIs
        firstText = urllib.parse.quote("ResearchPaper_" + str(record['DOI']))  # URL Code
        firstText_uri = URIRef(URI[firstText])
        secondText = urllib.parse.quote("Volumes_" + str(record['volume']))  # URL Code
        secondText_uri = URIRef(URI[secondText])

        # Add the relationship to the graph
        g.add((firstText_uri, IsInJournal, secondText_uri))

process(paper_belong_to_volume)

# Export graph

In [124]:
for stmt in islice(g, 5):
    print(stmt)

(rdflib.term.URIRef('http://SDM_lab_2.org/Author_8859945700'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://SDM_lab_2.org/Author'))
(rdflib.term.URIRef('http://SDM_lab_2.org/Reviewer_58897001100'), rdflib.term.URIRef('http://SDM_lab_2.org/WriteReview'), rdflib.term.URIRef('http://SDM_lab_2.org/SubmittedPaper_10.1109/TNET.2024.3352029'))
(rdflib.term.URIRef('http://SDM_lab_2.org/Reviewer_57762424900'), rdflib.term.URIRef('http://SDM_lab_2.org/AcceptPossibility'), rdflib.term.Literal('0.8059984858614481', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')))
(rdflib.term.URIRef('http://SDM_lab_2.org/Author_58865334700'), rdflib.term.URIRef('http://SDM_lab_2.org/AuthorName'), rdflib.term.Literal('Shi Y.'))
(rdflib.term.URIRef('http://SDM_lab_2.org/Author_57224980204'), rdflib.term.URIRef('http://SDM_lab_2.org/Write'), rdflib.term.URIRef('http://SDM_lab_2.org/ResearchPaper_10.1016/j.ins.2024.120272'))


In [125]:
# Export as Turtle
g.serialize(destination='/Users/zzy13/Desktop/Classes_at_UPC/SDM_Semantic_data_management/Lab_2/Lab_doc/data/abox_2.ttl', format='turtle')

<Graph identifier=Nf65694c4954d46a7a918b31cbeb40be4 (<class 'rdflib.graph.Graph'>)>