In [38]:
from neo4j import GraphDatabase
import pandas as pd

### **Database connection**

Here, the connection settings to link this script with Neo4j are set up.

In [39]:
# Connection settings
uri = "neo4j://localhost:7687"
# `pass` is a reserved Python keyword and cannot be used as a variable name,
# so the credential is stored under `password` instead.
# NOTE(review): the password is hard-coded; prefer an environment variable or
# getpass before sharing this notebook.
password = "pass"
driver = GraphDatabase.driver(uri, auth=("neo4j", password))

### **Data import**

The data to be uploaded is imported from CSV files.

In [40]:
# Data import
# Each CSV under ./data is read into its own DataFrame. The loader cells below
# look columns up by their exact header text (e.g. ":ID", ":START_ID",
# ":END_ID"), so the CSV headers must match those strings.
# Nodes (one DataFrame per node label)
authors = pd.read_csv('./data/auth.csv')
articles = pd.read_csv('./data/articles.csv')
proceedings = pd.read_csv('./data/proceedings.csv')
conferences = pd.read_csv('./data/conferences.csv')
volumes = pd.read_csv('./data/volumes.csv')
journals = pd.read_csv('./data/journals.csv')
journal_editors = pd.read_csv('./data/journal_editors.csv')
conference_chair = pd.read_csv('./data/conference_chair.csv')
# NOTE(review): "rewiers" is presumably the on-disk spelling — confirm before renaming.
reviewer_groups = pd.read_csv('./data/assigned_rewiers.csv')
keywords = pd.read_csv('./data/keywords.csv')

# Relations (one DataFrame per relationship type; rows carry ":START_ID"/":END_ID")
authors_articles = pd.read_csv('./data/art_auth.csv')
articles_proceedings= pd.read_csv('./data/art_in_proc.csv')
articles_volumes = pd.read_csv('./data/art_in_vol.csv')
articles_keywords = pd.read_csv('./data/art_keyword.csv')
article_article = pd.read_csv('./data/art_quote_art.csv')
groupreviewers_article = pd.read_csv('./data/assignedRewiers_rev_art.csv')
authors_reviewergroup = pd.read_csv('./data/auth_is_reviewer.csv')
conferencechairs_conferences = pd.read_csv('./data/confChair_conf.csv')
journaleditors_journals = pd.read_csv('./data/jourEdit_journ.csv')
journaleditors_reviewergroups = pd.read_csv('./data/journEd_reviewers.csv')
proceedings_conferences = pd.read_csv('./data/proc_in_conf.csv')
volumes_journals = pd.read_csv('./data/vol_in_journ.csv')

### **Data Loading**

In this stage we will load the data into the graph database.

#### **Nodes**

*Authors:*

In [41]:
# Implementation
def create_authors(tx, authors):
    """Merge one `Author` node per row of `authors`.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        authors: DataFrame with the columns ":ID" and "author:string". Rows
            are read with `.loc` using the labels 0..len(authors)-1.
    """
    cypher = "MERGE (:Author {author_id: $author_id, name: $name})"
    for row_idx in range(len(authors)):
        params = {
            "author_id": int(authors.loc[row_idx, ":ID"]),
            "name": authors.loc[row_idx, "author:string"],
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_authors,
# together with the authors DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_authors, authors)

# Index
def index_author(tx):
    """Create the `author_index` index on `Author.author_id`.

    Args:
        tx: Transaction-like object; only `tx.run(query)` is used.
    """
    tx.run("""
    CREATE INDEX author_index
    FOR (n:Author)
    ON (n.author_id)
    """)

# The result of write_transaction is deliberately not bound to a name:
# index_author returns nothing, and assigning that result to `articles`
# replaced the articles DataFrame loaded in the data-import cell.
with driver.session() as session:
    session.write_transaction(index_author)

*Articles:*

In [42]:
# Implementation
def create_articles(tx, articles):
    """Merge one `Article` node per row of `articles`.

    Every property except the id is passed through `str()`, so missing
    values are stored as their string form rather than as nulls.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        articles: DataFrame with the columns "article:ID", "title:string[]",
            "ee:string[]", "pages:string" and "abstract:string[]". Rows are
            read with `.loc` using the labels 0..len(articles)-1.
    """
    cypher = "MERGE (:Article {article_id: $article_id, title: $title, doi:$doi, pages:$pages, abstract:$abstract})"
    for row_idx in range(len(articles)):
        params = {
            "article_id": int(articles.loc[row_idx, "article:ID"]),
            "title": str(articles.loc[row_idx, "title:string[]"]),
            "doi": str(articles.loc[row_idx, "ee:string[]"]),
            "pages": str(articles.loc[row_idx, "pages:string"]),
            "abstract": str(articles.loc[row_idx, "abstract:string[]"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_articles,
# together with the articles DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_articles, articles)

# Index
def index_article(tx):
    """Create the `article_index` index on `Article.article_id`.

    Args:
        tx: Transaction-like object; only `tx.run(query)` is used.
    """
    tx.run("""
    CREATE INDEX article_index
    FOR (n:Article)
    ON (n.article_id)
    """)

# The result of write_transaction is deliberately not bound to a name:
# index_article returns nothing, and assigning that result to `articles`
# replaced the articles DataFrame with None.
with driver.session() as session:
    session.write_transaction(index_article)

*Journals:*

In [43]:
# Implementation
def create_journals(tx, journals):
    """Merge one `Journal` node per row of `journals`.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        journals: DataFrame with the columns ":ID" and "journal:string". Rows
            are read with `.loc` using the labels 0..len(journals)-1.
    """
    cypher = "MERGE (:Journal {journal_id: $journal_id, name: $name})"
    for row_idx in range(len(journals)):
        params = {
            "journal_id": int(journals.loc[row_idx, ":ID"]),
            "name": journals.loc[row_idx, "journal:string"],
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_journals,
# together with the journals DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_journals, journals)

*Conference:*

In [44]:
# Implementation
def create_conference(tx, conferences):
    """Merge one `Conference` node per row of `conferences`.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        conferences: DataFrame with the columns "conference:ID" and
            "name:string[]". Rows are read with `.loc` using the labels
            0..len(conferences)-1.
    """
    cypher = "MERGE (:Conference {conference_id: $conference_id, name: $name})"
    for row_idx in range(len(conferences)):
        params = {
            "conference_id": int(conferences.loc[row_idx, "conference:ID"]),
            "name": conferences.loc[row_idx, "name:string[]"],
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_conference,
# together with the conferences DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_conference, conferences)

*Volume:*

In [45]:
# Implementation
def create_volumes(tx, volumes):
    """Merge one `Volume` node per row of `volumes`.

    The volume id is stored as a string because both edge loaders that match
    on `Volume.volume_id` (Article→Volume and Volume→Journal) pass the id
    through `str()`. Storing the raw CSV value meant numeric ids were written
    as numbers and never matched the string used by those loaders.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        volumes: DataFrame with the columns "volume:ID" and "date:date". Rows
            are read with `.loc` using the labels 0..len(volumes)-1.
    """
    cypher = "MERGE (:Volume {volume_id: $volume_id, date: $date})"
    for row_idx in range(len(volumes)):
        params = {
            # str() is a no-op for ids already parsed as text and normalises numeric ids.
            "volume_id": str(volumes.loc[row_idx, "volume:ID"]),
            "date": volumes.loc[row_idx, "date:date"],
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_volumes,
# together with the volumes DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_volumes, volumes)

*Journal editor:*

In [46]:
# Implementation
def create_journalEditors(tx, journal_editors):
    """Merge one `JournalEditors` node per row of `journal_editors`.

    The node property is named `journalEditor_id` while the query parameter
    is `$journalEditors_id`; both spellings are kept exactly as in the query.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        journal_editors: DataFrame with the columns "journal_editors:ID" and
            "name:string[]". Rows are read with `.loc` using the labels
            0..len(journal_editors)-1.
    """
    cypher = "MERGE (:JournalEditors {journalEditor_id: $journalEditors_id, name: $name})"
    for row_idx in range(len(journal_editors)):
        params = {
            "journalEditors_id": int(journal_editors.loc[row_idx, "journal_editors:ID"]),
            "name": journal_editors.loc[row_idx, "name:string[]"],
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_journalEditors,
# together with the journal_editors DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_journalEditors, journal_editors)

*Conference chair:*

In [47]:
# Implementation
def create_conferenceChairs(tx, conference_chair):
    """Merge one `ConferenceChair` node per row of `conference_chair`.

    The "name:string[]" column is stored under the `organizers` property.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        conference_chair: DataFrame with the columns "conference_chair:ID"
            and "name:string[]". Rows are read with `.loc` using the labels
            0..len(conference_chair)-1.
    """
    cypher = "MERGE (:ConferenceChair {conferenceChair_id: $conferenceChair_id, organizers: $organizers})"
    for row_idx in range(len(conference_chair)):
        params = {
            "conferenceChair_id": int(conference_chair.loc[row_idx, "conference_chair:ID"]),
            "organizers": conference_chair.loc[row_idx, "name:string[]"],
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_conferenceChairs,
# together with the conference_chair DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_conferenceChairs, conference_chair)

*Reviewer Groups:*

In [48]:
# Implementation
def create_assignedReviewers(tx, reviewer_groups):
    """Merge one `ReviewGroup` node per row of `reviewer_groups`.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        reviewer_groups: DataFrame with the column "rewier:ID". Rows are read
            with `.loc` using the labels 0..len(reviewer_groups)-1.
    """
    cypher = "MERGE (:ReviewGroup {reviewGroup_id: $reviewGroup_id})"
    for row_idx in range(len(reviewer_groups)):
        group_id = int(reviewer_groups.loc[row_idx, "rewier:ID"])
        tx.run(cypher, reviewGroup_id=group_id)

# Invocation: open a session on the shared driver and hand create_assignedReviewers,
# together with the reviewer_groups DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_assignedReviewers, reviewer_groups)

# Index
def index_reviewGroup(tx):
    """Create the `reviewGroup_index` index on `ReviewGroup.reviewGroup_id`.

    Args:
        tx: Transaction-like object; only `tx.run(query)` is used.
    """
    tx.run("""
    CREATE INDEX reviewGroup_index
    FOR (n:ReviewGroup)
    ON (n.reviewGroup_id)
    """)

# The result of write_transaction is deliberately not bound to a name:
# index_reviewGroup returns nothing, and assigning that result to `articles`
# replaced the articles DataFrame with None.
with driver.session() as session:
    session.write_transaction(index_reviewGroup)

*Keywords:*

In [49]:
# Implementation
def create_keywords(tx, keywords):
    """Merge one `Keyword` node per row of `keywords`.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        keywords: DataFrame with the columns "keyword:ID" and "text:string".
            Rows are read with `.loc` using the labels 0..len(keywords)-1.
    """
    cypher = "MERGE (:Keyword {keyword_id: $keyword_id, word: $word})"
    for row_idx in range(len(keywords)):
        params = {
            "keyword_id": int(keywords.loc[row_idx, "keyword:ID"]),
            "word": keywords.loc[row_idx, "text:string"],
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_keywords,
# together with the keywords DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_keywords, keywords)

*Proceedings:*

In [50]:
# Implementation
def create_proceedings(tx, proceedings):
    """Merge one `Proceeding` node per row of `proceedings`.

    The "editor:string[]" column is stored under the `publisher` property.
    `publisher`, `series` and `isbn` are passed through `str()`; `title` and
    `url` are passed as read from the frame.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        proceedings: DataFrame with the columns "proceedings:ID",
            "title:string", "url:string", "editor:string[]",
            "series:string[]", "isbn:string[]" and "year:int". Rows are read
            with `.loc` using the labels 0..len(proceedings)-1.
    """
    cypher = "MERGE (:Proceeding {proceeding_id: $proceeding_id, title: $title, url: $url, isbn: $isbn, publisher: $publisher, series: $series, year:$year })"
    for row_idx in range(len(proceedings)):
        params = {
            "proceeding_id": int(proceedings.loc[row_idx, "proceedings:ID"]),
            "title": proceedings.loc[row_idx, "title:string"],
            "url": proceedings.loc[row_idx, "url:string"],
            "publisher": str(proceedings.loc[row_idx, "editor:string[]"]),
            "series": str(proceedings.loc[row_idx, "series:string[]"]),
            "isbn": str(proceedings.loc[row_idx, "isbn:string[]"]),
            "year": int(proceedings.loc[row_idx, "year:int"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_proceedings,
# together with the proceedings DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_proceedings, proceedings)

# NOTE: the driver is intentionally left open here. The edge-loading cells
# below all call driver.session(), so closing the driver at this point would
# leave them working against a closed driver. Call driver.close() once the
# last load has finished.

#### **Edges**

*Author-Article*

In [51]:
# Implementation
def create_written_by(tx, authors_articles):
    """Merge an `Article -[:written_by]-> Author` edge per row.

    ":START_ID" is matched against `Article.article_id` and ":END_ID" against
    `Author.author_id`; the edge carries a `mainAuthor` flag.

    NOTE(review): the flag is built with `bool(...)`, which is True for any
    non-empty string — confirm that "MAIN_AUTH:bool" is parsed as a boolean
    column and not as text.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        authors_articles: DataFrame with the columns ":START_ID", ":END_ID"
            and "MAIN_AUTH:bool". Rows are read with `.loc` using the labels
            0..len(authors_articles)-1.
    """
    cypher = "MATCH (n: Author {author_id: $end}), (m: Article {article_id: $start}) MERGE (n) <- [:written_by {mainAuthor: $mainAuthor}] - (m) "
    for row_idx in range(len(authors_articles)):
        params = {
            "start": int(authors_articles.loc[row_idx, ":START_ID"]),
            "end": int(authors_articles.loc[row_idx, ":END_ID"]),
            "mainAuthor": bool(authors_articles.loc[row_idx, "MAIN_AUTH:bool"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_written_by,
# together with the authors_articles DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_written_by, authors_articles)

*Articles-Proceeding*

In [52]:
# Implementation
def create_published_in(tx, articles_proceedings):
    """Merge an `Article -[:published_in]-> Proceeding` edge per row.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        articles_proceedings: DataFrame with the columns ":START_ID"
            (article id) and ":END_ID" (proceeding id). Rows are read with
            `.loc` using the labels 0..len(articles_proceedings)-1.
    """
    cypher = "MATCH (n: Article {article_id: $start}), (m: Proceeding {proceeding_id: $end}) MERGE (n) - [:published_in] -> (m) "
    for row_idx in range(len(articles_proceedings)):
        params = {
            "start": int(articles_proceedings.loc[row_idx, ":START_ID"]),
            "end": int(articles_proceedings.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_published_in,
# together with the articles_proceedings DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_published_in, articles_proceedings)

*Proceedings-Conference*

In [53]:
# Implementation
def create_belongs_to(tx, proceedings_conferences):
    """Merge a `Proceeding -[:belongs_to]-> Conference` edge per row.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        proceedings_conferences: DataFrame with the columns ":START_ID"
            (proceeding id) and ":END_ID" (conference id). Rows are read with
            `.loc` using the labels 0..len(proceedings_conferences)-1.
    """
    cypher = "MATCH (n: Proceeding {proceeding_id: $start}), (m: Conference {conference_id: $end}) MERGE (n) - [:belongs_to] -> (m) "
    for row_idx in range(len(proceedings_conferences)):
        params = {
            "start": int(proceedings_conferences.loc[row_idx, ":START_ID"]),
            "end": int(proceedings_conferences.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_belongs_to,
# together with the proceedings_conferences DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_belongs_to, proceedings_conferences)

*Article-Article*

In [54]:
# Implementation
def create_cited_by(tx, article_article):
    """Merge an `Article -[:cited_by]-> Article` edge per row.

    The edge points from the ":START_ID" article to the ":END_ID" article.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        article_article: DataFrame with the columns ":START_ID" and ":END_ID"
            (both article ids). Rows are read with `.loc` using the labels
            0..len(article_article)-1.
    """
    cypher = "MATCH (n: Article {article_id: $start}), (m: Article {article_id: $end}) MERGE (n) - [:cited_by] -> (m) "
    for row_idx in range(len(article_article)):
        params = {
            "start": int(article_article.loc[row_idx, ":START_ID"]),
            "end": int(article_article.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_cited_by,
# together with the article_article DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_cited_by, article_article)

*Article-Volume*

In [55]:
# Implementation
def create_published_in(tx, articles_volumes):
    """Merge an `Article -[:published_in]-> Volume` edge per row.

    The volume id is matched as a string (`str(...)`), the article id as an
    int.

    Note: this definition rebinds the name `create_published_in`, which an
    earlier cell already defines for the Article→Proceeding edges.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        articles_volumes: DataFrame with the columns ":START_ID" (article id)
            and ":END_ID" (volume id). Rows are read with `.loc` using the
            labels 0..len(articles_volumes)-1.
    """
    cypher = "MATCH (n: Article {article_id: $start}), (m: Volume {volume_id: $end}) MERGE (n) - [:published_in] -> (m) "
    for row_idx in range(len(articles_volumes)):
        params = {
            "start": int(articles_volumes.loc[row_idx, ":START_ID"]),
            "end": str(articles_volumes.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_published_in,
# together with the articles_volumes DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_published_in, articles_volumes)

*Volume-Journal*

In [56]:
# Implementation
def create_belongs_to(tx, volumes_journals):
    """Merge a `Volume -[:belongs_to]-> Journal` edge per row.

    The volume id is matched as a string (`str(...)`), the journal id as an
    int.

    Note: this definition rebinds the name `create_belongs_to`, which an
    earlier cell already defines for the Proceeding→Conference edges.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        volumes_journals: DataFrame with the columns ":START_ID" (volume id)
            and ":END_ID" (journal id). Rows are read with `.loc` using the
            labels 0..len(volumes_journals)-1.
    """
    cypher = "MATCH (n: Volume {volume_id: $start}), (m: Journal {journal_id: $end}) MERGE (n) - [:belongs_to] -> (m) "
    for row_idx in range(len(volumes_journals)):
        params = {
            "start": str(volumes_journals.loc[row_idx, ":START_ID"]),
            "end": int(volumes_journals.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_belongs_to,
# together with the volumes_journals DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_belongs_to, volumes_journals)

*Journal-Journal Editor*

In [57]:
# Implementation
def create_edited_by(tx, journaleditors_journals):
    """Merge a `JournalEditors -[:edits]-> Journal` edge per row.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        journaleditors_journals: DataFrame with the columns ":START_ID"
            (journal-editor id) and ":END_ID" (journal id). Rows are read
            with `.loc` using the labels 0..len(journaleditors_journals)-1.
    """
    cypher = "MATCH (n: JournalEditors {journalEditor_id: $start}), (m: Journal {journal_id: $end}) MERGE (n) - [:edits] -> (m) "
    for row_idx in range(len(journaleditors_journals)):
        params = {
            "start": int(journaleditors_journals.loc[row_idx, ":START_ID"]),
            "end": int(journaleditors_journals.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_edited_by,
# together with the journaleditors_journals DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_edited_by, journaleditors_journals)

*Journal Editor - Review groups*

In [58]:
# Implementation
def create_assignes(tx, journaleditors_reviewergroups):
    """Merge a `JournalEditors -[:assignes]-> ReviewGroup` edge per row.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        journaleditors_reviewergroups: DataFrame with the columns ":START_ID"
            (journal-editor id) and ":END_ID" (review-group id). Rows are
            read with `.loc` using the labels
            0..len(journaleditors_reviewergroups)-1.
    """
    cypher = "MATCH (n: JournalEditors {journalEditor_id: $start}), (m: ReviewGroup {reviewGroup_id: $end}) MERGE (n) - [:assignes] -> (m) "
    for row_idx in range(len(journaleditors_reviewergroups)):
        params = {
            "start": int(journaleditors_reviewergroups.loc[row_idx, ":START_ID"]),
            "end": int(journaleditors_reviewergroups.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_assignes,
# together with the journaleditors_reviewergroups DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_assignes, journaleditors_reviewergroups)

*Author-Reviewer Group*

In [59]:
# Implementation
def create_reviewer_in(tx, authors_reviewergroup):
    """Merge an `Author -[:reviewer_in]-> ReviewGroup` edge per row.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        authors_reviewergroup: DataFrame with the columns ":START_ID"
            (author id) and ":END_ID" (review-group id). Rows are read with
            `.loc` using the labels 0..len(authors_reviewergroup)-1.
    """
    cypher = "MATCH (n: Author {author_id: $start}), (m: ReviewGroup {reviewGroup_id: $end}) MERGE (n) - [:reviewer_in] -> (m) "
    for row_idx in range(len(authors_reviewergroup)):
        params = {
            "start": int(authors_reviewergroup.loc[row_idx, ":START_ID"]),
            "end": int(authors_reviewergroup.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_reviewer_in,
# together with the authors_reviewergroup DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_reviewer_in, authors_reviewergroup)

*Reviewer Group-Article*

In [60]:
def create_reviewed_by(tx, groupreviewers_article):
    """Merge a `ReviewGroup -[:reviews]-> Article` edge per row.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        groupreviewers_article: DataFrame with the columns ":START_ID"
            (review-group id) and ":END_ID" (article id). Rows are read with
            `.loc` using the labels 0..len(groupreviewers_article)-1.
    """
    cypher = "MATCH (n: ReviewGroup {reviewGroup_id: $start}), (m: Article {article_id: $end}) MERGE (n) - [:reviews] -> (m) "
    for row_idx in range(len(groupreviewers_article)):
        params = {
            "start": int(groupreviewers_article.loc[row_idx, ":START_ID"]),
            "end": int(groupreviewers_article.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_reviewed_by,
# together with the groupreviewers_article DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_reviewed_by, groupreviewers_article)

*Conference Chair-Conference*

In [61]:
def create_organized_by(tx, conferencechairs_conferences):
    """Merge a `ConferenceChair -[:organizes]-> Conference` edge per row.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        conferencechairs_conferences: DataFrame with the columns ":START_ID"
            (conference-chair id) and ":END_ID" (conference id). Rows are
            read with `.loc` using the labels
            0..len(conferencechairs_conferences)-1.
    """
    cypher = "MATCH (n: ConferenceChair {conferenceChair_id: $start}), (m: Conference {conference_id: $end}) MERGE (n) - [:organizes] -> (m) "
    for row_idx in range(len(conferencechairs_conferences)):
        params = {
            "start": int(conferencechairs_conferences.loc[row_idx, ":START_ID"]),
            "end": int(conferencechairs_conferences.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_organized_by,
# together with the conferencechairs_conferences DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_organized_by, conferencechairs_conferences)

*Article-Keywords*

In [62]:
def create_talks_about(tx, articles_keywords):
    """Merge an `Article -[:talks_about]-> Keyword` edge per row.

    The relationship is written with MERGE, like every other edge loader in
    this notebook. With CREATE, re-running the cell or a duplicated CSV row
    added a second, parallel `talks_about` relationship between the same
    pair of nodes.

    Args:
        tx: Transaction-like object; only `tx.run(query, **params)` is used.
        articles_keywords: DataFrame with the columns ":START_ID" (article
            id) and ":END_ID" (keyword id). Rows are read with `.loc` using
            the labels 0..len(articles_keywords)-1.
    """
    cypher = "MATCH (n: Article {article_id: $start}), (m: Keyword {keyword_id: $end}) MERGE (n) - [:talks_about] -> (m)"
    for row_idx in range(len(articles_keywords)):
        params = {
            "start": int(articles_keywords.loc[row_idx, ":START_ID"]),
            "end": int(articles_keywords.loc[row_idx, ":END_ID"]),
        }
        tx.run(cypher, **params)

# Invocation: open a session on the shared driver and hand create_talks_about,
# together with the articles_keywords DataFrame, to session.write_transaction.
with driver.session() as session:
    session.write_transaction(create_talks_about, articles_keywords)