In [56]:
from semanticscholar import SemanticScholar
import csv
import pandas as pd
import random
import json
import shutil
import numpy as np

import os
from neo4j import GraphDatabase


In [57]:
NEO4J_URI = "bolt://localhost:7687"  # Default for Neo4j Desktop
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "pass1234"  # Change this to your actual password
IMPORT_FOLDER = os.path.expanduser("~/Library/Application Support/Neo4j Desktop/Application/relate-data/dbmss/dbms-6f9440a9-c56d-4ebc-b244-90d92b771350/import/")

In [58]:
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

session = driver.session() 

In [59]:
# Function to generate review comments
def generate_review_comment(decision):
    if decision == 1:  
        return random.choice([
            "This paper makes a significant contribution.",
            "Findings are novel.",
            "Well-written and clear paper.",
            "Good Experimental scheme." ])
    else:  
        return random.choice([
            "The methodology has flaws.",
            "Minor and unclear contribution.",
            "TNo novelty.",
            "Bad writing quality" ])

In [60]:
# Add property 'decision' for review relationship

# First generate fake reviews decisions 
reviews_df = pd.read_csv("reviews.csv")
reviews_df["Decision"] = np.random.choice([0, 1], size=len(reviews_df))
reviews_df["Comment"] = reviews_df.apply(lambda x: generate_review_comment(x["Decision"]), axis=1)

# Save to CSV
reviews_df[["Paper_DOI", "Reviewer_ID", "Decision", "Comment"]].to_csv("reviews_decisions.csv", index=False)


In [61]:
# Cypher to update reviews
shutil.copy("reviews_decisions.csv", IMPORT_FOLDER)

query = """
            LOAD CSV WITH HEADERS FROM 'file:///reviews_decisions.csv' AS row
            MATCH (a:Author {ID: toInteger(row.Reviewer_ID)})-[r:Reviews]->(p:Paper {DOI: row.Paper_DOI})
            SET r.decision = row.Decision, r.comment=row.Comment
        """

session.run(query)


<neo4j._sync.work.result.Result at 0x12bd04a50>

In [62]:
# Cypher to set final decision of a paper
query = """
            MATCH (a:Author)-[r:Reviews]->(p:Paper)
            WITH p, SUM(toInteger(r.decision)) AS total_decision
            SET p.Acceptance = (total_decision >= 2)
        """

session.run(query)

<neo4j._sync.work.result.Result at 0x12c37c550>

In [63]:
# Assign Synthetic Affiliations to authors 

authors_df = pd.read_csv("Author_nodes.csv")

# Generate lists of possible affiliations
universities = [
    "Universitat Politecnica de Catalunya", "Stanford University", "MIT", 
    "Harvard University", "University of California, Berkeley", "ETH Zurich",
    "University of Toronto", "University of Cambridge",
]

companies = [
    "Google Research", "Microsoft Research", "Meta AI",
    "DeepMind", "OpenAI", "IBM Research", "AWS",
    "NVIDIA Research", "Adobe Research", "Intel Labs"
]

all_affiliations = universities + companies


In [64]:
affiliations = []
for _,auth in authors_df.iterrows():
    affiliations.append({
        "Author_ID" : auth["Author_ID"],
        "Affiliation" : random.choice(all_affiliations)
    })

affiliations_df = pd.DataFrame(affiliations)
affiliations_df.to_csv("affiliations.csv", index=False)

In [None]:
# Cypher Query to update authors
shutil.copy("affiliations.csv", IMPORT_FOLDER)

query = """
            LOAD CSV WITH HEADERS FROM "file:///affiliations.csv" AS row
            MATCH (a:Author {ID: toInteger(row.Author_ID)})
            SET a.affiliation = row.Affiliation
        """

session.run(query)
