This file executes the queries by connecting to the neo4j database and executing cypher commands.

In [1]:
from faker import Faker
import pandas as pd
import random
from tqdm import tqdm
import uuid
from neo4j import GraphDatabase
import subprocess

In [2]:
# ENTER NEO4J CREDENTIALS HERE
uri = "bolt://localhost:7687"  
username = "neo4j"  
password = "12345678"  

In [3]:
# Start neo4j
try:
    command = "neo4j start"
    result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
    print(result.stdout)
except subprocess.CalledProcessError as e:
    print("Failed to start neo4j - it may already be running") 

Failed to start neo4j - it may already be running


In [4]:
# Establish connection to the Neo4j database
driver = GraphDatabase.driver(uri, auth=(username, password))

# Create query execution function
def execute_query(query):
    try:
        with driver.session() as session:
            result = session.run(query)
            return [record.data() for record in result]
    except Exception as e:
        print("An error occurred:", e)

# Query 1: Find the top 3 most cited papers of each conference.

```python
MATCH (c:CONFERENCE_WORKSHOP)<-[:PUBLISHED_IN]-(p:PAPER) # Match conferences and papers
OPTIONAL MATCH (p)<-[:CITES]-(citing:PAPER) # Match papers citing other papers, if any
WITH c, p, COUNT(citing) AS citationCount # Count the number of citations per paper
ORDER BY c.name, citationCount DESC # Order by conference and citation count
WITH c.name AS conferenceName, COLLECT({paper: p, citations: citationCount})[0..3] AS topPapers # Collect the top 3 papers per conference
UNWIND topPapers AS topPaper # Unwind the top papers
RETURN c.name AS Conference, topPaper.paper.title AS PaperTitle, topPaper.citations AS CitationCount
ORDER BY Conference, CitationCount DESC

In [5]:
# Define the Cypher query to create Review nodes and relationships
top_papers_per_conference_query = """
MATCH (c:CONFERENCE_WORKSHOP)<-[:PUBLISHED_IN]-(p:PAPER)
OPTIONAL MATCH (p)<-[:CITES]-(citing:PAPER)
WITH c, p, COUNT(citing) AS citationCount
ORDER BY c.name, citationCount DESC
WITH c.name AS conferenceName, COLLECT({paper: p, citations: citationCount})[0..3] AS topPapers
UNWIND topPapers AS topPaper
RETURN conferenceName AS Conference, topPaper.paper.title AS PaperTitle, topPaper.citations AS CitationCount
ORDER BY Conference, CitationCount DESC
"""

# Execute the query and get the results
top_papers_per_conference = execute_query(top_papers_per_conference_query)
top_papers_per_conference

[{'Conference': 'Browning-Davis Workshop',
  'PaperTitle': 'Yes professor itself step.',
  'CitationCount': 16},
 {'Conference': 'Browning-Davis Workshop',
  'PaperTitle': 'Maintain present middle only speech voice spring.',
  'CitationCount': 15},
 {'Conference': 'Browning-Davis Workshop',
  'PaperTitle': 'Democratic citizen rock some fight something take.',
  'CitationCount': 13},
 {'Conference': 'Gray-Johnson Conference',
  'PaperTitle': 'Ground space among theory.',
  'CitationCount': 15},
 {'Conference': 'Gray-Johnson Conference',
  'PaperTitle': 'Popular face wish various various note.',
  'CitationCount': 15},
 {'Conference': 'Gray-Johnson Conference',
  'PaperTitle': 'Really law between.',
  'CitationCount': 13},
 {'Conference': 'Martinez, Perry and Smith Workshop',
  'PaperTitle': 'I listen share environment chance bill usually.',
  'CitationCount': 18},
 {'Conference': 'Martinez, Perry and Smith Workshop',
  'PaperTitle': 'Result risk half use.',
  'CitationCount': 13},
 {'Co

# Query 2: For each conference find its community: i.e., those authors that have published papers on that conference in, at least, 4 different editions.

```python
MATCH (a:AUTHOR)-[:WRITTEN_BY]->(p:PAPER)-[:PUBLISHED_IN]->(c:CONFERENCE_WORKSHOP) # Find authors, papers, and conferences
WITH a, c.name AS conferenceName, c.edition AS edition # Collect author names, conference names and editions
ORDER BY a, conferenceName, edition
WITH a, conferenceName, COUNT(DISTINCT edition) AS editions # Count number of editions for each author
WHERE editions >= 4 # Filter authors with at least 4 editions
RETURN conferenceName AS Conference, a.name AS Author, editions
ORDER BY Conference, Author

In [46]:
# Define the Cypher query to create Review nodes and relationships
conference_comunity_query = """
MATCH (a:AUTHOR)-[:WRITTEN_BY]->(p:PAPER)-[:PUBLISHED_IN]->(c:CONFERENCE_WORKSHOP)
WITH a, c.name AS conferenceName, c.edition AS edition
ORDER BY a, conferenceName, edition
WITH a, conferenceName, COUNT(DISTINCT edition) AS editions
WHERE editions >= 4
RETURN conferenceName AS Conference, a.name AS Author, editions
ORDER BY Conference, editions DESC
"""

# Execute the query and get the results
conference_comunity = execute_query(conference_comunity_query)
conference_comunity

[{'Conference': 'Browning-Davis Workshop',
  'Author': 'Desiree Peck',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Theresa Schmitt',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Joy Sanchez',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Aaron Freeman',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Amanda Meyers',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Russell Howell',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Thomas Kramer',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Shawn Wallace',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Christy Manning',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Jacob Austin',
  'editions': 8},
 {'Conference': 'Browning-Davis Workshop',
  'Author': 'Adam Reed',
  'editions': 8},
 {'Conference

## Query 3: Find the impact factors of the journals

```python
MATCH (j:JOURNAL)<-[:PUBLISHED_IN]-(p:PAPER)
WHERE p.year IN [2021, 2022, 2023]
OPTIONAL MATCH (p)<-[:CITES]-(citing:PAPER)
WITH j, p.year AS publicationYear, COUNT(citing) AS citationCount, p
RETURN j.name AS Journal, 
       CASE WHEN SUM(CASE WHEN publicationYear IN [2021, 2022] THEN 1 ELSE 0 END) = 0 THEN 0 # checks if papers published in year
            ELSE (SUM(CASE WHEN publicationYear = 2023 THEN citationCount ELSE 0 END) * 1.0 / # calculate impact factor of jounrals
                  SUM(CASE WHEN publicationYear IN [2021, 2022] THEN 1 ELSE 0 END)) 
       END AS citationRatio

In [47]:
# Define the Cypher query to create Review nodes and relationships
impact_factor_query = """
MATCH (j:JOURNAL)<-[:PUBLISHED_IN]-(p:PAPER)
WHERE p.year IN [2021, 2022, 2023]
OPTIONAL MATCH (p)<-[:CITES]-(citing:PAPER)
WITH j, p.year AS publicationYear, COUNT(citing) AS citationCount, p
RETURN j.name AS Journal, 
       CASE WHEN SUM(CASE WHEN publicationYear IN [2021, 2022] THEN 1 ELSE 0 END) = 0 THEN 0 
            ELSE (SUM(CASE WHEN publicationYear = 2023 THEN citationCount ELSE 0 END) * 1.0 / 
                  SUM(CASE WHEN publicationYear IN [2021, 2022] THEN 1 ELSE 0 END)) 
       END AS citationRatio
"""

# Execute the query and get the results
impact_factor = execute_query(impact_factor_query)
impact_factor

[{'Journal': 'Ware, Fowler and Garcia Journal',
  'citationRatio': 0.3072916666666667},
 {'Journal': 'York, Foster and Johnston Journal',
  'citationRatio': 0.10857142857142857}]

# Query 4: Find the h-indexes of the authors in your graph

In [56]:
# Define the Cypher query to create Review nodes and relationships
h_index_query = """
MATCH (a:AUTHOR)-[:WRITTEN_BY]->(p:PAPER)
OPTIONAL MATCH (p)<-[:CITES]-(citing:PAPER)
WITH a, p, COUNT(citing) AS citationCount
ORDER BY a.name, citationCount DESC
WITH a.name AS Author, COLLECT(citationCount) AS citations
UNWIND range(0, SIZE(citations) - 1) AS i
WITH Author, i + 1 AS rank, citations[i] AS citationCount
WHERE citationCount >= rank
RETURN Author, MAX(rank) AS h_index
ORDER BY h_index DESC
"""

# Execute the query and get the results
h_index = execute_query(h_index_query)
h_index

[{'Author': 'Alexis Marshall', 'h_index': 10},
 {'Author': 'Gloria Webb', 'h_index': 10},
 {'Author': 'Katrina Nelson', 'h_index': 10},
 {'Author': 'Lauren Castillo', 'h_index': 10},
 {'Author': 'Linda Garcia', 'h_index': 10},
 {'Author': 'Mark Schultz', 'h_index': 10},
 {'Author': 'Ronald Shelton', 'h_index': 10},
 {'Author': 'Russell Howell', 'h_index': 10},
 {'Author': 'Trevor Hampton', 'h_index': 10},
 {'Author': 'Adam Reed', 'h_index': 9},
 {'Author': 'Amanda Jones', 'h_index': 9},
 {'Author': 'Amanda Meyers', 'h_index': 9},
 {'Author': 'Andrea Parker', 'h_index': 9},
 {'Author': 'Anna Parker', 'h_index': 9},
 {'Author': 'Bailey Wilson', 'h_index': 9},
 {'Author': 'Brett Parker', 'h_index': 9},
 {'Author': 'Brian Chavez', 'h_index': 9},
 {'Author': 'Caroline Gonzalez', 'h_index': 9},
 {'Author': 'Carolyn Savage', 'h_index': 9},
 {'Author': 'Cathy Hamilton', 'h_index': 9},
 {'Author': 'Charles Mitchell', 'h_index': 9},
 {'Author': 'Cheryl Schmitt', 'h_index': 9},
 {'Author': 'Chris