In [None]:
import json
import os

import pandas as pd
from neo4j import GraphDatabase

Read the Form 13 holdings from a CSV file, then create Company and Manager nodes (and the HOLDS relationships between them) in Neo4j.

In [9]:
# Thin wrapper around the Neo4j driver used by the rest of this notebook
class Neo4jConnection:
    """Hold one Neo4j driver and run Cypher statements through it."""

    def __init__(self, uri, user, password):
        """Create the driver for ``uri``, authenticating as ``user`` / ``password``."""
        credentials = (user, password)
        self.driver = GraphDatabase.driver(uri, auth=credentials)

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def query(self, query, parameters=None):
        """Run ``query`` in a new session and return its records as a list.

        ``parameters`` is passed straight through to ``session.run``. The
        result is fully consumed while the session is still open, so the
        returned list stays usable after the session has been closed.
        """
        with self.driver.session() as active_session:
            records = list(active_session.run(query, parameters))
        return records

In [10]:
# Initialize database connection.
# Settings are read from the NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD environment
# variables so real credentials never have to be committed with the notebook.
# The fallbacks are the values this cell previously hardcoded, so a run without
# those variables set connects exactly as before.
neo4j_conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "neo4j"),
)

In [11]:
# Start from an empty graph: delete every node together with its relationships.
# WARNING: this wipes the whole database the connection points at.
neo4j_conn.query(query="MATCH (n) DETACH DELETE n")

[]

In [12]:
# Load manager and company data from CSV.
# Only the first MAX_ROWS rows of the file are read.
MAX_ROWS = 300

# Set FORM13_CSV_PATH to point at your own copy of the data; the fallback is the
# path this cell previously hardcoded.
csv_file_path = os.environ.get(
    "FORM13_CSV_PATH",
    r'C:/Users/jhyang/OneDrive/文档/GitHub_Projects/GraphRAG-Company-Info-Collector/resources/form13.csv',
)
df_csv = pd.read_csv(csv_file_path, nrows=MAX_ROWS)

In [None]:
# Create Manager and Company nodes with HOLDS relationships.
#
# All rows are sent to Neo4j in ONE parameterized statement and expanded
# server-side with UNWIND, instead of three separate queries (and sessions)
# per CSV row. Rows are still applied in file order, so when a manager,
# company or holding appears more than once the last row's properties win.

# CSV columns forwarded to Cypher; a missing column raises KeyError.
HOLDING_COLUMNS = [
    "managerName", "managerCik", "managerAddress",
    "companyName", "cusip6", "cusip",
    "value", "shares", "reportCalendarOrQuarter",
]

# One plain dict per CSV row, keyed by the CSV column names used in the query below.
holding_rows = [
    {column: row[column] for column in HOLDING_COLUMNS}
    for _, row in df_csv.iterrows()
]

# Skip the round trip entirely when the CSV produced no rows.
if holding_rows:
    neo4j_conn.query(
        """
        UNWIND $rows AS row
        MERGE (m:Manager {name: row.managerName})
        SET m.cik = row.managerCik, m.address = row.managerAddress
        MERGE (c:Company {name: row.companyName})
        SET c.cusip6 = row.cusip6, c.cusip = row.cusip
        MERGE (m)-[r:HOLDS]->(c)
        SET r.value = row.value,
            r.shares = row.shares,
            r.reportCalendarOrQuarter = row.reportCalendarOrQuarter
        """,
        parameters={"rows": holding_rows},
    )


In [14]:
# Close the database connection (calls Neo4jConnection.close on the shared neo4j_conn).
# Cells that use neo4j_conn after this point need the connection to be re-created first.
neo4j_conn.close()

### 1. View All Manager Nodes

    MATCH (m:Manager)
    
    RETURN m.name AS ManagerName, m.cik AS CIK, m.address AS Address

### 2. View All Company Nodes

    MATCH (c:Company)

    RETURN c.name AS CompanyName, c.cusip6 AS CUSIP6, c.cusip AS CUSIP

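### 3. Count Manager and Company Nodes

Run the two queries below separately; Cypher does not allow another `MATCH` after `RETURN` in the same statement.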

    MATCH (m:Manager)

    RETURN count(m) AS ManagerCount

    MATCH (c:Company)

    RETURN count(c) AS CompanyCount