### 📘 **Description**

This notebook demonstrates how to load a single annotated CSV file into a Neo4j graph database using Python and LangChain. It includes steps to:

* Read the CSV into a Pandas DataFrame
* Connect to Neo4j using environment variables
* Create a snapshot metadata node
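* Upload each row of the CSV as a node in Neo4j using Cypher `LOAD CSV` (the file is read by the Neo4j server via a `file:///` URL, so it must be available in the server's import location)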

The CSV is assumed to contain legislative or regulatory text data with fields such as `act_title`, `title`, `content`, `tokens`, and other metadata.


In [3]:
import os

In [4]:
# Step 1 (optional): install the dependencies if the kernel is missing them.
# %pip install pandas neo4j langchain-community

# Step 2: read the annotated batch CSV into a DataFrame and preview the first rows.
import pandas as pd

csv_path = "rank_0_batch_1000.csv"  # Point this at the batch file to inspect
df = pd.read_csv(filepath_or_buffer=csv_path)
df.head()


Unnamed: 0,act_title,reg_title,title,content,url,section_url,section_id,section_number,source_rank,timestamp,tokens,token_chunks
0,Advanced Education Statute Repeal Act,,Repeal,The Public Education Flexibility and Choice Ac...,https://www.bclaws.gov.bc.ca/civix/document/id...,d2e1021,,1,0,2025-04-08T20:18:14.933593,"[{'word': 'the', 'token_values': [1996]}, {'wo...","[[101, 1996, 2270, 2495, 16991, 1998, 3601, 25..."
1,Restricting Public Consumption of Illegal Subs...,,Not in force,[Not in force.],https://www.bclaws.gov.bc.ca/civix/document/id...,d1e20,,1-11,0,2025-04-08T20:18:14.935211,"[{'word': '', 'token_values': []}, {'word': '[...","[[101, 1031, 2025, 1999, 2486, 1012, 1033, 102..."
2,South Coast British Columbia Transportation Au...,,Not in force,[Not in force.],https://www.bclaws.gov.bc.ca/civix/document/id...,d1e21,,1-8,0,2025-04-08T20:18:14.936212,"[{'word': '', 'token_values': []}, {'word': '[...","[[101, 1031, 2025, 1999, 2486, 1012, 1033, 102..."
3,Insurance for Crops Act,,Agreements with Canada,"The minister, for the government, may enter in...",https://www.bclaws.gov.bc.ca/civix/document/id...,d2e15,,1,0,2025-04-08T20:18:14.936834,"[{'word': 'the', 'token_values': [1996]}, {'wo...","[[101, 1996, 2704, 1010, 2005, 1996, 2231, 101..."
4,Commercial Liens Act,,Not in force,[Not in force.],https://www.bclaws.gov.bc.ca/civix/document/id...,d1e26,,1-62,0,2025-04-08T20:18:14.937473,"[{'word': '', 'token_values': []}, {'word': '[...","[[101, 1031, 2025, 1999, 2486, 1012, 1033, 102..."


In [5]:
# Neo4j connection settings, read from the environment.
# os.getenv returns None for an unset variable, and concatenating None into the
# URI raises an opaque TypeError, so the host falls back to 'localhost'.
NEO4J_HOST = os.getenv('NEO4J_HOST', 'localhost')
NEO4J_URI = f'bolt://{NEO4J_HOST}:7687'  # Bolt protocol on port 7687
NEO4J_USERNAME = os.getenv('NEO4J_USER')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = 'neo4j' #os.getenv('NEO4J_DB')
# Print only the non-secret settings so the target server is visible in the output.
print(NEO4J_URI)
print(NEO4J_DATABASE)

bolt://neo4j:7687
neo4j


In [6]:
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector

In [7]:
# Open the graph connection used by every query cell below, using the
# connection settings assembled from the environment.
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

  kg = Neo4jGraph(


In [44]:
# Value stored as the `timestamp` property of the PointInTime snapshot node.
# Placeholder: replace with the actual timestamp.
point_in_time_ts = "consol_42"

In [7]:
# Ensure the snapshot metadata node exists: MERGE matches an existing
# PointInTime node with this timestamp or creates one, then returns it.
point_in_time_merge = """
MERGE (p:PointInTime {timestamp: $timestamp})
RETURN p
"""
kg.query(point_in_time_merge, params={"timestamp": point_in_time_ts})

[{'p': {'timestamp': 'consol_42'}}]

In [11]:
# Server-side bulk load of the batch CSV: one :Section node per row, linked by
# RECORDED_AT to a :PointInTime node keyed on the row's timestamp. Rows are
# committed in batches of 500; a failing batch is skipped (ON ERROR CONTINUE)
# and reported, so the query returns only the rows whose batch failed.
#
# The failing row's id is read from the outer `row`: variables returned by a
# failed inner transaction are bound to null, so returning the id from inside
# the subquery would always give null on exactly the rows being reported.
# The status variable is named `status` so it is not confused with the inner
# Section node `s`; the output columns are still `sid` and `s`.
#
# NOTE(review): PointInTime is merged on row.timestamp, not on the snapshot
# value in `point_in_time_ts`, so sections are not linked to that snapshot
# node — confirm this is intended.
cypher = """
LOAD CSV WITH HEADERS FROM 'file:///consolidation/rank_0_batch_1000.csv' AS row
CALL {
  WITH row
  MERGE (p:PointInTime {timestamp: row.timestamp})
  CREATE (s:Section {
    act_title: row.act_title,
    reg_title: row.reg_title,
    title: row.title,
    content: row.content,
    url: row.url,
    section_url: row.section_url,
    section_id: row.section_id,
    section_number: row.section_number,
    source_rank: toInteger(row.source_rank),
    timestamp: row.timestamp
  })
  MERGE (s)-[:RECORDED_AT]->(p)
} IN TRANSACTIONS OF 500 ROWS
ON ERROR CONTINUE
REPORT STATUS AS status
WITH row.section_id AS sid, status
WHERE status.errorMessage IS NOT NULL
RETURN sid, status AS s
"""

In [None]:
# Execute the load statement defined in the preceding cell; the cell output is
# whatever rows the statement returns.
kg.query(cypher)

In [10]:
# Number of rows pandas read from the CSV into `df`.
len(df)

1000

In [12]:
# Server-side bulk load of the embeddings CSV: one :consolidation_42_act node
# per row, committed in batches of 500 rows by a single transaction worker.
# The statement has no RETURN clause, so running it yields no result rows.
#
# `row` is imported through the CALL (row) scope clause, so no extra
# `WITH row` is needed inside the subquery.
# `token_chunks` is read from `row.token_chunks`, the plural column name that
# appears in the batch CSV header shown earlier in this notebook. A
# misspelled column name evaluates to null and the property is silently
# never set.
#
# NOTE(review): LOAD CSV yields every field as a string, so `tokens`,
# `token_chunks` and `embedding` are stored as raw text; convert them in
# Cypher if list or numeric values are needed — confirm against the CSV format.
cypher = """
LOAD CSV WITH HEADERS FROM 'file:///consolidation/rank_0_thread_10_embeddings.csv' AS row
CALL (row) {
  CREATE (s:consolidation_42_act)
  SET
      s.title = row.title,
      s.content = row.content,
      s.act_title = row.act_title,
      s.reg_title = row.reg_title,
      s.url = row.url,
      s.section_number = row.section_number,
      s.source_rank = toInteger(row.source_rank),
      s.timestamp = row.timestamp,
      s.snapshot_id = 42,
      s.snapshot_date = date("2024-12-31"),
      s.tokens = row.tokens,
      s.token_chunks = row.token_chunks,
      s.embedding = row.embedding
} IN 1 CONCURRENT TRANSACTIONS OF 500 ROWS
"""

In [13]:
# Execute the load statement defined in the preceding cell. That statement has
# no RETURN clause, so an empty result list is the expected output.
kg.query(cypher)

[]