# Notebook cell In [2]
import csv
import sys
from neo4j import GraphDatabase

# Raise the CSV field size limit as high as the platform allows, so very long
# text fields do not trigger "field larger than field limit" errors.
# csv.field_size_limit() raises OverflowError when the value does not fit in a
# C long (e.g. 64-bit Windows, where sys.maxsize is 2**63 - 1 but a C long is
# 32 bits wide), so halve the candidate until the call is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 2

class GraphDBHandler:
    """
    Thin wrapper around a Neo4j driver that imports question/answer rows
    from a CSV file as Question, Topic, Therapist and Answer nodes.

    The handler owns the driver it creates; call close() when done, or use
    the handler as a context manager (``with GraphDBHandler(...) as h:``).
    """

    def __init__(self, uri, user, password):
        """Create the Neo4j driver for `uri` using basic (user, password) auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying Neo4j driver."""
        self.driver.close()

    def __enter__(self):
        """Return the handler itself so it can be used in a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the `with` block; exceptions propagate."""
        self.close()

    def add_data_from_csv(self, file_path):
        """
        Reads each row from the CSV file and populates the Neo4j database with:
         - Question node (merged by questionID)
         - Topic nodes (split by commas, blank entries ignored)
         - Therapist node (merged by therapistName)
         - Answer node (unique per row, linked to Question and Therapist)

        Each row is written in a single write transaction, so a failure on a
        row does not leave that row partially imported.

        Note: Answer nodes are created with CREATE, so importing the same
        file twice produces duplicate Answer nodes.

        Raises:
            KeyError: if an expected column is missing from the CSV header.
            OSError: if the file cannot be opened.
        """
        with self.driver.session() as session, open(file_path, newline='', encoding='utf-8') as csvfile:
            # If your CSV is comma-separated, use the default DictReader
            reader = csv.DictReader(csvfile)

            for row in reader:
                session.execute_write(self._write_row, row)

    @staticmethod
    def _parse_upvotes(raw):
        """
        Convert the raw `upvotes` CSV field to an int when possible.

        Returns the int value if int() accepts the whitespace-stripped text.
        Returns None when the field is missing (DictReader yields None for
        short rows); otherwise returns the original string unchanged.
        """
        if raw is None:
            # NOTE(review): a null property value is presumably left unset by
            # Neo4j on CREATE -- confirm against the Cypher manual.
            return None
        try:
            return int(raw.strip())
        except ValueError:
            # Non-numeric text (including characters that str.isdigit()
            # accepts but int() rejects, such as superscript digits).
            return raw

    @staticmethod
    def _split_topics(raw):
        """
        Split the comma-separated `topics` CSV field into a list of names.

        Names are stripped of surrounding whitespace and empty entries
        (from "a,,b" or a trailing comma) are dropped so that no Topic node
        with an empty name is created. A missing/empty field gives [].
        """
        if not raw:
            return []
        stripped = (part.strip() for part in raw.split(','))
        return [name for name in stripped if name]

    @staticmethod
    def _write_row(tx, row):
        """
        Write one CSV row (a dict keyed by column name) using transaction `tx`.

        All fields are read before the first query is issued, so a missing
        column raises KeyError before anything is written.
        """
        question_id = row['questionID']
        question_title = row['questionTitle']
        question_text = row['questionText']
        question_url = row['questionUrl']
        topics = GraphDBHandler._split_topics(row['topics'])
        therapist_name = row['therapistName']
        therapist_url = row['therapistUrl']
        answer_text = row['answerText']
        upvotes = GraphDBHandler._parse_upvotes(row['upvotes'])

        # Order matters: the Question and Therapist must exist before the
        # topic and answer queries MATCH them.
        GraphDBHandler._merge_question(
            tx, question_id, question_title, question_text, question_url
        )
        GraphDBHandler._merge_topics(tx, question_id, topics)
        GraphDBHandler._merge_therapist(tx, therapist_name, therapist_url)
        GraphDBHandler._create_answer(
            tx, question_id, therapist_name, answer_text, upvotes
        )

    @staticmethod
    def _merge_question(tx, question_id, title, text, url):
        """
        MERGE a Question node based on questionID.
        Sets/updates title, text, and url.
        """
        tx.run(
            """
            MERGE (q:Question {id: $question_id})
            ON CREATE SET q.title = $title,
                          q.text = $text,
                          q.url = $url
            ON MATCH SET q.title = $title,
                         q.text = $text,
                         q.url = $url
            """,
            question_id=question_id,
            title=title,
            text=text,
            url=url
        )

    @staticmethod
    def _merge_topics(tx, question_id, topics):
        """
        For each topic in the list, MERGE a Topic node and
        create a HAS_TOPIC relationship from the Question.
        An empty list results in no changes.
        """
        tx.run(
            """
            MATCH (q:Question {id: $question_id})
            UNWIND $topics AS topic
            MERGE (t:Topic {name: TRIM(topic)})
            MERGE (q)-[:HAS_TOPIC]->(t)
            """,
            question_id=question_id,
            topics=topics
        )

    @staticmethod
    def _merge_therapist(tx, therapist_name, therapist_url):
        """
        MERGE a Therapist node based on therapistName.
        Sets/updates the therapist's URL.
        """
        tx.run(
            """
            MERGE (th:Therapist {name: $therapist_name})
            ON CREATE SET th.url = $therapist_url
            ON MATCH SET th.url = $therapist_url
            """,
            therapist_name=therapist_name,
            therapist_url=therapist_url
        )

    @staticmethod
    def _create_answer(tx, question_id, therapist_name, answer_text, upvotes):
        """
        CREATE a new Answer node (one per CSV row).
        Links it to the existing Question and Therapist nodes; if either
        MATCH finds nothing, no Answer is created.
        """
        tx.run(
            """
            MATCH (q:Question {id: $question_id})
            MATCH (th:Therapist {name: $therapist_name})
            CREATE (a:Answer {
                text: $answer_text,
                upvotes: $upvotes
            })
            MERGE (q)-[:HAS_ANSWER]->(a)
            MERGE (a)-[:PROVIDED_BY]->(th)
            """,
            question_id=question_id,
            therapist_name=therapist_name,
            answer_text=answer_text,
            upvotes=upvotes
        )

def main(
    uri="bolt://localhost:7687",
    user="neo4j",
    password="123456789",
    file_path="counselchat-data.csv",
):
    """
    Import the CSV at `file_path` into the Neo4j instance at `uri`.

    The defaults are the local development settings; update them (or pass
    arguments) to match your Neo4j connection details and CSV location.
    Avoid committing real credentials as defaults.

    The driver is always closed, even if the import raises.
    """
    handler = GraphDBHandler(uri, user, password)
    try:
        handler.add_data_from_csv(file_path)
    finally:
        # Release the driver's connections even when the import fails.
        handler.close()


if __name__ == "__main__":
    main()
