In [4]:
from neo4j import GraphDatabase
import pandas as pd


def read_csv(csv_file):
    data = pd.read_csv(csv_file)
    return data.to_dict(orient='records')


def import_data_to_neo4j(uri, username, password, universities_csv, courses_csv, students_csv, searches_csv, enrollment_csv):
    driver = GraphDatabase.driver(uri, auth=(username, password))

    # Read CSV files
    universities_data = read_csv(universities_csv)
    courses_data = read_csv(courses_csv)
    students_data = read_csv(students_csv)
    searches_data = read_csv(searches_csv)
    enrollment_data = read_csv(enrollment_csv) 


    university_query = """
    UNWIND $data as row
    MERGE (u:University {id: row.id})
    SET u.name = row.name,
        u.country = row.country
    """


    course_query = """
    UNWIND $data as row
    MATCH (u:University {id: row.uni})
    MERGE (c:Course {id: row.id})
    SET c.name = row.name,
        c.year = row.year,
        c.semester = row.semester
    MERGE (c)-[:BELONGS_TO]->(u)
    """


    student_query = """
    UNWIND $data as row
    MERGE (s:Student {id: row.id})
    SET s.name = row.userid_DI,
        s.country = row.final_cc_cname_DI,
        s.education = row.LoE_DI,
        s.gender = row.gender,
        s.age = row.age
    """

   
    search_query = """
    UNWIND $data as row
    MATCH (s:Student {id: row.user})
    MERGE (search:Search {id: row.id})
    SET search.date = row.date,
        search.words = row.words
    MERGE (s)-[:SEARCHED]->(search)
    """

   
    enrollment_query = """
    UNWIND $data as row
    MATCH (s:Student {id: row.`student.id`})
    MATCH (c:Course {id: row.`course.id`})
    MERGE (s)-[enroll:ENROLLED_IN]->(c)
    SET enroll.viewed = row.viewed,
        enroll.explored = row.explored,
        enroll.certified = row.certified,
        enroll.ndays_act = row.ndays_act,
        enroll.incomplete_flag = row.incomplete_flag,
        enroll.comment = row.comment
    """


    def execute_query_in_batches(tx, query, data):
        BATCH_SIZE = 1000
        for i in range(0, len(data), BATCH_SIZE):
            batch = data[i:i + BATCH_SIZE]
            tx.run(query, data=batch)

  
    with driver.session() as session:
        session.write_transaction(execute_query_in_batches, university_query, universities_data)
        session.write_transaction(execute_query_in_batches, course_query, courses_data)
        session.write_transaction(execute_query_in_batches, student_query, students_data)
        session.write_transaction(execute_query_in_batches, search_query, searches_data)
        session.write_transaction(execute_query_in_batches, enrollment_query, enrollment_data)

    driver.close()


uri = "bolt://localhost:7687"
username = "neo4j"
password = "12345678" 


universities_csv = "univ.csv"
courses_csv = "course.csv"
students_csv = "student.csv"
searches_csv = "search.csv"
enrollment_csv = "enrollement.csv" 

import_data_to_neo4j(uri, username, password, universities_csv, courses_csv, students_csv, searches_csv, enrollment_csv)


  session.write_transaction(execute_query_in_batches, university_query, universities_data)
  session.write_transaction(execute_query_in_batches, course_query, courses_data)
  session.write_transaction(execute_query_in_batches, student_query, students_data)
  session.write_transaction(execute_query_in_batches, search_query, searches_data)
  session.write_transaction(execute_query_in_batches, enrollment_query, enrollment_data)
