# AutoEdstem Presentation Notebook

In [1]:
# Setup functions

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from IPython.display import Markdown, display
import textwrap

In [2]:
import openai

with open("./secret_api_key") as f:
  secret_key = f.read()
  openai.api_key = secret_key

In [3]:
from pymilvus import connections, Collection, utility
from llama_index.vector_stores.milvus import MilvusVectorStore

from llama_index.core import StorageContext
def read_documents(data_path):
  return SimpleDirectoryReader(data_path, recursive=True).load_data()

def build_new_query_engine(data_path, collection_name):
  vector_store = MilvusVectorStore(dim=1536, overwrite=True, collection_name=collection_name)
  documents = read_documents(data_path)
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
  index = VectorStoreIndex.from_documents(documents, storage_context, show_progress=True)
  query_engine = index.as_query_engine()
  return query_engine
  

def load_existing_index_as_query_engine(collection_name, k = 10):
  connections.connect(
    alias="default", 
    host='localhost', 
    port='19530'
  )
  
  if collection_name not in utility.list_collections():
    print(f"Collection {collection_name} not found")
    return
  
  Collection(collection_name).load()
  vector_store = MilvusVectorStore(dim=1536, overwrite=False, collection_name=collection_name)
  index = VectorStoreIndex.from_vector_store(vector_store)
  query_engine = index.as_query_engine()
  query_engine.retriever.similarity_top_k = k

  return query_engine

In [4]:
def load_index(collection_name):
  connections.connect(
    alias="default", 
    host='localhost', 
    port='19530'
  )
  
  if collection_name not in utility.list_collections():
    print(f"Collection {collection_name} not found")
    return
  
  Collection(collection_name).load()
  vector_store = MilvusVectorStore(dim=1536, overwrite=False, collection_name=collection_name)
  index = VectorStoreIndex.from_vector_store(vector_store)
  return index

In [5]:
def pretty_print(x):
  display(Markdown(str(x)))

response_list = []

def append_and_print(resp, response_list = response_list):
  response_list += [resp]
  pretty_print(resp)

#Full transparency of data
def print_data_sources(response, sources=100, show_text= True):
  for i in range(min(len(response.source_nodes), sources)):
      pretty_print("Data retrieved from file: " + response.source_nodes[i].node.metadata.get('file_name'))
      if show_text:
        pretty_print("Data used to generate response: " + response.source_nodes[i].text)

In [6]:
from openai import OpenAI

openai_client = OpenAI(
    # This is the default and can be omitted
    api_key= openai.api_key,
)

def query_gpt3(query):
  chat_completion = openai_client.chat.completions.create(
    messages=[
        {
          "role": "user",
          "content": query,
        }
    ],
    model="gpt-3.5-turbo",
  )
  return chat_completion.choices[0].message.content

In [7]:
FINAL_MILVUS_COLLECION_NAME = "final_milvus_collection"

# Initial Demo

In [8]:
full_query_engine = load_existing_index_as_query_engine(FINAL_MILVUS_COLLECION_NAME)

In [17]:
sql_question = "How do I run the SQL Assignment"
append_and_print(full_query_engine.query(sql_question))

To run the SQL Assignment, make sure Docker and Docker Compose are installed on your system. Start the Docker daemon and verify its status by running `$ docker info`. Next, execute `$ docker-compose up` in a terminal. Once the process is stable, go to http://localhost:8888 in a browser to access the JupyterLab interface. Open the 'Assignment.ipynb' file from the sidebar to start the assignment.

In [18]:
pretty_print(query_gpt3(sql_question))

To run an SQL assignment, you will need access to a database management system (such as MySQL, PostgreSQL, SQL Server, etc.) and a tool to write and execute SQL queries (such as MySQL Workbench, pgAdmin, SQL Server Management Studio, etc.).

Here are the steps to run an SQL assignment:

1. Set up your database system: If you don't have a database system installed, you will need to download and install one. Follow the instructions provided by the database system's website to install and set up the database.

2. Create a new database: Create a new database where you will be running the SQL assignment. You can do this by using the CREATE DATABASE statement.

3. Connect to the database: Use the database management tool to connect to the newly created database. You will need to provide the database connection details (such as hostname, port, username, password) to establish a connection.

4. Write and execute SQL queries: Write the SQL queries provided in the assignment using the database management tool's query editor. You can execute the queries by clicking on the "Execute" or "Run" button.

5. Review the results: Once you run the queries, review the results to check if they meet the requirements specified in the assignment.

6. Submit your assignment: If you are required to submit the results of the SQL assignment, you can do so by sending the output of the queries or a screenshot of the results to your instructor or the designated submission platform.

Remember to always follow the specific instructions provided in the SQL assignment and reach out to your instructor or classmates for help if you encounter any difficulties.

# Transparency

In [43]:
print_data_sources(response_list[4], 3, show_text=True)

Data retrieved from file: Assignment 2 _ SQL.txt

Data used to generate response: ﻿SQL
Due: Monday, September 25, 2023 at 10:00 pm EDT


1 Introduction        1
2 Background Knowledge        2
2.1 SQL        2
2.2 Jupyter Notebook        2
2.3 Docker        3
3 Assignment Spec        4
3.1 Overview        4
3.2 Getting Started        8
4 FAQ        8
Feedback        8


NOTE: Please make sure you read the handout all the way through before getting started or going to hours!
________________


1        Introduction
In this assignment, we will dive into Structured Query Language (SQL), a powerful tool for managing and manipulating relational databases. SQL is a standard programming language designed for managing data stored in databases, and it plays a crucial role in retrieving, inserting, updating, and deleting data. 


Throughout this assignment, you will have the opportunity to strengthen your SQL skills and apply your knowledge to real-world scenarios by working with the IMDb database. You will be presented with a series of tasks and challenges that will test your understanding of SQL concepts, especially database querying and extracting the database design. By completing this assignment, you will gain hands-on experience in writing SQL queries, understanding complex database relationships, and leveraging SQL to extract valuable insights from data. 
2        Background Knowledge
2.1 SQL
To get started with SQL, check out our Guide to SQL! Even if you're already proficient, everybody benefits from a brief refresher.


Note that in this project, we are using sqlite3 - this means that not all features you learn about in class are necessarily available. We assure you, however, that all of the problems can be solved in sqlite3.
2.2 Jupyter Notebook
Jupyter Notebook ( formerly IPython Notebook) is an interactive web application for creating and sharing computational documents. The project was first named IPython and later renamed Jupyter in 2014. It is a fully open-source product, and users can use every functionality available for free. This notebook supports more than 40 languages, especially Python.


Why do we use Jupyter Notebook? Jupyter notebooks are used for a variety of purposes. A notebook is an interactive computational environment in which users can execute a particular piece of code, observe the output, and make changes to the code to drive it to the desired output or explore more. This would be helpful during the debugging process for the SQL queries compared to using sqlite3 independently. [1]


For the documentation of the Jupyter Notebook, check here
________________
2.3 Docker
Instead of making you install the Jupyter Notebook independently for this assignment, we’ll prepare the “Docker” for you to install. But first, we will introduce you to what the docker is.


Docker is an open platform for developing, shipping and running applications. In other words, Docker is a “package” in the form of containers that prepare the environment (Mostly a set of Library) for you to run the program. We decided to use this instead of installing the programs independently to decrease the complexity of the assignment and to assist students using Windows and Linux regarding the Version and the file location. 


3        Assignment Spec
3.1 Overall
In this assignment, you will write the SQL queries based on the questions we give. You can get the assignment by accepting the GitHub classroom assignment here


The following are the files you will need to alter:


* ./notebooks/Assignment.py


Since you will only be writing queries, you should NOT alter the database. If you do so, you may need to restart your container for a fresh database file.


All your queries should be inside the defined strings ex:
pxqy = “””
SELECT * FROM EXAMPLE 
“””
The autograder will use these query strings. Do NOT modify variable names in the form of pxqy (ex: p1q1), or else your implementation will fail the autograder!
3.2 Getting Started
Ensure that you have Docker and Docker Compose installed.


Start the docker daemon and ensure it’s running by running:
$ docker info


You should be able to run the following command in a terminal:
$ docker-compose up 


Note that this command will run a process that will not resolve in your terminal. Once it has stabilized (i.e, it is no longer producing new logs), open http://localhost:8888 in a browser. You should see a JupyterLab interface. Open the file in the sidebar named 'Assignment.ipynb' and start the assignment.
4        FAQ


* How is this graded?: See the course guide. 
* How do I hand this in?

Data retrieved from file: Assignment 2 _ SQL.txt

Data used to generate response: Do NOT modify variable names in the form of pxqy (ex: p1q1), or else your implementation will fail the autograder!
3.2 Getting Started
Ensure that you have Docker and Docker Compose installed.


Start the docker daemon and ensure it’s running by running:
$ docker info


You should be able to run the following command in a terminal:
$ docker-compose up 


Note that this command will run a process that will not resolve in your terminal. Once it has stabilized (i.e, it is no longer producing new logs), open http://localhost:8888 in a browser. You should see a JupyterLab interface. Open the file in the sidebar named 'Assignment.ipynb' and start the assignment.
4        FAQ


* How is this graded?: See the course guide. 
* How do I hand this in?: 
   * For the submission, save the assignment notebook as a Python file by going to File >> Save and Export Notebook as >> Executable Script (the file should be called "SQL_Assignment.py"). 
   * Then, submit this "SQL_Assignment.py" file to Gradescope under "Assignment 2: SQL" (using the "Upload" option, not submitting through GitHub).
   * ENSURE YOUR SUBMITTED FILE IS NAMED “SQL_Assignment.py” YOUR SUBMISSION WILL FAIL THE AUTOGRADER OTHERWISE
* What packages can I import?: You may import any standard library package that doesn’t trivialize the assignment. If you are unsure whether a package is a fair game, please post on Edstem. 


Feedback


As this is a new course, we appreciate any feedback you have for us! If you enjoyed this assignment, hated this assignment, or have other thoughts to share, please do so here!

Data retrieved from file: Slide 2 - SQL Gearup.txt

Data used to generate response: SQL Gearup
CSCI 1270 - Database Management Systems

Please read the handout in its entirety!
It will save you lots of time per assignment; especially the hints!
If anything is unclear about the handout, please post on EdStem.
The Handout
Common Commands in SQLITE
Foreign Keys
DML commands
CASE statements
Tconst, nconst
Fscore




Highly recommend doing Guide to SQL, then checking out the supplementary information.
In this project, we are using jupyter notebook
If you need help getting set up, please come to TA Hours or post on EdStem!

SQL

An open platform for developing, shipping and running applications\
Why Docker?
Decrease the complexity of the assignment 
Solve OS differences

Docker

Install Docker and Docker Compose 
Once these two software are installed, you should be able to run the following command in a terminal:
$ docker-compose up 
Once it has stabilized (i.e, it is no longer producing new logs), open http://localhost:8888 in a browser. You should see a JupyterLab interface. Open the file in the sidebar named 'SQL_Assignment.ipynb' and start the assignment.
Getting Started

The Codebase
Do not change the name of the variables since we depend on them for the grading purpose


Part 1,2,3: SQL queries
Run queries within the Jupyter Notebook
The result should appear after running the code, easier for the debugging



Tips
Read the handout thoroughly!
Be comfortable in SQL before starting.

# Impacts of different K-Values

In [14]:
query_question = "How do I implement concurrent transactions for the concurrency assignment"
pretty_print(query_gpt3(query_question))

To implement concurrent transactions for the concurrency assignment, you will need to create multiple threads or processes that can operate concurrently to execute transactions on your database. Here are the steps you can follow to implement concurrent transactions:

1. Choose a programming language or framework that supports concurrency, such as Java, Python, or Node.js.

2. Set up a database system that supports transactions, such as PostgreSQL, MySQL, or Oracle.

3. Create a table or collection in your database to store the data for the transactions.

4. Write code to define the structure of the transaction and the operations that need to be performed.

5. Create multiple threads or processes that will execute the transactions concurrently.

6. Use synchronization mechanisms, such as locks or semaphores, to ensure that transactions do not interfere with each other.

7. Test your implementation by running multiple transactions concurrently and verifying that they execute correctly and produce the expected results.

By following these steps, you can implement concurrent transactions for the concurrency assignment in your database system.

In [15]:
full_query_engine.retriever.similarity_top_k = 1
append_and_print(full_query_engine.query(query_question))
print_data_sources(response_list[-1])

To implement concurrent transactions for the concurrency assignment, you will need to adhere to strict two-phase locking (2PL) where locks are acquired as needed and held until after committing changes to the database. This ensures no cascading rollbacks occur. Additionally, implement deadlock avoidance by detecting cycles in a "waits-for" graph to prevent deadlocks. Transactions should be managed using commands like transaction [begin|commit] to start or end a transaction, and lock <table> <key> to grab write locks on resources. Running the database as a server-client application allows for multiple clients, each handling one transaction at a time. Stress testing can be done using the bumble_stress executable with specified workloads to detect deadlocks or unsafe behavior.

Data retrieved from file: Assignment 6_ Concurrency.txt

Data used to generate response: Transaction locks are completely unaware of the underlying representation of the data; we're only concerned in logical units to preserve the integrity of the external view of the database. On the other hand, data structure-level locks are completely unaware of the data it's locking; only the structure of how the data is stored. Thus, these two locking schemes are completely orthogonal to one another, and yet, are both essential for a database serving multiple tenants concurrently.


2.6 Strict 2PL 


Our transactions will adhere to strict two-phase locking. That is, we will acquire locks as we need them, but we will hold on to all of them until after we have committed our changes to the database. One great corollary of this scheme is that we can be absolutely sure that there will not be cascading rollbacks; that is, if a transaction aborts, no other transaction will have to roll back because of it! This makes our lives a lot easier when implementing aborts, but does mean that our transactions may wait longer for resources to become accessible.


2.7 Deadlock Avoidance


We want to be sure that our transactions don't end up creating a deadlock. One way to do this is by detecting cycles in a "waits-for" graph. While a transaction is waiting for another transaction to free a particular resource, we should add an edge between it and the offending transaction to the "waits-for" graph. If a cycle is detected in this graph, that means that we have deadlocked, and will not be able to proceed without terminating one of the involved transactions. A reasonable policy here would be to terminate the last transaction that joined the graph. Remember to remove edges between transactions once transactions get terminated or are otherwise no longer waiting for a resource — otherwise, you may detect deadlocks that don't exist!


3        Assignment Spec 
3.1 Overview


In this project, you'll implement fine-grained locking on Hash and B+Tree, then resource-level locking, and finally implement deadlock detection and avoidance! Note that the assignment parts are somewhat isolated from each other; feel free to work out of order on this assignment.


3.2 New REPL Commands


The transaction REPL now supports two new commands:


* transaction [begin|commit] - either starts or ends a transaction. Each client can have up to 1 transaction running at a time.
* lock <table> <key> - grabs a write lock on a resource. Useful for debugging.


3.3 Multiple Clients


Since we need to deal with multiple clients, we need to run BumbleBase as a server-client application rather than just as a command-line application. Running ./bumble -p transaction should now start a server at port 8335. Then, run ./bumble_client -p 8335 to connect to the database and start running queries as normal! Using a tool like tmux might help you manage multiple terminals.


In our implementation, each client can have up to one transaction running at a time. To simulate multiple transactions, we need multiple clients; hence, now, our database supports multiple clients through ./bumble_client. To begin a transaction, we run transaction begin, and to end it, transaction commit. Commands issued without an active transaction will be treated as a transaction of one action (transaction begin, [action], transaction commit).


3.4 Stress Testing


Because it can be useful to clobber your database to detect deadlocks or unsafe behavior using a shotgun approach, we've provided a new executable named bumble_stress to help with this. We've also provided a set of sample workloads in the workloads/ directory to run with bumble_stress - poke through workloads/README.md to get a sense of what each workload is doing, and feel free to make your own workloads!


To stress test your database, build and run:


./bumble_stress -index=<btree|hash> -workload=<w.txt> -n=<n> -verify. 
	

The workload file should essentially mimic what would normally be piped through STDIN, separated by newlines. The numthreads argument will specify the number of threads that will run the workload - to be clear, we split the workload across n threads, not duplicate the workload for n threads, meaning each line will only be run once, but on different threads. The index flag determines which kind of index you'll be stress testing. Lastly, the verify flag runs a verification check at the end of the stress test to ensure that the data structure is still consistent after the run. No project flag is required.


Stress testing is an especially experimental feature in the course.

In [16]:
full_query_engine.retriever.similarity_top_k = 10
append_and_print(full_query_engine.query(query_question))
#print_data_sources(response_list[-1], sources=10)

To implement concurrent transactions for the concurrency assignment, it is crucial to ensure that your database supports fine-grained and resource-level locking. This involves implementing fine-grained locking on Hash Tables and B+Trees, as well as deadlock detection and avoidance mechanisms. Additionally, adhere to strict two-phase locking for transactions, acquiring locks as needed and holding them until changes are committed. Manage multiple clients by running BumbleBase as a server-client application, allowing each client to have up to one transaction running at a time. To simulate multiple transactions, multiple clients are needed, and transactions can be initiated with "transaction begin" and ended with "transaction commit" commands. Finally, stress testing using the provided executable "bumble_stress" and sample workloads can help detect deadlocks and ensure the consistency of your data structure after concurrent operations.

# Guardrails

In [19]:
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core import ChatPromptTemplate

qa_prompt_str = (
    "Context information is provided below. This context is meant to guide your understanding and not to be used for direct answers.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and based on your understanding, "
    "please explain how you would approach solving the question: {query_str}\n"
    "Your response should demonstrate your reasoning process and how the concepts apply, rather than providing a direct solution."
)
# Text QA Prompt
chat_text_qa_msgs = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content=(
            "Please provide a summary and explanation, not direct excerpts or solutions to problems."
        ),
    ),
    ChatMessage(role=MessageRole.USER, content=qa_prompt_str),
]
text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)

#Create a validator- a customized query engine
index = load_index(FINAL_MILVUS_COLLECION_NAME)
checker_engine= index.as_query_engine(text_qa_template=text_qa_template)


In [20]:
query_str= "How do I write the popself function for the go assignment"
# Perform the query
response = checker_engine.query(query_str)
pretty_print(response)

To write the `PopSelf()` function for the Go assignment, you need to understand the purpose of this function and how it should be implemented based on the provided context information.

1. **Purpose of `PopSelf()` Function**:
   - The `PopSelf()` function is used to remove a specific link from a list without the need to search through the entire list to find the link.
   - The key idea is to handle the linking of the previous and next links of the given link to effectively remove it from the list.

2. **Implementation Approach**:
   - When implementing `PopSelf()`, you should focus on updating the `prev` and `next` links of the given link to effectively remove it from the list.
   - You do not need to search through the list to find the link since the link is already provided.
   - Ensure that after calling `PopSelf()` on a link, the list maintains its integrity and remains in a valid state.

3. **Handling Errors**:
   - If you encounter errors like "invalid memory address or nil pointer dereference," it suggests that there might be an issue with how you are handling pointers or nil values in your implementation.
   - Make sure to handle cases where pointers might be nil, especially when calling functions on them.

4. **Guidance from Feedback**:
   - Based on the feedback provided, ensure that your implementation of `PopSelf()` focuses on linking the previous and next links of the given link correctly to remove it from the list.
   - Avoid calling functions on nil pointers, as this can lead to runtime errors.

5. **Testing and Debugging**:
   - Test your implementation thoroughly to ensure it works as expected in different scenarios.
   - If you encounter errors, debug by checking how you are handling pointers, linking operations, and nil values in your code.

By following these guidelines and understanding the purpose of the `PopSelf()` function, you can approach writing the function for the Go assignment effectively. Remember to focus on handling the linking operations correctly to remove the given link from the list without the need for additional searching.

# Code Generation

In [23]:
code_query = "How do I do the LeafNode split() function"
append_and_print(full_query_engine.query(code_query))
print_data_sources(response_list[-1], show_text=False)

To perform the LeafNode split() function, you can utilize the leftPN and rightPN attributes within the Split class to store the newly created nodes. To access the new node after using split() in insert(), you may need to retrieve the corresponding page using node.page.GetPager().GetPage(node.rightSiblingPN) and then convert it to a leaf node using pageToLeafNode(newPage). However, it's important to note that setting the parent of the new node can be done within the split() function itself, and updating the parent only requires the page number, which can be obtained from the Split operation.

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

In [27]:
code_only = load_existing_index_as_query_engine("code")

In [29]:
write_code_prompt_str = """
Based on the provided information about CS1270, please answer the question to the best of your ability.
---------------------
{query_str}
---------------------
"""

write_code_message = [
   ChatMessage(role=MessageRole.SYSTEM, content="Please provide an answer to the question to the best of your ability, using the additional information if relevant"),
   ChatMessage(role=MessageRole.USER, content=write_code_prompt_str),
]

final_template = ChatPromptTemplate(write_code_message)
code_only.update_prompts({'response_synthesizer:text_qa_template': final_template})

In [30]:
append_and_print(code_only.query("How do I do the LeafNode split() function"))

To provide guidance on how to implement the `split()` function for a `LeafNode` in the context of the given code snippet, you would need to understand the specific structure and requirements of the `LeafNode` class within the codebase.

In general, a `split()` function for a `LeafNode` in a B-tree or similar data structure involves redistributing keys and values between two new leaf nodes when the original leaf node becomes full during an insertion operation.

Here is a general outline of how you could implement a `split()` function for a `LeafNode` based on the provided code:

1. Determine the midpoint index of the keys in the current `LeafNode`.
2. Create a new `LeafNode` to store the keys and values that will be moved during the split.
3. Copy the keys and values from the current `LeafNode` to the new `LeafNode`, starting from the midpoint index.
4. Update the pointers of the neighboring leaf nodes to maintain the linked list structure.
5. Insert the new key and value into the appropriate leaf node (either the current or the new one).
6. Update the parent node with the new key if necessary.

Remember, the specific implementation details may vary based on the exact requirements and structure of the `LeafNode` class in the provided code. It's essential to refer to the codebase, documentation, or any relevant examples to ensure that your implementation aligns with the expected behavior.

In [37]:
from llama_index.core.schema import IndexNode
from llama_index.core import SummaryIndex

base_retreiver = load_index(FINAL_MILVUS_COLLECION_NAME).as_retriever(similarity_top_k=10)
code_retreiver = load_index("code").as_retriever(similarity_top_k=10)

base_obj = IndexNode(
    index_id="vector", obj=base_retreiver, text="Full information retriever"
)
code_obj = IndexNode(
    index_id="bm25", obj=code_retreiver, text="Code only retriever"
)

summary_index = SummaryIndex(objects=[base_obj, code_obj])
composite_engine = summary_index.as_query_engine()
composite_engine.update_prompts({'response_synthesizer:text_qa_template': final_template})

In [38]:
query_str= "Look at the LeafNode split() function. It is not fully implemented. Write code in go to implement it using the information you have. Replace the panic with your implementation of the function"
append_and_print(composite_engine.query(query_str))

To implement the `split()` function for a `LeafNode` in Go, you can follow this basic example:

```go
type LeafNode struct {
    keys  []int
    value []string
    next  *LeafNode
}

func (ln *LeafNode) split() (*LeafNode, int) {
    mid := len(ln.keys) / 2
    newKeys := make([]int, len(ln.keys)-mid)
    newValues := make([]string, len(ln.value)-mid)

    copy(newKeys, ln.keys[mid:])
    copy(newValues, ln.value[mid:])

    newLeaf := &LeafNode{
        keys:  newKeys,
        value: newValues,
        next:  ln.next,
    }

    ln.keys = ln.keys[:mid]
    ln.value = ln.value[:mid]
    ln.next = newLeaf

    return newLeaf, newKeys[0]
}
```

In [35]:
query_str= "Look at the LeafNode split() function. It is not fully implemented. Write code in go to implement it using the information you have. Replace the panic with your implementation of the function"
append_and_print(full_query_engine.query(query_str))

To implement the LeafNode split() function in Go, you can use the following code snippet:

```go
func (node *LeafNode) split() Split {
    // Implement the split logic here
    // You need to split the keys and values of the current node into two separate nodes
    // Ensure that the split maintains the order of keys

    // Example implementation:
    left := &LeafNode{keys: node.keys[:len(node.keys)/2], values: node.values[:len(node.values)/2]}
    right := &LeafNode{keys: node.keys[len(node.keys)/2:], values: node.values[len(node.values)/2:]}

    node.keys = []int64{}
    node.values = []int64{}

    return Split{left: left, right: right, key: right.keys[0]}
}
```

In [36]:
print_data_sources(response_list[-1], show_text=False)

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: clean-ed-data.json

Data retrieved from file: Assignment 4_ Indexing.txt

Data retrieved from file: clean-ed-data.json

Data retrieved from file: node.go