In [None]:
# Install dependencies
!pip install neo4j langchain-experimental spacy==3.5.2
!python -m spacy download en_core_web_sm


In [None]:
# Import necessary libraries
from IPython.display import display
import ipywidgets as widgets
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
    load_index_from_storage,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.node_parser import SentenceSplitter
from llama_index.llms.groq import Groq
import warnings
import os
import csv
from neo4j import GraphDatabase
import spacy


In [None]:
# Ignore warnings
warnings.filterwarnings('ignore')

# -------------------------------------------
# 1. SET UP NEO4J CONNECTION
# -------------------------------------------
# Connection settings are taken from the environment when present, so real
# credentials never have to be typed into (and saved with) the notebook.
# The original placeholder values remain the fallback: replace them, or export
# NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD before starting the kernel.
neo4j_uri = os.environ.get("NEO4J_URI", "ENTER URI")
neo4j_user = os.environ.get("NEO4J_USERNAME", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "ENTER PASSWORD")

# Module-level driver shared by the graph population and graph lookup code.
driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))


In [None]:
# -------------------------------------------
# 2. ENVIRONMENT VARIABLES
# -------------------------------------------
# `setdefault` only writes the placeholder when no key is configured, so a real
# GROQ_API_KEY exported before the kernel started is no longer overwritten.
os.environ.setdefault("GROQ_API_KEY", "ENTER YOUR GROQ API KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")


In [None]:
# -------------------------------------------
# 3. PROMPT TEMPLATE
# -------------------------------------------
# Template for the text sent to the query engine. It has three `str.format`
# placeholders - {context}, {graph_insights} and {question} - which the "Ask"
# button callback fills in for every question.
prompt_template = """
Use the following pieces of information to answer the user's question. 
If you don't know the answer, just say that you don't know. 
Context: {context}
Graph Insights: {graph_insights}
Question: {question}

Answer the question and provide additional helpful information, based on the pieces of information and graph insights, if applicable. Be succinct.
"""

# Example context: a fixed description substituted for {context} in the template above.
context = "This directory contains multiple documents providing examples and solutions for various programming tasks."


In [None]:
# -------------------------------------------
# 4. LOAD DOCUMENTS
# -------------------------------------------
# `documents` is consumed by the graph, splitting and indexing steps below, so
# it has to exist before any of them run on a fresh kernel.
# NOTE(review): "./data" is an assumed location - point it at the folder that
# actually holds the source documents.
data_dir = "./data"
documents = SimpleDirectoryReader(data_dir).load_data()

# -------------------------------------------
# 5. POPULATE THE NEO4J GRAPH
# -------------------------------------------
# spaCy pipeline used for named-entity extraction.
nlp = spacy.load("en_core_web_sm")


def populate_graph(documents, driver, nlp):
    """
    Extract ORG and PRODUCT entities from each document and store them in Neo4j.

    For every document, each distinct entity text becomes a ``Concept`` node
    (created with ``MERGE``, so existing nodes are reused), and every pair of
    entities that appear consecutively in the document is linked with a
    ``RELATED_TO`` relationship pointing from the earlier to the later one.

    Args:
        documents: Iterable of objects exposing the document text as ``.text``.
        driver: Object whose ``session()`` returns a context manager with a
            ``run(query, **params)`` method (the Neo4j driver in this notebook).
        nlp: Callable turning a text into an object with an ``ents`` iterable,
            each entity exposing ``.text`` and ``.label_``.
    """
    wanted_labels = ("ORG", "PRODUCT")
    with driver.session() as session:
        for doc in documents:
            concepts = [
                ent.text for ent in nlp(doc.text).ents if ent.label_ in wanted_labels
            ]

            # MERGE is idempotent, so one round trip per distinct name is enough;
            # dict.fromkeys de-duplicates while keeping first-seen order.
            for concept in dict.fromkeys(concepts):
                session.run("MERGE (:Concept {name: $concept})", concept=concept)

            linked_pairs = set()
            for concept, next_concept in zip(concepts, concepts[1:]):
                pair = (concept, next_concept)
                # The same entity mentioned twice in a row would otherwise create
                # a self-loop; an already linked pair needs no second query.
                if concept == next_concept or pair in linked_pairs:
                    continue
                linked_pairs.add(pair)
                session.run(
                    """
                    MATCH (c1:Concept {name: $concept}), (c2:Concept {name: $next_concept})
                    MERGE (c1)-[:RELATED_TO]->(c2)
                    """,
                    concept=concept, next_concept=next_concept
                )

# Fill the Neo4j graph with the concepts found in the loaded documents.
populate_graph(documents=documents, driver=driver, nlp=nlp)

In [None]:
# -------------------------------------------
# 6. SPLIT DOCUMENTS INTO NODES
# -------------------------------------------
# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 200

text_splitter = SentenceSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
nodes = text_splitter.get_nodes_from_documents(documents, show_progress=True)


In [None]:

# -------------------------------------------
# 7. SET UP EMBEDDINGS & LLM
# -------------------------------------------
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
GROQ_MODEL_NAME = "llama3-70b-8192"

embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
llm = Groq(model=GROQ_MODEL_NAME, api_key=GROQ_API_KEY)

# Bundle both models so index construction, index loading and the query
# engine all receive the same configuration object.
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)


In [None]:
# -------------------------------------------
# 8. BUILD AND PERSIST VECTOR INDEX
# -------------------------------------------
# Build the index directly from the nodes produced by the sentence splitter so
# the chunk size / overlap configured there is what actually gets embedded.
# (`nodes` is a list of nodes, not a parser, so it cannot be handed to
# `from_documents` as `node_parser=`.)
vector_index = VectorStoreIndex(
    nodes, service_context=service_context, show_progress=True
)

# Write the index to disk so it can be reloaded without re-embedding.
vector_index.storage_context.persist(persist_dir="./storage_mini")

# -------

In [None]:
# Reload the index from the directory it was persisted to.
PERSIST_DIR = "./storage_mini"
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
)


In [None]:
# Query engine over the reloaded index; it is what the "Ask" callback calls
# with the fully formatted prompt.
query_engine = index.as_query_engine(service_context=service_context)


In [None]:
# -------------------------------------------
# 11. QUERY ENHANCEMENT WITH NEO4J
# -------------------------------------------
def get_graph_insights(question, neo4j_driver=None):
    """
    Look up graph concepts mentioned in a question and list their neighbours.

    A concept matches when its name occurs inside the question (the concept
    "Python" matches "Explain Python?") or when the question text occurs inside
    the concept name. Both comparisons are case-insensitive.

    Args:
        question: Free-text user question; surrounding whitespace is ignored.
        neo4j_driver: Optional object exposing ``session()``. When omitted, the
            module-level ``driver`` is used.

    Returns:
        One line per matching concept, formatted as
        "Concept: <name>, Related Concepts: <a>, <b>" and joined by newlines,
        or "No relevant graph insights found." when nothing matches or the
        question is blank.
    """
    no_insights = "No relevant graph insights found."

    question = (question or "").strip()
    if not question:
        # Every name CONTAINS the empty string, so a blank question would
        # return the whole graph; answer without querying instead.
        return no_insights

    active_driver = driver if neo4j_driver is None else neo4j_driver
    with active_driver.session() as session:
        result = session.run(
            """
            MATCH (c:Concept)
            WHERE toLower($question) CONTAINS toLower(c.name)
               OR toLower(c.name) CONTAINS toLower($question)
            OPTIONAL MATCH (c)-[r:RELATED_TO]->(other:Concept)
            RETURN c.name AS concept, collect(other.name) AS related_concepts
            """,
            question=question
        )
        # Records are consumed while the session is still open.
        insights = [
            f"Concept: {record['concept']}, Related Concepts: {', '.join(record['related_concepts'])}"
            for record in result
        ]
    return "\n".join(insights) if insights else no_insights


In [None]:
# -------------------------------------------
# 12. SET UP FEEDBACK DATA STRUCTURES
# -------------------------------------------
# Most recent question/answer pair; set by the "Ask" callback and read by the
# feedback callback so a rating can be attached to it.
last_question = None
last_response = None

feedback_csv_path = "feedback_log.csv"

# Write the header row when the log is missing or exists but is empty.
# UTF-8 is set explicitly so the file does not depend on the platform's
# default encoding.
if not os.path.isfile(feedback_csv_path) or os.path.getsize(feedback_csv_path) == 0:
    with open(feedback_csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["question", "response", "rating"])


In [None]:
# -------------------------------------------
# 13. BUILD THE UI WIDGETS
# -------------------------------------------
# Question entry plus one output pane for answers and one for feedback messages.
input_box = widgets.Text(
    description='Question:',
    placeholder='Type your question here',
    value='Explain Python?',
    disabled=False,
)
output_area = widgets.Output()
feedback_output = widgets.Output()

# Rating controls: a 1-5 slider (initially 3) and the button that submits it.
rating_slider = widgets.IntSlider(
    description='Rating:',
    value=3,
    min=1,
    max=5,
    step=1,
    style={'description_width': 'initial'},
)
feedback_button = widgets.Button(
    description='Submit Feedback',
    tooltip='Submit your feedback rating',
    icon='thumbs-up',
    button_style='',
    disabled=False,
)

In [None]:
# -------------------------------------------
# 14. CALLBACK FUNCTIONS
# -------------------------------------------
def on_button_click(_):
    """
    Handle a click on the "Ask" button.

    Reads the question from ``input_box``, fetches graph insights for it,
    fills ``prompt_template`` and sends the result to ``query_engine``. The
    answer is printed into ``output_area`` and the question/answer pair is
    stored in the module-level ``last_question`` / ``last_response`` so it can
    be rated afterwards. A blank question only prints a hint and leaves the
    stored pair untouched.

    Args:
        _: The clicked button (unused).
    """
    global last_question, last_response
    with output_area:
        output_area.clear_output()
        question = input_box.value.strip()
        if not question:
            # Nothing to look up or send to the LLM.
            print("Please type a question first.")
            return
        graph_insights = get_graph_insights(question)
        query_prompt = prompt_template.format(
            context=context, graph_insights=graph_insights, question=question
        )
        resp = query_engine.query(query_prompt)
        print(resp.response)
        last_question = question
        last_response = resp.response

def on_feedback_click(_):
    """
    Handle a click on the "Submit Feedback" button.

    Appends the most recent question, its response and the current slider
    rating as one row to the CSV file at ``feedback_csv_path``, then prints a
    confirmation into ``feedback_output``. If no question has been answered
    yet, only a hint is printed and nothing is written.

    Args:
        _: The clicked button (unused).
    """
    # The module-level names are only read here, so no `global` is needed.
    with feedback_output:
        feedback_output.clear_output()
        if not last_question or not last_response:
            print("No recent question/response to rate. Please ask a question first.")
            return
        rating_value = rating_slider.value
        # Responses can contain any Unicode text; an explicit UTF-8 encoding
        # avoids encode errors under a narrower platform default.
        with open(feedback_csv_path, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([last_question, last_response, rating_value])
        print(f"Feedback recorded for question: '{last_question}' with rating: {rating_value}")

# Create the "Ask" button, then connect both buttons to their click handlers.
ask_button = widgets.Button(
    description='Ask',
    tooltip='Ask the question',
    icon='check',
    button_style='',
    disabled=False,
)

ask_button.on_click(on_button_click)
feedback_button.on_click(on_feedback_click)


In [None]:

# -------------------------------------------
# 15. DISPLAY THE UI
# -------------------------------------------
# Question controls first, feedback controls underneath; each widget is
# rendered in the order given.
display(
    input_box,
    ask_button,
    output_area,
    rating_slider,
    feedback_button,
    feedback_output,
)