In [None]:
!pip install neo4j langchain-experimental
!pip install llama-index llama-index-llms-groq groq llama-index-embeddings-huggingface ipywidgets

In [None]:
from IPython.display import display
import ipywidgets as widgets
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, ServiceContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.groq import Groq
import warnings
import os
from neo4j import GraphDatabase
import spacy

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Neo4j connection settings. Environment variables win, so real credentials
# never have to be typed into (and saved with) the notebook; the placeholder
# values are only used when the corresponding variable is not set.
neo4j_uri = os.environ.get("NEO4J_URI", "ENTER URI")
neo4j_user = os.environ.get("NEO4J_USERNAME", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "ENTER INSTANCE PASSWORD")

# Single driver instance shared by the graph population and graph lookup code.
driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))

In [None]:

# Only fall back to the placeholder when no key is configured; assigning
# unconditionally would overwrite a real key already present in the environment.
os.environ.setdefault("GROQ_API_KEY", "ENTER API KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Prompt sent to the LLM. It is filled with str.format and expects exactly
# three fields: context, graph_insights and question.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Graph Insights: {graph_insights}
Question: {question}

Answer the question and provide additional helpful information,
based on the pieces of information and graph insights, if applicable. Be succinct.

Responses should be properly formatted to be easily read.
"""

In [None]:
# Fixed description of the corpus; on_button_click substitutes it into the
# {context} field of prompt_template for every question.
context = "This directory contains multiple documents providing examples and solutions for various programming tasks."
# Placeholder: replace with the directory that holds the source documents.
directory_path = "ENTER PATH"
# Load the documents found under directory_path; `documents` feeds both the
# graph population and the vector index further down.
reader = SimpleDirectoryReader(input_dir=directory_path)
documents = reader.load_data()
# spaCy pipeline handed to populate_graph, which reads the entities (.ents) it produces.
nlp = spacy.load("en_core_web_sm")

In [None]:
def populate_graph(documents, driver, nlp):
    """Store the ORG/PRODUCT entities of each document in Neo4j as a concept chain.

    Every document's text is passed through ``nlp``; the entities labelled
    ORG or PRODUCT are kept in order of appearance. Each kept entity is
    MERGEd as a ``:Concept`` node, after which every pair of consecutive
    entities is linked by a ``RELATED_TO`` relationship pointing from the
    earlier entity to the later one.

    Args:
        documents: Iterable of objects exposing a ``text`` attribute.
        driver: Object whose ``session()`` returns a context manager that
            offers ``run(query, **params)``.
        nlp: Callable turning text into an object with an ``ents`` iterable;
            each entity exposes ``text`` and ``label_``.
    """
    wanted_labels = ("ORG", "PRODUCT")  # Adjust entity types as needed

    with driver.session() as session:
        for document in documents:
            parsed = nlp(document.text)
            names = []
            for entity in parsed.ents:
                if entity.label_ in wanted_labels:
                    names.append(entity.text)

            # Nodes first, so the MATCH in the linking query can find both ends.
            for name in names:
                session.run("MERGE (:Concept {name: $concept})", concept=name)

            # Pair each concept with its immediate successor in the document.
            for current, following in zip(names, names[1:]):
                session.run(
                    """
                        MATCH (c1:Concept {name: $concept}), (c2:Concept {name: $next_concept})
                        MERGE (c1)-[:RELATED_TO]->(c2)
                        """,
                    concept=current, next_concept=following
                )

# Runs at cell execution time: writes the concepts extracted from the loaded
# documents into the Neo4j database reached through `driver`.
populate_graph(documents, driver, nlp)

In [None]:
# Split the documents into overlapping chunks (nodes) for embedding.
text_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=200)
nodes = text_splitter.get_nodes_from_documents(documents, show_progress=True)

# Embedding model and LLM shared by index construction and querying.
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)
# NOTE(review): ServiceContext is deprecated in newer llama-index releases in
# favour of Settings — confirm against the installed version.
service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)

# Build the index from the nodes produced above, so the splitter settings are
# the ones actually applied and the documents are not chunked a second time.
vector_index = VectorStoreIndex(nodes=nodes, service_context=service_context, show_progress=True)

# Persist to disk, then reload from the persisted copy; `index` is what the
# query engine is created from.
vector_index.storage_context.persist(persist_dir="./storage_mini")
storage_context = StorageContext.from_defaults(persist_dir="./storage_mini")
index = load_index_from_storage(storage_context, service_context=service_context)

In [None]:
# Single-line text field holding the user's question; on_button_click reads its value.
input_box = widgets.Text(
    description="Question:",
    placeholder="Type your question here",
    value="Explain Python?",
    disabled=False,
)

# Output widget that on_button_click prints the answer into.
output_area = widgets.Output()


def get_graph_insights(question):
    """Look up concepts related to ``question`` in the Neo4j graph.

    A concept matches when, ignoring case, its name contains the question
    text or the question text contains its name. For every match the names
    of the concepts it points to via ``RELATED_TO`` are collected.

    Args:
        question: Free-text question typed by the user.

    Returns:
        One line per matching concept, formatted as
        ``Concept: <name>, Related Concepts: <comma-separated names>`` and
        joined with newlines, or ``"No relevant graph insights found."`` when
        the question is blank or nothing matches.
    """
    # A blank question is a substring of every name and would return the whole graph.
    if not question or not question.strip():
        return "No relevant graph insights found."

    with driver.session() as session:
        result = session.run(
            """
            MATCH (c:Concept)
            WHERE toLower(c.name) CONTAINS toLower($question)
               OR toLower($question) CONTAINS toLower(c.name)
            OPTIONAL MATCH (c)-[r:RELATED_TO]->(other:Concept)
            RETURN c.name AS concept, collect(other.name) AS related_concepts
            """,
            question=question
        )
        # Consume every record while the session is still open.
        insights = [
            f"Concept: {record['concept']}, Related Concepts: {', '.join(record['related_concepts'])}"
            for record in result
        ]

    # Return only after all records were read; an empty result yields the fallback text.
    return "\n".join(insights) if insights else "No relevant graph insights found."


In [None]:
def on_button_click(b):
    """Answer the question currently in ``input_box`` and print it into ``output_area``.

    Registered as the click handler of the Ask button. The question is
    combined with the static corpus description and the graph lookup result
    via ``prompt_template`` and sent to ``query_engine``.

    Args:
        b: The object passed by the button's click event (unused).
    """
    with output_area:
        # Remove whatever the previous click printed.
        output_area.clear_output()
        user_question = input_box.value
        insights = get_graph_insights(user_question)
        filled_prompt = prompt_template.format(
            context=context,
            graph_insights=insights,
            question=user_question,
        )
        answer = query_engine.query(filled_prompt)
        print(answer.response)

# Button that submits the question.
button = widgets.Button(
    description="Ask",
    tooltip="Ask the question",
    icon="check",
    button_style="",
    disabled=False,
)

# Route clicks to the handler defined above.
button.on_click(on_button_click)

display(input_box, button, output_area)
# on_button_click looks up query_engine when it runs, so this assignment must
# have executed before the first click.
query_engine = index.as_query_engine(service_context=service_context)