# Jupyter Notebook for testing parts of the App

In [4]:
# LOADING
import os
import json
import openai
from llama_index import SimpleDirectoryReader

# Read the API credentials and model name from the JSON config kept outside the notebook.
# The encoding is explicit because JSON is UTF-8 by specification, while open() would
# otherwise fall back to the platform's locale encoding.
with open("../CONFIG_LIST.json", "r", encoding="utf-8") as file:
    config = json.load(file)

# Export the key so that libraries reading OPENAI_API_KEY from the environment can find it.
os.environ['OPENAI_API_KEY'] = config["openai_api_key"]
model = config["model"]

openai.organization = config["openai_organization"]
openai.api_key = os.getenv("OPENAI_API_KEY")

# NOTE(review): the folder is spelled 'documentS' here but "documents" in the
# change-detection cells further down -- confirm which spelling matches the folder on disk.
documents = SimpleDirectoryReader('../documentS').load_data()  # reads the whole directory.


# Using OpenAI API directly

In [5]:
from llama_index import LLMPredictor, PromptHelper, ServiceContext
from llama_index import GPTVectorStoreIndex, StorageContext, load_index_from_storage
from langchain.chat_models import ChatOpenAI


def _build_service_context(model):
    """Create the ServiceContext (chat LLM plus prompt sizing) shared by index build and load."""
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.7, model_name=model))

    # Prompt sizing values, passed to PromptHelper in the same order as before.
    max_input_size = 4096
    num_output = 256
    max_chunk_overlap = 0.1
    chunk_size_limit = 600
    prompt_helper = PromptHelper(
        max_input_size,
        num_output,
        max_chunk_overlap,
        chunk_size_limit=chunk_size_limit,
    )
    return ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)


def indexer(documents, model, persist_dir="../storage"):
    """Return a vector index over ``documents``, reusing a persisted copy when one exists.

    Args:
        documents: Documents to index. Only used when no persisted index is found.
        model: Chat model name handed to ``ChatOpenAI``.
        persist_dir: Directory the index is loaded from / saved to.
            Defaults to ``"../storage"``, the location the notebook always used.

    Returns:
        The loaded or newly built index.
    """
    # Build the service context up front so that a reloaded index answers with the
    # requested model as well; previously only a freshly built index received it.
    service_context = _build_service_context(model)

    if os.path.exists(persist_dir):
        print('index exist')

        # Rebuild the storage context from disk and load the index stored in it.
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        index = load_index_from_storage(storage_context, service_context=service_context)
    else:
        print('not exist, creating index ... ')

        # Index the supplied documents, then save the result for the next run.
        index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
        index.storage_context.persist(persist_dir=persist_dir)

    return index

# Load the persisted index if '../storage' exists, otherwise build it from `documents`.
index = indexer(documents=documents, model=model)


index exist


In [6]:

# Question sent to the index (the trailing space inside the literal is part of the prompt).
prompt = "Can you write a summary of the book 'Exploring the Depths of Human Existence' "

# Create a query engine on top of the index and run the prompt through it.
query_engine = index.as_query_engine()
response = query_engine.query(prompt)

# Show the answer as plain text.
print(response)

The book 'Exploring the Depths of Human Existence' is a project that has taken over 12 years to complete. It is a collection of conversations on identity, society, and progress, written by Dr. Carlos Kuhn. The author acknowledges that we do not have all the answers and encourages curiosity and the pursuit of knowledge. The book aims to captivate readers' interest, spark excitement, and inspire thoughtful contemplation. It is a culmination of insights and data, crafted and refined over time, and is intended to serve as a catalyst for intellectual exploration.


In [7]:
from langchain.agents import Tool
from langchain.agents import initialize_agent
from langchain.chains.conversation.memory import ConversationBufferWindowMemory

# Build the query engine once instead of creating a new one on every tool call.
tool_query_engine = index.as_query_engine()

# Define tools
tools = [
    Tool(
        name="LlamaIndex",
        func=lambda q: str(tool_query_engine.query(q)),
        description="useful for when you want to answer questions about the author. The input to this tool should be a complete english sentence.",
        return_direct=True
    ),
]
# Initialize conversational memory
conversational_memory = ConversationBufferWindowMemory(memory_key='chat_history', k=5, return_messages=True)
# Initialize agent with conversational memory
agent_executor = initialize_agent(tools, llm=ChatOpenAI(temperature=0.7, model_name=model), agent="conversational-react-description", memory=conversational_memory)

# Interactive chat against the index; typing 'thanks' ends the session.
while True:
    try:
        prompt = input("type prompt")
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or a closed stdin ends the chat instead of
        # leaving the cell with a traceback.
        print(' ---------- Agent: chat is closed -------')
        break

    if prompt == 'thanks':
        print(' ---------- Agent: chat is closed -------')
        break

    if not prompt.strip():
        # Nothing was typed: ask again rather than spending an LLM call on a blank prompt.
        continue

    response = agent_executor.run(input=prompt)
    print(f'me : {prompt}')
    print(f'Agent : {response}')


me : What is the autor name for the book in the index?
Agent : The author name for the book in the index is Dr Carlos Kuhn.
 ---------- Agent: chat is closed -------


## To Do to improve the code

In [49]:
# TODO: regenerate the index whenever a document is added to the documents folder.

import os
import hashlib

def get_directory_hash(directory, chunk_size=65536):
    """Snapshot a directory tree as a sorted list of ``(file_path, md5_hex)`` tuples.

    Args:
        directory: Root directory to walk recursively.
        chunk_size: Number of bytes read per step while hashing, so large files
            are never loaded into memory in one piece.

    Returns:
        A list of ``(path, md5 hex digest)`` tuples sorted by path. Sorting makes two
        snapshots of an unchanged tree compare equal regardless of the order in which
        the operating system lists the files. A missing directory gives an empty list,
        because ``os.walk`` yields nothing for it.
    """
    file_hash_list = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            # MD5 is used here only to detect content changes, not for security.
            digest = hashlib.md5()
            with open(file_path, 'rb') as f:
                for chunk in iter(lambda: f.read(chunk_size), b''):
                    digest.update(chunk)
            file_hash_list.append((file_path, digest.hexdigest()))

    # os.walk gives no ordering guarantee; sort so the snapshot is deterministic.
    file_hash_list.sort()
    return file_hash_list

def has_directory_changed(previous_state, current_state):
    """Report whether two directory snapshots differ.

    Args:
        previous_state: Snapshot taken earlier (as returned by ``get_directory_hash``).
        current_state: Snapshot taken now.

    Returns:
        ``True`` when the snapshots are not equal, ``False`` otherwise.
    """
    unchanged = previous_state == current_state
    return not unchanged

if __name__ == "__main__":
    directory_path = "documents"

    if not os.path.isdir(directory_path):
        # A missing directory hashes to an empty snapshot, which would always
        # compare as "not changed"; report it instead of hiding the problem.
        print(f"Directory not found: {directory_path}")
    else:
        # Record the current state
        current_state = get_directory_hash(directory_path)

        # On a fresh kernel no earlier snapshot exists yet, so look it up defensively
        # instead of failing with a NameError.
        # (you might want to load the previous state from a file)
        previous_state = globals().get("previous_state")

        if previous_state is None:
            print("No previous state recorded; using the current state as baseline.")
            previous_state = current_state
        elif has_directory_changed(previous_state, current_state):
            print("Directory has changed!")
            # Update the previous state with the current state for the next check
            previous_state = current_state
        else:
            print("Directory has not changed.")


Directory has not changed.


In [46]:
# Take a baseline snapshot of the "documents" directory for later comparisons.
# NOTE(review): this cell sits below the comparison cell that reads `previous_state`,
# yet has the lower execution count (In [46] vs In [49]); in a top-to-bottom run the
# comparison is reached before this assignment.
previous_state = get_directory_hash("documents")