# Custom Knowledge Chatbot w/ LlamaIndex
By Liam Ottley

YouTube: https://www.youtube.com/@LiamOttley

Github: https://github.com/wombyz/custom-knowledge-chatbot/blob/main/custom-knowledge-chatbot/Custom%20Knowledge%20Chatbot.ipynb

LlamaIndex: https://gpt-index.readthedocs.io/en/latest/index.html

## Customization
Building on the example linked above, with fixes taken from the LlamaIndex documentation, this notebook indexes the documents in the `data` directory and lets you query that index through a chatbot interface.

## Installation
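*   Create a `data` directory in the notebook's working directory
*   Upload your documents into `data`
*   Provide your OpenAI API key as `APIKEY` in the environment or in a `.env` file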




In [1]:
!pip install llama_index
!pip install langchain




In [2]:
import os
import sys
from dotenv import load_dotenv
# Pull variables from a local .env file (if one exists) into the environment.
load_dotenv()

# The key is expected under APIKEY and is copied to OPENAI_API_KEY.
# If APIKEY is missing, an already-exported OPENAI_API_KEY is accepted as-is.
# Fail early with a readable message: assigning None into os.environ would
# otherwise raise an opaque TypeError.
if os.environ.get("APIKEY"):
    os.environ['OPENAI_API_KEY'] = os.environ["APIKEY"]
elif not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "No OpenAI API key found: set APIKEY (or OPENAI_API_KEY) in the "
        "environment or in a .env file before running this notebook."
    )

In [None]:
# Load your data into 'Documents', a custom type by LlamaIndex
from llama_index import GPTSimpleVectorIndex, download_loader, SimpleDirectoryReader

# Point the reader at the local 'data' directory and load its contents.
documents = SimpleDirectoryReader('data').load_data()
# Build a GPTSimpleVectorIndex from the loaded documents and bind it to `index`.
index = GPTSimpleVectorIndex.from_documents(documents)


In [None]:
# Ask whichever index is currently bound to `index` a question and print the response.
response = index.query("What are some of the sparks of AGI that large language models are exhibiting?")
print(response)

In [None]:
# Second sample question against the current `index`; prints the response.
response = index.query("What does memory augmented mean?")
print(response)

In [3]:
# Set up your LLM and build a keyword-table index that uses it

from llama_index import (
    GPTKeywordTableIndex,
    SimpleDirectoryReader,
    LLMPredictor,
    ServiceContext
)
from langchain import OpenAI

# Load the 'data' directory again within this cell.
documents = SimpleDirectoryReader('data').load_data()

# define LLM: "text-davinci-002" at temperature 0, wrapped in an LLMPredictor
# and handed to LlamaIndex through a ServiceContext
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-002"))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

# build index
# NOTE: this rebinds `index`, replacing the GPTSimpleVectorIndex built in the earlier cell.
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt, so the
# build may not have completed in the saved run — confirm before relying on later cells.
index = GPTKeywordTableIndex.from_documents(documents, service_context=service_context)

  from .autonotebook import tqdm as notebook_tqdm
Token indices sequence length is longer than the specified maximum sequence length for this model (3586 > 1024). Running this sequence through the model will result in indexing errors


KeyboardInterrupt: 

In [None]:
# Same question as asked earlier; `index` is now whatever the previous cell bound
# (the keyword-table index, if cells were run in order).
response = index.query("What are some of the sparks of AGI that large language models are exhibiting?")
print(response)

In [None]:
# Repeat the "memory augmented" question against the current `index`.
response = index.query("What does memory augmented mean?")
print(response)

In [None]:
# Persist the current index to ./index.json (relative to the working directory).
index.save_to_disk('./index.json')

In [None]:
# Ask the current `index` for a summary and print the response.
response = index.query("Summarize the Sparks of AGI paper")
print(response)

In [None]:

import json
import os
import sys


class Chatbot:
    """Small chat wrapper around an index object that exposes ``query(text)``.

    Every exchange is recorded in ``chat_history`` as a list of
    ``{"role": ..., "content": ...}`` dicts, which can be saved to and
    restored from a JSON file.
    """

    # Number of most recent history messages replayed when ``use_history`` is set.
    HISTORY_WINDOW = 5

    def __init__(self, api_key, index):
        """Create a chatbot bound to ``index``.

        Args:
            api_key: OpenAI API key, or a falsy value to leave the current
                configuration untouched.
            index: Object with a ``query(text)`` method whose result has a
                ``response`` attribute.
        """
        self.index = index
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
            # Also update the ``openai`` module when something has already
            # imported it. It is looked up in sys.modules rather than imported
            # because this notebook never imports ``openai`` itself.
            openai_module = sys.modules.get("openai")
            if openai_module is not None:
                openai_module.api_key = api_key
        self.chat_history = []

    def _build_prompt(self, user_input):
        """Return the recent history plus ``user_input`` as ``role: content`` lines."""
        recent = self.chat_history[-self.HISTORY_WINDOW:]
        lines = [f"{message['role']}: {message['content']}" for message in recent]
        lines.append(f"user: {user_input}")
        return "\n".join(lines)

    def generate_response(self, user_input, use_history=False):
        """Query the index and record the exchange in ``chat_history``.

        Args:
            user_input: The user's message.
            use_history: When true, the query text is the recent chat history
                followed by ``user_input``; by default only ``user_input`` is
                sent to the index.

        Returns:
            The assistant message dict ``{"role": "assistant", "content": ...}``.
        """
        query_text = self._build_prompt(user_input) if use_history else user_input
        # Use the index given to this instance, not a notebook-level global.
        response = self.index.query(query_text)

        message = {"role": "assistant", "content": response.response}
        self.chat_history.append({"role": "user", "content": user_input})
        self.chat_history.append(message)
        return message

    def load_chat_history(self, filename):
        """Replace ``chat_history`` with the JSON content of ``filename``.

        A missing file is not an error: the current history is kept unchanged.
        """
        try:
            with open(filename, 'r') as f:
                self.chat_history = json.load(f)
        except FileNotFoundError:
            # First run: there is nothing to restore yet.
            pass

    def save_chat_history(self, filename):
        """Write ``chat_history`` to ``filename`` as JSON, overwriting the file."""
        with open(filename, 'w') as f:
            json.dump(self.chat_history, f)

In [None]:
# Swap out your index below for whatever knowledge base you want.
# The API key is read from the environment: never paste a key into the
# notebook, and revoke any key that has ever been committed or shared.
bot = Chatbot(os.environ.get("OPENAI_API_KEY"), index=index)
bot.load_chat_history("chat_history.json")

try:
    while True:
        user_input = input("You: ")
        if not user_input.strip():
            # Nothing to ask; do not spend a query on blank input.
            continue
        if user_input.strip().lower() in ["bye", "goodbye"]:
            print("Bot: Goodbye!")
            break
        response = bot.generate_response(user_input)
        print(f"Bot: {response['content']}")
except (KeyboardInterrupt, EOFError):
    # Interrupting the kernel or closing the input ends the chat cleanly.
    print("\nBot: Goodbye!")
finally:
    # Save on every exit path so the conversation survives interrupts and errors.
    bot.save_chat_history("chat_history.json")