### IMPORTS

In [207]:
import os
import json
import warnings
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter, RecursiveJsonSplitter
from langchain_community.document_loaders import TextLoader, JSONLoader
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain.schema import Document
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

warnings.filterwarnings("ignore")

# from fastapi import FastAPI
# from pydantic import BaseModel

# from langchain.schema import Document
# from langchain.prompts import PromptTemplate
# from langchain.memory import ConversationBufferMemory


In [208]:
# with open('data/data.json', 'r') as file:
#     workshops_data = json.load(file)

# workshop_documents = []

# for workshop in workshops_data:
#     content = json.dumps(workshop, indent=2)
    
#     doc = Document(
#         page_content=content,
#         metadata={
#             "title": workshop["title"],
#             "duration": workshop["duration"],
#             "max_participants": workshop["max_participants"]
#         }
#     )
#     workshop_documents.append(doc)

# workshop_documents

In [209]:
# Load environment variables
load_dotenv()
open_api_key = os.getenv("OPENAI_API_KEY")
if not open_api_key:
    raise ValueError("OPENAI_API_KEY is not set")

### DATA 

In [210]:
loader = TextLoader("data/data.txt", encoding="utf-8")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=50,
    length_function=len
)

docs = text_splitter.split_documents(documents)
docs

[Document(metadata={'source': 'data/data.txt'}, page_content='Azercell is the only company in Azerbaijan and CIS region which has been awarded Platinum'),
 Document(metadata={'source': 'data/data.txt'}, page_content='and CIS region which has been awarded Platinum Certificate of International “Investors in People”'),
 Document(metadata={'source': 'data/data.txt'}, page_content='of International “Investors in People” Standard. The mobile operator is the only company in the'),
 Document(metadata={'source': 'data/data.txt'}, page_content='The mobile operator is the only company in the country to receive Gold Award in nominations of'),
 Document(metadata={'source': 'data/data.txt'}, page_content='country to receive Gold Award in nominations of "Company of the Year" and “The Most Innovative'),
 Document(metadata={'source': 'data/data.txt'}, page_content='of "Company of the Year" and “The Most Innovative Company of the Year” from the International'),
 Document(metadata={'source': 'data/data.t

### MODEL AND VECTOR DB 

In [211]:
# completion_model = OpenAI(
#     model="gpt-3.5-turbo-instruct",
#     temperature=0
# ) 

chat_model = ChatOpenAI(
    model="gpt-4o",
    temperature=0
)

memory = ConversationBufferMemory(
    memory_key="history",
    return_messages=True
)

embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002")
vector_store = FAISS.from_documents(docs, embedding_model)

### CUSTOM PROMPT 

In [212]:
custom_prompt = PromptTemplate(
    input_variables=["history", "context", "question"],
    template="""
    You are a helpful AI assistant. 
    You are a helpful AI assistant who remembers past conversations and provides answers using the given knowledge.
    
    Conversation history: 
    {history}
    
    Relevant knowledge:
    {context}
    
    Human: 
    {question}
    
    AI Assistant:
    """
)

In [213]:
# query1 = "what is the duration of the AI Chatbot Workshop?"
# query1_answer = vector_store.similarity_search(query1)
# query1_answer
# query1_answer[0].page_content

### RETREIVER

In [214]:
retreiver = vector_store.as_retriever()

### QA CHAIN

In [215]:
# qa_chain = RetrievalQA.from_chain_type(
#     llm=chat_model,
#     chain_type="stuff", 
#     retriever=retreiver,
#     chain_type_kwargs={
#         "verbose": True,
#         "prompt": custom_prompt
#     }
# )

qa_chain = ConversationalRetrievalChain.from_llm(
    llm=chat_model,
    retriever=retreiver,
    verbose=True,
    combine_docs_chain_kwargs={
        "prompt": custom_prompt
    },
    memory=memory,
    # memory=memory,
)

### RESULTS

In [223]:
# retreiver_query = {"query": "What awards has Azercell received?"}
# results = qa_chain.invoke(
#     retreiver_query
# )

def chat_with_bot():
    print("Chatbot is ready! Type 'exit' to stop.")
    
    chat_history = []
    while True:
        query = input("\nYou: ")
        if query.lower() == "exit":
            print("Goodbye!")
            break
        response = qa_chain.invoke(
            {
                "input": query,
                "chat_history": chat_history
            }
        )
        print("\nAI:", response["answer"])
        
if __name__ == "__main__":
    chat_with_bot()

Chatbot is ready! Type 'exit' to stop.


ValueError: Missing some input keys: {'question'}

In [217]:
results

{'query': 'What awards has Azercell received?',
 'result': 'Azercell has received the Platinum Certificate of International "Investors in People" and the Gold Award in nominations, as well as the "Company of the Year" award from the International Business Award STEVIE.'}