### Setup of OpenAI API & Model

In [None]:
import os
import openai
import sys
# Add the directory two levels up (relative to the working directory) to the
# module search path.
sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
# Load the .env file located by find_dotenv(); the return value is discarded.
_ = load_dotenv(find_dotenv()) # read local .env file

# Direct indexing of os.environ raises KeyError when OPENAI_API_KEY is not set.
openai.api_key  = os.environ['OPENAI_API_KEY']

In [None]:
import datetime

# Pick the model by today's date: the dated "0301" snapshot before
# 2 Sep 2023, the unversioned model name from that day onwards.
current_date = datetime.date.today()
llm_name = (
    "gpt-3.5-turbo-0301"
    if current_date < datetime.date(2023, 9, 2)
    else "gpt-3.5-turbo"
)
print(llm_name)

### Data Loading and Data Splitting

In [None]:
from langchain.document_loaders import PyPDFLoader


folder_path = "dataset_docs/green_energy_pdfs/"

# Build one loader for every entry in the folder whose name ends in ".pdf".
loaders = [
    PyPDFLoader(os.path.join(folder_path, entry))
    for entry in os.listdir(folder_path)
    if entry.endswith(".pdf")
]

# Flatten what each loader returns into a single list, loader by loader.
docs = [page for pdf_loader in loaders for page in pdf_loader.load()]

print(f"Total no. of PDF files are : {len(loaders)}. \nTotal no. of pages of all PDF files are : {len(docs)}.")

In [None]:
# Break the loaded documents into overlapping chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 512-unit chunks with a 24-unit overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=24, chunk_size=512)
splits = text_splitter.split_documents(docs)
print(f"Total no. of Chunks created after splitting are : {len(splits)}.")

### Creating Embeddings for Data Chunks and Storing Them in a Vector Store

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
# Embedding model built with the library's default settings; it is passed to
# Chroma when the vector store is created.
embedding = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import Chroma
# Directory handed to Chroma as `persist_directory` when the store is built.
persist_directory = 'dataset_docs/chroma/'

In [None]:
# !rm -rf ./dataset_docs/chroma  # remove old database files if any

In [None]:
# Embed every chunk in `splits` with `embedding` and store the result in a
# Chroma vector store configured with `persist_directory`.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory
)
# count() reports how many entries the collection holds, not how many
# collections exist, so the message names the entries rather than "Collections".
# NOTE(review): if the persist directory already contains data from an earlier
# run, this figure may exceed len(splits) — confirm, or clear the directory first.
print(f"Total no. of Chunk Embeddings stored in Chroma VectorDB are : {vectordb._collection.count()}.")

### Retrieval QA Chain Creation

#### Create a conversation chain that uses our vectordb as the retriever; this also allows for chat history management

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

# Chat model named by `llm_name`, with temperature fixed at 0.
llm = ChatOpenAI(temperature=0, model_name=llm_name)

# Expose the vector store through its retriever interface and hand both
# pieces to the conversational retrieval chain.
retriever = vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)

### Connecting all the RAG components together and creating a ChatBot Interface

In [None]:
# IPython helpers used to render each chat turn as HTML
from IPython.display import display, HTML

# Running conversation log: chat() appends (question, answer) tuples here and
# passes the list back to the chain on every call.
chat_history = []

# Interactive question/answer loop over the retrieval chain
def chat():
    """Run an interactive chat loop against the retrieval QA chain.

    Questions are read with ``input()`` until the user types ``exit``
    (case-insensitive; surrounding whitespace is ignored). Each question is
    sent to the module-level ``qa`` chain together with ``chat_history``,
    the ``(question, answer)`` pair is appended to ``chat_history``, and
    both sides of the turn are rendered via ``display(HTML(...))``.

    Blank input is skipped rather than sent to the chain. The question and
    the answer are HTML-escaped before rendering so that characters such as
    ``<`` or ``&`` show up literally instead of being parsed as markup.
    """
    # Local import keeps the function usable without touching other cells.
    from html import escape

    print("Welcome to AI chatbot! Type 'exit' to stop.")
    while True:
        # Strip so that "exit " or " exit" still terminates the loop.
        query = input("Please type here: ").strip()

        if not query:
            # Nothing to ask; prompt again instead of calling the chain.
            continue

        if query.lower() == 'exit':
            print("Thank you for using the AI chatbot!")
            break

        # Ask the chain, supplying the conversation so far as context.
        result = qa({"question": query, "chat_history": chat_history})
        answer = result['answer']
        chat_history.append((query, answer))

        # Display the conversation; both texts are untrusted, so escape them.
        display(HTML(f'<b><font color="blue">Human_User:</font></b> {escape(query)}'))
        display(HTML(f'<b><font color="green">AI_Chatbot:</font></b> {escape(answer)}'))

# Start the interactive loop; it returns once the user types 'exit'.
chat()