In [1]:
from google.colab import drive
import os

drive.mount('/content/drive')

base_dir = "/content/drive/MyDrive/RAG_Chatbot"
os.makedirs(base_dir, exist_ok=True)

datasets_dir = os.path.join(base_dir, "datasets")
outputs_dir = os.path.join(base_dir, "outputs")
os.makedirs(datasets_dir, exist_ok=True)
os.makedirs(outputs_dir, exist_ok=True)

print(f"Base Directory: {base_dir}")
print(f"Datasets Directory: {datasets_dir}")
print(f"Outputs Directory: {outputs_dir}")


Mounted at /content/drive
Base Directory: /content/drive/MyDrive/RAG_Chatbot
Datasets Directory: /content/drive/MyDrive/RAG_Chatbot/datasets
Outputs Directory: /content/drive/MyDrive/RAG_Chatbot/outputs


In [2]:
import pandas as pd

# Updated data
data = {
    "Question": [
        "Who is the owner of this project?",
        "What is this project about?",
        "What technology does this project use?",

    ],
    "Answer": [
        "The owner of this project is Himanshu Kumar.",
        "This project is about building a chatbot using Retrieval-Augmented Generation.",
        "This project uses Python, LangChain, and Google Colab.",

    ]
}

# To save a CSV file in Google Drive
df = pd.DataFrame(data)
dataset_path = os.path.join(datasets_dir, "knowledge_base.csv")
df.to_csv(dataset_path, index=False)
print(f"Dataset saved at {dataset_path}")


Dataset saved at /content/drive/MyDrive/RAG_Chatbot/datasets/knowledge_base.csv


In [3]:
!pip install langchain
!pip install langchain-community
!pip install openai faiss-cpu




Collecting langchain-community
  Downloading langchain_community-0.3.14-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.25.0-py3-none-any.whl.metadata (7.1 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB

In [4]:
import os

os.environ["OPENAI_API_KEY"] = ""


In [6]:
!pip install tiktoken


Collecting tiktoken
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.8.0


In [7]:
!pip install faiss-gpu


Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [8]:
!pip install langchain sentence-transformers faiss-cpu




In [13]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
from langchain.vectorstores import FAISS
from langchain.docstore.in_memory import InMemoryDocstore
from langchain.schema import Document

# Loading dataset
df = pd.read_csv(dataset_path)
texts = [f"Question: {row['Question']} Answer: {row['Answer']}" for _, row in df.iterrows()]

model = SentenceTransformer('all-MiniLM-L6-v2')

embedding_vectors = model.encode(texts)

# Converting embeddings to NumPy array
embedding_array = np.array(embedding_vectors, dtype='float32')

index = faiss.IndexFlatL2(embedding_array.shape[1])
index.add(embedding_array)

documents = {i: Document(page_content=text) for i, text in enumerate(texts)}

docstore = InMemoryDocstore(documents)

def embedding_function(text):
    return model.encode([text])[0]

# Creating FAISS vectorstore
vectorstore = FAISS(index=index, docstore=docstore, index_to_docstore_id=list(documents.keys()), embedding_function=embedding_function)

print("Vector store successfully created!")




Vector store successfully created!


In [14]:
from langchain.chains import RetrievalQA
from langchain.llms.fake import FakeListLLM

# Initializing the fake LLM
responses = [row["Answer"] for _, row in df.iterrows()]
fake_llm = FakeListLLM(responses=responses)

qa_chain = RetrievalQA.from_chain_type(llm=fake_llm, retriever=vectorstore.as_retriever())

# Loop for chatbot
while True:
    question = input("Ask a question (type 'exit' to quit): ")
    if question.lower() in ["exit", "quit"]:
        print("Goodbye!")
        break
    response = qa_chain.run(question)
    print(f"Answer: {response}")


Ask a question (type 'exit' to quit): Who is the owner of this project?
Answer: The owner of this project is Himanshu Kumar.
Ask a question (type 'exit' to quit): exit
Goodbye!


In [15]:
# defining directory for logs
log_dir = "/content/drive/MyDrive/RAG_Chatbot/logs"
os.makedirs(log_dir, exist_ok=True)  # Create the directory if it doesn't exist
log_path = os.path.join(log_dir, "chatbot_logs.txt")

# looping with loging
with open(log_path, "w") as log_file:
    while True:
        question = input("Ask a question (type 'exit' to quit): ")
        if question.lower() in ["exit", "quit"]:
            print("Goodbye!")
            break
        response = qa_chain.run(question)
        print(f"Answer: {response}")
        log_file.write(f"Question: {question}\nAnswer: {response}\n\n")

print(f"Chatbot logs saved at {log_path}")


Ask a question (type 'exit' to quit): Who is the owner of this project?
Answer: This project is about building a chatbot using Retrieval-Augmented Generation.
Ask a question (type 'exit' to quit): What technology does this project use?
Answer: This project uses Python, LangChain, and Google Colab.
Ask a question (type 'exit' to quit): exit
Goodbye!
Chatbot logs saved at /content/drive/MyDrive/RAG_Chatbot/logs/chatbot_logs.txt


In [21]:
from google.colab import files

# Files to download
files.download("/content/drive/MyDrive/RAG_Chatbot/datasets/knowledge_base.csv")
files.download("/content/drive/MyDrive/RAG_Chatbot/logs/chatbot_logs.txt")
files.download("/content/drive/MyDrive/Colab Notebooks/RAG_Chatbot.ipynb")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>