In [1]:
import os

In [2]:
from langchain.text_splitter import CharacterTextSplitter

In [3]:
from langchain_community.document_loaders import TextLoader

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from pinecone import Pinecone, ServerlessSpec

In [6]:
from langchain_pinecone import PineconeVectorStore

In [7]:
from dotenv import load_dotenv

In [8]:
from pinecone import Pinecone, ServerlessSpec

In [None]:
pc = Pinecone(api_key="")

In [10]:
current_dir = os.getcwd()

In [11]:
books_dir = os.path.join(current_dir, "documents")

In [12]:
print("Current Directory:", current_dir)
print("Books Directory:", books_dir)

Current Directory: c:\Users\Lenovo\OneDrive\Desktop\New_Allo_Tower
Books Directory: c:\Users\Lenovo\OneDrive\Desktop\New_Allo_Tower\documents


In [13]:
book_files = [f for f in os.listdir(books_dir) if f.endswith(".txt")]

In [14]:
print(book_files)

["Alice's_Adventures_in_Wonderland.txt", 'Dracula.txt', 'Frankenstein.txt', 'lord_of_the_rings.txt']


In [15]:
documents = []
for book_file in book_files:
    file_path = os.path.join(books_dir, book_file)
    loader = TextLoader(file_path)
    book_docs = loader.load()
    for doc in book_docs:
        # Add metadata to each document indicating its source
        doc.metadata = {"source": book_file}
        documents.append(doc)

In [17]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

Created a chunk of size 1184, which is longer than the specified 1000
Created a chunk of size 1045, which is longer than the specified 1000
Created a chunk of size 1132, which is longer than the specified 1000
Created a chunk of size 1674, which is longer than the specified 1000
Created a chunk of size 1610, which is longer than the specified 1000
Created a chunk of size 1562, which is longer than the specified 1000
Created a chunk of size 1063, which is longer than the specified 1000
Created a chunk of size 1543, which is longer than the specified 1000
Created a chunk of size 2597, which is longer than the specified 1000
Created a chunk of size 2613, which is longer than the specified 1000
Created a chunk of size 1079, which is longer than the specified 1000
Created a chunk of size 1251, which is longer than the specified 1000
Created a chunk of size 1534, which is longer than the specified 1000
Created a chunk of size 1323, which is longer than the specified 1000
Created a chunk of s

In [18]:
print("\n--- Document Chunks Information ---")
print(f"Number of document chunks: {len(docs)}")


--- Document Chunks Information ---
Number of document chunks: 1724


In [19]:
from langchain_huggingface import HuggingFaceEmbeddings

In [20]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [21]:
index_name = "test"

# Check if the index already exists
existing_indexes = [index["name"] for index in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )
    print(f"Index '{index_name}' created successfully.")
else:
    print(f"Index '{index_name}' already exists. Skipping creation.")


Index 'test' created successfully.


In [22]:
vector_store = PineconeVectorStore.from_documents(
    documents=docs,
    embedding=embeddings,
    index_name=index_name
)

In [23]:
from google.cloud import firestore
from langchain_google_firestore import FirestoreChatMessageHistory

In [None]:
import firebase_admin
from firebase_admin import credentials,db
cred=credentials.Certificate(r"")
'''firebase_admin.initialize_app(cred,{
    'databaseURL':r''
})'''
ref=db.reference('/')
chat_history = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "human", "content": ""},
    {"role": "ai", "content": "I'm ready to assist you. How can I help?"},
    {"role": "human", "content": "What is the summary of Alice's Adventures in Wonderland"},
    {"role": "ai", "content": "Alice's Adventures in Wonderland tells the story of a young girl named Alice who falls down a rabbit hole..."},
    {"role": "human", "content": ""},
    {"role": "ai", "content": "Is there anything else I can help you with?"}
]

# Insert chat history into Firebase
ref.set({"messages": chat_history})

print("Chat history has been written to Firebase Realtime Database")
ref.set(chat_history)
print("Data has been written to Firebase Realtime Database")


Chat history has been written to Firebase Realtime Database
Data has been written to Firebase Realtime Database


In [52]:
query = "Where is Dracula's castle located?"

In [39]:
index_name="test"
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retrieved_docs = retriever.invoke(query)

In [40]:
print(retrieved_docs)

[Document(id='d2278769-405e-40e9-b9ac-84cc4952a9e9', metadata={'source': 'Dracula.txt'}, page_content='Having had some time at my disposal when in London, I had visited the\nBritish Museum, and made search among the books and maps in the library\nregarding Transylvania; it had struck me that some foreknowledge of the\ncountry could hardly fail to have some importance in dealing with a\nnobleman of that country. I find that the district he named is in the\nextreme east of the country, just on the borders of three states,\nTransylvania, Moldavia and Bukovina, in the midst of the Carpathian\nmountains; one of the wildest and least known portions of Europe. I was\nnot able to light on any map or work giving the exact locality of the\nCastle Dracula, as there are no maps of this country as yet to compare\nwith our own Ordnance Survey maps; but I found that Bistritz, the post\ntown named by Count Dracula, is a fairly well-known place. I shall enter\nhere some of my notes, as they may refresh

In [41]:
print("\n--- Retrieved Documents ---")
for doc in retrieved_docs:
    print(f"Source: {doc.metadata['source']}")
    print(f"Content: {doc.page_content}")
    print("-" * 40)


--- Retrieved Documents ---
Source: Dracula.txt
Content: Having had some time at my disposal when in London, I had visited the
British Museum, and made search among the books and maps in the library
regarding Transylvania; it had struck me that some foreknowledge of the
country could hardly fail to have some importance in dealing with a
nobleman of that country. I find that the district he named is in the
extreme east of the country, just on the borders of three states,
Transylvania, Moldavia and Bukovina, in the midst of the Carpathian
mountains; one of the wildest and least known portions of Europe. I was
not able to light on any map or work giving the exact locality of the
Castle Dracula, as there are no maps of this country as yet to compare
with our own Ordnance Survey maps; but I found that Bistritz, the post
town named by Count Dracula, is a fairly well-known place. I shall enter
here some of my notes, as they may refresh my memory when I talk over my
travels with Mina.
-------

In [28]:
import time

In [29]:
import firebase_admin

In [30]:
from firebase_admin import credentials, db

In [31]:
from langchain.schema import HumanMessage, AIMessage

In [32]:
from langchain_core.messages import SystemMessage

In [None]:
GOOGLE_API_KEY = ""

In [42]:
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,          # For deterministic responses
    max_output_tokens=4048, # Limits response length
    timeout=120,            # 2 minute timeout
    max_retries=2,
    google_api_key=GOOGLE_API_KEY
)

In [53]:
ref = db.reference('/')

In [54]:
stored_chat_history = ref.get() or []

In [55]:
print(stored_chat_history)

[{'content': 'You are a helpful assistant', 'role': 'system'}, {'content': '', 'role': 'human'}, {'content': "I'm ready to assist you. How can I help?", 'role': 'ai'}, {'content': "What is the summary of Alice's Adventures in Wonderland", 'role': 'human'}, {'content': "Alice's Adventures in Wonderland tells the story of a young girl named Alice who falls down a rabbit hole...", 'role': 'ai'}, {'content': '', 'role': 'human'}, {'content': 'Is there anything else I can help you with?', 'role': 'ai'}]


In [60]:
from typing import List, Dict, Any
from langchain.schema import HumanMessage, AIMessage, SystemMessage

def deserialize_chat_history(stored_chat_history: List[Dict[str, Any]]) -> List[Any]:
    deserialized_messages = []
    
    for message in stored_chat_history:
        role = message.get("role")  # Safely get role
        content = message.get("content", "")  # Ensure content is not empty
        
        if content:  # Only process messages with content
            if role == "human":
                deserialized_messages.append(HumanMessage(content=content))
            elif role == "ai":
                deserialized_messages.append(AIMessage(content=content))
            elif role == "system":
                deserialized_messages.append(SystemMessage(content=content))
            else:
                print(f"Warning: Unrecognized role '{role}' in message, skipping.")
    
    return deserialized_messages


In [61]:
chat_history = deserialize_chat_history(stored_chat_history)

In [62]:
chat_history.insert(0, SystemMessage(content="You are a helpful assistant"))

In [63]:
while True:
    query = input("You: ").strip()
    if not query:
        print("Please enter a valid message.")
        continue
    if query.lower() == "exit":
        print("Chat session ended.")
        break

    # Retrieve relevant documents
    retrieved_docs = retriever.invoke(query)
    retrieved_texts = "\n".join([doc.page_content for doc in retrieved_docs])  # Extract text from documents

    # Combine chat history, query, and retrieval results
    chat_history.append(HumanMessage(content=query))
    chat_history.append(SystemMessage(content=f"Retrieved Context:\n{retrieved_texts}"))

    # Generate response
    result = model.invoke(chat_history)

    if result.content.strip():
        print("Assistant:", result.content)
        chat_history.append(AIMessage(content=result.content))

Assistant: Hello there! How can I help you today?
Assistant: Initially, Jonathan Harker is impressed by the grandeur and imposing nature of Dracula's castle, albeit with a sense of unease. He describes it with a mixture of awe and apprehension, noting its size, age, and the picturesque, albeit wild, Transylvanian landscape surrounding it. He finds the Count to be a courteous and intriguing host, though somewhat eccentric.

However, as time progresses, Harker's perception of the castle shifts dramatically. He begins to notice unsettling details: the lack of servants, the Count's nocturnal habits, the locked doors, and the increasingly oppressive atmosphere.  His initial awe transforms into suspicion, then fear, and finally, outright terror as he realizes he is a prisoner and Dracula is a vampire. The castle, once a place of fascination, becomes a symbol of his confinement and the Count's sinister nature.  He starts to see it not as a grand residence, but as a prison, a place of dark sec

In [64]:
def serialize_chat_history(chat_history):
    serialized_messages = []
    for message in chat_history:
        if isinstance(message, HumanMessage):
            serialized_messages.append({"role": "human", "content": message.content})
        elif isinstance(message, AIMessage):
            serialized_messages.append({"role": "ai", "content": message.content})
    return serialized_messages

In [65]:
ref.set(serialize_chat_history(chat_history))
print("Data saved successfully!")

Data saved successfully!
