In [None]:
!pip install transformers datasets langchain sentence-transformers torch langchain-huggingface faiss-cpu gradio Flask pyngrok streamlit langchain-community flask_ngrok

In [None]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
import re
import threading
import torch  # Add this import statement

# Step 1: Read the entire source document into memory as UTF-8 text
file_path = "/kaggle/input/document/test.txt"
with open(file_path, mode='r', encoding='utf-8') as file:
    text = file.read()

# Step 2: Break the document into overlapping chunks so that each piece is
# small enough to embed while neighbouring chunks still share some context
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
text_chunks = splitter.split_text(text)

# Step 3: Embed every chunk with the all-MiniLM-L6-v2 sentence encoder,
# running on the GPU when one is available
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
embeddings = model.encode(text_chunks, device=device, convert_to_tensor=True)

# Step 4: Build a flat (exact, L2-distance) FAISS index over the embeddings.
# FAISS works on host NumPy arrays, so the tensor is copied off the device first.
embedding_matrix = embeddings.cpu().numpy()
dimension = embedding_matrix.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embedding_matrix)

# Step 5: Function to retrieve similar documents based on a user query
def retrieve_similar_docs(query, index, model, text_chunks, top_k=3):
    """Return the text chunks whose embeddings are closest to ``query``.

    Args:
        query: The user's question as a string.
        index: A FAISS index holding one vector per entry of ``text_chunks``
            (row ``i`` of the index corresponds to ``text_chunks[i]``).
        model: The sentence encoder used to build ``index``; it must expose
            ``encode(sentences, convert_to_numpy=True)``.
        text_chunks: The list of chunk strings that were indexed.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most ``top_k`` chunk strings, nearest first. The list is
        shorter when the index holds fewer vectors, and empty when the index
        is empty or ``top_k`` is not positive.
    """
    # Never ask FAISS for more neighbours than it stores: it pads the result
    # with -1, which Python would silently treat as "the last chunk".
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    # The encoder runs on whatever device it was created on; FAISS needs a
    # C-contiguous float32 array on the host.
    query_vector = np.ascontiguousarray(
        model.encode([query], convert_to_numpy=True), dtype=np.float32
    )

    _, indices = index.search(query_vector, k)

    # Drop any id that does not point at a real chunk (e.g. the -1 sentinel
    # returned by index types that may find fewer than k neighbours).
    chunk_count = len(text_chunks)
    return [text_chunks[idx] for idx in indices[0] if 0 <= idx < chunk_count]

# Load the causal LM and its matching tokenizer from the same checkpoint;
# both are used by the streaming generation helper
LLM_CHECKPOINT = "TroyDoesAI/Phi-3-Context-Obedient-RAG"
tokenizer = AutoTokenizer.from_pretrained(LLM_CHECKPOINT)
llm_model = AutoModelForCausalLM.from_pretrained(LLM_CHECKPOINT)

# Prompt layout: instruction + user query first, then the retrieved chunks,
# then a "Response:" cue for the model to continue from
prompt_template = """
Act like a professional chatbot for a training institute. Based on the provided documents, answer the following query: {query}

Documents:
{documents}

Response:
"""

# {query} and {documents} are the two placeholders filled in per request
prompt = PromptTemplate(
    input_variables=["query", "documents"],
    template=prompt_template,
)

# Function to stream output
def stream_output(prompt_text):
    """Generate a reply to ``prompt_text`` and print it as it is produced.

    Generation runs in a background thread that feeds a
    ``TextIteratorStreamer``; this function consumes the streamer and prints
    each decoded fragment immediately, without a trailing newline.

    Args:
        prompt_text: The fully formatted prompt to send to the language model.

    Returns:
        None. The generated text is written to standard output only.
    """
    # Tokenise and place the tensors on the same device as the model so that
    # generation also works when the model has been moved to a GPU.
    inputs = tokenizer(prompt_text, return_tensors="pt").to(llm_model.device)

    # skip_prompt / skip_special_tokens: emit only the newly generated,
    # human-readable text. The timeout bounds how long the consumer waits
    # for the next fragment.
    streamer = TextIteratorStreamer(tokenizer, timeout=150.0, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = {
        "inputs": inputs.input_ids,
        "attention_mask": inputs.attention_mask,
        "max_new_tokens": 250,
        "top_k": 50,
        "temperature": 0.9,
        "do_sample": True,
        "streamer": streamer,
    }

    # generate() blocks until completion, so it runs in its own thread while
    # this thread drains the streamer.
    generation_thread = threading.Thread(target=llm_model.generate, kwargs=generation_kwargs)
    generation_thread.start()

    for new_text in streamer:
        print(new_text, end="", flush=True)

    # The streamer is exhausted, so generation is finished; wait for the
    # worker so no thread is left running when the next query starts.
    generation_thread.join()

# Loop to continuously prompt the user for queries.
# For each query: retrieve the closest chunks, build the prompt, stream the answer.
while True:
    try:
        user_query = input("Enter your query (or type 'exit' to stop): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted the prompt: stop cleanly
        # instead of ending the session with a traceback.
        print()
        break

    # A blank line carries no question; do not spend a generation pass on it.
    if not user_query:
        continue

    if user_query.lower() == 'exit':
        break

    similar_documents = retrieve_similar_docs(user_query, index, model, text_chunks, top_k=5)
    documents_text = "\n".join(similar_documents)

    prompt_text = prompt.format(query=user_query, documents=documents_text)

    stream_output(prompt_text)
    print("\n")  # Add a new line after the response is complete
