In [62]:
import os
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM
from typing import Optional, List

In [63]:
 # Load TXT files separately with metadata
def load_txt_files_separately(directory_or_file):
    documents = []
    if os.path.isfile(directory_or_file) and directory_or_file.endswith(".txt"):
        with open(directory_or_file, "r", encoding="utf-8") as f:
            text = f.read()
            documents.append({"text": text, "metadata": {"source": directory_or_file}})
    elif os.path.isdir(directory_or_file):
        for filename in os.listdir(directory_or_file):
            if filename.endswith(".txt"):
                filepath = os.path.join(directory_or_file, filename)
                with open(filepath, "r", encoding="utf-8") as f:
                    text = f.read()
                    documents.append({"text": text, "metadata": {"source": filename}})
    return documents

In [64]:
# Load files
txt_documents = load_txt_files_separately("D:/RagStorySummary")

In [65]:
 # Chunk the text with metadata
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunked_docs = []
for doc in txt_documents:
    chunks = splitter.split_text(doc["text"])
    for chunk in chunks:
        chunked_docs.append({"text": chunk, "metadata": doc["metadata"]})

texts = [doc["text"] for doc in chunked_docs]
metadatas = [doc["metadata"] for doc in chunked_docs]

Created a chunk of size 689, which is longer than the specified 500
Created a chunk of size 990, which is longer than the specified 500
Created a chunk of size 758, which is longer than the specified 500
Created a chunk of size 762, which is longer than the specified 500
Created a chunk of size 1246, which is longer than the specified 500
Created a chunk of size 717, which is longer than the specified 500
Created a chunk of size 635, which is longer than the specified 500
Created a chunk of size 574, which is longer than the specified 500
Created a chunk of size 649, which is longer than the specified 500
Created a chunk of size 756, which is longer than the specified 500
Created a chunk of size 749, which is longer than the specified 500
Created a chunk of size 631, which is longer than the specified 500
Created a chunk of size 712, which is longer than the specified 500
Created a chunk of size 560, which is longer than the specified 500
Created a chunk of size 541, which is longer th

In [66]:
# Embed and store in FAISS with metadata
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_texts(texts, embeddings, metadatas=metadatas)

In [72]:
class CustomLLM(LLM):
    api_key: str
    model_name: str = "mistralai/mixtral-8x7b-instruct"
    temperature: float = 0

    def __init__(self, api_key: str, model_name: str = "mistralai/mixtral-8x7b-instruct", temperature: float = 0):
        super().__init__(api_key=api_key, model_name=model_name, temperature=temperature)
        self.api_key = api_key
        self.model_name = model_name
        self.temperature = temperature

    # Your custom function to call the actual LLM (OpenAI API)
    def call_openai(self, prompt: str) -> str:
        url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.model_name,
            "messages": [{"role": "user", "content": prompt}]
            # "temperature": self.temperature
            # "max_tokens": 500  # Adjust as needed
        }
        try:
            response = requests.post(url, data=json.dumps(payload), headers=headers)
            
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]
        except Exception as e:
            return f"Error calling OpenAI: {str(e)}"
     
    # Required method for LangChain LLM interface
    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # Call your custom function here
        result = self.call_openai(prompt)
        if stop:
            for s in stop:
                result = result.split(s)[0]
        return result

    # Required property for LangChain
    @property
    def _llm_type(self) -> str:
        return "custom_openai"       

In [73]:
# Set up OpenRouter
OPENAI_API_KEY = "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"  # Replace with your OpenAI key
llm = CustomLLM(api_key=OPENAI_API_KEY)

In [74]:
# Define custom prompt
prompt_template = """Question: {question}
Context: {context}
Answer:"""

PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["question", "context"]
)

In [75]:
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={"k": 1}),
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True  # Return source files for reference
)

In [79]:
query = "Can you please summarize the short story based on math and science?"
response = rag_chain({"query": query})
print("Answer:", response["result"])
print("Sources:", [doc.metadata["source"] for doc in response["source_documents"]])

Answer:  The short story is about a man who comes to Chicago to reunite with his wife after twenty years. She is there to perform a "magic" show for a higher-up at the state university, suggesting that she has a career in a field related to math and science. The man feels like he has lost something, possibly a sense of wonder or optimism, as he observes the world around him. A woman from the university meets him and takes him to see his wife. The weather is dreary, reflecting the man's mood. The story seems to be about the intersection of math, science, and wonder, as well as the reunion of the long-separated couple.
Sources: ["The Hunter's Wife.txt"]
