In [None]:
from langchain_community.vectorstores.pinecone import Pinecone
import pinecone
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from typing import List
from stock_assistant.config.settings import OPENAI_API_KEY, PINECONE_API_KEY
import os
from langchain_core.messages import SystemMessage, AIMessage

# Load variables from a local .env file into the process environment.
load_dotenv()

# Keep a reference to the client instead of discarding the instance.
# NOTE(review): presumably the constructor also rejects a missing key early - confirm.
pinecone_client = pinecone.Pinecone(api_key=PINECONE_API_KEY)

# The langchain_community Pinecone wrapper builds its own client and reads the
# key from the PINECONE_API_KEY environment variable, not from the instance
# above. Export the configured key when the environment does not provide one,
# so the settings module and the vector store agree on the credential.
if PINECONE_API_KEY and not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

# Embedding model used to vectorise queries against the existing index.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)
# Attach to the already-populated "stockrag" index (no documents are uploaded here).
vector_store = Pinecone.from_existing_index("stockrag", embeddings)

# Chat model that produces the final answers.
llm = ChatOpenAI(
    temperature=0.7,
    model="gpt-3.5-turbo",
    api_key=OPENAI_API_KEY,
)


In [None]:
from typing import List
class EnhancedRAGSystem:
    """Small retrieval-augmented generation helper.

    Retrieves documents from a vector store, feeds their text plus the user
    question through a ``prompt | llm`` chain, and reports the answer together
    with the page/source metadata of the retrieved documents.
    """

    def __init__(self, vector_store, llm, prompt):
        """Keep the store and prompt, and compose the prompt with the model.

        Args:
            vector_store: Object exposing ``similarity_search(query=..., k=...)``.
            llm: Model piped after the prompt with the ``|`` operator.
            prompt: Prompt object; also kept as ``self.prompt_template``.
        """
        self.vector_store = vector_store
        self.prompt_template = prompt
        self.chain = prompt | llm

    def get_relevant_docs(self, question: str, k: int = 3) -> List:
        """Return the ``k`` documents the vector store ranks closest to ``question``.

        A non-string ``question`` is converted with ``str()`` before searching.
        """
        query_text = question if isinstance(question, str) else str(question)
        return self.vector_store.similarity_search(query=query_text, k=k)

    def generate_response(self, question: str, context_docs: List) -> str:
        """Answer ``question`` using the text of ``context_docs`` as context.

        The ``page_content`` of every document is joined with blank lines and
        passed to the chain under ``"context"``, next to ``"question"``.

        Returns:
            The chain result's ``content`` attribute when it has one, otherwise
            ``str()`` of the result.
        """
        passages = [doc.page_content for doc in context_docs]
        payload = {"context": "\n\n".join(passages), "question": question}

        reply = self.chain.invoke(payload)

        if hasattr(reply, 'content'):
            return reply.content
        return str(reply)

    def process_query(self, question: str, k: int = 3) -> dict:
        """Run retrieval and generation for ``question`` and package the outcome.

        Returns:
            A dict with keys ``"response"``, ``"sources"`` and ``"success"``.
            On any exception the error text is placed in ``"response"``,
            ``"sources"`` is empty and ``"success"`` is ``False``.
        """
        try:
            docs = self.get_relevant_docs(question, k)
            answer = self.generate_response(question, docs)

            # One entry per retrieved document; missing metadata keys
            # fall back to "Unknown".
            sources = []
            for doc in docs:
                sources.append({
                    "page": doc.metadata.get("page", "Unknown"),
                    "source": doc.metadata.get("source", "Unknown"),
                })
        except Exception as e:
            return {
                "response": f"Error processing query: {str(e)}",
                "sources": [],
                "success": False,
            }
        else:
            return {
                "response": answer,
                "sources": sources,
                "success": True,
            }


In [None]:
# Prompt text for the answer-generation step. It contains two placeholders,
# {context} and {question}, that are filled in when the prompt is rendered.
RESPONSE_TEMPLATE = """
You are a helpful financial advisor assistant. Using the provided context, answer the user's question in a clear, concise, and informative way.
If the information in the context is not sufficient, say so.

Context from documents:
{context}

User Question: {question}

Please provide a well-structured response that:
1. Directly answers the question
2. Includes relevant examples or explanations where appropriate
3. Highlights any important caveats or considerations

Response:
"""

In [None]:
# Wrap the raw template string in a PromptTemplate that expects the
# "context" and "question" variables.
response_prompt = PromptTemplate(
    template=RESPONSE_TEMPLATE,
    input_variables=["context", "question"],
)

In [None]:
# Assemble the RAG helper from the vector store, chat model and prompt.
rag_system = EnhancedRAGSystem(
    vector_store=vector_store,
    llm=llm,
    prompt=response_prompt,
)

In [None]:
# Smoke-test the pipeline: one end-to-end answer plus one raw retrieval.
# The literals are already strings, so no str() wrapping is needed.
result = rag_system.process_query("what is an initial public offer")
# Query text fixed from "tradin" to "trading" so the retrieval probe uses the
# intended wording.
relevant_docs = rag_system.get_relevant_docs(
    "what are market orders and limit order in trading"
)
content = result["response"]
additional_kwargs = {"sources": result["sources"]}
print(content)
print(additional_kwargs)
print(relevant_docs)

In [None]:
# Evaluation questions; answers[i] is the reference answer for questions[i].
questions = [
    "what are the dos for investing in mutual fund schemes",
    "give me 2 mantras to wise investing",
    "what are market orders and limit order in trading",
    "what is an initial public offer",
]

# Each entry needs its own trailing comma: adjacent string literals without a
# comma are concatenated by Python into ONE string, which would leave this
# list with a single element and misalign it with `questions`.
answers = [
    "The DOs for investing in mutual fund schemes include: 1. Reading the offer document carefully before investing to understand the main features, risks, expenses, and track record of the scheme...",
    "Two mantras to wise investing are: 1. Follow life-cycle investing: As you age, your risk tolerance should change...",
    "Market orders and limit orders are two common types of orders in trading...",
    "An Initial Public Offering (IPO) is when a previously unlisted company offers its shares to the public for the first time...",
]

# Guard against the two lists drifting out of step again.
assert len(questions) == len(answers), "each question needs exactly one reference answer"

In [None]:
from datasets import Dataset
from ragas import evaluate, EvaluationDataset
from typing import List

def handle_rag_evaluation(eval_questions=None, eval_answers=None):
    """Run the RAG system over question/reference pairs and build a dataset.

    For every pair the notebook-level ``rag_system`` retrieves documents and
    generates a response; the query, retrieved context texts, response and
    reference are collected into one record per question.

    Args:
        eval_questions: Questions to evaluate. Defaults to the notebook-level
            ``questions`` list.
        eval_answers: Reference answers, aligned by position with
            ``eval_questions``. Defaults to the notebook-level ``answers`` list.

    Returns:
        The object produced by ``EvaluationDataset.from_list`` for the records.

    Raises:
        ValueError: If the two inputs differ in length. ``zip`` would otherwise
            silently drop the unmatched tail.
    """
    if eval_questions is None:
        eval_questions = questions
    if eval_answers is None:
        eval_answers = answers

    if len(eval_questions) != len(eval_answers):
        raise ValueError(
            "questions and reference answers must have the same length "
            f"(got {len(eval_questions)} and {len(eval_answers)})"
        )

    samples = []

    for query, reference in zip(eval_questions, eval_answers):
        relevant_docs: List = rag_system.get_relevant_docs(query)

        # Record the same text the LLM sees (generate_response joins
        # page_content), not str(doc), which would add the Document repr.
        # Each element is checked on its own; plain strings pass through.
        contexts = [
            doc if isinstance(doc, str) else getattr(doc, "page_content", str(doc))
            for doc in relevant_docs
        ]

        response = rag_system.generate_response(query, relevant_docs)

        samples.append({
            "user_input": query,
            "retrieved_contexts": contexts,
            "response": response,
            "reference": reference,
            # NOTE(review): duplicates "reference"; presumably kept for an
            # older dataset schema - confirm it is still needed.
            "ground_truth": reference
        })

        print(f"Query Processed: {query}")
        print(f"Response: {response}\n")

    return EvaluationDataset.from_list(samples)

In [None]:
from ragas.metrics import (
    faithfulness, context_precision, context_recall, 
    answer_relevancy, answer_similarity
)

# Build the evaluation samples, then score them with the selected metrics.
result = evaluate(
    dataset=handle_rag_evaluation(),
    metrics=[
        faithfulness,
        context_precision,
        context_recall,
        answer_relevancy,
        answer_similarity,
    ],
)

In [None]:
from tabulate import tabulate

# Convert the evaluation result with its to_pandas() method and leave the
# frame as the cell's last expression so the notebook displays it.
df = result.to_pandas()
df