In [1]:
from langchain.embeddings import GooglePalmEmbeddings


In [9]:
import os


In [13]:
import os
from langchain.document_loaders import PyPDFLoader

def load_files(data_folder):
    files = [os.path.join(data_folder, f) for f in os.listdir(data_folder) if f.endswith('.pdf')]
    docs = []
    for file in files:
        try:
            loader = PyPDFLoader(file)  # Use PyPDFLoader for PDF files
            docs.extend(loader.load())
        except Exception as e:
            print(f"Error loading file {file}: {e}")
    return docs


In [14]:
docs=load_files("data")


In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def chunk_documents(docs):
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return splitter.split_documents(docs)


In [15]:
step2=chunk_documents(docs)

In [1]:
from langchain.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer


embeddings = HuggingFaceEmbeddings(model_name='models\embedding-model')



  from .autonotebook import tqdm as notebook_tqdm
  embeddings = HuggingFaceEmbeddings(model_name='models\embedding-model')


In [2]:
multi_qa_mini = HuggingFaceEmbeddings(model_name="models\embedding-model2")

In [10]:
from dotenv import load_dotenv
load_dotenv() 

True

In [19]:
from langchain.llms import OpenAI

In [5]:
from transformers import pipeline


In [6]:
model=pipeline("question-answering", model="models/roberta-qa", tokenizer="models/roberta-qa")

Device set to use cpu


In [4]:
import os
from langchain.vectorstores import FAISS

def store_in_vector_store(embeddings, docs):
    index_path = "faiss_index"

    # Check if the FAISS index exists
    if os.path.exists(index_path):
        # Load the FAISS vector store from the disk
        vector_store = FAISS.load_local(index_path, embeddings,allow_dangerous_deserialization=True)
        print("FAISS index loaded from disk.")
    else:
        # If it doesn't exist, create a new vector store from documents
        vector_store = FAISS.from_documents(docs, embeddings)
        # Save it for future use
        vector_store.save_local(index_path)
        print("FAISS index created and saved to disk.")
    
    return vector_store


In [5]:
if os.path.exists("faiss_index"):
        # Load the FAISS vector store from the disk
        vector_store = FAISS.load_local("faiss_index", embeddings,allow_dangerous_deserialization=True)
        print("FAISS index loaded from disk.")

FAISS index loaded from disk.


In [18]:
step3=store_in_vector_store(embeddings,step2)

FAISS index created and saved to disk.


In [6]:
from langchain.retrievers import (
    ContextualCompressionRetriever,
   
    MergerRetriever,
)

In [7]:
from langchain.retrievers.document_compressors import DocumentCompressorPipeline


In [8]:
from langchain.vectorstores import FAISS
from langchain_community.document_transformers import EmbeddingsRedundantFilter



def retrieval(vector_store,embeddings):
    # Define Individual Retrievers
    retriever_tns = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5, "include_metadata": False}
    )

    retriever_ad = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 5, "include_metadata": False}
    )

    # Merge Retrievers
    lotr = MergerRetriever(retrievers=[retriever_tns, retriever_ad])

    # Add Embedding-Based Filtering
    filter = EmbeddingsRedundantFilter(embeddings=embeddings)

    # Create a Document Compression Pipeline
    pipeline = DocumentCompressorPipeline(transformers=[filter])

    # Contextual Compression Retriever
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=pipeline, base_retriever=lotr
    )
    return compression_retriever

# Function to Use the Compression Retriever
def retrieve_documents(query,compression_retriever):
    return compression_retriever.get_relevant_documents(query)


In [9]:
step4=retrieval(vector_store,multi_qa_mini)

In [11]:
query="Tell me about the economic consequences of COVID-19?"

In [21]:
step5=retrieve_documents(query,step4)

  return compression_retriever.get_relevant_documents(query)


In [10]:
from langchain.embeddings import OpenAIEmbeddings


In [12]:
import getpass
import os


# os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter your Mistral API key: ")
api_key = os.getenv("MISTRAL_API_KEY")

In [13]:
if not api_key:
    raise ValueError("MISTRAL_API_KEY is not set in the .env file.")

In [36]:
from langchain.llms import GooglePalm


In [32]:
from langchain.llms.base import BaseLanguageModel

class WrappedMistral(BaseLanguageModel):
    def __init__(self, mistral_instance):
        self.mistral_instance = mistral_instance

    def _call(self, prompt, stop=None):
        return self.mistral_instance.generate(prompt)


In [14]:
from langchain_mistralai import ChatMistralAI

llm = ChatMistralAI(
    model="mistral-large-latest",
    temperature=0,
    max_retries=2,
    api_key=api_key
    # other params...
)

In [15]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

def generation(compression_retriever,chatllm):
    memory = ConversationBufferMemory(memory_key="chat_history",
                                    return_messages=True)

    qa_withmemory = ConversationalRetrievalChain.from_llm(chatllm,
                                            compression_retriever,
                                            memory=memory)
    return qa_withmemory

In [16]:
messages = [
    (
        "system",
        "You are a helpful assistant that translates English to French. Translate the user sentence.",
    ),
    ("human", "I love programming."),
]
ai_msg = llm.invoke(messages)
ai_msg

AIMessage(content="I love programming. = J'aime la programmation.\n\nHere's a breakdown:\n- I love = J'aime\n- programming = la programmation", additional_kwargs={}, response_metadata={'token_usage': {'prompt_tokens': 27, 'total_tokens': 66, 'completion_tokens': 39}, 'model': 'mistral-large-latest', 'finish_reason': 'stop'}, id='run-2f56749d-d850-4d34-8d26-467c897c1cf8-0', usage_metadata={'input_tokens': 27, 'output_tokens': 39, 'total_tokens': 66})

In [17]:
step6=generation(step4,llm)

  memory = ConversationBufferMemory(memory_key="chat_history",


In [18]:
query="Tell me about the economic consequences of COVID-19?"

In [19]:
x=step6(query)

  x=step6(query)


In [20]:
x

{'question': 'Tell me about the economic consequences of COVID-19?',
 'chat_history': [HumanMessage(content='Tell me about the economic consequences of COVID-19?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The global economy experienced a significant downturn during the COVID-19 pandemic. According to the International Monetary Fund (IMF), the world economy contracted by 3.5% in 2020. Small and medium-sized enterprises (SMEs), which form the backbone of many economies, were disproportionately affected by lockdowns and restrictions. In the United States alone, an estimated 200,000 businesses permanently closed during the first year of the pandemic.', additional_kwargs={}, response_metadata={})],
 'answer': 'The global economy experienced a significant downturn during the COVID-19 pandemic. According to the International Monetary Fund (IMF), the world economy contracted by 3.5% in 2020. Small and medium-sized enterprises (SMEs), which form the backbone of many eco

In [21]:
x['answer']

'The global economy experienced a significant downturn during the COVID-19 pandemic. According to the International Monetary Fund (IMF), the world economy contracted by 3.5% in 2020. Small and medium-sized enterprises (SMEs), which form the backbone of many economies, were disproportionately affected by lockdowns and restrictions. In the United States alone, an estimated 200,000 businesses permanently closed during the first year of the pandemic.'