# DSPy
In this notebook, we'll be exploring a prompt optimization framework called DSPy.

## Notebook Setup

In [14]:
# Importing the necessary Python libraries
import os
import json

import numpy as np
import pandas as pd

import dspy
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import DataFrameLoader
from langchain_openai import OpenAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Initializing the OpenAI embeddings client; it is passed to FAISS below when loading or building the KIs index
# NOTE(review): presumably picks up the API key from the OPENAI_API_KEY environment variable — confirm it is set before running
embeddings = OpenAIEmbeddings()

In [2]:
# Loading the sample questions and answers associated with our knowledge items (KIs);
# the 'ki_topic' and 'ki_text' columns of this frame are used below to build the FAISS index
df_ki_qas = pd.read_csv('synthetic_knowledge_items_with_qas.csv')


In [10]:
# Building the FAISS KIs index only when no saved copy is present on disk
if not os.path.exists('faiss_kis.index'):

    # Keeping only the topic and text of each knowledge item
    df_relevant = df_ki_qas[['ki_topic', 'ki_text']]

    # Turning each row into a LangChain document whose page content is the KI text
    documents = DataFrameLoader(df_relevant, page_content_column = 'ki_text').load()

    # Embedding the documents into a new FAISS KIs index
    faiss_kis_index = FAISS.from_documents(documents, embeddings)

    # Persisting the index so that later runs can skip the embedding step
    faiss_kis_index.save_local('faiss_kis.index')

else:

    # Reusing the FAISS KIs index saved by a previous run
    # NOTE(review): allow_dangerous_deserialization opts in to pickle-based loading;
    # only load index files that were produced by this notebook
    faiss_kis_index = FAISS.load_local('faiss_kis.index', embeddings, allow_dangerous_deserialization = True)

In [15]:
class FAISSDSPyRetriever(dspy.Retrieve):
    """DSPy retriever that looks up passages in a LangChain FAISS vector store.

    The retriever embeds the incoming query with the supplied embeddings
    client, asks the vector store for the nearest documents, and hands their
    text back to DSPy as passages.
    """

    def __init__(self, faiss_index, embeddings, k = 2):
        """Stores the vector store, the embeddings client and the default k.

        Args:
            faiss_index: Vector store to search. Assumed to be a LangChain
                FAISS store (e.g. the result of FAISS.from_documents or
                FAISS.load_local), not a raw faiss index.
            embeddings: Embeddings client exposing embed_query(text).
            k: Default number of passages returned per query.
        """
        super().__init__()
        self.index = faiss_index
        self.embeddings = embeddings
        self.k = k

    def forward(self, query, k = None):
        """Retrieves the passages most similar to the query.

        Args:
            query: Text to search for.
            k: Optional per-call override of the number of passages; falls
                back to the value given at construction time when None.

        Returns:
            A dspy.Prediction whose passages attribute is a list with the
            text of each retrieved document, most similar first.
        """

        # Falling back to the retriever's default when no per-call k is given
        top_k = self.k if k is None else k

        # Embedding the input query
        query_embedding = self.embeddings.embed_query(query)

        # Fetching the top k most similar knowledge items from the vector store
        documents = self.index.similarity_search_by_vector(query_embedding, k = top_k)

        # Converting the FAISS results to DSPy passages format
        passages = [document.page_content for document in documents]

        # Returning a Prediction so that callers can read the .passages attribute
        return dspy.Prediction(passages = passages)

In [None]:
class RAGWithFAISS(dspy.Module):
    """Retrieval-augmented question answering module.

    Combines a retriever, which supplies context passages, with a
    ChainOfThought predictor that answers the question from that context.
    """

    def __init__(self, faiss_retriever):
        """Wires the retriever and the answer generator together.

        Args:
            faiss_retriever: Callable retriever whose result exposes a
                passages attribute.
        """
        super().__init__()
        self.retrieve = faiss_retriever
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Answers the question using passages fetched by the retriever.

        Args:
            question: The question to answer.

        Returns:
            A dspy.Prediction carrying only the generated answer.
        """

        # Fetching the supporting passages for this question
        retrieved = self.retrieve(question)

        # Asking the ChainOfThought predictor to answer from the retrieved context
        generated = self.generate_answer(context = retrieved.passages, question = question)

        # Exposing only the answer field to the caller
        return dspy.Prediction(answer = generated.answer)