In [58]:
! pip install sentence_transformers langchain

In [1]:
from langchain.embeddings import HuggingFaceEmbeddings

In [2]:
embeddings_model_name = "all-MiniLM-L6-v2"

In [5]:
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
embeddings_model_path = "all-MiniLM-L6-v2"
embeddings.client.save(embeddings_model_path)

In [12]:
# to load the embedding model
# embeddings_model_path = "all-MiniLM-L6-v2"
# embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_path)

In [30]:
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader

folder_path = "docs/"

text_loader_kwargs={'autodetect_encoding': True}
loader = DirectoryLoader(folder_path,
                        loader_cls=TextLoader, 
                        loader_kwargs=text_loader_kwargs)
docs = loader.load()

In [36]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200, chunk_overlap=0
)
texts = text_splitter.split_documents(docs)

In [40]:
from langchain.vectorstores import Chroma

db_directory = "local_db"
vector_db = Chroma.from_documents(documents=texts, persist_directory=db_directory, embedding=embeddings)
vector_db.persist()

In [41]:
# to delete the vector db
# vector_db.delete_collection()

# to load the directory without saving it
# db_directory = "local_db"
# vector_db = Chroma(persist_directory=db_directory, embedding_function=embeddings)

In [42]:
retriever = vector_db.as_retriever()
# using cosine similarity between the query and embeddings stored in the vector database
retriever.search_kwargs['distance_metric'] = 'cos'
# using max marginal relevance (see https://www.cs.cmu.edu/~jgc/publication/The_Use_MMR_Diversity_Based_LTMIR_1998.pdf)
retriever.search_kwargs['maximal_marginal_relevance'] = True
# number of Documents to fetch to pass to MMR algorithm
retriever.search_kwargs['fetch_k'] = 100
# number of Documents to return
retriever.search_kwargs['k'] = 4

In [56]:
from langchain.chains.combine_documents.base import (
    BaseCombineDocumentsChain,
    format_document,
)

def semantic_search(query, retriever = retriever, search_prefix=search_prefix):
    docs = retriever.get_relevant_documents(query)
    doc_list = [doc.page_content+"\nSource:"+doc.metadata['source'] for doc in docs]
    document_separator = "\n\n"
    return document_separator.join(doc_list)

In [57]:
query = 'Black Scholes formula with funding'
print(semantic_search(query))

We note that the adjustment grows as (roughly) $T^{2}$. A similar formula was obtained by Barden (2009) using a model in which funding spread is functionally linked to the value of the asset.
Source:docs\FundingBeyondDiscounting

In what follows, we shall consider derivatives contracts on a particular asset, whose price process we denote by $S(t), t \geq 0$. We denote by $r_{R}(t)$ the short rate on funding secured by this
Source:docs\FundingBeyondDiscounting

$$
d S(t) / S(t)=O(d t)+\sigma_{S} d W_{S}(t)
$$

and funding spread that follows dynamics inspired by a simple onefactor Gaussian model of interest rates ${ }^{2}$ :
Source:docs\FundingBeyondDiscounting

In this article, we have developed valuation formulas for derivative contracts that incorporate the modern realities of funding and collateral agreements that deviate significantly from the textbook
Source:docs\FundingBeyondDiscounting
