In [1]:
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
azure_api_key=os.getenv("AZURE_OPENAI_API_KEY")
api_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
azure_api_version=os.getenv("OPENAI_API_VERSION")
open_ai_api_key=os.getenv("OPENAI_API_KEY")

In [5]:
embedding_model = AzureOpenAIEmbeddings(azure_deployment="text-embedding")
llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo-1106", temperature=0)

# Load document

In [4]:
from langchain_community.document_loaders import PyPDFLoader
# for pdf to LaTeX
# loader = MathpixPDFLoader("data/luri_higher_topos.pdf")
# data = loader.load()

loader = PyPDFLoader("data/luri_higher_topos.pdf")
data = loader.load()


# Split text

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200B",  # Zero-width space
        "\uff0c",  # Fullwidth comma
        "\u3001",  # Ideographic comma
        "\uff0e",  # Fullwidth full stop
        "\u3002",  # Ideographic full stop
        "",
    ],
)

splits = text_splitter.split_documents(data)

In [10]:
# Split semantically based on embeddings
# from langchain_experimental.text_splitter import SemanticChunker

# semantic_splitter = SemanticChunker(AzureOpenAIEmbeddings(azure_deployment="text-embedding"))
# semantic_splits = semantic_splitter.create_documents("insert page content only")

## Create Vectorstore

In [6]:
# vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model, persist_directory="db/recursive_splits")
# load vectorstore
vectorstore = Chroma(persist_directory="db/recursive_splits", embedding_function=embedding_model)

# Retrieval

In [8]:
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k":20})

results = retriever.get_relevant_documents("Can you give some basic examples of a Kan complex?")

In [9]:
results

[Document(page_content='In the setting of simplicial sets, this problem admits an att ractive formulation in terms of Quillen’s theory\nofminimal Kan complexes. Every Kan complex Xis homotopy equivalent to a minimal Kan complex, and\na mapX→Yof minimal Kan complexes is a homotopy equivalence if and onl y if it is an isomorphism.\nConsequently, the classiﬁcation of Kan complexes up to homo topy equivalence is equivalent to the classi-\nﬁcation of minimal Kan complexes up to isomorphism. Of course, in practical ter ms, this is not of much\nuse for solving the classiﬁcation problem. Nevertheless, t he theory of minimal Kan complexes (and, more\ngenerally, minimal Kan ﬁbrations) is a useful tool in the hom otopy theory of simplicial sets. The purpose\nof this section is to describe a generalization of the theory of minimal models, in which Kan ﬁbrations are\nreplaced by inner ﬁbrations. An exposition of this theory ca n also be found in [44].', metadata={'page': 90, 'source': 'data/luri_hi

# Generation

In [10]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """You are a helpful math research assistant. 
        Always mention the sections or pages you based your answer on."""),
        ("human", """Use the following pieces of retrieved context to answer the question. 
        If you don't know the answer or the answer is not in the context, just say that you don't know.
        Question: {question}
        Context: {context}
        Answer:""")
    ]
)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [12]:
res = rag_chain.invoke("Why are smooth functions on smooth spaces a Kan complex?")
print(res)

The smooth functions on smooth spaces form a Kan complex because of the properties of Kan complexes and the theory of minimal Kan complexes. In the context of simplicial sets, every Kan complex X is homotopy equivalent to a minimal Kan complex, and a map X→Y of minimal Kan complexes is a homotopy equivalence if and only if it is an isomorphism (Section 1.2.16). This implies that the class of Kan complexes up to homotopy equivalence is equivalent to the class of minimal Kan complexes up to isomorphism. Furthermore, in Proposition 1.2.16.2, it is stated that for every pair of objects X, Y∈Kan, the simplicial set MapKan(X,Y) = Y^X is a Kan complex, which implies that S is an ∞-category. Additionally, in Proposition 1.2.2.3, it is proven that the simplicial set HomR_S(x,y) is a Kan complex when C is an ∞-category. This demonstrates that the space of smooth functions between two smooth spaces forms a Kan complex, making smooth functions on smooth spaces a Kan complex.

Therefore, the smooth

# Contextualizing the question

In [13]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [14]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

qa_system_prompt = """You are a helpful math research assistant. 
Always mention the sections or pages you base your answer on. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \

{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [16]:
from langchain_core.messages import HumanMessage

chat_history = []

question = "Can you explain what is a Kan complex?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=question), ai_msg_1["answer"]])

second_question = "Can you give some examples for it?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(ai_msg_2["answer"])

Certainly! Here are a few examples of Kan complexes:

1. Singular Complexes: The singular complex of any topological space is a Kan complex. This follows from the fact that the horn of a singular complex is a retract of the simplex in the category of topological spaces. Conversely, any Kan complex behaves like a space, and there are simple combinatorial recipes for extracting homotopy groups from Kan complexes.

2. Homotopy Categories: The homotopy category of CW complexes and the homotopy category of Kan complexes are mutually equivalent. This is a result of a theorem by Quillen, which establishes a correspondence between the homotopy category of CW complexes and the homotopy category of Kan complexes.

3. Model Categories: In the context of model categories, Kan complexes play a role in the theory of left and right fibrations. They are used to study the structure of fibrant objects and are closely related to the theory of weak homotopy equivalences.

These examples illustrate the div

In [17]:
for document in ai_msg_1["context"]:
    print(document)
    print()

page_content='In the setting of simplicial sets, this problem admits an att ractive formulation in terms of Quillen’s theory\nofminimal Kan complexes. Every Kan complex Xis homotopy equivalent to a minimal Kan complex, and\na mapX→Yof minimal Kan complexes is a homotopy equivalence if and onl y if it is an isomorphism.\nConsequently, the classiﬁcation of Kan complexes up to homo topy equivalence is equivalent to the classi-\nﬁcation of minimal Kan complexes up to isomorphism. Of course, in practical ter ms, this is not of much\nuse for solving the classiﬁcation problem. Nevertheless, t he theory of minimal Kan complexes (and, more\ngenerally, minimal Kan ﬁbrations) is a useful tool in the hom otopy theory of simplicial sets. The purpose\nof this section is to describe a generalization of the theory of minimal models, in which Kan ﬁbrations are\nreplaced by inner ﬁbrations. An exposition of this theory ca n also be found in [44].' metadata={'page': 90, 'source': 'data/luri_higher_topos.

In [20]:
ai_msg_2

{'input': 'Can you give some examples for it?',
 'chat_history': [HumanMessage(content='Can you explain what is a Kan complex?'),
  "A Kan complex is a type of simplicial set that behaves like a space. It is closely related to the theory of homotopy and is used to study homotopy theory in the context of simplicial sets. Kan complexes are important in the theory of minimal models and are closely related to the theory of left and right Kan extensions. In the context of ∞-categories, Kan complexes play a role in the theory of homotopy pullback squares and the study of left fibrations. They are also used to represent compact objects in ∞-categories.\n\nIn more technical terms, a Kan complex is a simplicial set in which every horn can be filled. Horns are certain types of sub-simplicial sets, and the ability to fill them is a key property that characterizes Kan complexes. This property is related to the notion of weak homotopy equivalences and is used to study the homotopy theory of simplic