In [14]:
import os

from dotenv import load_dotenv
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI
# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups in the next cell can see them.
load_dotenv()

True

In [15]:
# Provider credentials and settings come from the environment, never from
# literals in the notebook. Any variable that is not set resolves to None.
azure_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
azure_api_version = os.environ.get("AZURE_OPENAI_API_VERSION")
open_ai_api_key = os.environ.get("OPENAI_API_KEY")

In [16]:
# Echo a non-secret setting to confirm the environment variables were picked up
# (prints "None" if AZURE_OPENAI_API_VERSION is not set).
print(azure_api_version)

2024-02-01


In [17]:
# --- Node parsers -----------------------------------------------------------
# Sentence-window parser. The two metadata keys chosen here are the ones later
# cells read back ("window" for the surrounding context, "original_text" for
# the sentence itself).
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

# base node parser is a sentence splitter
text_splitter = SentenceSplitter()

# --- Active models ----------------------------------------------------------
# LLM: Mistral served through Ollama. Each request is allowed up to 30 seconds.
llm = Ollama(model="mistral", request_timeout=30.0)

# Embeddings: a Hugging Face sentence-transformers model.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-mpnet-base-v2", max_length=512
)

# --- Alternative backends (uncomment one llm/embed_model pair to switch) -----
# OpenAI:
# llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
# embed_model = OpenAIEmbedding(model="text-embedding-3-small")
#
# Azure OpenAI (uses the azure_* values read from the environment):
# llm = AzureOpenAI(deployment_name="gpt-35-turbo-1106",
#                     model="gpt-35-turbo",
#                     api_key=azure_api_key,
#                     azure_endpoint=azure_endpoint,
#                     azure_api_version=azure_api_version,
#                     temperature=0.1)
# embed_model = AzureOpenAIEmbedding(
#     model="text-embedding-ada-002",
#     deployment_name="text-embedding",
#     api_key=azure_api_key,
#     azure_endpoint=azure_endpoint,
#     azure_api_version=azure_api_version
# )

# --- Global defaults --------------------------------------------------------
# set models for entire llamaindex app
# (`Settings` is imported in the notebook's import cell.)
Settings.llm = llm
Settings.embed_model = embed_model
Settings.text_splitter = text_splitter

In [18]:
# Smoke test: embed one sentence to confirm the embedding model loads and runs.
sample_text = "Open AI new Embeddings models is shit."
embeddings = embed_model.get_text_embedding(sample_text)

In [19]:
# Smoke test: request a short completion to confirm the configured LLM responds.
prompt = "The sky is a beautiful blue and"
response = llm.complete(prompt)
print(response)

 the sun is shining, casting gentle rays of light over the landscape. Birds are singing sweetly in the trees, and there's a soft breeze that rustles the leaves and carries the scent of blooming flowers. It's a peaceful moment, and the world seems to be bathed in a warm, golden glow. I hope you're enjoying this beautiful day! Is there anything specific you would like me to help you with?


In [20]:
# Load the source PDF; the path is relative to the notebook's working directory.
pdf_path = "data/luri_higher_topos.pdf"
documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()

We extract the two sets of nodes that will be stored in the vector indexes (`VectorStoreIndex`): the nodes produced by the sentence window parser, and the "base" nodes produced by the standard sentence splitter.

In [21]:
# Baseline chunks from the plain sentence splitter.
base_nodes = text_splitter.get_nodes_from_documents(documents)
# Sentence-window nodes: each carries its "window" and "original_text" metadata.
nodes = node_parser.get_nodes_from_documents(documents)

In [22]:
# Build one vector index per node set. VectorStoreIndex is already imported in
# the notebook's import cell, so it is not re-imported here.
sentence_index = VectorStoreIndex(nodes)
base_index = VectorStoreIndex(base_nodes)

In [30]:
# Query the sentence-window index: fetch the 5 most similar nodes, then have the
# postprocessor replace each node's text with the value stored under its
# "window" metadata key before the answer is synthesized.
# (`MetadataReplacementPostProcessor` is imported in the notebook's import cell.)
query_engine = sentence_index.as_query_engine(
    similarity_top_k=5,
    # the target key defaults to `window` to match the node_parser's default
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ],
)
window_response = query_engine.query(
    "Why are smooth functions on smooth spaces a Kan complex?"
)
print(window_response)

 The text provides information that a smooth function on a topological space X is defined as a function satisfying certain conditions mentioned in Proposition 4.1.2.7. It also states that the projection map from the homotopy category of simplicial sets to the category of sets (Map(Set∆)/S) is a right fibration when the projection map is smooth and the inclusion is cofinal, as stated in Lemma 2.1.3.3. The text does not directly mention that smooth functions on smooth spaces form a Kan complex. However, it can be inferred from Theorem 1.1.5.13 and the fact that the singular complex of any topological space is a Kan complex that there is a relationship between Kan complexes and topological spaces. Therefore, one could infer that smooth functions on smooth spaces might form a Kan complex due to this connection. However, without further information, it cannot be definitively stated that smooth functions on smooth spaces are a Kan complex.


In [31]:
# Inspect the first source node behind the answer: compare the full window
# with the single sentence stored under "original_text".
top_metadata = window_response.source_nodes[0].node.metadata
window = top_metadata["window"]
sentence = top_metadata["original_text"]

print(f"Window: {window}")
print("------------------")
print(f"Original Sentence: {sentence}")

Window: LetCbe a topological category.  Then the topological nerve N(C)is an∞-category.
 Proof.  This follows immediately from Proposition 1.1.5.10, since the singular complex of any topological
space is a Kan complex.
 We now cite the following theorem, which will be proven in §2.2.4 and reﬁned in §2.2.5:
Theorem 1.1.5.13.  LetCbe a topological category, and let X,Y∈Cbe objects.  Then the counit map
|MapC[N(C)](X,Y)|→MapC(X,Y)
is a weak homotopy equivalence of topological spaces.

------------------
Original Sentence: This follows immediately from Proposition 1.1.5.10, since the singular complex of any topological
space is a Kan complex.

