In [1]:
from langchain.schema import Document
import logging
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.llms import Ollama
import faiss
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.schema import Document
import os






In [2]:
class FileIngestor:
    def __init__(self, filepath):
        self.filepath = filepath

    def load(self):
        with open(self.filepath, 'r') as file:
            content = file.read()
        return [Document(page_content=content, metadata={"source": self.filepath})]

In [3]:
def load_documents_from_directory(folder_path):
    documents = []
    for filename in os.listdir(folder_path):
        filepath = os.path.join(folder_path, filename)
        if filename.endswith(".txt"):
            loader = TextLoader(filepath)
        elif filename.endswith(".pdf"):
            loader = PyPDFLoader(filepath)
        else:
            continue
        docs = loader.load()
        for doc in docs:
            doc.metadata["source"] = filepath
        documents.extend(docs)
    return documents

In [4]:
class Chunking:
    def __init__(self, documents, chunk_size=500, chunk_overlap=50):
        self.documents = documents
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap
        )

    def chunk(self):
        try:
            chunks = self.splitter.split_documents(self.documents)
            logging.info("Documents chunked successfully.")
            return chunks
        except Exception as e:
            logging.error(f"Failed to chunk the documents: {e}")
            return []


In [5]:
class Embedder:
    def __init__(self, model_name="text-embedding-3-large"):
        self.embedding_model = OpenAIEmbeddings(model=model_name)

    def embed(self, chunks):
        return self.embedding_model.embed_documents([doc.page_content for doc in chunks])

In [6]:
class VectorStore:
    def __init__(self, embedding_model):
        self.embedding_model = embedding_model

    def create_store(self, documents):
        self.db = FAISS.from_documents(documents, self.embedding_model)
        return self.db

    def save(self, path="faiss_index"):
        self.db.save_local(path)

In [8]:
def main():
    docs = load_documents_from_directory("data")

    chunker = Chunking(docs)
    chunks = chunker.chunk()

    for i, chunk in enumerate(chunks[:5]):
        print(f"\n--- Chunk {i+1} ---\n")
        print(chunk.page_content)

    embedder = Embedder()
    embeddings = embedder.embed(chunks)  

    store = VectorStore(embedder.embedding_model)
    db = store.create_store(chunks)
    store.save()

    print("FAISS index created and saved locally.")

if __name__ == "__main__":
    main()



--- Chunk 1 ---

﻿Recogni Inc. | Recogni GmbH
San Jose (US) | Munich (Germany)
	  

	



Pyxis Stakeholder Meeting Prep: Context & Questionnaire

--- Chunk 2 ---

This document is confidential, for Recogni internal use only. The objective of this document is to provide context, along with a set of questions that the Sales, Business Development, Product Management and Engineering Leadership teams may use to validate to drive customer positioning, partner development and help fine-tune product requirements to customer/partner needs. 


Last Edited Date: 04/21/2025
________________

--- Chunk 3 ---

Pyxis Product Positioning        3
Target Customer Segments        3
Hyperscaler and Large CSP (cloud service provider)        3
Generative AI Focused Cloud        3
Large Enterprises        3
Gen AI Tech Model Developers and Independent Software Vendors (ISVs)        4
Key Hypothesis (to test/validate with external partners/customers in 2024)        4
Recogni Thesis #1 : Models size growth c

In [8]:
from sentence_transformers import CrossEncoder

embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")
db = FAISS.load_local("faiss_index", embeddings=embedding_model,allow_dangerous_deserialization=True)
query = "what is pyxis?"
cross_encoder = CrossEncoder(
    "cross-encoder/ms-marco-TinyBERT-L-2-v2", max_length=512, device="cpu"
)

results = db.similarity_search(query, k=25)
reranked_docs = cross_encoder.rank(
    query,
    [doc.page_content for doc in results],
    top_k=5,
    return_documents=True,
)

context = "\n\n".join([doc["text"] + "\n" for doc in reranked_docs])
end_prompt = f"Below is the final context and query that will be sent to the llm.\n\nContext:\n{context}\n\nQuestion: {query}"
print (end_prompt)
final_prompt = PromptTemplate(
    template="""Answer the user's question using ONLY the information provided below.

                If the answer cannot be found in the provided context, reply ONLY with: "Insufficient information available." 
                Do NOT guess or answer from prior knowledge.

                Context:
                {context}

                Question:
                {query}
                """,
    input_variables=['query','context'])
prompt = final_prompt.format(query=query,context=context)
model = ChatOpenAI(model='gpt-4', temperature=0.3, max_completion_tokens=500)

response = model.invoke(prompt)

print("\nAnswer:")
print(response.content)

Below is the final context and query that will be sent to the llm.

Context:
﻿Pyxis Software Specification
Executive Summary
Pyxis is the code name for Recogni’s technology which delivers a complete hardware and software stack for multimodal generative AI inference acceleration. The Product Requirements Document (PRD) defines all of its major components in detail; we provide a short summary here.


Pyxis is the Recogni inference architecture.


Pyxis Device is our first generation ASIC for data-center LLM inference.

Tensor: a tensor is an algebraic object that describes a multilinear relationship between sets of algebraic objects tied to a linear space. They’re also commonly used in physics. Many tensors can be considered a type of multi-dimensional array.[4]


Pyxis has hardware facilities that enable tracing and profiling. As supported by firmware, Pyxis will have the ability to record trace data directly into a memory buffer located on the Pyxis device. This trace data, which captu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



Answer:
Pyxis is the code name for Recogni’s technology which delivers a complete hardware and software stack for multimodal generative AI inference acceleration. It is also the Recogni inference architecture.


## Refer this code

In [35]:
class FileIngestor:
    def __init__(self, filepath):
        self.filepath = filepath

    def load(self):
        with open(self.filepath, 'r') as file:
            content = file.read()
        return [Document(page_content=content, metadata={"source": self.filepath})]


In [42]:
class Chunking:
    def __init__(self, documents, chunk_size=200, chunk_overlap=10):
        self.documents = documents
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap
        )

    def chunk(self):
        try:
            chunks = self.splitter.split_documents(self.documents)
            print(chunks)
            logging.info("Documents chunked successfully.")
            return chunks
        except Exception as e:
            logging.error(f"Failed to chunk the documents: {e}")
            return []


In [43]:
class Embedder:
    def __init__(self, model_name="nomic-embed-text"):
        self.embedding_model = OllamaEmbeddings(model=model_name)

    def embed(self, chunks):
        return self.embedding_model.embed_documents([doc.page_content for doc in chunks])


In [44]:
class VectorStore:
    def __init__(self, embedding_model):
        self.embedding_model = embedding_model

    def create_store(self, documents):
        self.db = FAISS.from_documents(documents, self.embedding_model)
        return self.db

    def save(self, path="faiss_index"):
        self.db.save_local(path)

In [45]:
def main():
    ingestor = FileIngestor("pyxis_software_specification.txt")
    docs = ingestor.load()

    chunker = Chunking(docs)
    chunks = chunker.chunk()

    embedder = Embedder()
    embeddings = embedder.embed(chunks)  

    store = VectorStore(embedder.embedding_model)
    db = store.create_store(chunks)
    store.save()

    print("FAISS index created and saved locally.")

if __name__ == "__main__":
    main()


[Document(metadata={'source': 'pyxis_software_specification.txt'}, page_content='\ufeffPyxis Software Specification'), Document(metadata={'source': 'pyxis_software_specification.txt'}, page_content='A comprehensive overview of all software components of the Recogni Pyxis inference architecture. This document aims to describe the first generation of Recogni’s data-center inference solution.'), Document(metadata={'source': 'pyxis_software_specification.txt'}, page_content='Executive Summary\nSoftware Overview\nSystem Software Overview\nDatacenter Software Overview\nModel Preparation Overview\nCompilation Flow\nAnalysis and Profiling Tools'), Document(metadata={'source': 'pyxis_software_specification.txt'}, page_content='System Design Features and Programming Guidance\nOur Pareto math system enables low power and high density.\nThe fabric connection between Pyxis MCMs is symmetric.'), Document(metadata={'source': 'pyxis_software_specification.txt'}, page_content='All Pyxises are equivalen

In [46]:
embedding_model = OllamaEmbeddings(model="nomic-embed-text")
db = FAISS.load_local("faiss_index", embeddings=embedding_model,allow_dangerous_deserialization=True)
query = "What do you know about pyxis"

results = db.similarity_search(query, k=2)
context = "\n\n".join([doc.page_content for doc in results])

final_prompt = f"Use the context below to answer the question.\n\nContext:\n{context}\n\nQuestion: {query}"

llm = Ollama(model="llama3.2")

response = llm(final_prompt)

print("\nAnswer:")
print(response)


Answer:
Based on the provided context, I can infer that a "Pyxis" refers to a type of software. The specification states that all Pyxises are equivalent and interchangeable, suggesting that it is likely a generic or proprietary term used within an organization. However, without further information, I cannot provide more specific details about what Pyxis software actually does or its functionality.


In [None]:
llm = Ollama(model="llama3.2")
response = llm.invoke("whats up buddy")
print(response)

Not much, just hanging out in the digital realm! How about you? What's new and exciting in your world? Want to chat about something in particular or just shoot the breeze? I'm all ears (or rather, all text).


In [10]:
dc1 = Document(page_content= 'The @Core-ML team develops methodologies that enable our chip to run deep neural networks at high performance.' \
                ' It develops methodologies to quantize and compress networks with minimal performance drop.')

dc2 = Document(page_content= 'The second team is application-oriented and develops deep neural networks that focus on state-of-the-art perception tasks for the automotive industry. ' \
                'They develop baselines and apply the core-ai teams methods to these networks.')

In [None]:

embedding_model = OllamaEmbeddings(model="nomic-embed-text")
vector = embedding_model.embed_documents([dc1,dc2])

print(len(vector))


2


In [16]:
len(vector[0]),len(vector[1])

(768, 768)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
class Chunking:
    def __init__(self,document):
        self.document = document 
    
    def text_splitter(self):
        try:
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=100,
                chunk_overlap=0
                )
            chunks = text_splitter.split_documents(self.document)
            logging.info("Documents chunked successfully")
            return chunks 
        except Exception as e:
            logging.error(f"Failed to chunk the documents: {e}")
            return []

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
embedding = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
documents = ["Delhi is the capital of India",
             "Berlin is the capital of Germany",
             'I study in germany']
vector = embedding.embed_documents(documents)
print(str(vector))

[[0.0435495525598526, 0.023877233266830444, -0.045241277664899826, 0.03540502116084099, -0.016651025041937828, -0.06554816663265228, 0.07625998556613922, 0.00994038674980402, -0.0019631751347333193, -0.027022704482078552, 0.007385638542473316, -0.12068238854408264, 0.06404849886894226, -0.06795036047697067, 0.03638886287808418, -0.0780777707695961, 0.03318420797586441, 0.08175565302371979, 0.07336156070232391, -0.07802220433950424, -0.02092120610177517, 0.035732824355363846, -0.008563267067074776, -0.037455108016729355, 0.0004388459783513099, 0.05346423014998436, 0.005293548107147217, -0.016870422288775444, -0.0004130160668864846, 0.0010301083093509078, 0.06669677793979645, 0.004223247990012169, -0.022522659972310066, -0.002101638587191701, -0.05594777688384056, 0.016869906336069107, -0.1295161098241806, 0.06496332585811615, 0.17288093268871307, -0.11778352409601212, 0.03644104674458504, -0.0006774832727387547, 0.0778668075799942, -0.028167471289634705, 0.03655530512332916, -0.02369884