In [2]:
from langchain_community.retrievers import BM25Retriever
from typing import List
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.document_loaders import CSVLoader
from rank_bm25 import BM25Okapi
from langchain_huggingface import HuggingFaceEmbeddings
import os

In [3]:
# Default cache directory for sentence-transformers model downloads.
# setdefault keeps any value already exported in the environment, so the
# notebook can be pointed at another cache without editing this cell; when
# the variable is unset the behaviour is the same as a plain assignment.
os.environ.setdefault(
    'SENTENCE_TRANSFORMERS_HOME',
    '/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/huggingface-models',
)

In [None]:
# Different text splitters suit different purposes:
# 1. Use `RecursiveCharacterTextSplitter` for long, unstructured text
# 2. Use `CharacterTextSplitter` for structured data or when you want simple splitting
# 3. Use `TokenTextSplitter` when you need to respect token limits for LLMs
# 4. Use specialized splitters (`MarkdownHeaderTextSplitter`, `HTMLHeaderTextSplitter`) for their respective document types

In [4]:
# Load the Q&A CSV, then split the loaded documents on newlines into chunks.
csv_path = "/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/data/NvidiaDocumentationQandApairs.csv"
loader = CSVLoader(csv_path)
documents = loader.load()

splitter_options = dict(
    separator="\n",        # split points: newlines only
    chunk_size=1000,       # upper bound on chunk length, measured by length_function
    chunk_overlap=100,     # characters shared between neighbouring chunks
    length_function=len,   # measure length in characters
    add_start_index=True,  # record each chunk's offset as 'start_index' metadata
)
text_splitter = CharacterTextSplitter(**splitter_options)
docs = text_splitter.split_documents(documents)

In [5]:
# Peek at the first chunk to sanity-check the loader/splitter output.
docs[0]

Document(metadata={'source': '/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/data/NvidiaDocumentationQandApairs.csv', 'row': 0, 'start_index': 0}, page_content=': 0\nquestion: What is Hybridizer?\nanswer: Hybridizer is a compiler from Altimesh that enables programming GPUs and accelerators using C# code or .NET Assembly.')

### Document retrieval

In [6]:
# Lexical (BM25) retriever built over the split chunks.
# NOTE(review): k=10 is presumably the number of hits returned per query —
# confirm against the BM25Retriever documentation.
retriever = BM25Retriever.from_documents(docs, k=10)

In [7]:
# Run the BM25 retriever on a sample query; the bare name on the last line
# displays the returned Documents as the cell output.
text_res = retriever.invoke("What is Hybridizer?")
text_res

[Document(metadata={'source': '/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/data/NvidiaDocumentationQandApairs.csv', 'row': 0, 'start_index': 0}, page_content=': 0\nquestion: What is Hybridizer?\nanswer: Hybridizer is a compiler from Altimesh that enables programming GPUs and accelerators using C# code or .NET Assembly.'),
 Document(metadata={'source': '/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/data/NvidiaDocumentationQandApairs.csv', 'row': 4, 'start_index': 0}, page_content=': 4\nquestion: What is an example of using Hybridizer?\nanswer: An example in the text demonstrates using Parallel.For with a lambda to leverage the compute power of accelerators.'),
 Document(metadata={'source': '/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/data/NvidiaDocumentationQandApairs.csv', 'row': 5, 'start_index': 0}, page_content=': 5\nquestion: How can you debug and profile GPU code written with Hybridizer?\nanswer: You can debug and profile GPU code created with Hyb

### Vector embedding retrieval

In [8]:
# Embedding model for the vector retriever, configured to run on CPU with a
# progress bar shown while batches are embedded.
embedding_config = {
    'model_name': 'BAAI/bge-small-en-v1.5',
    'model_kwargs': {'device': 'cpu'},
    'show_progress': True,
}
embeddings = HuggingFaceEmbeddings(**embedding_config)

In [9]:
# Reuse the FAISS index persisted on disk when it exists; otherwise build it
# from `docs` once and save it, so a fresh checkout / fresh kernel can still
# run this cell top to bottom.
faiss_dir = '/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/rag-optimizations/faiss'
# save_local writes "<index_name>.faiss" (index_name defaults to "index").
if os.path.exists(os.path.join(faiss_dir, 'index.faiss')):
    # SECURITY: load_local unpickles the stored docstore, which can execute
    # arbitrary code. Only enable this flag for index files you created
    # yourself or otherwise trust.
    db = FAISS.load_local(faiss_dir, embeddings, allow_dangerous_deserialization=True)
else:
    db = FAISS.from_documents(docs, embeddings)
    db.save_local(faiss_dir)

In [60]:
# db.save_local('/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/rag-optimizations/faiss')

In [10]:
# Run the same sample query against the FAISS index (k=10 passed to the
# similarity search); the bare name displays the results.
vector_res = db.similarity_search("What is Hybridizer?", k=10)
vector_res

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[Document(id='ff35ab66-3c39-4e66-8800-e9f29e3cdfcf', metadata={'source': '/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/data/NvidiaDocumentationQandApairs.csv', 'row': 0, 'start_index': 0}, page_content=': 0\nquestion: What is Hybridizer?\nanswer: Hybridizer is a compiler from Altimesh that enables programming GPUs and accelerators using C# code or .NET Assembly.'),
 Document(id='4a345fbc-3935-4f4d-a50c-ffda0e3a7e20', metadata={'source': '/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/data/NvidiaDocumentationQandApairs.csv', 'row': 1, 'start_index': 0}, page_content=': 1\nquestion: How does Hybridizer generate optimized code?\nanswer: Hybridizer uses decorated symbols to express parallelism and generates source code or binaries optimized for multicore CPUs and GPUs.'),
 Document(id='5d4d0399-8a56-4bce-9e6c-bc1fec24370a', metadata={'source': '/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/data/NvidiaDocumentationQandApairs.csv', 'row': 6, 'start_index': 0}, p

### RRF

In [11]:
# Explanation: https://medium.com/@devalshah1619/mathematical-intuition-behind-reciprocal-rank-fusion-rrf-explained-in-2-mins-002df0cc5e2a

def rrf(vector_results: List[str], text_results: List[str], k: int=10, m: int=60) -> List[str]:
    """Use RRF (Reciprocal Rank Fusion) to rerank the results from 2 retrieval methods

    Every list contributes ``1 / (rank + m)`` to a document's score, where
    ``rank`` is the 0-based position of the document's FIRST occurrence in
    that list. A doc id repeated inside one list is counted once for that
    list, so duplicates cannot outscore a document found by both retrievers.
    Ties keep first-seen order (vector results before text results).

    params:
    vector_results (List[str]): doc ids from the vector retriever, best first
    text_results (List[str]): doc ids from the text retriever, best first
    k (int): max num docs returned after rerank; k <= 0 returns an empty list
    m (int): default value for hyperparam (constant added to every rank)

    return:
    doc ids after rerank, best first (the combined scores are not returned)
    """
    if k <= 0:
        # A negative k would otherwise slice off the tail instead of limiting.
        return []

    doc_scores = {}
    for ranking in (vector_results, text_results):
        seen = set()
        for rank, doc_id in enumerate(ranking):
            if doc_id in seen:
                # Only the best (first) position within a single list counts.
                continue
            seen.add(doc_id)
            doc_scores[doc_id] = doc_scores.get(doc_id, 0) + 1 / (rank + m)

    # sorted() is stable, so equal scores stay in insertion order.
    ranked = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)
    return [doc_id for doc_id, _ in ranked[:k]]

In [12]:
# Fuse the two rankings; each chunk's page_content string is used as its id.
text_results = [doc.page_content for doc in text_res]
vector_results = [doc.page_content for doc in vector_res]
rrf_res = rrf(vector_results=vector_results, text_results=text_results)
rrf_res

[': 0\nquestion: What is Hybridizer?\nanswer: Hybridizer is a compiler from Altimesh that enables programming GPUs and accelerators using C# code or .NET Assembly.',
 ': 4\nquestion: What is an example of using Hybridizer?\nanswer: An example in the text demonstrates using Parallel.For with a lambda to leverage the compute power of accelerators.',
 ': 5\nquestion: How can you debug and profile GPU code written with Hybridizer?\nanswer: You can debug and profile GPU code created with Hybridizer using NVIDIA Nsight Visual Studio Edition.',
 ': 1\nquestion: How does Hybridizer generate optimized code?\nanswer: Hybridizer uses decorated symbols to express parallelism and generates source code or binaries optimized for multicore CPUs and GPUs.',
 ': 6\nquestion: What advanced C# features does Hybridizer implement?\nanswer: Hybridizer implements advanced C# features, including virtual functions and generics.',
 ': 1699\nquestion: What is cuNumeric?\nanswer: A library that replaces the NumPy 

### LLM Response

In [13]:
# RAG prompt with two str.format placeholders: {context} receives the
# retrieved passages and {question} receives the user query.
prompt_template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Keep the answer as concise as possible.

Context: {context}

Question: {question}

Helpful Answer:"""

In [17]:
from langchain_openai import ChatOpenAI

# Chat model reached through the OpenAI-compatible endpoint at base_url; the
# `model` value is the local snapshot path of Qwen2.5-0.5B-Instruct.
# NOTE(review): api_key='n' looks like a placeholder for a local server that
# does not validate keys — confirm before pointing this at a real endpoint.
model = ChatOpenAI(model='/Users/wangzeyu/Desktop/Github projects/legalai-chatbot/huggingface-models/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/7ae557604adf67be50417f59c2c2f167def9a775', base_url='http://0.0.0.0:8000/v1', api_key='n')
rag_res = model.invoke(
    prompt_template.format(
        # Separate passages with a blank line; joining on '' ran the end of
        # one answer straight into the next row's ": <id>" prefix.
        context='\n\n'.join(rrf_res),
        question='What is Hybridizer?'
    )
)
print(rag_res.content)

Hybridizer is a compiler developed by Altimesh that allows programmers to write C# or .NET Assembly code to run on GPUs and accelerators. It supports parallel processing through decorated symbols and generates optimized code for both multicore CPUs and GPUs. This makes it suitable for scenarios where performance optimization is critical, such as in finance, scientific computing, and machine learning.


In [18]:
# Baseline: ask the same question with no retrieved context, for comparison
# with the RAG answer above.
res = model.invoke("What is Hybridizer?")
print(res.content)

Hybridizer is a software tool designed to help users manage and organize their digital assets, such as files, images, videos, and documents. It provides a user-friendly interface that allows users to easily upload, store, share, and access their digital content.

Key features of Hybridizer include:

1. Digital Asset Management: Users can create and categorize digital assets, such as folders, subfolders, and files.
2. File Sharing: Users can share their digital assets with others through various platforms like email, social media, or cloud storage services.
3. Collaboration Tools: Hybridizer supports collaboration features, allowing multiple users to work on the same file simultaneously.
4. Security Features: It includes built-in security measures to protect sensitive data, ensuring that only authorized users have access to shared assets.
5. Customization Options: Users can customize the appearance and behavior of their digital assets to suit their preferences and needs.
6. Integration 

In [None]:
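# Beyond this, sequential (multi-stage) retrieval is also worth trying: e.g. first use
# vector embeddings to find the top 100 docs, then use text similarity to rank those
# down to the top 10 and let the model generate the response (the reverse order works too).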