# 1. Set up GPU, API keys, and models


In [1]:
import torch

# Report which PyTorch build is installed
print(f"PyTorch version: {torch.__version__}")

# CUDA availability is probed once; every report below hangs off this branch
if torch.cuda.is_available():
    print("CUDA is available! GPU is ready to be used.")
    print(f"Number of GPUs available: {torch.cuda.device_count()}")
    current_gpu_name = torch.cuda.get_device_name(torch.cuda.current_device())
    print(f"Current GPU: {current_gpu_name}")

    # Per-device details: name, total memory and compute capability
    for gpu_index in range(torch.cuda.device_count()):
        gpu_properties = torch.cuda.get_device_properties(gpu_index)
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")
        print(f"  - Total Memory: {gpu_properties.total_memory / 1e9} GB")
        print(f"  - Compute Capability: {torch.cuda.get_device_capability(gpu_index)}")

    # Smoke test: create a random 3x3 tensor and move it onto the GPU
    tensor = torch.rand(3, 3).cuda()
    print("Tensor on GPU:", tensor)
else:
    print("CUDA is not available. GPU is not set up correctly.")
    print("GPU is not available, cannot move tensor to GPU.")


PyTorch version: 2.3.1+cu121
CUDA is available! GPU is ready to be used.
Number of GPUs available: 1
Current GPU: NVIDIA GeForce RTX 4090
GPU 0: NVIDIA GeForce RTX 4090
  - Total Memory: 25.756696576 GB
  - Compute Capability: (8, 9)
Tensor on GPU: tensor([[0.7068, 0.6939, 0.9230],
        [0.2045, 0.8537, 0.6757],
        [0.7674, 0.0699, 0.5267]], device='cuda:0')


In [2]:
import os
from dotenv import load_dotenv
from huggingface_hub import login

# Print the current working directory (optional for debugging)
print(os.getcwd())

# Load variables from the .env file one level above the current working directory
dotenv_path = os.path.join(os.getcwd(), '../.env')
load_dotenv(dotenv_path)

# Read the API keys from the environment; a key that is not set comes back as None
openai_api_key = os.getenv("OPENAI_API_KEY")
hf_token = os.getenv("HUGGINGFACE_API_KEY")
anthropic_token = os.getenv("ANTHROPIC_API_KEY")
tavily_token = os.getenv("TAVILY_API_KEY")
langsmith_token = os.getenv("LANGSMITH_API_KEY")
nomic_token = os.getenv("NOMIC_EMBEDDINGS_API_KEY")

# Re-export every key that is present. Each variable is guarded by its OWN
# token: os.environ only accepts strings, so writing a missing (None) key
# raises TypeError, and guarding with another key's token would silently skip
# a key that is actually available.
for _env_name, _env_value in (
    ("HUGGINGFACE_API_KEY", hf_token),
    ("OPENAI_API_KEY", openai_api_key),
    ("ANTHROPIC_API_KEY", anthropic_token),
    ("TAVILY_API_KEY", tavily_token),
    ("LANGSMITH_API_KEY", langsmith_token),
    ("NOMIC_EMBEDDINGS_API_KEY", nomic_token),
):
    if _env_value:
        os.environ[_env_name] = _env_value

# Tracing settings are set unconditionally, whether or not a LangSmith key was found
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"


/workspaces/custom_ollama_docker/notebooks


Load a local Ollama model so the LLM can be used free of charge throughout the RAG pipeline.

In [3]:
from langchain_community.chat_models import ChatOllama

# Name of the Ollama model; it is passed to ChatOllama below.
local_llm = "llama3.1"

# Chat model handle created with temperature=0.
# NOTE: the "Basic RAG Pipeline Setup" cell later rebinds `llm` with
# temperature=0.7, so this instance is only in effect until that cell runs.
llm = ChatOllama(model=local_llm, temperature=0)


Embedding with Ollama Models

Use Ollama or compatible models for generating embeddings, ensuring control over the embedding process and consistency within the pipeline.
Action:

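    Replace external embedding generation with embeddings from NomicEmbeddings. (As configured in the cell below, a Nomic API key is passed, so the embeddings are requested from Nomic's hosted service; a fully local setup would need NomicEmbeddings' local inference mode instead.)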

Pros:

    Efficient Retrieval: Embeddings generated locally.
    Unified Pipeline: Consistency in using local models.

Cons:

    Embedding Quality: May not match commercial APIs without fine-tuning.
    Resource Requirements: Ensure compatibility with your hardware.

In [4]:
from langchain_nomic import NomicEmbeddings

# Embedding model shared by every FAISS vector store built in this notebook.
# NOTE(review): an API key is passed here, which suggests the embeddings are
# requested from Nomic's hosted service rather than computed locally — confirm
# against the "local embeddings" goal described above.
embeddings_model = NomicEmbeddings(
    model="nomic-embed-text-v1.5",
    nomic_api_key=nomic_token
)


In [5]:
import nltk
# Download the 'punkt' tokenizer package (presumably the data that
# sent_tokenize needs for sentence splitting — confirm for the installed NLTK).
nltk.download('punkt')

from nltk.tokenize import sent_tokenize
from langchain.schema import Document

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


# 2. Basic RAG Pipeline Setup with Ollama

Set up the basic Retrieval-Augmented Generation (RAG) pipeline using Ollama. This involves retrieving relevant documents, feeding them to the LLM (Ollama), and generating responses based on the context.
Steps:

    Document Retrieval:
        Use a vector store like FAISS to store and retrieve embeddings.
        Use a text splitter to divide documents into manageable chunks.

    LLM Integration:
        Replace any existing LLMs with ChatOllama.

    Prompt & Template:
        Customize prompts to leverage Ollama's capabilities.

In [19]:

# Import necessary modules
# NOTE: os, torch, nltk, load_dotenv, login, NomicEmbeddings and Document were
# already imported by earlier cells; they are repeated here.
import os
import torch
import nltk
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from huggingface_hub import login
import numpy as np

# LangChain imports.
# NOTE(review): these `langchain.*` paths look like legacy re-exports of
# `langchain_community` — confirm they are still supported by the installed version.
from langchain.vectorstores import FAISS  # vector store used for similarity search
from langchain.prompts import ChatPromptTemplate  # builds the system/user chat prompts
from langchain.chains import LLMChain  # combines an LLM with a prompt template
from langchain.chat_models import ChatOllama  # rebinds the ChatOllama name imported earlier from langchain_community.chat_models
from langchain_nomic import NomicEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WebBaseLoader
from langchain.schema import Document, HumanMessage

# NBA API imports
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players

# Download the 'punkt' tokenizer package (an earlier cell already does this)
nltk.download('punkt')

# Load documents from the web
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()

# Split documents into chunks (chunk_size=1000, chunk_overlap=200)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

# Initialize FAISS vector store from the chunks, embedded with the shared embeddings_model
vector_store = FAISS.from_documents(documents, embeddings_model)

# Create a retriever
# NOTE: the proposition-chunking cell later rebinds both `vector_store` and
# `retriever`, so which index this name refers to depends on execution order.
retriever = vector_store.as_retriever(search_type="similarity")

# Define the prompt template; it expects the variables {question} and {context}
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You are an assistant that answers questions based on retrieved context."),
    ("user", "Question: {question}\n\nContext:\n{context}")
])

# Initialize the LLM
# NOTE: this rebinds the global `llm` (first created with temperature=0) to
# temperature=0.7; every later cell that reads `llm`, including the
# re-ranking scorer, uses this instance.
llm = ChatOllama(model="llama3.1", temperature=0.7)

# Set up the chain shared by the basic and NBA examples
chain = LLMChain(llm=llm, prompt=prompt_template)


# Example usage
def basic_rag_example():
    """Answer one fixed question with the module-level retriever and chain.

    Retrieves passages for the question, joins the text of at most five of
    them into a context string, runs the chain and prints the response.
    """
    question = "What is the main topic of the document?"
    hits = retriever.get_relevant_documents(question)

    # Only the first five hits are passed on as context
    passages = []
    for hit in hits[:5]:
        passages.append(hit.page_content)

    answer = chain.run(question=question, context="\n\n".join(passages))
    print("Basic RAG Response:")
    print(answer)

# Run the demo
basic_rag_example()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Basic RAG Response:
Based on the retrieved context, the main topic of the document is writing a Super Mario game in Python using MVC components split in separate files with keyboard control.


2. Option 1 - Using the NBA API
2.1. NBAAPIDataLoader Class

This class fetches player career stats using the NBA API and converts the data into a list of Document objects suitable for the RAG pipeline.

In [20]:
class NBAAPIDataLoader:
    """Load one player's career statistics through nba_api as Document objects."""

    def __init__(self, player_name):
        # Full player name used for the lookup in fetch_player_career_stats
        self.player_name = player_name

    def fetch_player_career_stats(self):
        """Return the first career-stats data frame for the player.

        Prints a message and returns None when no player matches the name.
        When several players match, the first match is used.
        """
        matches = players.find_players_by_full_name(self.player_name)
        if matches:
            first_match_id = matches[0]['id']
            stats = playercareerstats.PlayerCareerStats(player_id=first_match_id)
            return stats.get_data_frames()[0]

        print(f"No player found with name {self.player_name}")
        return None

    def df_to_documents(self, df):
        """Turn every DataFrame row into a Document holding the row's string form."""
        return [Document(page_content=row.to_string()) for _, row in df.iterrows()]

    def load_data(self):
        """Fetch the stats and convert them; returns [] when the fetch returned None."""
        stats_df = self.fetch_player_career_stats()
        return [] if stats_df is None else self.df_to_documents(stats_df)


def nba_api_example():
    """Answer a points-per-game question from LeBron James' NBA API career stats.

    Loads the stats as Documents, splits and indexes them in a fresh FAISS
    store, retrieves context for a fixed question and prints the response of
    the module-level chain. Returns without output when no data was loaded.
    """
    loader = NBAAPIDataLoader("LeBron James")
    player_docs = loader.load_data()
    if not player_docs:
        return

    # Split the per-row documents, then index the resulting chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(player_docs)
    stats_store = FAISS.from_documents(chunks, embeddings_model)
    stats_retriever = stats_store.as_retriever(search_type="similarity")

    # Retrieve context for the example query (at most five passages)
    question = "What is LeBron James' average points per game?"
    hits = stats_retriever.get_relevant_documents(question)
    context = "\n\n".join(hit.page_content for hit in hits[:5])

    # Generate and show the answer
    answer = chain.run(question=question, context=context)
    print("NBA API RAG Response:")
    print(answer)


3. Option 2 - Web Scraping nba.com
3.1. NBAWebScraperDataLoader Class

This class scrapes player news articles from nba.com and converts them into Document objects.

In [21]:
class NBAWebScraperDataLoader:
    """Scrape nba.com search results for a player and wrap each headline in a Document.

    Attributes:
        player_name: Free-text player name used as the search query.
    """

    # Seconds to wait for nba.com before giving up; without a timeout
    # requests.get may block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self, player_name):
        self.player_name = player_name

    def scrape_nba_player_news(self):
        """Fetch the search page for the player and extract headline Documents.

        Returns:
            A list of Documents, one per headline element found. The content
            is "<title>\\nLink: <href>" when the headline contains a link and
            just the title otherwise. Returns [] when the request fails, times
            out, or answers with a non-200 status.
        """
        from urllib.parse import quote_plus

        # quote_plus encodes spaces as '+' and also escapes characters such as
        # apostrophes or accented letters that may appear in player names.
        search_query = quote_plus(self.player_name)
        url = f"https://www.nba.com/search?query={search_query}"

        # Send a GET request to nba.com; network errors are reported the same
        # way as a bad status code instead of aborting the notebook.
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Failed to retrieve data from {url}: {exc}")
            return []
        if response.status_code != 200:
            print(f"Failed to retrieve data from {url}")
            return []

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract news articles (modify selectors based on actual HTML structure)
        articles = soup.find_all('div', class_='ArticleItem_headline')
        documents = []
        for article in articles:
            title = article.get_text(strip=True)
            # A headline may have no <a> tag or an <a> without href; keep the
            # title in that case rather than failing on the missing link.
            anchor = article.find('a')
            link = anchor.get('href') if anchor is not None else None
            if link:
                content = f"{title}\nLink: {link}"
            elif title:
                content = title
            else:
                continue  # nothing usable in this element
            documents.append(Document(page_content=content))

        return documents

    def load_data(self):
        """Return the scraped headline Documents (possibly an empty list)."""
        return self.scrape_nba_player_news()

def nba_web_scraping_example():
    """Answer a "latest news" question from headlines scraped off nba.com.

    Loads the scraped headline Documents, splits and indexes them in a fresh
    FAISS store, retrieves context for a fixed question and prints the
    response of the module-level chain. Returns without output when nothing
    was scraped.
    """
    loader = NBAWebScraperDataLoader("LeBron James")
    news_docs = loader.load_data()
    if not news_docs:
        return

    # Split the headline documents, then index the resulting chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(news_docs)
    news_store = FAISS.from_documents(chunks, embeddings_model)
    news_retriever = news_store.as_retriever(search_type="similarity")

    # Retrieve context for the example query (at most five passages)
    question = "What is the latest news about LeBron James?"
    hits = news_retriever.get_relevant_documents(question)
    context = "\n\n".join(hit.page_content for hit in hits[:5])

    # Generate and show the answer
    answer = chain.run(question=question, context=context)
    print("NBA Web Scraping RAG Response:")
    print(answer)




# 3. Advanced Chunking: Proposition Chunking

Introduce Proposition Chunking to break documents into smaller, meaningful sentences. This enhances retrieval accuracy and allows the LLM to generate more precise responses.

- Where to Implement:

    Replace fixed-length chunking with proposition-based chunking, breaking text into logical sentences.


Pros:

    Improved Precision: Better alignment between query and content.
    Contextual Clarity: Each chunk represents a complete thought.

Cons:

    Increased Number of Chunks: May lead to a larger number of documents to manage.
    Potential Performance Impact: More documents can slow down retrieval.

In [22]:
class PropositionChunking:
    """Split a text into sentence-level Documents ("propositions")."""

    def __init__(self, content):
        # Raw text to be split into sentences
        self.content = content

    def chunk(self):
        """Return one Document per sentence that nltk.sent_tokenize finds in the content."""
        propositions = []
        for sentence in nltk.sent_tokenize(self.content):
            propositions.append(Document(page_content=sentence))
        return propositions

# Sentence-level chunks for every loaded page, in document order
chunked_documents = [
    proposition
    for doc in docs
    for proposition in PropositionChunking(doc.page_content).chunk()
]

# Rebuild the vector store and the retriever on top of the sentence chunks
vector_store = FAISS.from_documents(chunked_documents, embeddings_model)
retriever = vector_store.as_retriever(search_type="similarity")

# Example usage
def proposition_chunking_example():
    """Print how many sentence chunks proposition chunking produced."""
    chunk_count = len(chunked_documents)
    print("Number of chunks after proposition chunking:", chunk_count)

# Run the demo
proposition_chunking_example()


Number of chunks after proposition chunking: 426


# 4. Query Transformations: HyDE Approach

Implement Hypothetical Document Embeddings (HyDE) for query transformation. This method generates hypothetical documents based on the original query to improve retrieval alignment.
Where to Implement:

    Add a query transformation step before retrieval using the LLM.

Pros:

    Enhanced Retrieval: Aligns queries with document embeddings.
    Improved Context Matching: Captures nuances of the query.

Cons:

    Additional Computation: Requires extra LLM calls.
    Potential Latency Increase: May affect response times.

In [23]:
from langchain.schema import Document, HumanMessage

class HyDE:
    """Hypothetical Document Embeddings: turn a query into an LLM-written answer."""

    def __init__(self, llm):
        # Callable chat model; it is invoked with a list of messages
        self.llm = llm

    def transform(self, original_query):
        """Ask the LLM for a detailed answer and return it as stripped text.

        The reply's `content` attribute is used when present; otherwise the
        reply itself is treated as a string.
        """
        request = HumanMessage(
            content=f"Provide a detailed answer to the following question:\n\n{original_query}"
        )
        reply = self.llm([request])
        if hasattr(reply, 'content'):
            return reply.content.strip()
        return reply.strip()

# Example usage
def hyde_example():
    """Generate, print and return a hypothetical answer for a fixed LangSmith question."""
    hypothetical = HyDE(llm).transform("Explain how LangSmith can help with testing.")
    print("Hypothetical Document:")
    print(hypothetical)
    return hypothetical

# Run the demo and keep its result for the retrieval step below
transformed_query_doc = hyde_example()

# Embed the hypothetical document
transformed_embedding = embeddings_model.embed_query(transformed_query_doc)

# Search the vector store with that embedding; later cells read `retrieved_docs`
retrieved_docs = vector_store.similarity_search_by_vector(transformed_embedding)


Hypothetical Document:
I couldn't find any information on "LangSmith". It's possible that it's a fictional or non-existent tool, or perhaps a lesser-known or emerging solution in the field of software development and testing.

However, I can provide a general answer to how tools like LangSmith (if it exists) could potentially help with testing:

**Assuming LangSmith is a software testing tool**

In today's fast-paced software development landscape, ensuring the quality and reliability of software applications is more crucial than ever. Testing plays a vital role in this process, helping developers identify bugs, ensure compatibility, and guarantee that their product meets user expectations.

If LangSmith were a real-world solution, it could potentially offer various features to facilitate testing processes, such as:

1. **Automated Testing**: LangSmith might provide an automated testing framework that enables developers to write tests for their code using a specific programming languag

# 5. Advanced Retrieval: Multi-Chunk Segment Extraction

Use Relevant Segment Extraction (RSE) to retrieve multi-chunk segments, providing better context for the LLM during generation.
Where to Implement:

    After initial retrieval, combine relevant chunks into larger segments.

Pros:

    Contextual Richness: Provides comprehensive information.
    Better Answer Generation: Improves the LLM's ability to generate accurate responses.

Cons:

    Potential for Exceeding Context Window: Be mindful of the LLM's maximum input size.
    Processing Overhead: Combining large texts may increase computation time.

In [24]:
class RelevantSegmentExtraction:
    """Merge the text of several retrieved documents into one segment."""

    def __init__(self, documents):
        # Documents whose page_content will be concatenated, in the given order
        self.documents = documents

    def extract(self):
        """Return every document's page_content joined by single spaces."""
        pieces = (doc.page_content for doc in self.documents)
        return " ".join(pieces)

# Example usage
def relevant_segment_extraction_example():
    """Print the module-level 'retrieved_docs' merged into one combined segment."""
    # 'retrieved_docs' must already exist; it is produced by the HyDE retrieval step
    merged = RelevantSegmentExtraction(retrieved_docs).extract()
    print("Combined Segments:")
    print(merged)

# Call the example function (make sure 'retrieved_docs' is defined)
# relevant_segment_extraction_example()


# 6. Intelligent Re-ranking for Better Retrieval

Use Intelligent Re-ranking to reorder retrieved documents based on their relevance, ensuring the most pertinent information is considered first.
Where to Implement:

    After retrieval, re-score documents using the LLM and re-rank them.

Pros:

    Increased Accuracy: Prioritizes the most relevant information.
    Dynamic Adaptation: Adjusts to nuances in the query.

Cons:

    Computational Overhead: Additional LLM calls for scoring.
    Latency: May increase total response time.

In [25]:
class IntelligentReranking:
    """Re-order retrieved documents by an LLM-assigned relevance score."""

    def __init__(self, llm):
        # Callable chat model; it is invoked with a list of messages
        self.llm = llm

    @staticmethod
    def _parse_score(score_text):
        """Extract a relevance score from free-form model output.

        Chat models rarely answer with a bare number ("8/10",
        "Relevance Score: 8", "I'd say 5.5 out of 10."), and float() rejects
        all of those. An explicit "N/10" or "N out of 10" is preferred;
        otherwise the first number in the text is used.

        Returns:
            The score as a float capped at 10.0, or 0.0 when the text
            contains no number.
        """
        import re

        match = re.search(
            r"(\d+(?:\.\d+)?)\s*(?:/|out of)\s*10\b", score_text, re.IGNORECASE
        )
        if match is None:
            match = re.search(r"(\d+(?:\.\d+)?)", score_text)
        if match is None:
            return 0.0  # Default to 0 if no score can be found
        return min(float(match.group(1)), 10.0)

    def rerank(self, query, retrieved_docs):
        """Score each document against the query and return them best-first.

        Args:
            query: The user question the documents are judged against.
            retrieved_docs: Documents exposing a `page_content` attribute.

        Returns:
            The same documents sorted by descending score. Documents with
            equal scores keep their original relative order.
        """
        scored_docs = []
        for doc in retrieved_docs:
            # Asking for a number only makes the reply easier to parse
            prompt = (
                "On a scale of 1 to 10, how relevant is the following document to the query? "
                "Respond with a single number only."
                f"\n\nQuery: {query}\n\nDocument: {doc.page_content}\n\nRelevance Score:"
            )
            messages = [HumanMessage(content=prompt)]
            response = self.llm(messages)
            score_text = response.content.strip() if hasattr(response, 'content') else response.strip()
            scored_docs.append((doc, self._parse_score(score_text)))

        # Sort documents by score (sorted() is stable, so ties keep retrieval order)
        ranked = sorted(scored_docs, key=lambda pair: pair[1], reverse=True)
        return [doc for doc, _ in ranked]

# Intelligent Re-ranking Example
def intelligent_reranking_example():
    """Re-rank the module-level 'retrieved_docs' for a fixed question.

    Prints the best-ranked document and returns the first five documents of
    the re-ranked list.
    """
    question = "Explain how LangSmith can help with testing."
    ranked = IntelligentReranking(llm).rerank(question, retrieved_docs)
    best_five = ranked[:5]
    print("Top Document After Re-ranking:")
    print(ranked[0].page_content)
    return best_five

# Run the demo; 'top_docs' is reused by the explainable-retrieval cell
top_docs = intelligent_reranking_example()

print(top_docs)


Top Document After Re-ranking:
The results highlight when the external symbolic tools can work reliably, knowing when to and how to use the tools are crucial, determined by the LLM capability.
[Document(metadata={}, page_content='The results highlight when the external symbolic tools can work reliably, knowing when to and how to use the tools are crucial, determined by the LLM capability.'), Document(metadata={}, page_content='ChatGPT Plugins and OpenAI API  function calling are good examples of LLMs augmented with tool use capability working in practice.'), Document(metadata={}, page_content='The workflow, implemented in LangChain, reflects what was previously described in the ReAct and MRKLs and combines CoT reasoning with tools relevant to the tasks:\n\nThe LLM is provided with a list of tool names, descriptions of their utility, and details about the expected input/output.'), Document(metadata={}, page_content='This agent can use tools to browse the Internet, read documentation, ex

# 7. Ensemble Retrieval for Robustness

Incorporate Ensemble Retrieval by combining multiple retrieval methods, such as keyword-based and vector-based retrieval, to enhance the retrieval robustness.
Where to Implement:

    After individual retrievals, combine and re-rank results.

Pros:

    Robustness: Captures documents that might be missed by one method.
    Improved Recall: Increases the chance of retrieving relevant documents.

Cons:

    Complexity: Requires managing multiple retrieval systems.
    Potential Redundancy: May retrieve overlapping information.

In [26]:
def ensemble_retrieval_example():
    """Combine BM25 keyword retrieval with vector retrieval and answer a fixed query.

    Each candidate gets `vector_weight` if the vector retriever returned it
    and `bm25_weight` if it is among the BM25 top hits. The five best-scoring
    distinct documents form the context for the LLM, and the response is
    printed.

    Reads the module-level `chunked_documents`, `retriever` and `llm`.
    """
    from rank_bm25 import BM25Okapi

    query = "Explain how LangSmith can help with testing."
    top_k = 5
    vector_weight = 0.7
    bm25_weight = 0.3

    # Keyword retrieval: whitespace-tokenized BM25 over the sentence chunks
    tokenized_corpus = [doc.page_content.split() for doc in chunked_documents]
    bm25 = BM25Okapi(tokenized_corpus)
    bm25_scores = np.array(bm25.get_scores(query.split()))
    bm25_top_indices = bm25_scores.argsort()[-top_k:][::-1]
    bm25_top_docs = [chunked_documents[i] for i in bm25_top_indices]

    # Retrieve using vector store
    vector_retrieved_docs = retriever.get_relevant_documents(query)

    # Membership is decided by text rather than by Document equality: the two
    # retrievers may hand back different Document objects for the same chunk.
    vector_contents = {doc.page_content for doc in vector_retrieved_docs}
    bm25_contents = {doc.page_content for doc in bm25_top_docs}

    # Combine results without duplicates, keeping each Document attached to
    # its score so the ranking below returns documents, not bare strings.
    scored_docs = {}
    for doc in vector_retrieved_docs + bm25_top_docs:
        content = doc.page_content  # Use the content as a unique identifier
        if content in scored_docs:
            continue
        vector_score = 1 if content in vector_contents else 0
        bm25_score = 1 if content in bm25_contents else 0
        scored_docs[content] = (doc, vector_weight * vector_score + bm25_weight * bm25_score)

    # Sort documents based on combined scores (stable: ties keep insertion order)
    ensemble_results = sorted(scored_docs.values(), key=lambda pair: pair[1], reverse=True)
    top_docs = [doc for doc, _ in ensemble_results[:top_k]]
    context = "\n\n".join([doc.page_content for doc in top_docs])

    # Prompt and chain are local so this example does not touch the global ones
    prompt_template = ChatPromptTemplate.from_messages([
        ("system", "You are an assistant that answers questions based on retrieved context."),
        ("user", "Question: {question}\n\nContext:\n{context}")
    ])
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Invoke the chain
    response = chain.run(question=query, context=context)
    print("Ensemble Retrieval Response:")
    print(response)

# Call the example function
ensemble_retrieval_example()


Ensemble Retrieval Response:
Based on the provided context, it appears that we're discussing a topic unrelated to LangSmith or testing. However, I'm assuming that you'd like me to retrieve context about something called "LangSmith" and provide information on how it can help with testing.

Unfortunately, there's no relevant context provided about LangSmith. Nevertheless, if I were to make an educated guess, I might say that LangSmith could be a tool or service that assists with automated testing or quality assurance processes.

In the absence of concrete information, here are some hypothetical ways LangSmith might aid in testing:

1. **Automated Testing**: LangSmith could potentially help automate various types of tests, such as unit tests, integration tests, or end-to-end tests.
2. **Test Case Management**: It may enable users to manage and organize test cases more efficiently, making it easier to maintain a comprehensive set of tests.
3. **Code Review and Analysis**: This tool might p

# 8. Explainable Retrieval for Transparency

Add Explainable Retrieval to provide insights into why certain documents were retrieved, enhancing transparency and user trust.
Where to Implement:

    After retrieval and before presenting results to the user.

Pros:

    Transparency: Users understand the relevance of retrieved documents.
    Trust Building: Enhances user confidence in the system.

Cons:

    Additional Computation: Generating explanations requires extra processing.
    Possible Latency Increase: May affect overall response time.

In [27]:
class ExplainableRetrieval:
    """Attach a one-sentence, LLM-written relevance explanation to each document."""

    def __init__(self, llm):
        # Callable chat model; it is invoked with a list of messages
        self.llm = llm

    def _explain(self, query, doc):
        """Ask the LLM why `doc` is relevant to `query`; return the stripped reply text."""
        prompt = f"Explain in one sentence why the following document is relevant to the query.\n\nQuery: {query}\n\nDocument: {doc.page_content}\n\nExplanation:"
        reply = self.llm([HumanMessage(content=prompt)])
        # Use the reply's `content` when present, otherwise treat the reply as text
        text = reply.content if hasattr(reply, 'content') else reply
        return text.strip()

    def retrieve_with_explanation(self, query, retrieved_docs):
        """Return a list of (document, explanation) tuples in the input order."""
        return [(doc, self._explain(query, doc)) for doc in retrieved_docs]

# Explainable Retrieval Example
def explainable_retrieval_example(top_docs):
    """Explain each document's relevance, then answer using documents plus explanations.

    Prints every document with its explanation, builds a context string from
    the same pairs, runs a local chain on it and prints the response.
    """
    question = "Explain how LangSmith can help with testing."
    explained = ExplainableRetrieval(llm).retrieve_with_explanation(question, top_docs)

    # Show each pair and collect the matching context block in one pass
    separator = '-' * 80
    context_blocks = []
    for doc, explanation in explained:
        print(f"Document: {doc.page_content}\nExplanation: {explanation}\n{separator}")
        context_blocks.append(f"Document: {doc.page_content}\nExplanation: {explanation}")
    context_with_explanations = "\n\n".join(context_blocks)

    answer_prompt = ChatPromptTemplate.from_messages([
        ("system", "You are an assistant that answers questions based on retrieved context and explanations."),
        ("user", "Question: {question}\n\nContext:\n{context_with_explanations}")
    ])
    answer_chain = LLMChain(llm=llm, prompt=answer_prompt)

    # Invoke the chain
    answer = answer_chain.run(
        question=question,
        context_with_explanations=context_with_explanations,
    )
    print("Explainable Retrieval Response:")
    print(answer)

# Run the demo with the re-ranked 'top_docs'
explainable_retrieval_example(top_docs)


Document: The results highlight when the external symbolic tools can work reliably, knowing when to and how to use the tools are crucial, determined by the LLM capability.
Explanation: The document is relevant to the query because it provides information on how to utilize external symbolic tools effectively for testing, which is a key aspect of LangSmith's capabilities.
--------------------------------------------------------------------------------
Document: ChatGPT Plugins and OpenAI API  function calling are good examples of LLMs augmented with tool use capability working in practice.
Explanation: This document is relevant because it provides an example of how AI models, like LangSmith, can be used to augment human capabilities, specifically in the context of testing.
--------------------------------------------------------------------------------
Document: The workflow, implemented in LangChain, reflects what was previously described in the ReAct and MRKLs and combines CoT reasonin

9. Main Function Integrating All Components

In [30]:
def main():
    """Run the whole pipeline: load, chunk, index, HyDE-retrieve, re-rank, answer.

    Everything is rebuilt locally from the source URL; only the module-level
    `llm` and `embeddings_model` are reused. The final answer is printed.
    """
    # Load documents
    source_url = "https://lilianweng.github.io/posts/2023-06-23-agent/"
    pages = WebBaseLoader(source_url).load()

    # Proposition chunking: one Document per sentence
    sentence_docs = []
    for page in pages:
        sentence_docs.extend(PropositionChunking(page.page_content).chunk())

    # Build vector store (the retriever is created but not used below)
    store = FAISS.from_documents(sentence_docs, embeddings_model)
    retriever = store.as_retriever(search_type="similarity")

    # HyDE query transformation, then embed the hypothetical document
    original_query = "Tell me about the documents within: 2023-06-23-agent"
    hypothetical_doc = HyDE(llm).transform(original_query)
    query_vector = embeddings_model.embed_query(hypothetical_doc)

    # Retrieve with the HyDE embedding and re-rank against the original query
    candidates = store.similarity_search_by_vector(query_vector)
    ranked = IntelligentReranking(llm).rerank(original_query, candidates)

    # Context is the text of at most five best-ranked documents
    top_docs_content = "\n\n".join(doc.page_content for doc in ranked[:5])

    # Generate answer
    answer_prompt = ChatPromptTemplate.from_messages([
        ("system", "You are an assistant that answers questions based on retrieved context."),
        ("user", "Question: {question}\n\nContext:\n{top_docs_content}")
    ])
    answer_chain = LLMChain(llm=llm, prompt=answer_prompt)
    answer = answer_chain.run(question=original_query, top_docs_content=top_docs_content)
    print("Final Answer:")
    print(answer)

# Call the main function
if __name__ == "__main__":
    main()


Final Answer:
Based on the provided context, I've retrieved some relevant information about documents within `2023-06-23-agent`.

It seems like there's a focus on organizing and managing files across different classes or projects. Some key points to take away are:

* Different classes should be stored in separate files (point 13).
* Important information should be saved immediately to files due to short-term memory limitations.
* The code within these files should be compatible with each other.

However, I couldn't find any specific documents or files mentioned within `2023-06-23-agent`. If you'd like to provide more context or details about what's inside this folder/file, I'll do my best to assist further!


Fun NBA API example: moved to its own notebook to evaluate how well it works.

In [29]:
def integrate_data_into_pipeline(data_documents):
    """Sentence-chunk the documents, index them in FAISS and return a similarity retriever."""
    # Proposition chunking: one Document per sentence, in input order
    sentence_docs = [
        sentence
        for doc in data_documents
        for sentence in PropositionChunking(doc.page_content).chunk()
    ]

    # Index the sentence chunks with the shared embeddings model
    store = FAISS.from_documents(sentence_docs, embeddings_model)
    return store.as_retriever(search_type="similarity")


def main_with_data_source():
    """Run the HyDE + re-ranking pipeline on NBA data for one player.

    Loads Documents from the selected data source, indexes them, retrieves
    with the embedding of a HyDE-generated hypothetical answer, re-ranks the
    hits against the original question and prints the final answer. Prints a
    message and returns early when the data source is invalid or no data
    could be loaded.
    """
    # Choose data source and player name
    player_name = "LeBron James"
    data_source = "nba_api"  # Options: "nba_api" or "web_scraping"

    # Initialize the appropriate data loader
    if data_source == "nba_api":
        data_loader = NBAAPIDataLoader(player_name)
    elif data_source == "web_scraping":
        data_loader = NBAWebScraperDataLoader(player_name)
    else:
        print("Invalid data source selected.")
        return

    # Load data into Documents
    data_documents = data_loader.load_data()
    if not data_documents:
        print("No data loaded.")
        return

    # Integrate data into the pipeline
    retriever = integrate_data_into_pipeline(data_documents)

    # HyDE query transformation
    hyde_transformer = HyDE(llm)
    original_query = f"What is {player_name}'s average points per game?"
    transformed_query_doc = hyde_transformer.transform(original_query)

    # Embed the hypothetical document
    transformed_embedding = embeddings_model.embed_query(transformed_query_doc)

    # Retrieve documents using the transformed embedding. The search goes
    # through the retriever's underlying vector store so the HyDE embedding is
    # actually used; querying the retriever with the raw question text would
    # leave the HyDE step without any effect.
    retrieved_docs = retriever.vectorstore.similarity_search_by_vector(transformed_embedding)

    # Intelligent re-ranking (scored against the original question)
    reranker = IntelligentReranking(llm)
    reranked_docs = reranker.rerank(original_query, retrieved_docs)

    # Prepare top documents for context
    top_docs_content = "\n\n".join([doc.page_content for doc in reranked_docs[:5]])

    # Generate an answer using the RAG pipeline
    prompt_template = ChatPromptTemplate.from_messages([
        ("system", "You are an assistant that answers questions based on retrieved context."),
        ("user", "Question: {question}\n\nContext:\n{top_docs_content}")
    ])

    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Invoke the chain to get the final answer
    response = chain.run(question=original_query, top_docs_content=top_docs_content)
    print("Final Answer:")
    print(response)

if __name__ == "__main__":
    main_with_data_source()


Final Answer:
Based on the provided context, I have retrieved information for LeBron James's points per game in different seasons. 

To calculate the average points per game across all available data, we need to add up his total points and divide by the number of games played.

Total points = 1822 + 1654 + 2175 + 1590 = 6241
Total games played = 71 + 79 + 80 + 55 = 285

Average points per game = Total points / Total games played 
= 6241 / 285 
= approximately 21.9 

So, LeBron James's average points per game across the provided seasons is around 21.9.
