In [None]:
# IPython magics: load the autoreload extension and use mode 2, so imported
# modules are re-imported before each cell executes (handy while editing llamabot).
%load_ext autoreload
%autoreload 2


In [None]:
import llamabot as lmb
from typing import Any, List, Dict
from pydantic import BaseModel, Field


# Structured-output schema passed to `decision_bot` as its `pydantic_model`:
# which tools to call, in order, and the arguments to use.
# Documented with `#` comments only, so the schema text itself stays untouched.
class ToolsToCall(BaseModel):
    # Tool names, listed in the order they should be invoked.
    tool_names: List[str] = Field(..., description="The order in which to call tools.")
    # Argument values keyed by string; a value may be None when it is not known yet.
    # NOTE(review): the key is presumably the argument name (or the tool name) —
    # the schema does not say which; confirm against the consumer.
    tool_arguments: Dict[str, Any | None] = Field(
        ...,
        description="Arguments to pass to each tool. Use None for arguments that are not known ahead of time.",
    )


# Bot that answers with a `ToolsToCall` object: given a user message it picks the
# relevant tools and their arguments.
# NOTE(review): the prompt tells the model to leave unknown arguments "blank",
# while the `tool_arguments` field description says to use None — confirm the
# two instructions are meant to be equivalent.
# NOTE(review): `decision_bot` is not called in any cell visible here.
decision_bot = lmb.StructuredBot(
    pydantic_model=ToolsToCall,
    system_prompt=lmb.system(
        "Given the following message and available tools, "
        "pick the tool(s) that you need to call on "
        "and the arguments that you need for those. "
        "Any arguments that you do not know ahead of time, just leave blank. "
        "Not all tools need to be called; only call the tools that are relevant to the user's message."
    ),
    model_name="gpt-4o",
)


@lmb.tool
def calculate_total_with_tip(bill_amount: float, tip_rate: float) -> float:
    """Calculate the total amount to pay at a restaurant including tip.

    tip_rate should be a decimal between 0 and 1 (e.g., 0.15, which represents 15% tip rate).

    :param bill_amount: The original bill amount before tip.
    :param tip_rate: The tip rate as a decimal (e.g., 0.15).
    :returns: The total amount including tip.
    """
    # NOTE(review): the docstring is left word-for-word because `@lmb.tool`
    # presumably builds the tool description shown to the model from it — confirm.

    # Reject rates outside the closed interval [0, 1] before doing any arithmetic.
    below_range = tip_rate < 0
    above_range = tip_rate > 1.0
    if below_range or above_range:
        raise ValueError("Tip rate must be between 0 and 1.0")

    # Scale the bill by (1 + rate) to add the tip on top.
    multiplier = 1 + tip_rate
    return bill_amount * multiplier


@lmb.tool
def split_bill(total_amount: float, num_people: int) -> float:
    """Calculate how much each person needs to pay when splitting a bill evenly.

    :param total_amount: The total bill amount including tip.
    :param num_people: Number of people splitting the bill.
    :returns: The amount each person should pay.
    :raises ValueError: If num_people is less than 1.
    """
    # Validate up front, in the same style as calculate_total_with_tip: a head
    # count of 0 would otherwise surface as ZeroDivisionError, and a negative
    # count would silently produce a negative share.
    if num_people < 1:
        raise ValueError("Number of people must be at least 1")
    return total_amount / num_people


# Registry of the two bill tools, keyed by function name.
# NOTE(review): relies on `@lmb.tool` preserving `__name__` on the decorated
# object — confirm.
funcs = [calculate_total_with_tip, split_bill]
tools = {func.__name__: func for func in funcs}

# Three sample requests: one that only asks for a split, one that only asks
# for a tip calculation, and one that asks for both.
split_bill_only_prompt = (
    "My dinner was $2300 in total. Split the bill between 4 people."
)
calculate_total_only_prompt = (
    "My dinner was $2300 without tips. Calculate my total with an 18% tip."
)
split_and_calculate_prompt = "My dinner was $2300 without tips. Calculate my total with an 18% tip and split the bill between 4 people."

In [None]:
from llamabot.bot.agentbot import AgentBot

# Agent wired to the two restaurant-bill tools defined earlier in the notebook.
bot = AgentBot(
    system_prompt=lmb.system("You are my assistant with respect to restaurant bills."),
    functions=[calculate_total_with_tip, split_bill],
    model_name="gpt-4o",
)

# First scenario: the request only asks for the bill to be split.
bot(split_bill_only_prompt)

In [None]:
# Second scenario: the request asks for a tip calculation and then a split.
bot(split_and_calculate_prompt)

In [None]:
# Third scenario: the request only asks for the total including tip.
bot(calculate_total_only_prompt)

Another example of using `AgentBot`: analyzing stock prices with web-scraping and statistics tools.

In [None]:
# Define some tools for web scraping and data analysis
import numpy as np
import httpx
from typing import List, Dict
import json
from loguru import logger


@lmb.tool
def scrape_stock_prices(symbol: str) -> List[float]:
    """Scrape historical stock prices from Yahoo Finance API.

    :param symbol: Stock ticker symbol
    :returns: List of closing prices
    :raises RuntimeError: If the request fails or the response does not have
        the expected structure. The original error is attached as the cause.
    """
    # Yahoo Finance chart endpoint; ask for 100 days of daily data points.
    url = f"https://query1.finance.yahoo.com/v8/finance/chart/{symbol}"
    params = {
        "range": "100d",  # Get 100 days of data
        "interval": "1d",  # Daily intervals
    }

    # Step 1: network round trip. Only transport/status failures are handled here.
    try:
        with httpx.Client() as client:
            response = client.get(url, params=params)
            response.raise_for_status()
    except httpx.HTTPError as e:
        logger.error(f"HTTP error occurred: {e}")
        # RuntimeError is still caught by existing `except Exception` handlers;
        # `from e` keeps the underlying httpx error in the traceback.
        raise RuntimeError(f"Failed to fetch data for {symbol}: {e}") from e

    # Step 2: decode and walk the payload. Besides missing keys/indices this also
    # covers a non-JSON body (ValueError) and `null` where a list or dict was
    # expected (TypeError), which previously escaped as raw errors.
    try:
        data = response.json()
        raw_closes = data["chart"]["result"][0]["indicators"]["quote"][0]["close"]
        # Drop entries without a closing price and convert the rest to float.
        prices = [float(price) for price in raw_closes if price is not None]
    except (KeyError, IndexError, TypeError, ValueError) as e:
        logger.error(f"Error parsing response: {e}")
        raise RuntimeError(f"Failed to parse data for {symbol}: {e}") from e

    return prices[-100:]  # Return last 100 prices


@lmb.tool
def calculate_percentile(data: List[float], percentile: float) -> float:
    """Calculate the percentile value from a list of numbers.

    :param data: List of numerical values
    :param percentile: The percentile to calculate (between 0 and 100)
    :returns: The percentile value
    """
    # NOTE(review): docstring kept verbatim; `@lmb.tool` presumably exposes it
    # to the model as the tool description — confirm.

    # NumPy returns a NumPy scalar; hand back a plain Python float instead.
    value = np.percentile(data, percentile)
    return float(value)


@lmb.tool
def detect_outliers(data: List[float], threshold: float = 1.5) -> List[float]:
    """Detect outliers in data using IQR method.

    :param data: List of numerical values
    :param threshold: The IQR multiplier threshold for outlier detection
    :returns: List of outlier values
    """
    # NOTE(review): docstring kept verbatim; `@lmb.tool` presumably exposes it
    # to the model as the tool description — confirm.

    # First and third quartiles in a single call.
    q1, q3 = np.percentile(data, [25, 75])

    # Fences sit `threshold` interquartile ranges outside the quartiles.
    margin = threshold * (q3 - q1)
    low_fence = q1 - margin
    high_fence = q3 + margin

    # Keep the original values (in input order) that fall outside the fences.
    return [value for value in data if value < low_fence or value > high_fence]


@lmb.tool
def summarize_statistics(data: List[float]) -> Dict[str, float]:
    """Calculate basic statistical measures for a dataset.

    :param data: List of numerical values
    :returns: Dictionary containing mean, median, std, min, and max
    :raises ValueError: If data is empty.
    """
    # Convert once instead of letting each NumPy call re-convert the list.
    values = np.asarray(data, dtype=float)

    # Fail early with a clear message. Without this check an empty input emits
    # NaN warnings for mean/median/std and then fails inside np.min with a
    # less helpful ValueError.
    if values.size == 0:
        raise ValueError("Cannot summarize an empty dataset")

    return {
        "mean": float(np.mean(values)),
        "median": float(np.median(values)),
        "std": float(np.std(values)),  # population standard deviation (NumPy default ddof=0)
        "min": float(np.min(values)),
        "max": float(np.max(values)),
    }


# Agent for stock analysis, wired to the four tools defined above in this cell:
# one scraper plus three statistics helpers.
stats_bot = AgentBot(
    system_prompt=lmb.system(
        """You are a stock market analysis assistant. Help analyze stock price data
        by providing insights about their distribution, outliers, and basic statistics.
        Always explain your findings in plain English."""
    ),
    functions=[
        scrape_stock_prices,
        calculate_percentile,
        detect_outliers,
        summarize_statistics,
    ],
    model_name="gpt-4o",
)

# Single request covering two tickers (MRNA and AAPL); the numbered steps map
# onto the four tools. The reply is kept in `response` for the next cell.
response = stats_bot(
    """Please analyze the last 100 days of MRNA and AAPL stock prices:
    1. Scrape the price data
    2. Calculate the 90th percentile price
    3. Detect any price outliers
    4. Provide basic statistical summary

    Please analyze this data and explain what you find in terms a retail investor would understand."""
)

In [None]:
# Show the text of the agent's final answer from the previous cell.
print(response.content)

In [None]:
from Bio import Entrez

# Module-level Biopython setting sent along with Entrez requests; it applies to
# every Entrez call made after this line.
Entrez.email = "your_email@example.com"  # Replace with your email


@lmb.tool
def query_pubmed(query: str, retmax: int = 5) -> List[str]:
    """Query PubMed Central (PMC) for articles matching a given search term.

    The search runs against the ``pmc`` database, so the returned identifiers
    are PMC IDs (usable with an ``efetch`` on ``db="pmc"``), not PubMed IDs.

    :param query: The search query
    :param retmax: The maximum number of results to return
    :returns: List of PMC IDs
    """
    handle = Entrez.esearch(db="pmc", term=query, retmax=retmax)
    try:
        result = Entrez.read(handle)
    finally:
        # Release the HTTP response even when parsing fails.
        handle.close()
    return result["IdList"]


# Smoke-test the tool directly (no agent involved) and keep the ID list in
# `result` for the full-text fetch in the next cell.
result = query_pubmed("antibody engineering review article")
print(result)

In [None]:
from Bio import Entrez


def get_full_text_from_pmcid(pmcid: str):
    """Download one PMC record as XML.

    :param pmcid: Identifier of the article in the ``pmc`` database.
    :returns: The raw XML payload exactly as read from the Entrez handle.
        NOTE(review): whether this is ``bytes`` or ``str`` depends on the
        Biopython version — confirm before relying on either.
    """
    # Side effect: this overwrites the module-level Entrez.email on every call.
    Entrez.email = "your_email@example.com"  # Replace with your email
    handle = Entrez.efetch(db="pmc", id=pmcid, retmode="xml")
    try:
        return handle.read()
    finally:
        # Close the response even if reading it raises.
        handle.close()


# Takes the SECOND search hit (index 1) from the earlier `query_pubmed` call.
# NOTE(review): this fails if the search returned fewer than two IDs.
pmcid = result[1]  # Replace with the actual PMC ID
full_text_xml = get_full_text_from_pmcid(pmcid)


from bs4 import BeautifulSoup


def extract_text_from_xml(xml_string: str) -> str:
    """Pull the paragraph text out of a PMC XML document.

    Every ``<p>`` element found anywhere in the document contributes its
    whitespace-stripped text; the pieces are separated by one blank line.

    :param xml_string: XML string from PMC
    :returns: Plain text content of the article
    """
    document = BeautifulSoup(xml_string, "xml")
    stripped_paragraphs = (tag.get_text().strip() for tag in document.find_all("p"))
    return "\n\n".join(stripped_paragraphs)


# Flatten the downloaded XML into plain text; `full_text` is what gets chunked
# in the following cells.
full_text = extract_text_from_xml(full_text_xml)

# Dump the whole article to the cell output for a visual check.
print(full_text)

In [None]:
from chonkie import SDPMChunker


# chonkie's SDPMChunker with its default settings.
chunker = SDPMChunker()

In [None]:
# Split the article; each resulting chunk exposes its content as `.text`
# (that attribute is what the later cells read).
chunks = chunker(full_text)

In [None]:
# Spot check: character length of the chunk at index 8.
# NOTE(review): hard-coded index — fails if there are fewer than nine chunks.
len(chunks[8].text)

In [None]:
# Implement graph construction now
import networkx as nx

from pydantic import BaseModel
from typing import List, Optional


# Leaf model reused by `Entity.mentions` and `Relation.evidence`.
# `Field` is not imported in this cell; it comes from the earlier
# `from pydantic import BaseModel, Field` import.
# NOTE(review): these models are filled in by an LLM via StructuredBot, and the
# visible code never checks the offsets or `source_id` against the source text.
# Docstring and descriptions are left untouched because they are presumably
# part of the schema shown to the model.
class TextSpan(BaseModel):
    """A span of text from a source document.

    :param text: The verbatim text content
    :param start_char: Character offset where this span starts in the source
    :param end_char: Character offset where this span ends in the source
    :param source_id: Identifier for the source document
    """

    text: str = Field(..., description="The verbatim text content.")
    start_char: int = Field(
        ..., description="Character offset where this span starts in the source."
    )
    end_char: int = Field(
        ..., description="Character offset where this span ends in the source."
    )
    source_id: str = Field(..., description="Identifier for the source document.")


# Node model of the knowledge graph. `id` is what `Relation.source` and
# `Relation.target` refer to; `name` is what the combined-graph cell later uses
# as the node key.
# The field names `id` and `type` shadow builtins only as attribute names;
# they are part of the schema and therefore kept.
class Entity(BaseModel):
    """An entity node in the knowledge graph.

    :param id: Unique identifier for this entity
    :param name: Display name of the entity
    :param type: The type/category of entity (e.g. protein, gene, disease)
    :param mentions: Text spans where this entity is mentioned
    :param metadata: Optional additional attributes
    """

    id: str = Field(..., description="The unique identifier for this entity.")
    name: str = Field(..., description="The display name of the entity.")
    type: str = Field(..., description="The type/category of entity.")
    # Required list (an empty list is allowed, omitting the field is not).
    mentions: List[TextSpan] = Field(
        ..., description="The text spans where this entity is mentioned."
    )
    # Free-form dict; defaults to None when the model supplies nothing.
    metadata: Optional[dict] = Field(
        None, description="Optional additional attributes."
    )


# Directed edge model: `source` -> `target`, both holding `Entity.id` values.
# NOTE(review): nothing in this model enforces that the referenced IDs exist
# among the graph's entities.
class Relation(BaseModel):
    """A relationship between entities in the knowledge graph.

    :param source: Entity ID for the source/subject
    :param target: Entity ID for the target/object
    :param relation_type: Type of relationship
    :param evidence: Text spans supporting this relationship
    :param confidence: Optional confidence score
    :param metadata: Optional additional attributes
    """

    source: str = Field(..., description="The source entity ID.")
    target: str = Field(..., description="The target entity ID.")
    relation_type: str = Field(..., description="The type of relationship.")
    # Required list of supporting spans.
    evidence: List[TextSpan] = Field(
        ..., description="The evidence for the relationship."
    )
    # No range is enforced on the score; None when not provided.
    confidence: Optional[float] = Field(
        None, description="The confidence score for the relationship."
    )
    # Free-form dict; defaults to None when the model supplies nothing.
    metadata: Optional[dict] = Field(
        None, description="Optional additional attributes."
    )


class KnowledgeGraph(BaseModel):
    """A knowledge graph constructed from scientific papers.

    :param entities: Collection of entities in the graph
    :param relations: Collection of relationships between entities
    :param metadata: Optional metadata about the graph
    """

    # Schema part (class docstring and field descriptions) is reproduced
    # verbatim: it is presumably what StructuredBot shows to the model.
    entities: List[Entity] = Field(..., description="The entities in the graph.")
    relations: List[Relation] = Field(
        ..., description="The relationships in the graph."
    )
    metadata: Optional[dict] = Field(
        None, description="Optional metadata about the graph."
    )

    def to_networkx(self) -> nx.MultiDiGraph:
        """Build a NetworkX MultiDiGraph that mirrors this knowledge graph.

        :returns: A MultiDiGraph with one node per entity, keyed by entity ID
            and carrying ``name``, ``type``, ``mentions`` and ``metadata`` as
            node attributes, plus one edge per relation from its source ID to
            its target ID carrying ``relation_type``, ``evidence``,
            ``confidence`` and ``metadata`` as edge attributes.
        """
        graph = nx.MultiDiGraph()

        # Nodes: one per entity, keyed by its ID.
        for item in self.entities:
            node_attrs = {
                "name": item.name,
                "type": item.type,
                "mentions": item.mentions,
                "metadata": item.metadata,
            }
            graph.add_node(item.id, **node_attrs)

        # Edges: one per relation; a multigraph keeps parallel relations apart.
        for link in self.relations:
            edge_attrs = {
                "relation_type": link.relation_type,
                "evidence": link.evidence,
                "confidence": link.confidence,
                "metadata": link.metadata,
            }
            graph.add_edge(link.source, link.target, **edge_attrs)

        return graph

In [None]:
# Extract a mini-knowledge graph from the individual chunks
from tqdm import tqdm

# One KnowledgeGraph per chunk, in chunk order.
kgs = []

# Structured bot whose answers are shaped as `KnowledgeGraph`.
kgbot = lmb.StructuredBot(
    pydantic_model=KnowledgeGraph,
    system_prompt=lmb.system("You are a biomedical knowledge graph builder."),
    model_name="gpt-4o-mini",
)
# One model call per chunk. Only the chunk text is sent, so span offsets and
# `source_id` values are whatever the model produces.
# Results are appended as they arrive, so `kgs` keeps partial progress if a
# later call raises.
for chunk in tqdm(chunks):
    kg = kgbot(chunk.text)
    kgs.append(kg)

In [None]:
# Create a combined NetworkX MultiDiGraph from the per-chunk knowledge graphs
import warnings

G = nx.MultiDiGraph()

# Relations whose endpoints cannot be resolved are counted, not fatal.
skipped_relations = 0

for kg in kgs:
    # Entities become nodes keyed by NAME, so the same entity extracted from
    # different chunks lands on one node.
    # NOTE: when a name repeats, add_node updates the existing node, so the
    # attributes from the most recent chunk replace the earlier ones.
    for entity in kg.entities:
        G.add_node(
            entity.name,  # Use entity name as node identifier
            type=entity.type,
            mentions=entity.mentions,
            metadata=entity.metadata,
        )

    # Entity IDs are only meaningful inside one chunk's graph. Build the
    # id -> name lookup once per chunk; `setdefault` keeps the first entity
    # for a duplicated ID.
    id_to_name = {}
    for entity in kg.entities:
        id_to_name.setdefault(entity.id, entity.name)

    # Relations become edges between the resolved entity names.
    for relation in kg.relations:
        source_name = id_to_name.get(relation.source)
        target_name = id_to_name.get(relation.target)

        # The graphs are model-generated, so a relation may reference an ID
        # that was never declared. Skip it instead of aborting the whole merge.
        if source_name is None or target_name is None:
            skipped_relations += 1
            continue

        G.add_edge(
            source_name,
            target_name,
            relation_type=relation.relation_type,
            evidence=relation.evidence,
            confidence=relation.confidence,
            metadata=relation.metadata,
        )

if skipped_relations:
    warnings.warn(
        f"Skipped {skipped_relations} relation(s) that reference unknown entity IDs."
    )

G  # Return the populated graph

In [None]:
import hvplot.networkx as hvnx

# Render the combined graph with hvplot's NetworkX drawing helper.
hvnx.draw(G)

In [None]:
# Map every node to the edges touching it, as
# (neighbour, relation_type, evidence) tuples: outgoing edges first, then incoming.
# NOTE: the tuples do not record direction.
node_relations = {}

for node in G.nodes():
    # Edges leaving this node: the neighbour is the edge target.
    outgoing = [
        (neighbour, attrs["relation_type"], attrs["evidence"])
        for _, neighbour, attrs in G.out_edges(node, data=True)
    ]
    # Edges arriving at this node: the neighbour is the edge source.
    incoming = [
        (neighbour, attrs["relation_type"], attrs["evidence"])
        for neighbour, _, attrs in G.in_edges(node, data=True)
    ]
    node_relations[node] = outgoing + incoming

from llamabot import StructuredBot
from pydantic import BaseModel


# Output schema for `summarizer`. Only `summary` is read by the loop that
# follows; `node_name` is returned by the model but not used there.
class NodeSummary(BaseModel):
    """Summary of a node's relationships and context in the knowledge graph."""

    node_name: str
    summary: str


# Bot that turns a node's relationship listing into a `NodeSummary`.
# NOTE(review): unlike the other bots in this notebook, the system prompt is a
# plain string (not `lmb.system(...)`), the arguments are positional, and no
# `model_name` is given, so the library default applies — confirm this is intended.
summarizer = StructuredBot(
    """You are an expert at summarizing relationships between entities in a knowledge graph.
    Given information about a node and its relationships, generate a clear and concise summary
    that captures the key connections and their nature.""",
    NodeSummary,
)

# Node name -> summary text produced by `summarizer`
node_summaries = {}

for node, relations in node_relations.items():
    # One bullet per relationship: "- <neighbour> (<relation type>): <evidence>"
    bullet_lines = []
    for rel in relations:
        bullet_lines.append(f"- {rel[0]} ({rel[1]}): {rel[2]}")
    relations_text = "\n".join(bullet_lines)

    # Prompt = node name, its relationship bullets, then the instruction
    prompt = f"""Node: {node}

Relationships:
{relations_text}

Generate a concise summary of this node's role and relationships in the knowledge graph."""

    # Ask the bot; keep the text only when a truthy response came back
    summary = summarizer(prompt)
    if not summary:
        continue
    node_summaries[node] = summary.summary

# Show each node followed by its summary
for node, summary in node_summaries.items():
    print(f"\n{node}:", summary, sep="\n")

In [None]:
# Community detection on the combined graph
from networkx.algorithms import community

# Louvain partitioning (similar to Leiden).
# NOTE(review): no seed is passed, so the partition may differ between runs.
communities = community.louvain_communities(G)

# Number the communities in the order returned and store each as a list of node names
community_mapping = {
    community_id: list(nodes) for community_id, nodes in enumerate(communities)
}

# List the members of every community, one bullet per node
for community_id, nodes in community_mapping.items():
    members_text = "\n".join(f"- {node}" for node in nodes)
    print(f"\nCommunity {community_id}:", members_text, sep="\n")

In [None]:
# Create community summaries by combining node summaries
community_summaries = {}

from llamabot import StructuredBot
from pydantic import BaseModel


# Output schema for `community_summarizer`. Only `summary` is stored by the
# loop that follows; `community_name` is returned by the model but not kept.
# `Field` comes from the earlier `from pydantic import BaseModel, Field` import.
class CommunitySummary(BaseModel):
    """Summary of a community of nodes in the knowledge graph."""

    summary: str = Field(..., description="A concise summary of the community.")
    community_name: str = Field(
        ..., description="A name for the community that captures the key themes."
    )


# Bot that condenses the node summaries of one community into a `CommunitySummary`.
# NOTE(review): same construction style as `summarizer` (plain-string prompt,
# positional arguments, no explicit `model_name`) — confirm the default model is intended.
community_summarizer = StructuredBot(
    """You are an expert at analyzing and summarizing groups of related concepts.
    Given a set of nodes and their summaries from a knowledge graph, generate a concise
    summary that captures the key themes and relationships that unite this community.""",
    CommunitySummary,
)

for community_id, nodes in community_mapping.items():
    # Per-node summaries for this community, with a placeholder for nodes
    # that never received one
    community_node_summaries = {}
    for node in nodes:
        community_node_summaries[node] = node_summaries.get(
            node, "No summary available"
        )

    # One "Node: ... / Summary: ..." entry per node; chr(10) is a newline
    node_section = chr(10).join(
        f'Node: {node}{chr(10)}Summary: {summary}{chr(10)}'
        for node, summary in community_node_summaries.items()
    )

    # Prompt = header line, the node entries, then the instruction
    prompt = f"""Community {community_id} contains the following nodes and their summaries:

{node_section}

Generate a concise summary that captures the key themes and relationships that unite this community of nodes."""

    # Ask the bot; keep the text only when a truthy response came back
    summary = community_summarizer(prompt)
    if not summary:
        continue
    community_summaries[community_id] = summary.summary

# Show every community summary under a common heading
print("\nCommunity Summaries:")
for community_id, summary in community_summaries.items():
    print(f"\nCommunity {community_id}:", summary, sep="\n")

In [None]:
import llamabot as lmb

# Retrieval bot over a document store named "biomedical-knowledge-graph";
# the store is filled in the next cell.
query_bot = lmb.QueryBot(
    system_prompt=lmb.system("You are a biomedical knowledge graph explorer."),
    model_name="gpt-4o-mini",
    collection_name="biomedical-knowledge-graph",
)

In [None]:
# Index three kinds of text side by side: raw article chunks, per-node
# summaries and per-community summaries.
# NOTE(review): re-running this cell presumably adds the same documents again — confirm.
query_bot.docstore.extend([c.text for c in chunks])
query_bot.docstore.extend(list(node_summaries.values()))
query_bot.docstore.extend(list(community_summaries.values()))

# Example query, left disabled:
# query_bot("Tell me about the major communities in the knowledge graph.")

In [None]:
# Build a full-text-search index over the "document" field of the docstore's table.
# NOTE(review): presumably a LanceDB table API — confirm against the docstore backend.
query_bot.docstore.table.create_fts_index(field_names=["document"])

In [None]:
# Retrieval query 1: asks for a tabular answer about NADPH and FAD in fibrosis.
query_bot(
    "Tell me about NADPH and FAD and what the paper says about their role in fibrosis. Return in tabular format."
)

In [None]:
# Retrieval query 2: a methods question combining ImageJ and flow cytometry.
query_bot("How do we use ImageJ and flow cytometry together to measure NADPH?")

In [None]:
# Retrieval query 3: a reagent question (stain and fluorophore for NADPH).
query_bot(
    "What stain should I be using to stain NADPH, and what fluorophore is it usually coupled to?"
)