In [3]:
# Shell escape: list the models available to the local Ollama installation.
!ollama list

NAME                 ID              SIZE      MODIFIED       
llama3:latest        365c0bd3c000    4.7 GB    18 minutes ago    
deepseek-r1:7b       0a8c26691023    4.7 GB    26 hours ago      
deepseek-r1:7b-8k    d823319ccf43    4.7 GB    30 hours ago      


In [5]:
## time
from smolagents import OpenAIServerModel, CodeAgent, ToolCallingAgent, HfApiModel, tool, GradioUI
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import os

# Load variables via python-dotenv first, so the lookups below can see
# anything it adds to the process environment.
load_dotenv()

# Model identifiers and the Hugging Face token; each is None when unset.
tool_model_id = os.environ.get("TOOL_MODEL_ID")
reasoning_model_id = os.environ.get("REASONING_MODEL_ID")
huggingface_api_token = os.environ.get("HUGGINGFACE_API_TOKEN")

def get_model(model_id, api_base="http://localhost:11434/v1", api_key="ollama"):
    """Build the LLM backend used by an agent.

    The backend is selected by the ``USE_HUGGINGFACE`` environment variable
    (default ``"yes"``). Only the value ``"yes"`` (case-insensitive) selects
    the Hugging Face API; any other value selects an OpenAI-compatible server.

    Args:
        model_id: Identifier of the model to load on the chosen backend.
        api_base: Base URL of the OpenAI-compatible server. Only used when
            ``USE_HUGGINGFACE`` is not ``"yes"``. Defaults to a local
            endpoint on port 11434.
        api_key: API key sent to the OpenAI-compatible server. Only used when
            ``USE_HUGGINGFACE`` is not ``"yes"``. Defaults to ``"ollama"``.

    Returns:
        An ``HfApiModel`` authenticated with ``huggingface_api_token``, or an
        ``OpenAIServerModel`` pointing at ``api_base``.
    """
    # Read on every call so the backend can be switched without redefining
    # this function.
    using_huggingface = os.getenv("USE_HUGGINGFACE", "yes").lower() == "yes"
    if using_huggingface:
        return HfApiModel(model_id=model_id, token=huggingface_api_token)

    return OpenAIServerModel(
        model_id=model_id,
        api_base=api_base,
        api_key=api_key,
    )

# Reasoning agent used inside the RAG tool: it has no tools of its own,
# no base tools, and is capped at two steps.
reasoning_model = get_model(reasoning_model_id)
reasoner = CodeAgent(
    model=reasoning_model,
    tools=[],
    add_base_tools=False,
    max_steps=2,
)

# Embedding model for the vector store, run on CPU.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs=dict(device="cpu"),
)

# Chroma store persisted under "chroma_db". The path is relative, so it is
# resolved against the current working directory; the __file__-based
# alternative below is left disabled.
#db_dir = os.path.join(os.path.dirname(__file__),"chroma_db")
db_dir = "chroma_db"
vectordb = Chroma(embedding_function=embeddings, persist_directory=db_dir)

@tool
def rag_with_reasoner(user_query: str) -> str:
    """
    This is a RAG tool that takes in a user query and searches for relevant content from the vector database.
    The result of the search is given to a reasoning LLM to generate a response, so what you'll get back
    from this tool is a short answer to the user's question based on RAG context.

    Args:
        user_query: The user's question to query the vector database with.
    """
    # NOTE: the docstring above is what smolagents' @tool exposes to the
    # calling agent as the tool description, so its wording is kept verbatim.

    # Fetch up to three chunks most similar to the query.
    docs = vectordb.similarity_search(user_query, k=3)

    # Join the chunk texts, separated by blank lines.
    context = "\n\n".join(doc.page_content for doc in docs)

    # Prompt for the reasoning agent. The text is unchanged from the original
    # triple-quoted literal, including the indentation on its second and
    # third lines; it is spelled out piecewise so that whitespace is explicit.
    prompt = (
        "Based on the following context, answer the user's question. Be concise and specific.\n"
        "    If there isn't sufficient information, give as your answer a better query to perform RAG with.\n"
        "    \n"
        "Context:\n"
        f"{context}\n"
        "\n"
        f"Question: {user_query}\n"
        "\n"
        "Answer:"
    )

    # reset=False asks the agent to continue from its previous run instead of
    # starting with a cleared memory.
    response = reasoner.run(prompt, reset=False)

    # The agent's final answer can be any Python object (for example a number
    # or a list), while this tool is declared to return text, so coerce it.
    return str(response)

# Primary agent: directs the conversation and can call the RAG tool.
tool_model = get_model(tool_model_id)
agent = CodeAgent(
    model=tool_model,
    tools=[rag_with_reasoner],
    max_steps=4,
    verbosity_level=2,
)

# Run one sample question end to end and print the final answer.
agent_input = "Compare and contrast the services offered by RankBoost and Omni Marketing"
agent_output = agent.run(agent_input)
print("Final output:", agent_output, sep="\n")


Final output:
Omni Marketing


In [7]:
# Same question, this time through a ToolCallingAgent.
# NOTE: when the model is Ollama's deepseek-r1:7b-8k, this run fails with
#   "registry.ollama.ai/library/deepseek-r1:7b-8k does not support tools"
primary_agent = ToolCallingAgent(
    model=tool_model,
    tools=[rag_with_reasoner],
    add_base_tools=False,
    max_steps=3,
)

agent_input = "Compare and contrast the services offered by RankBoost and Omni Marketing"
agent_output = primary_agent.run(agent_input)
print("Final output:", agent_output, sep="\n")


AgentGenerationError: Error in generating tool call with model:
Error code: 400 - {'error': {'message': 'registry.ollama.ai/library/deepseek-r1:7b-8k does not support tools', 'type': 'api_error', 'param': None, 'code': None}}