## Install dependencies

In [52]:
%pip install cohere --q --disable-pip-version-check

# The Cohere SDK is used to simplify building RAG systems.

Note: you may need to restart the kernel to use updated packages.


## Utilities

In [53]:
def print_tool_calls(tool_calls):
    """Pretty-print the tool calls proposed by the model.

    Args:
        tool_calls: Sequence of objects exposing ``name`` (str) and
            ``parameters`` (dict) attributes.

    For ``python_interpreter`` calls the ``code`` parameter is printed one
    source line per output line, indented by two spaces, so multi-line code
    stays readable. Any other tool is printed as its name followed by its
    raw parameters.
    """
    call_count = len(tool_calls)

    # Announce whether the model asked for one tool call or several at once
    if call_count > 1:
        print("The model suggests making PARALLEL QUERIES!!!")
    else:
        print("The model suggests making SINGLE TOOL CALL!!!")

    for i, tool_call in enumerate(tool_calls):
        # With more than one tool call, separate each with a numbered header
        if call_count > 1:
            print(f"== Parallel Tool Call #{i+1}")

        if tool_call.name == "python_interpreter":
            print(f"{tool_call.name} with this code:")
            code = tool_call.parameters.get('code', '')
            # Join with newlines: joining with '' would collapse the whole
            # program onto a single unreadable line.
            print("\n" + "\n".join(f"  {line}" for line in code.splitlines()))
        else:
            # For non-python_interpreter tool calls, just print the parameters
            print(f"{tool_call.name}")
            print(f"{tool_call.parameters}")


In [54]:
def print_step(step):
    """Print a bold blue banner for the given zero-based step index (shown 1-based)."""
    banner = f"\n\n\033[1;34m == STEP {step+1} \033[0m\n"
    print(banner)

def print_plan(text):
    """Print the model's plan: a bold green label followed by the plan text in bold."""
    rendered = f"\033[1;32mThe plan is:\033[0m \033[1m{text}\033[0m\n"
    print(rendered)

def print_final_answer(text):
    """Print the final answer: a bold green label followed by the answer text in bold."""
    rendered = f"\n\n \033[1;32mThe final answer is:\033[0m \033[1m{text}\033[0m\n"
    print(rendered)

def print_tool_results(tool_results):
    """Print every item of ``tool_results`` on its own line.

    Each line carries a bold magenta "tool_results:" label followed by the
    item rendered in bold.
    """
    for entry in tool_results:
        rendered = f"\033[1;35mtool_results:\033[0m \033[1m{entry}\033[0m"
        print(rendered)


## Define tools for the RAG system:

### 1. Search Engine - Tavily

In [55]:
# Install the required package
%pip install tavily-python --q --disable-pip-version-check


Note: you may need to restart the kernel to use updated packages.


In [56]:

# Import the required class
from tavily import TavilyClient

# Initialize the client with an API key read from the environment, so that no
# secret is stored in the notebook itself. TAVILY_API_KEY must already be set
# in the kernel's environment when this cell runs.
import os

tavily_api_key = os.getenv("TAVILY_API_KEY")
if not tavily_api_key:
    raise RuntimeError(
        "TAVILY_API_KEY is not set; export it before running this cell."
    )
tavily_client = TavilyClient(api_key=tavily_api_key)

# Define a web search engine
def web_search(query: str, max_results: int = 3) -> list[dict]:
    """Search the web through the module-level ``tavily_client``.

    Args:
        query: Free-text search query.
        max_results: Maximum number of results to request. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        The value stored under the ``'results'`` key of the search response.
    """
    response = tavily_client.search(query, max_results=max_results)
    return response['results']

# Tool specification handed to the model (passed in the `tools` list of co.chat).
# "name" matches the "web_search" key of functions_map, which is how a tool call
# emitted by the model is routed back to the web_search() function.
web_search_tool = {
    "name": "web_search",
    "description": "Returns a list of relevant document snippets for a textual query retrieved from the internet",
    # One entry per argument the model may supply; here only `query`,
    # mirroring web_search()'s parameter.
    "parameter_definitions": {
        "query": {
            "description": "Query to search the internet with",
            "type": "str",
            "required": True
        }
    }
}


### 2. Python Interpreter - Used by the RAG agent to execute the code it writes and/or to access a spreadsheet

In [57]:
import io, contextlib

def python_interpreter(code: str) -> list[dict]:
    """Execute ``code`` and report what it printed to stdout.

    The code is run with ``exec`` against this module's globals, so names it
    defines persist between calls and it can read or overwrite any global.

    NOTE(review): ``exec`` offers no isolation. The code originates from the
    model and runs with the full privileges of this process; run this only in
    an environment where that is acceptable.

    Args:
        code: Python source to execute.

    Returns:
        A single-element list holding a dict:
        * on success: ``{"console_output", "executed_code"}``;
        * on failure: ``{"error", "console_output", "executed_code"}``, where
          ``error`` is ``"<ExceptionType>: <message>"`` and ``console_output``
          is whatever was printed before the exception was raised.
    """
    output = io.StringIO()
    try:
        # Redirect stdout to capture print statements
        with contextlib.redirect_stdout(output):
            exec(code, globals())
    except Exception as e:
        return [{
            # str(e) alone can be uninformative (e.g. KeyError('x') -> "'x'"),
            # so the exception type is included as well.
            "error": f"{type(e).__name__}: {e}",
            # Keep partial output: it helps diagnose how far the code got.
            "console_output": output.getvalue(),
            "executed_code": code
        }]

    return [{
        "console_output": output.getvalue(),
        "executed_code": code
    }]


#### Defining the Python Interpreter Tool:


In [58]:
# Tool specification for the python_interpreter() function, passed to the model
# in the `tools` list of co.chat. "name" matches the "python_interpreter" key of
# functions_map.
# NOTE(review): the description tells the model the code runs in a sandbox
# without internet access, but python_interpreter() simply calls
# exec(code, globals()); nothing visible in this notebook enforces that claim.
python_interpreter_tool = {
    "name": "python_interpreter",
    "description": "Executes python code and returns the result. The code runs in a static sandbox without internet access and without interaction.",
    # One entry per argument the model may supply; here only `code`,
    # mirroring python_interpreter()'s parameter.
    "parameter_definitions": {
        "code": {
            "description": "Python code to execute",
            "type": "str",
            "required": True
        }
    }
}


#### Displaying the first few rows of the dataset

In [59]:
import pandas as pd

# Path of the dataset, relative to the notebook's working directory.
file_path = './spotify_dataset.csv'
# Load the whole CSV into a DataFrame.
# NOTE(review): the preview below shows 'Unnamed: 0.1' and 'Unnamed: 0' columns,
# which look like index columns written out by earlier saves — confirm and
# consider dropping them.
spotify_data = pd.read_csv(file_path)
# Last expression of the cell: display the first three rows as a preview.
spotify_data.head(3)


Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,...,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,...,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,...,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic


In [60]:
# Dispatch table from tool name to the Python callable that implements it.
# The keys must match the "name" fields of the tool specifications, because the
# agent loop looks functions up with functions_map[tool_call.name].
functions_map = {
    "web_search": web_search,
    "python_interpreter": python_interpreter
}

## Cohere Model to power Agentic RAG

In [61]:
# Ensure the dotenv usage:
%pip install python-dotenv


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [67]:
import cohere
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment
load_dotenv()

# Read the Cohere API key from the environment; it is never hardcoded or printed.
# CO_API_KEY is also accepted as a fallback variable name.
api_key = os.getenv('COHERE_API_KEY') or os.getenv('CO_API_KEY')
if not api_key:
    # Fail here with a clear message instead of an obscure error on the first API call
    raise RuntimeError(
        "No Cohere API key found: set COHERE_API_KEY in the environment or in a .env file."
    )

co = cohere.Client(api_key=api_key)

# Specify the model
model = "command-r-plus"



In [68]:
# Smoke test: request a short completion to check that the client and API key work.
# NOTE(review): `cohere.CohereError` and its `.message` attribute depend on the
# installed SDK version — confirm both exist in the version this notebook targets.
try:
    # Generate text using the Cohere API
    response = co.generate(
        model=model,
        prompt="Once upon a time",
        max_tokens=50
    )
    
    # Access the generated text (first generation only)
    print(response.generations[0].text)
    
except cohere.CohereError as e:
    # Report the API error instead of letting the cell fail with a traceback
    print(f"An error occurred: {e.message}")

Once upon a time, in a magical kingdom far away, there lived a brave and kind-hearted princess named Ella. Ella had a loving heart and a strong sense of justice, always standing up for those who couldn't stand up for themselves. She


#### Complex User Query

In [69]:
# User query sent to the model as the first chat message. Besides the question
# itself, it tells the model where the dataset lives and in which order to use
# the two tools (dataset first, internet as a fallback).
message = """What's the age and citizenship of the artists who had the top 3 most streamed songs on Spotify in 2023

You have access to a dataset with information about Spotify songs from the past 10 years, located at ./spotify_dataset.csv.
You also have access to the internet.
You must use the dataset when you can, and if stuck you can use the internet.
Remember to inspect the dataset to understand its structure before trying to query it. Take it step by step.
"""


#### Model Plan Execution:


In [70]:
# First turn: send the user query together with both tool specifications.
# The reply carries the model's plan in `response.text`; the agent loop further
# down reads `response.tool_calls` to see which tools it wants to run.
response = co.chat(
    model=model,
    message=message,
    tools=[web_search_tool, python_interpreter_tool],
    temperature=0,  # presumably to keep the plan as repeatable as possible
)

# Show the plan produced by the model
print(response.text)

I will first inspect the dataset to understand its structure and see if it contains the information I need. Then, I will write and execute Python code to find the top 3 most streamed songs on Spotify in 2023 and the age and citizenship of the artists.


#### Processing Tool Calls in a Loop:


In [71]:
# Upper bound on model/tool round-trips, so that a model which keeps requesting
# tools cannot loop (and consume API quota) indefinitely.
MAX_STEPS = 10

# Tool specifications offered to the model on every turn
agent_tools = [web_search_tool, python_interpreter_tool]

step = 0
while response.tool_calls and step < MAX_STEPS:

    # Step id
    print_step(step)

    # The model plan
    print_plan(response.text)

    # Tool calls suggested by the model
    print_tool_calls(response.tool_calls)

    # Execute the tool calls
    tool_results = []
    for tool_call in response.tool_calls:
        tool_fn = functions_map.get(tool_call.name)
        if tool_fn is None:
            # The model asked for a tool we do not have: report that back to it
            # as a tool output instead of crashing the loop with a KeyError.
            outputs = [{"error": f"Unknown tool: {tool_call.name}"}]
        else:
            outputs = tool_fn(**tool_call.parameters)
        tool_result = {"call": tool_call, "outputs": outputs}
        tool_results.append(tool_result)
        print_tool_results(tool_result['outputs'])

    # Let the model decide the next step: calling more tools? or answering?
    response = co.chat(
        model=model,
        message="",
        chat_history=response.chat_history,
        tools=agent_tools,
        tool_results=tool_results,
        temperature=0,
    )

    step += 1

# Make it visible when the loop ended because of the cap rather than because
# the model finished requesting tools.
if response.tool_calls:
    print(f"Stopped after {MAX_STEPS} steps with tool calls still pending.")

# Print final answer
print_final_answer(response.text)




[1;34m == STEP 1 [0m

[1;32mThe plan is:[0m [1mI will first inspect the dataset to understand its structure and see if it contains the information I need. Then, I will write and execute Python code to find the top 3 most streamed songs on Spotify in 2023 and the age and citizenship of the artists.[0m

The model suggests making SINGLE TOOL CALL!!!
python_interpreter with this code:

   import pandas as pd    df = pd.read_csv('spotify_dataset.csv')  print(df.head())  print(df.columns)
[1;35mtool_results:[0m [1m{'console_output': "   Unnamed: 0                track_id                 artists  \\\n0           0  5SuOikwiRyPMVoIQDJUgSV             Gen Hoshino   \n1           1  4qPNDBW1i3p13qLCt0Ki3A            Ben Woodward   \n2           2  1iJBSr7s7jYXzM8EGcbK5b  Ingrid Michaelson;ZAYN   \n3           3  6lfxq3CG4xtTiEg7opyCyx            Kina Grannis   \n4           4  5vjLSffimiIP26QG5WcN2K        Chord Overstreet   \n\n                                          album_name  \\

### NOTE
##### From the output you can see that the agentic RAG system was able to run the first 2 steps, using Python interpreter tool calls to process the .csv data and come up with the answer.
##### However, because a free API key was used, we ran out of tokens, so the system couldn't run the last step, which was the search engine tool call.