In [1]:
import sqlite3
from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel
from dotenv import load_dotenv
import os
import json

# load_dotenv() is expected to populate the process environment from a local
# .env file; the Cohere API key is then looked up under the name "cohere_key".
load_dotenv()
cohere_token = os.environ.get("cohere_key")

# Language model that the agent will call, configured with the Cohere key.
model = LiteLLMModel(model_id="command-r", api_key=cohere_token)

# Code-writing agent whose only tool is DuckDuckGo web search.
agent = CodeAgent(
    model=model,
    tools=[DuckDuckGoSearchTool()],
)

# Columns read from the `instances` table, in SELECT order. The same names are
# used as the keys of the dictionaries sent to the agent.
_INSTANCE_COLUMNS = (
    "instance_id",
    "nr_cities",
    "nr_salesmen",
    "average_distance",
    "stddev_distance",
    "density",
    "salesmen_ratio",
    "bounding_box_area",
    "aspect_ratio",
    "spread",
    "cluster_compactness",
    "mst_total_length",
    "entropy_distance_matrix",
)

# Columns read from the `algorithms` table, in SELECT order.
_ALGORITHM_COLUMNS = (
    "instance_id",
    "strategy",
    "total_cost",
    "normalized_cost",
    "time_taken",
    "distance_gap",
    "efficiency",
)


def _query_as_dicts(connection, query, columns):
    """Run *query* on *connection* and return each row as a dict keyed by *columns*."""
    rows = connection.execute(query).fetchall()
    return [dict(zip(columns, row)) for row in rows]


def _build_task(data_str):
    """Return the agent prompt with *data_str* (the JSON data dump) embedded."""
    return f"""
    You are an AI agent tasked with selecting the best algorithm for solving each instance of the mTSP problem.
    Below is the data from the training instances and their results for different strategies:

    {data_str}

    Your task is to:
    1. Analyze the performance of each strategy (Greedy, Branch and Cut, Ant Colony, KMeans-Greedy) on the training instances.
    2. Based on this analysis, recommend the best strategy for each test instance.
    3. Justify your recommendations based on the training data.
    4. Consider the following performance metrics: total cost, normalized_cost, time taken, efficiency, and distance gap. The total cost is the most important metric, followed by normalized_cost and efficiency. Time taken is less important, but it should be considered for strategies that are computationally expensive.
    5. Take into account that Branch and Cut and Ant Colony are more computationally expensive, while Greedy and KMeans-Greedy are faster but may not always yield the best results.
    6. Branch and Cut and Ant Colony have a 100 seconds time limit, so they may output a weaker solution if the time limit is reached or even no solution at all.

    Output your recommendations in the following format:
    [
    {{ "instance_id": <test_instance_id>, "recommended_strategy": <strategy>, "justification": <reason> }}
    ]   
    """


# Define the function to choose the best algorithm
def choose_best_algorithm(train_db_file="train_mTSP.sqlite3", test_db_file="test_mTSP.sqlite3") -> None:
    """Ask the agent to recommend a strategy for every test instance.

    Reads all rows of the ``instances`` and ``algorithms`` tables from the
    training database and all rows of the ``instances`` table from the test
    database, serializes them as JSON inside a prompt, runs the module-level
    ``agent`` on that prompt and prints its response.

    Args:
        train_db_file: Path of the SQLite database holding the training
            ``instances`` and ``algorithms`` tables.
        test_db_file: Path of the SQLite database holding the test
            ``instances`` table.

    Returns:
        None. The agent's response is printed to standard output.

    Errors raised by ``sqlite3`` or by the agent propagate to the caller; the
    database connections are closed in that case as well.
    """
    # Imported locally so the function does not depend on a file-level import.
    from contextlib import closing

    instances_query = f"SELECT {', '.join(_INSTANCE_COLUMNS)} FROM instances"
    # Only keep algorithm results that belong to a known instance.
    algorithms_query = (
        f"SELECT {', '.join(_ALGORITHM_COLUMNS)} FROM algorithms "
        "WHERE instance_id IN (SELECT instance_id FROM instances)"
    )

    # closing() guarantees close() even when a query fails; using the sqlite3
    # connection itself as a context manager would only manage transactions.
    with closing(sqlite3.connect(train_db_file)) as conn_train:
        train_instances = _query_as_dicts(conn_train, instances_query, _INSTANCE_COLUMNS)
        train_algorithms = _query_as_dicts(conn_train, algorithms_query, _ALGORITHM_COLUMNS)

    with closing(sqlite3.connect(test_db_file)) as conn_test:
        test_instances = _query_as_dicts(conn_test, instances_query, _INSTANCE_COLUMNS)

    # Prepare the data for the agent
    data = {
        "train_instances": train_instances,
        "train_algorithms": train_algorithms,
        "test_instances": test_instances,
    }
    data_str = json.dumps(data, indent=4)

    response = agent.run(_build_task(data_str))
    print(response)

# Run the selection on the training and test databases in the working directory.
choose_best_algorithm(
    train_db_file="train_mTSP.sqlite3",
    test_db_file="test_mTSP.sqlite3",
)


  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 