In [1]:
import json
import os
from typing import Any

import psycopg2
import sqlparse
from smolagents import OpenAIServerModel, ToolCallingAgent, tool


# Keyword arguments for psycopg2.connect(); the tools in this notebook unpack them as **db_params.
# The host is fixed to the local machine; every other setting is read from the environment.
# os.environ[...] raises KeyError for a missing variable, so a misconfigured environment
# fails as soon as this cell runs rather than on the first query.
db_params = {
    "host": "127.0.0.1",
    "port": os.environ["POSTGRES_PORT"],
    "database": os.environ["POSTGRES_DB"],
    "user": os.environ["POSTGRES_USER"],
    "password": os.environ["POSTGRES_PASSWORD"],
}


In [2]:
@tool
def get_unique_column_values(column: str) -> str:
    """Return a sorted list of unique values from a given column in the 'resumes' table.

    Args:
        column (str): The column name to extract unique values from.

    Returns:
        str: JSON-encoded list of unique values or error message if the column is invalid.

    """
    # Identifiers cannot be bound as query parameters, so the column name is interpolated
    # into the SQL text. The whitelist below is what makes that interpolation safe.
    allowed_columns = {"id", "name", "gender", "title", "summary", "languages", "skills", "certifications", "hobbies"}

    if column not in allowed_columns:
        return f"Error: Column '{column}' is not allowed for unique value extraction."

    # NOTE(review): for the ARRAY columns listed in the sql_engine schema (languages, skills,
    # certifications, hobbies) DISTINCT compares whole arrays, so each returned item is a
    # list rather than an individual element -- confirm this is the intended behaviour.
    query = f'SELECT DISTINCT "{column}" FROM resumes ORDER BY "{column}"'  # noqa: S608

    conn = None
    try:
        # Connecting inside the try block turns connection failures into an error string too.
        conn = psycopg2.connect(**db_params)
        with conn.cursor() as cursor:
            cursor.execute(query)
            # Each fetched row is a 1-tuple; unwrap it to a flat list of values.
            return json.dumps([row[0] for row in cursor.fetchall()], ensure_ascii=False)
    except psycopg2.Error as e:
        return f"Database error: {e!s}"
    finally:
        if conn:
            conn.close()


@tool
def validate_sql_query(query: str) -> str:
    """Check the syntax of a SQL query without executing it on the table. Useful for safe validation.

    Args:
        query (str): The SQL query to validate.

    Returns:
        str: A message indicating whether the query is syntactically valid or describing the syntax error.

    """
    # EXPLAIN only applies to the first statement of a batch: with "SELECT 1; DROP TABLE t"
    # the text after the first ';' would be sent to the server as a regular statement.
    # Refuse anything that is not exactly one statement before touching the database.
    try:
        statement_count = len(sqlparse.parse(query))
    except Exception as e:  # noqa: BLE001
        return f"Ошибка в SQL-запросе: {e!s}"
    if statement_count != 1:
        return "Ошибка в SQL-запросе: допускается только один SQL-запрос."

    # Report connection problems as a message, like the other tools, instead of raising.
    try:
        conn = psycopg2.connect(**db_params)
    except psycopg2.Error as e:
        return f"Database error: {e!s}"

    try:
        # Read-only session: even if the input turns the command into "EXPLAIN ANALYZE ..."
        # (which really runs the statement), a write is rejected by the server.
        conn.set_session(readonly=True)
        with conn.cursor() as cursor:
            cursor.execute("EXPLAIN " + query)
    except psycopg2.Error as e:
        return f"Ошибка в SQL-запросе: {e!s}"
    finally:
        # Closing without commit discards the transaction opened for EXPLAIN.
        conn.close()
    return "Запрос синтаксически корректен."


@tool
def sql_engine(query: str) -> str:
    """Execute validated SQL SELECT queries on the 'resumes' table and returns results as a JSON string.

    Table Schema for 'resumes':
        - id (integer)              - primary key
        - contact_info (jsonb)      - {"email": "...", "phone": "...", ...}
        - experience (jsonb)        - list of work-experience blocks
        - education (jsonb)         - list of education blocks
        - portfolio (jsonb)         - list of projects {"name", "link", "description"}
        - languages (ARRAY)         - e.g. ['Русский – родной', 'Английский – B2']
        - skills (ARRAY)            - e.g. ['Kubernetes', 'Python']
        - certifications (ARRAY)    - certificate names
        - hobbies (ARRAY)           - list of hobbies
        - name (text)               - full name
        - gender (text)             - gender
        - title (text)              - current/target job title
        - summary (text)            - resume summary/about section

    Examples:
        >>> sql_engine(\'''
            SELECT id, name, title
            FROM resumes
            WHERE 'Kubernetes' = ANY(skills)
            ORDER BY id
            LIMIT 5;
        \''')
        "[ [4, 'Маргарита Кирилловна Дорофеева', 'DevOps Engineer'], ... ]"

    Important:
        • Only SELECT queries are allowed.
        • Never pass raw user input directly without validation.
        • Avoid requesting more than 1000 rows per call.
        • Double-quote column names if they contain uppercase or non-ASCII characters.
        • Queries run in a read-only transaction, so statements that modify data fail with a database error.

    Args:
        query (str): A valid SQL SELECT query.

    Returns:
        str:
            - JSON-encoded list of result rows for SELECT queries.
            - Or an error message string for invalid or forbidden queries.

    """
    # GUARDRAIL: first line of defence -- exactly one statement, classified as SELECT.
    try:
        parsed = sqlparse.parse(query)
        if len(parsed) != 1:
            return "Error: Only one SQL statement is allowed."

        if parsed[0].get_type() != "SELECT":
            return "Error: Only SELECT queries are permitted."

    except Exception as e:  # noqa: BLE001
        return f"Error while parsing SQL query: {e!s}"

    # Holds either the fetched rows or an error/status message; both are JSON-encoded below.
    output: Any = ""
    conn = None
    try:
        conn = psycopg2.connect(**db_params)
        # Second line of defence: the statement-type check above is purely lexical and still
        # classifies e.g. "WITH d AS (DELETE ... RETURNING *) SELECT ..." or "SELECT ... INTO t"
        # as SELECT. A read-only session makes the server itself reject any write.
        conn.set_session(readonly=True)
        with conn.cursor() as cursor:
            cursor.execute(query)
            try:
                output = cursor.fetchall()
            except psycopg2.ProgrammingError:
                output = "Query executed successfully, but no results to fetch."
        # No commit: nothing can have been written, and close() discards the transaction.

    except psycopg2.errors.SyntaxError as e:
        output = f"Syntax error in SQL query: {e!s}"
    except psycopg2.Error as e:
        output = f"Database error: {e!s}"
    finally:
        if conn:
            conn.close()

    # default=str keeps values json cannot encode natively (Decimal from AVG(), dates, ...)
    # from raising TypeError; they are emitted as their string form instead.
    return json.dumps(output, ensure_ascii=False, default=str)


In [3]:
# LLM backend used by the agent. The model id, endpoint URL and API token come from the
# AGENT_LLM_API_* environment variables; os.getenv returns None for an unset variable,
# so a missing setting reaches the constructor as None instead of raising in this cell.
model = OpenAIServerModel(
    model_id=os.getenv("AGENT_LLM_API_MODEL"),
    api_base=os.getenv("AGENT_LLM_API_URL"),
    api_key=os.getenv("AGENT_LLM_API_TOKEN"),
    flatten_messages_as_text=True,
)

In [8]:
# Agent that answers HR questions about the resume table using the tools registered below.
# NOTE(review): validate_sql_query is defined in this notebook but is not registered here --
# confirm whether that is intentional.
agent = ToolCallingAgent(
    tools=[sql_engine, get_unique_column_values],
    model=model,
    planning_interval=1,
    # Adjacent string literals are concatenated as-is, so every fragment must carry its own
    # trailing separator; otherwise two sentences fuse into one run of text.
    description=(
        "You are an HR assistant that helps users analyze and explore resume data from a database.\n"
        "You can execute validated SQL SELECT queries using `sql_engine`.\n"
        "NEVER assume or hallucinate any data. Always rely on actual results from the database tool `sql_engine`.\n"
        "Use short, factual answers based strictly on the database output.\n"
        "It is allowed to small-talk"
    ),
    max_steps=5,
)

In [9]:
# Quick check of the agent with a small-talk prompt ("как дела?" is Russian for "how are you?").
agent.run("как дела?")

'Хорошо, спасибо! А у вас как дела?'