# khởi tạo agent

In [None]:
import anthropic
import pandas as pd

client = anthropic.Anthropic()
history = [] # History story

MODEL_NAME = "claude-3-7-sonnet-20250219"

# Định nghĩa hàm truy vấn

In [None]:
# 1. Hàm đọc CSV và tạo DataFrame
def get_dataframe_revit(file_path):
    try:
        df = pd.read_csv(file_path, encoding='utf-8')
        return df
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        return None

# 2. Hàm lấy danh sách các cột trong DataFrame
def get_all_columns(df: pd.DataFrame) -> str:
    return ", ".join(str(item) for item in df.columns.tolist())

# 3. Hàm lấy các giá trị duy nhất trong một cột
def get_values_unique_in_column(df: pd.DataFrame, col: str) -> str:
    return ", ".join(str(item) for item in df[col].unique().tolist())

# 
def get_len_values_unique_in_column(df: pd.DataFrame, col: str) -> str:
    return str(len(df[col].unique().tolist()))

# 4. Hàm tìm kiếm giá trị trong cột
def get_elements_in_column(df: pd.DataFrame, col_name: str, value_search: str) -> str:
    category_column = [col for col in df.columns if col_name.lower() in col.lower()]
    if not category_column:
        return []
    values = [v for v in df[category_column[0]] if value_search.lower() in v.lower()]
    return ", ".join(str(item) for item in values)

def get_quantity_elements_in_column(df: pd.DataFrame, col_name: str, value_search: str) -> str:
    category_column = [col for col in df.columns if col_name.lower() in col.lower()]
    if not category_column:
        return []
    values = [v for v in df[category_column[0]] if value_search.lower() in v.lower()]
    return str( len(values) )

def get_total_area_elements(df : pd.DataFrame, column : str, element : str ) -> str:
  """
  Calculates the total area of elements whose Category contains "element" (case-insensitive).

  Args:
    data: A dictionary where keys are column names ('Column', 'Area')
          and values are dictionaries mapping indices to the corresponding values.
          Example:
          {'Column': {0: 'Revit Walls', ...}, 'Area': {0: 6.658051200000002, ...}}

  Returns:
    The total area of elements with "element" in their category.
  """
  total_area = 0
  data = df[[column, 'Area']].dropna().reset_index(drop=True).to_dict()
  categories = data.get(column, {})
  areas = data.get('Area', {})

  for index, category in categories.items():
    if isinstance(category, str) and element.lower() in category.lower():
      area = areas.get(index)
      if isinstance(area, (int, float)):
        total_area += area

  return str(total_area)

# Định nghĩa tools cho mô hình

In [None]:
# 5. Định nghĩa các tools cho Model.
tools = [
    {
        "name": "get_all_columns",
        "description": "Returns all column names and the quantity of columns in the data, The column name may be external_id, ElementId, Name, Category, CategoryId, Workset, Edited by ...",
        "input_schema": {
            "type": "object",
            "properties": {
                "column_names": {
                    "type": "string",
                    "description": "Columns name may be: dbId, external_id, ElementId, Name, Category, CategoryId, Workset, Edited by..."
                }
            },
            "required": ["column_names"]
        },
    },
    {
        "name": "get_values_unique_in_column",
        "description": "The column name may be external_id, ElementId, Name, Category, CategoryId, Workset, Edited by ...",
        "input_schema": {
            "type": "object",
            "properties": {
                "column_name": {
                    "type": "string",
                    "description": "The column name may be external_id, ElementId, Name, Category, CategoryId, Workset, Edited by ..."
                },
            },
            "required": ["column_name"]
        },
    },
    {
        "name": "get_len_values_unique_in_column",
        "description": "The column name may be external_id, ElementId, Name, Category, CategoryId, Workset, Edited by ...",
        "input_schema": {
            "type": "object",
            "properties": {
                "column_name": {
                    "type": "string",
                    "description": "Quantity of the only values in the column"
                },
            },
            "required": ["column_name"]
        },
    },
    {
        'name' : 'get_elements_in_column',
        'description' : '''Get all elements in the column containing the same elements as the input. Returns the list of all elements found, Question for example, take all the walls in the current data''',
        'input_schema' : {
            'type' : "object",
            'properties' : {
                'elements' : {
                    'type' : 'string',
                    'description': 'The search value, e.g. chairs, tables, windows, walls.',
                },
                'columns' : {
                    'type' : 'string',
                    "enum": ["Category", "Name", "Family Name", "Class"],
                    'description' : "The column names, e.g. Name, Category, Family Name, Workset, Class"
                },
            },
            'required' : ['elements']
        }
    },
    {
        'name' : 'get_quantity_elements_in_column',
        'description' : '''Get all elements in the column containing the same elements as the input, Returns the number of all attributes found, Questions for example, How many walls in data? or How many seats in the current data?.''',
        'input_schema' : {
            'type' : "object",
            'properties' : {
                'elements' : {
                    'type' : 'string',
                    'description': 'The search value, e.g. walls, chairs, tables, windows.',
                },
                'columns' : {
                    'type' : 'string',
                    "enum": ["Category", "Name", "Family Name", "Class"],
                    'description' : "The column names classification, e.g. Category, Name, Family Name, Workset, Class"
                },
            },
            'required' : ['elements']
        }
    },
    {
        'name' : 'get_total_area_elements',
        'description' : 
            """
                Calculates the total area of elements whose Category contains "element" (case-insensitive).

                Args:
                    data: A dictionary where keys are column names ('Column', 'Area')
                        and values are dictionaries mapping indices to the corresponding values.
                        Example:
                        {'Column': {0: 'Revit Walls', ...}, 'Area': {0: 6.658051200000002, ...}}

                Returns:
                    The total area of elements with "element" in their category.
            """
        ,'input_schema' : {
            'type' : "object",
            'properties' : {
                'column' : {
                    'type' : 'string'
                    ,'enum' : ['Category', 'Family Name', 'Workset']
                    ,'description': 'The column names, e.g. Category, Family Name, Workset.'
                },
                'element' : {
                    'type' : 'string',
                    'description' : "The search value, e.g. Walls, Floor."
                },
            },
            'required' : ['element']
        }
    }
]

# Gửi câu hỏi đến mô hình

In [None]:
# 6. Hàm gửi câu hỏi tới Model
def get_response(question: str, df: pd.DataFrame):
    # Nếu lịch sử rỗng, thêm context của DataFrame.

    history.append({"role": "user", "content": question})
    
    return client.messages.create(
        model=MODEL_NAME,
        max_tokens=64+32+32,
        tools=tools,
        messages=history,    
    )

# Định nghĩa hàm thực thi

In [None]:
# 7. Hàm thực hiện công cụ dựa trên tên tool và đầu vào
def execute_tool(tool_name, tool_input):
    path = 'data.csv'
    df = get_dataframe_revit(path)
    print(f"execute_tool: DataFrame loaded from path: {path}")
    
    if tool_name == "get_all_columns":
        return get_all_columns(df)
    elif tool_name == "get_values_unique_in_column":
        print("Tool input: ", tool_input)
        return get_values_unique_in_column(df, tool_input['column_name'])
    elif tool_name == "get_len_values_unique_in_column":
        print("Tool input: ", tool_input)
        return get_len_values_unique_in_column(df, tool_input['column_name'])
    elif tool_name == 'get_elements_in_column':
        print("Tool input: ", tool_input)
        return get_elements_in_column(df, tool_input['columns'], tool_input['elements'] )
    elif tool_name == 'get_quantity_elements_in_column':
        print("Tool input: ", tool_input)
        return get_quantity_elements_in_column(df, tool_input['columns'], tool_input['elements'])
    elif tool_name == 'get_total_area_elements':
        print("Tool input: ", tool_input)
        return get_total_area_elements(df, tool_input['column'], tool_input['element'])
    else:
        raise ValueError(f"Unknown tool: {tool_name}")

# Mô hình trả lời câu hỏi và lưu vào story

In [None]:
# 8. Hàm xử lý và in phản hồi từ model
def execute_response(response: object):
    print("*" * 100)
    print(response)
    print("*" * 100)
    for tool in response.content:
        if tool.type == "text":
            print("Execute_response:", tool.text)
        elif tool.type == "tool_use":
            tool_use = tool.name
            print(f"Tool used: {tool_use}")
            # Thực thi tool dựa trên tên và input đi kèm
            res = execute_tool(tool_use, tool.input)
            
            print(f"\nTool output: {res}")
            history.append({'role': "assistant", 'content': f"{response.content[0].text}, Result tool '{tool_use}': {res}"})
            return history[-1]['content']
        print()
    return ValueError("Lỗi rồi feen ơi !")

# Thực thi toàn bộ

## có bao nhiêu tường trong dữ liệu

In [52]:
def rep(question : str) -> str:
    df = pd.read_csv('data.csv')
    initial_response = get_response(question, df)
    # print(f"Question: {question}")
    return execute_response(initial_response)

question = "How many wall in data"
print(rep(question))

****************************************************************************************************
Message(id='msg_0143KWnBy6qGqc28NS1y8jUP', content=[TextBlock(citations=None, text="I'll find out how many walls are in the data for you. Let me count them:", type='text'), ToolUseBlock(id='toolu_01K4kszNB4E4NUmnTfzE63JX', input={'columns': 'Category', 'elements': 'Wall'}, name='get_quantity_elements_in_column', type='tool_use')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=1489, output_tokens=96))
****************************************************************************************************
Execute_response: I'll find out how many walls are in the data for you. Let me count them:

Tool used: get_quantity_elements_in_column
execute_tool: DataFrame loaded from path: data.csv
Tool input:  {'columns': 'Category', 'elements': 'Wall'}


## Lấy tất cả tường trong dữ liệu

In [None]:
question = "Get all wall in data"
print(rep(question))

****************************************************************************************************
Message(id='msg_013MMW99WycvwJzCQMDQX3uT', content=[TextBlock(citations=None, text="I'll get all walls in the data for you. Let me retrieve that information:", type='text'), ToolUseBlock(id='toolu_01RjoJ6msmmaTW89sVvUGApD', input={'columns': 'Category', 'elements': 'Wall'}, name='get_elements_in_column', type='tool_use')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=1536, output_tokens=92))
****************************************************************************************************
Execute_response: I'll get all walls in the data for you. Let me retrieve that information:

Tool used: get_elements_in_column
execute_tool: DataFrame loaded from path: data.csv
Tool input:  {'columns': 'Category', 'elements': 'Wall'}

Tool output: Re

## Tính tổng diện tích tường

In [None]:
# question = "Calculate the total area of the category containing the Wall attribute?"
question = "Calculate the total wall area?"

print(rep(question))

****************************************************************************************************
Message(id='msg_01FKtr4oDc3duNVJpREeLUoj', content=[TextBlock(citations=None, text="I'll calculate the total wall area for you.", type='text'), ToolUseBlock(id='toolu_01B4XTNjr1JC1vz7u2P4PziH', input={'element': 'Wall', 'column': 'Category'}, name='get_total_area_elements', type='tool_use')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=1448, output_tokens=85))
****************************************************************************************************
Execute_response: I'll calculate the total wall area for you.

Tool used: get_total_area_elements
execute_tool: DataFrame loaded from path: data.csv
Tool input:  {'element': 'Wall', 'column': 'Category'}

Tool output: 34.12638336000004
I'll calculate the total wall area for you., R

## Tính tổng diện tích tường trong Family Name

In [54]:
question = "Calculate the total wall area in Family Name"
print(rep(question))

****************************************************************************************************
Message(id='msg_01Y8gUoPZBMpz4oJ5v1iKfAW', content=[TextBlock(citations=None, text='I\'ll calculate the total wall area using the "Family Name" column as the filter parameter. Let me do that for you:', type='text'), ToolUseBlock(id='toolu_01MbqHL98GGGBnNSLaCzAnwS', input={'column': 'Family Name', 'element': 'Wall'}, name='get_total_area_elements', type='tool_use')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=1615, output_tokens=102))
****************************************************************************************************
Execute_response: I'll calculate the total wall area using the "Family Name" column as the filter parameter. Let me do that for you:

Tool used: get_total_area_elements
execute_tool: DataFrame loaded from p

# Có bao nhiêu Class trong dữ liệu

In [55]:
question = "How many class in data ?"
print(rep(question))

****************************************************************************************************
Message(id='msg_01JyDpAvsJXeNZvcAJ6vpxiZ', content=[TextBlock(citations=None, text="I'll check how many unique classes are in the data for you. Let me retrieve that information:", type='text'), ToolUseBlock(id='toolu_01Q45esXZrNhe7T3k7XSqjSN', input={'column_name': 'Class'}, name='get_len_values_unique_in_column', type='tool_use')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=1674, output_tokens=84))
****************************************************************************************************
Execute_response: I'll check how many unique classes are in the data for you. Let me retrieve that information:

Tool used: get_len_values_unique_in_column
execute_tool: DataFrame loaded from path: data.csv
Tool input:  {'column_name': 'Clas

In [46]:
tools = [
    {
        'name' : 'get_total_area_elements',
        'description' : 
            """
                Calculates the total area of elements whose Category contains "element" (case-insensitive).

                Args:
                    data: A dictionary where keys are column names ('Column', 'Area')
                        and values are dictionaries mapping indices to the corresponding values.
                        Example:
                        {'Column': {0: 'Revit Walls', ...}, 'Area': {0: 6.658051200000002, ...}}

                Returns:
                    The total area of elements with "element" in their category.
            """
        ,'input_schema' : {
            'type' : "object",
            'properties' : {
                'column' : {
                    'type' : 'string'
                    ,'enum' : ['Category', 'Family Name', 'Workset']
                    ,'description': 'The column names, e.g. Category, Family Name, Workset.'
                },
                'element' : {
                    'type' : 'string',
                    'description' : "The search value, e.g. Walls, Floor."
                },
            },
            'required' : ['element']
        }
    }
]

client.messages.create(
        model=MODEL_NAME,
        max_tokens=100,
        tools=tools,
        messages=[{'role' : 'user', 'content' : "Calculate the total area of the category containing the Wall attribute?"}]    
    )

Message(id='msg_01HtkWFrS8L6GgDsdoX2wvCm', content=[TextBlock(citations=None, text='I need to calculate the total area for categories containing "Wall". I\'ll use the available function to find this information.', type='text'), ToolUseBlock(id='toolu_016u8QysUPTBMRWHiAoqUHJR', input={'column': 'Category', 'element': 'Wall'}, name='get_total_area_elements', type='tool_use')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=579, output_tokens=99))

In [44]:
df[['Family Name', 'Area']].dropna().reset_index(drop=True).to_dict()

{'Family Name': {0: 'Basic Wall',
  1: 'Basic Wall',
  2: 'Basic Wall',
  3: 'Basic Wall',
  4: 'Basic Wall',
  5: 'Basic Wall',
  6: 'Floor'},
 'Area': {0: 6.658051200000002,
  1: 4.7225712,
  2: 1.4709648000000022,
  3: 4.7225712,
  4: 7.107082560000035,
  5: 9.4451424,
  6: 10.071592760000014}}

In [42]:
df = pd.read_csv('data.csv', encoding='utf-8')
def get_total_area_elements(df : pd.DataFrame, column : str, element : str ) -> str:
  """
  Calculates the total area of elements whose Category contains "element" (case-insensitive).

  Args:
    data: A dictionary where keys are column names ('Column', 'Area')
          and values are dictionaries mapping indices to the corresponding values.
          Example:
          {'Column': {0: 'Revit Walls', ...}, 'Area': {0: 6.658051200000002, ...}}

  Returns:
    The total area of elements with "element" in their category.
  """
  total_area = 0
  data = df[[column, 'Area']].dropna().reset_index(drop=True).to_dict()
  categories = data.get(column, {})
  areas = data.get('Area', {})

  for index, category in categories.items():
    if isinstance(category, str) and element.lower() in category.lower():
      area = areas.get(index)
      if isinstance(area, (int, float)):
        total_area += area

  return total_area

get_total_area_elements(df, column='Class', element='room')

0

In [None]:
print('''Can search for that information in the "Category" column Or you can get information in the "Family Name", "Name", "Class column", ...
      \nWhich would typically identify elements like walls, window, .... ''')

Can search for that information in the "Category" column Or you can get information in the "Family Name", "Name", "Class column", ...
      
which would typically identify elements like walls, window, .... 


In [9]:
print(history[-1]['content'])

I'll help you retrieve all columns in the data and their classification information. Let me fetch that for you.
Result tool get_all_columns: Unnamed: 0, dbId, external_id, ElementId, Name, Category, CategoryId, Workset, Edited by, Image, Keynote, Export to IFC, Export to IFC As, IFC Predefined Type, IfcGUID, Model, Manufacturer, Comments, URL, Description, Cost, Mark, Color, Transparency, Glow, Smoothness, Shininess, Material Type, Class, Subclass, Behavior, Young modulus X, Young modulus Y, Young modulus Z, Poisson ratio X, Poisson ratio Y, Poisson ratio Z, Shear modulus X, Shear modulus Y, Shear modulus Z, Thermal expansion coefficient X, Thermal expansion coefficient Y, Thermal expansion coefficient Z, Unit weight, Minimum yield stress, Minimum tensile strength, Thermally Treated, Reduction factor for shear, Concrete compression, Lightweight, Shear strength modification, Young modulus, Poisson ratio, Shear modulus, Thermal expansion coefficient, Species, Grade, Bending, Compression 

In [17]:
df = pd.read_csv('data.csv')
question = "classification columns and search columns for work calculate area the walls"
initial_response = get_response(question, df)
print(f"Question: {question}")
print(execute_response(initial_response))

Question: classification columns and search columns for work calculate area the walls
****************************************************************************************************
Message(id='msg_01GQneGgd7j9WJ32kftPMU16', content=[TextBlock(citations=None, text="I'll help you identify classification columns and look for information about wall area calculations. Let me break this down more clearly.\n\nFirst, let me check what the main classification columns are in the data:", type='text'), ToolUseBlock(id='toolu_01PgAZAgA7dnARxXu3G1cRg1', input={'column_name': 'Category'}, name='get_values_unique_in_column', type='tool_use')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=7787, output_tokens=102))
****************************************************************************************************
Execute_response: I'll help you id

In [88]:
history

[{'role': 'user', 'content': 'How many walls in data?'},
 {'role': 'assistant',
  'content': "I'll help you find out how many walls are in the data. Let me check that information for you.\nResult tool get_quantity_elements_in_column: 6"},
 {'role': 'user', 'content': 'How many Family Name in data'},
 {'role': 'assistant',
  'content': "I'll help you find out how many unique Family Names are in the data. Let me check that information for you.\nResult tool get_len_values_unique_in_column: 507"},
 {'role': 'user', 'content': 'What is my previous question?'},
 {'role': 'user', 'content': 'How many walls in data'},
 {'role': 'user', 'content': 'Get all columns in data and classification '},
 {'role': 'assistant',
  'content': 'Your previous question was: "How many walls in data?"\n\nTo answer your current request about getting all columns in the data and their classification, I\'ll retrieve all the column names for you.\nResult tool get_all_columns: Unnamed: 0, dbId, external_id, ElementId,

In [87]:
print(initial_response.content[0].text)

Your previous question was: "How many walls in data?"

To answer your current request about getting all columns in the data and their classification, I'll retrieve all the column names for you.


In [79]:
len(history), history

(5,
 [{'role': 'user', 'content': 'How many walls in data?'},
  {'role': 'assistant',
   'content': "I'll help you find out how many walls are in the data. Let me check that information for you.\nResult tool get_quantity_elements_in_column: 6"},
  {'role': 'user', 'content': 'How many Family Name in data'},
  {'role': 'assistant',
   'content': "I'll help you find out how many unique Family Names are in the data. Let me check that information for you.\nResult tool get_len_values_unique_in_column: 507"},
  {'role': 'user', 'content': 'What is my previous question?'}])

In [78]:
df = pd.read_csv('data.csv')
question = "What is my previous question?"
initial_response = get_response(question, df)
print(f"Question: {question}")
execute_response(initial_response)

Question: What is my previous question?
****************************************************************************************************
Message(id='msg_01C5WJGot8RiwkcseV2HaZdY', content=[TextBlock(citations=None, text='Your previous question was "How many walls in data?" \n\nI answered this question by using the get_quantity_elements_in_column function to check the number of walls in the data, and the result showed there are 6 walls in the data.\n\nAfter that, you asked "How many Family Name in data" and I informed you that there are 507 unique Family Names in the data.', type='text')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=1100, output_tokens=88))
****************************************************************************************************
Execute_response: Your previous question was "How many walls in data?" 

I an

In [80]:
df = pd.read_csv('data.csv')
question = "How many walls in data"
initial_response = get_response(question, df)
print(f"Question: {question}")
print(f"====================================================================\n")
execute_response(initial_response)

Question: How many walls in data

****************************************************************************************************
Message(id='msg_01Uo6GmujaDwVJQrSaxyfAh8', content=[TextBlock(citations=None, text='Your previous question was "How many walls in data?" \n\nI answered that question by checking the quantity of wall elements in the data, and the result showed that there are 6 walls in the data.', type='text')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=1106, output_tokens=46))
****************************************************************************************************
Execute_response: Your previous question was "How many walls in data?" 

I answered that question by checking the quantity of wall elements in the data, and the result showed that there are 6 walls in the data.



In [81]:
history

[{'role': 'user', 'content': 'How many walls in data?'},
 {'role': 'assistant',
  'content': "I'll help you find out how many walls are in the data. Let me check that information for you.\nResult tool get_quantity_elements_in_column: 6"},
 {'role': 'user', 'content': 'How many Family Name in data'},
 {'role': 'assistant',
  'content': "I'll help you find out how many unique Family Names are in the data. Let me check that information for you.\nResult tool get_len_values_unique_in_column: 507"},
 {'role': 'user', 'content': 'What is my previous question?'},
 {'role': 'user', 'content': 'How many walls in data'}]