# Benchmark for task type prediction using LLM

In [2]:
import os
import sys

# Put the directory two levels above the working directory on the import path
# (presumably so the local `fedot_llm` package imported below can be found).
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

from langchain.chat_models import init_chat_model
from fedot_llm.benchmarks import TaskTypeBenchmark

## Easy mode 
Each description either explicitly states the task in its original form, or has had a goal description added manually. This shows that the model can confidently understand a well-described user task.

In [3]:
datasets_metadata_path = '../../datasets/dataset_descriptions/task_type_descriptions/task_type_descriptions.json'
benchmark = TaskTypeBenchmark(
    model=init_chat_model(
        model="llama3.1",
        model_provider='ollama'),
    datasets_metadata_path = datasets_metadata_path,
    output='debug')
predictions = await benchmark.predict()

Fetching datasets


100%|██████████| 10/10 [00:00<00:00, 9984.06it/s]

{'event': 'on_chain_start', 'data': {'input': {'detailed_description': '\nThis dataset contains the hourly and daily count of rental bikes between years 2011 and 2012 in Capital bikeshare system with the corresponding weather and seasonal information.\nThe aim is to predict the count of total rental bikes including both casual and registered.\n'}}, 'name': 'dataset_task_type_chain', 'tags': [], 'run_id': '32a15942-9b18-4890-8ebc-fd5ba0c8f3c1', 'metadata': {}, 'parent_ids': []}
{'event': 'on_prompt_start', 'data': {'input': {'detailed_description': '\nThis dataset contains the hourly and daily count of rental bikes between years 2011 and 2012 in Capital bikeshare system with the corresponding weather and seasonal information.\nThe aim is to predict the count of total rental bikes including both casual and registered.\n'}}, 'name': 'ChatPromptTemplate', 'tags': ['seq:step:1'], 'run_id': '07c32235-c6a6-4010-ade8-9e39bc6c5be5', 'metadata': {}, 'parent_ids': ['32a15942-9b18-4890-8ebc-fd5ba0




{'event': 'on_chat_model_stream', 'data': {'chunk': AIMessageChunk(content='Regression', id='run-2d3e2f8f-0721-433b-9891-ffd3d28601e2')}, 'run_id': '2d3e2f8f-0721-433b-9891-ffd3d28601e2', 'name': 'ChatOllama', 'tags': ['seq:step:2'], 'metadata': {'ls_provider': 'ollama', 'ls_model_name': 'llama3.1', 'ls_model_type': 'chat', 'ls_temperature': None}, 'parent_ids': ['32a15942-9b18-4890-8ebc-fd5ba0c8f3c1']}
{'event': 'on_parser_start', 'data': {}, 'name': 'StrOutputParser', 'tags': ['seq:step:3'], 'run_id': '5e0de2ec-5149-4b2c-9bdc-d75d9ce7b63e', 'metadata': {}, 'parent_ids': ['32a15942-9b18-4890-8ebc-fd5ba0c8f3c1']}
{'event': 'on_parser_stream', 'run_id': '5e0de2ec-5149-4b2c-9bdc-d75d9ce7b63e', 'name': 'StrOutputParser', 'tags': ['seq:step:3'], 'metadata': {}, 'data': {'chunk': 'Regression'}, 'parent_ids': ['32a15942-9b18-4890-8ebc-fd5ba0c8f3c1']}
{'event': 'on_chain_start', 'data': {}, 'name': 'RunnableLambda', 'tags': ['seq:step:4'], 'run_id': '98a7debd-533b-47df-b977-a67070fcadf4', 'me

In [4]:
# Show the metric and per-dataset target vs. predicted task types for this run.
benchmark.display_results(result=predictions)

f1: 0.9090909090909091


Dataset: bike_sharing
Task Description: 
This dataset contains the hourly and daily count of rental bikes between years 2011 and 2012 in Capital bikeshare system with the corresponding weather and seasonal information.
The aim is to predict the count of total rental bikes including both casual and registered.

Target type: regression
Predicted type: regression


Dataset: auto_mpg
Task Description: 
Revised from CMU StatLib library, data concerns city-cycle fuel consumption
This dataset is a slightly modified version of the dataset provided in the StatLib library.  In line with the use by Ross Quinlan (1993) in predicting the attribute "mpg", 8 of the original instances were removed because they had unknown values for the "mpg" attribute.  The original dataset is available in the file "auto-mpg.data-original".
"The data concerns city-cycle fuel consumption in miles per gallon, to be predicted in terms of 3 multivalued discrete and 5 continuous attributes." (Quin

## Hard mode 
Each description contains only notes on the data or vaguely describes the target; any explicit task statement was removed. The model is expected to infer the task mostly from minimal, unedited raw context and the data itself.

In [6]:
datasets_metadata_path = '../../datasets/dataset_descriptions/task_type_descriptions/task_type_descriptions_hard.json'
benchmark = TaskTypeBenchmark(
    model=init_chat_model(
        model="llama3.1",
        model_provider='ollama'),
    datasets_metadata_path = datasets_metadata_path,
    output='debug')
predictions = await benchmark.predict()

Fetching datasets


100%|██████████| 10/10 [00:00<00:00, 9974.56it/s]

{'event': 'on_chain_start', 'data': {'input': {'detailed_description': '\nThis dataset contains the hourly and daily count of rental bikes between years 2011 and 2012 in Capital bikeshare system with the corresponding weather and seasonal information.\n'}}, 'name': 'dataset_task_type_chain', 'tags': [], 'run_id': 'dc9f2c48-eaa4-40c2-b7ee-ed16740acd59', 'metadata': {}, 'parent_ids': []}
{'event': 'on_prompt_start', 'data': {'input': {'detailed_description': '\nThis dataset contains the hourly and daily count of rental bikes between years 2011 and 2012 in Capital bikeshare system with the corresponding weather and seasonal information.\n'}}, 'name': 'ChatPromptTemplate', 'tags': ['seq:step:1'], 'run_id': '8f65ab08-c275-4d00-bdcf-533679c71394', 'metadata': {}, 'parent_ids': ['dc9f2c48-eaa4-40c2-b7ee-ed16740acd59']}
{'event': 'on_prompt_end', 'data': {'output': ChatPromptValue(messages=[SystemMessage(content='Your task is to define whether the task is regression or classification. Only ans




{'event': 'on_chat_model_stream', 'data': {'chunk': AIMessageChunk(content='Regression', id='run-9034cdd8-4cf1-41aa-8229-92e30f5d3c50')}, 'run_id': '9034cdd8-4cf1-41aa-8229-92e30f5d3c50', 'name': 'ChatOllama', 'tags': ['seq:step:2'], 'metadata': {'ls_provider': 'ollama', 'ls_model_name': 'llama3.1', 'ls_model_type': 'chat', 'ls_temperature': None}, 'parent_ids': ['dc9f2c48-eaa4-40c2-b7ee-ed16740acd59']}
{'event': 'on_parser_start', 'data': {}, 'name': 'StrOutputParser', 'tags': ['seq:step:3'], 'run_id': 'a57d8b2b-6389-45c8-a8a4-68ea69285b41', 'metadata': {}, 'parent_ids': ['dc9f2c48-eaa4-40c2-b7ee-ed16740acd59']}
{'event': 'on_parser_stream', 'run_id': 'a57d8b2b-6389-45c8-a8a4-68ea69285b41', 'name': 'StrOutputParser', 'tags': ['seq:step:3'], 'metadata': {}, 'data': {'chunk': 'Regression'}, 'parent_ids': ['dc9f2c48-eaa4-40c2-b7ee-ed16740acd59']}
{'event': 'on_chain_start', 'data': {}, 'name': 'RunnableLambda', 'tags': ['seq:step:4'], 'run_id': 'bdf275fd-66a4-4d5d-9803-0080aa6bd024', 'me

In [7]:
# Show the metric and per-dataset target vs. predicted task types for this run.
benchmark.display_results(result=predictions)

f1: 0.7692307692307693


Dataset: bike_sharing
Task Description: 
This dataset contains the hourly and daily count of rental bikes between years 2011 and 2012 in Capital bikeshare system with the corresponding weather and seasonal information.

Target type: regression
Predicted type: regression


Dataset: auto_mpg
Task Description: 
Revised from CMU StatLib library, data concerns city-cycle fuel consumption
This dataset is a slightly modified version of the dataset provided in the StatLib library.  In line with the use by Ross Quinlan (1993) in predicting the attribute "mpg", 8 of the original instances were removed because they had unknown values for the "mpg" attribute.  The original dataset is available in the file "auto-mpg.data-original".
"The data concerns city-cycle fuel consumption in miles per gallon, to be predicted in terms of 3 multivalued discrete and 5 continuous attributes." (Quinlan, 1993)

Target type: regression
Predicted type: regression


Dataset: abalone
Task Descr