In [1]:
import os
import sys

# Add the parent of the current working directory to the import search path.
# NOTE(review): presumably this is what lets the `schemas` and `agents` imports
# below resolve when the kernel is started from the notebook's own folder —
# it depends on os.getcwd(), so confirm the expected launch directory.
sys.path.append(os.path.join(os.getcwd(), '..'))

from schemas.feature import FeatureSelectionRequest
from agents.feature_agent import run_feature_agent
import pandas as pd

In [2]:
# Step 1: read the Titanic CSV and keep only its first 500 rows as the sample.
df = pd.read_csv("../datasets/titanic.csv").head(500)
# Column-oriented payload: {column name -> list of that column's values}.
sample = df.to_dict(orient="list")

# Step 2: assemble the request. `basic_stats` is deliberately left empty
# (no pre-computed stats – they will be filled automatically).
req = FeatureSelectionRequest(
    metadata=dict(
        dataset_name="Titanic",
        problem_type="classification",
        target_column="Survived",
    ),
    basic_stats=dict(),
    data_sample=sample,
    max_features=4,
)

# Step 3: hand the request to the feature agent and keep its response.
resp = run_feature_agent(req)

2025-05-25 23:02:32.724 | INFO     | agents.feature_agent:run_feature_agent:185 - Processing request for dataset 'Titanic'
2025-05-25 23:02:32.730 | INFO     | agents.feature_agent:run_feature_agent:194 - Computing basic stats...
2025-05-25 23:02:32.730 | INFO     | utils.mini_eda:compute_basic_stats:35 - Computing basic stats for 12 features
2025-05-25 23:02:32.744 | INFO     | utils.mini_eda:compute_basic_stats:101 - Basic stats computed for 12 features
2025-05-25 23:02:32.756 | INFO     | agents.feature_agent:run_feature_agent:204 - Top MI-recommended features: ['Fare', 'PassengerId', 'Pclass', 'SibSp', 'Age', 'Parch']
2025-05-25 23:02:32.759 | INFO     | agents.feature_agent:run_feature_agent

In [7]:
# Print the three response fields in one call. The bare "\n" arguments are
# joined with print's default single-space separator, so the second and third
# fields each start on a new line preceded by one space.
print(
    resp.selected_features,
    "\n",
    resp.preprocessing_code,
    "\n",
    resp.reasoning,
)

[FeatureSpec(name='Fare', dtype='numeric', origin='raw', transformer='StandardScaler', params={}, importance=0.25), FeatureSpec(name='PassengerId', dtype='numeric', origin='raw', transformer='StandardScaler', params={}, importance=0.15), FeatureSpec(name='Pclass', dtype='numeric', origin='raw', transformer='StandardScaler', params={}, importance=0.1), FeatureSpec(name='SibSp', dtype='numeric', origin='raw', transformer='StandardScaler', params={}, importance=0.05)] 
 UEsDBBQAAAAAAFG4uVqunMPTtJ8AALSfAAALAAAAc2NoZW1hLmpzb257CiAgIl9fY2xhc3NfXyI6ICJDb2x1bW5UcmFuc2Zvcm1lciIsCiAgIl9fbW9kdWxlX18iOiAic2tsZWFybi5jb21wb3NlLl9jb2x1bW5fdHJhbnNmb3JtZXIiLAogICJfX2xvYWRlcl9fIjogIk9iamVjdE5vZGUiLAogICJjb250ZW50IjogewogICAgIl9fY2xhc3NfXyI6ICJkaWN0IiwKICAgICJfX21vZHVsZV9fIjogImJ1aWx0aW5zIiwKICAgICJfX2xvYWRlcl9fIjogIkRpY3ROb2RlIiwKICAgICJjb250ZW50IjogewogICAgICAidHJhbnNmb3JtZXJzIjogewogICAgICAgICJfX2NsYXNzX18iOiAibGlzdCIsCiAgICAgICAgIl9fbW9kdWxlX18iOiAiYnVpbHRpbnMiLAogICAgICAgICJfX2xvYWRlcl9fIjogIkxpc3RO

In [3]:
# Show just the `name` of every selected feature spec, in the order returned.
print([spec.name for spec in resp.selected_features])

['Fare', 'SibSp', 'Pclass', 'Parch']


In [4]:
# Print the list of selected feature specs, then the `preprocessing_code`
# string, each on its own line.
print(resp.selected_features, resp.preprocessing_code, sep="\n")

[FeatureSpec(name='Fare', dtype='numeric', origin='raw', transformer='StandardScaler', params={}, importance=0.25), FeatureSpec(name='SibSp', dtype='numeric', origin='raw', transformer='StandardScaler', params={}, importance=0.15), FeatureSpec(name='Pclass', dtype='numeric', origin='raw', transformer='StandardScaler', params={}, importance=0.1), FeatureSpec(name='Parch', dtype='numeric', origin='raw', transformer='StandardScaler', params={}, importance=0.05)]
UEsDBBQAAAAAAChVqFrA3RJlyaEAAMmhAAALAAAAc2NoZW1hLmpzb257CiAgIl9fY2xhc3NfXyI6ICJDb2x1bW5UcmFuc2Zvcm1lciIsCiAgIl9fbW9kdWxlX18iOiAic2tsZWFybi5jb21wb3NlLl9jb2x1bW5fdHJhbnNmb3JtZXIiLAogICJfX2xvYWRlcl9fIjogIk9iamVjdE5vZGUiLAogICJjb250ZW50IjogewogICAgIl9fY2xhc3NfXyI6ICJkaWN0IiwKICAgICJfX21vZHVsZV9fIjogImJ1aWx0aW5zIiwKICAgICJfX2xvYWRlcl9fIjogIkRpY3ROb2RlIiwKICAgICJjb250ZW50IjogewogICAgICAidHJhbnNmb3JtZXJzIjogewogICAgICAgICJfX2NsYXNzX18iOiAibGlzdCIsCiAgICAgICAgIl9fbW9kdWxlX18iOiAiYnVpbHRpbnMiLAogICAgICAgICJfX2xvYWRlcl9fIjogIkxpc3ROb2RlIiwK