In [None]:
import os
import sys

sys.path.append(os.path.join(os.getcwd(), '..'))

from schemas.feature import FeatureSelectionRequest
from agents.feature_agent import run_feature_agent
from schemas.model_selection import ModelSelectionRequest
from agents.model_selection_agent import run_model_agent
from schemas.evaluation import EvaluationRequest
from agents.evaluation_agent import run_evaluation_agent
from utils.metrics_calculator import calculate_metrics

import pandas as pd
import base64
import skops.io as sio

In [4]:
# Load the Titanic CSV and keep only its first 500 rows.
df = pd.read_csv('../datasets/titanic.csv').head(500)
# Column-oriented payload ({column name: list of values}) used as the data sample
# of the feature-selection request.
sample = df.to_dict(orient='list')

In [5]:
# Request for the feature agent: Titanic survival framed as a classification task.
feature_req = FeatureSelectionRequest(
    metadata={
        "dataset_name": "Titanic",
        "problem_type": "classification",
        "target_column": "Survived"
    },
    basic_stats={},  # passed empty here; presumably the agent computes them itself — TODO confirm
    data_sample=sample,
    max_features=4,  # presumably an upper bound on the number of returned features — TODO confirm
)

In [6]:
# Run the feature agent on the request and keep its response for inspection.
feature_resp = run_feature_agent(feature_req)

[32m2025-05-30 20:35:12.047[0m | [1mINFO    [0m | [36magents.feature_agent[0m:[36mrun_feature_agent[0m:[36m186[0m - [1mProcessing request for dataset 'Titanic'[0m
[32m2025-05-30 20:35:12.051[0m | [1mINFO    [0m | [36magents.feature_agent[0m:[36mrun_feature_agent[0m:[36m195[0m - [1mComputing basic stats...[0m
[32m2025-05-30 20:35:12.051[0m | [1mINFO    [0m | [36mutils.mini_eda[0m:[36mcompute_basic_stats[0m:[36m35[0m - [1mComputing basic stats for 12 features[0m
[32m2025-05-30 20:35:12.070[0m | [1mINFO    [0m | [36mutils.mini_eda[0m:[36mcompute_basic_stats[0m:[36m101[0m - [1mBasic stats computed for 12 features[0m
[32m2025-05-30 20:35:12.086[0m | [1mINFO    [0m | [36magents.feature_agent[0m:[36mrun_feature_agent[0m:[36m205[0m - [1mTop MI-recommended features: ['Fare', 'PassengerId', 'Pclass', 'SibSp', 'Parch', 'Age'][0m
[32m2025-05-30 20:35:12.090[0m | [1mINFO    [0m | [36magents.feature_agent[0m:[36mrun_feature_agent[0m

In [28]:
# Keep the preprocessing code from the feature agent's response; it is passed
# to the model agent as its second argument.
features_preprocessing_code = feature_resp.preprocessing_code

In [8]:
# List every selected feature with its dtype and importance, one per line.
print("Selected Features:")
for feature in feature_resp.selected_features:
    print(f"{feature.name} ({feature.dtype}) - importance: {feature.importance}")

Selected Features:
Fare (numeric) - importance: 0.35
Pclass (numeric) - importance: 0.25
SibSp (numeric) - importance: 0.15
Parch (numeric) - importance: 0.1


In [9]:
# Heading and the feature agent's explanation, each on its own line.
print("Feature Agent Reasoning:", feature_resp.reasoning, sep="\n")

Feature Agent Reasoning:
The selected features for the predictive model are Fare, Pclass, SibSp, and Parch. These features were chosen based on their mutual information scores, indicating their importance in predicting the target variable 'Survived'. Including these features will provide a balance between predictive power and model interpretability, as they capture key information about the passengers' fare, class, family relationships (SibSp and Parch), which are likely to influence survival on the Titanic.


In [10]:
# Shorthand for the feature objects chosen by the feature agent.
selected_features = feature_resp.selected_features

In [12]:
# Only the first 10 rows are sent along with the model-selection request.
tmp_df = df.head(10)
# Same column-oriented layout ({column name: list of values}) as the full sample.
tmp_sample = tmp_df.to_dict(orient='list')

In [13]:
# Request for the model-selection agent: same task metadata, the names of the
# selected features, and the 10-row data sample.
model_req = ModelSelectionRequest(
    metadata={
        "dataset_name": "Titanic",
        "problem_type": "classification",
        "target_column": "Survived"
    },
    selected_features=[f.name for f in selected_features],  # names only, not the feature objects
    data=tmp_sample
)

In [None]:
# Ask the model agent for a model choice, handing it the feature agent's preprocessing code.
model_resp = run_model_agent(model_req, features_preprocessing_code)

[32m2025-05-30 20:54:16.418[0m | [1mINFO    [0m | [36magents.model_selection_agent[0m:[36mrun_model_agent[0m:[36m98[0m - [1mProcessing request for dataset 'Titanic'[0m
[32m2025-05-30 20:54:16.420[0m | [1mINFO    [0m | [36magents.model_selection_agent[0m:[36m_build_prompt[0m:[36m52[0m - [1mBuilding the prompt for model selection.[0m
[32m2025-05-30 20:54:16.421[0m | [1mINFO    [0m | [36magents.model_selection_agent[0m:[36mrun_model_agent[0m:[36m102[0m - [1mPrompt length: 154 characters[0m
[32m2025-05-30 20:54:20.206[0m | [1mINFO    [0m | [36magents.model_selection_agent[0m:[36mrun_model_agent[0m:[36m107[0m - [1mLLM response received successfully[0m
[32m2025-05-30 20:54:20.222[0m | [1mINFO    [0m | [36magents.model_selection_agent[0m:[36mrun_model_agent[0m:[36m113[0m - [1mPipeline serialized to {len(pipe_blob)} bytes[0m
[32m2025-05-30 20:54:20.222[0m | [1mINFO    [0m | [36magents.model_selection_agent[0m:[36mrun_model_agen

In [20]:
# Show the raw result: element [0] is the selection response, element [1] is a
# (long) base64 string that is decoded into a pipeline further down.
model_resp

(ModelSelectionResponse(model_name=<ModelEnum.RANDOMFOREST: 'RandomForest'>, hyperparameters={'n_estimators': 100, 'max_depth': 5}, reasoning='Random Forest is a versatile and powerful model for classification tasks. It can handle a mix of numeric and categorical features well, making it suitable for the selected features in the Titanic dataset. Additionally, Random Forest provides good accuracy and feature importance which can aid in interpretability.'),
 'UEsDBBQAAAAAAMqmvlrt/JqbM0sBADNLAQALAAAAc2NoZW1hLmpzb257CiAgIl9fY2xhc3NfXyI6ICJQaXBlbGluZSIsCiAgIl9fbW9kdWxlX18iOiAic2tsZWFybi5waXBlbGluZSIsCiAgIl9fbG9hZGVyX18iOiAiT2JqZWN0Tm9kZSIsCiAgImNvbnRlbnQiOiB7CiAgICAiX19jbGFzc19fIjogImRpY3QiLAogICAgIl9fbW9kdWxlX18iOiAiYnVpbHRpbnMiLAogICAgIl9fbG9hZGVyX18iOiAiRGljdE5vZGUiLAogICAgImNvbnRlbnQiOiB7CiAgICAgICJzdGVwcyI6IHsKICAgICAgICAiX19jbGFzc19fIjogImxpc3QiLAogICAgICAgICJfX21vZHVsZV9fIjogImJ1aWx0aW5zIiwKICAgICAgICAiX19sb2FkZXJfXyI6ICJMaXN0Tm9kZSIsCiAgICAgICAgImNvbnRlbnQiOiBbCiAgICAgICAgICB7CiAgIC

In [29]:
# Element [1] of the result is the base64-encoded pipeline blob.
model_preprocessing_code = model_resp[1]
# Element [0] is the selection response; pull out the chosen model and its hyperparameters.
model_name, model_hyperparams = (
    model_resp[0].model_name,
    model_resp[0].hyperparameters,
)

In [25]:
# Heading followed by the selected model, each on its own line.
print("Model Selection:", f"Selected Model: {model_name}", sep="\n")

Model Selection:
Selected Model: ModelEnum.RANDOMFOREST


In [26]:
# One "name: value" line per hyperparameter of the selected model.
print("Hyperparameters of Selected Model:")
for param in model_hyperparams:
    value = model_hyperparams[param]
    print(f"{param}: {value}")

Hyperparameters of Selected Model:
n_estimators: 100
max_depth: 5


In [27]:
# Heading and the model agent's explanation, each on its own line.
print("Model Agent Reasoning:", model_resp[0].reasoning, sep="\n")

Model Agent Reasoning:
Random Forest is a versatile and powerful model for classification tasks. It can handle a mix of numeric and categorical features well, making it suitable for the selected features in the Titanic dataset. Additionally, Random Forest provides good accuracy and feature importance which can aid in interpretability.


In [32]:
def decode_pipeline(base64_blob):
    """Rebuild an object from its base64-encoded skops serialization.

    Args:
        base64_blob: Base64 text (or bytes) wrapping the payload that
            ``sio.loads`` consumes.

    Returns:
        The object that ``sio.loads`` reconstructs from the decoded bytes.
    """
    return sio.loads(base64.b64decode(base64_blob))

In [33]:
# Re-read the full dataset; this rebinds `df`, which until now held only the first 500 rows.
df = pd.read_csv('../datasets/titanic.csv')

# Rebuild the pipeline object from the base64 blob returned by the model agent.
pipeline = decode_pipeline(model_preprocessing_code)

In [34]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# The label column is dropped from the feature frame so the target is never
# part of the inputs the pipeline is fitted on or predicts from.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['Survived']),
    df['Survived'],
    test_size=0.2,
    random_state=42,
)

In [35]:
# Fit the decoded pipeline on the training split only.
pipeline.fit(X_train, y_train)

# Predict on both splits so train and test metrics can be compared.
y_train_pred = pipeline.predict(X_train)
y_test_pred = pipeline.predict(X_test)

In [66]:
# Compute train/test metrics from the true labels and the pipeline's predictions.
current_metrics = calculate_metrics(
    y_train_true=y_train,
    y_train_pred=y_train_pred,
    y_test_true=y_test,
    y_test_pred=y_test_pred,
    problem_type="classification"
)

In [67]:
# Display the computed metrics.
current_metrics

{'train_accuracy': 0.7556179775280899,
 'test_accuracy': 0.7541899441340782,
 'train_precision': 0.7670454545454546,
 'test_precision': 0.8260869565217391,
 'train_recall': 0.503731343283582,
 'test_recall': 0.5135135135135135,
 'train_f1_score': 0.6081081081081081,
 'test_f1_score': 0.6333333333333333}

In [49]:
# Free-text goal that is handed to the evaluation agent.
optimization_goal = "Maximize Recall, avoid overfitting"

In [52]:
# Request for the evaluation agent: task metadata plus the chosen features and model.
eval_req = EvaluationRequest(
    metadata={
        "dataset_name": "Titanic",
        "problem_type": "classification",
        "target_column": "Survived"
    },
    # NOTE(review): the feature objects are passed here, whereas the model-selection
    # request receives only their names — confirm this matches the schema.
    selected_features=selected_features,
    model_name=model_name,
    hyperparameters=model_hyperparams
)

In [53]:
# Ask the evaluation agent for a decision based on the current metrics and the stated goal.
decision = run_evaluation_agent(
    request=eval_req,
    current_metrics=current_metrics,
    history=[],  # an empty history is passed for this run
    model_info={
        # Same model name and hyperparameters that are already set on eval_req.
        "model_name": model_name,
        "hyperparameters": model_hyperparams
    },
    optimization_goal=optimization_goal
)

[32m2025-05-30 21:13:27.622[0m | [1mINFO    [0m | [36magents.evaluation_agent[0m:[36mrun_evaluation_agent[0m:[36m110[0m - [1mRunning evaluation agent for dataset 'Titanic'[0m
[32m2025-05-30 21:13:27.623[0m | [1mINFO    [0m | [36magents.evaluation_agent[0m:[36mrun_evaluation_agent[0m:[36m121[0m - [1mPrompt length: 580 characters[0m
[32m2025-05-30 21:13:29.226[0m | [1mINFO    [0m | [36magents.evaluation_agent[0m:[36mrun_evaluation_agent[0m:[36m125[0m - [1mLLM decision: continue[0m


In [54]:
# Display the evaluation agent's decision object.
decision

EvaluationDecision(recommendation='continue', reasoning='The current model shows decent performance on both training and test sets with a balanced precision and recall. The model is not overfitting as the performance on the test set is close to the training set. Since there are no previous iterations to compare, it is recommended to continue training the model with the current configuration and features to further improve performance while monitoring for overfitting.', confidence=0.8)

In [60]:
# Split the decision into its three fields for the summary cells.
evaluation_recommendation, evaluation_reasoning, evaluation_confidence = (
    decision.recommendation,
    decision.reasoning,
    decision.confidence,
)

In [61]:
# Heading followed by the agent's recommendation, each on its own line.
print("Model Recommendation:", f"Recommendation: {evaluation_recommendation}", sep="\n")

Model Recommendation:
Recommendation: continue


In [62]:
# Heading and the evaluation agent's explanation, each on its own line.
print("Reasoning", evaluation_reasoning, sep="\n")

Reasoning
The current model shows decent performance on both training and test sets with a balanced precision and recall. The model is not overfitting as the performance on the test set is close to the training set. Since there are no previous iterations to compare, it is recommended to continue training the model with the current configuration and features to further improve performance while monitoring for overfitting.


In [63]:
# Heading and the confidence value reported by the evaluation agent.
print("confidence", evaluation_confidence, sep="\n")

confidence
0.8
