# **Used Cars Regression - Playground Series PS4S9**

- RMSE: 20 hours of training, L5 models with an L6 weighted ensemble.
- MAE: 4 hours of training, L2 models with an L3 weighted ensemble.

# **Setup**

In [3]:
%%capture

# AutoGluon is a SOTA AutoML framework
%pip install -q setuptools wheel autogluon.tabular[all,skex] dask[dataframe]
%pip install -U -q ipywidgets cloudpickle==2.2.1

In [4]:
# %%capture

# # LightGBM GPU installation (restart after installing)
# ! git clone --recursive https://github.com/Microsoft/LightGBM

# #You can run this oneliner which will build and compile LightGBM with GPU enabled in colab:
# ! cd LightGBM && rm -rf build && mkdir build && cd build && cmake -DUSE_GPU=1 ../../LightGBM && make -j4 && cd ../python-package && python3 setup.py install --precompile --gpu;
# !mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd

In [5]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
import os
import cloudpickle
import re
from autogluon.tabular import TabularDataset, TabularPredictor
warnings.filterwarnings('ignore')

In [6]:
# Mount Google Drive at /content/drive; the default data folder used by
# `base_path` in the next cell lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Data folder: taken from the DATA_FOLDER_PATH environment variable when set,
# otherwise the Google Drive project folder.
base_path = os.getenv(
    'DATA_FOLDER_PATH',
    '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/',
)

# Read the three CSV files from the data folder, in this order:
# train set, test set, and the extra used_cars file.
train_data, test_data, use_cars = (
    TabularDataset(os.path.join(base_path, csv_name))
    for csv_name in ('train.csv', 'test.csv', 'used_cars.csv')
)

In [8]:
def _digits_to_int(raw_text):
    """Join every run of digits found in ``raw_text`` and parse the result as an int."""
    return int(''.join(re.findall(r'\d+', raw_text)))


# Drop the `id` column from both competition frames (in place).
train_data.drop('id', axis=1, inplace=True)
test_data.drop('id', axis=1, inplace=True)

# `milage` and `price` of the extra file are passed through a regex, so they
# are expected to be text; keep only their digits and convert to integers.
use_cars[['milage', 'price']] = use_cars[['milage', 'price']].map(_digits_to_int)

# Training frame = competition train rows followed by the extra rows, re-indexed.
train = pd.concat([train_data, use_cars], ignore_index=True)

In [9]:
# Submission template; the submission cells copy it and overwrite `price`.
submission = TabularDataset(os.path.join(base_path, 'sample_submission.csv'))


# **Preprocessing and Feature engineering**

Inspired by this [Kaggle notebook](https://www.kaggle.com/code/swandipsingha/autoglun-lgbm-and-nn)

In [10]:
def extract_age_features(df, current_year=2024):
    """
    Extracts age-related features from the dataset.

    Adds four columns to ``df`` in place and returns the same frame:

    - ``Vehicle_Age``: ``current_year - model_year``, floored at 1.
    - ``Mileage_per_Year``: ``milage / Vehicle_Age``.
    - ``milage_with_age``: mean ``milage`` over the rows of ``df`` that share
      the same ``Vehicle_Age``.
    - ``Mileage_per_Year_with_age``: mean ``Mileage_per_Year`` over the same
      groups.

    The group means are computed from ``df`` itself, so calling this on two
    different frames (e.g. train and test) yields frame-specific means.

    Args:
        df: DataFrame with ``model_year`` and ``milage`` columns.
        current_year: Reference year used to compute the age (default 2024).

    Returns:
        The input ``df`` with the new columns added.
    """
    vehicle_age = current_year - df['model_year']
    # Floor the age at 1: this avoids division by zero for current-year models
    # and also keeps model years later than `current_year` from producing a
    # negative age (and therefore a negative mileage-per-year).
    df['Vehicle_Age'] = vehicle_age.clip(lower=1)
    df['Mileage_per_Year'] = df['milage'] / df['Vehicle_Age']

    by_age = df.groupby('Vehicle_Age')
    df['milage_with_age'] = by_age['milage'].transform('mean')
    df['Mileage_per_Year_with_age'] = by_age['Mileage_per_Year'].transform('mean')
    return df

def extract_other_features(df):
    """
    Flags luxury brands.

    Adds an integer column ``Is_Luxury_Brand`` to ``df`` in place: 1 when the
    row's ``brand`` value is one of the names listed below, 0 otherwise
    (including missing brands). Returns the same frame.
    """
    luxury_brands = [
        'Mercedes-Benz', 'BMW', 'Audi', 'Porsche', 'Land', 'Lexus', 'Jaguar',
        'Bentley', 'Maserati', 'Lamborghini', 'Rolls-Royce', 'Ferrari',
        'McLaren', 'Aston', 'Maybach'
    ]
    # Vectorised membership test, then cast the boolean mask to 0/1.
    is_luxury = df['brand'].isin(luxury_brands)
    df['Is_Luxury_Brand'] = is_luxury.astype(int)
    return df

def update(df, threshold=100):
    """
    Handles rare categories by grouping them under 'noise' and fills missing values.
    Converts specified columns to categorical types.

    For each of ``model``, ``engine``, ``transmission``, ``ext_col`` and
    ``int_col``, every value that occurs fewer than ``threshold`` times in
    ``df`` is replaced by the string ``'noise'``. Missing values are not
    counted as a category: they are left untouched by this step and become
    ``'missing'`` in the fill step. Frequencies are computed from ``df``
    itself, so two frames processed separately (e.g. train and test) may
    bucket the same value differently.

    All nine categorical columns then get missing values replaced by
    ``'missing'`` and are cast to the pandas ``category`` dtype.

    Args:
        df: DataFrame containing the nine categorical columns listed below.
            It is modified in place.
        threshold: Minimum number of occurrences a value needs in order to be
            kept as its own category (default 100).

    Returns:
        The input ``df``.
    """
    categorical_columns = [
        'brand', 'model', 'fuel_type', 'engine',
        'transmission', 'ext_col', 'int_col', 'accident', 'clean_title'
    ]
    rare_columns = ['model', 'engine', 'transmission', 'ext_col', 'int_col']

    for col in rare_columns:
        # Work on plain objects so 'noise' can be written even if the column
        # already has a category dtype.
        values = df[col].astype(object)
        # Per-row occurrence count; missing values map to NaN because
        # value_counts() does not count them (a direct freq[x] lookup would
        # raise KeyError for them).
        occurrences = values.map(values.value_counts())
        keep = occurrences.ge(threshold) | values.isna()
        df[col] = values.where(keep, 'noise')

    for col in categorical_columns:
        df[col] = df[col].astype(object).fillna('missing').astype('category')

    return df

# Apply feature engineering to both training and test sets.
# Each step modifies the frame it receives and returns that same frame, so the
# three steps can be nested.
train = update(extract_other_features(extract_age_features(train)))

# The test frame is processed in place as well: `test` ends up referring to the
# same object as `test_data`, which is what the submission cells predict on.
test = update(extract_other_features(extract_age_features(test_data)))

# **Autogluon Training RMSE**

In [None]:
# Problem definition: regression on `price`, scored with RMSE
label = 'price'
problem_type = 'regression'
eval_metric = 'rmse'

# Model families left out of the search
excluded_model_types = ['KNN']

# Predictor whose artifacts are written under <base_path>/Autogluon/202409_20hr_train
predictor = TabularPredictor(
    label=label,
    problem_type=problem_type,
    eval_metric=eval_metric,
    path=os.path.join(base_path, "Autogluon/202409_20hr_train"),
)

# 20-hour budget (time_limit is in seconds) with bagging and stacking
predictor.fit(
    train_data=train,
    presets="best_quality",
    time_limit=3600 * 20,
    excluded_model_types=excluded_model_types,
    num_bag_folds=5,
    num_bag_sets=10,
    num_stack_levels=4,
    full_weighted_ensemble_additionally=True,
)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024
CPU Count:          2
Memory Avail:       11.06 GB / 12.67 GB (87.2%)
Disk Space Avail:   171.00 GB / 225.83 GB (75.7%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=4, num_bag_folds=5, num_bag_sets=10
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 18000s 

<autogluon.tabular.predictor.predictor.TabularPredictor at 0x798d8a7389d0>

In [None]:
# Load the predictor saved under the same path the RMSE fit cell writes to.
predictor = TabularPredictor.load(os.path.join(base_path, "Autogluon/202409_20hr_train"))

In [None]:
# No dataset is passed to leaderboard(), so despite the variable name the
# scores shown are the validation scores (`score_val` column).
leaderboard_test = predictor.leaderboard()
leaderboard_test

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_ALL_L6,-72403.701301,root_mean_squared_error,328.590529,39879.135358,0.003205,0.762355,6,True,85
1,WeightedEnsemble_L3,-72424.331896,root_mean_squared_error,153.660733,16755.988870,0.004543,0.624946,3,True,41
2,WeightedEnsemble_L4,-72430.343493,root_mean_squared_error,254.084535,28436.112292,0.003482,0.552062,4,True,59
3,CatBoost_BAG_L3,-72436.880165,root_mean_squared_error,228.950053,27780.211033,1.271147,535.711881,3,True,45
4,WeightedEnsemble_L5,-72460.866840,root_mean_squared_error,369.968760,42974.950444,0.004268,0.832023,5,True,77
...,...,...,...,...,...,...,...,...,...,...
81,XGBoost_r33_BAG_L4,-78604.666388,root_mean_squared_error,327.570339,38840.132060,1.404315,36.678396,4,True,76
82,NeuralNetTorch_r22_BAG_L3,-90214.157608,root_mean_squared_error,237.903558,27815.637030,10.224651,571.137878,3,True,57
83,NeuralNetTorch_r22_BAG_L1,-90214.235535,root_mean_squared_error,6.057364,1020.770602,6.057364,1020.770602,1,True,16
84,NeuralNetTorch_r22_BAG_L2,-90214.252639,root_mean_squared_error,132.966143,14888.305473,9.326166,668.090714,2,True,39


## **Submission**

In [None]:
# Ten best rows of the leaderboard; these model names are used for the submissions below.
leaderboard_test.head(10)

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_ALL_L6,-72403.701301,root_mean_squared_error,328.590529,39879.135358,0.003205,0.762355,6,True,85
1,WeightedEnsemble_L3,-72424.331896,root_mean_squared_error,153.660733,16755.98887,0.004543,0.624946,3,True,41
2,WeightedEnsemble_L4,-72430.343493,root_mean_squared_error,254.084535,28436.112292,0.003482,0.552062,4,True,59
3,CatBoost_BAG_L3,-72436.880165,root_mean_squared_error,228.950053,27780.211033,1.271147,535.711881,3,True,45
4,WeightedEnsemble_L5,-72460.86684,root_mean_squared_error,369.96876,42974.950444,0.004268,0.832023,5,True,77
5,CatBoost_r177_BAG_L3,-72462.623174,root_mean_squared_error,228.253683,27425.524631,0.574776,181.025479,3,True,51
6,CatBoost_BAG_L4,-72476.981999,root_mean_squared_error,326.987309,39159.329232,0.821285,355.875569,4,True,63
7,CatBoost_r9_BAG_L2,-72484.6763,root_mean_squared_error,125.34532,14504.054593,1.705343,283.839834,2,True,37
8,WeightedEnsemble_L2,-72488.901868,root_mean_squared_error,69.255063,7656.946847,0.005305,1.217805,2,True,23
9,WeightedEnsemble_L6,-72503.869436,root_mean_squared_error,443.498913,50010.659121,0.003664,0.297196,6,True,86


In [None]:
# Submission from WeightedEnsemble_ALL_L6: copy of the template with `price` replaced by its predictions.
sub_weigth_ensemble_L6_all = submission.assign(
    price=predictor.predict(test_data, model='WeightedEnsemble_ALL_L6')
)
sub_weigth_ensemble_L6_all.to_csv(
    os.path.join(base_path, 'autogluon_weigth_ensemble_L6_all.csv'),
    index=False,
)

In [None]:
# Submission from WeightedEnsemble_L6: copy of the template with `price` replaced by its predictions.
sub_weigth_ensemble_L6 = submission.assign(
    price=predictor.predict(test_data, model='WeightedEnsemble_L6')
)
sub_weigth_ensemble_L6.to_csv(
    os.path.join(base_path, 'autogluon_weigth_ensemble_L6.csv'),
    index=False,
)

In [None]:
# Submission from WeightedEnsemble_L3: copy of the template with `price` replaced by its predictions.
sub_weigth_ensemble_L3 = submission.assign(
    price=predictor.predict(test_data, model='WeightedEnsemble_L3')
)
sub_weigth_ensemble_L3.to_csv(
    os.path.join(base_path, 'autogluon_weigth_ensemble_L3.csv'),
    index=False,
)

In [None]:
# Submission from WeightedEnsemble_L4: copy of the template with `price` replaced by its predictions.
sub_weigth_ensemble_L4 = submission.assign(
    price=predictor.predict(test_data, model='WeightedEnsemble_L4')
)
sub_weigth_ensemble_L4.to_csv(
    os.path.join(base_path, 'autogluon_weigth_ensemble_L4.csv'),
    index=False,
)

In [None]:
# Submission from CatBoost_BAG_L3: copy of the template with `price` replaced by its predictions.
sub_catboost_bag_L3 = submission.assign(
    price=predictor.predict(test_data, model='CatBoost_BAG_L3')
)
sub_catboost_bag_L3.to_csv(
    os.path.join(base_path, 'autogluon_CatBoost_BAG_L3.csv'),
    index=False,
)

In [None]:
# Submission from WeightedEnsemble_L5: copy of the template with `price` replaced by its predictions.
sub_weigth_ensemble_L5 = submission.assign(
    price=predictor.predict(test_data, model='WeightedEnsemble_L5')
)
sub_weigth_ensemble_L5.to_csv(
    os.path.join(base_path, 'autogluon_WeightedEnsemble_L5.csv'),
    index=False,
)

In [None]:
# Submission from WeightedEnsemble_L2: copy of the template with `price` replaced by its predictions.
sub_weigth_ensemble_L2 = submission.assign(
    price=predictor.predict(test_data, model='WeightedEnsemble_L2')
)
sub_weigth_ensemble_L2.to_csv(
    os.path.join(base_path, 'autogluon_WeightedEnsemble_L2.csv'),
    index=False,
)

In [None]:
# Submission from CatBoost_r177_BAG_L3: copy of the template with `price` replaced by its predictions.
sub_catboost_r177_bag_L3 = submission.assign(
    price=predictor.predict(test_data, model='CatBoost_r177_BAG_L3')
)
sub_catboost_r177_bag_L3.to_csv(
    os.path.join(base_path, 'autogluon_CatBoost_r177_BAG_L3.csv'),
    index=False,
)

# **Autogluon training MAE**

In [None]:
# Problem definition: regression on `price`, scored with MAE
label = 'price'
problem_type = 'regression'
eval_metric = 'mae'

# Training budget in hours (converted to seconds in the fit call)
hours = 8

# Models to exclude. Note: this list is not passed to fit() below; the explicit
# `hyperparameters` dict is what selects the model families in this run.
excluded_model_types = ['KNN']

# Predictor whose artifacts are written under <base_path>/Autogluon/202409_MAE_train
predictor = TabularPredictor(
    label=label,
    problem_type=problem_type,
    eval_metric=eval_metric,
    path=os.path.join(base_path, "Autogluon/202409_MAE_train"),
)

# Fit with hyperparameter tuning, bagging, stacking and feature pruning enabled
predictor.fit(
    train_data=train,
    presets="best_quality",
    time_limit=3600 * hours,
    hyperparameters={
        'GBM': ['GBMLarge'],
        'CAT': {},
        'XT': {},
        'FASTAI': {},
        'FT_TRANSFORMER': {},
    },
    hyperparameter_tune_kwargs="auto",
    num_bag_folds=8,
    num_bag_sets=20,
    num_stack_levels=2,
    ag_args_ensemble={'use_orig_features': True},
    full_weighted_ensemble_additionally=True,
    feature_prune_kwargs={},
)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024
CPU Count:          8
Memory Avail:       48.66 GB / 50.99 GB (95.4%)
Disk Space Avail:   179.92 GB / 225.83 GB (79.7%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=2, num_bag_folds=8, num_bag_sets=20
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 7200s o

[36m(_dystack pid=8021)[0m ╭───────────────────────────────────────────────────────────╮
[36m(_dystack pid=8021)[0m │ Configuration for experiment     NeuralNetFastAI_BAG_L1   │
[36m(_dystack pid=8021)[0m ├───────────────────────────────────────────────────────────┤
[36m(_dystack pid=8021)[0m │ Search algorithm                 SearchGenerator          │
[36m(_dystack pid=8021)[0m │ Scheduler                        FIFOScheduler            │
[36m(_dystack pid=8021)[0m │ Number of trials                 1000                     │
[36m(_dystack pid=8021)[0m ╰───────────────────────────────────────────────────────────╯
[36m(_dystack pid=8021)[0m 
[36m(_dystack pid=8021)[0m View detailed results here: /kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L1
[36m(_dystack pid=8021)[0m 


[36m(_dystack pid=8021)[0m Reached timeout of 575.5639956877899 seconds. Stopping all trials.
[36m(_dystack pid=8021)[0m Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.
[36m(_dystack pid=8021)[0m You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
[36m(_dystack pid=8021)[0m You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
[36m(_dystack pid=8021)[0m Wrote the latest version of all result files and experiment state to '/kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L1' in 0.0095s.
[36m(_dystack pid=8021)[0m Failed to fetch metrics for 7 trial(s):
[36m(_dysta

[36m(_dystack pid=8021)[0m ╭───────────────────────────────────────────────────────────╮
[36m(_dystack pid=8021)[0m │ Configuration for experiment     NeuralNetFastAI_BAG_L2   │
[36m(_dystack pid=8021)[0m ├───────────────────────────────────────────────────────────┤
[36m(_dystack pid=8021)[0m │ Search algorithm                 SearchGenerator          │
[36m(_dystack pid=8021)[0m │ Scheduler                        FIFOScheduler            │
[36m(_dystack pid=8021)[0m │ Number of trials                 1000                     │
[36m(_dystack pid=8021)[0m ╰───────────────────────────────────────────────────────────╯
[36m(_dystack pid=8021)[0m 
[36m(_dystack pid=8021)[0m View detailed results here: /kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L2


[36m(_dystack pid=8021)[0m [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949
[36m(_dystack pid=8021)[0m Reached timeout of 775.3406988262295 seconds. Stopping all trials.
[36m(_dystack pid=8021)[0m Wrote the latest version of all result files and experiment state to '/kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L2' in 0.0322s.


[36m(_dystack pid=8021)[0m 


[36m(_dystack pid=8021)[0m Failed to fetch metrics for 6 trial(s):
[36m(_dystack pid=8021)[0m - da69b124: FileNotFoundError('Could not fetch metrics for da69b124: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L2/da69b124')
[36m(_dystack pid=8021)[0m - 30f8e5a4: FileNotFoundError('Could not fetch metrics for 30f8e5a4: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L2/30f8e5a4')
[36m(_dystack pid=8021)[0m - 7aee4d6a: FileNotFoundError('Could not fetch metrics for 7aee4d6a: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L2/7aee4d6a')
[36m(_dystack pid=8021)[0m - 6e1f4d4e: FileNotFoundError('Could not fetch metrics for 6e1f4d4e: both result.json and progress.csv were not found at /kaggle/worki

[36m(_dystack pid=8021)[0m ╭───────────────────────────────────────────────────────────╮
[36m(_dystack pid=8021)[0m │ Configuration for experiment     NeuralNetFastAI_BAG_L3   │
[36m(_dystack pid=8021)[0m ├───────────────────────────────────────────────────────────┤
[36m(_dystack pid=8021)[0m │ Search algorithm                 SearchGenerator          │
[36m(_dystack pid=8021)[0m │ Scheduler                        FIFOScheduler            │
[36m(_dystack pid=8021)[0m │ Number of trials                 1000                     │
[36m(_dystack pid=8021)[0m ╰───────────────────────────────────────────────────────────╯
[36m(_dystack pid=8021)[0m 
[36m(_dystack pid=8021)[0m View detailed results here: /kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L3


[36m(_dystack pid=8021)[0m [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949
[36m(_dystack pid=8021)[0m Reached timeout of 497.3687201499939 seconds. Stopping all trials.
[36m(_dystack pid=8021)[0m Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.
[36m(_dystack pid=8021)[0m You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
[36m(_dystack pid=8021)[0m You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
[36

[36m(_dystack pid=8021)[0m 


[36m(_dystack pid=8021)[0m Failed to fetch metrics for 5 trial(s):
[36m(_dystack pid=8021)[0m - 0bab5b92: FileNotFoundError('Could not fetch metrics for 0bab5b92: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L3/0bab5b92')
[36m(_dystack pid=8021)[0m - 38fb101b: FileNotFoundError('Could not fetch metrics for 38fb101b: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L3/38fb101b')
[36m(_dystack pid=8021)[0m - 28ec977e: FileNotFoundError('Could not fetch metrics for 28ec977e: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L3/28ec977e')
[36m(_dystack pid=8021)[0m - c1befa1c: FileNotFoundError('Could not fetch metrics for c1befa1c: both result.json and progress.csv were not found at /kaggle/worki

+-----------------------------------------------------------+
| Configuration for experiment     NeuralNetFastAI_BAG_L1   |
+-----------------------------------------------------------+
| Search algorithm                 SearchGenerator          |
| Scheduler                        FIFOScheduler            |
| Number of trials                 1000                     |
+-----------------------------------------------------------+

View detailed results here: /kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFastAI_BAG_L1


2024-09-29 18:46:23,344	INFO timeout.py:54 -- Reached timeout of 1740.110628005147 seconds. Stopping all trials.
You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-09-29 18:46:23,360	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFastAI_BAG_L1' in 0.0111s.
- d746ea00: FileNotFoundError('Could not fetch metrics for d746ea00: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFastAI_BAG_L1/d746ea00')
- e213eaf9: FileNotFoundError('Could not fetch metrics for e213eaf9: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFa




	-19690.6413	 = Validation score   (-mean_absolute_error)
	7.66s	 = Training   runtime
	2.6s	 = Validation runtime
Hyperparameter tuning model: FTTransformer_BAG_L1 ... Tuning model for up to 1740.11s of the 18410.29s of remaining time.
	No hyperparameter search space specified for FTTransformer_BAG_L1. Skipping HPO. Will train one model based on the provided hyperparameters.
	Fitting 5 child models (S1F1 - S1F5) | Fitting with ParallelLocalFoldFittingStrategy (5 workers, per: cpus=1, gpus=0, memory=0.12%)
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/autogluon/core/models/ensemble/bagged_ensemble_model.py", line 1333, in _hyperparameter_tune
    hpo_executor.validate_search_space(search_space, self.name)
  File "/usr/local/lib/python3.10/dist-packages/autogluon/core/hpo/executors.py", line 512, in validate_search_space
    raise EmptySearchSpace
autogluon.core.hpo.exceptions.EmptySearchSpace

During handling of the above exception, another exceptio

+-----------------------------------------------------------+
| Configuration for experiment     NeuralNetFastAI_BAG_L2   |
+-----------------------------------------------------------+
| Search algorithm                 SearchGenerator          |
| Scheduler                        FIFOScheduler            |
| Number of trials                 1000                     |
+-----------------------------------------------------------+

View detailed results here: /kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFastAI_BAG_L2


2024-09-29 20:59:30,912	INFO timeout.py:54 -- Reached timeout of 2232.559145084274 seconds. Stopping all trials.
You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-09-29 20:59:30,934	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFastAI_BAG_L2' in 0.0147s.
- 519986ed: FileNotFoundError('Could not fetch metrics for 519986ed: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFastAI_BAG_L2/519986ed')
- 9bafefd8: FileNotFoundError('Could not fetch metrics for 9bafefd8: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFa




	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.29%)
	-19726.5627	 = Validation score   (-mean_absolute_error)
	8.29s	 = Training   runtime
	1.38s	 = Validation runtime
Performing feature pruning with model: FeatureSelector_LightGBMLarge_BAG_L2, total time limit: 992.88s, stop threshold: 10, prune ratio: 0.05, prune threshold: noise.
	Number of training samples 192542 is greater than 50000. Using 50000 samples as training data.
	Feature selection model is bagged and replace_bag=True. Using a non-bagged version of the model for feature selection.
	Expected model fit time: 2.66s, and expected candidate generation time: 1.98s.
	Round 1 of feature pruning model fit (2.66s):
		Validation score of the model fit on original features is (-20665.7002).
	Round 2 of feature pruning model fit (3.81s):
		Validation score of the current model fit on 25 features (-20636.5469) is better than validation score of the best mo

+-----------------------------------------------------------+
| Configuration for experiment     NeuralNetFastAI_BAG_L3   |
+-----------------------------------------------------------+
| Search algorithm                 SearchGenerator          |
| Scheduler                        FIFOScheduler            |
| Number of trials                 1000                     |
+-----------------------------------------------------------+

View detailed results here: /kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFastAI_BAG_L3


2024-09-29 22:57:35,063	INFO timeout.py:54 -- Reached timeout of 1387.9454704642296 seconds. Stopping all trials.
You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-09-29 22:57:35,084	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFastAI_BAG_L3' in 0.0146s.
- 60de663d: FileNotFoundError('Could not fetch metrics for 60de663d: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetFastAI_BAG_L3/60de663d')
- bf250dcd: FileNotFoundError('Could not fetch metrics for bf250dcd: both result.json and progress.csv were not found at /kaggle/working/Autogluon/202409_MAE_train/models/NeuralNetF




	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.37%)
	-19811.8602	 = Validation score   (-mean_absolute_error)
	10.6s	 = Training   runtime
	1.44s	 = Validation runtime
Performing feature pruning with model: FeatureSelector_LightGBMLarge_BAG_L3, total time limit: 518.07s, stop threshold: 10, prune ratio: 0.05, prune threshold: noise.
	Number of training samples 192542 is greater than 50000. Using 50000 samples as training data.
	Feature selection model is bagged and replace_bag=True. Using a non-bagged version of the model for feature selection.
	Expected model fit time: 5.28s, and expected candidate generation time: 3.07s.
	Round 1 of feature pruning model fit (5.28s):
		Validation score of the model fit on original features is (-20623.3919).
	Round 2 of feature pruning model fit (2.69s):
		Validation score of the current model fit on 33 features (-20655.68) is not better than validation score of the best 

<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7e9350ae9540>

In [None]:
# Load the MAE predictor from the data folder; this rebinds `predictor`,
# which previously referred to the RMSE run.
predictor = TabularPredictor.load(os.path.join(base_path, "Autogluon/202409_MAE_train"))

In [None]:
# No dataset is passed to leaderboard(), so despite the variable name the
# scores shown are the validation scores (`score_val` column).
leaderboard_test = predictor.leaderboard()
leaderboard_test

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_ALL_L4,-17982.369702,mean_absolute_error,93.137936,8079.133357,0.002778,0.548741,4,True,49
1,WeightedEnsemble_L2,-18054.796327,mean_absolute_error,22.817122,1703.293692,0.002323,0.425666,2,True,11
2,NeuralNetFastAI_BAG_L1/e0cb5511_Prune,-18183.479282,mean_absolute_error,7.845421,561.044498,7.845421,561.044498,1,True,9
3,WeightedEnsemble_L3,-18206.19682,mean_absolute_error,57.060124,5039.942585,0.002943,0.444527,3,True,30
4,WeightedEnsemble_L4,-18214.12308,mean_absolute_error,101.701235,8693.59392,0.003186,0.372546,4,True,50
5,NeuralNetFastAI_BAG_L2/b293ca8b,-18433.114924,mean_absolute_error,41.608279,3756.025792,4.255948,271.612895,2,True,16
6,NeuralNetFastAI_BAG_L1/e213eaf9_Prune,-18468.65292,mean_absolute_error,6.523739,630.887675,6.523739,630.887675,1,True,8
7,NeuralNetFastAI_BAG_L2/238d0081,-18480.180684,mean_absolute_error,44.414904,4061.978701,7.062573,577.565804,2,True,17
8,NeuralNetFastAI_BAG_L3/091b55a8,-18486.213958,mean_absolute_error,93.135158,8078.584615,2.688395,271.374838,3,True,44
9,NeuralNetFastAI_BAG_L2/dcee2eb7_Prune,-18508.835151,mean_absolute_error,41.481488,3821.797244,4.129158,337.384347,2,True,28


## **Submission**

In [None]:
# Ten best rows of the MAE leaderboard.
leaderboard_test.head(10)

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_ALL_L4,-17982.369702,mean_absolute_error,93.137936,8079.133357,0.002778,0.548741,4,True,49
1,WeightedEnsemble_L2,-18054.796327,mean_absolute_error,22.817122,1703.293692,0.002323,0.425666,2,True,11
2,NeuralNetFastAI_BAG_L1/e0cb5511_Prune,-18183.479282,mean_absolute_error,7.845421,561.044498,7.845421,561.044498,1,True,9
3,WeightedEnsemble_L3,-18206.19682,mean_absolute_error,57.060124,5039.942585,0.002943,0.444527,3,True,30
4,WeightedEnsemble_L4,-18214.12308,mean_absolute_error,101.701235,8693.59392,0.003186,0.372546,4,True,50
5,NeuralNetFastAI_BAG_L2/b293ca8b,-18433.114924,mean_absolute_error,41.608279,3756.025792,4.255948,271.612895,2,True,16
6,NeuralNetFastAI_BAG_L1/e213eaf9_Prune,-18468.65292,mean_absolute_error,6.523739,630.887675,6.523739,630.887675,1,True,8
7,NeuralNetFastAI_BAG_L2/238d0081,-18480.180684,mean_absolute_error,44.414904,4061.978701,7.062573,577.565804,2,True,17
8,NeuralNetFastAI_BAG_L3/091b55a8,-18486.213958,mean_absolute_error,93.135158,8078.584615,2.688395,271.374838,3,True,44
9,NeuralNetFastAI_BAG_L2/dcee2eb7_Prune,-18508.835151,mean_absolute_error,41.481488,3821.797244,4.129158,337.384347,2,True,28


In [None]:
# MAE submission from WeightedEnsemble_ALL_L4: template copy with `price` replaced by its predictions.
WeightedEnsemble_ALL_L4 = submission.assign(
    price=predictor.predict(test_data, model='WeightedEnsemble_ALL_L4')
)
WeightedEnsemble_ALL_L4.to_csv(
    os.path.join(base_path, 'autogluon_MAE_WeightedEnsemble_ALL_L4.csv'),
    index=False,
)

In [None]:
# MAE submission from WeightedEnsemble_L2: template copy with `price` replaced by its predictions.
WeightedEnsemble_L2 = submission.assign(
    price=predictor.predict(test_data, model='WeightedEnsemble_L2')
)
WeightedEnsemble_L2.to_csv(
    os.path.join(base_path, 'autogluon_MAE_WeightedEnsemble_L2.csv'),
    index=False,
)

In [None]:
# MAE submission from the pruned FastAI L1 model: template copy with `price` replaced by its predictions.
NeuralNetFastAI_BAG_L1_Prune = submission.assign(
    price=predictor.predict(test_data, model='NeuralNetFastAI_BAG_L1/e0cb5511_Prune')
)
NeuralNetFastAI_BAG_L1_Prune.to_csv(
    os.path.join(base_path, 'autogluon_MAE_NeuralNetFastAI_BAG_L1_Prune.csv'),
    index=False,
)

In [None]:
# MAE submission from CatBoost_BAG_L3/T7: template copy with `price` replaced by its predictions.
CatBoost_BAG_L3_T7 = submission.assign(
    price=predictor.predict(test_data, model='CatBoost_BAG_L3/T7')
)
CatBoost_BAG_L3_T7.to_csv(
    os.path.join(base_path, 'autogluon_MAE_CatBoost_BAG_L3_T7.csv'),
    index=False,
)

In [None]:
# One-off copy of the fitted predictor directory into base_path. Left commented out,
# presumably so that re-running the notebook does not clone it again (confirm).
#predictor.clone(os.path.join(base_path, "Autogluon/202409_MAE_train"))

Cloned TabularPredictor located in '/kaggle/working/Autogluon/202409_MAE_train' to '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MAE_train'.
	To load the cloned predictor: predictor_clone = TabularPredictor.load(path="/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MAE_train")


'/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MAE_train'

# **Autogluon MSE - MAE Training**

## **Analyzing outliers**

In [11]:
# Reload the two previously saved predictors from base_path; judging by the
# variable names, one was tuned for squared error and the other for absolute error.
predictor_mse = TabularPredictor.load(
    path=os.path.join(base_path, "Autogluon/202409_20hr_train")
)
predictor_mae = TabularPredictor.load(
    path=os.path.join(base_path, "Autogluon/202409_MAE_train")
)

In [12]:
# Add both predictors' price estimates to the training frame, plus their
# difference and ratio.
# NOTE(review): if these predictors were fit on these same rows, the values here
# are in-sample predictions while the test-set ones are not — confirm whether
# out-of-fold predictions were intended.
train['mse_price'] = predictor_mse.predict(data=train, model='WeightedEnsemble_L3')
train['mae_price'] = predictor_mae.predict(
    data=train, model='NeuralNetFastAI_BAG_L1/e0cb5511_Prune'
)
train['diff_price'] = train['mse_price'].sub(train['mae_price'])
train['mse_mae_idx'] = train['mse_price'].div(train['mae_price'])

In [13]:
# Add the same four derived columns to the test frame: both predictors' price
# estimates, their difference, and their ratio.
test['mse_price'] = predictor_mse.predict(data=test, model='WeightedEnsemble_L3')
test['mae_price'] = predictor_mae.predict(
    data=test, model='NeuralNetFastAI_BAG_L1/e0cb5511_Prune'
)
test['diff_price'] = test['mse_price'].sub(test['mae_price'])
test['mse_mae_idx'] = test['mse_price'].div(test['mae_price'])

In [14]:
# Squared residual of the `mse_price` prediction on every training row.
train['mse_squared_error'] = train['price'].sub(train['mse_price']).pow(2)

# Order rows from largest to smallest squared error and accumulate, so that
# `pct_cumsum_error` is the share of total squared error explained by this row
# and all rows with a larger error.
train = train.sort_values(by='mse_squared_error', ascending=False)
train['cumsum_error'] = train['mse_squared_error'].cumsum()
train['pct_cumsum_error'] = train['cumsum_error'].div(train['mse_squared_error'].sum())
train = train.sort_values(by='pct_cumsum_error')

# Ratio of the two predictions (same formula as when the prediction columns were added).
train['mse_mae_idx'] = train['mse_price'].div(train['mae_price'])

# Flag as outliers (1) the highest-error rows that together make up 95% of the
# total squared error; everything else is 0.
train['outlier'] = np.where(train['pct_cumsum_error'].le(0.95), 1, 0)

In [15]:
# Summary statistics of the MSE/MAE prediction ratio, split by the outlier flag.
train.groupby(by='outlier')['mse_mae_idx'].aggregate(
    ['mean', 'median', 'std', 'min', 'max']
)

Unnamed: 0_level_0,mean,median,std,min,max
outlier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.05771,1.052996,0.133352,0.391522,8.0205
1,1.186414,1.156738,0.306516,0.569911,9.496945


In [16]:
# Same per-group summary, transposed so each statistic is a row and each outlier
# flag a column; `diff` is flagged (1) minus non-flagged (0).
stats = (
    train.groupby('outlier')['mse_mae_idx']
    .agg(['mean', 'median', 'std', 'min', 'max'])
    .transpose()
)
stats['diff'] = stats[1].sub(stats[0])
stats

outlier,0,1,diff
mean,1.05771,1.186414,0.128704
median,1.052996,1.156738,0.103743
std,0.133352,0.306516,0.173164
min,0.391522,0.569911,0.17839
max,8.0205,9.496945,1.476445


In [17]:
# Number of training rows per outlier flag (0 = not flagged, 1 = flagged).
train.groupby('outlier').size()

Unnamed: 0_level_0,0
outlier,Unnamed: 1_level_1
0,185246
1,7296


In [18]:
import math
from sklearn.metrics import mean_squared_error

# RMSE of the `mse_price` prediction over every training row.
rmse_all = math.sqrt(mean_squared_error(train['price'], train['mse_price']))
print(f"RMSE All: {rmse_all}")

# Same metric restricted to rows whose outlier flag is 0.
rmse = math.sqrt(
    mean_squared_error(
        train.loc[train['outlier'] == 0, 'price'],
        train.loc[train['outlier'] == 0, 'mse_price'],
    )
)
print(f"RMSE without outliers: {rmse}")


RMSE All: 69620.81394374814
RMSE without outliers: 15871.859058116876


## **Training 12 hr**

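This run adds new features derived from the MSE and MAE models: their predicted prices (`mse_price`, `mae_price`), the difference between them (`diff_price`), and their ratio (`mse_mae_idx`).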

In [19]:
# Training frame for the next run: keep the prediction-derived features but drop
# the helper columns that were only needed for the outlier analysis.
train_mse_mae = train.drop(
    columns=['mse_squared_error', 'cumsum_error', 'pct_cumsum_error', 'outlier']
)

In [20]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for scoring the fitted models; the seed is fixed so
# the split is reproducible.
train_split, val_split = train_test_split(
    train_mse_mae,
    test_size=0.1,
    random_state=42,
)

In [26]:
# Setting up
eval_metric = 'rmse'
label = 'price'
problem_type = 'regression'
hours = 12  # training budget in hours; converted to seconds for `time_limit`

# Models to exclude. This list is now actually passed to `fit`; with the explicit
# `hyperparameters` below (no KNN entry) it does not change which models are trained.
excluded_model_types = ['KNN']

# Initialize the TabularPredictor; all artifacts are written under base_path.
predictor = TabularPredictor(
    label=label,
    eval_metric=eval_metric,
    problem_type=problem_type,
    path=os.path.join(base_path, "Autogluon/202409_MSE_MAE_diff_train"),
)

# Fit the model on the 90% split (the remaining rows are kept for the leaderboard).
# The call is the cell's last expression, so the fitted predictor is displayed.
predictor.fit(
    train_data=train_split,
    time_limit=3600 * hours,
    presets="best_quality",
    # Only these model families are trained.
    hyperparameters={
        'GBM': ['GBMLarge'],
        'XGB': {},
        'CAT': {},
        'XT': {},
        'FASTAI': {},
    },
    excluded_model_types=excluded_model_types,
    hyperparameter_tune_kwargs="auto",
    # Bagging / stacking layout: 8 folds, up to 20 bagging repeats, 3 stack levels.
    num_bag_folds=8,
    num_bag_sets=20,
    num_stack_levels=3,
    # Stack layers also see the original features, not only lower-level predictions.
    ag_args_ensemble={'use_orig_features': True},
    # Additionally fit a weighted ensemble over models from all stack levels
    # (the `WeightedEnsemble_ALL_L*` models).
    full_weighted_ensemble_additionally=True,
    # Empty dict: feature pruning with its default settings.
    feature_prune_kwargs={},
)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024
CPU Count:          8
Memory Avail:       36.59 GB / 50.99 GB (71.7%)
Disk Space Avail:   162.38 GB / 225.83 GB (71.9%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=3, num_bag_folds=8, num_bag_sets=20
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 10800s 

[36m(_dystack pid=262164)[0m ╭───────────────────────────────────────────────────────────╮
[36m(_dystack pid=262164)[0m │ Configuration for experiment     NeuralNetFastAI_BAG_L1   │
[36m(_dystack pid=262164)[0m ├───────────────────────────────────────────────────────────┤
[36m(_dystack pid=262164)[0m │ Search algorithm                 SearchGenerator          │
[36m(_dystack pid=262164)[0m │ Scheduler                        FIFOScheduler            │
[36m(_dystack pid=262164)[0m │ Number of trials                 1000                     │
[36m(_dystack pid=262164)[0m ╰───────────────────────────────────────────────────────────╯
[36m(_dystack pid=262164)[0m 
[36m(_dystack pid=262164)[0m View detailed results here: /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L1


[36m(_dystack pid=262164)[0m Reached timeout of 647.5075974633229 seconds. Stopping all trials.
[36m(_dystack pid=262164)[0m Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.
[36m(_dystack pid=262164)[0m You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
[36m(_dystack pid=262164)[0m You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
[36m(_dystack pid=262164)[0m Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L1' in 0.0

[36m(_dystack pid=262164)[0m 


[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.24%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.24%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.29%)
[36m(_dystack pid=262164)[0m 	Stopping HPO to satisfy time limit...
[36m(_dystack pid=262164)[0m Fitted model: XGBoost_BAG_L1/T1 ...
[36m(_dystack pid=262164)[0m 	-59588.7685	 = Validation score   (-root_mean_squared_error)
[36m(_dystack pid=262164)[0m 	161.31s	 = Training   runtime
[36m(_dystack pid=262164)[0m 	0.95s	 = Validation runtime
[36m(_dystack pid=262164)[0m Fitted model: XGBoost_BAG_L1/T2 ...
[36m(_dystack pid=262164)[0m 	-59218.126	 = Validation score   (-root_mean_squared_erro

[36m(_dystack pid=262164)[0m ╭───────────────────────────────────────────────────────────╮
[36m(_dystack pid=262164)[0m │ Configuration for experiment     NeuralNetFastAI_BAG_L2   │
[36m(_dystack pid=262164)[0m ├───────────────────────────────────────────────────────────┤
[36m(_dystack pid=262164)[0m │ Search algorithm                 SearchGenerator          │
[36m(_dystack pid=262164)[0m │ Scheduler                        FIFOScheduler            │
[36m(_dystack pid=262164)[0m │ Number of trials                 1000                     │
[36m(_dystack pid=262164)[0m ╰───────────────────────────────────────────────────────────╯
[36m(_dystack pid=262164)[0m 
[36m(_dystack pid=262164)[0m View detailed results here: /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L2


[36m(_dystack pid=262164)[0m Reached timeout of 717.9792926321554 seconds. Stopping all trials.
[36m(_dystack pid=262164)[0m Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.
[36m(_dystack pid=262164)[0m You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
[36m(_dystack pid=262164)[0m You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
[36m(_dystack pid=262164)[0m Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L2' in 0.1

[36m(_dystack pid=262164)[0m 


[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.36%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.35%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.43%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.35%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.52%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.35%)
[36m(_dystack p

[36m(_dystack pid=262164)[0m ╭───────────────────────────────────────────────────────────╮
[36m(_dystack pid=262164)[0m │ Configuration for experiment     NeuralNetFastAI_BAG_L3   │
[36m(_dystack pid=262164)[0m ├───────────────────────────────────────────────────────────┤
[36m(_dystack pid=262164)[0m │ Search algorithm                 SearchGenerator          │
[36m(_dystack pid=262164)[0m │ Scheduler                        FIFOScheduler            │
[36m(_dystack pid=262164)[0m │ Number of trials                 1000                     │
[36m(_dystack pid=262164)[0m ╰───────────────────────────────────────────────────────────╯
[36m(_dystack pid=262164)[0m 
[36m(_dystack pid=262164)[0m View detailed results here: /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L3


[36m(_dystack pid=262164)[0m Reached timeout of 823.9286307168388 seconds. Stopping all trials.
[36m(_dystack pid=262164)[0m Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L3' in 0.0401s.
[36m(_dystack pid=262164)[0m Failed to fetch metrics for 8 trial(s):
[36m(_dystack pid=262164)[0m - 7242b410: FileNotFoundError('Could not fetch metrics for 7242b410: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L3/7242b410')
[36m(_dystack pid=262164)[0m - b468bd60: FileNotFoundError('Could not fetch metrics for b468bd60: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9

[36m(_dystack pid=262164)[0m 


[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.39%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.39%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.48%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.39%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.58%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.39%)
[36m(_dystack p

[36m(_dystack pid=262164)[0m ╭───────────────────────────────────────────────────────────╮
[36m(_dystack pid=262164)[0m │ Configuration for experiment     NeuralNetFastAI_BAG_L4   │
[36m(_dystack pid=262164)[0m ├───────────────────────────────────────────────────────────┤
[36m(_dystack pid=262164)[0m │ Search algorithm                 SearchGenerator          │
[36m(_dystack pid=262164)[0m │ Scheduler                        FIFOScheduler            │
[36m(_dystack pid=262164)[0m │ Number of trials                 1000                     │
[36m(_dystack pid=262164)[0m ╰───────────────────────────────────────────────────────────╯
[36m(_dystack pid=262164)[0m 
[36m(_dystack pid=262164)[0m View detailed results here: /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L4


[36m(_dystack pid=262164)[0m Reached timeout of 801.1011560153961 seconds. Stopping all trials.
[36m(_dystack pid=262164)[0m Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L4' in 0.1855s.
[36m(_dystack pid=262164)[0m Failed to fetch metrics for 5 trial(s):
[36m(_dystack pid=262164)[0m - 931a472d: FileNotFoundError('Could not fetch metrics for 931a472d: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L4/931a472d')
[36m(_dystack pid=262164)[0m - fcd8d45d: FileNotFoundError('Could not fetch metrics for fcd8d45d: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9

[36m(_dystack pid=262164)[0m 


[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.48%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.44%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.53%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.44%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.65%)
[36m(_dystack pid=262164)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.44%)
[36m(_dystack p

+-----------------------------------------------------------+
| Configuration for experiment     NeuralNetFastAI_BAG_L1   |
+-----------------------------------------------------------+
| Search algorithm                 SearchGenerator          |
| Scheduler                        FIFOScheduler            |
| Number of trials                 1000                     |
+-----------------------------------------------------------+

View detailed results here: /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L1


2024-09-30 08:11:26,766	INFO timeout.py:54 -- Reached timeout of 2063.5749919653294 seconds. Stopping all trials.
2024-09-30 08:11:26,824	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L1' in 0.0537s.
- 44fb0226: FileNotFoundError('Could not fetch metrics for 44fb0226: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L1/44fb0226')
- 718d7ce3: FileNotFoundError('Could not fetch metrics for 718d7ce3: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L1/718d7ce3')
- 7b4bcafe: FileNotFoundError('Could not fet




	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.26%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.26%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.33%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.26%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.37%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.26%)
	Stopping HPO to satisfy time limit...
Fitted model: XGBoost_BAG_L1/T1 ...
	-59828.4542	 = Validation score   (-root_mean_squared_error)
	125.87s	 = Training   runtime
	0.87s	 = Validation runtime
Fitte

+-----------------------------------------------------------+
| Configuration for experiment     NeuralNetFastAI_BAG_L2   |
+-----------------------------------------------------------+
| Search algorithm                 SearchGenerator          |
| Scheduler                        FIFOScheduler            |
| Number of trials                 1000                     |
+-----------------------------------------------------------+

View detailed results here: /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L2


2024-09-30 09:56:23,129	INFO timeout.py:54 -- Reached timeout of 2294.056481796985 seconds. Stopping all trials.
You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-09-30 09:56:23,205	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L2' in 0.0596s.
- 325da827: FileNotFoundError('Could not fetch metrics for 325da827: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L2/325da827')
- 44d2dc4e: FileNotFoundErro




	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.55%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.51%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.60%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.51%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.73%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.51%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.60%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFol

+-----------------------------------------------------------+
| Configuration for experiment     NeuralNetFastAI_BAG_L3   |
+-----------------------------------------------------------+
| Search algorithm                 SearchGenerator          |
| Scheduler                        FIFOScheduler            |
| Number of trials                 1000                     |
+-----------------------------------------------------------+

View detailed results here: /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L3


2024-09-30 12:02:09,786	INFO timeout.py:54 -- Reached timeout of 2646.3139840552926 seconds. Stopping all trials.
2024-09-30 12:02:09,826	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L3' in 0.0371s.
- efa81922: FileNotFoundError('Could not fetch metrics for efa81922: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L3/efa81922')
- ffea7463: FileNotFoundError('Could not fetch metrics for ffea7463: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L3/ffea7463')
- c773cf7e: FileNotFoundError('Could not fet




	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.58%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.58%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.69%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.58%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.83%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.58%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.69%)
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFol

+-----------------------------------------------------------+
| Configuration for experiment     NeuralNetFastAI_BAG_L4   |
+-----------------------------------------------------------+
| Search algorithm                 SearchGenerator          |
| Scheduler                        FIFOScheduler            |
| Number of trials                 1000                     |
+-----------------------------------------------------------+

View detailed results here: /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L4


2024-09-30 14:06:58,390	INFO timeout.py:54 -- Reached timeout of 2567.077212138176 seconds. Stopping all trials.
2024-09-30 14:06:58,454	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L4' in 0.0539s.
- 2d8f5f74: FileNotFoundError('Could not fetch metrics for 2d8f5f74: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L4/2d8f5f74')
- 9d349a0f: FileNotFoundError('Could not fetch metrics for 9d349a0f: both result.json and progress.csv were not found at /content/drive/MyDrive/DS_Projects/Playground_Series/ps4s9_Regression_Use_Cars/Data/Autogluon/202409_MSE_MAE_diff_train/models/NeuralNetFastAI_BAG_L4/9d349a0f')
- 626677e8: FileNotFoundError('Could not fetc




Fitted model: NeuralNetFastAI_BAG_L4/197d86d0 ...
	-55364.0882	 = Validation score   (-root_mean_squared_error)
	238.53s	 = Training   runtime
	2.41s	 = Validation runtime
Fitted model: NeuralNetFastAI_BAG_L4/d264a2d3 ...
	-54469.2591	 = Validation score   (-root_mean_squared_error)
	62.64s	 = Training   runtime
	1.64s	 = Validation runtime
Fitted model: NeuralNetFastAI_BAG_L4/ae3ce58d ...
	-54915.981	 = Validation score   (-root_mean_squared_error)
	176.03s	 = Training   runtime
	9.91s	 = Validation runtime
Fitted model: NeuralNetFastAI_BAG_L4/81916fa5 ...
	-54592.6696	 = Validation score   (-root_mean_squared_error)
	75.18s	 = Training   runtime
	1.79s	 = Validation runtime
Fitted model: NeuralNetFastAI_BAG_L4/45e5f9a9 ...
	-55298.1103	 = Validation score   (-root_mean_squared_error)
	249.71s	 = Training   runtime
	7.68s	 = Validation runtime
Fitted model: NeuralNetFastAI_BAG_L4/cff97826 ...
	-55071.1257	 = Validation score   (-root_mean_squared_error)
	188.19s	 = Training   runtime


<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7a378c3c4310>

In [21]:
# Reload the predictor saved by the 12-hour run from disk, so the cells below can
# be used without refitting.
predictor = TabularPredictor.load(
    path=os.path.join(base_path, "Autogluon/202409_MSE_MAE_diff_train")
)

In [None]:
# Score every fitted model on the held-out split and display the resulting table.
leaderboard_test = predictor.leaderboard(data=val_split)
leaderboard_test

In [None]:
# Ten best models by `score_val` (scores are negated errors, so larger is better).
leaderboard_test.sort_values(by='score_val', ascending=False).iloc[:10]

In [30]:
# Submission from a single level-3 FastAI bag (trial cb0a7cc5): sample-submission
# frame with `price` replaced by that model's predictions on `test`, saved as CSV.
sub_NeuralNetFastAI_BAG_L3 = submission.assign(
    price=predictor.predict(test, model='NeuralNetFastAI_BAG_L3/cb0a7cc5')
)
sub_NeuralNetFastAI_BAG_L3.to_csv(
    os.path.join(base_path, 'autogluon_MSE_Idx_NeuralNetFastAI_BAG_L3.csv'),
    index=False,
)

In [None]:
# Submission from the all-levels weighted ensemble: sample-submission frame with
# `price` replaced by that model's predictions on `test`, saved as CSV.
sub_WeightedEnsemble_ALL_L5 = submission.assign(
    price=predictor.predict(test, model='WeightedEnsemble_ALL_L5')
)
sub_WeightedEnsemble_ALL_L5.to_csv(
    os.path.join(base_path, 'autogluon_MSE_Idx_WeightedEnsemble_ALL_L5.csv'),
    index=False,
)