In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
import math
import warnings


warnings.filterwarnings('ignore')

print("All libraries imported successfully.")


All libraries imported successfully.


In [7]:
# --- Load the Dataset ---
# We use the cleaned CSV file provided.
file_path = '/Users/clarencemarvin/Downloads/centanet_cleaned_proximity.csv'

try:
    data = pd.read_csv(file_path)
    print(f"Dataset loaded successfully: {file_path}")
    print(f"Dataset shape: {data.shape}")
except FileNotFoundError:
    print(f"ERROR: File not found at {file_path}")
    print("Please make sure the file 'centanet_data_cleaned_for_model.csv' is in the correct folder.")

# Display basic data info to confirm
if 'data' in locals():
    print("\nData Head:")
    print(data.head())
    print("\nData Types:")
    print(data.info())

Dataset loaded successfully: /Users/clarencemarvin/Downloads/centanet_cleaned_proximity.csv
Dataset shape: (6156, 22)

Data Head:
                           property_name            district  bedroom_count  \
0                    LAKE SILVER Tower 5          Wu Kai Sha              2   
1      TSUEN KING GARDEN PHASE 2 BLOCK 8  Tsuen King Circuit              3   
2       BELVEDERE GARDEN PHASE 1 BLOCK 3           Belvedere              3   
3             MARBELLA BLOCK 1 (TOWER A)          Ma On Shan              3   
4  LOHAS PARK PHASE 10 LP10 Tower 1 (1A)          Lohas Park              3   

      price  property_age  saleable_area  pet_policy   latitude   longitude  \
0   7600000            16            573        True  22.428921  114.244390   
1   5800000            37            577       False  22.378113  114.110063   
2   5400000            38            644       False  22.372633  114.101914   
3   7300000            24            737        True  22.427676  114.240275   


In [8]:
# Dataset 1: Manual encoding for RF and SVR
print("=== Creating Dataset 1: Manual Encoding (for RF and SVR) ===")

# Create a copy for manual encoding
data_encoded = data.copy()

# One-hot encode categorical features
categorical_features = ['property_name', 'district']
data_encoded_dummy = pd.get_dummies(data_encoded, columns=categorical_features, prefix=categorical_features)

print(f"Original dataset shape: {data.shape}")
print(f"After one-hot encoding shape: {data_encoded_dummy.shape}")
print(f"Added {data_encoded_dummy.shape[1] - data.shape[1]} new columns from encoding")

# Prepare X and y for encoded dataset
X_encoded = data_encoded_dummy.drop('price', axis=1)
y_encoded = data_encoded_dummy['price']

print(f"Encoded dataset columns: {list(X_encoded.columns)}")

# ===================================================================

print("\n=== Creating Dataset 2: Native Categorical (for XGB, CatBoost, LightGBM) ===")

# Create a copy for native categorical handling
data_categorical = data.copy()

# Convert categorical columns to proper dtype
data_categorical['property_name'] = data_categorical['property_name'].astype('category')
data_categorical['district'] = data_categorical['district'].astype('category')

print("Categorical columns info:")
print(f"property_name - {data_categorical['property_name'].nunique()} unique categories")
print(f"district - {data_categorical['district'].nunique()} unique categories")

# Prepare X and y for categorical dataset
X_categorical = data_categorical.drop('price', axis=1)
y_categorical = data_categorical['price']

print(f"Categorical dataset shape: {X_categorical.shape}")
print(f"Data types:")
print(X_categorical.dtypes)

print("\n=== Both datasets ready! ===")
print("Dataset 1 (X_encoded, y_encoded) - for RF and SVR")
print("Dataset 2 (X_categorical, y_categorical) - for XGB, CatBoost, LightGBM")

=== Creating Dataset 1: Manual Encoding (for RF and SVR) ===
Original dataset shape: (6156, 22)
After one-hot encoding shape: (6156, 4418)
Added 4396 new columns from encoding
Encoded dataset columns: ['bedroom_count', 'property_age', 'saleable_area', 'pet_policy', 'latitude', 'longitude', 'travel_time_to_cbd', 'walking_time_to_mtr', 'total_poi_within_1000m', 'category_Community_Facilities_within_1000m', 'category_Education_within_2000m', 'category_Recreation_within_1000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'category_Religion_within_2000m', 'category_Transportation_within_1000m', 'category_Tourism_within_2000m', 'distance_to_nearest_mtr_km', 'distance_to_nearest_match_km', 'property_name_#LYOS Maisonette Building C', 'property_name_10 LASALLE', 'property_name_128 WATERLOO', 'property_name_128 Waterloo', 'property_name_15-15A HO MAN TIN STREET', 'property_name_18 Upper East', 'property_name_2Gether', 'property_name_3 CHICO TERRACE', 'property_name_3

In [9]:
# --- Feature Engineering and Preprocessing ---

# 1. Drop columns not useful for modeling
columns_to_drop = ['latitude', 'longitude', 'walking_time_to_mtr','category_Community_Facilities_within_1000m', 'distance_to_nearest_match_km', 'distance_to_nearest_mtr_km', 'category_Recreation_within_1000m', 'category_Religion_within_2000m', 'category_Tourism_within_2000m', 'category_Transportation_within_1000m']
data = data.drop(columns=[col for col in columns_to_drop if col in data.columns])

# 2. Feature Engineering: Convert 'pet_policy' to a binary feature
# 1 if a policy is listed (Not-NA), 0 if it's missing (NA)
if 'pet_policy' in data.columns:
    data['pet_policy_binary'] = data['pet_policy'].notna().astype(int)
    data = data.drop('pet_policy', axis=1)
    print("Feature 'pet_policy_binary' created.")

# 3. Define the target variable (y) and features (X)
y = data['price']
X = data.drop('price', axis=1)

# 4. Identify numerical and categorical columns
# We will need these lists for our preprocessors
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()

print(f"\nIdentified {len(numerical_cols)} numerical features:")
print(numerical_cols)
print(f"\nIdentified {len(categorical_cols)} categorical features:")
print(categorical_cols)

# 5. Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"\nData split into training and testing sets:")
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

Feature 'pet_policy_binary' created.

Identified 9 numerical features:
['bedroom_count', 'property_age', 'saleable_area', 'travel_time_to_cbd', 'total_poi_within_1000m', 'category_Education_within_2000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'pet_policy_binary']

Identified 2 categorical features:
['property_name', 'district']

Data split into training and testing sets:
X_train shape: (4924, 11)
X_test shape: (1232, 11)


In [10]:
# --- Model Evaluation Function ---

# This helper function will calculate all our performance metrics
# and return them in a dictionary.
# We store 'y_pred' so we can use it for our final CSV.

def evaluate_model(model, X_test, y_test, model_name):
    """Calculates R2, RMSE, and Accuracy metrics for a trained model."""
    
    # Get predictions
    y_pred = model.predict(X_test)
    
    # Calculate metrics
    r2 = r2_score(y_test, y_pred)
    rmse = math.sqrt(mean_squared_error(y_test, y_pred))
    
    # Calculate percentage error for accuracy
    percent_error = np.abs((y_test - y_pred) / y_test)
    accuracy_10 = np.mean(percent_error < 0.1) * 100  # Within 10%
    accuracy_20 = np.mean(percent_error < 0.2) * 100  # Within 20%
    accuracy_30 = np.mean(percent_error < 0.3) * 100  # Within 30%
    
    print(f"\n--- {model_name} Performance ---")
    print(f"R-squared: {r2:.4f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"Accuracy (error < 10%): {accuracy_10:.2f}%")
    print(f"Accuracy (error < 20%): {accuracy_20:.2f}%")
    print(f"Accuracy (error < 30%): {accuracy_30:.2f}%")
    
    # Return results as a dictionary
    return {
        'model_name': model_name,
        'r2': r2,
        'rmse': rmse,
        'accuracy_10': accuracy_10,
        'accuracy_20': accuracy_20,
        'accuracy_30': accuracy_30,
        'y_pred': y_pred
    }

In [11]:
# --- Model Evaluation Function ---

# This helper function will calculate all our performance metrics
# and return them in a dictionary.
# We store 'y_pred' so we can use it for our final CSV.

def evaluate_model(model, X_test, y_test, model_name):
    """Calculates R2, RMSE, and Accuracy metrics for a trained model."""
    
    # Get predictions
    y_pred = model.predict(X_test)
    
    # Calculate metrics
    r2 = r2_score(y_test, y_pred)
    rmse = math.sqrt(mean_squared_error(y_test, y_pred))
    
    # Calculate percentage error for accuracy
    percent_error = np.abs((y_test - y_pred) / y_test)
    accuracy_10 = np.mean(percent_error < 0.1) * 100  # Within 10%
    accuracy_20 = np.mean(percent_error < 0.2) * 100  # Within 20%
    accuracy_30 = np.mean(percent_error < 0.3) * 100  # Within 30%
    
    print(f"\n--- {model_name} Performance ---")
    print(f"R-squared: {r2:.4f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"Accuracy (error < 10%): {accuracy_10:.2f}%")
    print(f"Accuracy (error < 20%): {accuracy_20:.2f}%")
    print(f"Accuracy (error < 30%): {accuracy_30:.2f}%")
    
    # Return results as a dictionary
    return {
        'model_name': model_name,
        'r2': r2,
        'rmse': rmse,
        'accuracy_10': accuracy_10,
        'accuracy_20': accuracy_20,
        'accuracy_30': accuracy_30,
        'y_pred': y_pred  # Store predictions for final CSV
    }

In [12]:
# --- General Preprocessor (for RF, SVR, XGBoost) ---

# This preprocessor will be used by models that require
# explicit One-Hot Encoding for categorical data.

# Pipeline for numerical features:
# 1. Impute missing values with the median
# 2. Scale features using StandardScaler (very important for SVR)
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Pipeline for categorical features:
# 1. Impute missing values with the string 'missing'
# 2. One-Hot Encode the features
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

# Combine these transformers into a single ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ],
    verbose_feature_names_out=False
)

print("General preprocessor (with One-Hot Encoding) created.")

#keknya gabutuh

General preprocessor (with One-Hot Encoding) created.


In [23]:
#new


In [14]:
from sklearn.model_selection import train_test_split
import numpy as np

# STEP 1: Clean your features first
print("=== Feature Cleaning ===")
print("Current X columns:", list(X_categorical.columns))

if 'property_name' in X_categorical.columns:
    X_categorical = X_categorical.drop('property_name', axis=1)
    print("✅ Removed property_name from features")

if 'distance_to_nearest_mtr_km' in X_categorical.columns:
    X_categorical = X_categorical.drop('distance_to_nearest_mtr_km', axis=1)
    print("✅ Removed distance_to_nearest_mtr_km from features")

# Remove additional columns
columns_to_drop = [
    'latitude', 
    'longitude', 
    'walking_time_to_mtr',
    'category_Community_Facilities_within_1000m', 
    'distance_to_nearest_match_km', 
    'distance_to_nearest_mtr_km', 
    'category_Recreation_within_1000m', 
    'category_Religion_within_2000m', 
    'category_Tourism_within_2000m', 
    'category_Transportation_within_1000m'
]

# Drop columns that exist in the dataframe
columns_dropped = []
for col in columns_to_drop:
    if col in X_categorical.columns:
        X_categorical = X_categorical.drop(col, axis=1)
        columns_dropped.append(col)

if columns_dropped:
    print(f"✅ Removed {len(columns_dropped)} additional columns: {', '.join(columns_dropped)}")
else:
    print("ℹ️ None of the additional columns were found in the dataframe")

if 'pet_policy' in X_categorical.columns:
    X_categorical['pet_policy_binary'] = X_categorical['pet_policy'].notna().astype(int)
    X_categorical = X_categorical.drop('pet_policy', axis=1)
    print("Feature 'pet_policy_binary' created.")

print("Final X columns:", list(X_categorical.columns))
print(f"X shape: {X_categorical.shape}")

# STEP 2: Three-way data split
print("\n=== Creating Three-Way Data Split ===")

# First split: separate out test set (20% of total data)
X_temp, X_test, y_temp, y_test = train_test_split(
    X_categorical, 
    y_categorical, 
    test_size=0.2,      # 20% for final testing
    random_state=42
)

# Second split: divide remaining 80% into train (60% of total) and validation (20% of total)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, 
    y_temp, 
    test_size=0.25,     # 25% of 80% = 20% of total for validation
    random_state=42
)

# Display the split sizes
total_samples = len(X_categorical)
print(f"\nTotal samples: {total_samples:,}")
print(f"Training set: {len(X_train):,} samples ({len(X_train)/total_samples*100:.1f}%)")
print(f"Validation set: {len(X_val):,} samples ({len(X_val)/total_samples*100:.1f}%)")
print(f"Test set: {len(X_test):,} samples ({len(X_test)/total_samples*100:.1f}%)")

# Verify the split adds up correctly
assert len(X_train) + len(X_val) + len(X_test) == total_samples
print(f"✅ Split verification: {len(X_train)} + {len(X_val)} + {len(X_test)} = {total_samples}")

# Check target variable distribution across splits
print(f"\n=== Target Variable Distribution ===")
print(f"Training set - Mean: ${y_train.mean():,.2f}, Std: ${y_train.std():,.2f}")
print(f"Validation set - Mean: ${y_val.mean():,.2f}, Std: ${y_val.std():,.2f}")
print(f"Test set - Mean: ${y_test.mean():,.2f}, Std: ${y_test.std():,.2f}")
print(f"Original data - Mean: ${y_categorical.mean():,.2f}, Std: ${y_categorical.std():,.2f}")

# Check feature consistency across splits
print(f"\n=== Feature Consistency Check ===")
print(f"All splits have same features: {list(X_train.columns) == list(X_val.columns) == list(X_test.columns)}")
print(f"Number of features: {X_train.shape[1]}")

print(f"\n🎯 Data is now properly split and ready for modeling!")
print(f"Next steps:")
print(f"1. Use X_train/y_train for training")
print(f"2. Use X_val/y_val for hyperparameter optimization")
print(f"3. Use X_test/y_test ONLY for final evaluation")

=== Feature Cleaning ===
Current X columns: ['district', 'bedroom_count', 'property_age', 'saleable_area', 'travel_time_to_cbd', 'total_poi_within_1000m', 'category_Education_within_2000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'pet_policy_binary']
ℹ️ None of the additional columns were found in the dataframe
Final X columns: ['district', 'bedroom_count', 'property_age', 'saleable_area', 'travel_time_to_cbd', 'total_poi_within_1000m', 'category_Education_within_2000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'pet_policy_binary']
X shape: (6156, 10)

=== Creating Three-Way Data Split ===

Total samples: 6,156
Training set: 3,693 samples (60.0%)
Validation set: 1,231 samples (20.0%)
Test set: 1,232 samples (20.0%)
✅ Split verification: 3693 + 1231 + 1232 = 6156

=== Target Variable Distribution ===
Training set - Mean: $8,899,962.09, Std: $5,112,105.30
Validation set - Mean: $8,811,689.68, Std: $4,790,063.20
Test set - Mea

In [48]:
X_train.columns

Index(['district', 'bedroom_count', 'property_age', 'saleable_area',
       'travel_time_to_cbd', 'total_poi_within_1000m',
       'category_Education_within_2000m', 'category_Medical_within_2000m',
       'category_Public_Market_within_1000m', 'pet_policy_binary'],
      dtype='object')

In [15]:
import optuna
from sklearn.model_selection import cross_val_score
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

def lgb_objective(trial):
    # Test L1 vs L2 regularization with 2.5-5.0 range
    reg_alpha = trial.suggest_float('reg_alpha', 2.5, 5.0)  # L1 regularization
    reg_lambda = trial.suggest_float('reg_lambda', 2.5, 5.0)  # L2 regularization
    
    params = {
        'num_leaves': trial.suggest_int('num_leaves', 31, 150),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 15, 35),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.7, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 300),
        'learning_rate': trial.suggest_float('learning_rate', 0.05, 0.2),
        'max_depth': trial.suggest_int('max_depth', 5, 7),
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda,
        'objective': 'regression',
        'random_state': 42,
        'verbosity': -1,
        'categorical_feature': 'auto'
    }
    
    try:
        model = lgb.LGBMRegressor(**params)
        
        # ✅ CHANGE 1: Train on training set only
        model.fit(X_train, y_train)
        
        # ✅ CHANGE 2: Evaluate on validation set (not training!)
        val_predictions = model.predict(X_val)
        val_mse = mean_squared_error(y_val, val_predictions)
        
        # ✅ CHANGE 3: Return negative MSE for maximization
        return -val_mse
        
    except Exception as e:
        print(f"Trial {trial.number} failed: {e}")
        return float('-inf')

# Progress callback
def print_progress(study, trial):
    if trial.number % 25 == 0:
        # ✅ CHANGE 4: Show validation RMSE
        val_rmse = np.sqrt(-study.best_value)
        print(f"Completed {trial.number + 1} trials. Best validation RMSE: ${val_rmse:,.2f}")

print("Starting LightGBM hyperparameter optimization with L1/L2 regularization...")
print("🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)")  # ✅ CHANGE 5
print("Testing 100 different parameter combinations...")
print("Parameter ranges:")
print("  num_leaves: 31-150")
print("  min_data_in_leaf: 15-35") 
print("  feature_fraction: 0.7-1.0")
print("  n_estimators: 100-300")
print("  learning_rate: 0.05-0.2")
print("  max_depth: 5-7")
print("  reg_alpha (L1): 2.5-5.0")
print("  reg_lambda (L2): 2.5-5.0")

print(f"\nUsing proper train/validation/test splits:")
print(f"  Training samples: {len(X_train):,}")
print(f"  Validation samples: {len(X_val):,}")
print(f"  Test samples: {len(X_test):,}")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.NopPruner()
)

study.optimize(
    lgb_objective, 
    n_trials=100,
    timeout=7200,
    callbacks=[print_progress],
    show_progress_bar=True,
    n_jobs=1
)

# ✅ CHANGE 6: Updated results display
best_val_rmse = np.sqrt(-study.best_value)
print(f"\n=== LightGBM Optuna Results (Optimized for Validation) ===")
print(f"Completed {len(study.trials)} out of 100 trials")
print("Best trial:")
print(f"  Trial number: {study.best_trial.number}")
print(f"  Value (neg_MSE): {study.best_trial.value}")
print(f"  Validation RMSE: ${best_val_rmse:,.2f}")
print("  Best params:")
for key, value in study.best_trial.params.items():
    print(f"    {key}: {value}")

# Regularization analysis
best_l1 = study.best_trial.params['reg_alpha']
best_l2 = study.best_trial.params['reg_lambda']
print(f"\n=== Regularization Analysis ===")
print(f"Best L1 (reg_alpha): {best_l1:.4f}")
print(f"Best L2 (reg_lambda): {best_l2:.4f}")
print(f"Total regularization: {best_l1 + best_l2:.4f}")

if best_l1 > best_l2 * 1.5:
    print("✨ L1 regularization strongly preferred (feature selection focus)")
elif best_l2 > best_l1 * 1.5:
    print("✨ L2 regularization strongly preferred (weight shrinkage focus)")
elif best_l1 > 0.1 and best_l2 > 0.1:
    print("✨ Mixed L1+L2 regularization preferred (Elastic Net)")
else:
    print("✨ Minimal regularization preferred")

print("\nTraining final LightGBM model with best parameters...")
best_lgb_model = lgb.LGBMRegressor(
    **study.best_trial.params, 
    objective='regression',
    random_state=42,
    verbosity=-1,
    categorical_feature='auto'
)

# Fit on training data only
best_lgb_model.fit(X_train, y_train)

# Get predictions for all three sets
train_predictions = best_lgb_model.predict(X_train)
val_predictions = best_lgb_model.predict(X_val)
test_predictions = best_lgb_model.predict(X_test)

# Calculate metrics for all three sets
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
val_rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

train_mae = mean_absolute_error(y_train, train_predictions)
val_mae = mean_absolute_error(y_val, val_predictions)
test_mae = mean_absolute_error(y_test, test_predictions)

train_r2 = r2_score(y_train, train_predictions)
val_r2 = r2_score(y_val, val_predictions)
test_r2 = r2_score(y_test, test_predictions)

# Display performance across all sets
print(f"\n=== Model Performance Across All Sets ===")
print(f"Training Set:")
print(f"  RMSE: ${train_rmse:,.2f}")
print(f"  MAE: ${train_mae:,.2f}")
print(f"  R²: {train_r2:.4f}")

print(f"\nValidation Set:")
print(f"  RMSE: ${val_rmse:,.2f}")
print(f"  MAE: ${val_mae:,.2f}")
print(f"  R²: {val_r2:.4f}")

print(f"\nTest Set (UNSEEN DATA):")
print(f"  RMSE: ${test_rmse:,.2f}")
print(f"  MAE: ${test_mae:,.2f}")
print(f"  R²: {test_r2:.4f}")

# Check for overfitting
print(f"\n=== Overfitting Analysis ===")
if train_rmse < val_rmse * 0.8:
    print(f"⚠️  WARNING: Possible overfitting detected!")
    print(f"   Training RMSE ({train_rmse:,.2f}) much lower than validation RMSE ({val_rmse:,.2f})")
else:
    print(f"✅ Model generalizes well - training and validation performance similar")

if abs(val_rmse - test_rmse) / val_rmse < 0.1:
    print(f"✅ Validation set is good proxy for test performance")
else:
    print(f"⚠️  Large difference between validation and test performance")

# Create results with TEST set only (true unseen data)
results_df = X_test.copy()
results_df['actual_price'] = y_test
results_df['lgb_predicted_price'] = test_predictions
results_df['prediction_error'] = results_df['actual_price'] - results_df['lgb_predicted_price']
results_df['absolute_error'] = abs(results_df['prediction_error'])

# Save TEST ONLY results
results_df.to_csv('/Users/clarencemarvin/Downloads/regularized/lgb_predictions_regularized.csv', index=False)

# ✅ NEW: Create combined results with ALL data (train + val + test)
train_df = X_train.copy()
train_df['dataset'] = 'train'
train_df['actual_price'] = y_train
train_df['lgb_predicted_price'] = train_predictions
train_df['prediction_error'] = train_df['actual_price'] - train_df['lgb_predicted_price']
train_df['absolute_error'] = abs(train_df['prediction_error'])

val_df = X_val.copy()
val_df['dataset'] = 'validation'
val_df['actual_price'] = y_val
val_df['lgb_predicted_price'] = val_predictions
val_df['prediction_error'] = val_df['actual_price'] - val_df['lgb_predicted_price']
val_df['absolute_error'] = abs(val_df['prediction_error'])

test_df = X_test.copy()
test_df['dataset'] = 'test'
test_df['actual_price'] = y_test
test_df['lgb_predicted_price'] = test_predictions
test_df['prediction_error'] = test_df['actual_price'] - test_df['lgb_predicted_price']
test_df['absolute_error'] = abs(test_df['prediction_error'])

# Combine all three datasets
all_results_df = pd.concat([train_df, val_df, test_df], ignore_index=True)

# Save COMBINED results
all_results_df.to_csv('/Users/clarencemarvin/Downloads/regularized/lgb_predictions_all_data.csv', index=False)

print(f"\n✅ Saved two CSV files:")
print(f"   1. Test only: lgb_predictions_regularized.csv ({len(results_df):,} rows)")
print(f"   2. All data: lgb_predictions_all_data.csv ({len(all_results_df):,} rows)")

print(f"\n=== Feature Importance Analysis ===")
feature_importance = best_lgb_model.feature_importances_
feature_names = X_train.columns

importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': feature_importance
}).sort_values('importance', ascending=False)

print("Top features by importance:")
for idx, row in importance_df.head(10).iterrows():
    print(f"  {row['feature']}: {row['importance']:.4f}")

importance_df['importance_pct'] = (importance_df['importance'] / importance_df['importance'].sum()) * 100
importance_df.to_csv('/Users/clarencemarvin/Downloads/regularized2/lgb_feature_importance.csv', index=False)

# Trial summary
failed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.FAIL]
print(f"\nTrial Summary:")
print(f"  Total trials: {len(study.trials)}")
print(f"  Successful trials: {len(study.trials) - len(failed_trials)}")
print(f"  Failed trials: {len(failed_trials)}")

print(f"\n🎯 FINAL RESULT: Test RMSE ${test_rmse:,.2f} on truly unseen data!")

[I 2025-11-24 20:53:18,006] A new study created in memory with name: no-name-2fc46dfc-9629-4492-9aa3-1146e2ef2d1a


Starting LightGBM hyperparameter optimization with L1/L2 regularization...
🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)
Testing 100 different parameter combinations...
Parameter ranges:
  num_leaves: 31-150
  min_data_in_leaf: 15-35
  feature_fraction: 0.7-1.0
  n_estimators: 100-300
  learning_rate: 0.05-0.2
  max_depth: 5-7
  reg_alpha (L1): 2.5-5.0
  reg_lambda (L2): 2.5-5.0

Using proper train/validation/test splits:
  Training samples: 3,693
  Validation samples: 1,231
  Test samples: 1,232


Best trial: 0. Best value: -2.71105e+12:   1%|          | 1/100 [00:00<01:20,  1.24it/s, 0.81/7200 seconds]

[I 2025-11-24 20:53:18,822] Trial 0 finished with value: -2711048151192.732 and parameters: {'reg_alpha': 2.61605014760634, 'reg_lambda': 3.434584137704446, 'max_depth': 7, 'num_leaves': 72, 'min_data_in_leaf': 28, 'feature_fraction': 0.8706648511635897, 'n_estimators': 257, 'learning_rate': 0.11846793264367296}. Best is trial 0 with value: -2711048151192.732.
Completed 1 trials. Best validation RMSE: $1,646,526.09


Best trial: 1. Best value: -2.63204e+12:   2%|▏         | 2/100 [00:01<00:48,  2.03it/s, 1.08/7200 seconds]

[I 2025-11-24 20:53:19,093] Trial 1 finished with value: -2632038959980.827 and parameters: {'reg_alpha': 4.3664147755200196, 'reg_lambda': 3.910329562771055, 'max_depth': 5, 'num_leaves': 31, 'min_data_in_leaf': 31, 'feature_fraction': 0.8836550517340181, 'n_estimators': 132, 'learning_rate': 0.1331668280086195}. Best is trial 1 with value: -2632038959980.827.


Best trial: 2. Best value: -2.56671e+12:   3%|▎         | 3/100 [00:01<00:35,  2.72it/s, 1.30/7200 seconds]

[I 2025-11-24 20:53:19,312] Trial 2 finished with value: -2566713046679.436 and parameters: {'reg_alpha': 2.7996733686566473, 'reg_lambda': 4.656917499967795, 'max_depth': 5, 'num_leaves': 32, 'min_data_in_leaf': 32, 'feature_fraction': 0.8573700637383673, 'n_estimators': 109, 'learning_rate': 0.1995852317234618}. Best is trial 2 with value: -2566713046679.436.


Best trial: 2. Best value: -2.56671e+12:   4%|▍         | 4/100 [00:01<00:42,  2.25it/s, 1.86/7200 seconds]

[I 2025-11-24 20:53:19,877] Trial 3 finished with value: -2707157780014.5166 and parameters: {'reg_alpha': 4.914754127249596, 'reg_lambda': 3.5686301934390405, 'max_depth': 6, 'num_leaves': 32, 'min_data_in_leaf': 33, 'feature_fraction': 0.9320794907284582, 'n_estimators': 198, 'learning_rate': 0.0502342283069299}. Best is trial 2 with value: -2566713046679.436.


Best trial: 2. Best value: -2.56671e+12:   5%|▌         | 5/100 [00:02<00:44,  2.13it/s, 2.38/7200 seconds]

[I 2025-11-24 20:53:20,391] Trial 4 finished with value: -2824601572954.8594 and parameters: {'reg_alpha': 4.529912442371376, 'reg_lambda': 2.8240158234104964, 'max_depth': 5, 'num_leaves': 32, 'min_data_in_leaf': 30, 'feature_fraction': 0.918423930197601, 'n_estimators': 269, 'learning_rate': 0.12786467057749346}. Best is trial 2 with value: -2566713046679.436.


Best trial: 2. Best value: -2.56671e+12:   6%|▌         | 6/100 [00:02<00:43,  2.15it/s, 2.83/7200 seconds]

[I 2025-11-24 20:53:20,848] Trial 5 finished with value: -2655553546752.7505 and parameters: {'reg_alpha': 4.252331698474108, 'reg_lambda': 4.402596281283167, 'max_depth': 6, 'num_leaves': 62, 'min_data_in_leaf': 34, 'feature_fraction': 0.8503226345794072, 'n_estimators': 178, 'learning_rate': 0.08962153817795075}. Best is trial 2 with value: -2566713046679.436.


Best trial: 2. Best value: -2.56671e+12:   7%|▋         | 7/100 [00:03<00:41,  2.23it/s, 3.24/7200 seconds]

[I 2025-11-24 20:53:21,259] Trial 6 finished with value: -2650720077474.5283 and parameters: {'reg_alpha': 4.767042346823313, 'reg_lambda': 4.830947386843764, 'max_depth': 7, 'num_leaves': 32, 'min_data_in_leaf': 30, 'feature_fraction': 0.9818820825208687, 'n_estimators': 101, 'learning_rate': 0.11547841375395018}. Best is trial 2 with value: -2566713046679.436.


Best trial: 2. Best value: -2.56671e+12:   8%|▊         | 8/100 [00:03<00:44,  2.06it/s, 3.81/7200 seconds]

[I 2025-11-24 20:53:21,823] Trial 7 finished with value: -2648450326226.9644 and parameters: {'reg_alpha': 3.8758915485451086, 'reg_lambda': 2.6283550597965046, 'max_depth': 7, 'num_leaves': 74, 'min_data_in_leaf': 16, 'feature_fraction': 0.9793272947456144, 'n_estimators': 100, 'learning_rate': 0.054962595668829314}. Best is trial 2 with value: -2566713046679.436.


Best trial: 2. Best value: -2.56671e+12:   9%|▉         | 9/100 [00:04<00:47,  1.92it/s, 4.41/7200 seconds]

[I 2025-11-24 20:53:22,420] Trial 8 finished with value: -2626333695845.4917 and parameters: {'reg_alpha': 4.747891099788935, 'reg_lambda': 4.258191879294763, 'max_depth': 7, 'num_leaves': 110, 'min_data_in_leaf': 17, 'feature_fraction': 0.9621674247383065, 'n_estimators': 148, 'learning_rate': 0.1932619152048207}. Best is trial 2 with value: -2566713046679.436.


Best trial: 9. Best value: -2.53368e+12:  10%|█         | 10/100 [00:05<00:51,  1.76it/s, 5.09/7200 seconds]

[I 2025-11-24 20:53:23,101] Trial 9 finished with value: -2533678360795.1255 and parameters: {'reg_alpha': 2.9230955312762372, 'reg_lambda': 4.549509377184105, 'max_depth': 7, 'num_leaves': 47, 'min_data_in_leaf': 16, 'feature_fraction': 0.9501020495624447, 'n_estimators': 175, 'learning_rate': 0.08144002398885875}. Best is trial 9 with value: -2533678360795.1255.


Best trial: 10. Best value: -2.48597e+12:  11%|█         | 11/100 [00:05<00:54,  1.65it/s, 5.78/7200 seconds]

[I 2025-11-24 20:53:23,793] Trial 10 finished with value: -2485971187991.113 and parameters: {'reg_alpha': 3.1852475104536158, 'reg_lambda': 4.1051388210043624, 'max_depth': 6, 'num_leaves': 44, 'min_data_in_leaf': 21, 'feature_fraction': 0.7569794058769233, 'n_estimators': 234, 'learning_rate': 0.08312947575099539}. Best is trial 10 with value: -2485971187991.113.


Best trial: 11. Best value: -2.46858e+12:  12%|█▏        | 12/100 [00:06<00:55,  1.59it/s, 6.46/7200 seconds]

[I 2025-11-24 20:53:24,476] Trial 11 finished with value: -2468581799290.2183 and parameters: {'reg_alpha': 3.2989041274571878, 'reg_lambda': 4.024101297186413, 'max_depth': 6, 'num_leaves': 44, 'min_data_in_leaf': 21, 'feature_fraction': 0.7408733627496868, 'n_estimators': 236, 'learning_rate': 0.08267493721765935}. Best is trial 11 with value: -2468581799290.2183.


Best trial: 12. Best value: -2.43712e+12:  13%|█▎        | 13/100 [00:07<00:56,  1.55it/s, 7.14/7200 seconds]

[I 2025-11-24 20:53:25,153] Trial 12 finished with value: -2437120820517.0854 and parameters: {'reg_alpha': 3.2899205108677205, 'reg_lambda': 3.934958188264602, 'max_depth': 6, 'num_leaves': 43, 'min_data_in_leaf': 22, 'feature_fraction': 0.7370954243214991, 'n_estimators': 237, 'learning_rate': 0.08409554113038292}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  14%|█▍        | 14/100 [00:07<00:59,  1.45it/s, 7.93/7200 seconds]

[I 2025-11-24 20:53:25,940] Trial 13 finished with value: -2604491944903.905 and parameters: {'reg_alpha': 3.298277864150966, 'reg_lambda': 3.215495860492651, 'max_depth': 6, 'num_leaves': 40, 'min_data_in_leaf': 23, 'feature_fraction': 0.708581599890619, 'n_estimators': 296, 'learning_rate': 0.1587521522680528}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  15%|█▌        | 15/100 [00:08<00:59,  1.43it/s, 8.66/7200 seconds]

[I 2025-11-24 20:53:26,669] Trial 14 finished with value: -2479659420089.8677 and parameters: {'reg_alpha': 3.6096987240276976, 'reg_lambda': 3.756357226248715, 'max_depth': 6, 'num_leaves': 40, 'min_data_in_leaf': 20, 'feature_fraction': 0.7839822055671384, 'n_estimators': 228, 'learning_rate': 0.09799892763425735}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  16%|█▌        | 16/100 [00:09<00:58,  1.44it/s, 9.34/7200 seconds]

[I 2025-11-24 20:53:27,351] Trial 15 finished with value: -2535261846151.8345 and parameters: {'reg_alpha': 3.7319137757606695, 'reg_lambda': 3.9891250683173034, 'max_depth': 6, 'num_leaves': 53, 'min_data_in_leaf': 26, 'feature_fraction': 0.7910151749458796, 'n_estimators': 229, 'learning_rate': 0.06874980996303626}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  17%|█▋        | 17/100 [00:09<00:53,  1.54it/s, 9.88/7200 seconds]

[I 2025-11-24 20:53:27,898] Trial 16 finished with value: -2559251225966.7993 and parameters: {'reg_alpha': 3.3962319802838965, 'reg_lambda': 3.3111548991337982, 'max_depth': 5, 'num_leaves': 31, 'min_data_in_leaf': 20, 'feature_fraction': 0.7003044182195616, 'n_estimators': 255, 'learning_rate': 0.10431559102866056}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  18%|█▊        | 18/100 [00:10<00:56,  1.46it/s, 10.65/7200 seconds]

[I 2025-11-24 20:53:28,664] Trial 17 finished with value: -2600100043092.878 and parameters: {'reg_alpha': 3.9709079739321327, 'reg_lambda': 3.1195913576619394, 'max_depth': 6, 'num_leaves': 38, 'min_data_in_leaf': 24, 'feature_fraction': 0.7455107021973514, 'n_estimators': 297, 'learning_rate': 0.14523835314393924}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  19%|█▉        | 19/100 [00:11<00:50,  1.61it/s, 11.12/7200 seconds]

[I 2025-11-24 20:53:29,131] Trial 18 finished with value: -2549346019722.4136 and parameters: {'reg_alpha': 2.997013400586595, 'reg_lambda': 3.6918913853903383, 'max_depth': 5, 'num_leaves': 32, 'min_data_in_leaf': 22, 'feature_fraction': 0.8195896735054236, 'n_estimators': 210, 'learning_rate': 0.07002448935363684}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  20%|██        | 20/100 [00:11<00:52,  1.51it/s, 11.87/7200 seconds]

[I 2025-11-24 20:53:29,888] Trial 19 finished with value: -2480078262007.359 and parameters: {'reg_alpha': 3.5294469818716028, 'reg_lambda': 4.226732934789113, 'max_depth': 6, 'num_leaves': 51, 'min_data_in_leaf': 18, 'feature_fraction': 0.7406603546835729, 'n_estimators': 268, 'learning_rate': 0.16710829392932985}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  21%|██        | 21/100 [00:12<00:50,  1.57it/s, 12.46/7200 seconds]

[I 2025-11-24 20:53:30,474] Trial 20 finished with value: -2550923466641.541 and parameters: {'reg_alpha': 3.137568421662972, 'reg_lambda': 3.9381217508366326, 'max_depth': 6, 'num_leaves': 44, 'min_data_in_leaf': 26, 'feature_fraction': 0.8168362448625803, 'n_estimators': 197, 'learning_rate': 0.10538500176491483}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  22%|██▏       | 22/100 [00:13<00:50,  1.53it/s, 13.15/7200 seconds]

[I 2025-11-24 20:53:31,164] Trial 21 finished with value: -2569147655245.3716 and parameters: {'reg_alpha': 3.6899266575748872, 'reg_lambda': 3.6968081772709707, 'max_depth': 6, 'num_leaves': 37, 'min_data_in_leaf': 19, 'feature_fraction': 0.7740000577288864, 'n_estimators': 226, 'learning_rate': 0.09710853202848133}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  23%|██▎       | 23/100 [00:13<00:52,  1.48it/s, 13.88/7200 seconds]

[I 2025-11-24 20:53:31,893] Trial 22 finished with value: -2561951353451.597 and parameters: {'reg_alpha': 3.4856499196826554, 'reg_lambda': 3.7928253807351306, 'max_depth': 6, 'num_leaves': 42, 'min_data_in_leaf': 21, 'feature_fraction': 0.7282728268196084, 'n_estimators': 243, 'learning_rate': 0.0700799444928897}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  24%|██▍       | 24/100 [00:14<00:49,  1.53it/s, 14.48/7200 seconds]

[I 2025-11-24 20:53:32,498] Trial 23 finished with value: -2628050077123.103 and parameters: {'reg_alpha': 2.533794587213845, 'reg_lambda': 3.4839691451097243, 'max_depth': 6, 'num_leaves': 48, 'min_data_in_leaf': 25, 'feature_fraction': 0.7991239519617297, 'n_estimators': 216, 'learning_rate': 0.09363240497369424}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  25%|██▌       | 25/100 [00:15<00:50,  1.49it/s, 15.20/7200 seconds]

[I 2025-11-24 20:53:33,210] Trial 24 finished with value: -2477222568948.6094 and parameters: {'reg_alpha': 3.932913963440229, 'reg_lambda': 4.176308660768124, 'max_depth': 6, 'num_leaves': 37, 'min_data_in_leaf': 19, 'feature_fraction': 0.7661268234744224, 'n_estimators': 246, 'learning_rate': 0.07751191939353623}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  26%|██▌       | 26/100 [00:16<00:53,  1.38it/s, 16.04/7200 seconds]

[I 2025-11-24 20:53:34,057] Trial 25 finished with value: -2524617914824.388 and parameters: {'reg_alpha': 3.91179773177895, 'reg_lambda': 4.391407145917205, 'max_depth': 6, 'num_leaves': 36, 'min_data_in_leaf': 15, 'feature_fraction': 0.721295844109821, 'n_estimators': 275, 'learning_rate': 0.060280957121882625}. Best is trial 12 with value: -2437120820517.0854.
Completed 26 trials. Best validation RMSE: $1,561,128.06


Best trial: 12. Best value: -2.43712e+12:  27%|██▋       | 27/100 [00:17<00:58,  1.26it/s, 17.01/7200 seconds]

[I 2025-11-24 20:53:35,020] Trial 26 finished with value: -2528721071419.5176 and parameters: {'reg_alpha': 4.121979734892551, 'reg_lambda': 4.94671831561144, 'max_depth': 7, 'num_leaves': 60, 'min_data_in_leaf': 23, 'feature_fraction': 0.7656929515201932, 'n_estimators': 284, 'learning_rate': 0.07666983831851683}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  28%|██▊       | 28/100 [00:17<00:51,  1.39it/s, 17.55/7200 seconds]

[I 2025-11-24 20:53:35,567] Trial 27 finished with value: -2491135506321.142 and parameters: {'reg_alpha': 3.2527240443202716, 'reg_lambda': 4.112113122296613, 'max_depth': 5, 'num_leaves': 31, 'min_data_in_leaf': 18, 'feature_fraction': 0.7354854541992972, 'n_estimators': 252, 'learning_rate': 0.11041418394217381}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  29%|██▉       | 29/100 [00:18<00:48,  1.48it/s, 18.13/7200 seconds]

[I 2025-11-24 20:53:36,141] Trial 28 finished with value: -2557628659958.052 and parameters: {'reg_alpha': 3.076376432572644, 'reg_lambda': 4.597804839029429, 'max_depth': 6, 'num_leaves': 35, 'min_data_in_leaf': 22, 'feature_fraction': 0.8098694232814214, 'n_estimators': 185, 'learning_rate': 0.062229376471279796}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  30%|███       | 30/100 [00:18<00:49,  1.41it/s, 18.91/7200 seconds]

[I 2025-11-24 20:53:36,920] Trial 29 finished with value: -2574197790723.998 and parameters: {'reg_alpha': 2.87101698037255, 'reg_lambda': 4.395843118095167, 'max_depth': 7, 'num_leaves': 90, 'min_data_in_leaf': 27, 'feature_fraction': 0.7613584588071722, 'n_estimators': 242, 'learning_rate': 0.08435619246903978}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  31%|███       | 31/100 [00:19<00:47,  1.47it/s, 19.53/7200 seconds]

[I 2025-11-24 20:53:37,542] Trial 30 finished with value: -2498910639454.0854 and parameters: {'reg_alpha': 3.3866380273190733, 'reg_lambda': 4.118693685962589, 'max_depth': 6, 'num_leaves': 45, 'min_data_in_leaf': 19, 'feature_fraction': 0.8292055043412431, 'n_estimators': 213, 'learning_rate': 0.11887904639932007}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  32%|███▏      | 32/100 [00:20<00:47,  1.42it/s, 20.28/7200 seconds]

[I 2025-11-24 20:53:38,293] Trial 31 finished with value: -2543875235976.441 and parameters: {'reg_alpha': 3.6310083406978912, 'reg_lambda': 3.912766223240843, 'max_depth': 6, 'num_leaves': 40, 'min_data_in_leaf': 20, 'feature_fraction': 0.7916670938869347, 'n_estimators': 247, 'learning_rate': 0.09818803432509615}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  33%|███▎      | 33/100 [00:20<00:46,  1.44it/s, 20.95/7200 seconds]

[I 2025-11-24 20:53:38,966] Trial 32 finished with value: -2510913485381.9043 and parameters: {'reg_alpha': 2.695352081645055, 'reg_lambda': 3.7818436102446684, 'max_depth': 6, 'num_leaves': 41, 'min_data_in_leaf': 20, 'feature_fraction': 0.7737494238810436, 'n_estimators': 223, 'learning_rate': 0.07617611093221754}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  34%|███▍      | 34/100 [00:21<00:46,  1.43it/s, 21.66/7200 seconds]

[I 2025-11-24 20:53:39,678] Trial 33 finished with value: -2527405688570.077 and parameters: {'reg_alpha': 3.5925960385640194, 'reg_lambda': 3.5628305767010326, 'max_depth': 6, 'num_leaves': 38, 'min_data_in_leaf': 24, 'feature_fraction': 0.722980877994114, 'n_estimators': 261, 'learning_rate': 0.0901235994024123}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  35%|███▌      | 35/100 [00:22<00:44,  1.45it/s, 22.34/7200 seconds]

[I 2025-11-24 20:53:40,351] Trial 34 finished with value: -2495526216478.5615 and parameters: {'reg_alpha': 4.125304331712108, 'reg_lambda': 3.997262658736381, 'max_depth': 6, 'num_leaves': 42, 'min_data_in_leaf': 22, 'feature_fraction': 0.7816715373752146, 'n_estimators': 238, 'learning_rate': 0.10129472005445792}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  36%|███▌      | 36/100 [00:22<00:39,  1.60it/s, 22.81/7200 seconds]

[I 2025-11-24 20:53:40,820] Trial 35 finished with value: -2531258627156.606 and parameters: {'reg_alpha': 3.799708727155627, 'reg_lambda': 3.374638796907667, 'max_depth': 5, 'num_leaves': 31, 'min_data_in_leaf': 18, 'feature_fraction': 0.8832928887011905, 'n_estimators': 200, 'learning_rate': 0.06388430145879802}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  37%|███▋      | 37/100 [00:23<00:41,  1.52it/s, 23.54/7200 seconds]

[I 2025-11-24 20:53:41,555] Trial 36 finished with value: -2568317556543.1543 and parameters: {'reg_alpha': 3.4033623546389604, 'reg_lambda': 4.264334241253042, 'max_depth': 6, 'num_leaves': 47, 'min_data_in_leaf': 21, 'feature_fraction': 0.7537555498243149, 'n_estimators': 280, 'learning_rate': 0.13810575283085852}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  38%|███▊      | 38/100 [00:23<00:36,  1.68it/s, 23.99/7200 seconds]

[I 2025-11-24 20:53:42,008] Trial 37 finished with value: -2631982976560.6074 and parameters: {'reg_alpha': 4.369593192456804, 'reg_lambda': 3.7813035564497413, 'max_depth': 5, 'num_leaves': 31, 'min_data_in_leaf': 19, 'feature_fraction': 0.8656055981789836, 'n_estimators': 204, 'learning_rate': 0.1244231680812277}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  39%|███▉      | 39/100 [00:24<00:37,  1.62it/s, 24.66/7200 seconds]

[I 2025-11-24 20:53:42,673] Trial 38 finished with value: -2546892213285.167 and parameters: {'reg_alpha': 4.121304301971461, 'reg_lambda': 3.088839823210251, 'max_depth': 6, 'num_leaves': 34, 'min_data_in_leaf': 28, 'feature_fraction': 0.8351876520639616, 'n_estimators': 259, 'learning_rate': 0.088863547949716}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  40%|████      | 40/100 [00:25<00:36,  1.64it/s, 25.25/7200 seconds]

[I 2025-11-24 20:53:43,260] Trial 39 finished with value: -2500358335579.278 and parameters: {'reg_alpha': 2.725051037995803, 'reg_lambda': 3.6019741103887837, 'max_depth': 6, 'num_leaves': 39, 'min_data_in_leaf': 17, 'feature_fraction': 0.9146749903594831, 'n_estimators': 188, 'learning_rate': 0.07667047468584762}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  41%|████      | 41/100 [00:25<00:34,  1.69it/s, 25.80/7200 seconds]

[I 2025-11-24 20:53:43,812] Trial 40 finished with value: -2632840167742.9326 and parameters: {'reg_alpha': 3.256750111953803, 'reg_lambda': 4.750933878284627, 'max_depth': 7, 'num_leaves': 58, 'min_data_in_leaf': 35, 'feature_fraction': 0.7151898381694743, 'n_estimators': 148, 'learning_rate': 0.055240883907465926}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  42%|████▏     | 42/100 [00:26<00:36,  1.58it/s, 26.53/7200 seconds]

[I 2025-11-24 20:53:44,545] Trial 41 finished with value: -2470139043120.1523 and parameters: {'reg_alpha': 3.511369372670495, 'reg_lambda': 4.21099779590459, 'max_depth': 6, 'num_leaves': 51, 'min_data_in_leaf': 18, 'feature_fraction': 0.7415240457228564, 'n_estimators': 263, 'learning_rate': 0.1799213667486237}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  43%|████▎     | 43/100 [00:27<00:36,  1.54it/s, 27.21/7200 seconds]

[I 2025-11-24 20:53:45,228] Trial 42 finished with value: -2561492145627.2715 and parameters: {'reg_alpha': 3.815545536795378, 'reg_lambda': 4.188279056314227, 'max_depth': 6, 'num_leaves': 51, 'min_data_in_leaf': 17, 'feature_fraction': 0.7469851901704643, 'n_estimators': 223, 'learning_rate': 0.18147873690292718}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  44%|████▍     | 44/100 [00:28<00:39,  1.43it/s, 28.03/7200 seconds]

[I 2025-11-24 20:53:46,047] Trial 43 finished with value: -2479155087666.774 and parameters: {'reg_alpha': 3.4937671848450687, 'reg_lambda': 4.42701718782779, 'max_depth': 6, 'num_leaves': 45, 'min_data_in_leaf': 15, 'feature_fraction': 0.7340590150690656, 'n_estimators': 266, 'learning_rate': 0.11201616607718429}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  45%|████▌     | 45/100 [00:28<00:39,  1.38it/s, 28.82/7200 seconds]

[I 2025-11-24 20:53:46,832] Trial 44 finished with value: -2544106203499.113 and parameters: {'reg_alpha': 3.9875633112905446, 'reg_lambda': 4.460079470525733, 'max_depth': 6, 'num_leaves': 49, 'min_data_in_leaf': 15, 'feature_fraction': 0.735880244745466, 'n_estimators': 267, 'learning_rate': 0.13477261518066225}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  46%|████▌     | 46/100 [00:29<00:40,  1.33it/s, 29.64/7200 seconds]

[I 2025-11-24 20:53:47,652] Trial 45 finished with value: -2631258318237.2485 and parameters: {'reg_alpha': 3.492273074333684, 'reg_lambda': 4.292164838114759, 'max_depth': 6, 'num_leaves': 45, 'min_data_in_leaf': 16, 'feature_fraction': 0.7022006998491617, 'n_estimators': 286, 'learning_rate': 0.14728466100014512}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  47%|████▋     | 47/100 [00:30<00:40,  1.31it/s, 30.42/7200 seconds]

[I 2025-11-24 20:53:48,437] Trial 46 finished with value: -2582404569807.919 and parameters: {'reg_alpha': 3.3600697674679902, 'reg_lambda': 4.519338766173164, 'max_depth': 6, 'num_leaves': 55, 'min_data_in_leaf': 15, 'feature_fraction': 0.7188374418982361, 'n_estimators': 248, 'learning_rate': 0.11173444757803923}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  48%|████▊     | 48/100 [00:31<00:38,  1.34it/s, 31.14/7200 seconds]

[I 2025-11-24 20:53:49,151] Trial 47 finished with value: -2584507350154.65 and parameters: {'reg_alpha': 3.033607622507418, 'reg_lambda': 4.749697924256125, 'max_depth': 6, 'num_leaves': 43, 'min_data_in_leaf': 16, 'feature_fraction': 0.7626573971162987, 'n_estimators': 237, 'learning_rate': 0.17114739572024085}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  49%|████▉     | 49/100 [00:32<00:41,  1.24it/s, 32.08/7200 seconds]

[I 2025-11-24 20:53:50,089] Trial 48 finished with value: -2648821152733.4893 and parameters: {'reg_alpha': 3.1560856001225877, 'reg_lambda': 4.026772058523694, 'max_depth': 7, 'num_leaves': 65, 'min_data_in_leaf': 18, 'feature_fraction': 0.731283688433714, 'n_estimators': 261, 'learning_rate': 0.19323873213051604}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  50%|█████     | 50/100 [00:32<00:38,  1.29it/s, 32.79/7200 seconds]

[I 2025-11-24 20:53:50,803] Trial 49 finished with value: -2490197002407.68 and parameters: {'reg_alpha': 3.711711281833646, 'reg_lambda': 4.337638658634669, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 23, 'feature_fraction': 0.7538135396433971, 'n_estimators': 271, 'learning_rate': 0.12407341881868106}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  51%|█████     | 51/100 [00:33<00:32,  1.49it/s, 33.20/7200 seconds]

[I 2025-11-24 20:53:51,219] Trial 50 finished with value: -2559926367857.4233 and parameters: {'reg_alpha': 2.938847698723541, 'reg_lambda': 4.15573023644516, 'max_depth': 6, 'num_leaves': 50, 'min_data_in_leaf': 17, 'feature_fraction': 0.7112003180433626, 'n_estimators': 119, 'learning_rate': 0.1520625396022622}. Best is trial 12 with value: -2437120820517.0854.
Completed 51 trials. Best validation RMSE: $1,561,128.06


Best trial: 12. Best value: -2.43712e+12:  52%|█████▏    | 52/100 [00:33<00:32,  1.48it/s, 33.90/7200 seconds]

[I 2025-11-24 20:53:51,913] Trial 51 finished with value: -2534601012480.8696 and parameters: {'reg_alpha': 3.5937523150674195, 'reg_lambda': 3.872351660108516, 'max_depth': 6, 'num_leaves': 43, 'min_data_in_leaf': 20, 'feature_fraction': 0.7725164116411565, 'n_estimators': 234, 'learning_rate': 0.08560989006716337}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  53%|█████▎    | 53/100 [00:34<00:32,  1.44it/s, 34.64/7200 seconds]

[I 2025-11-24 20:53:52,652] Trial 52 finished with value: -2526658107205.0835 and parameters: {'reg_alpha': 3.481664930960438, 'reg_lambda': 4.0548244613332916, 'max_depth': 6, 'num_leaves': 53, 'min_data_in_leaf': 19, 'feature_fraction': 0.7837524965419802, 'n_estimators': 254, 'learning_rate': 0.10762296621914903}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  54%|█████▍    | 54/100 [00:35<00:30,  1.50it/s, 35.24/7200 seconds]

[I 2025-11-24 20:53:53,257] Trial 53 finished with value: -2623216286450.4297 and parameters: {'reg_alpha': 3.2487016017880697, 'reg_lambda': 3.8952789297956256, 'max_depth': 6, 'num_leaves': 41, 'min_data_in_leaf': 32, 'feature_fraction': 0.742526618504595, 'n_estimators': 230, 'learning_rate': 0.11700529260443962}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  55%|█████▌    | 55/100 [00:36<00:32,  1.40it/s, 36.06/7200 seconds]

[I 2025-11-24 20:53:54,073] Trial 54 finished with value: -2456390314219.0366 and parameters: {'reg_alpha': 3.6764903682503807, 'reg_lambda': 3.6483705632632666, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 21, 'feature_fraction': 0.7996409418963099, 'n_estimators': 292, 'learning_rate': 0.09418778924031931}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  56%|█████▌    | 56/100 [00:37<00:35,  1.23it/s, 37.10/7200 seconds]

[I 2025-11-24 20:53:55,118] Trial 55 finished with value: -2486936124467.187 and parameters: {'reg_alpha': 3.844809879006884, 'reg_lambda': 4.2154576127031795, 'max_depth': 6, 'num_leaves': 48, 'min_data_in_leaf': 21, 'feature_fraction': 0.7442599611846803, 'n_estimators': 288, 'learning_rate': 0.0807636976806241}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  57%|█████▋    | 57/100 [00:37<00:35,  1.23it/s, 37.93/7200 seconds]

[I 2025-11-24 20:53:55,939] Trial 56 finished with value: -2592315692493.5713 and parameters: {'reg_alpha': 3.981698198737009, 'reg_lambda': 3.6396805174698037, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 24, 'feature_fraction': 0.8052333374631938, 'n_estimators': 294, 'learning_rate': 0.07130934426852417}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  58%|█████▊    | 58/100 [00:38<00:33,  1.24it/s, 38.71/7200 seconds]

[I 2025-11-24 20:53:56,722] Trial 57 finished with value: -2503788105346.272 and parameters: {'reg_alpha': 3.332885749862666, 'reg_lambda': 3.499960421382491, 'max_depth': 6, 'num_leaves': 53, 'min_data_in_leaf': 22, 'feature_fraction': 0.7531046725510864, 'n_estimators': 278, 'learning_rate': 0.09450119554518367}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  59%|█████▉    | 59/100 [00:39<00:32,  1.25it/s, 39.49/7200 seconds]

[I 2025-11-24 20:53:57,500] Trial 58 finished with value: -2554213096805.9272 and parameters: {'reg_alpha': 3.4692997968741075, 'reg_lambda': 4.5049158866666685, 'max_depth': 6, 'num_leaves': 49, 'min_data_in_leaf': 21, 'feature_fraction': 0.729083535155809, 'n_estimators': 266, 'learning_rate': 0.10225744686138466}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  60%|██████    | 60/100 [00:40<00:34,  1.16it/s, 40.51/7200 seconds]

[I 2025-11-24 20:53:58,519] Trial 59 finished with value: -2508889212308.0713 and parameters: {'reg_alpha': 3.6626981631638875, 'reg_lambda': 2.5965006894443836, 'max_depth': 6, 'num_leaves': 44, 'min_data_in_leaf': 19, 'feature_fraction': 0.9989272690413872, 'n_estimators': 299, 'learning_rate': 0.05053205223797097}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  61%|██████    | 61/100 [00:41<00:30,  1.26it/s, 41.13/7200 seconds]

[I 2025-11-24 20:53:59,143] Trial 60 finished with value: -2538208575588.4824 and parameters: {'reg_alpha': 4.486594542895438, 'reg_lambda': 4.351539408875012, 'max_depth': 5, 'num_leaves': 31, 'min_data_in_leaf': 23, 'feature_fraction': 0.7962710343470251, 'n_estimators': 274, 'learning_rate': 0.0900010127654229}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  62%|██████▏   | 62/100 [00:41<00:29,  1.30it/s, 41.84/7200 seconds]

[I 2025-11-24 20:53:59,854] Trial 61 finished with value: -2535261060208.9487 and parameters: {'reg_alpha': 3.755634350105846, 'reg_lambda': 3.9608775847094657, 'max_depth': 6, 'num_leaves': 39, 'min_data_in_leaf': 20, 'feature_fraction': 0.7851196791109339, 'n_estimators': 248, 'learning_rate': 0.09908671592604391}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  63%|██████▎   | 63/100 [00:42<00:28,  1.30it/s, 42.61/7200 seconds]

[I 2025-11-24 20:54:00,628] Trial 62 finished with value: -2439705483480.4688 and parameters: {'reg_alpha': 3.517267592653531, 'reg_lambda': 3.8693582945824514, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 18, 'feature_fraction': 0.768250967315769, 'n_estimators': 244, 'learning_rate': 0.07996790126655949}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  64%|██████▍   | 64/100 [00:43<00:26,  1.34it/s, 43.31/7200 seconds]

[I 2025-11-24 20:54:01,326] Trial 63 finished with value: -2495812115484.4873 and parameters: {'reg_alpha': 3.5223291944790116, 'reg_lambda': 3.839405031274432, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 18, 'feature_fraction': 0.7671068534176715, 'n_estimators': 218, 'learning_rate': 0.07776640104390878}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  65%|██████▌   | 65/100 [00:44<00:26,  1.33it/s, 44.08/7200 seconds]

[I 2025-11-24 20:54:02,091] Trial 64 finished with value: -2495824964672.9434 and parameters: {'reg_alpha': 3.5753899745058333, 'reg_lambda': 3.69294403473158, 'max_depth': 6, 'num_leaves': 47, 'min_data_in_leaf': 17, 'feature_fraction': 0.7574776889961379, 'n_estimators': 241, 'learning_rate': 0.07199276728332366}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  66%|██████▌   | 66/100 [00:44<00:26,  1.30it/s, 44.88/7200 seconds]

[I 2025-11-24 20:54:02,895] Trial 65 finished with value: -2466083197110.464 and parameters: {'reg_alpha': 3.1873849908219665, 'reg_lambda': 4.05376090134499, 'max_depth': 6, 'num_leaves': 51, 'min_data_in_leaf': 16, 'feature_fraction': 0.7752877475934545, 'n_estimators': 255, 'learning_rate': 0.08261880957746098}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 12. Best value: -2.43712e+12:  67%|██████▋   | 67/100 [00:45<00:25,  1.28it/s, 45.70/7200 seconds]

[I 2025-11-24 20:54:03,712] Trial 66 finished with value: -2498313913172.2104 and parameters: {'reg_alpha': 3.089740038559877, 'reg_lambda': 4.075862461984644, 'max_depth': 6, 'num_leaves': 51, 'min_data_in_leaf': 16, 'feature_fraction': 0.7719295985619307, 'n_estimators': 255, 'learning_rate': 0.08256446316658822}. Best is trial 12 with value: -2437120820517.0854.


Best trial: 67. Best value: -2.41646e+12:  68%|██████▊   | 68/100 [00:46<00:24,  1.30it/s, 46.43/7200 seconds]

[I 2025-11-24 20:54:04,445] Trial 67 finished with value: -2416455635376.4688 and parameters: {'reg_alpha': 3.1889680893898382, 'reg_lambda': 3.8432363007586283, 'max_depth': 6, 'num_leaves': 55, 'min_data_in_leaf': 19, 'feature_fraction': 0.8403835594633041, 'n_estimators': 233, 'learning_rate': 0.06272047555295636}. Best is trial 67 with value: -2416455635376.4688.


Best trial: 67. Best value: -2.41646e+12:  69%|██████▉   | 69/100 [00:47<00:23,  1.32it/s, 47.16/7200 seconds]

[I 2025-11-24 20:54:05,170] Trial 68 finished with value: -2523957070771.3525 and parameters: {'reg_alpha': 4.996696686190323, 'reg_lambda': 3.853094910694582, 'max_depth': 6, 'num_leaves': 55, 'min_data_in_leaf': 18, 'feature_fraction': 0.8353600514587949, 'n_estimators': 208, 'learning_rate': 0.06648525121112628}. Best is trial 67 with value: -2416455635376.4688.


Best trial: 67. Best value: -2.41646e+12:  70%|███████   | 70/100 [00:47<00:22,  1.34it/s, 47.87/7200 seconds]

[I 2025-11-24 20:54:05,888] Trial 69 finished with value: -2538004270196.261 and parameters: {'reg_alpha': 3.1672471296329876, 'reg_lambda': 3.9629454206874777, 'max_depth': 6, 'num_leaves': 55, 'min_data_in_leaf': 21, 'feature_fraction': 0.8212494157560184, 'n_estimators': 228, 'learning_rate': 0.0858587761426917}. Best is trial 67 with value: -2416455635376.4688.


Best trial: 67. Best value: -2.41646e+12:  71%|███████   | 71/100 [00:48<00:21,  1.34it/s, 48.62/7200 seconds]

[I 2025-11-24 20:54:06,637] Trial 70 finished with value: -2475315776088.8564 and parameters: {'reg_alpha': 3.288327541616899, 'reg_lambda': 3.689634396481515, 'max_depth': 6, 'num_leaves': 57, 'min_data_in_leaf': 20, 'feature_fraction': 0.8595582807784529, 'n_estimators': 221, 'learning_rate': 0.05808301866337214}. Best is trial 67 with value: -2416455635376.4688.


Best trial: 67. Best value: -2.41646e+12:  72%|███████▏  | 72/100 [00:49<00:21,  1.31it/s, 49.42/7200 seconds]

[I 2025-11-24 20:54:07,435] Trial 71 finished with value: -2473310492982.7007 and parameters: {'reg_alpha': 3.2877235382346157, 'reg_lambda': 3.7570461784578053, 'max_depth': 6, 'num_leaves': 57, 'min_data_in_leaf': 20, 'feature_fraction': 0.8562452217548701, 'n_estimators': 233, 'learning_rate': 0.05739451095899807}. Best is trial 67 with value: -2416455635376.4688.


Best trial: 67. Best value: -2.41646e+12:  73%|███████▎  | 73/100 [00:50<00:20,  1.33it/s, 50.15/7200 seconds]

[I 2025-11-24 20:54:08,160] Trial 72 finished with value: -2511410256468.4834 and parameters: {'reg_alpha': 3.2350971245053746, 'reg_lambda': 3.5349524789129125, 'max_depth': 6, 'num_leaves': 57, 'min_data_in_leaf': 22, 'feature_fraction': 0.8807990662038626, 'n_estimators': 233, 'learning_rate': 0.06439603785818865}. Best is trial 67 with value: -2416455635376.4688.


Best trial: 67. Best value: -2.41646e+12:  74%|███████▍  | 74/100 [00:50<00:20,  1.29it/s, 50.99/7200 seconds]

[I 2025-11-24 20:54:09,000] Trial 73 finished with value: -2507625432118.468 and parameters: {'reg_alpha': 2.962588244548023, 'reg_lambda': 3.7374006982610997, 'max_depth': 6, 'num_leaves': 52, 'min_data_in_leaf': 19, 'feature_fraction': 0.8463937899437325, 'n_estimators': 251, 'learning_rate': 0.05244830576713682}. Best is trial 67 with value: -2416455635376.4688.


Best trial: 67. Best value: -2.41646e+12:  75%|███████▌  | 75/100 [00:51<00:19,  1.30it/s, 51.73/7200 seconds]

[I 2025-11-24 20:54:09,742] Trial 74 finished with value: -2524014302735.709 and parameters: {'reg_alpha': 3.414373435301236, 'reg_lambda': 3.4433965292656756, 'max_depth': 6, 'num_leaves': 60, 'min_data_in_leaf': 21, 'feature_fraction': 0.9119913812965251, 'n_estimators': 242, 'learning_rate': 0.06790287212886782}. Best is trial 67 with value: -2416455635376.4688.


Best trial: 67. Best value: -2.41646e+12:  76%|███████▌  | 76/100 [00:52<00:17,  1.40it/s, 52.33/7200 seconds]

[I 2025-11-24 20:54:10,342] Trial 75 finished with value: -2479709793216.714 and parameters: {'reg_alpha': 2.862040724625973, 'reg_lambda': 3.825256038491321, 'max_depth': 6, 'num_leaves': 50, 'min_data_in_leaf': 17, 'feature_fraction': 0.8488888005393033, 'n_estimators': 161, 'learning_rate': 0.05877264988553233}. Best is trial 67 with value: -2416455635376.4688.
Completed 76 trials. Best validation RMSE: $1,554,495.30


Best trial: 76. Best value: -2.40509e+12:  77%|███████▋  | 77/100 [00:53<00:17,  1.30it/s, 53.22/7200 seconds]

[I 2025-11-24 20:54:11,238] Trial 76 finished with value: -2405087905258.0024 and parameters: {'reg_alpha': 3.096882141506273, 'reg_lambda': 4.0226004436973675, 'max_depth': 6, 'num_leaves': 49, 'min_data_in_leaf': 18, 'feature_fraction': 0.8901570382889373, 'n_estimators': 262, 'learning_rate': 0.07417854704442373}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  78%|███████▊  | 78/100 [00:54<00:17,  1.25it/s, 54.09/7200 seconds]

[I 2025-11-24 20:54:12,108] Trial 77 finished with value: -2503192387715.6816 and parameters: {'reg_alpha': 3.10772477565472, 'reg_lambda': 4.115997011204604, 'max_depth': 6, 'num_leaves': 49, 'min_data_in_leaf': 18, 'feature_fraction': 0.9287905577414488, 'n_estimators': 263, 'learning_rate': 0.07376807131880314}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  79%|███████▉  | 79/100 [00:54<00:16,  1.24it/s, 54.92/7200 seconds]

[I 2025-11-24 20:54:12,937] Trial 78 finished with value: -2510887131612.554 and parameters: {'reg_alpha': 3.031982495423549, 'reg_lambda': 4.039722246727249, 'max_depth': 6, 'num_leaves': 54, 'min_data_in_leaf': 16, 'feature_fraction': 0.9054566531589413, 'n_estimators': 257, 'learning_rate': 0.09349828564389302}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  80%|████████  | 80/100 [00:55<00:17,  1.14it/s, 55.95/7200 seconds]

[I 2025-11-24 20:54:13,961] Trial 79 finished with value: -2530759705529.8076 and parameters: {'reg_alpha': 3.1972587852455474, 'reg_lambda': 3.9270312333805726, 'max_depth': 6, 'num_leaves': 43, 'min_data_in_leaf': 19, 'feature_fraction': 0.8999868348786204, 'n_estimators': 291, 'learning_rate': 0.08033347310479044}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  81%|████████  | 81/100 [00:56<00:17,  1.12it/s, 56.90/7200 seconds]

[I 2025-11-24 20:54:14,910] Trial 80 finished with value: -2582586180117.833 and parameters: {'reg_alpha': 3.419558854360014, 'reg_lambda': 2.828189055829185, 'max_depth': 6, 'num_leaves': 52, 'min_data_in_leaf': 18, 'feature_fraction': 0.9568750163228599, 'n_estimators': 273, 'learning_rate': 0.0866038076719549}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  82%|████████▏ | 82/100 [00:57<00:15,  1.18it/s, 57.62/7200 seconds]

[I 2025-11-24 20:54:15,638] Trial 81 finished with value: -2495914067957.053 and parameters: {'reg_alpha': 3.314038073538403, 'reg_lambda': 3.7433687894934904, 'max_depth': 6, 'num_leaves': 48, 'min_data_in_leaf': 20, 'feature_fraction': 0.8723624921367081, 'n_estimators': 235, 'learning_rate': 0.05628341689536721}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  83%|████████▎ | 83/100 [00:58<00:14,  1.20it/s, 58.43/7200 seconds]

[I 2025-11-24 20:54:16,445] Trial 82 finished with value: -2472584880080.8667 and parameters: {'reg_alpha': 3.3652552905435766, 'reg_lambda': 3.6253438376995177, 'max_depth': 6, 'num_leaves': 50, 'min_data_in_leaf': 17, 'feature_fraction': 0.8101578385841256, 'n_estimators': 243, 'learning_rate': 0.07478878934627702}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  84%|████████▍ | 84/100 [00:59<00:13,  1.15it/s, 59.37/7200 seconds]

[I 2025-11-24 20:54:17,385] Trial 83 finished with value: -2437485520783.288 and parameters: {'reg_alpha': 3.2042095004830213, 'reg_lambda': 3.6232547479360138, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 17, 'feature_fraction': 0.8148904262164596, 'n_estimators': 281, 'learning_rate': 0.07423485141915143}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  85%|████████▌ | 85/100 [01:00<00:12,  1.16it/s, 60.21/7200 seconds]

[I 2025-11-24 20:54:18,226] Trial 84 finished with value: -2418456532692.7246 and parameters: {'reg_alpha': 2.8581276096298756, 'reg_lambda': 3.293216635588105, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 17, 'feature_fraction': 0.8253283453119211, 'n_estimators': 251, 'learning_rate': 0.08132481362709132}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  86%|████████▌ | 86/100 [01:01<00:11,  1.18it/s, 61.05/7200 seconds]

[I 2025-11-24 20:54:19,060] Trial 85 finished with value: -2456476747908.0327 and parameters: {'reg_alpha': 2.8238375702121696, 'reg_lambda': 3.2568275533570463, 'max_depth': 6, 'num_leaves': 44, 'min_data_in_leaf': 17, 'feature_fraction': 0.8264598103643422, 'n_estimators': 283, 'learning_rate': 0.08003110575782878}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  87%|████████▋ | 87/100 [01:01<00:11,  1.18it/s, 61.90/7200 seconds]

[I 2025-11-24 20:54:19,910] Trial 86 finished with value: -2471332351202.0073 and parameters: {'reg_alpha': 2.7255315485527705, 'reg_lambda': 3.139536529391749, 'max_depth': 6, 'num_leaves': 45, 'min_data_in_leaf': 16, 'feature_fraction': 0.8263699854409913, 'n_estimators': 281, 'learning_rate': 0.07972378115398794}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  88%|████████▊ | 88/100 [01:02<00:10,  1.10it/s, 62.95/7200 seconds]

[I 2025-11-24 20:54:20,967] Trial 87 finished with value: -2464685929516.0063 and parameters: {'reg_alpha': 2.5612334105160928, 'reg_lambda': 3.2644164088406478, 'max_depth': 6, 'num_leaves': 47, 'min_data_in_leaf': 15, 'feature_fraction': 0.8419301311240719, 'n_estimators': 282, 'learning_rate': 0.062242680535419564}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  89%|████████▉ | 89/100 [01:03<00:10,  1.05it/s, 64.00/7200 seconds]

[I 2025-11-24 20:54:22,009] Trial 88 finished with value: -2447336692900.596 and parameters: {'reg_alpha': 2.5696067864167915, 'reg_lambda': 3.251929948343596, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 15, 'feature_fraction': 0.8371541411508688, 'n_estimators': 283, 'learning_rate': 0.06336742872189674}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  90%|█████████ | 90/100 [01:04<00:09,  1.07it/s, 64.89/7200 seconds]

[I 2025-11-24 20:54:22,903] Trial 89 finished with value: -2466954092144.092 and parameters: {'reg_alpha': 2.880172319203346, 'reg_lambda': 3.3293879758430265, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 17, 'feature_fraction': 0.8103760328214958, 'n_estimators': 290, 'learning_rate': 0.06806186733336085}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  91%|█████████ | 91/100 [01:05<00:08,  1.07it/s, 65.82/7200 seconds]

[I 2025-11-24 20:54:23,833] Trial 90 finished with value: -2473234740636.6313 and parameters: {'reg_alpha': 2.8055008485803223, 'reg_lambda': 2.9335029631447025, 'max_depth': 6, 'num_leaves': 44, 'min_data_in_leaf': 15, 'feature_fraction': 0.8373302660179772, 'n_estimators': 300, 'learning_rate': 0.07165989055097863}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  92%|█████████▏| 92/100 [01:06<00:07,  1.07it/s, 66.76/7200 seconds]

[I 2025-11-24 20:54:24,771] Trial 91 finished with value: -2408384982277.664 and parameters: {'reg_alpha': 2.515077243502068, 'reg_lambda': 3.2582062611643074, 'max_depth': 6, 'num_leaves': 47, 'min_data_in_leaf': 15, 'feature_fraction': 0.8414762142683974, 'n_estimators': 284, 'learning_rate': 0.06576265830723753}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  93%|█████████▎| 93/100 [01:07<00:06,  1.08it/s, 67.65/7200 seconds]

[I 2025-11-24 20:54:25,668] Trial 92 finished with value: -2476439008183.875 and parameters: {'reg_alpha': 2.6775047743184657, 'reg_lambda': 3.071542639211996, 'max_depth': 6, 'num_leaves': 42, 'min_data_in_leaf': 16, 'feature_fraction': 0.8262884019204643, 'n_estimators': 277, 'learning_rate': 0.06506327336943932}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  94%|█████████▍| 94/100 [01:08<00:05,  1.09it/s, 68.55/7200 seconds]

[I 2025-11-24 20:54:26,567] Trial 93 finished with value: -2486222553633.337 and parameters: {'reg_alpha': 2.5066496279900505, 'reg_lambda': 3.3888502124881104, 'max_depth': 6, 'num_leaves': 45, 'min_data_in_leaf': 17, 'feature_fraction': 0.8162643469574269, 'n_estimators': 294, 'learning_rate': 0.0749791948854822}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  95%|█████████▌| 95/100 [01:09<00:04,  1.08it/s, 69.50/7200 seconds]

[I 2025-11-24 20:54:27,515] Trial 94 finished with value: -2514552896282.513 and parameters: {'reg_alpha': 2.589458837567069, 'reg_lambda': 3.202790140961672, 'max_depth': 6, 'num_leaves': 47, 'min_data_in_leaf': 15, 'feature_fraction': 0.8016014776711416, 'n_estimators': 287, 'learning_rate': 0.061581946781792934}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  96%|█████████▌| 96/100 [01:10<00:03,  1.11it/s, 70.33/7200 seconds]

[I 2025-11-24 20:54:28,347] Trial 95 finished with value: -2480825277370.4043 and parameters: {'reg_alpha': 2.6218213027765653, 'reg_lambda': 3.0408232721457957, 'max_depth': 6, 'num_leaves': 44, 'min_data_in_leaf': 16, 'feature_fraction': 0.855734015547025, 'n_estimators': 271, 'learning_rate': 0.07008359234707662}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  97%|█████████▋| 97/100 [01:11<00:03,  1.00s/it, 71.58/7200 seconds]

[I 2025-11-24 20:54:29,591] Trial 96 finished with value: -2506907415075.0376 and parameters: {'reg_alpha': 2.8040104378589157, 'reg_lambda': 3.196124360255759, 'max_depth': 6, 'num_leaves': 48, 'min_data_in_leaf': 19, 'feature_fraction': 0.8311095963940086, 'n_estimators': 283, 'learning_rate': 0.07831497144636868}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  98%|█████████▊| 98/100 [01:12<00:02,  1.01s/it, 72.60/7200 seconds]

[I 2025-11-24 20:54:30,617] Trial 97 finished with value: -2675276820673.502 and parameters: {'reg_alpha': 2.6525789535529496, 'reg_lambda': 3.2657980642489335, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 30, 'feature_fraction': 0.8180317862009517, 'n_estimators': 277, 'learning_rate': 0.09085506048773409}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12:  99%|█████████▉| 99/100 [01:13<00:01,  1.02s/it, 73.64/7200 seconds]

[I 2025-11-24 20:54:31,656] Trial 98 finished with value: -2549024201818.788 and parameters: {'reg_alpha': 3.0012904864641086, 'reg_lambda': 3.45900176081594, 'max_depth': 6, 'num_leaves': 41, 'min_data_in_leaf': 18, 'feature_fraction': 0.8427121859571446, 'n_estimators': 292, 'learning_rate': 0.08756019314343683}. Best is trial 76 with value: -2405087905258.0024.


Best trial: 76. Best value: -2.40509e+12: 100%|██████████| 100/100 [01:15<00:00,  1.32it/s, 75.49/7200 seconds]


[I 2025-11-24 20:54:33,505] Trial 99 finished with value: -2484754499796.9434 and parameters: {'reg_alpha': 2.7760411941427856, 'reg_lambda': 2.964813980095601, 'max_depth': 6, 'num_leaves': 45, 'min_data_in_leaf': 17, 'feature_fraction': 0.8231050919293389, 'n_estimators': 268, 'learning_rate': 0.06657417310238027}. Best is trial 76 with value: -2405087905258.0024.

=== LightGBM Optuna Results (Optimized for Validation) ===
Completed 100 out of 100 trials
Best trial:
  Trial number: 76
  Value (neg_MSE): -2405087905258.0024
  Validation RMSE: $1,550,834.58
  Best params:
    reg_alpha: 3.096882141506273
    reg_lambda: 4.0226004436973675
    max_depth: 6
    num_leaves: 49
    min_data_in_leaf: 18
    feature_fraction: 0.8901570382889373
    n_estimators: 262
    learning_rate: 0.07417854704442373

=== Regularization Analysis ===
Best L1 (reg_alpha): 3.0969
Best L2 (reg_lambda): 4.0226
Total regularization: 7.1195
✨ Mixed L1+L2 regularization preferred (Elastic Net)

Training final Li

In [50]:
import optuna
from sklearn.model_selection import cross_val_score
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

def xgb_objective(trial):
    reg_alpha = trial.suggest_float('reg_alpha', 2.5, 5.0)
    reg_lambda = trial.suggest_float('reg_lambda', 2.5, 5.0)
    
    params = {
        'max_depth': trial.suggest_int('max_depth', 3, 8),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 0.85),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 200),
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda,
        'objective': 'reg:squarederror',
        'random_state': 42,
        'verbosity': 0,
        'n_jobs': -1,
        'enable_categorical': True
    }
    
    try:
        model = xgb.XGBRegressor(**params)
        
        # ✅ CHANGE 1: Train on training set only
        model.fit(X_train, y_train)
        
        # ✅ CHANGE 2: Evaluate on validation set (not training!)
        val_predictions = model.predict(X_val)
        val_mse = mean_squared_error(y_val, val_predictions)
        
        # ✅ CHANGE 3: Return negative MSE for maximization
        return -val_mse
        
    except Exception as e:
        print(f"Trial {trial.number} failed: {e}")
        return float('-inf')

def print_progress(study, trial):
    if trial.number % 25 == 0:
        # ✅ CHANGE 4: Show validation RMSE
        val_rmse = np.sqrt(-study.best_value)
        print(f"Completed {trial.number + 1} trials. Best validation RMSE: ${val_rmse:,.2f}")

print("Starting XGBoost hyperparameter optimization...")
print("🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)")  # ✅ CHANGE 5
print(f"Training samples: {len(X_train):,}")

study = optuna.create_study(direction='maximize')
study.optimize(
    xgb_objective, 
    n_trials=100,
    callbacks=[print_progress],
    show_progress_bar=True
)

# ✅ CHANGE 6: Updated results display
best_val_rmse = np.sqrt(-study.best_value)
print(f"\n=== XGBoost Results (Optimized for Validation) ===")
print(f"Best validation RMSE: ${best_val_rmse:,.2f}")
print("Best params:")
for key, value in study.best_trial.params.items():
    print(f"  {key}: {value}")

# Regularization analysis
best_l1 = study.best_trial.params['reg_alpha']
best_l2 = study.best_trial.params['reg_lambda']
print(f"\nRegularization: L1={best_l1:.3f}, L2={best_l2:.3f}")

# Train final model
best_xgb_model = xgb.XGBRegressor(
    **study.best_trial.params, 
    objective='reg:squarederror',
    random_state=42,
    verbosity=0,
    n_jobs=-1,
    enable_categorical=True
)

best_xgb_model.fit(X_train, y_train)

# Get predictions for all sets
train_predictions = best_xgb_model.predict(X_train)
val_predictions = best_xgb_model.predict(X_val)
test_predictions = best_xgb_model.predict(X_test)

# Calculate metrics
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
val_rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

train_r2 = r2_score(y_train, train_predictions)
val_r2 = r2_score(y_val, val_predictions)
test_r2 = r2_score(y_test, test_predictions)

print(f"\n=== Performance ===")
print(f"Training:   RMSE ${train_rmse:,.2f}, R² {train_r2:.4f}")
print(f"Validation: RMSE ${val_rmse:,.2f}, R² {val_r2:.4f}")
print(f"Test:       RMSE ${test_rmse:,.2f}, R² {test_r2:.4f}")

# Overfitting check
if train_rmse < val_rmse * 0.8:
    print(f"⚠️  Possible overfitting detected")
else:
    print(f"✅ Good generalization")

# Create results with TEST set only (true unseen data)
results_df = X_test.copy()
results_df['actual_price'] = y_test
results_df['xgb_predicted_price'] = test_predictions
results_df['prediction_error'] = results_df['actual_price'] - results_df['xgb_predicted_price']
results_df['absolute_error'] = abs(results_df['prediction_error'])

# Save TEST ONLY results
results_df.to_csv('/Users/clarencemarvin/Downloads/regularized/xgb_predictions_regularized.csv', index=False)

# ✅ NEW: Create combined results with ALL data (train + val + test)
train_df = X_train.copy()
train_df['dataset'] = 'train'
train_df['actual_price'] = y_train
train_df['xgb_predicted_price'] = train_predictions
train_df['prediction_error'] = train_df['actual_price'] - train_df['xgb_predicted_price']
train_df['absolute_error'] = abs(train_df['prediction_error'])

val_df = X_val.copy()
val_df['dataset'] = 'validation'
val_df['actual_price'] = y_val
val_df['xgb_predicted_price'] = val_predictions
val_df['prediction_error'] = val_df['actual_price'] - val_df['xgb_predicted_price']
val_df['absolute_error'] = abs(val_df['prediction_error'])

test_df = X_test.copy()
test_df['dataset'] = 'test'
test_df['actual_price'] = y_test
test_df['xgb_predicted_price'] = test_predictions
test_df['prediction_error'] = test_df['actual_price'] - test_df['xgb_predicted_price']
test_df['absolute_error'] = abs(test_df['prediction_error'])

# Combine all three datasets
all_results_df = pd.concat([train_df, val_df, test_df], ignore_index=True)

# Save COMBINED results
all_results_df.to_csv('/Users/clarencemarvin/Downloads/regularized/xgb_predictions_all_data.csv', index=False)

print(f"\n✅ Saved two CSV files:")
print(f"   1. Test only: lgb_predictions_regularized.csv ({len(results_df):,} rows)")
print(f"   2. All data: lgb_predictions_all_data.csv ({len(all_results_df):,} rows)")

# Feature importance
importance_df = pd.DataFrame({
    'feature': X_train.columns,
    'importance': best_xgb_model.feature_importances_
}).sort_values('importance', ascending=False)

importance_df['importance_pct'] = (importance_df['importance'] / importance_df['importance'].sum()) * 100

print(f"\n=== Top Features ===")
for idx, row in importance_df.head(10).iterrows():
    print(f"{row['feature']}: {row['importance_pct']:.1f}%")

importance_df.to_csv('/Users/clarencemarvin/Downloads/regularized2/xgb_feature_importance.csv', index=False)

print(f"\nFinal test RMSE: ${test_rmse:,.2f}")

[I 2025-11-11 12:50:26,499] A new study created in memory with name: no-name-a60f551c-f205-4e1e-9c5c-fcf8d697c68a


Starting XGBoost hyperparameter optimization...
🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)
Training samples: 3,693


Best trial: 0. Best value: -2.6686e+12:   1%|          | 1/100 [00:00<01:32,  1.07it/s]

[I 2025-11-11 12:50:27,439] Trial 0 finished with value: -2668601802752.0 and parameters: {'reg_alpha': 3.5179657189844993, 'reg_lambda': 4.772757441512988, 'max_depth': 7, 'min_child_weight': 7, 'subsample': 0.7861611456586872, 'colsample_bytree': 0.8458964844682395, 'learning_rate': 0.2909076896438328, 'n_estimators': 109}. Best is trial 0 with value: -2668601802752.0.
Completed 1 trials. Best validation RMSE: $1,633,585.57


Best trial: 1. Best value: -2.48223e+12:   2%|▏         | 2/100 [00:01<01:26,  1.13it/s]

[I 2025-11-11 12:50:28,289] Trial 1 finished with value: -2482232360960.0 and parameters: {'reg_alpha': 4.287382496385552, 'reg_lambda': 2.9552781436660007, 'max_depth': 7, 'min_child_weight': 7, 'subsample': 0.9608887221790804, 'colsample_bytree': 0.779057450111344, 'learning_rate': 0.07133524315682921, 'n_estimators': 118}. Best is trial 1 with value: -2482232360960.0.


Best trial: 1. Best value: -2.48223e+12:   3%|▎         | 3/100 [00:02<01:03,  1.53it/s]

[I 2025-11-11 12:50:28,664] Trial 2 finished with value: -2531399565312.0 and parameters: {'reg_alpha': 4.152565238607976, 'reg_lambda': 3.702253893942373, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8794622964823186, 'colsample_bytree': 0.7845870146030046, 'learning_rate': 0.19475686154909647, 'n_estimators': 126}. Best is trial 1 with value: -2482232360960.0.


Best trial: 3. Best value: -2.43638e+12:   4%|▍         | 4/100 [00:03<01:13,  1.30it/s]

[I 2025-11-11 12:50:29,605] Trial 3 finished with value: -2436384423936.0 and parameters: {'reg_alpha': 3.955804426905622, 'reg_lambda': 2.661824281816942, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.9366243951260883, 'colsample_bytree': 0.7777572090126624, 'learning_rate': 0.051917700019497005, 'n_estimators': 194}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:   5%|▌         | 5/100 [00:03<01:02,  1.51it/s]

[I 2025-11-11 12:50:30,078] Trial 4 finished with value: -2588318367744.0 and parameters: {'reg_alpha': 3.3989451393562775, 'reg_lambda': 3.555235766678689, 'max_depth': 4, 'min_child_weight': 7, 'subsample': 0.8244024025193813, 'colsample_bytree': 0.8481818753542736, 'learning_rate': 0.10334506151442373, 'n_estimators': 168}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:   6%|▌         | 6/100 [00:04<01:18,  1.20it/s]

[I 2025-11-11 12:50:31,243] Trial 5 finished with value: -2839137746944.0 and parameters: {'reg_alpha': 3.3846046476743146, 'reg_lambda': 4.420616454784077, 'max_depth': 8, 'min_child_weight': 2, 'subsample': 0.8315938668769255, 'colsample_bytree': 0.7580372630245108, 'learning_rate': 0.18303798198229482, 'n_estimators': 190}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:   7%|▋         | 7/100 [00:05<01:01,  1.50it/s]

[I 2025-11-11 12:50:31,569] Trial 6 finished with value: -2551592517632.0 and parameters: {'reg_alpha': 4.411513010087963, 'reg_lambda': 3.597395151787583, 'max_depth': 3, 'min_child_weight': 4, 'subsample': 0.8992003740530207, 'colsample_bytree': 0.7319968516216683, 'learning_rate': 0.11055870518451619, 'n_estimators': 193}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:   8%|▊         | 8/100 [00:05<00:50,  1.83it/s]

[I 2025-11-11 12:50:31,860] Trial 7 finished with value: -2763293196288.0 and parameters: {'reg_alpha': 3.8559398221294527, 'reg_lambda': 3.08189229578495, 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.7070378846963646, 'colsample_bytree': 0.837858180002949, 'learning_rate': 0.28023812053869623, 'n_estimators': 136}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:   9%|▉         | 9/100 [00:05<00:50,  1.80it/s]

[I 2025-11-11 12:50:32,436] Trial 8 finished with value: -2898582306816.0 and parameters: {'reg_alpha': 2.7903304841798233, 'reg_lambda': 2.6064892472178958, 'max_depth': 7, 'min_child_weight': 2, 'subsample': 0.7957243564904546, 'colsample_bytree': 0.8104996188365675, 'learning_rate': 0.24685054619055052, 'n_estimators': 127}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  10%|█         | 10/100 [00:06<00:53,  1.70it/s]

[I 2025-11-11 12:50:33,100] Trial 9 finished with value: -2757652381696.0 and parameters: {'reg_alpha': 3.2039657561931483, 'reg_lambda': 2.741416199055889, 'max_depth': 7, 'min_child_weight': 10, 'subsample': 0.858729848196418, 'colsample_bytree': 0.7022938351134923, 'learning_rate': 0.27151236808172763, 'n_estimators': 112}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  11%|█         | 11/100 [00:06<00:46,  1.91it/s]

[I 2025-11-11 12:50:33,473] Trial 10 finished with value: -4563695304704.0 and parameters: {'reg_alpha': 4.9814090170240775, 'reg_lambda': 4.182111624224218, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.9755268090279539, 'colsample_bytree': 0.7451429059928588, 'learning_rate': 0.010038287810283333, 'n_estimators': 163}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  12%|█▏        | 12/100 [00:07<00:45,  1.95it/s]

[I 2025-11-11 12:50:33,958] Trial 11 finished with value: -2825230483456.0 and parameters: {'reg_alpha': 4.645281309571747, 'reg_lambda': 3.10568399357944, 'max_depth': 6, 'min_child_weight': 8, 'subsample': 0.9863472959807484, 'colsample_bytree': 0.7867450607849152, 'learning_rate': 0.02337768352088379, 'n_estimators': 150}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  13%|█▎        | 13/100 [00:08<00:47,  1.83it/s]

[I 2025-11-11 12:50:34,586] Trial 12 finished with value: -2451805569024.0 and parameters: {'reg_alpha': 4.045142861928527, 'reg_lambda': 3.032809041867451, 'max_depth': 6, 'min_child_weight': 4, 'subsample': 0.9228777455311591, 'colsample_bytree': 0.8036940596804598, 'learning_rate': 0.06752275600732234, 'n_estimators': 177}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  14%|█▍        | 14/100 [00:08<00:46,  1.84it/s]

[I 2025-11-11 12:50:35,120] Trial 13 finished with value: -2521290506240.0 and parameters: {'reg_alpha': 3.870732890732572, 'reg_lambda': 3.302331094441663, 'max_depth': 6, 'min_child_weight': 4, 'subsample': 0.9235076911360658, 'colsample_bytree': 0.8083921528876304, 'learning_rate': 0.06030364240790825, 'n_estimators': 178}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  15%|█▌        | 15/100 [00:09<00:51,  1.66it/s]

[I 2025-11-11 12:50:35,864] Trial 14 finished with value: -2575294005248.0 and parameters: {'reg_alpha': 4.02491147413255, 'reg_lambda': 2.5374952398300072, 'max_depth': 6, 'min_child_weight': 4, 'subsample': 0.9309456525342888, 'colsample_bytree': 0.8122937916132076, 'learning_rate': 0.13597367097325747, 'n_estimators': 200}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  16%|█▌        | 16/100 [00:10<00:57,  1.45it/s]

[I 2025-11-11 12:50:36,751] Trial 15 finished with value: -2519301619712.0 and parameters: {'reg_alpha': 4.547637662943853, 'reg_lambda': 4.082963019259012, 'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.920951685831415, 'colsample_bytree': 0.7636422499472172, 'learning_rate': 0.060260160315943403, 'n_estimators': 177}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  17%|█▋        | 17/100 [00:10<00:49,  1.66it/s]

[I 2025-11-11 12:50:37,150] Trial 16 finished with value: -2474175102976.0 and parameters: {'reg_alpha': 3.7395155535421574, 'reg_lambda': 3.3087980007114224, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9490643431240393, 'colsample_bytree': 0.800200021880692, 'learning_rate': 0.09728219098177673, 'n_estimators': 156}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  18%|█▊        | 18/100 [00:11<00:45,  1.80it/s]

[I 2025-11-11 12:50:37,598] Trial 17 finished with value: -2541600112640.0 and parameters: {'reg_alpha': 2.9791014514302403, 'reg_lambda': 2.81932455695779, 'max_depth': 4, 'min_child_weight': 5, 'subsample': 0.8930810144046838, 'colsample_bytree': 0.830750649488196, 'learning_rate': 0.15303711106023826, 'n_estimators': 182}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  19%|█▉        | 19/100 [00:11<00:44,  1.84it/s]

[I 2025-11-11 12:50:38,114] Trial 18 finished with value: -2548524646400.0 and parameters: {'reg_alpha': 4.859619441928914, 'reg_lambda': 3.2270523109833786, 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.987779071290105, 'colsample_bytree': 0.8263256314787547, 'learning_rate': 0.035718265310407706, 'n_estimators': 148}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  20%|██        | 20/100 [00:12<00:41,  1.93it/s]

[I 2025-11-11 12:50:38,574] Trial 19 finished with value: -2634699505664.0 and parameters: {'reg_alpha': 3.638956651491929, 'reg_lambda': 2.8502695103522435, 'max_depth': 5, 'min_child_weight': 6, 'subsample': 0.7023843823060123, 'colsample_bytree': 0.7239207235736379, 'learning_rate': 0.07431915362935726, 'n_estimators': 167}. Best is trial 3 with value: -2436384423936.0.


Best trial: 3. Best value: -2.43638e+12:  21%|██        | 21/100 [00:12<00:35,  2.19it/s]

[I 2025-11-11 12:50:38,885] Trial 20 finished with value: -3033547931648.0 and parameters: {'reg_alpha': 2.537393053404619, 'reg_lambda': 2.5023418481326605, 'max_depth': 3, 'min_child_weight': 3, 'subsample': 0.8649757537048535, 'colsample_bytree': 0.7680890027044385, 'learning_rate': 0.0389327102168952, 'n_estimators': 187}. Best is trial 3 with value: -2436384423936.0.


Best trial: 21. Best value: -2.34499e+12:  22%|██▏       | 22/100 [00:12<00:36,  2.16it/s]

[I 2025-11-11 12:50:39,366] Trial 21 finished with value: -2344991850496.0 and parameters: {'reg_alpha': 4.100414915497453, 'reg_lambda': 3.408629146435299, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9477561420909133, 'colsample_bytree': 0.7986303167632675, 'learning_rate': 0.10265446240537503, 'n_estimators': 162}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  23%|██▎       | 23/100 [00:13<00:39,  1.93it/s]

[I 2025-11-11 12:50:40,012] Trial 22 finished with value: -2447898574848.0 and parameters: {'reg_alpha': 4.098643488358348, 'reg_lambda': 3.4302162891261236, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.9434542575598545, 'colsample_bytree': 0.7954593232988215, 'learning_rate': 0.12700252786125243, 'n_estimators': 175}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  24%|██▍       | 24/100 [00:14<00:41,  1.85it/s]

[I 2025-11-11 12:50:40,605] Trial 23 finished with value: -2472065892352.0 and parameters: {'reg_alpha': 4.25484448936276, 'reg_lambda': 3.945991223017982, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9982411014314587, 'colsample_bytree': 0.7930618964017021, 'learning_rate': 0.1320032048171646, 'n_estimators': 197}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  25%|██▌       | 25/100 [00:14<00:35,  2.10it/s]

[I 2025-11-11 12:50:40,932] Trial 24 finished with value: -2643459571712.0 and parameters: {'reg_alpha': 3.958150445276625, 'reg_lambda': 3.43809957309981, 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.9504631901715481, 'colsample_bytree': 0.822846486139964, 'learning_rate': 0.1865317045746111, 'n_estimators': 160}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  26%|██▌       | 26/100 [00:15<00:42,  1.75it/s]

[I 2025-11-11 12:50:41,724] Trial 25 finished with value: -2508212142080.0 and parameters: {'reg_alpha': 4.390924591054534, 'reg_lambda': 3.8892028358147854, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.9013522685297977, 'colsample_bytree': 0.772117771987334, 'learning_rate': 0.12326842910565353, 'n_estimators': 172}. Best is trial 21 with value: -2344991850496.0.
Completed 26 trials. Best validation RMSE: $1,531,336.62


Best trial: 21. Best value: -2.34499e+12:  27%|██▋       | 27/100 [00:16<00:46,  1.55it/s]

[I 2025-11-11 12:50:42,538] Trial 26 finished with value: -2522471727104.0 and parameters: {'reg_alpha': 4.645805548068699, 'reg_lambda': 3.454096292645081, 'max_depth': 7, 'min_child_weight': 1, 'subsample': 0.9514585153575822, 'colsample_bytree': 0.7477092356922487, 'learning_rate': 0.09153335372925804, 'n_estimators': 100}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  28%|██▊       | 28/100 [00:16<00:41,  1.72it/s]

[I 2025-11-11 12:50:42,971] Trial 27 finished with value: -2554892910592.0 and parameters: {'reg_alpha': 4.13661056189099, 'reg_lambda': 3.818851809669211, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9645450526116203, 'colsample_bytree': 0.7953247446494777, 'learning_rate': 0.15371432890797715, 'n_estimators': 184}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  29%|██▉       | 29/100 [00:16<00:39,  1.82it/s]

[I 2025-11-11 12:50:43,444] Trial 28 finished with value: -2566954942464.0 and parameters: {'reg_alpha': 3.7237238864886875, 'reg_lambda': 4.471689960352744, 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.9370711590439399, 'colsample_bytree': 0.7799447972161216, 'learning_rate': 0.21732115791350934, 'n_estimators': 147}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  30%|███       | 30/100 [00:17<00:41,  1.69it/s]

[I 2025-11-11 12:50:44,141] Trial 29 finished with value: -2625526824960.0 and parameters: {'reg_alpha': 3.5706049009022043, 'reg_lambda': 4.346365550578326, 'max_depth': 8, 'min_child_weight': 10, 'subsample': 0.7625408570761882, 'colsample_bytree': 0.7913002233972937, 'learning_rate': 0.16440738838219737, 'n_estimators': 156}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  31%|███       | 31/100 [00:18<00:40,  1.72it/s]

[I 2025-11-11 12:50:44,698] Trial 30 finished with value: -2586202603520.0 and parameters: {'reg_alpha': 4.5008769923389345, 'reg_lambda': 4.860745575979756, 'max_depth': 7, 'min_child_weight': 1, 'subsample': 0.8757730364341139, 'colsample_bytree': 0.8200154453550828, 'learning_rate': 0.04009472461206427, 'n_estimators': 140}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  32%|███▏      | 32/100 [00:18<00:40,  1.66it/s]

[I 2025-11-11 12:50:45,345] Trial 31 finished with value: -2512304996352.0 and parameters: {'reg_alpha': 4.08884524305837, 'reg_lambda': 3.0812414777624326, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.90743985748128, 'colsample_bytree': 0.8025959758846499, 'learning_rate': 0.0783156338515716, 'n_estimators': 174}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  33%|███▎      | 33/100 [00:19<00:40,  1.66it/s]

[I 2025-11-11 12:50:45,950] Trial 32 finished with value: -2512929947648.0 and parameters: {'reg_alpha': 4.24406023912918, 'reg_lambda': 3.017498881894456, 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.9678342769454962, 'colsample_bytree': 0.7764384252122362, 'learning_rate': 0.05980145811383461, 'n_estimators': 181}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  34%|███▍      | 34/100 [00:20<00:44,  1.48it/s]

[I 2025-11-11 12:50:46,797] Trial 33 finished with value: -2655661064192.0 and parameters: {'reg_alpha': 3.9021601398511487, 'reg_lambda': 2.7522901682929453, 'max_depth': 7, 'min_child_weight': 4, 'subsample': 0.9196790236820988, 'colsample_bytree': 0.8151001488123543, 'learning_rate': 0.08838592347402728, 'n_estimators': 192}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  35%|███▌      | 35/100 [00:20<00:38,  1.71it/s]

[I 2025-11-11 12:50:47,171] Trial 34 finished with value: -2395029635072.0 and parameters: {'reg_alpha': 4.064579895537997, 'reg_lambda': 2.9209810486552756, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9402255967060091, 'colsample_bytree': 0.7841548920343767, 'learning_rate': 0.05100689590062739, 'n_estimators': 169}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  36%|███▌      | 36/100 [00:21<00:35,  1.82it/s]

[I 2025-11-11 12:50:47,641] Trial 35 finished with value: -2420934443008.0 and parameters: {'reg_alpha': 4.307501981367139, 'reg_lambda': 3.6207146494239515, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9422568077780225, 'colsample_bytree': 0.7574570138140143, 'learning_rate': 0.11008359929044692, 'n_estimators': 168}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  37%|███▋      | 37/100 [00:21<00:30,  2.08it/s]

[I 2025-11-11 12:50:47,961] Trial 36 finished with value: -2571820597248.0 and parameters: {'reg_alpha': 4.334775361264548, 'reg_lambda': 3.7282034832191586, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.9750750051582564, 'colsample_bytree': 0.7494397516320682, 'learning_rate': 0.10983968517572526, 'n_estimators': 167}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  38%|███▊      | 38/100 [00:21<00:29,  2.12it/s]

[I 2025-11-11 12:50:48,411] Trial 37 finished with value: -2441062121472.0 and parameters: {'reg_alpha': 3.34850429803826, 'reg_lambda': 3.591480540924609, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8394893336250777, 'colsample_bytree': 0.7356538624408353, 'learning_rate': 0.04821943083155988, 'n_estimators': 159}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  39%|███▉      | 39/100 [00:22<00:25,  2.37it/s]

[I 2025-11-11 12:50:48,718] Trial 38 finished with value: -3390801707008.0 and parameters: {'reg_alpha': 4.260185216663325, 'reg_lambda': 2.6797200787809468, 'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.8849387398342494, 'colsample_bytree': 0.7627862130455328, 'learning_rate': 0.01983212527390875, 'n_estimators': 139}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  40%|████      | 40/100 [00:22<00:24,  2.41it/s]

[I 2025-11-11 12:50:49,118] Trial 39 finished with value: -2464489930752.0 and parameters: {'reg_alpha': 3.798915281589494, 'reg_lambda': 2.89496032026721, 'max_depth': 5, 'min_child_weight': 9, 'subsample': 0.7909564938311056, 'colsample_bytree': 0.7568525775889925, 'learning_rate': 0.08227776007248654, 'n_estimators': 169}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  41%|████      | 41/100 [00:22<00:22,  2.64it/s]

[I 2025-11-11 12:50:49,411] Trial 40 finished with value: -2492125675520.0 and parameters: {'reg_alpha': 4.450593689349794, 'reg_lambda': 3.2194934260039947, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.8086005467402908, 'colsample_bytree': 0.7832736837445733, 'learning_rate': 0.09987991702785765, 'n_estimators': 129}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  42%|████▏     | 42/100 [00:23<00:22,  2.59it/s]

[I 2025-11-11 12:50:49,813] Trial 41 finished with value: -2444404457472.0 and parameters: {'reg_alpha': 3.397602082070031, 'reg_lambda': 3.6020372829208136, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8368442808450868, 'colsample_bytree': 0.7265567124708008, 'learning_rate': 0.053425975997066284, 'n_estimators': 161}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  43%|████▎     | 43/100 [00:23<00:23,  2.44it/s]

[I 2025-11-11 12:50:50,279] Trial 42 finished with value: -2502248628224.0 and parameters: {'reg_alpha': 3.218211878446793, 'reg_lambda': 3.6246560686363853, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.7692591076675568, 'colsample_bytree': 0.7382246467293683, 'learning_rate': 0.04448923642808406, 'n_estimators': 163}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  44%|████▍     | 44/100 [00:24<00:20,  2.73it/s]

[I 2025-11-11 12:50:50,545] Trial 43 finished with value: -2943438290944.0 and parameters: {'reg_alpha': 3.2864528381493248, 'reg_lambda': 3.492904597429303, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.8492034476306627, 'colsample_bytree': 0.7100972476803313, 'learning_rate': 0.02586660069502824, 'n_estimators': 156}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  45%|████▌     | 45/100 [00:24<00:19,  2.75it/s]

[I 2025-11-11 12:50:50,899] Trial 44 finished with value: -2420673347584.0 and parameters: {'reg_alpha': 3.4869092676964915, 'reg_lambda': 3.7139403848406154, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.907749615778149, 'colsample_bytree': 0.7541937903920239, 'learning_rate': 0.0766206910609883, 'n_estimators': 153}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  46%|████▌     | 46/100 [00:24<00:19,  2.77it/s]

[I 2025-11-11 12:50:51,256] Trial 45 finished with value: -2619015168000.0 and parameters: {'reg_alpha': 3.5403029045246805, 'reg_lambda': 4.067573579935986, 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.9132101823479811, 'colsample_bytree': 0.7591976548494699, 'learning_rate': 0.11079745434066123, 'n_estimators': 150}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  47%|████▋     | 47/100 [00:25<00:21,  2.48it/s]

[I 2025-11-11 12:50:51,756] Trial 46 finished with value: -2410068574208.0 and parameters: {'reg_alpha': 3.9559243190879596, 'reg_lambda': 2.6220890411956663, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9327391430009412, 'colsample_bytree': 0.7727911385966408, 'learning_rate': 0.07378230783561215, 'n_estimators': 170}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  48%|████▊     | 48/100 [00:25<00:20,  2.55it/s]

[I 2025-11-11 12:50:52,124] Trial 47 finished with value: -2385466097664.0 and parameters: {'reg_alpha': 3.646990959889958, 'reg_lambda': 3.7499027785848145, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9549255917917369, 'colsample_bytree': 0.773391395332422, 'learning_rate': 0.07287133734947544, 'n_estimators': 170}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  49%|████▉     | 49/100 [00:26<00:20,  2.44it/s]

[I 2025-11-11 12:50:52,572] Trial 48 finished with value: -2471358103552.0 and parameters: {'reg_alpha': 3.478636855918271, 'reg_lambda': 3.773425150765166, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9575439585118646, 'colsample_bytree': 0.772044907795152, 'learning_rate': 0.07551486825412253, 'n_estimators': 143}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  50%|█████     | 50/100 [00:26<00:19,  2.56it/s]

[I 2025-11-11 12:50:52,921] Trial 49 finished with value: -2724055220224.0 and parameters: {'reg_alpha': 3.6645475136721264, 'reg_lambda': 2.949733395270065, 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.9791588548502635, 'colsample_bytree': 0.7836908743673685, 'learning_rate': 0.06612059006482793, 'n_estimators': 164}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  51%|█████     | 51/100 [00:26<00:19,  2.49it/s]

[I 2025-11-11 12:50:53,348] Trial 50 finished with value: -2427663941632.0 and parameters: {'reg_alpha': 3.9760253995499903, 'reg_lambda': 3.992093296809552, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9301318071313136, 'colsample_bytree': 0.7879972501970739, 'learning_rate': 0.08944750114849119, 'n_estimators': 153}. Best is trial 21 with value: -2344991850496.0.
Completed 51 trials. Best validation RMSE: $1,531,336.62


Best trial: 21. Best value: -2.34499e+12:  52%|█████▏    | 52/100 [00:27<00:20,  2.39it/s]

[I 2025-11-11 12:50:53,804] Trial 51 finished with value: -2428719857664.0 and parameters: {'reg_alpha': 3.816704996680926, 'reg_lambda': 3.703987785500496, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9375703007112308, 'colsample_bytree': 0.754778005564399, 'learning_rate': 0.12241391054185982, 'n_estimators': 172}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  53%|█████▎    | 53/100 [00:27<00:19,  2.47it/s]

[I 2025-11-11 12:50:54,176] Trial 52 finished with value: -2398028038144.0 and parameters: {'reg_alpha': 4.1818162683612945, 'reg_lambda': 3.3439619971539964, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9593853366095224, 'colsample_bytree': 0.7661796994296901, 'learning_rate': 0.10728723459384942, 'n_estimators': 168}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  54%|█████▍    | 54/100 [00:28<00:19,  2.42it/s]

[I 2025-11-11 12:50:54,612] Trial 53 finished with value: -2505255682048.0 and parameters: {'reg_alpha': 4.18257784230014, 'reg_lambda': 3.189346601591239, 'max_depth': 5, 'min_child_weight': 7, 'subsample': 0.961151223524098, 'colsample_bytree': 0.7770702235923423, 'learning_rate': 0.14420462895597663, 'n_estimators': 180}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  55%|█████▌    | 55/100 [00:28<00:18,  2.38it/s]

[I 2025-11-11 12:50:55,048] Trial 54 finished with value: -2470491455488.0 and parameters: {'reg_alpha': 3.0771185463468362, 'reg_lambda': 3.3385082294960586, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9964036254331959, 'colsample_bytree': 0.7644281919427123, 'learning_rate': 0.06767035017080802, 'n_estimators': 170}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  56%|█████▌    | 56/100 [00:28<00:17,  2.51it/s]

[I 2025-11-11 12:50:55,393] Trial 55 finished with value: -2659046916096.0 and parameters: {'reg_alpha': 3.8654686188108425, 'reg_lambda': 3.8730807631898605, 'max_depth': 4, 'min_child_weight': 6, 'subsample': 0.8889783314620524, 'colsample_bytree': 0.7670619030220269, 'learning_rate': 0.09797875877110124, 'n_estimators': 165}. Best is trial 21 with value: -2344991850496.0.


Best trial: 21. Best value: -2.34499e+12:  57%|█████▋    | 57/100 [00:29<00:17,  2.40it/s]

[I 2025-11-11 12:50:55,853] Trial 56 finished with value: -2529941520384.0 and parameters: {'reg_alpha': 4.008769639103079, 'reg_lambda': 3.354574125304735, 'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.9096200987471909, 'colsample_bytree': 0.7417048071278799, 'learning_rate': 0.03200285579995117, 'n_estimators': 186}. Best is trial 21 with value: -2344991850496.0.


Best trial: 57. Best value: -2.33613e+12:  58%|█████▊    | 58/100 [00:29<00:16,  2.57it/s]

[I 2025-11-11 12:50:56,180] Trial 57 finished with value: -2336134791168.0 and parameters: {'reg_alpha': 3.684410793923637, 'reg_lambda': 2.6264481483167406, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.973517713603637, 'colsample_bytree': 0.7513359410701771, 'learning_rate': 0.07929027941658047, 'n_estimators': 154}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  59%|█████▉    | 59/100 [00:30<00:15,  2.65it/s]

[I 2025-11-11 12:50:56,528] Trial 58 finished with value: -2616137875456.0 and parameters: {'reg_alpha': 3.6851660404653965, 'reg_lambda': 2.6847383484936485, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.9841762204632499, 'colsample_bytree': 0.7714448587939324, 'learning_rate': 0.16850422473616314, 'n_estimators': 177}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  60%|██████    | 60/100 [00:30<00:18,  2.19it/s]

[I 2025-11-11 12:50:57,167] Trial 59 finished with value: -2603955519488.0 and parameters: {'reg_alpha': 4.17140467211754, 'reg_lambda': 2.5856748813271184, 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.971750532820359, 'colsample_bytree': 0.7807579692922358, 'learning_rate': 0.12005852692418884, 'n_estimators': 159}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  61%|██████    | 61/100 [00:31<00:18,  2.15it/s]

[I 2025-11-11 12:50:57,655] Trial 60 finished with value: -2462607212544.0 and parameters: {'reg_alpha': 3.787880746543009, 'reg_lambda': 2.8416780514689455, 'max_depth': 6, 'min_child_weight': 4, 'subsample': 0.955567486628127, 'colsample_bytree': 0.788120340874289, 'learning_rate': 0.05412776529972296, 'n_estimators': 172}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  62%|██████▏   | 62/100 [00:31<00:16,  2.28it/s]

[I 2025-11-11 12:50:58,033] Trial 61 finished with value: -2408596897792.0 and parameters: {'reg_alpha': 3.575549714203669, 'reg_lambda': 2.736117112897888, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.934151928569718, 'colsample_bytree': 0.7545521525487547, 'learning_rate': 0.07981018499418907, 'n_estimators': 154}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  63%|██████▎   | 63/100 [00:31<00:14,  2.52it/s]

[I 2025-11-11 12:50:58,333] Trial 62 finished with value: -2387726565376.0 and parameters: {'reg_alpha': 3.628330782141403, 'reg_lambda': 2.763316463645087, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9258279482949265, 'colsample_bytree': 0.7989270785536665, 'learning_rate': 0.06545940663110643, 'n_estimators': 145}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  64%|██████▍   | 64/100 [00:32<00:13,  2.63it/s]

[I 2025-11-11 12:50:58,672] Trial 63 finished with value: -2438846218240.0 and parameters: {'reg_alpha': 3.5730004456346802, 'reg_lambda': 2.725955623939659, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.950202491619947, 'colsample_bytree': 0.7986533862919647, 'learning_rate': 0.08824424430870946, 'n_estimators': 145}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  65%|██████▌   | 65/100 [00:32<00:13,  2.68it/s]

[I 2025-11-11 12:50:59,028] Trial 64 finished with value: -2498968420352.0 and parameters: {'reg_alpha': 3.622124967787439, 'reg_lambda': 2.7950576630836665, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9883621083552498, 'colsample_bytree': 0.8049608460466049, 'learning_rate': 0.06248499031125879, 'n_estimators': 131}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  67%|██████▋   | 67/100 [00:33<00:10,  3.17it/s]

[I 2025-11-11 12:50:59,381] Trial 65 finished with value: -2497325563904.0 and parameters: {'reg_alpha': 3.432905642296946, 'reg_lambda': 2.9367224483404217, 'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.9245270626555047, 'colsample_bytree': 0.8414031816167425, 'learning_rate': 0.10517026645176843, 'n_estimators': 151}. Best is trial 57 with value: -2336134791168.0.
[I 2025-11-11 12:50:59,575] Trial 66 finished with value: -2765919879168.0 and parameters: {'reg_alpha': 3.9058021218763823, 'reg_lambda': 3.0998538284559904, 'max_depth': 3, 'min_child_weight': 2, 'subsample': 0.9663901663809251, 'colsample_bytree': 0.7481834198266039, 'learning_rate': 0.13815470753789755, 'n_estimators': 123}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  68%|██████▊   | 68/100 [00:33<00:10,  3.20it/s]

[I 2025-11-11 12:50:59,883] Trial 67 finished with value: -2657459372032.0 and parameters: {'reg_alpha': 3.7690147034776604, 'reg_lambda': 2.5671636088170775, 'max_depth': 4, 'min_child_weight': 8, 'subsample': 0.9466343319439079, 'colsample_bytree': 0.8092196350319972, 'learning_rate': 0.08404297975585662, 'n_estimators': 158}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  69%|██████▉   | 69/100 [00:33<00:10,  2.82it/s]

[I 2025-11-11 12:51:00,334] Trial 68 finished with value: -2560461111296.0 and parameters: {'reg_alpha': 4.642043765255633, 'reg_lambda': 3.003776300843345, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.8976293260045749, 'colsample_bytree': 0.7913397651773729, 'learning_rate': 0.22322305681488486, 'n_estimators': 154}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  70%|███████   | 70/100 [00:34<00:11,  2.72it/s]

[I 2025-11-11 12:51:00,734] Trial 69 finished with value: -2457559629824.0 and parameters: {'reg_alpha': 4.064627945559192, 'reg_lambda': 2.871246603248648, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9592415082763226, 'colsample_bytree': 0.7983823984850319, 'learning_rate': 0.05228082066907393, 'n_estimators': 142}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  71%|███████   | 71/100 [00:34<00:10,  2.89it/s]

[I 2025-11-11 12:51:01,030] Trial 70 finished with value: -4881802330112.0 and parameters: {'reg_alpha': 3.7149786690063387, 'reg_lambda': 3.1478368191449357, 'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.9444132210408367, 'colsample_bytree': 0.7666474504186763, 'learning_rate': 0.011021301860623188, 'n_estimators': 135}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  72%|███████▏  | 72/100 [00:34<00:09,  2.82it/s]

[I 2025-11-11 12:51:01,402] Trial 71 finished with value: -2405322457088.0 and parameters: {'reg_alpha': 3.9207361267083396, 'reg_lambda': 2.634056984159671, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9307517339681977, 'colsample_bytree': 0.7618870508854928, 'learning_rate': 0.07107482960011849, 'n_estimators': 163}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  73%|███████▎  | 73/100 [00:35<00:10,  2.60it/s]

[I 2025-11-11 12:51:01,860] Trial 72 finished with value: -2425999327232.0 and parameters: {'reg_alpha': 3.887752797562906, 'reg_lambda': 2.770472528248086, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9217010591378574, 'colsample_bytree': 0.7532106793074912, 'learning_rate': 0.09576611972023716, 'n_estimators': 162}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  74%|███████▍  | 74/100 [00:35<00:10,  2.37it/s]

[I 2025-11-11 12:51:02,369] Trial 73 finished with value: -2427277017088.0 and parameters: {'reg_alpha': 3.6060979622030103, 'reg_lambda': 2.5333357181988814, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9365317798051342, 'colsample_bytree': 0.7608624320322112, 'learning_rate': 0.07030027949415624, 'n_estimators': 166}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  75%|███████▌  | 75/100 [00:36<00:10,  2.45it/s]

[I 2025-11-11 12:51:02,742] Trial 74 finished with value: -2390249701376.0 and parameters: {'reg_alpha': 4.043900770383514, 'reg_lambda': 2.698731217686374, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8737211103177681, 'colsample_bytree': 0.7516344275079861, 'learning_rate': 0.05878068359303206, 'n_estimators': 147}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  76%|███████▌  | 76/100 [00:36<00:10,  2.30it/s]

[I 2025-11-11 12:51:03,242] Trial 75 finished with value: -2436960354304.0 and parameters: {'reg_alpha': 4.200669255082184, 'reg_lambda': 2.688986167170659, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.8216238468898294, 'colsample_bytree': 0.8160290734380014, 'learning_rate': 0.042227421621601056, 'n_estimators': 147}. Best is trial 57 with value: -2336134791168.0.
Completed 76 trials. Best validation RMSE: $1,528,441.95


Best trial: 57. Best value: -2.33613e+12:  77%|███████▋  | 77/100 [00:37<00:09,  2.45it/s]

[I 2025-11-11 12:51:03,586] Trial 76 finished with value: -2500684939264.0 and parameters: {'reg_alpha': 4.105640260244904, 'reg_lambda': 2.5000278360600867, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8737296817987016, 'colsample_bytree': 0.7504835052791712, 'learning_rate': 0.03258253797315639, 'n_estimators': 150}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  78%|███████▊  | 78/100 [00:37<00:08,  2.56it/s]

[I 2025-11-11 12:51:03,936] Trial 77 finished with value: -2486211706880.0 and parameters: {'reg_alpha': 4.057243650607784, 'reg_lambda': 2.607746088017614, 'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.9758269680542699, 'colsample_bytree': 0.7432168298703091, 'learning_rate': 0.058901119316511895, 'n_estimators': 136}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  79%|███████▉  | 79/100 [00:37<00:08,  2.58it/s]

[I 2025-11-11 12:51:04,315] Trial 78 finished with value: -2613407907840.0 and parameters: {'reg_alpha': 4.0010309205461985, 'reg_lambda': 3.258884034057312, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.7270874986327766, 'colsample_bytree': 0.7694134684107115, 'learning_rate': 0.04995836264942893, 'n_estimators': 174}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  80%|████████  | 80/100 [00:38<00:08,  2.34it/s]

[I 2025-11-11 12:51:04,835] Trial 79 finished with value: -2641807540224.0 and parameters: {'reg_alpha': 4.343755674006333, 'reg_lambda': 4.212685499215637, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.9156903821856918, 'colsample_bytree': 0.7758554694280408, 'learning_rate': 0.29637206590480103, 'n_estimators': 157}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  81%|████████  | 81/100 [00:38<00:07,  2.47it/s]

[I 2025-11-11 12:51:05,186] Trial 80 finished with value: -2447074394112.0 and parameters: {'reg_alpha': 3.837791771382251, 'reg_lambda': 3.5193987992227354, 'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.9681781452435477, 'colsample_bytree': 0.782745385554406, 'learning_rate': 0.11637086720075068, 'n_estimators': 161}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  82%|████████▏ | 82/100 [00:39<00:07,  2.54it/s]

[I 2025-11-11 12:51:05,557] Trial 81 finished with value: -2414232207360.0 and parameters: {'reg_alpha': 3.696974410854173, 'reg_lambda': 2.7812190657912708, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9321068560507508, 'colsample_bytree': 0.762551400783164, 'learning_rate': 0.08503540575274748, 'n_estimators': 146}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  83%|████████▎ | 83/100 [00:39<00:06,  2.51it/s]

[I 2025-11-11 12:51:05,965] Trial 82 finished with value: -2449146642432.0 and parameters: {'reg_alpha': 3.929668415832183, 'reg_lambda': 2.6424433776302942, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9276399374983235, 'colsample_bytree': 0.7298311382091449, 'learning_rate': 0.07955372680131553, 'n_estimators': 163}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  84%|████████▍ | 84/100 [00:39<00:06,  2.48it/s]

[I 2025-11-11 12:51:06,378] Trial 83 finished with value: -2422483451904.0 and parameters: {'reg_alpha': 3.3241332703910214, 'reg_lambda': 2.894053797231348, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9432459727037618, 'colsample_bytree': 0.7456688472819014, 'learning_rate': 0.06893061821293353, 'n_estimators': 155}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  85%|████████▌ | 85/100 [00:40<00:06,  2.41it/s]

[I 2025-11-11 12:51:06,823] Trial 84 finished with value: -2473406496768.0 and parameters: {'reg_alpha': 3.750936982682092, 'reg_lambda': 2.7177566365060253, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9021061435764726, 'colsample_bytree': 0.7520138186888271, 'learning_rate': 0.10483024158123683, 'n_estimators': 166}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  86%|████████▌ | 86/100 [00:40<00:06,  2.30it/s]

[I 2025-11-11 12:51:07,303] Trial 85 finished with value: -2372791959552.0 and parameters: {'reg_alpha': 4.12899573833607, 'reg_lambda': 2.962205058158254, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9543039331455311, 'colsample_bytree': 0.7568620905822588, 'learning_rate': 0.06122486877023667, 'n_estimators': 151}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  87%|████████▋ | 87/100 [00:41<00:05,  2.39it/s]

[I 2025-11-11 12:51:07,681] Trial 86 finished with value: -2426576830464.0 and parameters: {'reg_alpha': 4.390796704779155, 'reg_lambda': 2.964761375924984, 'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.9550412790356761, 'colsample_bytree': 0.7390190447601404, 'learning_rate': 0.058744396868889465, 'n_estimators': 143}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  88%|████████▊ | 88/100 [00:41<00:05,  2.36it/s]

[I 2025-11-11 12:51:08,120] Trial 87 finished with value: -2494734532608.0 and parameters: {'reg_alpha': 4.1416084992738345, 'reg_lambda': 3.4034936007145253, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9822371354140794, 'colsample_bytree': 0.8049113240620616, 'learning_rate': 0.09375474323022912, 'n_estimators': 152}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  89%|████████▉ | 89/100 [00:41<00:04,  2.59it/s]

[I 2025-11-11 12:51:08,416] Trial 88 finished with value: -2703022620672.0 and parameters: {'reg_alpha': 4.221768191140893, 'reg_lambda': 3.054149250153843, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.964100961035446, 'colsample_bytree': 0.75884883223802, 'learning_rate': 0.04474136209789213, 'n_estimators': 148}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  90%|█████████ | 90/100 [00:42<00:03,  2.60it/s]

[I 2025-11-11 12:51:08,799] Trial 89 finished with value: -2560965738496.0 and parameters: {'reg_alpha': 4.281796310644443, 'reg_lambda': 2.9072323788430237, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8650616919207367, 'colsample_bytree': 0.7954506974457011, 'learning_rate': 0.027849643167851615, 'n_estimators': 168}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  91%|█████████ | 91/100 [00:42<00:03,  2.47it/s]

[I 2025-11-11 12:51:09,248] Trial 90 finished with value: -2558583111680.0 and parameters: {'reg_alpha': 4.01555805803757, 'reg_lambda': 2.831596957791903, 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.9929167960704876, 'colsample_bytree': 0.7648826126368459, 'learning_rate': 0.037244947656434095, 'n_estimators': 159}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  92%|█████████▏| 92/100 [00:43<00:03,  2.64it/s]

[I 2025-11-11 12:51:09,568] Trial 91 finished with value: -2464148619264.0 and parameters: {'reg_alpha': 3.512901426616117, 'reg_lambda': 2.63770041348677, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9408210669327101, 'colsample_bytree': 0.7566165292096869, 'learning_rate': 0.0738436656453885, 'n_estimators': 141}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  93%|█████████▎| 93/100 [00:43<00:02,  2.69it/s]

[I 2025-11-11 12:51:09,924] Trial 92 finished with value: -2443128340480.0 and parameters: {'reg_alpha': 4.133630016473078, 'reg_lambda': 2.744340628207714, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9492865286595698, 'colsample_bytree': 0.7620235898748536, 'learning_rate': 0.06195603430459397, 'n_estimators': 138}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  94%|█████████▍| 94/100 [00:43<00:02,  2.68it/s]

[I 2025-11-11 12:51:10,300] Trial 93 finished with value: -2451709886464.0 and parameters: {'reg_alpha': 2.5034414489248893, 'reg_lambda': 3.1428472755363033, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9183798257585809, 'colsample_bytree': 0.768305976760829, 'learning_rate': 0.07974109706897105, 'n_estimators': 150}. Best is trial 57 with value: -2336134791168.0.


Best trial: 57. Best value: -2.33613e+12:  95%|█████████▌| 95/100 [00:44<00:01,  2.64it/s]

[I 2025-11-11 12:51:10,690] Trial 94 finished with value: -2466183380992.0 and parameters: {'reg_alpha': 3.450052737585915, 'reg_lambda': 4.555074802864078, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9715769847955144, 'colsample_bytree': 0.7740092176106773, 'learning_rate': 0.05483508878924846, 'n_estimators': 155}. Best is trial 57 with value: -2336134791168.0.


Best trial: 95. Best value: -2.32929e+12:  96%|█████████▌| 96/100 [00:44<00:01,  2.60it/s]

[I 2025-11-11 12:51:11,089] Trial 95 finished with value: -2329292046336.0 and parameters: {'reg_alpha': 3.638081265794082, 'reg_lambda': 3.009519908270443, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9556575005381591, 'colsample_bytree': 0.7456983077358467, 'learning_rate': 0.06602073692424248, 'n_estimators': 145}. Best is trial 95 with value: -2329292046336.0.


Best trial: 95. Best value: -2.32929e+12:  97%|█████████▋| 97/100 [00:44<00:01,  2.58it/s]

[I 2025-11-11 12:51:11,485] Trial 96 finished with value: -2350842380288.0 and parameters: {'reg_alpha': 3.9469563163723453, 'reg_lambda': 3.26696775833512, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9563823093890513, 'colsample_bytree': 0.7342891917935847, 'learning_rate': 0.06582976889425238, 'n_estimators': 172}. Best is trial 95 with value: -2329292046336.0.


Best trial: 95. Best value: -2.32929e+12:  98%|█████████▊| 98/100 [00:45<00:00,  2.45it/s]

[I 2025-11-11 12:51:11,943] Trial 97 finished with value: -2372233330688.0 and parameters: {'reg_alpha': 3.6547688049931706, 'reg_lambda': 3.1895420970756097, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9601629001033741, 'colsample_bytree': 0.7337447318371207, 'learning_rate': 0.0655467355605208, 'n_estimators': 180}. Best is trial 95 with value: -2329292046336.0.


Best trial: 95. Best value: -2.32929e+12:  99%|█████████▉| 99/100 [00:45<00:00,  2.40it/s]

[I 2025-11-11 12:51:12,377] Trial 98 finished with value: -2559537053696.0 and parameters: {'reg_alpha': 3.660383763652677, 'reg_lambda': 3.009652715836197, 'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.9760727812715254, 'colsample_bytree': 0.7210103626333216, 'learning_rate': 0.06497604597928144, 'n_estimators': 183}. Best is trial 95 with value: -2329292046336.0.


Best trial: 99. Best value: -2.31135e+12: 100%|██████████| 100/100 [00:46<00:00,  2.16it/s]


[I 2025-11-11 12:51:12,865] Trial 99 finished with value: -2311348813824.0 and parameters: {'reg_alpha': 3.826317569224855, 'reg_lambda': 3.168769064516178, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.9535895592211322, 'colsample_bytree': 0.7176601248730557, 'learning_rate': 0.046684073520362704, 'n_estimators': 189}. Best is trial 99 with value: -2311348813824.0.

=== XGBoost Results (Optimized for Validation) ===
Best validation RMSE: $1,520,312.08
Best params:
  reg_alpha: 3.826317569224855
  reg_lambda: 3.168769064516178
  max_depth: 5
  min_child_weight: 2
  subsample: 0.9535895592211322
  colsample_bytree: 0.7176601248730557
  learning_rate: 0.046684073520362704
  n_estimators: 189

Regularization: L1=3.826, L2=3.169

=== Performance ===
Training:   RMSE $1,107,670.38, R² 0.9530
Validation: RMSE $1,520,312.08, R² 0.8992
Test:       RMSE $2,006,796.92, R² 0.8456
⚠️  Possible overfitting detected

✅ Saved two CSV files:
   1. Test only: lgb_predictions_regularized.csv (1,

In [51]:
import optuna
from sklearn.model_selection import cross_val_score
import catboost as cb
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score

def catboost_objective(trial):
    # CatBoost uses different regularization parameter names
    l2_leaf_reg = trial.suggest_float('l2_leaf_reg', 1.0, 10.0)  # L2 regularization
    
    params = {
        'depth': trial.suggest_int('depth', 4, 8),
        'learning_rate': trial.suggest_float('learning_rate', 0.03, 0.15),
        'iterations': trial.suggest_int('iterations', 100, 300),
        'l2_leaf_reg': l2_leaf_reg,  # Use CatBoost's L2 parameter name
        'border_count': trial.suggest_int('border_count', 32, 128),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_float('random_strength', 0.0, 2.0),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 20, 100),
        
        # Fixed parameters
        'objective': 'RMSE',
        'random_state': 42,
        'verbose': False,
        'thread_count': -1,
        'cat_features': ['district']  # Specify categorical features
    }
    
    try:
        model = cb.CatBoostRegressor(**params)
        
        # ✅ CHANGE 1: Train on training set only
        model.fit(X_train, y_train)
        
        # ✅ CHANGE 2: Evaluate on validation set (not training!)
        val_predictions = model.predict(X_val)
        val_mse = mean_squared_error(y_val, val_predictions)
        
        # ✅ CHANGE 3: Return negative MSE for maximization (Optuna maximizes)
        return -val_mse
        
    except Exception as e:
        print(f"Trial {trial.number} failed: {e}")
        return float('-inf')

def print_progress(study, trial):
    if trial.number % 25 == 0:
        # ✅ CHANGE 4: Show validation RMSE instead of raw score
        val_rmse = np.sqrt(-study.best_value)
        print(f"Completed {trial.number + 1} trials. Best validation RMSE: ${val_rmse:,.2f}")

print("Starting CatBoost hyperparameter optimization...")
print("🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)")  # ✅ CHANGE 5: Clear messaging
print(f"Training samples: {len(X_train):,}")

study = optuna.create_study(direction='maximize')
study.optimize(
    catboost_objective, 
    n_trials=100,
    callbacks=[print_progress],
    show_progress_bar=True
)

# ✅ CHANGE 6: Updated results display
best_val_rmse = np.sqrt(-study.best_value)
print(f"\n=== CatBoost Results (Optimized for Validation) ===")
print(f"Best validation RMSE: ${best_val_rmse:,.2f}")
print("Best params:")
for key, value in study.best_trial.params.items():
    print(f"  {key}: {value}")

# Train final model with corrected parameters
best_params = study.best_trial.params.copy()
best_params.update({
    'objective': 'RMSE',
    'random_state': 42,
    'verbose': False,
    'thread_count': -1,
    'cat_features': ['district']
})

best_catboost_model = cb.CatBoostRegressor(**best_params)

# Fit on training data only
best_catboost_model.fit(X_train, y_train)

# Get predictions for all sets
train_predictions = best_catboost_model.predict(X_train)
val_predictions = best_catboost_model.predict(X_val)
test_predictions = best_catboost_model.predict(X_test)

# Calculate metrics
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
val_rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

train_r2 = r2_score(y_train, train_predictions)
val_r2 = r2_score(y_val, val_predictions)
test_r2 = r2_score(y_test, test_predictions)

print(f"\n=== Performance ===")
print(f"Training:   RMSE ${train_rmse:,.2f}, R² {train_r2:.4f}")
print(f"Validation: RMSE ${val_rmse:,.2f}, R² {val_r2:.4f}")
print(f"Test:       RMSE ${test_rmse:,.2f}, R² {test_r2:.4f}")

# Overfitting check
if train_rmse < val_rmse * 0.8:
    print(f"⚠️  Possible overfitting detected")
else:
    print(f"✅ Good generalization")


# Create results with TEST set only (true unseen data)
results_df = X_test.copy()
results_df['actual_price'] = y_test
results_df['catboost_predicted_price'] = test_predictions
results_df['prediction_error'] = results_df['actual_price'] - results_df['catboost_predicted_price']
results_df['absolute_error'] = abs(results_df['prediction_error'])

# Save TEST ONLY results
results_df.to_csv('/Users/clarencemarvin/Downloads/regularized/catboost_predicted_price.csv', index=False)

# ✅ NEW: Create combined results with ALL data (train + val + test)
train_df = X_train.copy()
train_df['dataset'] = 'train'
train_df['actual_price'] = y_train
train_df['catboost_predicted_price'] = train_predictions
train_df['prediction_error'] = train_df['actual_price'] - train_df['catboost_predicted_price']
train_df['absolute_error'] = abs(train_df['prediction_error'])

val_df = X_val.copy()
val_df['dataset'] = 'validation'
val_df['actual_price'] = y_val
val_df['catboost_predicted_price'] = val_predictions
val_df['prediction_error'] = val_df['actual_price'] - val_df['catboost_predicted_price']
val_df['absolute_error'] = abs(val_df['prediction_error'])

test_df = X_test.copy()
test_df['dataset'] = 'test'
test_df['actual_price'] = y_test
test_df['catboost_predicted_price'] = test_predictions
test_df['prediction_error'] = test_df['actual_price'] - test_df['catboost_predicted_price']
test_df['absolute_error'] = abs(test_df['prediction_error'])

# Combine all three datasets
all_results_df = pd.concat([train_df, val_df, test_df], ignore_index=True)

# Save COMBINED results
all_results_df.to_csv('/Users/clarencemarvin/Downloads/regularized/catboost_predictions_all_data.csv', index=False)

print(f"\n✅ Saved two CSV files:")
print(f"   1. Test only: lgb_predictions_regularized.csv ({len(results_df):,} rows)")
print(f"   2. All data: lgb_predictions_all_data.csv ({len(all_results_df):,} rows)")

# Feature importance
feature_names = [col for col in X_train.columns if col != 'district'] + ['district']
importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': best_catboost_model.feature_importances_
}).sort_values('importance', ascending=False)

importance_df['importance_pct'] = (importance_df['importance'] / importance_df['importance'].sum()) * 100

print(f"\n=== Top Features ===")
for idx, row in importance_df.head(10).iterrows():
    print(f"{row['feature']}: {row['importance_pct']:.1f}%")

importance_df.to_csv('/Users/clarencemarvin/Downloads/regularized2/catboost_feature_importance.csv', index=False)

print(f"\nFinal test RMSE: ${test_rmse:,.2f}")

[I 2025-11-11 12:51:44,727] A new study created in memory with name: no-name-9084f875-89c2-4315-bd50-d8f3a24114e7


Starting CatBoost hyperparameter optimization...
🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)
Training samples: 3,693


Best trial: 0. Best value: -2.82937e+12:   2%|▏         | 2/100 [00:01<00:53,  1.85it/s]

[I 2025-11-11 12:51:45,815] Trial 0 finished with value: -2829368688887.069 and parameters: {'l2_leaf_reg': 3.5435971033157525, 'depth': 8, 'learning_rate': 0.049105786732791666, 'iterations': 292, 'border_count': 38, 'bagging_temperature': 0.8354598338078925, 'random_strength': 1.1556698031184143, 'min_data_in_leaf': 33}. Best is trial 0 with value: -2829368688887.069.
Completed 1 trials. Best validation RMSE: $1,682,072.74
[I 2025-11-11 12:51:45,973] Trial 1 finished with value: -3059000157999.721 and parameters: {'l2_leaf_reg': 5.627436551428138, 'depth': 5, 'learning_rate': 0.12586608602349572, 'iterations': 135, 'border_count': 68, 'bagging_temperature': 0.7135468402451469, 'random_strength': 0.9912498582615117, 'min_data_in_leaf': 26}. Best is trial 0 with value: -2829368688887.069.


Best trial: 0. Best value: -2.82937e+12:   3%|▎         | 3/100 [00:01<00:37,  2.60it/s]

[I 2025-11-11 12:51:46,172] Trial 2 finished with value: -3136287734431.1743 and parameters: {'l2_leaf_reg': 6.877762719015905, 'depth': 5, 'learning_rate': 0.13938205324355932, 'iterations': 137, 'border_count': 96, 'bagging_temperature': 0.4942305500507117, 'random_strength': 1.5737727679954776, 'min_data_in_leaf': 47}. Best is trial 0 with value: -2829368688887.069.


Best trial: 0. Best value: -2.82937e+12:   4%|▍         | 4/100 [00:01<00:34,  2.76it/s]

[I 2025-11-11 12:51:46,496] Trial 3 finished with value: -3335540198022.7964 and parameters: {'l2_leaf_reg': 3.4194396920682566, 'depth': 4, 'learning_rate': 0.06796956706326628, 'iterations': 219, 'border_count': 61, 'bagging_temperature': 0.6803117186432804, 'random_strength': 1.9036565881865957, 'min_data_in_leaf': 38}. Best is trial 0 with value: -2829368688887.069.


Best trial: 4. Best value: -2.67329e+12:   5%|▌         | 5/100 [00:02<00:53,  1.76it/s]

[I 2025-11-11 12:51:47,430] Trial 4 finished with value: -2673292275190.71 and parameters: {'l2_leaf_reg': 4.439432419178928, 'depth': 8, 'learning_rate': 0.10217429769964582, 'iterations': 294, 'border_count': 121, 'bagging_temperature': 0.7288723420672159, 'random_strength': 0.5295351708090796, 'min_data_in_leaf': 25}. Best is trial 4 with value: -2673292275190.71.


Best trial: 4. Best value: -2.67329e+12:   7%|▋         | 7/100 [00:03<00:38,  2.41it/s]

[I 2025-11-11 12:51:47,872] Trial 5 finished with value: -3083087492893.267 and parameters: {'l2_leaf_reg': 4.548003549753755, 'depth': 8, 'learning_rate': 0.07678518038640791, 'iterations': 178, 'border_count': 32, 'bagging_temperature': 0.4973071880248847, 'random_strength': 0.6705692009513311, 'min_data_in_leaf': 40}. Best is trial 4 with value: -2673292275190.71.
[I 2025-11-11 12:51:48,063] Trial 6 finished with value: -3435260523632.9023 and parameters: {'l2_leaf_reg': 9.548269150851652, 'depth': 5, 'learning_rate': 0.07536183732163845, 'iterations': 151, 'border_count': 125, 'bagging_temperature': 0.23015964830705782, 'random_strength': 1.4071678045821334, 'min_data_in_leaf': 78}. Best is trial 4 with value: -2673292275190.71.


Best trial: 4. Best value: -2.67329e+12:   8%|▊         | 8/100 [00:03<00:34,  2.64it/s]

[I 2025-11-11 12:51:48,362] Trial 7 finished with value: -3291215309045.2656 and parameters: {'l2_leaf_reg': 2.855637708618374, 'depth': 4, 'learning_rate': 0.0639236945122366, 'iterations': 234, 'border_count': 53, 'bagging_temperature': 0.9199035875200375, 'random_strength': 0.9469281241197365, 'min_data_in_leaf': 89}. Best is trial 4 with value: -2673292275190.71.


Best trial: 4. Best value: -2.67329e+12:   9%|▉         | 9/100 [00:03<00:31,  2.92it/s]

[I 2025-11-11 12:51:48,627] Trial 8 finished with value: -2855681211855.388 and parameters: {'l2_leaf_reg': 5.712185727484651, 'depth': 8, 'learning_rate': 0.12302441434434895, 'iterations': 129, 'border_count': 46, 'bagging_temperature': 0.44590952310910204, 'random_strength': 1.6297140795669518, 'min_data_in_leaf': 44}. Best is trial 4 with value: -2673292275190.71.


Best trial: 4. Best value: -2.67329e+12:  10%|█         | 10/100 [00:04<00:38,  2.33it/s]

[I 2025-11-11 12:51:49,251] Trial 9 finished with value: -2802418706177.316 and parameters: {'l2_leaf_reg': 9.15680763333491, 'depth': 8, 'learning_rate': 0.08546940806773261, 'iterations': 215, 'border_count': 68, 'bagging_temperature': 0.520421864962188, 'random_strength': 1.262579281034402, 'min_data_in_leaf': 32}. Best is trial 4 with value: -2673292275190.71.


Best trial: 10. Best value: -2.6035e+12:  11%|█         | 11/100 [00:05<00:46,  1.90it/s]

[I 2025-11-11 12:51:49,992] Trial 10 finished with value: -2603502602253.3247 and parameters: {'l2_leaf_reg': 1.8590449674711662, 'depth': 7, 'learning_rate': 0.11175070368113157, 'iterations': 292, 'border_count': 127, 'bagging_temperature': 0.12940069907187435, 'random_strength': 0.09522542143033536, 'min_data_in_leaf': 64}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  12%|█▏        | 12/100 [00:05<00:51,  1.70it/s]

[I 2025-11-11 12:51:50,720] Trial 11 finished with value: -2636090286582.4727 and parameters: {'l2_leaf_reg': 1.522587667943605, 'depth': 7, 'learning_rate': 0.11312476627661336, 'iterations': 295, 'border_count': 125, 'bagging_temperature': 0.11892041459894948, 'random_strength': 0.08224712843838655, 'min_data_in_leaf': 65}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  13%|█▎        | 13/100 [00:06<00:53,  1.63it/s]

[I 2025-11-11 12:51:51,389] Trial 12 finished with value: -2730128156224.5054 and parameters: {'l2_leaf_reg': 1.052199907673081, 'depth': 7, 'learning_rate': 0.10621716995670025, 'iterations': 268, 'border_count': 102, 'bagging_temperature': 0.09078278675524296, 'random_strength': 0.02826080422399553, 'min_data_in_leaf': 64}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  14%|█▍        | 14/100 [00:07<00:55,  1.54it/s]

[I 2025-11-11 12:51:52,121] Trial 13 finished with value: -2694679195661.26 and parameters: {'l2_leaf_reg': 1.1358994809149356, 'depth': 7, 'learning_rate': 0.14874855997013914, 'iterations': 252, 'border_count': 109, 'bagging_temperature': 0.0026658242745693783, 'random_strength': 0.007550668927262444, 'min_data_in_leaf': 62}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  15%|█▌        | 15/100 [00:07<00:45,  1.85it/s]

[I 2025-11-11 12:51:52,398] Trial 14 finished with value: -2970071128061.2607 and parameters: {'l2_leaf_reg': 2.115198878349474, 'depth': 7, 'learning_rate': 0.10570485177911454, 'iterations': 101, 'border_count': 88, 'bagging_temperature': 0.25661384075021343, 'random_strength': 0.28886680303668033, 'min_data_in_leaf': 74}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  16%|█▌        | 16/100 [00:08<00:49,  1.71it/s]

[I 2025-11-11 12:51:53,101] Trial 15 finished with value: -2710809504371.147 and parameters: {'l2_leaf_reg': 2.1026381035891326, 'depth': 6, 'learning_rate': 0.1224873617278713, 'iterations': 267, 'border_count': 112, 'bagging_temperature': 0.23732887430199334, 'random_strength': 0.348681367080235, 'min_data_in_leaf': 100}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  17%|█▋        | 17/100 [00:09<00:53,  1.55it/s]

[I 2025-11-11 12:51:53,883] Trial 16 finished with value: -2714866389580.0244 and parameters: {'l2_leaf_reg': 2.091856035680589, 'depth': 6, 'learning_rate': 0.09492301694758937, 'iterations': 299, 'border_count': 127, 'bagging_temperature': 0.11423787203661966, 'random_strength': 0.70227384249071, 'min_data_in_leaf': 52}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  18%|█▊        | 18/100 [00:09<00:46,  1.75it/s]

[I 2025-11-11 12:51:54,290] Trial 17 finished with value: -2809338544475.2114 and parameters: {'l2_leaf_reg': 7.3695011109293045, 'depth': 7, 'learning_rate': 0.11655951242407309, 'iterations': 186, 'border_count': 82, 'bagging_temperature': 0.3876356824128155, 'random_strength': 0.29607991361362107, 'min_data_in_leaf': 73}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  19%|█▉        | 19/100 [00:10<00:50,  1.62it/s]

[I 2025-11-11 12:51:55,015] Trial 18 finished with value: -2747277860543.3022 and parameters: {'l2_leaf_reg': 4.491895885849361, 'depth': 6, 'learning_rate': 0.14132208156538845, 'iterations': 268, 'border_count': 115, 'bagging_temperature': 0.34405215809771417, 'random_strength': 0.18494147349029555, 'min_data_in_leaf': 55}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  20%|██        | 20/100 [00:10<00:50,  1.57it/s]

[I 2025-11-11 12:51:55,693] Trial 19 finished with value: -2982933582261.11 and parameters: {'l2_leaf_reg': 3.0590388408815117, 'depth': 7, 'learning_rate': 0.03958319249819562, 'iterations': 234, 'border_count': 98, 'bagging_temperature': 0.13476120523133212, 'random_strength': 0.4671298808399609, 'min_data_in_leaf': 84}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 10. Best value: -2.6035e+12:  21%|██        | 21/100 [00:11<00:52,  1.52it/s]

[I 2025-11-11 12:51:56,406] Trial 20 finished with value: -2737909813441.1006 and parameters: {'l2_leaf_reg': 1.623817753568922, 'depth': 6, 'learning_rate': 0.0900254997383291, 'iterations': 275, 'border_count': 106, 'bagging_temperature': 0.0032174840888372636, 'random_strength': 0.8330246822629082, 'min_data_in_leaf': 67}. Best is trial 10 with value: -2603502602253.3247.


Best trial: 21. Best value: -2.5716e+12:  22%|██▏       | 22/100 [00:12<01:05,  1.20it/s]

[I 2025-11-11 12:51:57,649] Trial 21 finished with value: -2571603510827.7983 and parameters: {'l2_leaf_reg': 4.370229875457133, 'depth': 8, 'learning_rate': 0.10227488811408995, 'iterations': 300, 'border_count': 120, 'bagging_temperature': 0.6323876413279278, 'random_strength': 0.5672179861511917, 'min_data_in_leaf': 57}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  23%|██▎       | 23/100 [00:13<01:08,  1.12it/s]

[I 2025-11-11 12:51:58,675] Trial 22 finished with value: -2674501657053.483 and parameters: {'l2_leaf_reg': 2.7183178828238646, 'depth': 7, 'learning_rate': 0.11170994901237699, 'iterations': 284, 'border_count': 117, 'bagging_temperature': 0.5845034320436042, 'random_strength': 0.1535656687495991, 'min_data_in_leaf': 55}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  24%|██▍       | 24/100 [00:14<01:04,  1.18it/s]

[I 2025-11-11 12:51:59,409] Trial 23 finished with value: -2786158075925.263 and parameters: {'l2_leaf_reg': 3.829948071078113, 'depth': 7, 'learning_rate': 0.1346293161469216, 'iterations': 248, 'border_count': 128, 'bagging_temperature': 0.623175833031946, 'random_strength': 0.5275001256136138, 'min_data_in_leaf': 68}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  25%|██▌       | 25/100 [00:15<01:05,  1.15it/s]

[I 2025-11-11 12:52:00,335] Trial 24 finished with value: -2684167318282.9814 and parameters: {'l2_leaf_reg': 6.7285638599818185, 'depth': 8, 'learning_rate': 0.10163482843939073, 'iterations': 256, 'border_count': 120, 'bagging_temperature': 0.3177913863001612, 'random_strength': 0.12938216303294647, 'min_data_in_leaf': 58}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  26%|██▌       | 26/100 [00:16<01:03,  1.16it/s]

[I 2025-11-11 12:52:01,169] Trial 25 finished with value: -2665462470360.311 and parameters: {'l2_leaf_reg': 2.376584895697371, 'depth': 7, 'learning_rate': 0.11616683443449356, 'iterations': 300, 'border_count': 89, 'bagging_temperature': 0.1788225448166392, 'random_strength': 0.4132296750053204, 'min_data_in_leaf': 50}. Best is trial 21 with value: -2571603510827.7983.
Completed 26 trials. Best validation RMSE: $1,603,622.00


Best trial: 21. Best value: -2.5716e+12:  27%|██▋       | 27/100 [00:17<01:05,  1.12it/s]

[I 2025-11-11 12:52:02,153] Trial 26 finished with value: -2663054293326.8916 and parameters: {'l2_leaf_reg': 8.331491851712943, 'depth': 8, 'learning_rate': 0.09554462188411826, 'iterations': 281, 'border_count': 112, 'bagging_temperature': 0.7777478467594064, 'random_strength': 0.6787968033659879, 'min_data_in_leaf': 79}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  28%|██▊       | 28/100 [00:18<01:01,  1.17it/s]

[I 2025-11-11 12:52:02,909] Trial 27 finished with value: -2652273019101.1006 and parameters: {'l2_leaf_reg': 1.5572176976067968, 'depth': 7, 'learning_rate': 0.08317989126993611, 'iterations': 240, 'border_count': 120, 'bagging_temperature': 0.4058366030003607, 'random_strength': 0.23505649229150877, 'min_data_in_leaf': 68}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  29%|██▉       | 29/100 [00:19<01:03,  1.12it/s]

[I 2025-11-11 12:52:03,904] Trial 28 finished with value: -2776838080970.2524 and parameters: {'l2_leaf_reg': 5.179362784779841, 'depth': 8, 'learning_rate': 0.129931728255975, 'iterations': 279, 'border_count': 104, 'bagging_temperature': 0.06439819963139098, 'random_strength': 0.00483081930815521, 'min_data_in_leaf': 90}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  30%|███       | 30/100 [00:20<01:05,  1.07it/s]

[I 2025-11-11 12:52:04,927] Trial 29 finished with value: -2642403733376.826 and parameters: {'l2_leaf_reg': 3.535234107715693, 'depth': 8, 'learning_rate': 0.1118427032939342, 'iterations': 287, 'border_count': 92, 'bagging_temperature': 0.9757582661411762, 'random_strength': 0.8211233227684792, 'min_data_in_leaf': 61}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  31%|███       | 31/100 [00:20<00:58,  1.17it/s]

[I 2025-11-11 12:52:05,595] Trial 30 finished with value: -2894852583133.314 and parameters: {'l2_leaf_reg': 3.971168443906693, 'depth': 6, 'learning_rate': 0.06011304436904272, 'iterations': 261, 'border_count': 128, 'bagging_temperature': 0.2973538174027396, 'random_strength': 1.1676245761961566, 'min_data_in_leaf': 58}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  32%|███▏      | 32/100 [00:21<01:00,  1.12it/s]

[I 2025-11-11 12:52:06,572] Trial 31 finished with value: -2725197814244.257 and parameters: {'l2_leaf_reg': 3.3525097118555562, 'depth': 8, 'learning_rate': 0.11272453625832723, 'iterations': 293, 'border_count': 74, 'bagging_temperature': 0.9398848306032254, 'random_strength': 0.7990798092441762, 'min_data_in_leaf': 62}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  33%|███▎      | 33/100 [00:22<01:02,  1.08it/s]

[I 2025-11-11 12:52:07,583] Trial 32 finished with value: -2696039999599.0933 and parameters: {'l2_leaf_reg': 5.166653647974669, 'depth': 8, 'learning_rate': 0.09680035987500492, 'iterations': 290, 'border_count': 94, 'bagging_temperature': 0.8151846548754433, 'random_strength': 1.0471576678468892, 'min_data_in_leaf': 74}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  34%|███▍      | 34/100 [00:23<01:03,  1.04it/s]

[I 2025-11-11 12:52:08,620] Trial 33 finished with value: -2589122441010.7246 and parameters: {'l2_leaf_reg': 2.627177364796559, 'depth': 8, 'learning_rate': 0.1219789553501405, 'iterations': 283, 'border_count': 114, 'bagging_temperature': 0.8793464579997108, 'random_strength': 0.4926891954883257, 'min_data_in_leaf': 45}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  35%|███▌      | 35/100 [00:24<01:02,  1.04it/s]

[I 2025-11-11 12:52:09,588] Trial 34 finished with value: -2616720279585.1675 and parameters: {'l2_leaf_reg': 1.6060007464546788, 'depth': 7, 'learning_rate': 0.1298828643006354, 'iterations': 277, 'border_count': 122, 'bagging_temperature': 0.8797072938046591, 'random_strength': 0.5803095148532684, 'min_data_in_leaf': 47}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  36%|███▌      | 36/100 [00:25<00:59,  1.08it/s]

[I 2025-11-11 12:52:10,436] Trial 35 finished with value: -2684609501620.564 and parameters: {'l2_leaf_reg': 2.669590669881149, 'depth': 8, 'learning_rate': 0.12958414477213115, 'iterations': 214, 'border_count': 115, 'bagging_temperature': 0.8861917447838997, 'random_strength': 0.5824328879256936, 'min_data_in_leaf': 32}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  37%|███▋      | 37/100 [00:26<00:51,  1.22it/s]

[I 2025-11-11 12:52:11,006] Trial 36 finished with value: -2738071483834.716 and parameters: {'l2_leaf_reg': 4.010981895391549, 'depth': 5, 'learning_rate': 0.14224289140486324, 'iterations': 275, 'border_count': 120, 'bagging_temperature': 0.8465677433493093, 'random_strength': 0.39820691821624726, 'min_data_in_leaf': 41}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  38%|███▊      | 38/100 [00:27<00:51,  1.21it/s]

[I 2025-11-11 12:52:11,851] Trial 37 finished with value: -2620297771198.683 and parameters: {'l2_leaf_reg': 5.967186548436638, 'depth': 8, 'learning_rate': 0.12552914213905503, 'iterations': 247, 'border_count': 110, 'bagging_temperature': 0.6920624794974783, 'random_strength': 0.5576462591018995, 'min_data_in_leaf': 45}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  39%|███▉      | 39/100 [00:27<00:47,  1.28it/s]

[I 2025-11-11 12:52:12,517] Trial 38 finished with value: -2768245634971.532 and parameters: {'l2_leaf_reg': 1.7739278263841236, 'depth': 7, 'learning_rate': 0.14928529839875843, 'iterations': 228, 'border_count': 122, 'bagging_temperature': 0.7528255523774701, 'random_strength': 1.0185427930662274, 'min_data_in_leaf': 35}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  40%|████      | 40/100 [00:28<00:49,  1.22it/s]

[I 2025-11-11 12:52:13,422] Trial 39 finished with value: -2614573528082.5674 and parameters: {'l2_leaf_reg': 3.0790419231112325, 'depth': 8, 'learning_rate': 0.11956557586366481, 'iterations': 262, 'border_count': 102, 'bagging_temperature': 0.9978437862332254, 'random_strength': 0.6439429294116189, 'min_data_in_leaf': 48}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  41%|████      | 41/100 [00:29<00:47,  1.24it/s]

[I 2025-11-11 12:52:14,199] Trial 40 finished with value: -2631575385882.9746 and parameters: {'l2_leaf_reg': 2.9551461189164803, 'depth': 8, 'learning_rate': 0.1187038249321553, 'iterations': 205, 'border_count': 106, 'bagging_temperature': 0.5943322694468639, 'random_strength': 0.8893972575721676, 'min_data_in_leaf': 28}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  42%|████▏     | 42/100 [00:30<00:48,  1.19it/s]

[I 2025-11-11 12:52:15,121] Trial 41 finished with value: -2656414978204.135 and parameters: {'l2_leaf_reg': 2.4931902920186038, 'depth': 8, 'learning_rate': 0.1377771313682568, 'iterations': 261, 'border_count': 117, 'bagging_temperature': 0.9750910520313346, 'random_strength': 0.6364210714914955, 'min_data_in_leaf': 20}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  43%|████▎     | 43/100 [00:31<00:50,  1.12it/s]

[I 2025-11-11 12:52:16,138] Trial 42 finished with value: -2734634254653.2104 and parameters: {'l2_leaf_reg': 3.0941246637021615, 'depth': 8, 'learning_rate': 0.13165954831612123, 'iterations': 283, 'border_count': 101, 'bagging_temperature': 0.8781939427877435, 'random_strength': 0.7062149496605705, 'min_data_in_leaf': 50}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  44%|████▍     | 44/100 [00:32<00:51,  1.08it/s]

[I 2025-11-11 12:52:17,142] Trial 43 finished with value: -2708124485274.4624 and parameters: {'l2_leaf_reg': 4.211821340002982, 'depth': 8, 'learning_rate': 0.10577584778195061, 'iterations': 272, 'border_count': 123, 'bagging_temperature': 0.8060675098695601, 'random_strength': 0.41452812192134614, 'min_data_in_leaf': 42}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  45%|████▌     | 45/100 [00:33<00:49,  1.10it/s]

[I 2025-11-11 12:52:18,010] Trial 44 finished with value: -2755836112902.2793 and parameters: {'l2_leaf_reg': 1.0392665935055598, 'depth': 7, 'learning_rate': 0.1217166901536958, 'iterations': 290, 'border_count': 109, 'bagging_temperature': 0.9152814538636976, 'random_strength': 0.5101560106890338, 'min_data_in_leaf': 47}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  46%|████▌     | 46/100 [00:34<00:48,  1.12it/s]

[I 2025-11-11 12:52:18,876] Trial 45 finished with value: -2743068756778.9326 and parameters: {'l2_leaf_reg': 4.7996550787242205, 'depth': 8, 'learning_rate': 0.12853349815092782, 'iterations': 178, 'border_count': 115, 'bagging_temperature': 0.9952550377315155, 'random_strength': 0.7412336156638015, 'min_data_in_leaf': 36}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  47%|████▋     | 47/100 [00:35<00:49,  1.07it/s]

[I 2025-11-11 12:52:19,893] Trial 46 finished with value: -2860355921664.4795 and parameters: {'l2_leaf_reg': 3.6017325598175667, 'depth': 5, 'learning_rate': 0.10127172475627597, 'iterations': 258, 'border_count': 124, 'bagging_temperature': 0.7208119057698649, 'random_strength': 1.700451810956371, 'min_data_in_leaf': 54}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  48%|████▊     | 48/100 [00:36<00:54,  1.04s/it]

[I 2025-11-11 12:52:21,184] Trial 47 finished with value: -2620210638672.3364 and parameters: {'l2_leaf_reg': 1.9614665188265688, 'depth': 7, 'learning_rate': 0.10767908392470506, 'iterations': 299, 'border_count': 44, 'bagging_temperature': 0.6519590730125464, 'random_strength': 0.9252591725652465, 'min_data_in_leaf': 47}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 21. Best value: -2.5716e+12:  49%|████▉     | 49/100 [00:37<00:45,  1.12it/s]

[I 2025-11-11 12:52:21,741] Trial 48 finished with value: -2834700850866.605 and parameters: {'l2_leaf_reg': 1.3836148802285941, 'depth': 4, 'learning_rate': 0.1204250979318046, 'iterations': 269, 'border_count': 57, 'bagging_temperature': 0.8588947106801746, 'random_strength': 0.6100565728538192, 'min_data_in_leaf': 38}. Best is trial 21 with value: -2571603510827.7983.


Best trial: 49. Best value: -2.56772e+12:  50%|█████     | 50/100 [00:38<00:52,  1.05s/it]

[I 2025-11-11 12:52:23,136] Trial 49 finished with value: -2567719497450.7456 and parameters: {'l2_leaf_reg': 2.313682109842207, 'depth': 8, 'learning_rate': 0.08774333972589103, 'iterations': 287, 'border_count': 82, 'bagging_temperature': 0.46838196183865244, 'random_strength': 0.3161124635244368, 'min_data_in_leaf': 50}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  51%|█████     | 51/100 [00:39<00:47,  1.03it/s]

[I 2025-11-11 12:52:23,949] Trial 50 finished with value: -2780741593478.239 and parameters: {'l2_leaf_reg': 2.311076599788524, 'depth': 8, 'learning_rate': 0.07544164447205232, 'iterations': 161, 'border_count': 80, 'bagging_temperature': 0.5584644251369837, 'random_strength': 0.27116974560622514, 'min_data_in_leaf': 51}. Best is trial 49 with value: -2567719497450.7456.
Completed 51 trials. Best validation RMSE: $1,602,410.53


Best trial: 49. Best value: -2.56772e+12:  52%|█████▏    | 52/100 [00:40<00:51,  1.08s/it]

[I 2025-11-11 12:52:25,267] Trial 51 finished with value: -2632132066011.956 and parameters: {'l2_leaf_reg': 3.3541735860483226, 'depth': 8, 'learning_rate': 0.08865961304041645, 'iterations': 285, 'border_count': 67, 'bagging_temperature': 0.46049048990818403, 'random_strength': 0.3299842991278312, 'min_data_in_leaf': 45}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  53%|█████▎    | 53/100 [00:42<00:59,  1.27s/it]

[I 2025-11-11 12:52:26,976] Trial 52 finished with value: -2584382610065.887 and parameters: {'l2_leaf_reg': 1.841112112705852, 'depth': 8, 'learning_rate': 0.07994241457341207, 'iterations': 277, 'border_count': 98, 'bagging_temperature': 0.9279889099891465, 'random_strength': 0.4614283272787586, 'min_data_in_leaf': 56}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  54%|█████▍    | 54/100 [00:43<01:01,  1.34s/it]

[I 2025-11-11 12:52:28,488] Trial 53 finished with value: -2679958086389.988 and parameters: {'l2_leaf_reg': 2.524731974026783, 'depth': 8, 'learning_rate': 0.07828873380332682, 'iterations': 294, 'border_count': 100, 'bagging_temperature': 0.9544918363556815, 'random_strength': 0.2032135860730987, 'min_data_in_leaf': 57}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  55%|█████▌    | 55/100 [00:44<00:54,  1.21s/it]

[I 2025-11-11 12:52:29,393] Trial 54 finished with value: -2669761244934.244 and parameters: {'l2_leaf_reg': 2.0317774968157636, 'depth': 8, 'learning_rate': 0.06962967273160596, 'iterations': 262, 'border_count': 83, 'bagging_temperature': 0.9282937847030991, 'random_strength': 0.09427000667424391, 'min_data_in_leaf': 65}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  56%|█████▌    | 56/100 [00:45<00:51,  1.16s/it]

[I 2025-11-11 12:52:30,451] Trial 55 finished with value: -2677796207104.021 and parameters: {'l2_leaf_reg': 2.9520895032077354, 'depth': 8, 'learning_rate': 0.0820720894603704, 'iterations': 288, 'border_count': 86, 'bagging_temperature': 0.7928622106514067, 'random_strength': 0.4582319197490331, 'min_data_in_leaf': 52}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  57%|█████▋    | 57/100 [00:46<00:47,  1.10s/it]

[I 2025-11-11 12:52:31,388] Trial 56 finished with value: -2704971893030.13 and parameters: {'l2_leaf_reg': 2.329629668418102, 'depth': 8, 'learning_rate': 0.05872828247490407, 'iterations': 269, 'border_count': 76, 'bagging_temperature': 0.529245320868627, 'random_strength': 0.33562573578520505, 'min_data_in_leaf': 71}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  58%|█████▊    | 58/100 [00:47<00:45,  1.07s/it]

[I 2025-11-11 12:52:32,409] Trial 57 finished with value: -2661705318066.0093 and parameters: {'l2_leaf_reg': 3.206104977249268, 'depth': 8, 'learning_rate': 0.09071983876561883, 'iterations': 300, 'border_count': 97, 'bagging_temperature': 0.6625109737121869, 'random_strength': 0.47431487744068784, 'min_data_in_leaf': 59}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  59%|█████▉    | 59/100 [00:48<00:43,  1.06s/it]

[I 2025-11-11 12:52:33,447] Trial 58 finished with value: -2783580301154.1357 and parameters: {'l2_leaf_reg': 1.8853111538364145, 'depth': 7, 'learning_rate': 0.0694284829430939, 'iterations': 245, 'border_count': 106, 'bagging_temperature': 0.1904979186236439, 'random_strength': 0.37482392107049006, 'min_data_in_leaf': 54}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  60%|██████    | 60/100 [00:49<00:40,  1.01s/it]

[I 2025-11-11 12:52:34,345] Trial 59 finished with value: -2724602898933.323 and parameters: {'l2_leaf_reg': 1.3205709959239134, 'depth': 8, 'learning_rate': 0.09812875284627079, 'iterations': 253, 'border_count': 93, 'bagging_temperature': 0.45689268436800645, 'random_strength': 0.22905076000553687, 'min_data_in_leaf': 49}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  61%|██████    | 61/100 [00:49<00:31,  1.22it/s]

[I 2025-11-11 12:52:34,711] Trial 60 finished with value: -2771904269457.8335 and parameters: {'l2_leaf_reg': 2.7473522602202287, 'depth': 8, 'learning_rate': 0.10940189061335347, 'iterations': 112, 'border_count': 110, 'bagging_temperature': 0.741455614519228, 'random_strength': 0.08904189888612651, 'min_data_in_leaf': 56}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 49. Best value: -2.56772e+12:  62%|██████▏   | 62/100 [00:50<00:29,  1.29it/s]

[I 2025-11-11 12:52:35,374] Trial 61 finished with value: -2758267542436.137 and parameters: {'l2_leaf_reg': 1.6567342360412205, 'depth': 6, 'learning_rate': 0.11569858953397917, 'iterations': 279, 'border_count': 117, 'bagging_temperature': 0.9003159048367625, 'random_strength': 0.5565919698657671, 'min_data_in_leaf': 43}. Best is trial 49 with value: -2567719497450.7456.


Best trial: 62. Best value: -2.53658e+12:  63%|██████▎   | 63/100 [00:51<00:29,  1.25it/s]

[I 2025-11-11 12:52:36,239] Trial 62 finished with value: -2536578785454.5786 and parameters: {'l2_leaf_reg': 1.3959841858247966, 'depth': 7, 'learning_rate': 0.12582254186448188, 'iterations': 274, 'border_count': 125, 'bagging_temperature': 0.8410542555619, 'random_strength': 0.6480231368678006, 'min_data_in_leaf': 48}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  64%|██████▍   | 64/100 [00:52<00:27,  1.31it/s]

[I 2025-11-11 12:52:36,912] Trial 63 finished with value: -2661894622423.606 and parameters: {'l2_leaf_reg': 1.256228671765827, 'depth': 6, 'learning_rate': 0.10276256302455716, 'iterations': 282, 'border_count': 126, 'bagging_temperature': 0.9929019180367478, 'random_strength': 0.7442369534458079, 'min_data_in_leaf': 40}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  65%|██████▌   | 65/100 [00:53<00:27,  1.25it/s]

[I 2025-11-11 12:52:37,791] Trial 64 finished with value: -2544561198452.4824 and parameters: {'l2_leaf_reg': 2.1830040326116555, 'depth': 7, 'learning_rate': 0.12452610968408198, 'iterations': 293, 'border_count': 114, 'bagging_temperature': 0.8449823049663187, 'random_strength': 0.4694156277714869, 'min_data_in_leaf': 61}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  66%|██████▌   | 66/100 [00:54<00:28,  1.19it/s]

[I 2025-11-11 12:52:38,733] Trial 65 finished with value: -2760030500358.9116 and parameters: {'l2_leaf_reg': 2.203695685872473, 'depth': 7, 'learning_rate': 0.0923881930298446, 'iterations': 294, 'border_count': 112, 'bagging_temperature': 0.8325871883324886, 'random_strength': 0.286824366022378, 'min_data_in_leaf': 63}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  67%|██████▋   | 67/100 [00:54<00:27,  1.21it/s]

[I 2025-11-11 12:52:39,533] Trial 66 finished with value: -2737758539676.809 and parameters: {'l2_leaf_reg': 6.0359178253009995, 'depth': 7, 'learning_rate': 0.08527395352118884, 'iterations': 275, 'border_count': 125, 'bagging_temperature': 0.761472744858604, 'random_strength': 0.484438966186639, 'min_data_in_leaf': 53}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  68%|██████▊   | 68/100 [00:55<00:26,  1.19it/s]

[I 2025-11-11 12:52:40,399] Trial 67 finished with value: -2600397074728.462 and parameters: {'l2_leaf_reg': 1.035004009114683, 'depth': 7, 'learning_rate': 0.13635905439875812, 'iterations': 290, 'border_count': 119, 'bagging_temperature': 0.37839675083931823, 'random_strength': 0.19064965734607975, 'min_data_in_leaf': 61}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  69%|██████▉   | 69/100 [00:56<00:25,  1.20it/s]

[I 2025-11-11 12:52:41,226] Trial 68 finished with value: -2581483739318.4736 and parameters: {'l2_leaf_reg': 1.0124976976284528, 'depth': 7, 'learning_rate': 0.13741823006509454, 'iterations': 287, 'border_count': 119, 'bagging_temperature': 0.3990659849462369, 'random_strength': 1.9975728643581947, 'min_data_in_leaf': 59}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  70%|███████   | 70/100 [00:57<00:23,  1.29it/s]

[I 2025-11-11 12:52:41,866] Trial 69 finished with value: -2754103013450.499 and parameters: {'l2_leaf_reg': 1.4025653264992461, 'depth': 6, 'learning_rate': 0.14431718713828962, 'iterations': 266, 'border_count': 114, 'bagging_temperature': 0.48432203330389056, 'random_strength': 1.5455286987059171, 'min_data_in_leaf': 58}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  71%|███████   | 71/100 [00:58<00:23,  1.24it/s]

[I 2025-11-11 12:52:42,744] Trial 70 finished with value: -2608031183817.1123 and parameters: {'l2_leaf_reg': 1.8284738897415598, 'depth': 7, 'learning_rate': 0.12583579306005152, 'iterations': 285, 'border_count': 118, 'bagging_temperature': 0.3460010299318274, 'random_strength': 1.7932197884885643, 'min_data_in_leaf': 70}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  72%|███████▏  | 72/100 [00:58<00:23,  1.20it/s]

[I 2025-11-11 12:52:43,640] Trial 71 finished with value: -2656315692235.2905 and parameters: {'l2_leaf_reg': 1.2386092423818154, 'depth': 7, 'learning_rate': 0.13770829916227698, 'iterations': 293, 'border_count': 119, 'bagging_temperature': 0.406697042919684, 'random_strength': 0.4347127214240389, 'min_data_in_leaf': 62}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  73%|███████▎  | 73/100 [00:59<00:23,  1.17it/s]

[I 2025-11-11 12:52:44,552] Trial 72 finished with value: -2672288180161.116 and parameters: {'l2_leaf_reg': 1.0523342490026326, 'depth': 7, 'learning_rate': 0.1355908748382266, 'iterations': 287, 'border_count': 107, 'bagging_temperature': 0.3819200480723492, 'random_strength': 0.1634802339922211, 'min_data_in_leaf': 60}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  74%|███████▍  | 74/100 [01:00<00:23,  1.10it/s]

[I 2025-11-11 12:52:45,586] Trial 73 finished with value: -2576803508965.523 and parameters: {'l2_leaf_reg': 1.5320739704361124, 'depth': 7, 'learning_rate': 0.13283853125942882, 'iterations': 298, 'border_count': 114, 'bagging_temperature': 0.49505839370572263, 'random_strength': 1.3091704158562782, 'min_data_in_leaf': 66}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  75%|███████▌  | 75/100 [01:01<00:21,  1.14it/s]

[I 2025-11-11 12:52:46,377] Trial 74 finished with value: -2691905009994.9624 and parameters: {'l2_leaf_reg': 1.6104997859056305, 'depth': 6, 'learning_rate': 0.12411481857436903, 'iterations': 300, 'border_count': 115, 'bagging_temperature': 0.5189835808498648, 'random_strength': 1.3042244654283452, 'min_data_in_leaf': 67}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  76%|███████▌  | 76/100 [01:02<00:21,  1.14it/s]

[I 2025-11-11 12:52:47,267] Trial 75 finished with value: -2578644475364.666 and parameters: {'l2_leaf_reg': 2.136051298722331, 'depth': 7, 'learning_rate': 0.13365492022644582, 'iterations': 279, 'border_count': 122, 'bagging_temperature': 0.4330907210627315, 'random_strength': 1.4126263665213123, 'min_data_in_leaf': 77}. Best is trial 62 with value: -2536578785454.5786.
Completed 76 trials. Best validation RMSE: $1,592,664.05


Best trial: 62. Best value: -2.53658e+12:  77%|███████▋  | 77/100 [01:03<00:19,  1.16it/s]

[I 2025-11-11 12:52:48,083] Trial 76 finished with value: -2673778955490.0664 and parameters: {'l2_leaf_reg': 2.11403328866738, 'depth': 7, 'learning_rate': 0.1444799186809724, 'iterations': 275, 'border_count': 128, 'bagging_temperature': 0.421995892993706, 'random_strength': 1.8884115982730385, 'min_data_in_leaf': 80}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  78%|███████▊  | 78/100 [01:04<00:18,  1.16it/s]

[I 2025-11-11 12:52:48,953] Trial 77 finished with value: -2589425334741.237 and parameters: {'l2_leaf_reg': 8.288502767571359, 'depth': 7, 'learning_rate': 0.14091561408242773, 'iterations': 296, 'border_count': 122, 'bagging_temperature': 0.5498479816373822, 'random_strength': 1.3864725848821866, 'min_data_in_leaf': 89}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  79%|███████▉  | 79/100 [01:05<00:17,  1.17it/s]

[I 2025-11-11 12:52:49,778] Trial 78 finished with value: -2561637015527.023 and parameters: {'l2_leaf_reg': 1.4701653431472976, 'depth': 7, 'learning_rate': 0.1326528179774208, 'iterations': 279, 'border_count': 112, 'bagging_temperature': 0.4713114817314255, 'random_strength': 1.0777998732246648, 'min_data_in_leaf': 76}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  80%|████████  | 80/100 [01:05<00:16,  1.22it/s]

[I 2025-11-11 12:52:50,515] Trial 79 finished with value: -2845826071191.04 and parameters: {'l2_leaf_reg': 1.5049398231482738, 'depth': 7, 'learning_rate': 0.13298062724877602, 'iterations': 280, 'border_count': 35, 'bagging_temperature': 0.493371628744954, 'random_strength': 1.2154301638111482, 'min_data_in_leaf': 83}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  81%|████████  | 81/100 [01:06<00:15,  1.23it/s]

[I 2025-11-11 12:52:51,321] Trial 80 finished with value: -2802467377928.1294 and parameters: {'l2_leaf_reg': 9.948475749065125, 'depth': 7, 'learning_rate': 0.12782242758977308, 'iterations': 271, 'border_count': 112, 'bagging_temperature': 0.27870724851017864, 'random_strength': 1.1213919390250684, 'min_data_in_leaf': 74}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  82%|████████▏ | 82/100 [01:07<00:14,  1.24it/s]

[I 2025-11-11 12:52:52,102] Trial 81 finished with value: -2557457663641.1147 and parameters: {'l2_leaf_reg': 1.7841296486531848, 'depth': 7, 'learning_rate': 0.13370651518052062, 'iterations': 288, 'border_count': 125, 'bagging_temperature': 0.4322882644830724, 'random_strength': 1.5614061053095574, 'min_data_in_leaf': 76}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  83%|████████▎ | 83/100 [01:08<00:14,  1.21it/s]

[I 2025-11-11 12:52:52,980] Trial 82 finished with value: -2635450469698.977 and parameters: {'l2_leaf_reg': 1.4003747560354898, 'depth': 7, 'learning_rate': 0.13300672080981785, 'iterations': 290, 'border_count': 123, 'bagging_temperature': 0.4295536244792531, 'random_strength': 1.4788528857095353, 'min_data_in_leaf': 77}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  84%|████████▍ | 84/100 [01:09<00:13,  1.18it/s]

[I 2025-11-11 12:52:53,866] Trial 83 finished with value: -2638585898024.17 and parameters: {'l2_leaf_reg': 2.1328404776484833, 'depth': 7, 'learning_rate': 0.13958384696718087, 'iterations': 293, 'border_count': 125, 'bagging_temperature': 0.4730681060810406, 'random_strength': 1.356887843385826, 'min_data_in_leaf': 85}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  85%|████████▌ | 85/100 [01:09<00:12,  1.18it/s]

[I 2025-11-11 12:52:54,714] Trial 84 finished with value: -2749448614561.935 and parameters: {'l2_leaf_reg': 1.699846196421594, 'depth': 7, 'learning_rate': 0.1314700397410269, 'iterations': 296, 'border_count': 121, 'bagging_temperature': 0.3426504124348355, 'random_strength': 1.4654639133255278, 'min_data_in_leaf': 76}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  86%|████████▌ | 86/100 [01:10<00:11,  1.19it/s]

[I 2025-11-11 12:52:55,531] Trial 85 finished with value: -2589280081525.7026 and parameters: {'l2_leaf_reg': 2.592346402760902, 'depth': 7, 'learning_rate': 0.1465733170909809, 'iterations': 286, 'border_count': 117, 'bagging_temperature': 0.4420133404923216, 'random_strength': 1.696087341181945, 'min_data_in_leaf': 93}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  87%|████████▋ | 87/100 [01:11<00:10,  1.22it/s]

[I 2025-11-11 12:52:56,317] Trial 86 finished with value: -2716176891034.012 and parameters: {'l2_leaf_reg': 2.326985628086106, 'depth': 7, 'learning_rate': 0.1274306693149801, 'iterations': 265, 'border_count': 128, 'bagging_temperature': 0.6018478969367478, 'random_strength': 1.9788608904821396, 'min_data_in_leaf': 81}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  88%|████████▊ | 88/100 [01:12<00:09,  1.28it/s]

[I 2025-11-11 12:52:57,015] Trial 87 finished with value: -2926469996876.6353 and parameters: {'l2_leaf_reg': 1.2208782642696345, 'depth': 6, 'learning_rate': 0.14278118921657076, 'iterations': 281, 'border_count': 48, 'bagging_temperature': 0.5124881682451393, 'random_strength': 1.100702709377606, 'min_data_in_leaf': 71}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  89%|████████▉ | 89/100 [01:13<00:08,  1.26it/s]

[I 2025-11-11 12:52:57,832] Trial 88 finished with value: -3082903388874.095 and parameters: {'l2_leaf_reg': 1.9670455142436174, 'depth': 7, 'learning_rate': 0.03228788863511036, 'iterations': 274, 'border_count': 124, 'bagging_temperature': 0.554727325435524, 'random_strength': 1.2325071072315705, 'min_data_in_leaf': 66}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  90%|█████████ | 90/100 [01:13<00:07,  1.38it/s]

[I 2025-11-11 12:52:58,385] Trial 89 finished with value: -2755919281428.239 and parameters: {'l2_leaf_reg': 7.20172028215366, 'depth': 7, 'learning_rate': 0.13401631532130823, 'iterations': 200, 'border_count': 112, 'bagging_temperature': 0.3642167829049272, 'random_strength': 1.5878024083135869, 'min_data_in_leaf': 76}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  91%|█████████ | 91/100 [01:14<00:05,  1.59it/s]

[I 2025-11-11 12:52:58,801] Trial 90 finished with value: -2823796676688.431 and parameters: {'l2_leaf_reg': 1.555910558812217, 'depth': 7, 'learning_rate': 0.11808895177452794, 'iterations': 146, 'border_count': 68, 'bagging_temperature': 0.41161799354510914, 'random_strength': 1.455554910312046, 'min_data_in_leaf': 69}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  92%|█████████▏| 92/100 [01:14<00:05,  1.48it/s]

[I 2025-11-11 12:52:59,588] Trial 91 finished with value: -2647019378915.731 and parameters: {'l2_leaf_reg': 1.8567402720663138, 'depth': 7, 'learning_rate': 0.0814806467308793, 'iterations': 277, 'border_count': 120, 'bagging_temperature': 0.47357745528020523, 'random_strength': 1.5410906860943927, 'min_data_in_leaf': 56}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  93%|█████████▎| 93/100 [01:15<00:05,  1.37it/s]

[I 2025-11-11 12:53:00,440] Trial 92 finished with value: -2650023624606.9478 and parameters: {'l2_leaf_reg': 1.8228421914915687, 'depth': 7, 'learning_rate': 0.07287653992205292, 'iterations': 296, 'border_count': 89, 'bagging_temperature': 0.43733462006254187, 'random_strength': 1.282988055646535, 'min_data_in_leaf': 64}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  94%|█████████▍| 94/100 [01:16<00:04,  1.32it/s]

[I 2025-11-11 12:53:01,271] Trial 93 finished with value: -2703035667711.418 and parameters: {'l2_leaf_reg': 2.4382410792085185, 'depth': 7, 'learning_rate': 0.1385715070236764, 'iterations': 289, 'border_count': 104, 'bagging_temperature': 0.5392653764059313, 'random_strength': 1.0395917608734973, 'min_data_in_leaf': 52}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  95%|█████████▌| 95/100 [01:17<00:04,  1.22it/s]

[I 2025-11-11 12:53:02,239] Trial 94 finished with value: -2688422938584.7583 and parameters: {'l2_leaf_reg': 4.760960567695741, 'depth': 8, 'learning_rate': 0.0871286222366343, 'iterations': 283, 'border_count': 116, 'bagging_temperature': 0.5805096787709334, 'random_strength': 1.3316605546371951, 'min_data_in_leaf': 60}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  96%|█████████▌| 96/100 [01:18<00:03,  1.25it/s]

[I 2025-11-11 12:53:02,988] Trial 95 finished with value: -2800322984355.955 and parameters: {'l2_leaf_reg': 1.4681218906663038, 'depth': 6, 'learning_rate': 0.13021648084427115, 'iterations': 272, 'border_count': 108, 'bagging_temperature': 0.5049976336644119, 'random_strength': 1.4247437369493114, 'min_data_in_leaf': 55}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  97%|█████████▋| 97/100 [01:19<00:02,  1.27it/s]

[I 2025-11-11 12:53:03,752] Trial 96 finished with value: -2643500330682.288 and parameters: {'l2_leaf_reg': 1.0178905144646735, 'depth': 7, 'learning_rate': 0.07898646747793225, 'iterations': 255, 'border_count': 63, 'bagging_temperature': 0.6264698234247712, 'random_strength': 0.9693914437650474, 'min_data_in_leaf': 72}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  98%|█████████▊| 98/100 [01:19<00:01,  1.19it/s]

[I 2025-11-11 12:53:04,719] Trial 97 finished with value: -2686624872795.7256 and parameters: {'l2_leaf_reg': 2.258180619569552, 'depth': 8, 'learning_rate': 0.12213608913128615, 'iterations': 278, 'border_count': 95, 'bagging_temperature': 0.705304459188597, 'random_strength': 0.8572400751048852, 'min_data_in_leaf': 78}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12:  99%|█████████▉| 99/100 [01:20<00:00,  1.18it/s]

[I 2025-11-11 12:53:05,587] Trial 98 finished with value: -2773940346223.319 and parameters: {'l2_leaf_reg': 2.8414957307571, 'depth': 7, 'learning_rate': 0.14724933783880917, 'iterations': 286, 'border_count': 113, 'bagging_temperature': 0.3970932475347412, 'random_strength': 0.7845474685342476, 'min_data_in_leaf': 50}. Best is trial 62 with value: -2536578785454.5786.


Best trial: 62. Best value: -2.53658e+12: 100%|██████████| 100/100 [01:21<00:00,  1.22it/s]


[I 2025-11-11 12:53:06,631] Trial 99 finished with value: -2598565541227.576 and parameters: {'l2_leaf_reg': 1.973947793115125, 'depth': 8, 'learning_rate': 0.1390253101435614, 'iterations': 300, 'border_count': 104, 'bagging_temperature': 0.3110531766854997, 'random_strength': 1.1791277764221055, 'min_data_in_leaf': 57}. Best is trial 62 with value: -2536578785454.5786.

=== CatBoost Results (Optimized for Validation) ===
Best validation RMSE: $1,592,664.05
Best params:
  l2_leaf_reg: 1.3959841858247966
  depth: 7
  learning_rate: 0.12582254186448188
  iterations: 274
  border_count: 125
  bagging_temperature: 0.8410542555619
  random_strength: 0.6480231368678006
  min_data_in_leaf: 48

=== Performance ===
Training:   RMSE $1,086,688.53, R² 0.9548
Validation: RMSE $1,592,664.05, R² 0.8894
Test:       RMSE $2,032,497.50, R² 0.8417
⚠️  Possible overfitting detected

✅ Saved two CSV files:
   1. Test only: lgb_predictions_regularized.csv (1,232 rows)
   2. All data: lgb_predictions_all_d

In [52]:
import optuna
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pandas as pd

# Prepare data for Random Forest with proper train/val/test splits
print(f"=== Random Forest Data Preparation ===")
print(f"Original data shapes:")
print(f"  X_train: {X_train.shape}")
print(f"  X_val: {X_val.shape}")
print(f"  X_test: {X_test.shape}")

# Encode district for all splits
X_train_rf = X_train.copy()
X_val_rf = X_val.copy()
X_test_rf = X_test.copy()

# Convert district to numeric codes for all splits
X_train_rf['district_encoded'] = X_train_rf['district'].cat.codes
X_val_rf['district_encoded'] = X_val_rf['district'].cat.codes
X_test_rf['district_encoded'] = X_test_rf['district'].cat.codes

# Remove original categorical district
X_train_rf = X_train_rf.drop('district', axis=1)
X_val_rf = X_val_rf.drop('district', axis=1)
X_test_rf = X_test_rf.drop('district', axis=1)

print(f"RF data shapes after encoding:")
print(f"  X_train_rf: {X_train_rf.shape}")
print(f"  X_val_rf: {X_val_rf.shape}")
print(f"  X_test_rf: {X_test_rf.shape}")
print(f"Features: {list(X_train_rf.columns)}")
print(f"District codes range: {X_train_rf['district_encoded'].min()} to {X_train_rf['district_encoded'].max()}")

def rf_objective(trial):
    # Test regularization via controlling complexity
    max_depth_choice = trial.suggest_categorical('max_depth_type', ['int', 'none'])
    if max_depth_choice == 'none':
        max_depth = None
    else:
        max_depth = trial.suggest_int('max_depth', 3, 12)
    
    # Regularization through tree complexity control
    min_samples_split = trial.suggest_int('min_samples_split', 10, 50)   # Increased for more regularization
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 5, 20)     # Increased for more regularization
    max_leaf_nodes = trial.suggest_categorical('max_leaf_nodes_type', ['int', 'none'])
    if max_leaf_nodes == 'none':
        max_leaf_nodes_val = None
    else:
        max_leaf_nodes_val = trial.suggest_int('max_leaf_nodes', 10, 100)
    
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),     # More conservative range
        'max_features': trial.suggest_categorical('max_features', [0.3, 0.5, 0.7]),
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_leaf_nodes': max_leaf_nodes_val,
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
        
        # Fixed parameters
        'random_state': 42,
        'n_jobs': -1
    }
    
    try:
        # Create Random Forest model
        model = RandomForestRegressor(**params)
        
        # Train on training set only
        model.fit(X_train_rf, y_train)
        
        # Evaluate on VALIDATION set (not training!)
        val_predictions = model.predict(X_val_rf)
        val_mse = mean_squared_error(y_val, val_predictions)
        
        # Return negative MSE for maximization (Optuna maximizes)
        return -val_mse
    
    except Exception as e:
        print(f"Trial {trial.number} failed: {e}")
        return float('-inf')

def print_progress(study, trial):
    if trial.number % 25 == 0:
        val_rmse = np.sqrt(-study.best_value)
        print(f"Completed {trial.number + 1} trials. Best validation RMSE: ${val_rmse:,.2f}")

# Create Optuna study
print("\nStarting Random Forest hyperparameter optimization...")
print("🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)")
print("Using proper train/validation/test splits")
print("Testing 150 different parameter combinations...")
print("Regularization parameter ranges:")
print("  min_samples_split: 5-50 (higher = more regularization)")
print("  min_samples_leaf: 5-30 (higher = more regularization)")
print("  max_leaf_nodes: 10-100 or None (lower = more regularization)")

study = optuna.create_study(direction='maximize')
study.optimize(
    rf_objective, 
    n_trials=150,
    callbacks=[print_progress],
    show_progress_bar=True
)

# Print results
best_val_rmse = np.sqrt(-study.best_value)
print(f"\n=== Random Forest Optuna Results (Optimized for Validation) ===")
print(f"Completed {len(study.trials)} trials")
print("Best trial:")
print(f"  Value (neg_MSE): {study.best_value}")
print(f"  Validation RMSE: ${best_val_rmse:,.2f}")
print("  Best params:")
for key, value in study.best_trial.params.items():
    if not key.endswith('_type'):
        print(f"    {key}: {value}")

# Analyze regularization
best_min_split = study.best_trial.params['min_samples_split']
best_min_leaf = study.best_trial.params['min_samples_leaf']
best_max_leaf = study.best_trial.params.get('max_leaf_nodes', 'None')

print(f"\n=== Regularization Analysis ===")
print(f"min_samples_split: {best_min_split} (higher = more regularization)")
print(f"min_samples_leaf: {best_min_leaf} (higher = more regularization)")
print(f"max_leaf_nodes: {best_max_leaf} (lower = more regularization)")

reg_strength = (best_min_split - 5) + (best_min_leaf - 5)
if best_max_leaf != 'None' and best_max_leaf < 50:
    reg_strength += 2

if reg_strength > 20:
    print("✨ Strong regularization preferred (very conservative trees)")
elif reg_strength > 10:
    print("✨ Moderate regularization preferred (balanced complexity)")
else:
    print("✨ Light regularization preferred (more flexible trees)")

# Train final model with best parameters
print("\nTraining final Random Forest model with best parameters...")

best_params = study.best_trial.params.copy()
if best_params['max_depth_type'] == 'none':
    best_params['max_depth'] = None
if best_params.get('max_leaf_nodes_type') == 'none':
    best_params['max_leaf_nodes'] = None

# Remove helper parameters
best_params.pop('max_depth_type', None)
best_params.pop('max_leaf_nodes_type', None)

best_rf_model = RandomForestRegressor(**best_params, 
                                      random_state=42,
                                      n_jobs=-1)

# Fit on training data only
best_rf_model.fit(X_train_rf, y_train)

# Make predictions on all sets
train_predictions = best_rf_model.predict(X_train_rf)
val_predictions = best_rf_model.predict(X_val_rf)
test_predictions = best_rf_model.predict(X_test_rf)

# Calculate metrics for all sets
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
val_rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

train_mae = mean_absolute_error(y_train, train_predictions)
val_mae = mean_absolute_error(y_val, val_predictions)
test_mae = mean_absolute_error(y_test, test_predictions)

train_r2 = r2_score(y_train, train_predictions)
val_r2 = r2_score(y_val, val_predictions)
test_r2 = r2_score(y_test, test_predictions)

print(f"\n=== Model Performance Across All Sets ===")
print("Training Set:")
print(f"  RMSE: ${train_rmse:,.2f}")
print(f"  MAE: ${train_mae:,.2f}")
print(f"  R²: {train_r2:.4f}")

print("\nValidation Set:")
print(f"  RMSE: ${val_rmse:,.2f}")
print(f"  MAE: ${val_mae:,.2f}")
print(f"  R²: {val_r2:.4f}")

print("\nTest Set (UNSEEN DATA):")
print(f"  RMSE: ${test_rmse:,.2f}")
print(f"  MAE: ${test_mae:,.2f}")
print(f"  R²: {test_r2:.4f}")

# Overfitting analysis
print(f"\n=== Overfitting Analysis ===")
if train_rmse < val_rmse * 0.8:
    print(f"⚠️  WARNING: Possible overfitting detected!")
    print(f"   Training RMSE (${train_rmse:,.2f}) much lower than validation RMSE (${val_rmse:,.2f})")
elif train_rmse < val_rmse * 0.9:
    print(f"⚠️  Mild overfitting detected")
    print(f"   Training RMSE: ${train_rmse:,.2f}, Validation RMSE: ${val_rmse:,.2f}")
else:
    print(f"✅ Good generalization!")
    print(f"   Training RMSE: ${train_rmse:,.2f}, Validation RMSE: ${val_rmse:,.2f}")

if abs(val_rmse - test_rmse) > val_rmse * 0.2:
    print(f"⚠️  Large difference between validation and test performance")
else:
    print(f"✅ Consistent performance between validation and test sets")


# Create results with TEST set only (true unseen data)
results_df = X_test.copy()
results_df['actual_price'] = y_test
results_df['rf_predicted_price'] = test_predictions
results_df['prediction_error'] = results_df['actual_price'] - results_df['rf_predicted_price']
results_df['absolute_error'] = abs(results_df['prediction_error'])

# Save TEST ONLY results
results_df.to_csv('/Users/clarencemarvin/Downloads/regularized/rf_predictions_regularized.csv', index=False)

# ✅ NEW: Create combined results with ALL data (train + val + test)
train_df = X_train.copy()
train_df['dataset'] = 'train'
train_df['actual_price'] = y_train
train_df['rf_predicted_price'] = train_predictions
train_df['prediction_error'] = train_df['actual_price'] - train_df['rf_predicted_price']
train_df['absolute_error'] = abs(train_df['prediction_error'])

val_df = X_val.copy()
val_df['dataset'] = 'validation'
val_df['actual_price'] = y_val
val_df['rf_predicted_price'] = val_predictions
val_df['prediction_error'] = val_df['actual_price'] - val_df['rf_predicted_price']
val_df['absolute_error'] = abs(val_df['prediction_error'])

test_df = X_test.copy()
test_df['dataset'] = 'test'
test_df['actual_price'] = y_test
test_df['rf_predicted_price'] = test_predictions
test_df['prediction_error'] = test_df['actual_price'] - test_df['rf_predicted_price']
test_df['absolute_error'] = abs(test_df['prediction_error'])

# Combine all three datasets
all_results_df = pd.concat([train_df, val_df, test_df], ignore_index=True)

# Save COMBINED results
all_results_df.to_csv('/Users/clarencemarvin/Downloads/regularized/rf_predictions_regularized.csv', index=False)

print(f"\n✅ Saved two CSV files:")
print(f"   1. Test only: lgb_predictions_regularized.csv ({len(results_df):,} rows)")
print(f"   2. All data: lgb_predictions_all_data.csv ({len(all_results_df):,} rows)")

# Feature Importance Analysis
print(f"\n=== Feature Importance Analysis ===")
feature_importance = best_rf_model.feature_importances_
feature_names = X_train_rf.columns

importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': feature_importance
}).sort_values('importance', ascending=False)

importance_df['importance_pct'] = (importance_df['importance'] / importance_df['importance'].sum()) * 100

print("Top 10 features by importance:")
for idx, row in importance_df.head(10).iterrows():
    print(f"  {row['feature']}: {row['importance_pct']:.2f}%")

importance_df.to_csv('/Users/clarencemarvin/Downloads/regularized2/rf_feature_importance.csv', index=False)

print(f"\nOptimization completed! Final test RMSE: ${test_rmse:,.2f}")
print(f"Results saved to CSV files.")

[I 2025-11-11 12:53:18,535] A new study created in memory with name: no-name-ec24c1b4-7af9-40f6-999b-013b331fb216


=== Random Forest Data Preparation ===
Original data shapes:
  X_train: (3693, 10)
  X_val: (1231, 10)
  X_test: (1232, 10)
RF data shapes after encoding:
  X_train_rf: (3693, 10)
  X_val_rf: (1231, 10)
  X_test_rf: (1232, 10)
Features: ['bedroom_count', 'property_age', 'saleable_area', 'travel_time_to_cbd', 'total_poi_within_1000m', 'category_Education_within_2000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'pet_policy_binary', 'district_encoded']
District codes range: 0 to 143

Starting Random Forest hyperparameter optimization...
🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)
Using proper train/validation/test splits
Testing 150 different parameter combinations...
Regularization parameter ranges:
  min_samples_split: 5-50 (higher = more regularization)
  min_samples_leaf: 5-30 (higher = more regularization)
  max_leaf_nodes: 10-100 or None (lower = more regularization)


Best trial: 0. Best value: -4.18879e+12:   1%|          | 1/150 [00:00<00:32,  4.63it/s]

[I 2025-11-11 12:53:18,758] Trial 0 finished with value: -4188793726154.143 and parameters: {'max_depth_type': 'none', 'min_samples_split': 34, 'min_samples_leaf': 16, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 92, 'n_estimators': 113, 'max_features': 0.3, 'bootstrap': False}. Best is trial 0 with value: -4188793726154.143.
Completed 1 trials. Best validation RMSE: $2,046,654.28


Best trial: 0. Best value: -4.18879e+12:   2%|▏         | 3/150 [00:00<00:33,  4.44it/s]

[I 2025-11-11 12:53:19,039] Trial 1 finished with value: -4404668478985.917 and parameters: {'max_depth_type': 'none', 'min_samples_split': 20, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 58, 'n_estimators': 176, 'max_features': 0.3, 'bootstrap': True}. Best is trial 0 with value: -4188793726154.143.
[I 2025-11-11 12:53:19,229] Trial 2 finished with value: -4347828395215.9595 and parameters: {'max_depth_type': 'int', 'max_depth': 12, 'min_samples_split': 26, 'min_samples_leaf': 10, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 72, 'n_estimators': 112, 'max_features': 0.3, 'bootstrap': True}. Best is trial 0 with value: -4188793726154.143.


                                                                                        

[I 2025-11-11 12:53:19,461] Trial 3 finished with value: -5201222064931.508 and parameters: {'max_depth_type': 'int', 'max_depth': 5, 'min_samples_split': 36, 'min_samples_leaf': 20, 'max_leaf_nodes_type': 'none', 'n_estimators': 87, 'max_features': 0.5, 'bootstrap': True}. Best is trial 0 with value: -4188793726154.143.


Best trial: 0. Best value: -4.18879e+12:   3%|▎         | 5/150 [00:01<00:31,  4.57it/s]

[I 2025-11-11 12:53:19,663] Trial 4 finished with value: -5217235237550.741 and parameters: {'max_depth_type': 'int', 'max_depth': 10, 'min_samples_split': 40, 'min_samples_leaf': 10, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 33, 'n_estimators': 82, 'max_features': 0.3, 'bootstrap': False}. Best is trial 0 with value: -4188793726154.143.


Best trial: 5. Best value: -4.074e+12:   4%|▍         | 6/150 [00:01<00:39,  3.67it/s]  

[I 2025-11-11 12:53:20,040] Trial 5 finished with value: -4073996239516.7456 and parameters: {'max_depth_type': 'none', 'min_samples_split': 30, 'min_samples_leaf': 10, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 69, 'n_estimators': 97, 'max_features': 0.7, 'bootstrap': True}. Best is trial 5 with value: -4073996239516.7456.


Best trial: 5. Best value: -4.074e+12:   5%|▍         | 7/150 [00:01<00:36,  3.93it/s]

[I 2025-11-11 12:53:20,259] Trial 6 finished with value: -8918960242420.068 and parameters: {'max_depth_type': 'int', 'max_depth': 3, 'min_samples_split': 28, 'min_samples_leaf': 11, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 44, 'n_estimators': 154, 'max_features': 0.3, 'bootstrap': False}. Best is trial 5 with value: -4073996239516.7456.


Best trial: 5. Best value: -4.074e+12:   5%|▌         | 8/150 [00:01<00:34,  4.09it/s]

[I 2025-11-11 12:53:20,480] Trial 7 finished with value: -4319055229716.6753 and parameters: {'max_depth_type': 'none', 'min_samples_split': 17, 'min_samples_leaf': 14, 'max_leaf_nodes_type': 'none', 'n_estimators': 82, 'max_features': 0.3, 'bootstrap': True}. Best is trial 5 with value: -4073996239516.7456.


Best trial: 5. Best value: -4.074e+12:   6%|▌         | 9/150 [00:02<00:50,  2.79it/s]

[I 2025-11-11 12:53:21,089] Trial 8 finished with value: -4260570037783.6943 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 20, 'max_leaf_nodes_type': 'none', 'n_estimators': 167, 'max_features': 0.7, 'bootstrap': True}. Best is trial 5 with value: -4073996239516.7456.


Best trial: 5. Best value: -4.074e+12:   7%|▋         | 10/150 [00:02<00:49,  2.84it/s]

[I 2025-11-11 12:53:21,426] Trial 9 finished with value: -5172742241744.138 and parameters: {'max_depth_type': 'int', 'max_depth': 5, 'min_samples_split': 31, 'min_samples_leaf': 15, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 46, 'n_estimators': 143, 'max_features': 0.5, 'bootstrap': True}. Best is trial 5 with value: -4073996239516.7456.


Best trial: 10. Best value: -3.78347e+12:   7%|▋         | 11/150 [00:03<00:47,  2.95it/s]

[I 2025-11-11 12:53:21,735] Trial 10 finished with value: -3783468421626.5107 and parameters: {'max_depth_type': 'none', 'min_samples_split': 47, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 53, 'max_features': 0.7, 'bootstrap': False}. Best is trial 10 with value: -3783468421626.5107.


Best trial: 11. Best value: -3.70684e+12:   8%|▊         | 12/150 [00:03<00:43,  3.17it/s]

[I 2025-11-11 12:53:21,979] Trial 11 finished with value: -3706835605942.658 and parameters: {'max_depth_type': 'none', 'min_samples_split': 50, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 50, 'max_features': 0.7, 'bootstrap': False}. Best is trial 11 with value: -3706835605942.658.


Best trial: 11. Best value: -3.70684e+12:   9%|▊         | 13/150 [00:03<00:43,  3.14it/s]

[I 2025-11-11 12:53:22,324] Trial 12 finished with value: -3706835605942.658 and parameters: {'max_depth_type': 'none', 'min_samples_split': 50, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 50, 'max_features': 0.7, 'bootstrap': False}. Best is trial 11 with value: -3706835605942.658.


Best trial: 11. Best value: -3.70684e+12:   9%|▉         | 14/150 [00:04<00:41,  3.29it/s]

[I 2025-11-11 12:53:22,593] Trial 13 finished with value: -3731888377022.5254 and parameters: {'max_depth_type': 'none', 'min_samples_split': 50, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 54, 'max_features': 0.7, 'bootstrap': False}. Best is trial 11 with value: -3706835605942.658.


Best trial: 14. Best value: -3.67771e+12:  10%|█         | 15/150 [00:04<01:06,  2.03it/s]

[I 2025-11-11 12:53:23,527] Trial 14 finished with value: -3677706230034.021 and parameters: {'max_depth_type': 'none', 'min_samples_split': 43, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 196, 'max_features': 0.7, 'bootstrap': False}. Best is trial 14 with value: -3677706230034.021.


Best trial: 14. Best value: -3.67771e+12:  11%|█         | 16/150 [00:05<01:26,  1.55it/s]

[I 2025-11-11 12:53:24,526] Trial 15 finished with value: -3705019982151.0015 and parameters: {'max_depth_type': 'none', 'min_samples_split': 43, 'min_samples_leaf': 8, 'max_leaf_nodes_type': 'none', 'n_estimators': 198, 'max_features': 0.7, 'bootstrap': False}. Best is trial 14 with value: -3677706230034.021.


Best trial: 14. Best value: -3.67771e+12:  11%|█▏        | 17/150 [00:06<01:34,  1.41it/s]

[I 2025-11-11 12:53:25,354] Trial 16 finished with value: -3678513151575.641 and parameters: {'max_depth_type': 'none', 'min_samples_split': 42, 'min_samples_leaf': 8, 'max_leaf_nodes_type': 'none', 'n_estimators': 196, 'max_features': 0.7, 'bootstrap': False}. Best is trial 14 with value: -3677706230034.021.


Best trial: 14. Best value: -3.67771e+12:  12%|█▏        | 18/150 [00:07<01:44,  1.27it/s]

[I 2025-11-11 12:53:26,352] Trial 17 finished with value: -3683002241174.1626 and parameters: {'max_depth_type': 'none', 'min_samples_split': 40, 'min_samples_leaf': 8, 'max_leaf_nodes_type': 'none', 'n_estimators': 197, 'max_features': 0.7, 'bootstrap': False}. Best is trial 14 with value: -3677706230034.021.


Best trial: 14. Best value: -3.67771e+12:  13%|█▎        | 19/150 [00:08<01:32,  1.41it/s]

[I 2025-11-11 12:53:26,872] Trial 18 finished with value: -3768328210109.969 and parameters: {'max_depth_type': 'none', 'min_samples_split': 44, 'min_samples_leaf': 12, 'max_leaf_nodes_type': 'none', 'n_estimators': 183, 'max_features': 0.5, 'bootstrap': False}. Best is trial 14 with value: -3677706230034.021.


Best trial: 19. Best value: -3.63906e+12:  13%|█▎        | 20/150 [00:08<01:21,  1.59it/s]

[I 2025-11-11 12:53:27,322] Trial 19 finished with value: -3639056902983.315 and parameters: {'max_depth_type': 'none', 'min_samples_split': 37, 'min_samples_leaf': 8, 'max_leaf_nodes_type': 'none', 'n_estimators': 138, 'max_features': 0.7, 'bootstrap': False}. Best is trial 19 with value: -3639056902983.315.


Best trial: 20. Best value: -3.54561e+12:  14%|█▍        | 21/150 [00:09<01:14,  1.72it/s]

[I 2025-11-11 12:53:27,786] Trial 20 finished with value: -3545608148705.035 and parameters: {'max_depth_type': 'none', 'min_samples_split': 37, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 142, 'max_features': 0.7, 'bootstrap': False}. Best is trial 20 with value: -3545608148705.035.


Best trial: 20. Best value: -3.54561e+12:  15%|█▍        | 22/150 [00:09<01:08,  1.86it/s]

[I 2025-11-11 12:53:28,224] Trial 21 finished with value: -3575798401938.616 and parameters: {'max_depth_type': 'none', 'min_samples_split': 36, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 137, 'max_features': 0.7, 'bootstrap': False}. Best is trial 20 with value: -3545608148705.035.


Best trial: 22. Best value: -3.53816e+12:  15%|█▌        | 23/150 [00:10<01:04,  1.98it/s]

[I 2025-11-11 12:53:28,657] Trial 22 finished with value: -3538163317252.46 and parameters: {'max_depth_type': 'none', 'min_samples_split': 37, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 139, 'max_features': 0.7, 'bootstrap': False}. Best is trial 22 with value: -3538163317252.46.


Best trial: 23. Best value: -3.49258e+12:  16%|█▌        | 24/150 [00:10<01:00,  2.09it/s]

[I 2025-11-11 12:53:29,074] Trial 23 finished with value: -3492576183380.2236 and parameters: {'max_depth_type': 'none', 'min_samples_split': 34, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 129, 'max_features': 0.7, 'bootstrap': False}. Best is trial 23 with value: -3492576183380.2236.


Best trial: 23. Best value: -3.49258e+12:  17%|█▋        | 25/150 [00:10<00:57,  2.19it/s]

[I 2025-11-11 12:53:29,481] Trial 24 finished with value: -3590417324818.648 and parameters: {'max_depth_type': 'none', 'min_samples_split': 33, 'min_samples_leaf': 9, 'max_leaf_nodes_type': 'none', 'n_estimators': 124, 'max_features': 0.7, 'bootstrap': False}. Best is trial 23 with value: -3492576183380.2236.


Best trial: 23. Best value: -3.49258e+12:  17%|█▋        | 26/150 [00:11<00:53,  2.31it/s]

[I 2025-11-11 12:53:29,858] Trial 25 finished with value: -3546792220472.7437 and parameters: {'max_depth_type': 'none', 'min_samples_split': 38, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 154, 'max_features': 0.5, 'bootstrap': False}. Best is trial 23 with value: -3492576183380.2236.
Completed 26 trials. Best validation RMSE: $1,868,843.54


Best trial: 23. Best value: -3.49258e+12:  18%|█▊        | 27/150 [00:11<00:50,  2.44it/s]

[I 2025-11-11 12:53:30,212] Trial 26 finished with value: -3746120903119.662 and parameters: {'max_depth_type': 'int', 'max_depth': 8, 'min_samples_split': 27, 'min_samples_leaf': 12, 'max_leaf_nodes_type': 'none', 'n_estimators': 125, 'max_features': 0.7, 'bootstrap': False}. Best is trial 23 with value: -3492576183380.2236.


Best trial: 27. Best value: -3.26595e+12:  19%|█▊        | 28/150 [00:12<00:55,  2.19it/s]

[I 2025-11-11 12:53:30,780] Trial 27 finished with value: -3265954645145.792 and parameters: {'max_depth_type': 'none', 'min_samples_split': 24, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 158, 'max_features': 0.7, 'bootstrap': False}. Best is trial 27 with value: -3265954645145.792.


Best trial: 28. Best value: -3.2521e+12:  19%|█▉        | 29/150 [00:12<00:57,  2.09it/s] 

[I 2025-11-11 12:53:31,310] Trial 28 finished with value: -3252096980735.303 and parameters: {'max_depth_type': 'none', 'min_samples_split': 22, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 158, 'max_features': 0.7, 'bootstrap': False}. Best is trial 28 with value: -3252096980735.303.


Best trial: 28. Best value: -3.2521e+12:  20%|██        | 30/150 [00:13<00:53,  2.24it/s]

[I 2025-11-11 12:53:31,680] Trial 29 finished with value: -3844820172487.9316 and parameters: {'max_depth_type': 'none', 'min_samples_split': 23, 'min_samples_leaf': 18, 'max_leaf_nodes_type': 'none', 'n_estimators': 160, 'max_features': 0.5, 'bootstrap': False}. Best is trial 28 with value: -3252096980735.303.


Best trial: 30. Best value: -3.13075e+12:  21%|██        | 31/150 [00:13<00:58,  2.03it/s]

[I 2025-11-11 12:53:32,283] Trial 30 finished with value: -3130749662465.1 and parameters: {'max_depth_type': 'none', 'min_samples_split': 16, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 176, 'max_features': 0.7, 'bootstrap': False}. Best is trial 30 with value: -3130749662465.1.


Best trial: 31. Best value: -3.12811e+12:  21%|██▏       | 32/150 [00:14<01:05,  1.80it/s]

[I 2025-11-11 12:53:32,985] Trial 31 finished with value: -3128107815940.2153 and parameters: {'max_depth_type': 'none', 'min_samples_split': 16, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 178, 'max_features': 0.7, 'bootstrap': False}. Best is trial 31 with value: -3128107815940.2153.


Best trial: 31. Best value: -3.12811e+12:  22%|██▏       | 33/150 [00:15<01:10,  1.67it/s]

[I 2025-11-11 12:53:33,690] Trial 32 finished with value: -3130454450971.2256 and parameters: {'max_depth_type': 'none', 'min_samples_split': 16, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 177, 'max_features': 0.7, 'bootstrap': False}. Best is trial 31 with value: -3128107815940.2153.


Best trial: 33. Best value: -3.12251e+12:  23%|██▎       | 34/150 [00:15<01:11,  1.62it/s]

[I 2025-11-11 12:53:34,343] Trial 33 finished with value: -3122513085549.385 and parameters: {'max_depth_type': 'none', 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 181, 'max_features': 0.7, 'bootstrap': False}. Best is trial 33 with value: -3122513085549.385.


Best trial: 33. Best value: -3.12251e+12:  23%|██▎       | 35/150 [00:16<01:02,  1.84it/s]

[I 2025-11-11 12:53:34,720] Trial 34 finished with value: -6455674710933.033 and parameters: {'max_depth_type': 'none', 'min_samples_split': 15, 'min_samples_leaf': 9, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 14, 'n_estimators': 182, 'max_features': 0.7, 'bootstrap': False}. Best is trial 33 with value: -3122513085549.385.


Best trial: 35. Best value: -3.09664e+12:  24%|██▍       | 36/150 [00:16<00:55,  2.05it/s]

[I 2025-11-11 12:53:35,078] Trial 35 finished with value: -3096636406626.2886 and parameters: {'max_depth_type': 'int', 'max_depth': 12, 'min_samples_split': 12, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 173, 'max_features': 0.3, 'bootstrap': False}. Best is trial 35 with value: -3096636406626.2886.


Best trial: 35. Best value: -3.09664e+12:  25%|██▍       | 37/150 [00:16<00:49,  2.29it/s]

[I 2025-11-11 12:53:35,396] Trial 36 finished with value: -3986918041995.8896 and parameters: {'max_depth_type': 'int', 'max_depth': 12, 'min_samples_split': 10, 'min_samples_leaf': 9, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 96, 'n_estimators': 186, 'max_features': 0.3, 'bootstrap': True}. Best is trial 35 with value: -3096636406626.2886.


Best trial: 35. Best value: -3.09664e+12:  25%|██▌       | 38/150 [00:17<00:44,  2.53it/s]

[I 2025-11-11 12:53:35,692] Trial 37 finished with value: -3442635018810.2373 and parameters: {'max_depth_type': 'int', 'max_depth': 9, 'min_samples_split': 13, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 168, 'max_features': 0.3, 'bootstrap': False}. Best is trial 35 with value: -3096636406626.2886.


Best trial: 35. Best value: -3.09664e+12:  26%|██▌       | 39/150 [00:17<00:40,  2.76it/s]

[I 2025-11-11 12:53:35,978] Trial 38 finished with value: -7891452703245.144 and parameters: {'max_depth_type': 'int', 'max_depth': 10, 'min_samples_split': 13, 'min_samples_leaf': 11, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 10, 'n_estimators': 171, 'max_features': 0.3, 'bootstrap': True}. Best is trial 35 with value: -3096636406626.2886.


Best trial: 35. Best value: -3.09664e+12:  27%|██▋       | 40/150 [00:17<00:36,  3.02it/s]

[I 2025-11-11 12:53:36,235] Trial 39 finished with value: -4932617737501.493 and parameters: {'max_depth_type': 'int', 'max_depth': 6, 'min_samples_split': 20, 'min_samples_leaf': 14, 'max_leaf_nodes_type': 'none', 'n_estimators': 189, 'max_features': 0.3, 'bootstrap': False}. Best is trial 35 with value: -3096636406626.2886.


Best trial: 35. Best value: -3.09664e+12:  27%|██▋       | 41/150 [00:18<00:35,  3.08it/s]

[I 2025-11-11 12:53:36,545] Trial 40 finished with value: -3991348569701.2427 and parameters: {'max_depth_type': 'int', 'max_depth': 11, 'min_samples_split': 20, 'min_samples_leaf': 10, 'max_leaf_nodes_type': 'none', 'n_estimators': 177, 'max_features': 0.3, 'bootstrap': True}. Best is trial 35 with value: -3096636406626.2886.


Best trial: 35. Best value: -3.09664e+12:  28%|██▊       | 42/150 [00:18<00:33,  3.20it/s]

[I 2025-11-11 12:53:36,829] Trial 41 finished with value: -3715458539220.918 and parameters: {'max_depth_type': 'int', 'max_depth': 8, 'min_samples_split': 17, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 172, 'max_features': 0.3, 'bootstrap': False}. Best is trial 35 with value: -3096636406626.2886.


Best trial: 42. Best value: -3.06965e+12:  29%|██▊       | 43/150 [00:18<00:44,  2.39it/s]

[I 2025-11-11 12:53:37,493] Trial 42 finished with value: -3069650623178.4214 and parameters: {'max_depth_type': 'none', 'min_samples_split': 18, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 190, 'max_features': 0.7, 'bootstrap': False}. Best is trial 42 with value: -3069650623178.4214.


Best trial: 42. Best value: -3.06965e+12:  29%|██▉       | 44/150 [00:19<00:37,  2.83it/s]

[I 2025-11-11 12:53:37,694] Trial 43 finished with value: -8918732199634.242 and parameters: {'max_depth_type': 'int', 'max_depth': 3, 'min_samples_split': 19, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 188, 'max_features': 0.3, 'bootstrap': False}. Best is trial 42 with value: -3069650623178.4214.


Best trial: 44. Best value: -2.96488e+12:  30%|███       | 45/150 [00:20<00:57,  1.81it/s]

[I 2025-11-11 12:53:38,708] Trial 44 finished with value: -2964880516533.655 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 163, 'max_features': 0.5, 'bootstrap': False}. Best is trial 44 with value: -2964880516533.655.


Best trial: 44. Best value: -2.96488e+12:  31%|███       | 46/150 [00:20<00:50,  2.06it/s]

[I 2025-11-11 12:53:39,036] Trial 45 finished with value: -5096611303474.268 and parameters: {'max_depth_type': 'none', 'min_samples_split': 14, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 27, 'n_estimators': 150, 'max_features': 0.5, 'bootstrap': False}. Best is trial 44 with value: -2964880516533.655.


Best trial: 44. Best value: -2.96488e+12:  31%|███▏      | 47/150 [00:20<00:42,  2.40it/s]

[I 2025-11-11 12:53:39,294] Trial 46 finished with value: -4185491170816.9766 and parameters: {'max_depth_type': 'int', 'max_depth': 7, 'min_samples_split': 11, 'min_samples_leaf': 17, 'max_leaf_nodes_type': 'none', 'n_estimators': 104, 'max_features': 0.5, 'bootstrap': False}. Best is trial 44 with value: -2964880516533.655.


Best trial: 44. Best value: -2.96488e+12:  32%|███▏      | 48/150 [00:21<00:44,  2.28it/s]

[I 2025-11-11 12:53:39,783] Trial 47 finished with value: -3442370354138.419 and parameters: {'max_depth_type': 'none', 'min_samples_split': 18, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 164, 'max_features': 0.5, 'bootstrap': True}. Best is trial 44 with value: -2964880516533.655.


Best trial: 48. Best value: -2.89644e+12:  33%|███▎      | 49/150 [00:21<00:48,  2.10it/s]

[I 2025-11-11 12:53:40,346] Trial 48 finished with value: -2896442370092.197 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 188, 'max_features': 0.5, 'bootstrap': False}. Best is trial 48 with value: -2896442370092.197.


Best trial: 48. Best value: -2.89644e+12:  33%|███▎      | 50/150 [00:22<00:46,  2.17it/s]

[I 2025-11-11 12:53:40,771] Trial 49 finished with value: -3672992856798.213 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 80, 'n_estimators': 192, 'max_features': 0.5, 'bootstrap': False}. Best is trial 48 with value: -2896442370092.197.


Best trial: 48. Best value: -2.89644e+12:  34%|███▍      | 51/150 [00:22<00:47,  2.10it/s]

[I 2025-11-11 12:53:41,288] Trial 50 finished with value: -3283938593776.519 and parameters: {'max_depth_type': 'int', 'max_depth': 10, 'min_samples_split': 12, 'min_samples_leaf': 8, 'max_leaf_nodes_type': 'none', 'n_estimators': 200, 'max_features': 0.5, 'bootstrap': False}. Best is trial 48 with value: -2896442370092.197.
Completed 51 trials. Best validation RMSE: $1,701,893.76


Best trial: 48. Best value: -2.89644e+12:  35%|███▍      | 52/150 [00:23<00:47,  2.05it/s]

[I 2025-11-11 12:53:41,801] Trial 51 finished with value: -2998338904943.9346 and parameters: {'max_depth_type': 'none', 'min_samples_split': 15, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 181, 'max_features': 0.5, 'bootstrap': False}. Best is trial 48 with value: -2896442370092.197.


Best trial: 48. Best value: -2.89644e+12:  35%|███▌      | 53/150 [00:23<00:47,  2.03it/s]

[I 2025-11-11 12:53:42,306] Trial 52 finished with value: -3004013306567.5786 and parameters: {'max_depth_type': 'none', 'min_samples_split': 14, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 181, 'max_features': 0.5, 'bootstrap': False}. Best is trial 48 with value: -2896442370092.197.


Best trial: 48. Best value: -2.89644e+12:  36%|███▌      | 54/150 [00:24<00:49,  1.96it/s]

[I 2025-11-11 12:53:42,860] Trial 53 finished with value: -2916501830398.653 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 192, 'max_features': 0.5, 'bootstrap': False}. Best is trial 48 with value: -2896442370092.197.


Best trial: 54. Best value: -2.88542e+12:  37%|███▋      | 55/150 [00:24<00:51,  1.84it/s]

[I 2025-11-11 12:53:43,475] Trial 54 finished with value: -2885423917471.532 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 192, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  37%|███▋      | 56/150 [00:25<00:51,  1.84it/s]

[I 2025-11-11 12:53:44,022] Trial 55 finished with value: -3134087006567.191 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 193, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  39%|███▊      | 58/150 [00:26<00:39,  2.31it/s]

[I 2025-11-11 12:53:44,554] Trial 56 finished with value: -3003397339060.4507 and parameters: {'max_depth_type': 'none', 'min_samples_split': 14, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 186, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.
[I 2025-11-11 12:53:44,739] Trial 57 finished with value: -3255098053793.8984 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 73, 'max_features': 0.5, 'bootstrap': True}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  39%|███▉      | 59/150 [00:26<00:41,  2.20it/s]

[I 2025-11-11 12:53:45,239] Trial 58 finished with value: -3130893949235.78 and parameters: {'max_depth_type': 'none', 'min_samples_split': 14, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 185, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  40%|████      | 60/150 [00:27<00:43,  2.06it/s]

[I 2025-11-11 12:53:45,798] Trial 59 finished with value: -2898309206000.6294 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 194, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  41%|████      | 61/150 [00:27<00:43,  2.04it/s]

[I 2025-11-11 12:53:46,304] Trial 60 finished with value: -3243656885170.073 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 8, 'max_leaf_nodes_type': 'none', 'n_estimators': 200, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  41%|████▏     | 62/150 [00:28<00:44,  1.96it/s]

[I 2025-11-11 12:53:46,855] Trial 61 finished with value: -2982239603418.6353 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 193, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  42%|████▏     | 63/150 [00:28<00:44,  1.94it/s]

[I 2025-11-11 12:53:47,386] Trial 62 finished with value: -2900426868176.761 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 192, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  43%|████▎     | 64/150 [00:29<00:44,  1.94it/s]

[I 2025-11-11 12:53:47,900] Trial 63 finished with value: -3134087006567.191 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 193, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  43%|████▎     | 65/150 [00:29<00:44,  1.91it/s]

[I 2025-11-11 12:53:48,444] Trial 64 finished with value: -2970677564418.038 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 194, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  44%|████▍     | 66/150 [00:30<00:42,  1.98it/s]

[I 2025-11-11 12:53:48,903] Trial 65 finished with value: -2975118109890.8335 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 165, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  45%|████▍     | 67/150 [00:30<00:42,  1.96it/s]

[I 2025-11-11 12:53:49,430] Trial 66 finished with value: -3136650655314.7534 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 196, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  45%|████▌     | 68/150 [00:31<00:43,  1.90it/s]

[I 2025-11-11 12:53:49,991] Trial 67 finished with value: -2972278643598.043 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 196, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  46%|████▌     | 69/150 [00:32<00:46,  1.73it/s]

[I 2025-11-11 12:53:50,689] Trial 68 finished with value: -2977583201689.547 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 187, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  47%|████▋     | 70/150 [00:32<00:45,  1.77it/s]

[I 2025-11-11 12:53:51,222] Trial 69 finished with value: -3873588231261.5254 and parameters: {'max_depth_type': 'none', 'min_samples_split': 17, 'min_samples_leaf': 19, 'max_leaf_nodes_type': 'none', 'n_estimators': 200, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  47%|████▋     | 71/150 [00:32<00:37,  2.09it/s]

[I 2025-11-11 12:53:51,499] Trial 70 finished with value: -4235547137920.593 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 15, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 58, 'n_estimators': 118, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  48%|████▊     | 72/150 [00:33<00:39,  2.00it/s]

[I 2025-11-11 12:53:52,053] Trial 71 finished with value: -2970403269904.816 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 195, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  49%|████▊     | 73/150 [00:34<00:40,  1.88it/s]

[I 2025-11-11 12:53:52,657] Trial 72 finished with value: -2968497716940.014 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 193, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  49%|████▉     | 74/150 [00:34<00:39,  1.91it/s]

[I 2025-11-11 12:53:53,160] Trial 73 finished with value: -3135823867705.809 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 189, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  50%|█████     | 75/150 [00:35<00:40,  1.87it/s]

[I 2025-11-11 12:53:53,725] Trial 74 finished with value: -3000919799053.6094 and parameters: {'max_depth_type': 'none', 'min_samples_split': 15, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 184, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  51%|█████     | 76/150 [00:35<00:38,  1.94it/s]

[I 2025-11-11 12:53:54,193] Trial 75 finished with value: -3066326253920.365 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 173, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.
Completed 76 trials. Best validation RMSE: $1,698,653.56


Best trial: 54. Best value: -2.88542e+12:  51%|█████▏    | 77/150 [00:36<00:36,  2.01it/s]

[I 2025-11-11 12:53:54,647] Trial 76 finished with value: -3343088890393.3057 and parameters: {'max_depth_type': 'none', 'min_samples_split': 29, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 179, 'max_features': 0.5, 'bootstrap': False}. Best is trial 54 with value: -2885423917471.532.


Best trial: 54. Best value: -2.88542e+12:  52%|█████▏    | 78/150 [00:36<00:33,  2.13it/s]

[I 2025-11-11 12:53:55,050] Trial 77 finished with value: -3436055093398.716 and parameters: {'max_depth_type': 'none', 'min_samples_split': 16, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 191, 'max_features': 0.5, 'bootstrap': True}. Best is trial 54 with value: -2885423917471.532.


Best trial: 78. Best value: -2.8748e+12:  53%|█████▎    | 79/150 [00:36<00:32,  2.19it/s] 

[I 2025-11-11 12:53:55,481] Trial 78 finished with value: -2874800400812.698 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 149, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  53%|█████▎    | 80/150 [00:37<00:33,  2.09it/s]

[I 2025-11-11 12:53:56,012] Trial 79 finished with value: -2898442012434.3115 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 155, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  54%|█████▍    | 81/150 [00:37<00:31,  2.17it/s]

[I 2025-11-11 12:53:56,431] Trial 80 finished with value: -3023775564082.047 and parameters: {'max_depth_type': 'none', 'min_samples_split': 14, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 148, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  55%|█████▍    | 82/150 [00:38<00:30,  2.24it/s]

[I 2025-11-11 12:53:56,846] Trial 81 finished with value: -2907327525103.9165 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 146, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  55%|█████▌    | 83/150 [00:38<00:29,  2.29it/s]

[I 2025-11-11 12:53:57,260] Trial 82 finished with value: -2908905675769.938 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 145, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  56%|█████▌    | 84/150 [00:39<00:28,  2.30it/s]

[I 2025-11-11 12:53:57,692] Trial 83 finished with value: -2901883239995.618 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 131, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  57%|█████▋    | 85/150 [00:39<00:28,  2.28it/s]

[I 2025-11-11 12:53:58,139] Trial 84 finished with value: -2903903969348.06 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 130, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  57%|█████▋    | 86/150 [00:40<00:27,  2.33it/s]

[I 2025-11-11 12:53:58,547] Trial 85 finished with value: -2981078785767.3423 and parameters: {'max_depth_type': 'none', 'min_samples_split': 16, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 129, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  58%|█████▊    | 87/150 [00:40<00:25,  2.44it/s]

[I 2025-11-11 12:53:58,910] Trial 86 finished with value: -3055563905810.656 and parameters: {'max_depth_type': 'none', 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 131, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  59%|█████▊    | 88/150 [00:40<00:22,  2.73it/s]

[I 2025-11-11 12:53:59,174] Trial 87 finished with value: -5082584112592.45 and parameters: {'max_depth_type': 'none', 'min_samples_split': 48, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 26, 'n_estimators': 154, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  59%|█████▉    | 89/150 [00:41<00:22,  2.66it/s]

[I 2025-11-11 12:53:59,574] Trial 88 finished with value: -3060646315226.8354 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 133, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  60%|██████    | 90/150 [00:41<00:21,  2.80it/s]

[I 2025-11-11 12:53:59,886] Trial 89 finished with value: -3535400699291.288 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 121, 'max_features': 0.5, 'bootstrap': True}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  61%|██████    | 91/150 [00:41<00:20,  2.93it/s]

[I 2025-11-11 12:54:00,189] Trial 90 finished with value: -3723808603656.4956 and parameters: {'max_depth_type': 'none', 'min_samples_split': 32, 'min_samples_leaf': 13, 'max_leaf_nodes_type': 'none', 'n_estimators': 115, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  61%|██████▏   | 92/150 [00:42<00:21,  2.70it/s]

[I 2025-11-11 12:54:00,630] Trial 91 finished with value: -2910580933766.204 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 144, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  62%|██████▏   | 93/150 [00:42<00:21,  2.64it/s]

[I 2025-11-11 12:54:01,026] Trial 92 finished with value: -3028308745153.9863 and parameters: {'max_depth_type': 'none', 'min_samples_split': 14, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 137, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  63%|██████▎   | 94/150 [00:42<00:21,  2.55it/s]

[I 2025-11-11 12:54:01,449] Trial 93 finished with value: -2912519757189.837 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 146, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  63%|██████▎   | 95/150 [00:43<00:21,  2.57it/s]

[I 2025-11-11 12:54:01,834] Trial 94 finished with value: -3116513281232.953 and parameters: {'max_depth_type': 'none', 'min_samples_split': 17, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 141, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  64%|██████▍   | 96/150 [00:43<00:21,  2.53it/s]

[I 2025-11-11 12:54:02,241] Trial 95 finished with value: -3218240936073.1064 and parameters: {'max_depth_type': 'none', 'min_samples_split': 26, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 152, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  65%|██████▍   | 97/150 [00:44<00:22,  2.34it/s]

[I 2025-11-11 12:54:02,743] Trial 96 finished with value: -2901963012259.526 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 157, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  65%|██████▌   | 98/150 [00:44<00:21,  2.39it/s]

[I 2025-11-11 12:54:03,140] Trial 97 finished with value: -3073009671349.886 and parameters: {'max_depth_type': 'none', 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 109, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  66%|██████▌   | 99/150 [00:45<00:21,  2.32it/s]

[I 2025-11-11 12:54:03,598] Trial 98 finished with value: -3442426886964.6997 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 11, 'max_leaf_nodes_type': 'none', 'n_estimators': 159, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  67%|██████▋   | 100/150 [00:45<00:20,  2.40it/s]

[I 2025-11-11 12:54:03,983] Trial 99 finished with value: -3684062109278.0903 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 81, 'n_estimators': 156, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  67%|██████▋   | 101/150 [00:45<00:20,  2.45it/s]

[I 2025-11-11 12:54:04,372] Trial 100 finished with value: -3188778346504.9966 and parameters: {'max_depth_type': 'none', 'min_samples_split': 21, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 134, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.
Completed 101 trials. Best validation RMSE: $1,695,523.64


Best trial: 78. Best value: -2.8748e+12:  68%|██████▊   | 102/150 [00:46<00:20,  2.38it/s]

[I 2025-11-11 12:54:04,823] Trial 101 finished with value: -2902991300300.683 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 149, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  69%|██████▊   | 103/150 [00:46<00:20,  2.33it/s]

[I 2025-11-11 12:54:05,269] Trial 102 finished with value: -3018849681895.924 and parameters: {'max_depth_type': 'none', 'min_samples_split': 14, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 149, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 78. Best value: -2.8748e+12:  69%|██████▉   | 104/150 [00:47<00:18,  2.44it/s]

[I 2025-11-11 12:54:05,633] Trial 103 finished with value: -2960692569906.9204 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 125, 'max_features': 0.5, 'bootstrap': False}. Best is trial 78 with value: -2874800400812.698.


Best trial: 104. Best value: -2.86956e+12:  70%|███████   | 105/150 [00:47<00:18,  2.46it/s]

[I 2025-11-11 12:54:06,034] Trial 104 finished with value: -2869562595677.921 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 140, 'max_features': 0.5, 'bootstrap': False}. Best is trial 104 with value: -2869562595677.921.


Best trial: 105. Best value: -2.86646e+12:  71%|███████   | 106/150 [00:47<00:18,  2.38it/s]

[I 2025-11-11 12:54:06,489] Trial 105 finished with value: -2866462622745.4785 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 139, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  71%|███████▏  | 107/150 [00:48<00:19,  2.16it/s]

[I 2025-11-11 12:54:07,047] Trial 106 finished with value: -2871392400419.1025 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 142, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  72%|███████▏  | 108/150 [00:48<00:19,  2.18it/s]

[I 2025-11-11 12:54:07,495] Trial 107 finished with value: -2975922732574.9116 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 140, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  73%|███████▎  | 109/150 [00:49<00:17,  2.38it/s]

[I 2025-11-11 12:54:07,825] Trial 108 finished with value: -3287435992689.064 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 134, 'max_features': 0.5, 'bootstrap': True}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  73%|███████▎  | 110/150 [00:49<00:17,  2.29it/s]

[I 2025-11-11 12:54:08,304] Trial 109 finished with value: -2970437909900.0303 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 169, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  74%|███████▍  | 111/150 [00:50<00:16,  2.34it/s]

[I 2025-11-11 12:54:08,707] Trial 110 finished with value: -3237631689557.9834 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 8, 'max_leaf_nodes_type': 'none', 'n_estimators': 142, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  75%|███████▍  | 112/150 [00:50<00:16,  2.25it/s]

[I 2025-11-11 12:54:09,192] Trial 111 finished with value: -2880023802771.8447 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 162, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  75%|███████▌  | 113/150 [00:51<00:16,  2.24it/s]

[I 2025-11-11 12:54:09,645] Trial 112 finished with value: -2871563624568.3896 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 152, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  76%|███████▌  | 114/150 [00:51<00:16,  2.24it/s]

[I 2025-11-11 12:54:10,088] Trial 113 finished with value: -2871563624568.389 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 152, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  77%|███████▋  | 115/150 [00:52<00:15,  2.22it/s]

[I 2025-11-11 12:54:10,549] Trial 114 finished with value: -2975869203827.284 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 152, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  77%|███████▋  | 116/150 [00:52<00:15,  2.19it/s]

[I 2025-11-11 12:54:11,022] Trial 115 finished with value: -2878297961843.814 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 161, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  78%|███████▊  | 117/150 [00:52<00:13,  2.37it/s]

[I 2025-11-11 12:54:11,360] Trial 116 finished with value: -3157523918746.609 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 160, 'max_features': 0.3, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  79%|███████▊  | 118/150 [00:53<00:13,  2.33it/s]

[I 2025-11-11 12:54:11,806] Trial 117 finished with value: -3125599558906.9673 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 7, 'max_leaf_nodes_type': 'none', 'n_estimators': 161, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  79%|███████▉  | 119/150 [00:53<00:13,  2.23it/s]

[I 2025-11-11 12:54:12,298] Trial 118 finished with value: -2884339162692.5596 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 166, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  80%|████████  | 120/150 [00:54<00:11,  2.55it/s]

[I 2025-11-11 12:54:12,562] Trial 119 finished with value: -6166551350890.3545 and parameters: {'max_depth_type': 'int', 'max_depth': 4, 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 44, 'n_estimators': 165, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  81%|████████  | 121/150 [00:54<00:12,  2.39it/s]

[I 2025-11-11 12:54:13,040] Trial 120 finished with value: -2880023802771.845 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 162, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  81%|████████▏ | 122/150 [00:54<00:12,  2.28it/s]

[I 2025-11-11 12:54:13,529] Trial 121 finished with value: -2887091596395.3438 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 167, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  82%|████████▏ | 123/150 [00:55<00:12,  2.21it/s]

[I 2025-11-11 12:54:14,011] Trial 122 finished with value: -2889847764095.027 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 168, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  83%|████████▎ | 124/150 [00:55<00:12,  2.14it/s]

[I 2025-11-11 12:54:14,513] Trial 123 finished with value: -2887091596395.344 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 167, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  83%|████████▎ | 125/150 [00:56<00:11,  2.12it/s]

[I 2025-11-11 12:54:14,997] Trial 124 finished with value: -2901980903556.935 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 163, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  84%|████████▍ | 126/150 [00:56<00:11,  2.10it/s]

[I 2025-11-11 12:54:15,479] Trial 125 finished with value: -2968975776541.7236 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 171, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.
Completed 126 trials. Best validation RMSE: $1,693,063.09


Best trial: 105. Best value: -2.86646e+12:  85%|████████▍ | 127/150 [00:57<00:11,  2.09it/s]

[I 2025-11-11 12:54:15,968] Trial 126 finished with value: -2914774243288.2607 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 174, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  85%|████████▌ | 128/150 [00:57<00:10,  2.13it/s]

[I 2025-11-11 12:54:16,415] Trial 127 finished with value: -2975421268145.0596 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 163, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  86%|████████▌ | 129/150 [00:58<00:09,  2.20it/s]

[I 2025-11-11 12:54:16,833] Trial 128 finished with value: -2965144662669.7783 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 152, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  87%|████████▋ | 130/150 [00:58<00:08,  2.32it/s]

[I 2025-11-11 12:54:17,214] Trial 129 finished with value: -3329021569351.2056 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 168, 'max_features': 0.5, 'bootstrap': True}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  87%|████████▋ | 131/150 [00:59<00:07,  2.49it/s]

[I 2025-11-11 12:54:17,545] Trial 130 finished with value: -3146014972938.2837 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 157, 'max_features': 0.3, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  88%|████████▊ | 132/150 [00:59<00:07,  2.34it/s]

[I 2025-11-11 12:54:18,032] Trial 131 finished with value: -2887091596395.3438 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 167, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  89%|████████▊ | 133/150 [00:59<00:07,  2.26it/s]

[I 2025-11-11 12:54:18,508] Trial 132 finished with value: -2881837048192.7334 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 165, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  89%|████████▉ | 134/150 [01:00<00:07,  2.22it/s]

[I 2025-11-11 12:54:18,976] Trial 133 finished with value: -2900978660090.4287 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 161, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  90%|█████████ | 135/150 [01:00<00:07,  2.13it/s]

[I 2025-11-11 12:54:19,489] Trial 134 finished with value: -2893870094567.8374 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 175, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  91%|█████████ | 136/150 [01:01<00:06,  2.16it/s]

[I 2025-11-11 12:54:19,940] Trial 135 finished with value: -2910339347104.493 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 152, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  91%|█████████▏| 137/150 [01:01<00:05,  2.26it/s]

[I 2025-11-11 12:54:20,335] Trial 136 finished with value: -3534939668700.796 and parameters: {'max_depth_type': 'none', 'min_samples_split': 40, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 138, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  92%|█████████▏| 138/150 [01:02<00:05,  2.27it/s]

[I 2025-11-11 12:54:20,768] Trial 137 finished with value: -3801264620346.973 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 16, 'max_leaf_nodes_type': 'none', 'n_estimators': 155, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  93%|█████████▎| 139/150 [01:02<00:05,  2.19it/s]

[I 2025-11-11 12:54:21,265] Trial 138 finished with value: -2973919324330.327 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 147, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  93%|█████████▎| 140/150 [01:03<00:04,  2.15it/s]

[I 2025-11-11 12:54:21,751] Trial 139 finished with value: -2975151499900.837 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 164, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  94%|█████████▍| 141/150 [01:03<00:04,  2.23it/s]

[I 2025-11-11 12:54:22,160] Trial 140 finished with value: -3714644596519.438 and parameters: {'max_depth_type': 'int', 'max_depth': 7, 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 143, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  95%|█████████▍| 142/150 [01:04<00:03,  2.05it/s]

[I 2025-11-11 12:54:22,736] Trial 141 finished with value: -2888647552433.809 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 169, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  95%|█████████▌| 143/150 [01:04<00:03,  2.02it/s]

[I 2025-11-11 12:54:23,251] Trial 142 finished with value: -2902520742773.065 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 159, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  96%|█████████▌| 144/150 [01:05<00:02,  2.00it/s]

[I 2025-11-11 12:54:23,759] Trial 143 finished with value: -2884339162692.5596 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 166, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  97%|█████████▋| 145/150 [01:05<00:02,  2.04it/s]

[I 2025-11-11 12:54:24,230] Trial 144 finished with value: -2905683854242.858 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 166, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  97%|█████████▋| 146/150 [01:06<00:01,  2.03it/s]

[I 2025-11-11 12:54:24,728] Trial 145 finished with value: -2890720904247.8354 and parameters: {'max_depth_type': 'none', 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 171, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  98%|█████████▊| 147/150 [01:06<00:01,  2.13it/s]

[I 2025-11-11 12:54:25,145] Trial 146 finished with value: -3875573834937.8613 and parameters: {'max_depth_type': 'none', 'min_samples_split': 13, 'min_samples_leaf': 20, 'max_leaf_nodes_type': 'none', 'n_estimators': 162, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12:  99%|█████████▊| 148/150 [01:06<00:00,  2.26it/s]

[I 2025-11-11 12:54:25,525] Trial 147 finished with value: -3624817908784.2114 and parameters: {'max_depth_type': 'none', 'min_samples_split': 12, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 85, 'n_estimators': 151, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.


Best trial: 105. Best value: -2.86646e+12: 100%|██████████| 150/150 [01:07<00:00,  2.22it/s]


[I 2025-11-11 12:54:25,987] Trial 148 finished with value: -2978329977205.8643 and parameters: {'max_depth_type': 'none', 'min_samples_split': 11, 'min_samples_leaf': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 155, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.
[I 2025-11-11 12:54:26,172] Trial 149 finished with value: -3065896219427.446 and parameters: {'max_depth_type': 'none', 'min_samples_split': 14, 'min_samples_leaf': 5, 'max_leaf_nodes_type': 'none', 'n_estimators': 61, 'max_features': 0.5, 'bootstrap': False}. Best is trial 105 with value: -2866462622745.4785.

=== Random Forest Optuna Results (Optimized for Validation) ===
Completed 150 trials
Best trial:
  Value (neg_MSE): -2866462622745.4785
  Validation RMSE: $1,693,063.09
  Best params:
    min_samples_split: 10
    min_samples_leaf: 5
    n_estimators: 139
    max_features: 0.5
    bootstrap: False

=== Regularization Analysis ===
min_samples_split: 10 (higher = more re

In [53]:
import optuna
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np
import pandas as pd

# ✅ CHANGE 1: Prepare data for SVR using proper splits
print(f"=== SVR Data Preparation ===")

# Prepare training data
X_svr_train = X_train.copy()
X_svr_train['district_encoded'] = X_svr_train['district'].cat.codes
X_svr_train = X_svr_train.drop('district', axis=1)

# Prepare validation data
X_svr_val = X_val.copy()
X_svr_val['district_encoded'] = X_svr_val['district'].cat.codes
X_svr_val = X_svr_val.drop('district', axis=1)

# Prepare test data
X_svr_test = X_test.copy()
X_svr_test['district_encoded'] = X_svr_test['district'].cat.codes
X_svr_test = X_svr_test.drop('district', axis=1)

print(f"Training data shape: {X_svr_train.shape}")
print(f"Validation data shape: {X_svr_val.shape}")
print(f"Test data shape: {X_svr_test.shape}")
print(f"Features: {list(X_svr_train.columns)}")

def svr_objective(trial):
    params = {
        'C': trial.suggest_float('C', 0.1, 1000, log=True),           # Regularization parameter
        'epsilon': trial.suggest_float('epsilon', 0.01, 1.0),         # Epsilon-tube width
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']), # Kernel coefficient
        'kernel': trial.suggest_categorical('kernel', ['rbf', 'poly', 'linear']), # Kernel type
    }
    
    # Add degree parameter only for polynomial kernel
    if params['kernel'] == 'poly':
        params['degree'] = trial.suggest_int('degree', 2, 5)
    
    try:
        # ✅ CHANGE 2: Create SVR pipeline with scaling
        pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('svr', SVR(**params))
        ])
        
        # ✅ CHANGE 3: Train on training set only
        pipeline.fit(X_svr_train, y_train)
        
        # ✅ CHANGE 4: Evaluate on validation set
        val_predictions = pipeline.predict(X_svr_val)
        val_mse = mean_squared_error(y_val, val_predictions)
        
        # ✅ CHANGE 5: Return negative MSE for maximization
        return -val_mse
        
    except Exception as e:
        print(f"Trial {trial.number} failed: {e}")
        return float('-inf')

# Progress callback
def print_progress(study, trial):
    if trial.number % 25 == 0:
        # ✅ CHANGE 6: Show validation RMSE
        val_rmse = np.sqrt(-study.best_value)
        print(f"Completed {trial.number + 1} trials. Best validation RMSE: ${val_rmse:,.2f}")

print("\nStarting SVR hyperparameter optimization with Optuna...")
print("🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)")  # ✅ CHANGE 7
print("Using StandardScaler + SVR pipeline")
print("Testing 100 different parameter combinations...")
print("Parameter ranges:")
print("  C: 0.1-1000 (log scale)")
print("  epsilon: 0.01-1.0") 
print("  gamma: 'scale' or 'auto'")
print("  kernel: 'rbf', 'poly', or 'linear'")
print("  degree: 2-5 (for poly kernel only)")

print(f"\nUsing proper train/validation/test splits:")
print(f"  Training samples: {len(X_svr_train):,}")
print(f"  Validation samples: {len(X_svr_val):,}")
print(f"  Test samples: {len(X_svr_test):,}")

study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.NopPruner()
)

study.optimize(
    svr_objective, 
    n_trials=100,
    timeout=7200,
    callbacks=[print_progress],
    show_progress_bar=True,
    n_jobs=1
)

# ✅ CHANGE 8: Updated results display
best_val_rmse = np.sqrt(-study.best_value)
print(f"\n=== SVR Optuna Results (Optimized for Validation) ===")
print(f"Completed {len(study.trials)} out of 100 trials")
print("Best trial:")
print(f"  Trial number: {study.best_trial.number}")
print(f"  Value (neg_MSE): {study.best_trial.value}")
print(f"  Validation RMSE: ${best_val_rmse:,.2f}")
print("  Best params:")
for key, value in study.best_trial.params.items():
    print(f"    {key}: {value}")

print("\nTraining final SVR model with best parameters...")

# Create best pipeline
best_svr_params = study.best_trial.params.copy()
best_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svr', SVR(**best_svr_params))
])

# ✅ CHANGE 9: Fit on training data only
best_pipeline.fit(X_svr_train, y_train)

# ✅ CHANGE 10: Get predictions for all three sets
train_predictions = best_pipeline.predict(X_svr_train)
val_predictions = best_pipeline.predict(X_svr_val)
test_predictions = best_pipeline.predict(X_svr_test)

# ✅ CHANGE 11: Calculate metrics for all three sets
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
val_rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

train_mae = mean_absolute_error(y_train, train_predictions)
val_mae = mean_absolute_error(y_val, val_predictions)
test_mae = mean_absolute_error(y_test, test_predictions)

train_r2 = r2_score(y_train, train_predictions)
val_r2 = r2_score(y_val, val_predictions)
test_r2 = r2_score(y_test, test_predictions)

# ✅ CHANGE 12: Display performance across all sets
print(f"\n=== Model Performance Across All Sets ===")
print(f"Training Set:")
print(f"  RMSE: ${train_rmse:,.2f}")
print(f"  MAE: ${train_mae:,.2f}")
print(f"  R²: {train_r2:.4f}")

print(f"\nValidation Set:")
print(f"  RMSE: ${val_rmse:,.2f}")
print(f"  MAE: ${val_mae:,.2f}")
print(f"  R²: {val_r2:.4f}")

print(f"\nTest Set (UNSEEN DATA):")
print(f"  RMSE: ${test_rmse:,.2f}")
print(f"  MAE: ${test_mae:,.2f}")
print(f"  R²: {test_r2:.4f}")

# ✅ CHANGE 13: Check for overfitting
print(f"\n=== Overfitting Analysis ===")
if train_rmse < val_rmse * 0.8:
    print(f"⚠️  WARNING: Possible overfitting detected!")
    print(f"   Training RMSE ({train_rmse:,.2f}) much lower than validation RMSE ({val_rmse:,.2f})")
else:
    print(f"✅ Model generalizes well - training and validation performance similar")

if abs(val_rmse - test_rmse) / val_rmse < 0.1:
    print(f"✅ Validation set is good proxy for test performance")
else:
    print(f"⚠️  Large difference between validation and test performance")

# ✅ CHANGE 14: Create results with TEST set only (true unseen data)
results_df = X_svr_test.copy()
results_df['actual_price'] = y_test
results_df['svr_predicted_price'] = test_predictions
results_df['prediction_error'] = results_df['actual_price'] - results_df['svr_predicted_price']
results_df['absolute_error'] = abs(results_df['prediction_error'])

# Save to CSV
results_df.to_csv('/Users/clarencemarvin/Downloads/regularized2/svr_predictions.csv', index=False)

# Trial summary
failed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.FAIL]
print(f"\nTrial Summary:")
print(f"  Total trials: {len(study.trials)}")
print(f"  Successful trials: {len(study.trials) - len(failed_trials)}")
print(f"  Failed trials: {len(failed_trials)}")

print(f"\nSVR Details:")
print(f"  Best kernel: {study.best_trial.params['kernel']}")
print(f"  Number of support vectors: {best_pipeline.named_steps['svr'].n_support_}")
print(f"  Support vector ratio: {sum(best_pipeline.named_steps['svr'].n_support_) / len(X_svr_train):.2%}")

print(f"\n🎯 FINAL RESULT: Test RMSE ${test_rmse:,.2f} on truly unseen data!")

[I 2025-11-11 12:55:09,060] A new study created in memory with name: no-name-1801bae4-64e8-4c1d-a4f2-899406896a52


=== SVR Data Preparation ===
Training data shape: (3693, 10)
Validation data shape: (1231, 10)
Test data shape: (1232, 10)
Features: ['bedroom_count', 'property_age', 'saleable_area', 'travel_time_to_cbd', 'total_poi_within_1000m', 'category_Education_within_2000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'pet_policy_binary', 'district_encoded']

Starting SVR hyperparameter optimization with Optuna...
🎯 OPTIMIZING FOR VALIDATION PERFORMANCE (not training!)
Using StandardScaler + SVR pipeline
Testing 100 different parameter combinations...
Parameter ranges:
  C: 0.1-1000 (log scale)
  epsilon: 0.01-1.0
  gamma: 'scale' or 'auto'
  kernel: 'rbf', 'poly', or 'linear'
  degree: 2-5 (for poly kernel only)

Using proper train/validation/test splits:
  Training samples: 3,693
  Validation samples: 1,231
  Test samples: 1,232


Best trial: 0. Best value: -2.39496e+13:   1%|          | 1/100 [00:00<01:17,  1.28it/s, 0.78/7200 seconds]

[I 2025-11-11 12:55:09,845] Trial 0 finished with value: -23949648974474.086 and parameters: {'C': 1.3893207134677135, 'epsilon': 0.5088024139453727, 'gamma': 'auto', 'kernel': 'poly', 'degree': 2}. Best is trial 0 with value: -23949648974474.086.
Completed 1 trials. Best validation RMSE: $4,893,837.86


Best trial: 1. Best value: -2.39494e+13:   2%|▏         | 2/100 [00:01<01:28,  1.11it/s, 1.77/7200 seconds]

[I 2025-11-11 12:55:10,834] Trial 1 finished with value: -23949412664240.53 and parameters: {'C': 0.1786015216475857, 'epsilon': 0.9917187053386948, 'gamma': 'auto', 'kernel': 'rbf'}. Best is trial 1 with value: -23949412664240.53.


Best trial: 2. Best value: -1.80597e+13:   3%|▎         | 3/100 [00:02<01:20,  1.21it/s, 2.50/7200 seconds]

[I 2025-11-11 12:55:11,566] Trial 2 finished with value: -18059701221166.973 and parameters: {'C': 268.59933109068606, 'epsilon': 0.06643143293251538, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:   4%|▍         | 4/100 [00:03<01:17,  1.23it/s, 3.30/7200 seconds]

[I 2025-11-11 12:55:12,358] Trial 3 finished with value: -23276857706342.68 and parameters: {'C': 487.54814381792255, 'epsilon': 0.08303538989581741, 'gamma': 'auto', 'kernel': 'poly', 'degree': 5}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:   5%|▌         | 5/100 [00:04<01:24,  1.13it/s, 4.32/7200 seconds]

[I 2025-11-11 12:55:13,381] Trial 4 finished with value: -23949459843883.48 and parameters: {'C': 0.1328110336916793, 'epsilon': 0.4849137629624455, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:   6%|▌         | 6/100 [00:05<01:26,  1.08it/s, 5.31/7200 seconds]

[I 2025-11-11 12:55:14,376] Trial 5 finished with value: -23949104690581.71 and parameters: {'C': 0.5001403745281604, 'epsilon': 0.566706807274897, 'gamma': 'auto', 'kernel': 'rbf'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:   7%|▋         | 7/100 [00:06<01:26,  1.07it/s, 6.27/7200 seconds]

[I 2025-11-11 12:55:15,329] Trial 6 finished with value: -23949397854251.688 and parameters: {'C': 0.19371608783287736, 'epsilon': 0.8541502065800651, 'gamma': 'auto', 'kernel': 'rbf'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:   8%|▊         | 8/100 [00:07<01:29,  1.03it/s, 7.31/7200 seconds]

[I 2025-11-11 12:55:16,367] Trial 7 finished with value: -23930951772261.074 and parameters: {'C': 19.52016155035384, 'epsilon': 0.5012971136080809, 'gamma': 'auto', 'kernel': 'rbf'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:   9%|▉         | 9/100 [00:08<01:27,  1.04it/s, 8.25/7200 seconds]

[I 2025-11-11 12:55:17,309] Trial 8 finished with value: -23920435103983.66 and parameters: {'C': 896.0602329374042, 'epsilon': 0.9905121989507489, 'gamma': 'auto', 'kernel': 'poly', 'degree': 2}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:  10%|█         | 10/100 [00:09<01:33,  1.04s/it, 9.48/7200 seconds]

[I 2025-11-11 12:55:18,540] Trial 9 finished with value: -23766791322357.156 and parameters: {'C': 195.5333333952396, 'epsilon': 0.9887617078040949, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:  11%|█         | 11/100 [00:10<01:33,  1.05s/it, 10.53/7200 seconds]

[I 2025-11-11 12:55:19,596] Trial 10 finished with value: -22967075200934.895 and parameters: {'C': 40.22399593285067, 'epsilon': 0.019348061288153272, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:  12%|█▏        | 12/100 [00:11<01:29,  1.02s/it, 11.49/7200 seconds]

[I 2025-11-11 12:55:20,555] Trial 11 finished with value: -22449117284165.094 and parameters: {'C': 60.50765523181672, 'epsilon': 0.04061751675206059, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:  13%|█▎        | 13/100 [00:12<01:26,  1.00it/s, 12.43/7200 seconds]

[I 2025-11-11 12:55:21,494] Trial 12 finished with value: -21681785995000.258 and parameters: {'C': 90.63670447814242, 'epsilon': 0.22129907195191556, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:  14%|█▍        | 14/100 [00:13<01:23,  1.03it/s, 13.36/7200 seconds]

[I 2025-11-11 12:55:22,421] Trial 13 finished with value: -23859881880630.258 and parameters: {'C': 4.183678179456051, 'epsilon': 0.24920530137232247, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:  15%|█▌        | 15/100 [00:14<01:21,  1.04it/s, 14.29/7200 seconds]

[I 2025-11-11 12:55:23,353] Trial 14 finished with value: -20313087715920.008 and parameters: {'C': 151.12283146552795, 'epsilon': 0.2380933160023497, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:  16%|█▌        | 16/100 [00:15<01:19,  1.05it/s, 15.21/7200 seconds]

[I 2025-11-11 12:55:24,276] Trial 15 finished with value: -19614520003882.36 and parameters: {'C': 182.91997282949183, 'epsilon': 0.25799643117703774, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 2. Best value: -1.80597e+13:  17%|█▋        | 17/100 [00:16<01:18,  1.06it/s, 16.13/7200 seconds]

[I 2025-11-11 12:55:25,192] Trial 16 finished with value: -23749634342107.836 and parameters: {'C': 8.703301749767721, 'epsilon': 0.33689598686078936, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 2 with value: -18059701221166.973.


Best trial: 17. Best value: -1.66587e+13:  18%|█▊        | 18/100 [00:17<01:16,  1.07it/s, 17.05/7200 seconds]

[I 2025-11-11 12:55:26,113] Trial 17 finished with value: -16658716460848.494 and parameters: {'C': 361.8308212627763, 'epsilon': 0.14498156317130045, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 17 with value: -16658716460848.494.


Best trial: 18. Best value: -1.54628e+13:  19%|█▉        | 19/100 [00:17<01:15,  1.08it/s, 17.96/7200 seconds]

[I 2025-11-11 12:55:27,027] Trial 18 finished with value: -15462836034898.418 and parameters: {'C': 455.76306438253596, 'epsilon': 0.13640997434986624, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 18 with value: -15462836034898.418.


Best trial: 19. Best value: -1.31255e+13:  20%|██        | 20/100 [00:18<01:13,  1.08it/s, 18.88/7200 seconds]

[I 2025-11-11 12:55:27,942] Trial 19 finished with value: -13125457740638.33 and parameters: {'C': 719.1554550565278, 'epsilon': 0.38601209749513665, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 19 with value: -13125457740638.33.


Best trial: 20. Best value: -1.19267e+13:  21%|██        | 21/100 [00:19<01:13,  1.08it/s, 19.82/7200 seconds]

[I 2025-11-11 12:55:28,886] Trial 20 finished with value: -11926698903321.807 and parameters: {'C': 948.023565058984, 'epsilon': 0.3972466636742118, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 20 with value: -11926698903321.807.


Best trial: 20. Best value: -1.19267e+13:  22%|██▏       | 22/100 [00:20<01:14,  1.04it/s, 20.85/7200 seconds]

[I 2025-11-11 12:55:29,911] Trial 21 finished with value: -11929768959142.613 and parameters: {'C': 947.257152900486, 'epsilon': 0.38016853422477986, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 20 with value: -11926698903321.807.


Best trial: 22. Best value: -1.17539e+13:  23%|██▎       | 23/100 [00:21<01:15,  1.03it/s, 21.86/7200 seconds]

[I 2025-11-11 12:55:30,925] Trial 22 finished with value: -11753876678769.877 and parameters: {'C': 991.9263336052301, 'epsilon': 0.39231297460685577, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  24%|██▍       | 24/100 [00:22<01:13,  1.03it/s, 22.83/7200 seconds]

[I 2025-11-11 12:55:31,890] Trial 23 finished with value: -12284299980629.957 and parameters: {'C': 874.6014106203555, 'epsilon': 0.6390139574423845, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  25%|██▌       | 25/100 [00:23<01:15,  1.00s/it, 23.90/7200 seconds]

[I 2025-11-11 12:55:32,961] Trial 24 finished with value: -21487923144450.9 and parameters: {'C': 98.4863356252809, 'epsilon': 0.3686574791275882, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  26%|██▌       | 26/100 [00:24<01:14,  1.00s/it, 24.90/7200 seconds]

[I 2025-11-11 12:55:33,966] Trial 25 finished with value: -23901958470361.625 and parameters: {'C': 30.592946648368876, 'epsilon': 0.6372804125070154, 'gamma': 'scale', 'kernel': 'poly', 'degree': 5}. Best is trial 22 with value: -11753876678769.877.
Completed 26 trials. Best validation RMSE: $3,428,392.73


Best trial: 22. Best value: -1.17539e+13:  27%|██▋       | 27/100 [00:25<01:11,  1.02it/s, 25.82/7200 seconds]

[I 2025-11-11 12:55:34,881] Trial 26 finished with value: -11755817116926.764 and parameters: {'C': 991.3054201487947, 'epsilon': 0.4239094628636421, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  28%|██▊       | 28/100 [00:26<01:09,  1.04it/s, 26.74/7200 seconds]

[I 2025-11-11 12:55:35,802] Trial 27 finished with value: -16984015383488.0 and parameters: {'C': 338.61469013652743, 'epsilon': 0.6521877915484164, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  29%|██▉       | 29/100 [00:27<01:09,  1.02it/s, 27.77/7200 seconds]

[I 2025-11-11 12:55:36,832] Trial 28 finished with value: -23873796517725.203 and parameters: {'C': 3.5314708304903086, 'epsilon': 0.4466471304525317, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  30%|███       | 30/100 [00:28<01:09,  1.01it/s, 28.80/7200 seconds]

[I 2025-11-11 12:55:37,860] Trial 29 finished with value: -23949906598579.37 and parameters: {'C': 0.9940091238844706, 'epsilon': 0.5625917331225663, 'gamma': 'scale', 'kernel': 'poly', 'degree': 4}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  31%|███       | 31/100 [00:29<01:07,  1.02it/s, 29.74/7200 seconds]

[I 2025-11-11 12:55:38,804] Trial 30 finished with value: -14899478513432.773 and parameters: {'C': 504.1207513667964, 'epsilon': 0.752017971090748, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  32%|███▏      | 32/100 [00:30<01:05,  1.05it/s, 30.65/7200 seconds]

[I 2025-11-11 12:55:39,707] Trial 31 finished with value: -12133593903005.367 and parameters: {'C': 898.1923017194989, 'epsilon': 0.42551284165883474, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  33%|███▎      | 33/100 [00:31<01:02,  1.06it/s, 31.55/7200 seconds]

[I 2025-11-11 12:55:40,608] Trial 32 finished with value: -12053273445860.404 and parameters: {'C': 916.6007256641583, 'epsilon': 0.3233909109259287, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  34%|███▍      | 34/100 [00:32<01:01,  1.07it/s, 32.46/7200 seconds]

[I 2025-11-11 12:55:41,523] Trial 33 finished with value: -17631041512249.71 and parameters: {'C': 296.16446654731436, 'epsilon': 0.3173979941362526, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  35%|███▌      | 35/100 [00:33<01:00,  1.08it/s, 33.37/7200 seconds]

[I 2025-11-11 12:55:42,432] Trial 34 finished with value: -14490851797695.96 and parameters: {'C': 546.9091955590083, 'epsilon': 0.42137551591471817, 'gamma': 'auto', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  36%|███▌      | 36/100 [00:34<01:01,  1.05it/s, 34.40/7200 seconds]

[I 2025-11-11 12:55:43,459] Trial 35 finished with value: -23777586218053.05 and parameters: {'C': 129.11901833567126, 'epsilon': 0.5535160810220978, 'gamma': 'scale', 'kernel': 'poly', 'degree': 3}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  37%|███▋      | 37/100 [00:35<01:00,  1.04it/s, 35.37/7200 seconds]

[I 2025-11-11 12:55:44,431] Trial 36 finished with value: -17825207918614.254 and parameters: {'C': 284.43527006910676, 'epsilon': 0.46330810408845624, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  38%|███▊      | 38/100 [00:36<00:58,  1.06it/s, 36.27/7200 seconds]

[I 2025-11-11 12:55:45,329] Trial 37 finished with value: -18776335482837.44 and parameters: {'C': 227.51075162220397, 'epsilon': 0.29428405592412044, 'gamma': 'auto', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  39%|███▉      | 39/100 [00:37<01:02,  1.02s/it, 37.48/7200 seconds]

[I 2025-11-11 12:55:46,543] Trial 38 finished with value: -23482127091316.246 and parameters: {'C': 498.6769434657138, 'epsilon': 0.17987200328703135, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  40%|████      | 40/100 [00:38<01:00,  1.01s/it, 38.46/7200 seconds]

[I 2025-11-11 12:55:47,522] Trial 39 finished with value: -22783688947702.61 and parameters: {'C': 992.7861873379983, 'epsilon': 0.3698607122975363, 'gamma': 'auto', 'kernel': 'poly', 'degree': 3}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  41%|████      | 41/100 [00:39<01:03,  1.07s/it, 39.67/7200 seconds]

[I 2025-11-11 12:55:48,735] Trial 40 finished with value: -23884843917709.035 and parameters: {'C': 69.63299240482301, 'epsilon': 0.49994689252393903, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  42%|████▏     | 42/100 [00:40<01:01,  1.06s/it, 40.69/7200 seconds]

[I 2025-11-11 12:55:49,755] Trial 41 finished with value: -13435380869859.37 and parameters: {'C': 670.857627093037, 'epsilon': 0.3317634899414327, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  43%|████▎     | 43/100 [00:41<00:57,  1.02s/it, 41.62/7200 seconds]

[I 2025-11-11 12:55:50,680] Trial 42 finished with value: -14011441862671.37 and parameters: {'C': 597.81915912397, 'epsilon': 0.38971435883379335, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  44%|████▍     | 44/100 [00:42<00:55,  1.01it/s, 42.56/7200 seconds]

[I 2025-11-11 12:55:51,623] Trial 43 finished with value: -16189921377029.303 and parameters: {'C': 395.19821097315105, 'epsilon': 0.29217803970639705, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  45%|████▌     | 45/100 [00:43<00:53,  1.02it/s, 43.51/7200 seconds]

[I 2025-11-11 12:55:52,571] Trial 44 finished with value: -11881378837721.111 and parameters: {'C': 961.0769248502064, 'epsilon': 0.5242612176195329, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  46%|████▌     | 46/100 [00:44<00:52,  1.04it/s, 44.44/7200 seconds]

[I 2025-11-11 12:55:53,500] Trial 45 finished with value: -18639711675839.55 and parameters: {'C': 234.13442604931922, 'epsilon': 0.5127952322858489, 'gamma': 'auto', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  47%|████▋     | 47/100 [00:45<00:51,  1.03it/s, 45.41/7200 seconds]

[I 2025-11-11 12:55:54,474] Trial 46 finished with value: -23941259673435.695 and parameters: {'C': 0.4183163210063894, 'epsilon': 0.5376929636114317, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  48%|████▊     | 48/100 [00:46<00:54,  1.04s/it, 46.62/7200 seconds]

[I 2025-11-11 12:55:55,685] Trial 47 finished with value: -23809006838608.344 and parameters: {'C': 151.62449462754486, 'epsilon': 0.6934103802299599, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  49%|████▉     | 49/100 [00:47<00:51,  1.02s/it, 47.58/7200 seconds]

[I 2025-11-11 12:55:56,643] Trial 48 finished with value: -13838273216651.87 and parameters: {'C': 620.0248109407528, 'epsilon': 0.4657110537972896, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  50%|█████     | 50/100 [00:48<00:50,  1.00s/it, 48.55/7200 seconds]

[I 2025-11-11 12:55:57,609] Trial 49 finished with value: -23681417620653.555 and parameters: {'C': 11.709876468259253, 'epsilon': 0.41555195089919567, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  51%|█████     | 51/100 [00:50<00:57,  1.17s/it, 50.11/7200 seconds]

[I 2025-11-11 12:55:59,162] Trial 50 finished with value: -16293088738647.24 and parameters: {'C': 388.2457974126192, 'epsilon': 0.5977717425718162, 'gamma': 'auto', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.
Completed 51 trials. Best validation RMSE: $3,428,392.73


Best trial: 22. Best value: -1.17539e+13:  52%|█████▏    | 52/100 [00:51<00:56,  1.17s/it, 51.28/7200 seconds]

[I 2025-11-11 12:56:00,345] Trial 51 finished with value: -12344769435202.72 and parameters: {'C': 857.2383613215935, 'epsilon': 0.28195803747627757, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  53%|█████▎    | 53/100 [00:52<00:55,  1.17s/it, 52.47/7200 seconds]

[I 2025-11-11 12:56:01,527] Trial 52 finished with value: -13147349481082.875 and parameters: {'C': 716.7195421188289, 'epsilon': 0.3352306817712601, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 22. Best value: -1.17539e+13:  54%|█████▍    | 54/100 [00:54<01:05,  1.42s/it, 54.47/7200 seconds]

[I 2025-11-11 12:56:03,519] Trial 53 finished with value: -12144051848286.469 and parameters: {'C': 896.4795574024719, 'epsilon': 0.4807887798439466, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 22 with value: -11753876678769.877.


Best trial: 54. Best value: -1.17347e+13:  55%|█████▌    | 55/100 [00:57<01:27,  1.94s/it, 57.61/7200 seconds]

[I 2025-11-11 12:56:06,667] Trial 54 finished with value: -11734718135491.361 and parameters: {'C': 998.1100030339098, 'epsilon': 0.1990437891151269, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  56%|█████▌    | 56/100 [00:59<01:20,  1.83s/it, 59.18/7200 seconds]

[I 2025-11-11 12:56:08,242] Trial 55 finished with value: -15597179501548.312 and parameters: {'C': 443.7038812212409, 'epsilon': 0.20370469939999464, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  57%|█████▋    | 57/100 [01:00<01:13,  1.70s/it, 60.58/7200 seconds]

[I 2025-11-11 12:56:09,638] Trial 56 finished with value: -17711540576638.516 and parameters: {'C': 290.7293002776816, 'epsilon': 0.07117568457854445, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  58%|█████▊    | 58/100 [01:01<01:07,  1.60s/it, 61.96/7200 seconds]

[I 2025-11-11 12:56:11,024] Trial 57 finished with value: -23372368422151.73 and parameters: {'C': 614.2960685634903, 'epsilon': 0.6030238721628608, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  59%|█████▉    | 59/100 [01:03<00:58,  1.44s/it, 63.01/7200 seconds]

[I 2025-11-11 12:56:12,071] Trial 58 finished with value: -23907700457502.086 and parameters: {'C': 204.59914935865353, 'epsilon': 0.10938684511453628, 'gamma': 'scale', 'kernel': 'poly', 'degree': 4}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  60%|██████    | 60/100 [01:03<00:51,  1.29s/it, 63.95/7200 seconds]

[I 2025-11-11 12:56:13,010] Trial 59 finished with value: -16397586389518.457 and parameters: {'C': 379.45336760458827, 'epsilon': 0.5230564954301673, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  61%|██████    | 61/100 [01:05<00:56,  1.45s/it, 65.76/7200 seconds]

[I 2025-11-11 12:56:14,763] Trial 60 finished with value: -23900757819126.03 and parameters: {'C': 2.3848202772615403, 'epsilon': 0.3951655273573843, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  62%|██████▏   | 62/100 [01:08<01:04,  1.69s/it, 68.02/7200 seconds]

[I 2025-11-11 12:56:17,084] Trial 61 finished with value: -11758573433199.914 and parameters: {'C': 990.716200401052, 'epsilon': 0.36047837601235216, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  63%|██████▎   | 63/100 [01:09<00:56,  1.52s/it, 69.16/7200 seconds]

[I 2025-11-11 12:56:18,223] Trial 62 finished with value: -13351978029808.436 and parameters: {'C': 683.5968837694128, 'epsilon': 0.35558826635998797, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  64%|██████▍   | 64/100 [01:10<00:57,  1.60s/it, 70.93/7200 seconds]

[I 2025-11-11 12:56:19,993] Trial 63 finished with value: -14867569761905.754 and parameters: {'C': 506.9834164932706, 'epsilon': 0.44030416227558744, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  65%|██████▌   | 65/100 [01:12<00:56,  1.63s/it, 72.62/7200 seconds]

[I 2025-11-11 12:56:21,681] Trial 64 finished with value: -11816931208911.291 and parameters: {'C': 976.3250544843279, 'epsilon': 0.26225012018653654, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  66%|██████▌   | 66/100 [01:13<00:50,  1.48s/it, 73.76/7200 seconds]

[I 2025-11-11 12:56:22,820] Trial 65 finished with value: -12946244714307.998 and parameters: {'C': 753.6499603635681, 'epsilon': 0.2567392012308852, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  67%|██████▋   | 67/100 [01:14<00:44,  1.35s/it, 74.82/7200 seconds]

[I 2025-11-11 12:56:23,879] Trial 66 finished with value: -11768977285444.256 and parameters: {'C': 988.2295862074127, 'epsilon': 0.21049620652451564, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  68%|██████▊   | 68/100 [01:15<00:39,  1.24s/it, 75.79/7200 seconds]

[I 2025-11-11 12:56:24,847] Trial 67 finished with value: -15217020685532.338 and parameters: {'C': 475.81829007587015, 'epsilon': 0.16773282732062383, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  69%|██████▉   | 69/100 [01:16<00:37,  1.20s/it, 76.90/7200 seconds]

[I 2025-11-11 12:56:25,959] Trial 68 finished with value: -17101906894855.04 and parameters: {'C': 330.72468761252236, 'epsilon': 0.22710571768133855, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  70%|███████   | 70/100 [01:17<00:33,  1.11s/it, 77.79/7200 seconds]

[I 2025-11-11 12:56:26,850] Trial 69 finished with value: -21282513250705.0 and parameters: {'C': 107.47226260342083, 'epsilon': 0.11217457374018691, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  71%|███████   | 71/100 [01:18<00:31,  1.08s/it, 78.81/7200 seconds]

[I 2025-11-11 12:56:27,876] Trial 70 finished with value: -22944594521205.51 and parameters: {'C': 41.463665315297156, 'epsilon': 0.19236169216108595, 'gamma': 'auto', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  72%|███████▏  | 72/100 [01:20<00:31,  1.14s/it, 80.07/7200 seconds]

[I 2025-11-11 12:56:29,106] Trial 71 finished with value: -11791311127652.426 and parameters: {'C': 980.8270875678603, 'epsilon': 0.2728254412192985, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  73%|███████▎  | 73/100 [01:21<00:35,  1.30s/it, 81.76/7200 seconds]

[I 2025-11-11 12:56:30,826] Trial 72 finished with value: -13170280177665.967 and parameters: {'C': 711.9249948308526, 'epsilon': 0.27089309500985637, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  74%|███████▍  | 74/100 [01:22<00:31,  1.23s/it, 82.83/7200 seconds]

[I 2025-11-11 12:56:31,888] Trial 73 finished with value: -11944934688120.94 and parameters: {'C': 943.8438727997979, 'epsilon': 0.2310656412513657, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  75%|███████▌  | 75/100 [01:23<00:29,  1.17s/it, 83.86/7200 seconds]

[I 2025-11-11 12:56:32,927] Trial 74 finished with value: -23933031773221.184 and parameters: {'C': 586.1049485501788, 'epsilon': 0.15441462240154066, 'gamma': 'scale', 'kernel': 'poly', 'degree': 2}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  76%|███████▌  | 76/100 [01:24<00:25,  1.08s/it, 84.73/7200 seconds]

[I 2025-11-11 12:56:33,792] Trial 75 finished with value: -15721422673012.754 and parameters: {'C': 433.37806563102555, 'epsilon': 0.3161511342112157, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.
Completed 76 trials. Best validation RMSE: $3,425,597.49


Best trial: 54. Best value: -1.17347e+13:  77%|███████▋  | 77/100 [01:25<00:25,  1.09s/it, 85.86/7200 seconds]

[I 2025-11-11 12:56:34,918] Trial 76 finished with value: -13015493698302.727 and parameters: {'C': 738.8303457279651, 'epsilon': 0.20677957126711788, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  78%|███████▊  | 78/100 [01:26<00:23,  1.09s/it, 86.92/7200 seconds]

[I 2025-11-11 12:56:35,986] Trial 77 finished with value: -11751144150140.945 and parameters: {'C': 993.7326787060459, 'epsilon': 0.3014451523026794, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  79%|███████▉  | 79/100 [01:28<00:25,  1.21s/it, 88.42/7200 seconds]

[I 2025-11-11 12:56:37,483] Trial 78 finished with value: -23387471093439.383 and parameters: {'C': 597.9252870527633, 'epsilon': 0.3588254854930637, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  80%|████████  | 80/100 [01:29<00:24,  1.25s/it, 89.76/7200 seconds]

[I 2025-11-11 12:56:38,817] Trial 79 finished with value: -17922739974374.74 and parameters: {'C': 278.50424721505254, 'epsilon': 0.30472652478065004, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  81%|████████  | 81/100 [01:30<00:23,  1.23s/it, 90.96/7200 seconds]

[I 2025-11-11 12:56:40,020] Trial 80 finished with value: -20115832958143.492 and parameters: {'C': 160.11771499716252, 'epsilon': 0.2607416753656332, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  82%|████████▏ | 82/100 [01:31<00:20,  1.13s/it, 91.83/7200 seconds]

[I 2025-11-11 12:56:40,896] Trial 81 finished with value: -11735158647982.02 and parameters: {'C': 997.7682863077275, 'epsilon': 0.8763674758803806, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  83%|████████▎ | 83/100 [01:32<00:18,  1.10s/it, 92.88/7200 seconds]

[I 2025-11-11 12:56:41,947] Trial 82 finished with value: -11785307655089.455 and parameters: {'C': 982.1734196434815, 'epsilon': 0.9110624952205126, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  84%|████████▍ | 84/100 [01:33<00:17,  1.07s/it, 93.87/7200 seconds]

[I 2025-11-11 12:56:42,928] Trial 83 finished with value: -12948452105212.285 and parameters: {'C': 753.3299401513814, 'epsilon': 0.9557222760584719, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  85%|████████▌ | 85/100 [01:34<00:15,  1.00s/it, 94.73/7200 seconds]

[I 2025-11-11 12:56:43,788] Trial 84 finished with value: -23947571859251.523 and parameters: {'C': 0.10099259021585616, 'epsilon': 0.9011502762452713, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  86%|████████▌ | 86/100 [01:35<00:13,  1.03it/s, 95.61/7200 seconds]

[I 2025-11-11 12:56:44,667] Trial 85 finished with value: -15126662240171.03 and parameters: {'C': 482.31511642369685, 'epsilon': 0.8678174362898281, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 54. Best value: -1.17347e+13:  87%|████████▋ | 87/100 [01:36<00:12,  1.07it/s, 96.48/7200 seconds]

[I 2025-11-11 12:56:45,541] Trial 86 finished with value: -12890308324318.398 and parameters: {'C': 764.4684704061847, 'epsilon': 0.8185915742644548, 'gamma': 'auto', 'kernel': 'linear'}. Best is trial 54 with value: -11734718135491.361.


Best trial: 87. Best value: -1.17341e+13:  88%|████████▊ | 88/100 [01:37<00:11,  1.09it/s, 97.34/7200 seconds]

[I 2025-11-11 12:56:46,407] Trial 87 finished with value: -11734133847887.545 and parameters: {'C': 998.9389503962157, 'epsilon': 0.9616645292151559, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  89%|████████▉ | 89/100 [01:38<00:10,  1.09it/s, 98.27/7200 seconds]

[I 2025-11-11 12:56:47,331] Trial 88 finished with value: -23703289420099.945 and parameters: {'C': 561.1823945973701, 'epsilon': 0.9307917846190389, 'gamma': 'scale', 'kernel': 'poly', 'degree': 4}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  90%|█████████ | 90/100 [01:39<00:08,  1.12it/s, 99.10/7200 seconds]

[I 2025-11-11 12:56:48,161] Trial 89 finished with value: -16911112293032.95 and parameters: {'C': 343.2562318595352, 'epsilon': 0.963185016360374, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  91%|█████████ | 91/100 [01:39<00:07,  1.13it/s, 99.98/7200 seconds]

[I 2025-11-11 12:56:49,039] Trial 90 finished with value: -23594404537475.34 and parameters: {'C': 15.573682517469077, 'epsilon': 0.7823934562046206, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  92%|█████████▏| 92/100 [01:40<00:07,  1.12it/s, 100.87/7200 seconds]

[I 2025-11-11 12:56:49,933] Trial 91 finished with value: -11735035521279.879 and parameters: {'C': 997.8902343847948, 'epsilon': 0.9005726141933517, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  93%|█████████▎| 93/100 [01:41<00:06,  1.14it/s, 101.73/7200 seconds]

[I 2025-11-11 12:56:50,788] Trial 92 finished with value: -12769208079420.701 and parameters: {'C': 782.2667173725516, 'epsilon': 0.8790983497956852, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  94%|█████████▍| 94/100 [01:42<00:05,  1.13it/s, 102.62/7200 seconds]

[I 2025-11-11 12:56:51,680] Trial 93 finished with value: -14370595442229.215 and parameters: {'C': 561.3750202758252, 'epsilon': 0.8369693900744054, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  95%|█████████▌| 95/100 [01:43<00:04,  1.08it/s, 103.64/7200 seconds]

[I 2025-11-11 12:56:52,699] Trial 94 finished with value: -16079612532337.957 and parameters: {'C': 404.7417413805582, 'epsilon': 0.9332780009749062, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  96%|█████████▌| 96/100 [01:44<00:03,  1.07it/s, 104.60/7200 seconds]

[I 2025-11-11 12:56:53,661] Trial 95 finished with value: -11746976990788.15 and parameters: {'C': 994.8097560853881, 'epsilon': 0.9875655092386769, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  97%|█████████▋| 97/100 [01:45<00:02,  1.08it/s, 105.50/7200 seconds]

[I 2025-11-11 12:56:54,562] Trial 96 finished with value: -12693743219930.895 and parameters: {'C': 792.3147714857564, 'epsilon': 0.9837570811200905, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  98%|█████████▊| 98/100 [01:46<00:02,  1.04s/it, 106.79/7200 seconds]

[I 2025-11-11 12:56:55,856] Trial 97 finished with value: -23346400497254.477 and parameters: {'C': 642.4832196180124, 'epsilon': 0.9980102488063735, 'gamma': 'scale', 'kernel': 'rbf'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13:  99%|█████████▉| 99/100 [01:47<00:01,  1.05s/it, 107.86/7200 seconds]

[I 2025-11-11 12:56:56,925] Trial 98 finished with value: -18464906570278.965 and parameters: {'C': 244.81671144775592, 'epsilon': 0.9704091832180466, 'gamma': 'scale', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.


Best trial: 87. Best value: -1.17341e+13: 100%|██████████| 100/100 [01:48<00:00,  1.09s/it, 108.86/7200 seconds]


[I 2025-11-11 12:56:57,918] Trial 99 finished with value: -23796188561852.707 and parameters: {'C': 6.66854593360925, 'epsilon': 0.9288872205180897, 'gamma': 'auto', 'kernel': 'linear'}. Best is trial 87 with value: -11734133847887.545.

=== SVR Optuna Results (Optimized for Validation) ===
Completed 100 out of 100 trials
Best trial:
  Trial number: 87
  Value (neg_MSE): -11734133847887.545
  Validation RMSE: $3,425,512.20
  Best params:
    C: 998.9389503962157
    epsilon: 0.9616645292151559
    gamma: scale
    kernel: linear

Training final SVR model with best parameters...

=== Model Performance Across All Sets ===
Training Set:
  RMSE: $3,807,574.61
  MAE: $2,112,581.10
  R²: 0.4451

Validation Set:
  RMSE: $3,425,512.20
  MAE: $2,132,129.29
  R²: 0.4882

Test Set (UNSEEN DATA):
  RMSE: $3,742,553.33
  MAE: $2,203,187.33
  R²: 0.4631

=== Overfitting Analysis ===
✅ Model generalizes well - training and validation performance similar
✅ Validation set is good proxy for test perform

In [66]:
import pandas as pd

print("=== Creating Consolidated DataFrame ===")

# Load all model predictions
print("Loading model prediction files...")
lgbm_df = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/lgb_predictions_regularized.csv')
xgb_df = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/xgb_predictions_regularized.csv')
cb_df = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/catboost_predictions_regularized.csv')
rf_df = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/rf_predictions_regularized.csv')
svr_df = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/svr_predictions.csv')

# ✅ Filter RF to test data only
rf_df = rf_df[rf_df['dataset'] == 'test'].reset_index(drop=True)
print(f"✅ RF filtered to test set only: {len(rf_df)} rows")

print(f"✅ All files loaded. Each contains {len(lgbm_df)} test samples")

# Start with features and actual prices from any model (they're identical)
# Drop the model-specific prediction columns and error columns
consolidated_df = lgbm_df.drop(['lgb_predicted_price', 'prediction_error', 'absolute_error'], axis=1).copy()

# Add all model predictions with corrected column names
consolidated_df['lgbm_predicted'] = lgbm_df['lgb_predicted_price']
consolidated_df['xgb_predicted'] = xgb_df['xgb_predicted_price'] 
consolidated_df['catboost_predicted'] = cb_df['catboost_predicted_price']
consolidated_df['rf_predicted'] = rf_df['rf_predicted_price']
consolidated_df['svr_predicted'] = svr_df['svr_predicted_price']

# Save consolidated results
consolidated_df.to_csv('/Users/clarencemarvin/Downloads/regularized/consolidated_predictions.csv', index=False)

print(f"\n✅ Consolidated dataframe created and saved!")
print(f"📁 File: /Users/clarencemarvin/Downloads/regularized/consolidated_predictions.csv")
print(f"\n📊 Dataframe info:")
print(f"  Shape: {consolidated_df.shape}")
print(f"  Columns: {list(consolidated_df.columns)}")
print(f"\n📋 Sample data:")
print(consolidated_df[['actual_price', 'lgbm_predicted', 'xgb_predicted', 'catboost_predicted', 'rf_predicted', 'svr_predicted']].head())

=== Creating Consolidated DataFrame ===
Loading model prediction files...
✅ RF filtered to test set only: 1232 rows
✅ All files loaded. Each contains 1232 test samples

✅ Consolidated dataframe created and saved!
📁 File: /Users/clarencemarvin/Downloads/regularized/consolidated_predictions.csv

📊 Dataframe info:
  Shape: (1232, 16)
  Columns: ['district', 'bedroom_count', 'property_age', 'saleable_area', 'travel_time_to_cbd', 'total_poi_within_1000m', 'category_Education_within_2000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'pet_policy_binary', 'actual_price', 'lgbm_predicted', 'xgb_predicted', 'catboost_predicted', 'rf_predicted', 'svr_predicted']

📋 Sample data:
   actual_price  lgbm_predicted  xgb_predicted  catboost_predicted  \
0       8700000    8.559522e+06      8258734.5        8.913412e+06   
1      23000000    2.050051e+07     21432890.0        1.995195e+07   
2       7700000    6.030042e+06      6002828.5        6.171043e+06   
3       5000000

In [70]:
import pandas as pd
import numpy as np

print("=== Simple Average Ensemble ===")

# Load the consolidated dataframe
consolidated_df = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/consolidated_predictions.csv')

print(f"Loaded consolidated data: {consolidated_df.shape}")

# Create simple average of all 5 models
consolidated_df['ensemble_simple_avg'] = (
    consolidated_df['lgbm_predicted'] + 
    consolidated_df['xgb_predicted'] + 
    consolidated_df['catboost_predicted'] + 
    consolidated_df['rf_predicted'] + 
    consolidated_df['svr_predicted']
) / 5

# Calculate prediction error
consolidated_df['ensemble_error'] = consolidated_df['actual_price'] - consolidated_df['ensemble_simple_avg']

# Calculate RMSE
ensemble_rmse = np.sqrt(np.mean(consolidated_df['ensemble_error']**2))

print(f"\n🎯 Simple Average Ensemble Results:")
print(f"  RMSE: ${ensemble_rmse:,.2f}")

# For comparison, show individual model RMSEs
print(f"\n📊 Individual Model RMSEs (for comparison):")
individual_models = ['lgbm_predicted', 'xgb_predicted', 'catboost_predicted', 'rf_predicted', 'svr_predicted']

for model in individual_models:
    error = consolidated_df['actual_price'] - consolidated_df[model]
    rmse = np.sqrt(np.mean(error**2))
    model_name = model.replace('_predicted', '').upper()
    print(f"  {model_name}: ${rmse:,.2f}")

print(f"\n📈 Improvement Analysis:")
best_individual = min([np.sqrt(np.mean((consolidated_df['actual_price'] - consolidated_df[model])**2)) for model in individual_models])
print(f"  Best individual model RMSE: ${best_individual:,.2f}")
print(f"  Ensemble RMSE: ${ensemble_rmse:,.2f}")
improvement = best_individual - ensemble_rmse
print(f"  Improvement: ${improvement:,.2f} ({improvement/best_individual*100:.2f}%)")

# Show sample predictions
print(f"\n🔍 Sample Ensemble Predictions:")
sample_cols = ['actual_price', 'xgb_predicted', 'ensemble_simple_avg', 'ensemble_error']
print(consolidated_df[sample_cols].head(10))

=== Simple Average Ensemble ===
Loaded consolidated data: (1232, 16)

🎯 Simple Average Ensemble Results:
  RMSE: $2,019,293.46

📊 Individual Model RMSEs (for comparison):
  LGBM: $1,984,085.78
  XGB: $2,006,796.98
  CATBOOST: $2,016,677.70
  RF: $2,030,651.75
  SVR: $3,745,631.54

📈 Improvement Analysis:
  Best individual model RMSE: $1,984,085.78
  Ensemble RMSE: $2,019,293.46
  Improvement: $-35,207.68 (-1.77%)

🔍 Sample Ensemble Predictions:
   actual_price  xgb_predicted  ensemble_simple_avg  ensemble_error
0       8700000      8258734.5         8.604547e+06    9.545264e+04
1      23000000     21432890.0         1.823278e+07    4.767224e+06
2       7700000      6002828.5         6.155932e+06    1.544068e+06
3       5000000      4592087.0         4.696774e+06    3.032262e+05
4       3800000      5305176.0         5.092641e+06   -1.292641e+06
5       9900000     11165261.0         1.130909e+07   -1.409086e+06
6      23000000     18796276.0         1.773522e+07    5.264783e+06
7      

In [71]:
import numpy as np
import pandas as pd

df = consolidated_df.copy()

# Ensure numeric
pred_cols = [
    'lgbm_predicted',
    'xgb_predicted',
    'rf_predicted',
    'svr_predicted',
    'ensemble_simple_avg'
]

df = df[['actual_price'] + pred_cols].apply(pd.to_numeric, errors='coerce')

# Optionally drop rows with missing targets or predictions
df = df.dropna(subset=['actual_price'] + pred_cols)

y = df['actual_price'].to_numpy(dtype=float)
y_mean = y.mean()

def r2(y, yhat):
    ss_res = np.sum((y - yhat) ** 2)
    ss_tot = np.sum((y - y_mean) ** 2)
    return 1 - ss_res / ss_tot if ss_tot != 0 else np.nan

def mape(y, yhat):
    # Filter out zero targets to avoid division by zero
    mask = y != 0
    if not np.any(mask):
        return np.nan
    return np.mean(np.abs((y[mask] - yhat[mask]) / y[mask])) * 100

def mdape(y, yhat):
    mask = y != 0
    if not np.any(mask):
        return np.nan
    return np.median(np.abs((y[mask] - yhat[mask]) / y[mask])) * 100

results = []
for col in pred_cols:
    yhat = df[col].to_numpy(dtype=float)
    results.append({
        'model': col,
        'R2': r2(y, yhat),
        'MAPE_%': mape(y, yhat),
        'MdAPE_%': mdape(y, yhat)
    })

metrics_df = pd.DataFrame(results).sort_values('R2', ascending=False).reset_index(drop=True)
print(metrics_df)

                 model        R2     MAPE_%    MdAPE_%
0       lgbm_predicted  0.849107  12.141942   8.438442
1        xgb_predicted  0.845633  12.316936   8.573524
2  ensemble_simple_avg  0.843705  12.898161   9.089698
3         rf_predicted  0.841942  13.869890   9.346203
4        svr_predicted  0.462230  23.103064  17.219843


In [3]:
import pandas as pd
import numpy as np

print("=== Simple Average Ensemble (4 Models - Excluding SVR) ===")

# Load the consolidated dataframe
consolidated_df = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/consolidated_predictions.csv')

print(f"Loaded consolidated data: {consolidated_df.shape}")

# Create simple average of 4 models (excluding SVR)
consolidated_df['ensemble_avg_4models'] = (
    consolidated_df['lgbm_predicted'] + 
    consolidated_df['xgb_predicted'] + 
    consolidated_df['catboost_predicted'] + 
    consolidated_df['rf_predicted']
) / 4

# Calculate prediction error
consolidated_df['ensemble_error'] = consolidated_df['actual_price'] - consolidated_df['ensemble_avg_4models']

# Calculate RMSE
ensemble_rmse = np.sqrt(np.mean(consolidated_df['ensemble_error']**2))

print(f"\n🎯 4-Model Average Ensemble Results:")
print(f"  RMSE: ${ensemble_rmse:,.2f}")

# For comparison, show individual model RMSEs (excluding SVR)
print(f"\n📊 Individual Model RMSEs (for comparison):")
individual_models = ['lgbm_predicted', 'xgb_predicted', 'catboost_predicted', 'rf_predicted']

for model in individual_models:
    error = consolidated_df['actual_price'] - consolidated_df[model]
    rmse = np.sqrt(np.mean(error**2))
    model_name = model.replace('_predicted', '').upper()
    print(f"  {model_name}: ${rmse:,.2f}")

print(f"\n📈 Improvement Analysis:")
best_individual = min([np.sqrt(np.mean((consolidated_df['actual_price'] - consolidated_df[model])**2)) for model in individual_models])
print(f"  Best individual model RMSE: ${best_individual:,.2f}")
print(f"  4-Model Ensemble RMSE: ${ensemble_rmse:,.2f}")
improvement = best_individual - ensemble_rmse
print(f"  Improvement: ${improvement:,.2f} ({improvement/best_individual*100:.2f}%)")

# Show sample predictions
print(f"\n🔍 Sample Ensemble Predictions:")
sample_cols = ['actual_price', 'xgb_predicted', 'ensemble_avg_4models', 'ensemble_error']
print(consolidated_df[sample_cols].head(10))

=== Simple Average Ensemble (4 Models - Excluding SVR) ===
Loaded consolidated data: (1232, 17)

🎯 4-Model Average Ensemble Results:
  RMSE: $1,962,236.31

📊 Individual Model RMSEs (for comparison):
  LGBM: $2,025,296.28
  XGB: $2,010,380.24
  CATBOOST: $2,016,677.70
  RF: $2,060,934.25

📈 Improvement Analysis:
  Best individual model RMSE: $2,010,380.24
  4-Model Ensemble RMSE: $1,962,236.31
  Improvement: $48,143.92 (2.39%)

🔍 Sample Ensemble Predictions:
   actual_price  xgb_predicted  ensemble_avg_4models  ensemble_error
0       8700000      8597520.0          8.513770e+06    1.862300e+05
1      23000000     20328726.0          1.978787e+07    3.212131e+06
2       7700000      6100254.5          6.097540e+06    1.602460e+06
3       5000000      4430923.5          4.425412e+06    5.745879e+05
4       3800000      5217821.5          4.877022e+06   -1.077022e+06
5       9900000     11267624.0          1.147948e+07   -1.579478e+06
6      23000000     17939796.0          1.886848e+07   

In [26]:
import pandas as pd

# Load all the "all_data" CSV files from each model
lgb_all = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/lgb_predictions_all_data.csv')
xgb_all = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/xgb_predictions_all_data.csv')
catboost_all = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/catboost_predictions_all_data.csv')
rf_all = pd.read_csv('/Users/clarencemarvin/Downloads/regularized/rf_predictions_regularized.csv')

# Start with LightGBM as the base (has all features + actual_price + dataset)
all_consolidated_df = lgb_all[['district', 'bedroom_count', 'property_age', 'saleable_area',
       'travel_time_to_cbd', 'walking_time_to_mtr', 'total_poi_within_1000m',
       'category_Education_within_2000m', 'category_Medical_within_2000m',
       'category_Public_Market_within_1000m', 'pet_policy_binary', 'dataset',
       'actual_price', 'lgb_predicted_price', 'prediction_error',
       'absolute_error']].copy()

# Add prediction columns from each model
all_consolidated_df['lgbm_predicted'] = lgb_all['lgb_predicted_price']
all_consolidated_df['xgb_predicted'] = xgb_all['xgb_predicted_price']
all_consolidated_df['catboost_predicted'] = catboost_all['catboost_predicted_price']
all_consolidated_df['rf_predicted'] = rf_all['rf_predicted_price']

# Save the consolidated file
all_consolidated_df.to_csv('/Users/clarencemarvin/Downloads/regularized/all_consolidated_predictions.csv', index=False)

print(f"✅ Created consolidated DataFrame with {len(all_consolidated_df):,} rows")
print(f"Columns: {list(all_consolidated_df.columns)}")
print(f"\nDataset breakdown:")
print(all_consolidated_df['dataset'].value_counts())

✅ Created consolidated DataFrame with 6,156 rows
Columns: ['district', 'bedroom_count', 'property_age', 'saleable_area', 'travel_time_to_cbd', 'walking_time_to_mtr', 'total_poi_within_1000m', 'category_Education_within_2000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'pet_policy_binary', 'dataset', 'actual_price', 'lgb_predicted_price', 'prediction_error', 'absolute_error', 'lgbm_predicted', 'xgb_predicted', 'catboost_predicted', 'rf_predicted']

Dataset breakdown:
dataset
train         3693
test          1232
validation    1231
Name: count, dtype: int64


In [28]:
all_consolidated_df.head()

Unnamed: 0,district,bedroom_count,property_age,saleable_area,travel_time_to_cbd,walking_time_to_mtr,total_poi_within_1000m,category_Education_within_2000m,category_Medical_within_2000m,category_Public_Market_within_1000m,pet_policy_binary,dataset,actual_price,lgb_predicted_price,prediction_error,absolute_error,lgbm_predicted,xgb_predicted,catboost_predicted,rf_predicted
0,Ho Man Tin,3,36,978,26,16,838,484,64,3,1,train,12500000,12371050.0,128949.1,128949.1,12371050.0,12540288.0,14222190.0,12491920.0
1,Happy Valley,3,40,750,21,9,797,164,27,5,1,train,13000000,13282400.0,-282398.9,282398.9,13282400.0,12856448.0,12395820.0,12847520.0
2,Tai Wai,1,37,282,45,18,391,115,6,10,1,train,3300000,3801891.0,-501891.4,501891.4,3801891.0,3411503.0,4082155.0,3662088.0
3,Sai Ying Pun,3,48,737,14,9,760,145,25,7,1,train,10680000,10654840.0,25164.39,25164.39,10654840.0,10797545.0,11854820.0,11252190.0
4,Tsing Yi,2,39,381,41,19,484,112,5,6,1,train,2900000,4127934.0,-1227934.0,1227934.0,4127934.0,4054065.0,4219507.0,3968541.0


In [31]:
df = all_consolidated_df.copy()

df['lgbm_error'] = abs(df['lgbm_predicted'] - df['actual_price'])
df['xgb_error'] = abs(df['xgb_predicted'] - df['actual_price'])
df['catboost_error'] = abs(df['catboost_predicted'] - df['actual_price'])
df['rf_error'] = abs(df['rf_predicted'] - df['actual_price'])

error_columns = ['lgbm_error', 'xgb_error', 'catboost_error', 'rf_error']
best_model_idx = df[error_columns].idxmin(axis=1)

model_mapping = {
    'lgbm_error': 'LGBM',
    'xgb_error': 'XGB',
    'catboost_error': 'CatBoost',
    'rf_error': 'RF'
}
df['best_model'] = best_model_idx.map(model_mapping)

df['dataset'].value_counts()

dataset
train         3693
test          1232
validation    1231
Name: count, dtype: int64

In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# ============================================================================
# STEP 1: Define Feature and Target Columns
# ============================================================================

print("="*80)
print("STEP 1: Defining Features and Target")
print("="*80)

# Define which columns to use for meta-learning (only original features - NO LEAKAGE!)
FEATURE_COLS = ['district', 'bedroom_count', 'property_age', 'saleable_area',
       'travel_time_to_cbd', 'walking_time_to_mtr', 'total_poi_within_1000m',
       'category_Education_within_2000m', 'category_Medical_within_2000m',
       'category_Public_Market_within_1000m', 'pet_policy_binary']

# Target columns
PRICE_TARGET = 'actual_price'  # For base learners (regression)
META_TARGET = 'best_model'  # For meta-classifiers (which model is best)

print(f"Feature columns: {FEATURE_COLS}")
print(f"Price target: {PRICE_TARGET}")
print(f"Meta target: {META_TARGET}")

# ============================================================================
# STEP 2: Extract Features and Target from your dataframe
# ============================================================================

print("\n" + "="*80)
print("STEP 2: Extracting Data")
print("="*80)

# Assuming your dataframe is called 'df'
df = all_consolidated_df.copy()
X = df[FEATURE_COLS].copy()
y_price = df[PRICE_TARGET].copy()
y_meta = df[META_TARGET].copy() if META_TARGET in df.columns else None

print(f"\nOriginal data shape: {X.shape}")
print(f"Target (price) shape: {y_price.shape}")
if y_meta is not None:
    print(f"Target (best_model) shape: {y_meta.shape}")
    print(f"Best model distribution:\n{y_meta.value_counts()}")

# Check data types
print(f"\nFeature data types BEFORE conversion:")
print(X.dtypes)

# CRITICAL: Convert object columns to category dtype for tree models
print("\nConverting object columns to category...")
for col in X.columns:
    if X[col].dtype == 'object':
        X[col] = X[col].astype('category')
        print(f"  Converted {col}: object → category")

print(f"\nFeature data types AFTER conversion:")
print(X.dtypes)

# ============================================================================
# STEP 3: Split Data into Train (64%), Validation (16%), Test (20%)
# ============================================================================

print("\n" + "="*80)
print("STEP 3: Splitting Data")
print("="*80)

# First split: 80% train+val, 20% test
X_temp, X_test, y_price_temp, y_test_price = train_test_split(
    X, y_price, test_size=0.20, random_state=42
)

if y_meta is not None:
    _, y_test_meta = train_test_split(
        y_meta, test_size=0.20, random_state=42
    )

# Second split: 80% train, 20% val (of the 80%)
X_train, X_val, y_train_price, y_val_price = train_test_split(
    X_temp, y_price_temp, test_size=0.20, random_state=42
)

if y_meta is not None:
    y_meta_temp = y_meta.iloc[X_temp.index]
    _, y_val_meta = train_test_split(
        y_meta_temp, test_size=0.20, random_state=42
    )

print(f"\nTrain set: {X_train.shape}")
print(f"Validation set: {X_val.shape}")
print(f"Test set: {X_test.shape}")

# Rename for clarity
y_train = y_train_price
y_val = y_val_price
y_test = y_test_price

# ============================================================================
# STEP 4: Identify Categorical Columns
# ============================================================================

print("\n" + "="*80)
print("STEP 4: Identifying Categorical Columns")
print("="*80)

# Identify categorical columns (now they should all be 'category' dtype)
cat_columns = X_train.select_dtypes(include=['category']).columns.tolist()
print(f"\nCategorical columns detected: {cat_columns}")

# Store for later use
CAT_FEATURES = cat_columns.copy()

# ============================================================================
# STEP 5: Create Dataset Versions for Each Algorithm
# ============================================================================

print("\n" + "="*80)
print("STEP 5: Creating Algorithm-Specific Datasets")
print("="*80)

# ----------------------------------------------------------------------------
# 5.1 For LightGBM - Keep original (handles categorical natively)
# ----------------------------------------------------------------------------
print("\n[1/4] LightGBM datasets - keeping original with categorical")
# X_train, X_val, X_test already defined above - no changes needed
print(f"  Train: {X_train.shape}")
print(f"  Val: {X_val.shape}")
print(f"  Test: {X_test.shape}")
print(f"  District dtype: {X_train['district'].dtype}")

# ----------------------------------------------------------------------------
# 5.2 For XGBoost - Keep original (already category dtype)
# ----------------------------------------------------------------------------
print("\n[2/4] XGBoost datasets - keeping original with categorical dtype")
X_train_xgb = X_train.copy()
X_val_xgb = X_val.copy()
X_test_xgb = X_test.copy()

print(f"  Train: {X_train_xgb.shape}, district dtype: {X_train_xgb['district'].dtype}")
print(f"  Val: {X_val_xgb.shape}")
print(f"  Test: {X_test_xgb.shape}")
print(f"  ⚠️  Remember to use: enable_categorical=True, tree_method='hist'")

# ----------------------------------------------------------------------------
# 5.3 For CatBoost - Keep original (handles categorical natively)
# ----------------------------------------------------------------------------
print("\n[3/4] CatBoost datasets - keeping original with categorical")
X_train_cb = X_train.copy()
X_val_cb = X_val.copy()
X_test_cb = X_test.copy()

print(f"  Train: {X_train_cb.shape}")
print(f"  Val: {X_val_cb.shape}")
print(f"  Test: {X_test_cb.shape}")
print(f"  Categorical features: {CAT_FEATURES}")

# ----------------------------------------------------------------------------
# 5.4 For Random Forest - Encode categorical columns
# ----------------------------------------------------------------------------
print("\n[4/4] Random Forest datasets - encoding categorical")
X_train_rf = X_train.copy()
X_val_rf = X_val.copy()
X_test_rf = X_test.copy()

# Encode each categorical column
label_encoders = {}
for col in cat_columns:
    if col in X_train_rf.columns:
        le = LabelEncoder()
        
        # Fit on all unique values from train, val, and test
        all_vals = pd.concat([
            X_train_rf[col].astype(str), 
            X_val_rf[col].astype(str), 
            X_test_rf[col].astype(str)
        ]).unique()
        le.fit(all_vals)
        
        # Transform
        X_train_rf[col] = le.transform(X_train_rf[col].astype(str))
        X_val_rf[col] = le.transform(X_val_rf[col].astype(str))
        X_test_rf[col] = le.transform(X_test_rf[col].astype(str))
        
        # Store encoder
        label_encoders[col] = le

print(f"  Train: {X_train_rf.shape}")
print(f"  Val: {X_val_rf.shape}")
print(f"  Test: {X_test_rf.shape}")
print(f"  Encoded columns: {list(label_encoders.keys())}")

# ============================================================================
# SUMMARY
# ============================================================================

print("\n" + "="*80)
print("SUMMARY - All Datasets Ready!")
print("="*80)

print("\n📊 Dataset Splits:")
print(f"  Train: {len(X_train):,} samples ({len(X_train)/len(X)*100:.1f}%)")
print(f"  Val:   {len(X_val):,} samples ({len(X_val)/len(X)*100:.1f}%)")
print(f"  Test:  {len(X_test):,} samples ({len(X_test)/len(X)*100:.1f}%)")

print("\n🎯 Target Variables:")
print(f"  y_train: {y_train.shape} (price)")
print(f"  y_val: {y_val.shape} (price)")
print(f"  y_test: {y_test.shape} (price)")
if y_meta is not None:
    print(f"  y_val_meta: {y_val_meta.shape} (best model labels)")
    print(f"  y_test_meta: {y_test_meta.shape} (best model labels)")

print("\n🤖 Algorithm-Specific Datasets:")
print("  ✓ LightGBM: X_train, X_val, X_test (category dtype)")
print("  ✓ XGBoost:  X_train_xgb, X_val_xgb, X_test_xgb (category dtype)")
print("             Use: enable_categorical=True, tree_method='hist'")
print("  ✓ CatBoost: X_train_cb, X_val_cb, X_test_cb (category dtype)")
print("  ✓ Random Forest: X_train_rf, X_val_rf, X_test_rf (label encoded)")

print("\n📝 Categorical Features:")
print(f"  CAT_FEATURES = {CAT_FEATURES}")

print("\n✅ Data splitting complete! Ready for model training.")
print("="*80)

#keknya boleh hapus

STEP 1: Defining Features and Target
Feature columns: ['district', 'bedroom_count', 'property_age', 'saleable_area', 'travel_time_to_cbd', 'walking_time_to_mtr', 'total_poi_within_1000m', 'category_Education_within_2000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'pet_policy_binary']
Price target: actual_price
Meta target: best_model

STEP 2: Extracting Data

Original data shape: (1232, 11)
Target (price) shape: (1232,)
Target (best_model) shape: (1232,)
Best model distribution:
best_model
RF          333
CatBoost    314
XGB         310
LGBM        275
Name: count, dtype: int64

Feature data types BEFORE conversion:
district                               object
bedroom_count                           int64
property_age                            int64
saleable_area                           int64
travel_time_to_cbd                      int64
walking_time_to_mtr                     int64
total_poi_within_1000m                  int64
category_Education_with

In [38]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import optuna
from sklearn.metrics import accuracy_score, classification_report
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
import pickle

FEATURE_COLS = ['district', 'bedroom_count', 'property_age', 'saleable_area',
                'travel_time_to_cbd', 'walking_time_to_mtr', 'total_poi_within_1000m',
                'category_Education_within_2000m', 'category_Medical_within_2000m',
                'category_Public_Market_within_1000m', 'pet_policy_binary']
TARGET_COL = 'best_model'

X = df[FEATURE_COLS].copy()
y = df[TARGET_COL].copy()
dataset_split = df['dataset'].copy()

X['district'] = X['district'].astype('category')

X_train = X[dataset_split == 'train'].copy()
X_val = X[dataset_split == 'validation'].copy()
X_test = X[dataset_split == 'test'].copy()

y_train = y[dataset_split == 'train'].copy()
y_val = y[dataset_split == 'validation'].copy()
y_test = y[dataset_split == 'test'].copy()

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

CAT_FEATURES = ['district']

# ============================================================================
# Create algorithm-specific datasets
# ============================================================================

X_train_lgbm = X_train.copy()
X_val_lgbm = X_val.copy()
X_test_lgbm = X_test.copy()

X_train_xgb = X_train.copy()
X_val_xgb = X_val.copy()
X_test_xgb = X_test.copy()
X_train_xgb['district'] = X_train_xgb['district'].cat.codes
X_val_xgb['district'] = X_val_xgb['district'].cat.codes
X_test_xgb['district'] = X_test_xgb['district'].cat.codes

X_train_cb = X_train.copy()
X_val_cb = X_val.copy()
X_test_cb = X_test.copy()

X_train_rf = X_train.copy()
X_val_rf = X_val.copy()
X_test_rf = X_test.copy()

le_district = LabelEncoder()
all_districts = pd.concat([X_train_rf['district'], X_val_rf['district'], X_test_rf['district']]).astype(str)
le_district.fit(all_districts)

X_train_rf['district'] = le_district.transform(X_train_rf['district'].astype(str))
X_val_rf['district'] = le_district.transform(X_val_rf['district'].astype(str))
X_test_rf['district'] = le_district.transform(X_test_rf['district'].astype(str))

# ============================================================================
# STEP 2: Train Meta-Classifiers
# ============================================================================

print("\n" + "="*80)
print("STEP 2: Training Meta-Classifiers")
print("="*80)

meta_predictions_test = {}

# ----------------------------------------------------------------------------
# 2.1 LightGBM Meta-Classifier
# ----------------------------------------------------------------------------
print("\n[1/4] Training LightGBM meta-classifier...")

def optimize_lgbm_meta(trial):
    params = {
        'num_leaves': trial.suggest_int('num_leaves', 31, 150),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 15, 35),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.7, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 300),
        'learning_rate': trial.suggest_float('learning_rate', 0.05, 0.2),
        'max_depth': trial.suggest_int('max_depth', 5, 7),
        'reg_alpha': trial.suggest_float('reg_alpha', 2.5, 5.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 2.5, 5.0),
        'objective': 'multiclass',
        'num_class': len(label_encoder.classes_),
        'random_state': 42,
        'verbosity': -1
    }
    model = lgb.LGBMClassifier(**params)
    model.fit(X_train_lgbm, y_train_encoded)
    y_pred = model.predict(X_val_lgbm)
    return accuracy_score(y_val_encoded, y_pred)

study_lgbm_meta = optuna.create_study(direction='maximize')
study_lgbm_meta.optimize(optimize_lgbm_meta, n_trials=50, show_progress_bar=True)
print(f"Best LGBM Meta Validation Accuracy: {study_lgbm_meta.best_value:.4f}")

best_lgbm_meta = lgb.LGBMClassifier(**study_lgbm_meta.best_params)
best_lgbm_meta.fit(X_train_lgbm, y_train_encoded)
meta_predictions_test['LGBM_predict'] = label_encoder.inverse_transform(best_lgbm_meta.predict(X_test_lgbm))

with open('/Users/clarencemarvin/Downloads/regularized/lgbm_meta_model.pkl', 'wb') as f:
    pickle.dump(best_lgbm_meta, f)

# ----------------------------------------------------------------------------
# 2.2 XGBoost Meta-Classifier
# ----------------------------------------------------------------------------
print("\n[2/4] Training XGBoost meta-classifier...")

def optimize_xgb_meta(trial):
    params = {
        'max_depth': trial.suggest_int('max_depth', 3, 8),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 0.85),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 200),
        'reg_alpha': trial.suggest_float('reg_alpha', 2.5, 5.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 2.5, 5.0),
        'random_state': 42,
        'n_jobs': -1
    }
    model = xgb.XGBClassifier(**params)
    model.fit(X_train_xgb, y_train_encoded)
    y_pred = model.predict(X_val_xgb)
    return accuracy_score(y_val_encoded, y_pred)

study_xgb_meta = optuna.create_study(direction='maximize')
study_xgb_meta.optimize(optimize_xgb_meta, n_trials=50, show_progress_bar=True)
print(f"Best XGB Meta Validation Accuracy: {study_xgb_meta.best_value:.4f}")

best_xgb_meta = xgb.XGBClassifier(**study_xgb_meta.best_params)
best_xgb_meta.fit(X_train_xgb, y_train_encoded)
meta_predictions_test['XGB_predict'] = label_encoder.inverse_transform(best_xgb_meta.predict(X_test_xgb))

with open('/Users/clarencemarvin/Downloads/regularized/xgb_meta_model.pkl', 'wb') as f:
    pickle.dump(best_xgb_meta, f)

# ----------------------------------------------------------------------------
# 2.3 CatBoost Meta-Classifier
# ----------------------------------------------------------------------------
print("\n[3/4] Training CatBoost meta-classifier...")

def optimize_cb_meta(trial):
    params = {
        'depth': trial.suggest_int('depth', 4, 8),
        'learning_rate': trial.suggest_float('learning_rate', 0.03, 0.15),
        'iterations': trial.suggest_int('iterations', 100, 300),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1.0, 10.0),
        'border_count': trial.suggest_int('border_count', 32, 128),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_float('random_strength', 0.0, 2.0),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 20, 100),
        'loss_function': 'MultiClass',
        'random_state': 42,
        'verbose': False,
        'cat_features': CAT_FEATURES
    }
    model = CatBoostClassifier(**params)
    model.fit(X_train_cb, y_train_encoded, cat_features=CAT_FEATURES, verbose=False)
    y_pred = model.predict(X_val_cb)
    return accuracy_score(y_val_encoded, y_pred.flatten())

study_cb_meta = optuna.create_study(direction='maximize')
study_cb_meta.optimize(optimize_cb_meta, n_trials=50, show_progress_bar=True)
print(f"Best CatBoost Meta Validation Accuracy: {study_cb_meta.best_value:.4f}")

best_cb_meta = CatBoostClassifier(**study_cb_meta.best_params)
best_cb_meta.fit(X_train_cb, y_train_encoded, cat_features=CAT_FEATURES, verbose=False)
meta_predictions_test['CB_predict'] = label_encoder.inverse_transform(best_cb_meta.predict(X_test_cb).flatten())

with open('/Users/clarencemarvin/Downloads/regularized/catboost_meta_model.pkl', 'wb') as f:
    pickle.dump(best_cb_meta, f)

# ----------------------------------------------------------------------------
# 2.4 Random Forest Meta-Classifier
# ----------------------------------------------------------------------------
print("\n[4/4] Training Random Forest meta-classifier...")

def optimize_rf_meta(trial):
    max_depth_choice = trial.suggest_categorical('max_depth_type', ['int', 'none'])
    max_depth = trial.suggest_int('max_depth', 3, 12) if max_depth_choice == 'int' else None
    
    max_leaf_nodes_choice = trial.suggest_categorical('max_leaf_nodes_type', ['int', 'none'])
    max_leaf_nodes = trial.suggest_int('max_leaf_nodes', 10, 100) if max_leaf_nodes_choice == 'int' else None
    
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_features': trial.suggest_categorical('max_features', [0.3, 0.5, 0.7]),
        'max_depth': max_depth,
        'min_samples_split': trial.suggest_int('min_samples_split', 10, 50),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 5, 20),
        'max_leaf_nodes': max_leaf_nodes,
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
        'random_state': 42,
        'n_jobs': -1
    }
    model = RandomForestClassifier(**params)
    model.fit(X_train_rf, y_train_encoded)
    y_pred = model.predict(X_val_rf)
    return accuracy_score(y_val_encoded, y_pred)

study_rf_meta = optuna.create_study(direction='maximize')
study_rf_meta.optimize(optimize_rf_meta, n_trials=50, show_progress_bar=True)
print(f"Best RF Meta Validation Accuracy: {study_rf_meta.best_value:.4f}")

rf_meta_params = {k: v for k, v in study_rf_meta.best_params.items() 
                  if k not in ['max_depth_type', 'max_leaf_nodes_type']}
rf_meta_params['random_state'] = 42
rf_meta_params['n_jobs'] = -1

best_rf_meta = RandomForestClassifier(**rf_meta_params)
best_rf_meta.fit(X_train_rf, y_train_encoded)
meta_predictions_test['RF_predict'] = label_encoder.inverse_transform(best_rf_meta.predict(X_test_rf))

with open('/Users/clarencemarvin/Downloads/regularized/rf_meta_model.pkl', 'wb') as f:
    pickle.dump(best_rf_meta, f)

# Save label encoder
with open('/Users/clarencemarvin/Downloads/regularized/label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)

# ============================================================================
# STEP 3: Evaluate and Save Results
# ============================================================================

print("\n" + "="*80)
print("STEP 3: Evaluation and Results")
print("="*80)

# Create output dataframe for ALL data (train, validation, test)
output_df = X.copy()
output_df['dataset'] = dataset_split

# Get predictions for each split
meta_predictions_all = {}

# Train predictions
meta_predictions_all['LGBM_train'] = label_encoder.inverse_transform(best_lgbm_meta.predict(X_train_lgbm))
meta_predictions_all['XGB_train'] = label_encoder.inverse_transform(best_xgb_meta.predict(X_train_xgb))
meta_predictions_all['CB_train'] = label_encoder.inverse_transform(best_cb_meta.predict(X_train_cb).flatten())
meta_predictions_all['RF_train'] = label_encoder.inverse_transform(best_rf_meta.predict(X_train_rf))

# Validation predictions
meta_predictions_all['LGBM_val'] = label_encoder.inverse_transform(best_lgbm_meta.predict(X_val_lgbm))
meta_predictions_all['XGB_val'] = label_encoder.inverse_transform(best_xgb_meta.predict(X_val_xgb))
meta_predictions_all['CB_val'] = label_encoder.inverse_transform(best_cb_meta.predict(X_val_cb).flatten())
meta_predictions_all['RF_val'] = label_encoder.inverse_transform(best_rf_meta.predict(X_val_rf))

# Test predictions
meta_predictions_all['LGBM_test'] = label_encoder.inverse_transform(best_lgbm_meta.predict(X_test_lgbm))
meta_predictions_all['XGB_test'] = label_encoder.inverse_transform(best_xgb_meta.predict(X_test_xgb))
meta_predictions_all['CB_test'] = label_encoder.inverse_transform(best_cb_meta.predict(X_test_cb).flatten())
meta_predictions_all['RF_test'] = label_encoder.inverse_transform(best_rf_meta.predict(X_test_rf))

# Combine all predictions back into output_df
output_df['LGBM_predict'] = np.nan
output_df['XGB_predict'] = np.nan
output_df['CB_predict'] = np.nan
output_df['RF_predict'] = np.nan

output_df.loc[dataset_split == 'train', 'LGBM_predict'] = meta_predictions_all['LGBM_train']
output_df.loc[dataset_split == 'train', 'XGB_predict'] = meta_predictions_all['XGB_train']
output_df.loc[dataset_split == 'train', 'CB_predict'] = meta_predictions_all['CB_train']
output_df.loc[dataset_split == 'train', 'RF_predict'] = meta_predictions_all['RF_train']

output_df.loc[dataset_split == 'validation', 'LGBM_predict'] = meta_predictions_all['LGBM_val']
output_df.loc[dataset_split == 'validation', 'XGB_predict'] = meta_predictions_all['XGB_val']
output_df.loc[dataset_split == 'validation', 'CB_predict'] = meta_predictions_all['CB_val']
output_df.loc[dataset_split == 'validation', 'RF_predict'] = meta_predictions_all['RF_val']

output_df.loc[dataset_split == 'test', 'LGBM_predict'] = meta_predictions_all['LGBM_test']
output_df.loc[dataset_split == 'test', 'XGB_predict'] = meta_predictions_all['XGB_test']
output_df.loc[dataset_split == 'test', 'CB_predict'] = meta_predictions_all['CB_test']
output_df.loc[dataset_split == 'test', 'RF_predict'] = meta_predictions_all['RF_test']

# Save
save_path = '/Users/clarencemarvin/Downloads/regularized/meta_classifier_predictions.csv'
output_df.to_csv(save_path, index=False)

print(f"Output shape: {output_df.shape}")
print(f"\nDataset distribution:\n{output_df['dataset'].value_counts()}")

print(f"\n✅ Saved to '{save_path}'")
print(f"\nOutput shape: {output_df.shape}")
print(f"Columns: {list(output_df.columns)}")

print("\nFirst 10 rows:")
print(output_df.head(10))

# Calculate accuracies
print("\n" + "="*80)
print("META-CLASSIFIER ACCURACIES")
print("="*80)

print("\n📊 TRUE Best Model Distribution on Test Set:")
print(y_test.value_counts())

accuracies = {}
accuracies['LGBM_meta'] = accuracy_score(y_test, meta_predictions_test['LGBM_predict'])
accuracies['XGB_meta'] = accuracy_score(y_test, meta_predictions_test['XGB_predict'])
accuracies['CB_meta'] = accuracy_score(y_test, meta_predictions_test['CB_predict'])
accuracies['RF_meta'] = accuracy_score(y_test, meta_predictions_test['RF_predict'])

print("\n🎯 Validation Set Performance:")
print(f"  LGBM Meta:     {study_lgbm_meta.best_value:.4f} ({study_lgbm_meta.best_value*100:.2f}%)")
print(f"  XGB Meta:      {study_xgb_meta.best_value:.4f} ({study_xgb_meta.best_value*100:.2f}%)")
print(f"  CatBoost Meta: {study_cb_meta.best_value:.4f} ({study_cb_meta.best_value*100:.2f}%)")
print(f"  RF Meta:       {study_rf_meta.best_value:.4f} ({study_rf_meta.best_value*100:.2f}%)")

print("\n🎯 Test Set Performance:")
for meta_name, acc in accuracies.items():
    print(f"  {meta_name:15s}: {acc:.4f} ({acc*100:.2f}%)")

best_meta = max(accuracies, key=accuracies.get)
print(f"\n🏆 Best Meta-Classifier: {best_meta} with {accuracies[best_meta]*100:.2f}% test accuracy")

# Detailed classification reports
print("\n" + "="*80)
print("DETAILED CLASSIFICATION REPORTS")
print("="*80)

for name, predictions in [('LGBM', meta_predictions_test['LGBM_predict']),
                          ('XGB', meta_predictions_test['XGB_predict']),
                          ('CatBoost', meta_predictions_test['CB_predict']),
                          ('RF', meta_predictions_test['RF_predict'])]:
    print(f"\n{name} Meta-Classifier:")
    print(classification_report(y_test, predictions))

print("\n✅ All done!")
print("="*80)

[I 2025-11-12 14:18:30,392] A new study created in memory with name: no-name-d042bc7d-c9d2-4153-9956-15b39c95b588


Train: (3693, 11), Val: (1231, 11), Test: (1232, 11)

STEP 2: Training Meta-Classifiers

[1/4] Training LightGBM meta-classifier...


Best trial: 0. Best value: 0.330626:   2%|▏         | 1/50 [00:08<06:34,  8.06s/it]

[I 2025-11-12 14:18:38,465] Trial 0 finished with value: 0.330625507717303 and parameters: {'num_leaves': 56, 'min_data_in_leaf': 30, 'feature_fraction': 0.8008147416462852, 'n_estimators': 285, 'learning_rate': 0.16645755596285394, 'max_depth': 6, 'reg_alpha': 2.6305441857389473, 'reg_lambda': 4.872331305781033}. Best is trial 0 with value: 0.330625507717303.


Best trial: 0. Best value: 0.330626:   4%|▍         | 2/50 [00:13<05:08,  6.43s/it]

[I 2025-11-12 14:18:43,755] Trial 1 finished with value: 0.3062550771730301 and parameters: {'num_leaves': 119, 'min_data_in_leaf': 21, 'feature_fraction': 0.7400213382565642, 'n_estimators': 252, 'learning_rate': 0.07546742571110274, 'max_depth': 5, 'reg_alpha': 2.7051349850355058, 'reg_lambda': 3.647065338749031}. Best is trial 0 with value: 0.330625507717303.


Best trial: 0. Best value: 0.330626:   6%|▌         | 3/50 [00:17<04:18,  5.51s/it]

[I 2025-11-12 14:18:48,168] Trial 2 finished with value: 0.31275385865150285 and parameters: {'num_leaves': 55, 'min_data_in_leaf': 34, 'feature_fraction': 0.8911915558383796, 'n_estimators': 143, 'learning_rate': 0.15840571914386026, 'max_depth': 6, 'reg_alpha': 3.574828424591349, 'reg_lambda': 3.8716655630123338}. Best is trial 0 with value: 0.330625507717303.


Best trial: 0. Best value: 0.330626:   8%|▊         | 4/50 [00:19<03:12,  4.19s/it]

[I 2025-11-12 14:18:50,346] Trial 3 finished with value: 0.30381803411860275 and parameters: {'num_leaves': 46, 'min_data_in_leaf': 18, 'feature_fraction': 0.835749461172459, 'n_estimators': 168, 'learning_rate': 0.1471723519500811, 'max_depth': 5, 'reg_alpha': 4.802671028360844, 'reg_lambda': 4.267520212056934}. Best is trial 0 with value: 0.330625507717303.


Best trial: 0. Best value: 0.330626:  10%|█         | 5/50 [00:23<02:53,  3.86s/it]

[I 2025-11-12 14:18:53,605] Trial 4 finished with value: 0.31112916328188467 and parameters: {'num_leaves': 85, 'min_data_in_leaf': 23, 'feature_fraction': 0.7119783969006882, 'n_estimators': 146, 'learning_rate': 0.06557073993739383, 'max_depth': 6, 'reg_alpha': 2.9801145834267952, 'reg_lambda': 4.113424580374723}. Best is trial 0 with value: 0.330625507717303.


Best trial: 0. Best value: 0.330626:  12%|█▏        | 6/50 [00:26<02:43,  3.72s/it]

[I 2025-11-12 14:18:57,072] Trial 5 finished with value: 0.31600324939073926 and parameters: {'num_leaves': 47, 'min_data_in_leaf': 32, 'feature_fraction': 0.8372028201492548, 'n_estimators': 294, 'learning_rate': 0.11022680494473491, 'max_depth': 5, 'reg_alpha': 4.323536693627129, 'reg_lambda': 3.6162193210200737}. Best is trial 0 with value: 0.330625507717303.


Best trial: 0. Best value: 0.330626:  14%|█▍        | 7/50 [00:30<02:38,  3.68s/it]

[I 2025-11-12 14:19:00,647] Trial 6 finished with value: 0.29975629569455725 and parameters: {'num_leaves': 62, 'min_data_in_leaf': 28, 'feature_fraction': 0.9071182241821123, 'n_estimators': 122, 'learning_rate': 0.05780051816111279, 'max_depth': 6, 'reg_alpha': 2.5946553522651388, 'reg_lambda': 3.46621120287661}. Best is trial 0 with value: 0.330625507717303.


Best trial: 0. Best value: 0.330626:  16%|█▌        | 8/50 [00:35<02:54,  4.14s/it]

[I 2025-11-12 14:19:05,780] Trial 7 finished with value: 0.30787977254264826 and parameters: {'num_leaves': 118, 'min_data_in_leaf': 33, 'feature_fraction': 0.7398051919420732, 'n_estimators': 289, 'learning_rate': 0.11419099100191692, 'max_depth': 6, 'reg_alpha': 4.386697090148052, 'reg_lambda': 4.72265764634659}. Best is trial 0 with value: 0.330625507717303.


Best trial: 0. Best value: 0.330626:  18%|█▊        | 9/50 [00:41<03:18,  4.83s/it]

[I 2025-11-12 14:19:12,134] Trial 8 finished with value: 0.31600324939073926 and parameters: {'num_leaves': 43, 'min_data_in_leaf': 35, 'feature_fraction': 0.8133718788438199, 'n_estimators': 242, 'learning_rate': 0.09764602324377486, 'max_depth': 7, 'reg_alpha': 4.787176919784511, 'reg_lambda': 4.081805325575327}. Best is trial 0 with value: 0.330625507717303.


Best trial: 0. Best value: 0.330626:  20%|██        | 10/50 [00:46<03:16,  4.91s/it]

[I 2025-11-12 14:19:17,233] Trial 9 finished with value: 0.3151909017059301 and parameters: {'num_leaves': 51, 'min_data_in_leaf': 19, 'feature_fraction': 0.7348546563224796, 'n_estimators': 198, 'learning_rate': 0.09558727298850865, 'max_depth': 7, 'reg_alpha': 4.434137807150335, 'reg_lambda': 3.887195516760513}. Best is trial 0 with value: 0.330625507717303.


Best trial: 10. Best value: 0.340374:  22%|██▏       | 11/50 [00:51<03:05,  4.77s/it]

[I 2025-11-12 14:19:21,669] Trial 10 finished with value: 0.34037367993501216 and parameters: {'num_leaves': 146, 'min_data_in_leaf': 27, 'feature_fraction': 0.9857649387128598, 'n_estimators': 240, 'learning_rate': 0.19980805526470108, 'max_depth': 7, 'reg_alpha': 3.4008790232572967, 'reg_lambda': 2.688340001486651}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  24%|██▍       | 12/50 [00:56<03:04,  4.86s/it]

[I 2025-11-12 14:19:26,748] Trial 11 finished with value: 0.3233143785540211 and parameters: {'num_leaves': 150, 'min_data_in_leaf': 27, 'feature_fraction': 0.9995207903404002, 'n_estimators': 242, 'learning_rate': 0.19850197631797453, 'max_depth': 7, 'reg_alpha': 3.3532458013275503, 'reg_lambda': 2.6432953348035184}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  26%|██▌       | 13/50 [01:01<03:05,  5.01s/it]

[I 2025-11-12 14:19:32,097] Trial 12 finished with value: 0.3314378554021121 and parameters: {'num_leaves': 81, 'min_data_in_leaf': 28, 'feature_fraction': 0.9996192099063185, 'n_estimators': 267, 'learning_rate': 0.19923378013937743, 'max_depth': 7, 'reg_alpha': 3.0729732638553355, 'reg_lambda': 2.763694037905116}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  28%|██▊       | 14/50 [01:07<03:12,  5.35s/it]

[I 2025-11-12 14:19:38,248] Trial 13 finished with value: 0.3314378554021121 and parameters: {'num_leaves': 88, 'min_data_in_leaf': 25, 'feature_fraction': 0.9960386060831362, 'n_estimators': 213, 'learning_rate': 0.19865707849961284, 'max_depth': 7, 'reg_alpha': 3.1674596849534944, 'reg_lambda': 2.5061333479987384}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  30%|███       | 15/50 [01:12<02:57,  5.07s/it]

[I 2025-11-12 14:19:42,665] Trial 14 finished with value: 0.3322502030869212 and parameters: {'num_leaves': 112, 'min_data_in_leaf': 15, 'feature_fraction': 0.9432230750399658, 'n_estimators': 211, 'learning_rate': 0.17694523893256886, 'max_depth': 7, 'reg_alpha': 3.9100051388639825, 'reg_lambda': 2.947145418168499}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  32%|███▏      | 16/50 [01:17<02:58,  5.26s/it]

[I 2025-11-12 14:19:48,364] Trial 15 finished with value: 0.3298131600324939 and parameters: {'num_leaves': 150, 'min_data_in_leaf': 17, 'feature_fraction': 0.9386330702043071, 'n_estimators': 212, 'learning_rate': 0.17460648100232135, 'max_depth': 7, 'reg_alpha': 3.960893247799682, 'reg_lambda': 3.10488462916662}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  34%|███▍      | 17/50 [01:24<03:06,  5.64s/it]

[I 2025-11-12 14:19:54,894] Trial 16 finished with value: 0.3273761169780666 and parameters: {'num_leaves': 124, 'min_data_in_leaf': 15, 'feature_fraction': 0.947762943078415, 'n_estimators': 182, 'learning_rate': 0.1392595590703227, 'max_depth': 7, 'reg_alpha': 3.9732554481300433, 'reg_lambda': 3.111363882925613}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  36%|███▌      | 18/50 [01:30<03:05,  5.80s/it]

[I 2025-11-12 14:20:01,076] Trial 17 finished with value: 0.3363119415109667 and parameters: {'num_leaves': 134, 'min_data_in_leaf': 23, 'feature_fraction': 0.9520764066817899, 'n_estimators': 226, 'learning_rate': 0.18040431585180555, 'max_depth': 7, 'reg_alpha': 3.6056363219903926, 'reg_lambda': 2.957483207609436}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  38%|███▊      | 19/50 [01:36<02:58,  5.75s/it]

[I 2025-11-12 14:20:06,684] Trial 18 finished with value: 0.338748984565394 and parameters: {'num_leaves': 135, 'min_data_in_leaf': 24, 'feature_fraction': 0.8795129673362245, 'n_estimators': 227, 'learning_rate': 0.1828979365387171, 'max_depth': 6, 'reg_alpha': 3.554842017323478, 'reg_lambda': 3.357804354100649}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  40%|████      | 20/50 [01:41<02:49,  5.64s/it]

[I 2025-11-12 14:20:12,067] Trial 19 finished with value: 0.3322502030869212 and parameters: {'num_leaves': 135, 'min_data_in_leaf': 25, 'feature_fraction': 0.8841009985492929, 'n_estimators': 263, 'learning_rate': 0.13939560549370456, 'max_depth': 6, 'reg_alpha': 3.3739197041732703, 'reg_lambda': 3.3416519480657234}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  42%|████▏     | 21/50 [01:46<02:36,  5.40s/it]

[I 2025-11-12 14:20:16,916] Trial 20 finished with value: 0.31844029244516653 and parameters: {'num_leaves': 106, 'min_data_in_leaf': 30, 'feature_fraction': 0.7929442286595471, 'n_estimators': 179, 'learning_rate': 0.18516644924168663, 'max_depth': 6, 'reg_alpha': 3.7789585108241988, 'reg_lambda': 3.276173964436129}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  44%|████▍     | 22/50 [01:51<02:24,  5.17s/it]

[I 2025-11-12 14:20:21,548] Trial 21 finished with value: 0.3322502030869212 and parameters: {'num_leaves': 137, 'min_data_in_leaf': 22, 'feature_fraction': 0.9621100756002058, 'n_estimators': 225, 'learning_rate': 0.18509646545346922, 'max_depth': 6, 'reg_alpha': 3.53961132488512, 'reg_lambda': 2.8326899537158114}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  46%|████▌     | 23/50 [01:58<02:34,  5.71s/it]

[I 2025-11-12 14:20:28,527] Trial 22 finished with value: 0.3338748984565394 and parameters: {'num_leaves': 134, 'min_data_in_leaf': 24, 'feature_fraction': 0.9162488183857284, 'n_estimators': 231, 'learning_rate': 0.16138101941140592, 'max_depth': 7, 'reg_alpha': 3.4921963491303547, 'reg_lambda': 2.9303521773509438}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  48%|████▊     | 24/50 [02:02<02:21,  5.45s/it]

[I 2025-11-12 14:20:33,350] Trial 23 finished with value: 0.3290008123476848 and parameters: {'num_leaves': 101, 'min_data_in_leaf': 20, 'feature_fraction': 0.9706880070397964, 'n_estimators': 265, 'learning_rate': 0.18448493609068342, 'max_depth': 6, 'reg_alpha': 3.7308007412998, 'reg_lambda': 3.135231775942466}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  50%|█████     | 25/50 [02:09<02:27,  5.89s/it]

[I 2025-11-12 14:20:40,258] Trial 24 finished with value: 0.3330625507717303 and parameters: {'num_leaves': 141, 'min_data_in_leaf': 27, 'feature_fraction': 0.8665138524058992, 'n_estimators': 194, 'learning_rate': 0.15062918255440755, 'max_depth': 7, 'reg_alpha': 2.878396032373372, 'reg_lambda': 2.5110361314927716}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  52%|█████▏    | 26/50 [02:15<02:19,  5.82s/it]

[I 2025-11-12 14:20:45,914] Trial 25 finished with value: 0.314378554021121 and parameters: {'num_leaves': 128, 'min_data_in_leaf': 26, 'feature_fraction': 0.9228909497856342, 'n_estimators': 225, 'learning_rate': 0.1722098946294409, 'max_depth': 5, 'reg_alpha': 4.164075202487943, 'reg_lambda': 3.3995372267402675}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  54%|█████▍    | 27/50 [02:21<02:15,  5.87s/it]

[I 2025-11-12 14:20:51,921] Trial 26 finished with value: 0.3290008123476848 and parameters: {'num_leaves': 145, 'min_data_in_leaf': 23, 'feature_fraction': 0.9724936336508399, 'n_estimators': 237, 'learning_rate': 0.1881112864374637, 'max_depth': 7, 'reg_alpha': 3.250483131662356, 'reg_lambda': 2.714062925985858}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  56%|█████▌    | 28/50 [02:27<02:07,  5.80s/it]

[I 2025-11-12 14:20:57,564] Trial 27 finished with value: 0.3314378554021121 and parameters: {'num_leaves': 131, 'min_data_in_leaf': 30, 'feature_fraction': 0.8689177315579064, 'n_estimators': 270, 'learning_rate': 0.15561832454605198, 'max_depth': 6, 'reg_alpha': 3.7252207960437724, 'reg_lambda': 3.04557958290053}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  58%|█████▊    | 29/50 [02:33<02:08,  6.10s/it]

[I 2025-11-12 14:21:04,369] Trial 28 finished with value: 0.330625507717303 and parameters: {'num_leaves': 73, 'min_data_in_leaf': 21, 'feature_fraction': 0.9786122623341075, 'n_estimators': 250, 'learning_rate': 0.135997041679882, 'max_depth': 7, 'reg_alpha': 4.116645816984014, 'reg_lambda': 3.2138702336314826}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  60%|██████    | 30/50 [02:41<02:12,  6.62s/it]

[I 2025-11-12 14:21:12,188] Trial 29 finished with value: 0.32656376929325753 and parameters: {'num_leaves': 34, 'min_data_in_leaf': 24, 'feature_fraction': 0.9326650388102208, 'n_estimators': 278, 'learning_rate': 0.16866836990545864, 'max_depth': 6, 'reg_alpha': 3.338651705733282, 'reg_lambda': 4.998272093899951}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  62%|██████▏   | 31/50 [02:50<02:14,  7.10s/it]

[I 2025-11-12 14:21:20,418] Trial 30 finished with value: 0.3290008123476848 and parameters: {'num_leaves': 104, 'min_data_in_leaf': 29, 'feature_fraction': 0.8991851069714875, 'n_estimators': 219, 'learning_rate': 0.12757867890886135, 'max_depth': 7, 'reg_alpha': 2.7812226271259717, 'reg_lambda': 3.5092583577196814}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  64%|██████▍   | 32/50 [02:56<02:02,  6.81s/it]

[I 2025-11-12 14:21:26,532] Trial 31 finished with value: 0.3346872461413485 and parameters: {'num_leaves': 140, 'min_data_in_leaf': 24, 'feature_fraction': 0.9196424066558501, 'n_estimators': 233, 'learning_rate': 0.16147304649366545, 'max_depth': 7, 'reg_alpha': 3.58682867645963, 'reg_lambda': 2.9130081765848845}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  66%|██████▌   | 33/50 [03:01<01:46,  6.26s/it]

[I 2025-11-12 14:21:31,507] Trial 32 finished with value: 0.330625507717303 and parameters: {'num_leaves': 122, 'min_data_in_leaf': 22, 'feature_fraction': 0.9555098606857052, 'n_estimators': 252, 'learning_rate': 0.19087417308546198, 'max_depth': 7, 'reg_alpha': 3.7013139191407824, 'reg_lambda': 2.9289555548338058}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  68%|██████▊   | 34/50 [03:08<01:43,  6.49s/it]

[I 2025-11-12 14:21:38,540] Trial 33 finished with value: 0.3298131600324939 and parameters: {'num_leaves': 145, 'min_data_in_leaf': 26, 'feature_fraction': 0.8778397306614922, 'n_estimators': 203, 'learning_rate': 0.16569905633197626, 'max_depth': 7, 'reg_alpha': 3.5170476874996583, 'reg_lambda': 2.6776698842989237}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  70%|███████   | 35/50 [03:14<01:37,  6.53s/it]

[I 2025-11-12 14:21:45,165] Trial 34 finished with value: 0.3314378554021121 and parameters: {'num_leaves': 139, 'min_data_in_leaf': 24, 'feature_fraction': 0.9210426388697269, 'n_estimators': 231, 'learning_rate': 0.17798703411741512, 'max_depth': 6, 'reg_alpha': 3.0910443534180434, 'reg_lambda': 2.8407854122057827}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  72%|███████▏  | 36/50 [03:19<01:22,  5.92s/it]

[I 2025-11-12 14:21:49,657] Trial 35 finished with value: 0.3200649878147847 and parameters: {'num_leaves': 128, 'min_data_in_leaf': 21, 'feature_fraction': 0.8528799680469014, 'n_estimators': 251, 'learning_rate': 0.1627681246881318, 'max_depth': 5, 'reg_alpha': 3.6060454505377364, 'reg_lambda': 2.9746523556019833}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  74%|███████▍  | 37/50 [03:24<01:15,  5.83s/it]

[I 2025-11-12 14:21:55,290] Trial 36 finished with value: 0.3290008123476848 and parameters: {'num_leaves': 116, 'min_data_in_leaf': 26, 'feature_fraction': 0.9004660415885463, 'n_estimators': 160, 'learning_rate': 0.1909184166596547, 'max_depth': 7, 'reg_alpha': 3.4083614472430606, 'reg_lambda': 3.25071425487474}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  76%|███████▌  | 38/50 [03:32<01:14,  6.22s/it]

[I 2025-11-12 14:22:02,414] Trial 37 finished with value: 0.32168968318440294 and parameters: {'num_leaves': 143, 'min_data_in_leaf': 23, 'feature_fraction': 0.982443233452291, 'n_estimators': 188, 'learning_rate': 0.17923032051392346, 'max_depth': 6, 'reg_alpha': 3.2361493660591756, 'reg_lambda': 3.7024572087772256}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  78%|███████▊  | 39/50 [03:36<01:03,  5.77s/it]

[I 2025-11-12 14:22:07,134] Trial 38 finished with value: 0.3233143785540211 and parameters: {'num_leaves': 112, 'min_data_in_leaf': 22, 'feature_fraction': 0.7741903009301329, 'n_estimators': 114, 'learning_rate': 0.15069803716002883, 'max_depth': 7, 'reg_alpha': 3.8203783284505004, 'reg_lambda': 2.5974588073840588}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  80%|████████  | 40/50 [03:40<00:52,  5.21s/it]

[I 2025-11-12 14:22:11,022] Trial 39 finished with value: 0.3192526401299756 and parameters: {'num_leaves': 126, 'min_data_in_leaf': 20, 'feature_fraction': 0.8302262361388137, 'n_estimators': 204, 'learning_rate': 0.19278946330721597, 'max_depth': 5, 'reg_alpha': 3.621276066876092, 'reg_lambda': 3.563560560340491}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  82%|████████▏ | 41/50 [03:47<00:50,  5.56s/it]

[I 2025-11-12 14:22:17,412] Trial 40 finished with value: 0.3249390739236393 and parameters: {'num_leaves': 132, 'min_data_in_leaf': 28, 'feature_fraction': 0.9321671911181686, 'n_estimators': 239, 'learning_rate': 0.16863773857990969, 'max_depth': 6, 'reg_alpha': 3.010266075693135, 'reg_lambda': 4.5375503743781085}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  84%|████████▍ | 42/50 [03:52<00:44,  5.60s/it]

[I 2025-11-12 14:22:23,088] Trial 41 finished with value: 0.3314378554021121 and parameters: {'num_leaves': 96, 'min_data_in_leaf': 24, 'feature_fraction': 0.9125008818049642, 'n_estimators': 226, 'learning_rate': 0.1590297861165652, 'max_depth': 7, 'reg_alpha': 3.4250013462282847, 'reg_lambda': 2.8500206395813104}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  86%|████████▌ | 43/50 [03:58<00:39,  5.59s/it]

[I 2025-11-12 14:22:28,656] Trial 42 finished with value: 0.3346872461413485 and parameters: {'num_leaves': 136, 'min_data_in_leaf': 24, 'feature_fraction': 0.9141755225008065, 'n_estimators': 235, 'learning_rate': 0.18115066008967318, 'max_depth': 7, 'reg_alpha': 3.5241079290020547, 'reg_lambda': 2.9922196205066007}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  88%|████████▊ | 44/50 [04:03<00:32,  5.42s/it]

[I 2025-11-12 14:22:33,702] Trial 43 finished with value: 0.32656376929325753 and parameters: {'num_leaves': 138, 'min_data_in_leaf': 32, 'feature_fraction': 0.8912038698017777, 'n_estimators': 245, 'learning_rate': 0.18186431263033617, 'max_depth': 7, 'reg_alpha': 3.869663684261903, 'reg_lambda': 2.758956716310911}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  90%|█████████ | 45/50 [04:09<00:28,  5.70s/it]

[I 2025-11-12 14:22:40,059] Trial 44 finished with value: 0.33549959382615757 and parameters: {'num_leaves': 150, 'min_data_in_leaf': 27, 'feature_fraction': 0.9563681316007903, 'n_estimators': 256, 'learning_rate': 0.19244725345737307, 'max_depth': 7, 'reg_alpha': 3.618969791668992, 'reg_lambda': 3.0319843682637724}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  92%|█████████▏| 46/50 [04:14<00:21,  5.47s/it]

[I 2025-11-12 14:22:44,988] Trial 45 finished with value: 0.3363119415109667 and parameters: {'num_leaves': 148, 'min_data_in_leaf': 27, 'feature_fraction': 0.9541668194093663, 'n_estimators': 257, 'learning_rate': 0.1954011579593078, 'max_depth': 7, 'reg_alpha': 3.640345958053307, 'reg_lambda': 3.39294483582567}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  94%|█████████▍| 47/50 [04:20<00:17,  5.70s/it]

[I 2025-11-12 14:22:51,204] Trial 46 finished with value: 0.3281884646628757 and parameters: {'num_leaves': 150, 'min_data_in_leaf': 27, 'feature_fraction': 0.9564052882050644, 'n_estimators': 283, 'learning_rate': 0.19504775383329173, 'max_depth': 7, 'reg_alpha': 3.24062962005981, 'reg_lambda': 3.788727545272231}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  96%|█████████▌| 48/50 [04:29<00:13,  6.56s/it]

[I 2025-11-12 14:22:59,790] Trial 47 finished with value: 0.32168968318440294 and parameters: {'num_leaves': 145, 'min_data_in_leaf': 31, 'feature_fraction': 0.9835584967003052, 'n_estimators': 258, 'learning_rate': 0.09587780364774645, 'max_depth': 7, 'reg_alpha': 4.106567505215125, 'reg_lambda': 3.3782039662491723}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374:  98%|█████████▊| 49/50 [04:40<00:07,  7.93s/it]

[I 2025-11-12 14:23:10,925] Trial 48 finished with value: 0.3281884646628757 and parameters: {'num_leaves': 148, 'min_data_in_leaf': 29, 'feature_fraction': 0.9607416665288881, 'n_estimators': 273, 'learning_rate': 0.07582725444407865, 'max_depth': 7, 'reg_alpha': 3.683277174439778, 'reg_lambda': 3.9815829427247036}. Best is trial 10 with value: 0.34037367993501216.


Best trial: 10. Best value: 0.340374: 100%|██████████| 50/50 [04:45<00:00,  5.71s/it]


[I 2025-11-12 14:23:15,676] Trial 49 finished with value: 0.3314378554021121 and parameters: {'num_leaves': 120, 'min_data_in_leaf': 26, 'feature_fraction': 0.9894549542849792, 'n_estimators': 292, 'learning_rate': 0.19279834859241035, 'max_depth': 6, 'reg_alpha': 4.011147887243342, 'reg_lambda': 3.1498806092531724}. Best is trial 10 with value: 0.34037367993501216.
Best LGBM Meta Validation Accuracy: 0.3404


[I 2025-11-12 14:23:20,869] A new study created in memory with name: no-name-9c2114f8-cdea-42b8-be92-7d6466836bb3



[2/4] Training XGBoost meta-classifier...


Best trial: 0. Best value: 0.265638:   2%|▏         | 1/50 [00:00<00:32,  1.51it/s]

[I 2025-11-12 14:23:21,528] Trial 0 finished with value: 0.26563769293257516 and parameters: {'max_depth': 3, 'min_child_weight': 9, 'subsample': 0.9057171036366516, 'colsample_bytree': 0.8418616328878853, 'learning_rate': 0.04042001837780375, 'n_estimators': 108, 'reg_alpha': 2.696048777550519, 'reg_lambda': 3.162125433978755}. Best is trial 0 with value: 0.26563769293257516.


Best trial: 1. Best value: 0.30788:   4%|▍         | 2/50 [00:01<00:46,  1.03it/s] 

[I 2025-11-12 14:23:22,721] Trial 1 finished with value: 0.30787977254264826 and parameters: {'max_depth': 4, 'min_child_weight': 8, 'subsample': 0.7808071090336862, 'colsample_bytree': 0.8376556645208164, 'learning_rate': 0.27241378380934106, 'n_estimators': 132, 'reg_alpha': 3.5041925134088427, 'reg_lambda': 3.8927603803156012}. Best is trial 1 with value: 0.30787977254264826.


Best trial: 2. Best value: 0.309504:   6%|▌         | 3/50 [00:04<01:13,  1.56s/it]

[I 2025-11-12 14:23:24,976] Trial 2 finished with value: 0.30950446791226643 and parameters: {'max_depth': 8, 'min_child_weight': 2, 'subsample': 0.8713441531715769, 'colsample_bytree': 0.761909797296496, 'learning_rate': 0.05468985955453346, 'n_estimators': 130, 'reg_alpha': 3.665780768423357, 'reg_lambda': 3.780480731158851}. Best is trial 2 with value: 0.30950446791226643.


Best trial: 2. Best value: 0.309504:   8%|▊         | 4/50 [00:05<01:08,  1.49s/it]

[I 2025-11-12 14:23:26,343] Trial 3 finished with value: 0.2932575142160845 and parameters: {'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.7043311017687839, 'colsample_bytree': 0.8330609206677518, 'learning_rate': 0.0857811568454027, 'n_estimators': 105, 'reg_alpha': 3.4469758021287142, 'reg_lambda': 2.519748001592053}. Best is trial 2 with value: 0.30950446791226643.


Best trial: 2. Best value: 0.309504:  10%|█         | 5/50 [00:06<01:01,  1.36s/it]

[I 2025-11-12 14:23:27,485] Trial 4 finished with value: 0.2835093419983753 and parameters: {'max_depth': 4, 'min_child_weight': 7, 'subsample': 0.7934223176051103, 'colsample_bytree': 0.8270336612786022, 'learning_rate': 0.10759311587702876, 'n_estimators': 141, 'reg_alpha': 4.692206736724399, 'reg_lambda': 3.5387296718599535}. Best is trial 2 with value: 0.30950446791226643.


Best trial: 2. Best value: 0.309504:  12%|█▏        | 6/50 [00:07<00:52,  1.19s/it]

[I 2025-11-12 14:23:28,348] Trial 5 finished with value: 0.2737611697806661 and parameters: {'max_depth': 3, 'min_child_weight': 2, 'subsample': 0.7187312815870869, 'colsample_bytree': 0.7504057512248705, 'learning_rate': 0.1113225611155598, 'n_estimators': 122, 'reg_alpha': 4.078535066563085, 'reg_lambda': 4.983852935360245}. Best is trial 2 with value: 0.30950446791226643.


Best trial: 6. Best value: 0.31844:  14%|█▍        | 7/50 [00:09<00:58,  1.36s/it] 

[I 2025-11-12 14:23:30,051] Trial 6 finished with value: 0.31844029244516653 and parameters: {'max_depth': 4, 'min_child_weight': 6, 'subsample': 0.8097986859496871, 'colsample_bytree': 0.8220801452430896, 'learning_rate': 0.28564773799693216, 'n_estimators': 196, 'reg_alpha': 4.766645474216892, 'reg_lambda': 4.388966631550614}. Best is trial 6 with value: 0.31844029244516653.


Best trial: 6. Best value: 0.31844:  16%|█▌        | 8/50 [00:11<01:11,  1.70s/it]

[I 2025-11-12 14:23:32,471] Trial 7 finished with value: 0.2851340373679935 and parameters: {'max_depth': 7, 'min_child_weight': 2, 'subsample': 0.9598161932551597, 'colsample_bytree': 0.7935125160844613, 'learning_rate': 0.031985122996459274, 'n_estimators': 162, 'reg_alpha': 3.637065165126267, 'reg_lambda': 4.858431313332913}. Best is trial 6 with value: 0.31844029244516653.


Best trial: 8. Best value: 0.320065:  18%|█▊        | 9/50 [00:13<01:13,  1.79s/it]

[I 2025-11-12 14:23:34,460] Trial 8 finished with value: 0.3200649878147847 and parameters: {'max_depth': 7, 'min_child_weight': 10, 'subsample': 0.9793972223432262, 'colsample_bytree': 0.7333831619086273, 'learning_rate': 0.20018728658204957, 'n_estimators': 132, 'reg_alpha': 2.9856093174892555, 'reg_lambda': 3.783899794932984}. Best is trial 8 with value: 0.3200649878147847.


Best trial: 9. Best value: 0.3355:  20%|██        | 10/50 [00:15<01:12,  1.80s/it] 

[I 2025-11-12 14:23:36,291] Trial 9 finished with value: 0.33549959382615757 and parameters: {'max_depth': 5, 'min_child_weight': 10, 'subsample': 0.82508194079198, 'colsample_bytree': 0.8406360925923333, 'learning_rate': 0.27355993203261436, 'n_estimators': 172, 'reg_alpha': 3.0674923742298827, 'reg_lambda': 4.664704684693109}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  22%|██▏       | 11/50 [00:17<01:11,  1.83s/it]

[I 2025-11-12 14:23:38,172] Trial 10 finished with value: 0.3233143785540211 and parameters: {'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.9072764539566044, 'colsample_bytree': 0.7017330513868207, 'learning_rate': 0.19970537770240726, 'n_estimators': 168, 'reg_alpha': 3.0393934873950625, 'reg_lambda': 4.509068087429984}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  24%|██▍       | 12/50 [00:19<01:08,  1.81s/it]

[I 2025-11-12 14:23:39,945] Trial 11 finished with value: 0.3249390739236393 and parameters: {'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.9093491525359634, 'colsample_bytree': 0.7037642335131787, 'learning_rate': 0.20764778095290357, 'n_estimators': 169, 'reg_alpha': 3.0025833207236996, 'reg_lambda': 4.4307665897115625}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  26%|██▌       | 13/50 [00:21<01:08,  1.86s/it]

[I 2025-11-12 14:23:41,916] Trial 12 finished with value: 0.330625507717303 and parameters: {'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.8451066972683952, 'colsample_bytree': 0.7951419831395443, 'learning_rate': 0.22921907830466257, 'n_estimators': 186, 'reg_alpha': 2.534518811836654, 'reg_lambda': 4.349721607599167}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  28%|██▊       | 14/50 [00:23<01:13,  2.03s/it]

[I 2025-11-12 14:23:44,340] Trial 13 finished with value: 0.3322502030869212 and parameters: {'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.8386155646943567, 'colsample_bytree': 0.7949786077953752, 'learning_rate': 0.24467170978788597, 'n_estimators': 190, 'reg_alpha': 2.5880312835962203, 'reg_lambda': 4.1432552381500685}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  30%|███       | 15/50 [00:25<01:15,  2.16s/it]

[I 2025-11-12 14:23:46,792] Trial 14 finished with value: 0.3314378554021121 and parameters: {'max_depth': 6, 'min_child_weight': 6, 'subsample': 0.7577356824948058, 'colsample_bytree': 0.8031528410731076, 'learning_rate': 0.2504863430404588, 'n_estimators': 182, 'reg_alpha': 3.205866053190023, 'reg_lambda': 4.098218356761499}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  32%|███▏      | 16/50 [00:28<01:22,  2.41s/it]

[I 2025-11-12 14:23:49,806] Trial 15 finished with value: 0.3249390739236393 and parameters: {'max_depth': 7, 'min_child_weight': 10, 'subsample': 0.8431358987630538, 'colsample_bytree': 0.8101058477143647, 'learning_rate': 0.15980242810079426, 'n_estimators': 198, 'reg_alpha': 2.71771586781174, 'reg_lambda': 4.755213287225844}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  34%|███▍      | 17/50 [00:31<01:16,  2.33s/it]

[I 2025-11-12 14:23:51,926] Trial 16 finished with value: 0.3314378554021121 and parameters: {'max_depth': 6, 'min_child_weight': 8, 'subsample': 0.8238935550537726, 'colsample_bytree': 0.781074933906767, 'learning_rate': 0.2985018112295861, 'n_estimators': 158, 'reg_alpha': 4.05201896608026, 'reg_lambda': 3.2614548514777972}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  36%|███▌      | 18/50 [00:34<01:22,  2.57s/it]

[I 2025-11-12 14:23:55,052] Trial 17 finished with value: 0.3322502030869212 and parameters: {'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.7519507207313865, 'colsample_bytree': 0.8485457441391976, 'learning_rate': 0.1627857733144456, 'n_estimators': 179, 'reg_alpha': 2.804153845655515, 'reg_lambda': 4.099931572720733}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  38%|███▊      | 19/50 [00:36<01:14,  2.40s/it]

[I 2025-11-12 14:23:57,062] Trial 18 finished with value: 0.330625507717303 and parameters: {'max_depth': 6, 'min_child_weight': 7, 'subsample': 0.8805709592862636, 'colsample_bytree': 0.7733262541526787, 'learning_rate': 0.24181676936870877, 'n_estimators': 149, 'reg_alpha': 3.2751914425409754, 'reg_lambda': 4.18657249222413}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  40%|████      | 20/50 [00:38<01:10,  2.34s/it]

[I 2025-11-12 14:23:59,266] Trial 19 finished with value: 0.3281884646628757 and parameters: {'max_depth': 5, 'min_child_weight': 8, 'subsample': 0.9340594963921823, 'colsample_bytree': 0.8159890518389116, 'learning_rate': 0.2677014040797418, 'n_estimators': 189, 'reg_alpha': 4.010946193872901, 'reg_lambda': 4.717213211067722}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 9. Best value: 0.3355:  42%|████▏     | 21/50 [00:40<01:02,  2.16s/it]

[I 2025-11-12 14:24:01,017] Trial 20 finished with value: 0.32250203086921203 and parameters: {'max_depth': 4, 'min_child_weight': 5, 'subsample': 0.8653878124129223, 'colsample_bytree': 0.73214875632082, 'learning_rate': 0.2246303076178895, 'n_estimators': 175, 'reg_alpha': 2.515556888407452, 'reg_lambda': 3.46569515402713}. Best is trial 9 with value: 0.33549959382615757.


Best trial: 21. Best value: 0.338749:  44%|████▍     | 22/50 [00:44<01:16,  2.73s/it]

[I 2025-11-12 14:24:05,072] Trial 21 finished with value: 0.338748984565394 and parameters: {'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.7515870431091676, 'colsample_bytree': 0.844010650894149, 'learning_rate': 0.16677391417756343, 'n_estimators': 178, 'reg_alpha': 2.7929648949703783, 'reg_lambda': 4.061907203479948}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  46%|████▌     | 23/50 [00:48<01:24,  3.12s/it]

[I 2025-11-12 14:24:09,107] Trial 22 finished with value: 0.3192526401299756 and parameters: {'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.7468864365146288, 'colsample_bytree': 0.8453875087053281, 'learning_rate': 0.1364256440803841, 'n_estimators': 192, 'reg_alpha': 2.8515041769816207, 'reg_lambda': 4.622210780506224}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  48%|████▊     | 24/50 [00:50<01:18,  3.01s/it]

[I 2025-11-12 14:24:11,844] Trial 23 finished with value: 0.3281884646628757 and parameters: {'max_depth': 7, 'min_child_weight': 3, 'subsample': 0.8212125761895849, 'colsample_bytree': 0.8228448485113237, 'learning_rate': 0.1779882399242848, 'n_estimators': 174, 'reg_alpha': 3.2209001147399396, 'reg_lambda': 3.9752408631104044}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  50%|█████     | 25/50 [00:53<01:08,  2.74s/it]

[I 2025-11-12 14:24:13,959] Trial 24 finished with value: 0.3257514216084484 and parameters: {'max_depth': 7, 'min_child_weight': 6, 'subsample': 0.7815051226647379, 'colsample_bytree': 0.7863631787442645, 'learning_rate': 0.2568926098926459, 'n_estimators': 155, 'reg_alpha': 2.55974287173591, 'reg_lambda': 4.246522320474492}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  52%|█████▏    | 26/50 [00:56<01:07,  2.80s/it]

[I 2025-11-12 14:24:16,902] Trial 25 finished with value: 0.3322502030869212 and parameters: {'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.7292299736352748, 'colsample_bytree': 0.8327855628502902, 'learning_rate': 0.1797661984481598, 'n_estimators': 167, 'reg_alpha': 2.8775969569666326, 'reg_lambda': 3.6056350881276176}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  54%|█████▍    | 27/50 [00:58<01:01,  2.69s/it]

[I 2025-11-12 14:24:19,336] Trial 26 finished with value: 0.3322502030869212 and parameters: {'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.8040845645141167, 'colsample_bytree': 0.8088146563567408, 'learning_rate': 0.22324274306694106, 'n_estimators': 185, 'reg_alpha': 3.1103384868183888, 'reg_lambda': 4.552178609991231}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  56%|█████▌    | 28/50 [01:00<00:54,  2.50s/it]

[I 2025-11-12 14:24:21,383] Trial 27 finished with value: 0.3346872461413485 and parameters: {'max_depth': 5, 'min_child_weight': 7, 'subsample': 0.7740792766803324, 'colsample_bytree': 0.7982065872225543, 'learning_rate': 0.29833745310081267, 'n_estimators': 199, 'reg_alpha': 3.372744659691001, 'reg_lambda': 3.996359954945176}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  58%|█████▊    | 29/50 [01:02<00:50,  2.43s/it]

[I 2025-11-12 14:24:23,647] Trial 28 finished with value: 0.3192526401299756 and parameters: {'max_depth': 5, 'min_child_weight': 9, 'subsample': 0.7681773737021066, 'colsample_bytree': 0.769943128191812, 'learning_rate': 0.29912507077827666, 'n_estimators': 200, 'reg_alpha': 3.8427435472485327, 'reg_lambda': 3.116838903924978}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  60%|██████    | 30/50 [01:04<00:41,  2.07s/it]

[I 2025-11-12 14:24:24,887] Trial 29 finished with value: 0.29406986190089357 and parameters: {'max_depth': 3, 'min_child_weight': 9, 'subsample': 0.7336798697014124, 'colsample_bytree': 0.848026129564363, 'learning_rate': 0.28060399010329984, 'n_estimators': 177, 'reg_alpha': 3.402771662697665, 'reg_lambda': 2.8114049873407154}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  62%|██████▏   | 31/50 [01:05<00:34,  1.81s/it]

[I 2025-11-12 14:24:26,075] Trial 30 finished with value: 0.27619821283509344 and parameters: {'max_depth': 4, 'min_child_weight': 7, 'subsample': 0.7843935820105273, 'colsample_bytree': 0.8336076422151962, 'learning_rate': 0.0519752576905191, 'n_estimators': 149, 'reg_alpha': 3.3426863733869774, 'reg_lambda': 3.95747008969609}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  64%|██████▍   | 32/50 [01:07<00:33,  1.86s/it]

[I 2025-11-12 14:24:28,062] Trial 31 finished with value: 0.3257514216084484 and parameters: {'max_depth': 5, 'min_child_weight': 9, 'subsample': 0.8278931037298163, 'colsample_bytree': 0.8014990380327286, 'learning_rate': 0.2605891027169904, 'n_estimators': 193, 'reg_alpha': 2.6737511790728474, 'reg_lambda': 4.281263541441771}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  66%|██████▌   | 33/50 [01:09<00:33,  1.99s/it]

[I 2025-11-12 14:24:30,349] Trial 32 finished with value: 0.338748984565394 and parameters: {'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.7678917549766405, 'colsample_bytree': 0.817244382961059, 'learning_rate': 0.278520806169733, 'n_estimators': 189, 'reg_alpha': 2.710466656811918, 'reg_lambda': 4.031706472456979}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  68%|██████▊   | 34/50 [01:11<00:31,  2.00s/it]

[I 2025-11-12 14:24:32,370] Trial 33 finished with value: 0.3249390739236393 and parameters: {'max_depth': 5, 'min_child_weight': 7, 'subsample': 0.7611682232403939, 'colsample_bytree': 0.8402981787134755, 'learning_rate': 0.2855738800632268, 'n_estimators': 183, 'reg_alpha': 2.873318630154921, 'reg_lambda': 3.6901901386894402}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  70%|███████   | 35/50 [01:12<00:27,  1.83s/it]

[I 2025-11-12 14:24:33,823] Trial 34 finished with value: 0.3151909017059301 and parameters: {'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.7106885871541337, 'colsample_bytree': 0.814659666574127, 'learning_rate': 0.27009855419515033, 'n_estimators': 172, 'reg_alpha': 3.609813895465849, 'reg_lambda': 3.878031281236913}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  72%|███████▏  | 36/50 [01:15<00:27,  1.95s/it]

[I 2025-11-12 14:24:36,027] Trial 35 finished with value: 0.31600324939073926 and parameters: {'max_depth': 6, 'min_child_weight': 8, 'subsample': 0.7725715225459381, 'colsample_bytree': 0.8268357258324661, 'learning_rate': 0.1413652345205199, 'n_estimators': 181, 'reg_alpha': 4.436435177127352, 'reg_lambda': 3.398885591300632}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  74%|███████▍  | 37/50 [01:16<00:22,  1.76s/it]

[I 2025-11-12 14:24:37,345] Trial 36 finished with value: 0.2802599512591389 and parameters: {'max_depth': 3, 'min_child_weight': 6, 'subsample': 0.7943527240407817, 'colsample_bytree': 0.8341534742287822, 'learning_rate': 0.09632229577641904, 'n_estimators': 200, 'reg_alpha': 3.1342615352859906, 'reg_lambda': 4.036314350591594}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  76%|███████▌  | 38/50 [01:17<00:20,  1.67s/it]

[I 2025-11-12 14:24:38,820] Trial 37 finished with value: 0.2794476035743298 and parameters: {'max_depth': 4, 'min_child_weight': 10, 'subsample': 0.7402332487781338, 'colsample_bytree': 0.8207210014332509, 'learning_rate': 0.0745230349922647, 'n_estimators': 165, 'reg_alpha': 3.543545582339413, 'reg_lambda': 3.795711341077367}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  78%|███████▊  | 39/50 [01:19<00:19,  1.76s/it]

[I 2025-11-12 14:24:40,774] Trial 38 finished with value: 0.3338748984565394 and parameters: {'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.8046721417451919, 'colsample_bytree': 0.8368527645698268, 'learning_rate': 0.2985563719519403, 'n_estimators': 114, 'reg_alpha': 3.795904547975953, 'reg_lambda': 3.6354901910187354}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  80%|████████  | 40/50 [01:21<00:16,  1.66s/it]

[I 2025-11-12 14:24:42,195] Trial 39 finished with value: 0.31844029244516653 and parameters: {'max_depth': 5, 'min_child_weight': 6, 'subsample': 0.7267418502233464, 'colsample_bytree': 0.8412663420549212, 'learning_rate': 0.2766012598465866, 'n_estimators': 142, 'reg_alpha': 2.715090497263602, 'reg_lambda': 4.884941954845413}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  82%|████████▏ | 41/50 [01:22<00:14,  1.63s/it]

[I 2025-11-12 14:24:43,760] Trial 40 finished with value: 0.314378554021121 and parameters: {'max_depth': 7, 'min_child_weight': 7, 'subsample': 0.8605399665347491, 'colsample_bytree': 0.8286395429568402, 'learning_rate': 0.1288406153315486, 'n_estimators': 100, 'reg_alpha': 2.958724445234925, 'reg_lambda': 3.8900750735964134}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  84%|████████▍ | 42/50 [01:24<00:13,  1.72s/it]

[I 2025-11-12 14:24:45,695] Trial 41 finished with value: 0.3290008123476848 and parameters: {'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.8046800971901068, 'colsample_bytree': 0.8395322490010275, 'learning_rate': 0.2984006634334584, 'n_estimators': 122, 'reg_alpha': 3.790212464896839, 'reg_lambda': 3.636634501330017}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  86%|████████▌ | 43/50 [01:26<00:12,  1.73s/it]

[I 2025-11-12 14:24:47,444] Trial 42 finished with value: 0.3241267262388302 and parameters: {'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.7841120075381448, 'colsample_bytree': 0.8190668088529148, 'learning_rate': 0.286721996873126, 'n_estimators': 111, 'reg_alpha': 4.288367122254789, 'reg_lambda': 3.3340667139040687}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  88%|████████▊ | 44/50 [01:28<00:10,  1.79s/it]

[I 2025-11-12 14:24:49,387] Trial 43 finished with value: 0.3330625507717303 and parameters: {'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.7950339625210011, 'colsample_bytree': 0.8280720550261068, 'learning_rate': 0.2361940834057964, 'n_estimators': 111, 'reg_alpha': 3.4645447752594016, 'reg_lambda': 3.1512421519186318}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  90%|█████████ | 45/50 [01:31<00:10,  2.06s/it]

[I 2025-11-12 14:24:52,086] Trial 44 finished with value: 0.32656376929325753 and parameters: {'max_depth': 7, 'min_child_weight': 4, 'subsample': 0.766153781377242, 'colsample_bytree': 0.8068368433328603, 'learning_rate': 0.21166343098759638, 'n_estimators': 195, 'reg_alpha': 4.924472875375981, 'reg_lambda': 3.7177245016635965}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  92%|█████████▏| 46/50 [01:35<00:10,  2.73s/it]

[I 2025-11-12 14:24:56,375] Trial 45 finished with value: 0.3290008123476848 and parameters: {'max_depth': 8, 'min_child_weight': 2, 'subsample': 0.8147430397929204, 'colsample_bytree': 0.8430116034978063, 'learning_rate': 0.25841659734265077, 'n_estimators': 188, 'reg_alpha': 3.9374547749069784, 'reg_lambda': 4.403902100770307}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  94%|█████████▍| 47/50 [01:36<00:06,  2.33s/it]

[I 2025-11-12 14:24:57,754] Trial 46 finished with value: 0.3233143785540211 and parameters: {'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.8792036585992443, 'colsample_bytree': 0.8499894562925064, 'learning_rate': 0.2719387446254814, 'n_estimators': 121, 'reg_alpha': 3.6914968310689384, 'reg_lambda': 3.4937936243219463}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  96%|█████████▌| 48/50 [01:39<00:04,  2.33s/it]

[I 2025-11-12 14:25:00,086] Trial 47 finished with value: 0.32656376929325753 and parameters: {'max_depth': 6, 'min_child_weight': 4, 'subsample': 0.7040890823899978, 'colsample_bytree': 0.7865967662567425, 'learning_rate': 0.28347148846412656, 'n_estimators': 161, 'reg_alpha': 3.093281378235458, 'reg_lambda': 2.943589898371253}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749:  98%|█████████▊| 49/50 [01:41<00:02,  2.18s/it]

[I 2025-11-12 14:25:01,896] Trial 48 finished with value: 0.3330625507717303 and parameters: {'max_depth': 7, 'min_child_weight': 8, 'subsample': 0.8304738683057966, 'colsample_bytree': 0.7580188591719687, 'learning_rate': 0.25027115629324037, 'n_estimators': 137, 'reg_alpha': 2.9629523056882223, 'reg_lambda': 3.8410534146173827}. Best is trial 21 with value: 0.338748984565394.


Best trial: 21. Best value: 0.338749: 100%|██████████| 50/50 [01:42<00:00,  2.05s/it]


[I 2025-11-12 14:25:03,554] Trial 49 finished with value: 0.30950446791226643 and parameters: {'max_depth': 4, 'min_child_weight': 5, 'subsample': 0.7490746829157978, 'colsample_bytree': 0.8127116987689988, 'learning_rate': 0.2936029446580193, 'n_estimators': 194, 'reg_alpha': 4.279112150104791, 'reg_lambda': 4.054504288340974}. Best is trial 21 with value: 0.338748984565394.
Best XGB Meta Validation Accuracy: 0.3387


[I 2025-11-12 14:25:06,732] A new study created in memory with name: no-name-efe8a8b3-6512-4977-8442-e924c18ab15a



[3/4] Training CatBoost meta-classifier...


Best trial: 0. Best value: 0.278635:   2%|▏         | 1/50 [00:03<02:49,  3.46s/it]

[I 2025-11-12 14:25:10,197] Trial 0 finished with value: 0.2786352558895207 and parameters: {'depth': 8, 'learning_rate': 0.10929518309602844, 'iterations': 240, 'l2_leaf_reg': 4.630315207463226, 'border_count': 85, 'bagging_temperature': 0.07662995596143107, 'random_strength': 0.598622884173108, 'min_data_in_leaf': 31}. Best is trial 0 with value: 0.2786352558895207.


Best trial: 0. Best value: 0.278635:   4%|▍         | 2/50 [00:04<01:28,  1.84s/it]

[I 2025-11-12 14:25:10,906] Trial 1 finished with value: 0.2778229082047116 and parameters: {'depth': 7, 'learning_rate': 0.13380323587957044, 'iterations': 105, 'l2_leaf_reg': 6.499882018820415, 'border_count': 59, 'bagging_temperature': 0.5699440117737194, 'random_strength': 0.7662132035743567, 'min_data_in_leaf': 91}. Best is trial 0 with value: 0.2786352558895207.


Best trial: 2. Best value: 0.28026:   6%|▌         | 3/50 [00:05<01:13,  1.56s/it] 

[I 2025-11-12 14:25:12,118] Trial 2 finished with value: 0.2802599512591389 and parameters: {'depth': 7, 'learning_rate': 0.11274484065079163, 'iterations': 173, 'l2_leaf_reg': 3.1042250426600457, 'border_count': 59, 'bagging_temperature': 0.319771728992606, 'random_strength': 0.5917296107443157, 'min_data_in_leaf': 23}. Best is trial 2 with value: 0.2802599512591389.


Best trial: 2. Best value: 0.28026:   8%|▊         | 4/50 [00:06<01:04,  1.39s/it]

[I 2025-11-12 14:25:13,266] Trial 3 finished with value: 0.25913891145410234 and parameters: {'depth': 7, 'learning_rate': 0.07995664392512383, 'iterations': 153, 'l2_leaf_reg': 9.303568425365626, 'border_count': 123, 'bagging_temperature': 0.8990665431358041, 'random_strength': 1.6057892854075757, 'min_data_in_leaf': 27}. Best is trial 2 with value: 0.2802599512591389.


Best trial: 2. Best value: 0.28026:  10%|█         | 5/50 [00:08<01:08,  1.51s/it]

[I 2025-11-12 14:25:14,988] Trial 4 finished with value: 0.272948822095857 and parameters: {'depth': 6, 'learning_rate': 0.07013886680166419, 'iterations': 282, 'l2_leaf_reg': 2.9090126413832995, 'border_count': 83, 'bagging_temperature': 0.059748775623199135, 'random_strength': 0.7251256358544389, 'min_data_in_leaf': 58}. Best is trial 2 with value: 0.2802599512591389.


Best trial: 5. Best value: 0.281885:  12%|█▏        | 6/50 [00:09<01:04,  1.48s/it]

[I 2025-11-12 14:25:16,391] Trial 5 finished with value: 0.2818846466287571 and parameters: {'depth': 6, 'learning_rate': 0.14236131942803146, 'iterations': 235, 'l2_leaf_reg': 1.63492898346091, 'border_count': 46, 'bagging_temperature': 0.016342649431744904, 'random_strength': 0.9165641182996538, 'min_data_in_leaf': 70}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 5. Best value: 0.281885:  14%|█▍        | 7/50 [00:10<00:51,  1.19s/it]

[I 2025-11-12 14:25:16,988] Trial 6 finished with value: 0.25913891145410234 and parameters: {'depth': 5, 'learning_rate': 0.11797350452702456, 'iterations': 136, 'l2_leaf_reg': 7.67257182822218, 'border_count': 81, 'bagging_temperature': 0.5618534928763166, 'random_strength': 1.2105474916266319, 'min_data_in_leaf': 24}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 5. Best value: 0.281885:  16%|█▌        | 8/50 [00:10<00:43,  1.04s/it]

[I 2025-11-12 14:25:17,698] Trial 7 finished with value: 0.2737611697806661 and parameters: {'depth': 7, 'learning_rate': 0.14940050570725458, 'iterations': 107, 'l2_leaf_reg': 7.515807387840257, 'border_count': 108, 'bagging_temperature': 0.6159493939799052, 'random_strength': 0.9829541546962133, 'min_data_in_leaf': 67}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 5. Best value: 0.281885:  18%|█▊        | 9/50 [00:12<00:43,  1.07s/it]

[I 2025-11-12 14:25:18,833] Trial 8 finished with value: 0.25995125913891143 and parameters: {'depth': 4, 'learning_rate': 0.045652587048943906, 'iterations': 283, 'l2_leaf_reg': 8.918556684874098, 'border_count': 120, 'bagging_temperature': 0.5752310292852586, 'random_strength': 1.0641447998706797, 'min_data_in_leaf': 21}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 5. Best value: 0.281885:  20%|██        | 10/50 [00:13<00:50,  1.27s/it]

[I 2025-11-12 14:25:20,550] Trial 9 finished with value: 0.27213647441104794 and parameters: {'depth': 8, 'learning_rate': 0.10105695359224857, 'iterations': 169, 'l2_leaf_reg': 3.4505072490465465, 'border_count': 122, 'bagging_temperature': 0.15346086108831347, 'random_strength': 1.5132242817029518, 'min_data_in_leaf': 74}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 5. Best value: 0.281885:  22%|██▏       | 11/50 [00:14<00:46,  1.20s/it]

[I 2025-11-12 14:25:21,581] Trial 10 finished with value: 0.2688870836718115 and parameters: {'depth': 5, 'learning_rate': 0.04002495865540178, 'iterations': 221, 'l2_leaf_reg': 1.3142128476197703, 'border_count': 33, 'bagging_temperature': 0.3412438884229876, 'random_strength': 0.08360389804843149, 'min_data_in_leaf': 47}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 5. Best value: 0.281885:  24%|██▍       | 12/50 [00:15<00:41,  1.09s/it]

[I 2025-11-12 14:25:22,434] Trial 11 finished with value: 0.272948822095857 and parameters: {'depth': 6, 'learning_rate': 0.12602556841559, 'iterations': 186, 'l2_leaf_reg': 1.3398432730866203, 'border_count': 50, 'bagging_temperature': 0.2940721498107238, 'random_strength': 0.35602780063808076, 'min_data_in_leaf': 86}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 5. Best value: 0.281885:  26%|██▌       | 13/50 [00:16<00:42,  1.15s/it]

[I 2025-11-12 14:25:23,712] Trial 12 finished with value: 0.2802599512591389 and parameters: {'depth': 6, 'learning_rate': 0.14706364119027984, 'iterations': 230, 'l2_leaf_reg': 2.824186416697426, 'border_count': 59, 'bagging_temperature': 0.3084485183354203, 'random_strength': 0.41776570018444514, 'min_data_in_leaf': 45}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 5. Best value: 0.281885:  28%|██▊       | 14/50 [00:17<00:38,  1.07s/it]

[I 2025-11-12 14:25:24,618] Trial 13 finished with value: 0.26645004061738425 and parameters: {'depth': 5, 'learning_rate': 0.0944168967695183, 'iterations': 199, 'l2_leaf_reg': 4.657494142744549, 'border_count': 33, 'bagging_temperature': 0.2107315205662785, 'random_strength': 1.2980043334909912, 'min_data_in_leaf': 100}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 5. Best value: 0.281885:  30%|███       | 15/50 [00:19<00:45,  1.31s/it]

[I 2025-11-12 14:25:26,474] Trial 14 finished with value: 0.28107229894394803 and parameters: {'depth': 7, 'learning_rate': 0.13229272504216613, 'iterations': 265, 'l2_leaf_reg': 1.9788153202659187, 'border_count': 49, 'bagging_temperature': 0.01575780384203218, 'random_strength': 1.9419763247116313, 'min_data_in_leaf': 54}. Best is trial 5 with value: 0.2818846466287571.


Best trial: 15. Best value: 0.286759:  32%|███▏      | 16/50 [00:22<00:57,  1.70s/it]

[I 2025-11-12 14:25:29,081] Trial 15 finished with value: 0.2867587327376117 and parameters: {'depth': 8, 'learning_rate': 0.1337399341624572, 'iterations': 260, 'l2_leaf_reg': 1.95585531751168, 'border_count': 46, 'bagging_temperature': 0.03530123275789558, 'random_strength': 1.929641823900946, 'min_data_in_leaf': 55}. Best is trial 15 with value: 0.2867587327376117.


Best trial: 16. Best value: 0.292445:  34%|███▍      | 17/50 [00:24<01:05,  1.98s/it]

[I 2025-11-12 14:25:31,715] Trial 16 finished with value: 0.2924451665312754 and parameters: {'depth': 8, 'learning_rate': 0.14091727652129216, 'iterations': 251, 'l2_leaf_reg': 4.334797119610381, 'border_count': 70, 'bagging_temperature': 0.7890518358465488, 'random_strength': 1.7861676597670901, 'min_data_in_leaf': 73}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  36%|███▌      | 18/50 [00:27<01:12,  2.26s/it]

[I 2025-11-12 14:25:34,627] Trial 17 finished with value: 0.2770105605199025 and parameters: {'depth': 8, 'learning_rate': 0.05799554255776761, 'iterations': 299, 'l2_leaf_reg': 4.766961945536635, 'border_count': 71, 'bagging_temperature': 0.8405030561946868, 'random_strength': 1.9863946908842718, 'min_data_in_leaf': 79}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  38%|███▊      | 19/50 [00:30<01:16,  2.46s/it]

[I 2025-11-12 14:25:37,560] Trial 18 finished with value: 0.2826969943135662 and parameters: {'depth': 8, 'learning_rate': 0.12411277465296433, 'iterations': 261, 'l2_leaf_reg': 5.622158878158071, 'border_count': 96, 'bagging_temperature': 0.7358874521840816, 'random_strength': 1.7385389358402703, 'min_data_in_leaf': 37}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  40%|████      | 20/50 [00:33<01:16,  2.53s/it]

[I 2025-11-12 14:25:40,264] Trial 19 finished with value: 0.27213647441104794 and parameters: {'depth': 8, 'learning_rate': 0.08310438342306123, 'iterations': 252, 'l2_leaf_reg': 3.5170490174654225, 'border_count': 71, 'bagging_temperature': 0.9935111683659398, 'random_strength': 1.8076023428645511, 'min_data_in_leaf': 63}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  42%|████▏     | 21/50 [00:35<01:11,  2.47s/it]

[I 2025-11-12 14:25:42,567] Trial 20 finished with value: 0.28107229894394803 and parameters: {'depth': 8, 'learning_rate': 0.13723900871326863, 'iterations': 221, 'l2_leaf_reg': 6.05969452505878, 'border_count': 69, 'bagging_temperature': 0.4532600960852934, 'random_strength': 1.4366353152852256, 'min_data_in_leaf': 80}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  44%|████▍     | 22/50 [00:39<01:15,  2.70s/it]

[I 2025-11-12 14:25:45,823] Trial 21 finished with value: 0.2786352558895207 and parameters: {'depth': 8, 'learning_rate': 0.12150884304676896, 'iterations': 262, 'l2_leaf_reg': 5.546940979816107, 'border_count': 95, 'bagging_temperature': 0.7205234733392857, 'random_strength': 1.686864945677833, 'min_data_in_leaf': 38}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  46%|████▌     | 23/50 [00:42<01:16,  2.83s/it]

[I 2025-11-12 14:25:48,951] Trial 22 finished with value: 0.2818846466287571 and parameters: {'depth': 8, 'learning_rate': 0.12794473295150516, 'iterations': 278, 'l2_leaf_reg': 4.219168207186635, 'border_count': 97, 'bagging_temperature': 0.7292377224650808, 'random_strength': 1.7804523876635245, 'min_data_in_leaf': 51}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  48%|████▊     | 24/50 [00:44<01:06,  2.57s/it]

[I 2025-11-12 14:25:50,921] Trial 23 finished with value: 0.289195775792039 and parameters: {'depth': 7, 'learning_rate': 0.10501287192522019, 'iterations': 250, 'l2_leaf_reg': 6.749938526785403, 'border_count': 95, 'bagging_temperature': 0.7373520961079655, 'random_strength': 1.8000046229434759, 'min_data_in_leaf': 38}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  50%|█████     | 25/50 [00:46<00:59,  2.39s/it]

[I 2025-11-12 14:25:52,892] Trial 24 finished with value: 0.272948822095857 and parameters: {'depth': 7, 'learning_rate': 0.1025603931465701, 'iterations': 204, 'l2_leaf_reg': 7.203334323540898, 'border_count': 111, 'bagging_temperature': 0.4525920276964739, 'random_strength': 1.4017627386472309, 'min_data_in_leaf': 40}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  52%|█████▏    | 26/50 [00:48<00:55,  2.32s/it]

[I 2025-11-12 14:25:55,028] Trial 25 finished with value: 0.27051177904142976 and parameters: {'depth': 7, 'learning_rate': 0.10886973348347284, 'iterations': 249, 'l2_leaf_reg': 6.700702549290472, 'border_count': 91, 'bagging_temperature': 0.8320119135398522, 'random_strength': 1.9922998636123899, 'min_data_in_leaf': 62}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  54%|█████▍    | 27/50 [00:50<00:50,  2.20s/it]

[I 2025-11-12 14:25:56,957] Trial 26 finished with value: 0.2770105605199025 and parameters: {'depth': 7, 'learning_rate': 0.14040602557589682, 'iterations': 211, 'l2_leaf_reg': 8.095726744685859, 'border_count': 106, 'bagging_temperature': 0.6872695438180729, 'random_strength': 1.5793635152419858, 'min_data_in_leaf': 58}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  56%|█████▌    | 28/50 [00:53<00:56,  2.58s/it]

[I 2025-11-12 14:26:00,437] Trial 27 finished with value: 0.2802599512591389 and parameters: {'depth': 8, 'learning_rate': 0.09295044857886611, 'iterations': 299, 'l2_leaf_reg': 3.8809454310835467, 'border_count': 74, 'bagging_temperature': 0.9482557068799629, 'random_strength': 1.8584510133275027, 'min_data_in_leaf': 47}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  58%|█████▊    | 29/50 [00:56<00:54,  2.57s/it]

[I 2025-11-12 14:26:02,988] Trial 28 finished with value: 0.28107229894394803 and parameters: {'depth': 8, 'learning_rate': 0.11788104932320428, 'iterations': 273, 'l2_leaf_reg': 2.1601370324931324, 'border_count': 41, 'bagging_temperature': 0.8188874686839556, 'random_strength': 1.2114217804859122, 'min_data_in_leaf': 32}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  60%|██████    | 30/50 [00:58<00:51,  2.55s/it]

[I 2025-11-12 14:26:05,483] Trial 29 finished with value: 0.2737611697806661 and parameters: {'depth': 8, 'learning_rate': 0.10546038150046193, 'iterations': 245, 'l2_leaf_reg': 8.54232725875174, 'border_count': 65, 'bagging_temperature': 0.4894288818889798, 'random_strength': 1.650445537860492, 'min_data_in_leaf': 32}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  62%|██████▏   | 31/50 [01:01<00:47,  2.48s/it]

[I 2025-11-12 14:26:07,791] Trial 30 finished with value: 0.2867587327376117 and parameters: {'depth': 7, 'learning_rate': 0.11224106678332132, 'iterations': 236, 'l2_leaf_reg': 5.296820234641747, 'border_count': 88, 'bagging_temperature': 0.6411133496870554, 'random_strength': 1.8760100810904177, 'min_data_in_leaf': 72}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  64%|██████▍   | 32/50 [01:03<00:43,  2.41s/it]

[I 2025-11-12 14:26:10,053] Trial 31 finished with value: 0.2851340373679935 and parameters: {'depth': 7, 'learning_rate': 0.1328760626442721, 'iterations': 237, 'l2_leaf_reg': 4.86245597345805, 'border_count': 88, 'bagging_temperature': 0.6816752119548364, 'random_strength': 1.8526054367554656, 'min_data_in_leaf': 75}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  66%|██████▌   | 33/50 [01:05<00:39,  2.30s/it]

[I 2025-11-12 14:26:12,104] Trial 32 finished with value: 0.2802599512591389 and parameters: {'depth': 7, 'learning_rate': 0.11355148024052354, 'iterations': 252, 'l2_leaf_reg': 6.180941900439994, 'border_count': 78, 'bagging_temperature': 0.7804510964358544, 'random_strength': 1.8402189788211045, 'min_data_in_leaf': 67}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  68%|██████▊   | 34/50 [01:07<00:36,  2.27s/it]

[I 2025-11-12 14:26:14,304] Trial 33 finished with value: 0.2826969943135662 and parameters: {'depth': 7, 'learning_rate': 0.14052642516571048, 'iterations': 224, 'l2_leaf_reg': 5.261148656072728, 'border_count': 99, 'bagging_temperature': 0.6332590238470467, 'random_strength': 1.4911008575606597, 'min_data_in_leaf': 87}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  70%|███████   | 35/50 [01:11<00:40,  2.69s/it]

[I 2025-11-12 14:26:17,978] Trial 34 finished with value: 0.2745735174654752 and parameters: {'depth': 7, 'learning_rate': 0.09914196109431642, 'iterations': 243, 'l2_leaf_reg': 6.769018047594771, 'border_count': 63, 'bagging_temperature': 0.39397531987145695, 'random_strength': 1.7180884021267269, 'min_data_in_leaf': 72}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  72%|███████▏  | 36/50 [01:13<00:35,  2.50s/it]

[I 2025-11-12 14:26:20,034] Trial 35 finished with value: 0.26726238830219334 and parameters: {'depth': 6, 'learning_rate': 0.08446642573676531, 'iterations': 267, 'l2_leaf_reg': 2.531541688757204, 'border_count': 86, 'bagging_temperature': 0.8912764058511919, 'random_strength': 1.5892059806895558, 'min_data_in_leaf': 79}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  74%|███████▍  | 37/50 [01:16<00:35,  2.76s/it]

[I 2025-11-12 14:26:23,408] Trial 36 finished with value: 0.27538586515028435 and parameters: {'depth': 8, 'learning_rate': 0.11107230197616533, 'iterations': 290, 'l2_leaf_reg': 6.01031701715161, 'border_count': 55, 'bagging_temperature': 0.5327838542307257, 'random_strength': 1.9004071002269995, 'min_data_in_leaf': 66}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  76%|███████▌  | 38/50 [01:18<00:30,  2.51s/it]

[I 2025-11-12 14:26:25,324] Trial 37 finished with value: 0.27213647441104794 and parameters: {'depth': 7, 'learning_rate': 0.07244048321762522, 'iterations': 210, 'l2_leaf_reg': 9.845417427442145, 'border_count': 103, 'bagging_temperature': 0.6221330898534292, 'random_strength': 1.2996901782836272, 'min_data_in_leaf': 54}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  78%|███████▊  | 39/50 [01:19<00:22,  2.04s/it]

[I 2025-11-12 14:26:26,276] Trial 38 finished with value: 0.272948822095857 and parameters: {'depth': 6, 'learning_rate': 0.1307386118745391, 'iterations': 193, 'l2_leaf_reg': 4.038161341490442, 'border_count': 81, 'bagging_temperature': 0.886337830612973, 'random_strength': 1.6868982081775319, 'min_data_in_leaf': 91}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 16. Best value: 0.292445:  80%|████████  | 40/50 [01:21<00:21,  2.13s/it]

[I 2025-11-12 14:26:28,613] Trial 39 finished with value: 0.27132412672623885 and parameters: {'depth': 8, 'learning_rate': 0.11769099493200755, 'iterations': 233, 'l2_leaf_reg': 7.104418780979194, 'border_count': 42, 'bagging_temperature': 0.6629745070392513, 'random_strength': 1.5630812002670207, 'min_data_in_leaf': 57}. Best is trial 16 with value: 0.2924451665312754.


Best trial: 40. Best value: 0.296507:  82%|████████▏ | 41/50 [01:24<00:20,  2.23s/it]

[I 2025-11-12 14:26:31,072] Trial 40 finished with value: 0.2965069049553209 and parameters: {'depth': 7, 'learning_rate': 0.14779594554074932, 'iterations': 288, 'l2_leaf_reg': 3.4185074999041456, 'border_count': 116, 'bagging_temperature': 0.12026414452236428, 'random_strength': 0.7830000898533458, 'min_data_in_leaf': 27}. Best is trial 40 with value: 0.2965069049553209.


Best trial: 40. Best value: 0.296507:  84%|████████▍ | 42/50 [01:26<00:18,  2.29s/it]

[I 2025-11-12 14:26:33,499] Trial 41 finished with value: 0.2851340373679935 and parameters: {'depth': 7, 'learning_rate': 0.1486904648831339, 'iterations': 258, 'l2_leaf_reg': 5.064002495174661, 'border_count': 114, 'bagging_temperature': 0.07542045289292294, 'random_strength': 0.7962152735466752, 'min_data_in_leaf': 26}. Best is trial 40 with value: 0.2965069049553209.


Best trial: 40. Best value: 0.296507:  86%|████████▌ | 43/50 [01:28<00:15,  2.20s/it]

[I 2025-11-12 14:26:35,508] Trial 42 finished with value: 0.2883834281072299 and parameters: {'depth': 6, 'learning_rate': 0.14341536279468717, 'iterations': 285, 'l2_leaf_reg': 3.4552552190541124, 'border_count': 117, 'bagging_temperature': 0.15246358230339402, 'random_strength': 0.8518891582513664, 'min_data_in_leaf': 20}. Best is trial 40 with value: 0.2965069049553209.


Best trial: 40. Best value: 0.296507:  88%|████████▊ | 44/50 [01:30<00:12,  2.11s/it]

[I 2025-11-12 14:26:37,379] Trial 43 finished with value: 0.2794476035743298 and parameters: {'depth': 6, 'learning_rate': 0.14500391889425815, 'iterations': 284, 'l2_leaf_reg': 3.4069560666862815, 'border_count': 115, 'bagging_temperature': 0.13331278558372833, 'random_strength': 0.667475872085302, 'min_data_in_leaf': 20}. Best is trial 40 with value: 0.2965069049553209.


Best trial: 40. Best value: 0.296507:  90%|█████████ | 45/50 [01:32<00:10,  2.11s/it]

[I 2025-11-12 14:26:39,506] Trial 44 finished with value: 0.2770105605199025 and parameters: {'depth': 6, 'learning_rate': 0.1382683149699044, 'iterations': 289, 'l2_leaf_reg': 2.360181743470675, 'border_count': 128, 'bagging_temperature': 0.21546746559700738, 'random_strength': 0.8155563273645988, 'min_data_in_leaf': 29}. Best is trial 40 with value: 0.2965069049553209.


Best trial: 40. Best value: 0.296507:  92%|█████████▏| 46/50 [01:34<00:07,  1.91s/it]

[I 2025-11-12 14:26:40,956] Trial 45 finished with value: 0.26807473598700243 and parameters: {'depth': 5, 'learning_rate': 0.13617849473482707, 'iterations': 273, 'l2_leaf_reg': 3.157272093773489, 'border_count': 118, 'bagging_temperature': 0.1191635727919174, 'random_strength': 0.550771833745791, 'min_data_in_leaf': 24}. Best is trial 40 with value: 0.2965069049553209.


Best trial: 40. Best value: 0.296507:  94%|█████████▍| 47/50 [01:35<00:05,  1.69s/it]

[I 2025-11-12 14:26:42,128] Trial 46 finished with value: 0.2843216896831844 and parameters: {'depth': 4, 'learning_rate': 0.1442291238953893, 'iterations': 292, 'l2_leaf_reg': 4.333728599928965, 'border_count': 125, 'bagging_temperature': 0.1898043756261091, 'random_strength': 1.0638537424971297, 'min_data_in_leaf': 33}. Best is trial 40 with value: 0.2965069049553209.


Best trial: 40. Best value: 0.296507:  96%|█████████▌| 48/50 [01:36<00:02,  1.37s/it]

[I 2025-11-12 14:26:42,737] Trial 47 finished with value: 0.264825345247766 and parameters: {'depth': 5, 'learning_rate': 0.12917116965517267, 'iterations': 141, 'l2_leaf_reg': 2.7827059655421733, 'border_count': 108, 'bagging_temperature': 0.2627533851171452, 'random_strength': 0.9393675194626867, 'min_data_in_leaf': 42}. Best is trial 40 with value: 0.2965069049553209.


Best trial: 40. Best value: 0.296507:  98%|█████████▊| 49/50 [01:37<00:01,  1.51s/it]

[I 2025-11-12 14:26:44,591] Trial 48 finished with value: 0.2908204711616572 and parameters: {'depth': 6, 'learning_rate': 0.14935901067594376, 'iterations': 283, 'l2_leaf_reg': 1.1213268416236608, 'border_count': 101, 'bagging_temperature': 0.04575733530078696, 'random_strength': 0.45584807383581005, 'min_data_in_leaf': 29}. Best is trial 40 with value: 0.2965069049553209.


Best trial: 40. Best value: 0.296507: 100%|██████████| 50/50 [01:39<00:00,  1.99s/it]


[I 2025-11-12 14:26:46,394] Trial 49 finished with value: 0.2859463850528026 and parameters: {'depth': 6, 'learning_rate': 0.1479583630809734, 'iterations': 274, 'l2_leaf_reg': 3.6000967525348093, 'border_count': 103, 'bagging_temperature': 0.07705428327491698, 'random_strength': 0.4873963203468407, 'min_data_in_leaf': 27}. Best is trial 40 with value: 0.2965069049553209.
Best CatBoost Meta Validation Accuracy: 0.2965


[I 2025-11-12 14:26:48,938] A new study created in memory with name: no-name-d240a5de-4e3a-4d91-8863-0ad4c2cb33c5



[4/4] Training Random Forest meta-classifier...


Best trial: 0. Best value: 0.279448:   2%|▏         | 1/50 [00:00<00:26,  1.88it/s]

[I 2025-11-12 14:26:49,472] Trial 0 finished with value: 0.2794476035743298 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 189, 'max_features': 0.3, 'min_samples_split': 25, 'min_samples_leaf': 13, 'bootstrap': True}. Best is trial 0 with value: 0.2794476035743298.


Best trial: 0. Best value: 0.279448:   4%|▍         | 2/50 [00:01<00:28,  1.69it/s]

[I 2025-11-12 14:26:50,103] Trial 1 finished with value: 0.27619821283509344 and parameters: {'max_depth_type': 'int', 'max_depth': 9, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 48, 'n_estimators': 193, 'max_features': 0.7, 'min_samples_split': 28, 'min_samples_leaf': 9, 'bootstrap': True}. Best is trial 0 with value: 0.2794476035743298.


Best trial: 0. Best value: 0.279448:   6%|▌         | 3/50 [00:01<00:26,  1.75it/s]

[I 2025-11-12 14:26:50,648] Trial 2 finished with value: 0.27619821283509344 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 64, 'n_estimators': 122, 'max_features': 0.5, 'min_samples_split': 43, 'min_samples_leaf': 20, 'bootstrap': True}. Best is trial 0 with value: 0.2794476035743298.


Best trial: 0. Best value: 0.279448:   8%|▊         | 4/50 [00:02<00:32,  1.43it/s]

[I 2025-11-12 14:26:51,537] Trial 3 finished with value: 0.272948822095857 and parameters: {'max_depth_type': 'int', 'max_depth': 12, 'max_leaf_nodes_type': 'none', 'n_estimators': 190, 'max_features': 0.5, 'min_samples_split': 35, 'min_samples_leaf': 11, 'bootstrap': True}. Best is trial 0 with value: 0.2794476035743298.


Best trial: 0. Best value: 0.279448:  10%|█         | 5/50 [00:03<00:35,  1.27it/s]

[I 2025-11-12 14:26:52,490] Trial 4 finished with value: 0.272948822095857 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 17, 'n_estimators': 152, 'max_features': 0.7, 'min_samples_split': 32, 'min_samples_leaf': 7, 'bootstrap': False}. Best is trial 0 with value: 0.2794476035743298.


Best trial: 0. Best value: 0.279448:  12%|█▏        | 6/50 [00:04<00:32,  1.37it/s]

[I 2025-11-12 14:26:53,112] Trial 5 finished with value: 0.26726238830219334 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 21, 'n_estimators': 149, 'max_features': 0.7, 'min_samples_split': 33, 'min_samples_leaf': 20, 'bootstrap': False}. Best is trial 0 with value: 0.2794476035743298.


Best trial: 6. Best value: 0.291633:  14%|█▍        | 7/50 [00:04<00:25,  1.69it/s]

[I 2025-11-12 14:26:53,413] Trial 6 finished with value: 0.2916328188464663 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 63, 'max_features': 0.5, 'min_samples_split': 22, 'min_samples_leaf': 11, 'bootstrap': True}. Best is trial 6 with value: 0.2916328188464663.


Best trial: 6. Best value: 0.291633:  16%|█▌        | 8/50 [00:05<00:25,  1.65it/s]

[I 2025-11-12 14:26:54,054] Trial 7 finished with value: 0.27538586515028435 and parameters: {'max_depth_type': 'int', 'max_depth': 9, 'max_leaf_nodes_type': 'none', 'n_estimators': 127, 'max_features': 0.3, 'min_samples_split': 40, 'min_samples_leaf': 14, 'bootstrap': False}. Best is trial 6 with value: 0.2916328188464663.


Best trial: 6. Best value: 0.291633:  18%|█▊        | 9/50 [00:05<00:20,  1.96it/s]

[I 2025-11-12 14:26:54,349] Trial 8 finished with value: 0.27538586515028435 and parameters: {'max_depth_type': 'int', 'max_depth': 10, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 24, 'n_estimators': 65, 'max_features': 0.5, 'min_samples_split': 47, 'min_samples_leaf': 18, 'bootstrap': False}. Best is trial 6 with value: 0.2916328188464663.


Best trial: 6. Best value: 0.291633:  20%|██        | 10/50 [00:06<00:22,  1.77it/s]

[I 2025-11-12 14:26:55,037] Trial 9 finished with value: 0.2737611697806661 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 87, 'n_estimators': 177, 'max_features': 0.3, 'min_samples_split': 41, 'min_samples_leaf': 19, 'bootstrap': True}. Best is trial 6 with value: 0.2916328188464663.


Best trial: 10. Best value: 0.303818:  22%|██▏       | 11/50 [00:06<00:18,  2.08it/s]

[I 2025-11-12 14:26:55,318] Trial 10 finished with value: 0.30381803411860275 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 53, 'max_features': 0.5, 'min_samples_split': 13, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 10 with value: 0.30381803411860275.


Best trial: 11. Best value: 0.305443:  24%|██▍       | 12/50 [00:06<00:15,  2.45it/s]

[I 2025-11-12 14:26:55,568] Trial 11 finished with value: 0.30544272948822093 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 52, 'max_features': 0.5, 'min_samples_split': 14, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 11 with value: 0.30544272948822093.


Best trial: 12. Best value: 0.319253:  26%|██▌       | 13/50 [00:06<00:14,  2.57it/s]

[I 2025-11-12 14:26:55,915] Trial 12 finished with value: 0.3192526401299756 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 88, 'max_features': 0.5, 'min_samples_split': 10, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 12 with value: 0.3192526401299756.


Best trial: 13. Best value: 0.324127:  28%|██▊       | 14/50 [00:07<00:14,  2.47it/s]

[I 2025-11-12 14:26:56,348] Trial 13 finished with value: 0.3241267262388302 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 90, 'max_features': 0.5, 'min_samples_split': 13, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  30%|███       | 15/50 [00:08<00:18,  1.90it/s]

[I 2025-11-12 14:26:57,149] Trial 14 finished with value: 0.3005686433793664 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 89, 'max_features': 0.5, 'min_samples_split': 10, 'min_samples_leaf': 8, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  32%|███▏      | 16/50 [00:08<00:19,  1.74it/s]

[I 2025-11-12 14:26:57,839] Trial 15 finished with value: 0.30463038180341184 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 90, 'max_features': 0.5, 'min_samples_split': 19, 'min_samples_leaf': 6, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  34%|███▍      | 17/50 [00:09<00:18,  1.79it/s]

[I 2025-11-12 14:26:58,363] Trial 16 finished with value: 0.2875710804224208 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 93, 'max_features': 0.5, 'min_samples_split': 17, 'min_samples_leaf': 16, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  36%|███▌      | 18/50 [00:10<00:22,  1.45it/s]

[I 2025-11-12 14:26:59,362] Trial 17 finished with value: 0.29488220958570266 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 122, 'max_features': 0.5, 'min_samples_split': 11, 'min_samples_leaf': 9, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  38%|███▊      | 19/50 [00:10<00:18,  1.70it/s]

[I 2025-11-12 14:26:59,713] Trial 18 finished with value: 0.255889520714866 and parameters: {'max_depth_type': 'int', 'max_depth': 3, 'max_leaf_nodes_type': 'none', 'n_estimators': 104, 'max_features': 0.3, 'min_samples_split': 18, 'min_samples_leaf': 7, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  40%|████      | 20/50 [00:11<00:17,  1.68it/s]

[I 2025-11-12 14:27:00,327] Trial 19 finished with value: 0.29000812347684807 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 81, 'max_features': 0.7, 'min_samples_split': 24, 'min_samples_leaf': 10, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  42%|████▏     | 21/50 [00:12<00:17,  1.62it/s]

[I 2025-11-12 14:27:00,997] Trial 20 finished with value: 0.30787977254264826 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 104, 'max_features': 0.5, 'min_samples_split': 15, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  44%|████▍     | 22/50 [00:12<00:17,  1.63it/s]

[I 2025-11-12 14:27:01,597] Trial 21 finished with value: 0.29975629569455725 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 106, 'max_features': 0.5, 'min_samples_split': 16, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  46%|████▌     | 23/50 [00:13<00:14,  1.85it/s]

[I 2025-11-12 14:27:01,974] Trial 22 finished with value: 0.30706742485783917 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 78, 'max_features': 0.5, 'min_samples_split': 10, 'min_samples_leaf': 7, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  48%|████▊     | 24/50 [00:13<00:13,  1.96it/s]

[I 2025-11-12 14:27:02,415] Trial 23 finished with value: 0.29975629569455725 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 107, 'max_features': 0.5, 'min_samples_split': 20, 'min_samples_leaf': 6, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  50%|█████     | 25/50 [00:14<00:13,  1.84it/s]

[I 2025-11-12 14:27:03,038] Trial 24 finished with value: 0.30300568643379366 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 140, 'max_features': 0.5, 'min_samples_split': 14, 'min_samples_leaf': 8, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  52%|█████▏    | 26/50 [00:14<00:11,  2.04it/s]

[I 2025-11-12 14:27:03,399] Trial 25 finished with value: 0.314378554021121 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 73, 'max_features': 0.5, 'min_samples_split': 15, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  54%|█████▍    | 27/50 [00:14<00:09,  2.46it/s]

[I 2025-11-12 14:27:03,615] Trial 26 finished with value: 0.2737611697806661 and parameters: {'max_depth_type': 'int', 'max_depth': 4, 'max_leaf_nodes_type': 'none', 'n_estimators': 71, 'max_features': 0.5, 'min_samples_split': 28, 'min_samples_leaf': 6, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  56%|█████▌    | 28/50 [00:15<00:09,  2.40it/s]

[I 2025-11-12 14:27:04,041] Trial 27 finished with value: 0.2851340373679935 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 77, 'max_features': 0.5, 'min_samples_split': 21, 'min_samples_leaf': 15, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  58%|█████▊    | 29/50 [00:15<00:08,  2.43it/s]

[I 2025-11-12 14:27:04,454] Trial 28 finished with value: 0.2916328188464663 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 94, 'max_features': 0.3, 'min_samples_split': 12, 'min_samples_leaf': 8, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  60%|██████    | 30/50 [00:15<00:08,  2.32it/s]

[I 2025-11-12 14:27:04,922] Trial 29 finished with value: 0.30300568643379366 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 63, 'max_features': 0.7, 'min_samples_split': 25, 'min_samples_leaf': 12, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  62%|██████▏   | 31/50 [00:16<00:08,  2.20it/s]

[I 2025-11-12 14:27:05,421] Trial 30 finished with value: 0.2883834281072299 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 114, 'max_features': 0.3, 'min_samples_split': 24, 'min_samples_leaf': 9, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  64%|██████▍   | 32/50 [00:17<00:10,  1.78it/s]

[I 2025-11-12 14:27:06,249] Trial 31 finished with value: 0.30787977254264826 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 99, 'max_features': 0.5, 'min_samples_split': 15, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  66%|██████▌   | 33/50 [00:17<00:09,  1.72it/s]

[I 2025-11-12 14:27:06,875] Trial 32 finished with value: 0.30381803411860275 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 83, 'max_features': 0.5, 'min_samples_split': 16, 'min_samples_leaf': 6, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  68%|██████▊   | 34/50 [00:18<00:09,  1.74it/s]

[I 2025-11-12 14:27:07,434] Trial 33 finished with value: 0.2981316003249391 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 116, 'max_features': 0.5, 'min_samples_split': 13, 'min_samples_leaf': 7, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  70%|███████   | 35/50 [00:18<00:06,  2.15it/s]

[I 2025-11-12 14:27:07,643] Trial 34 finished with value: 0.2688870836718115 and parameters: {'max_depth_type': 'int', 'max_depth': 6, 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 93, 'n_estimators': 69, 'max_features': 0.5, 'min_samples_split': 28, 'min_samples_leaf': 5, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  72%|███████▏  | 36/50 [00:19<00:07,  2.00it/s]

[I 2025-11-12 14:27:08,229] Trial 35 finished with value: 0.3062550771730301 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 131, 'max_features': 0.5, 'min_samples_split': 10, 'min_samples_leaf': 6, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  74%|███████▍  | 37/50 [00:19<00:06,  2.04it/s]

[I 2025-11-12 14:27:08,691] Trial 36 finished with value: 0.272948822095857 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 51, 'n_estimators': 87, 'max_features': 0.7, 'min_samples_split': 18, 'min_samples_leaf': 9, 'bootstrap': True}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  76%|███████▌  | 38/50 [00:20<00:06,  1.91it/s]

[I 2025-11-12 14:27:09,293] Trial 37 finished with value: 0.31762794476035744 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 99, 'max_features': 0.5, 'min_samples_split': 22, 'min_samples_leaf': 7, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  78%|███████▊  | 39/50 [00:20<00:04,  2.31it/s]

[I 2025-11-12 14:27:09,517] Trial 38 finished with value: 0.2794476035743298 and parameters: {'max_depth_type': 'int', 'max_depth': 6, 'max_leaf_nodes_type': 'none', 'n_estimators': 74, 'max_features': 0.5, 'min_samples_split': 21, 'min_samples_leaf': 7, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  80%|████████  | 40/50 [00:20<00:04,  2.44it/s]

[I 2025-11-12 14:27:09,874] Trial 39 finished with value: 0.27538586515028435 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 75, 'n_estimators': 58, 'max_features': 0.7, 'min_samples_split': 36, 'min_samples_leaf': 10, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  82%|████████▏ | 41/50 [00:21<00:04,  2.02it/s]

[I 2025-11-12 14:27:10,565] Trial 40 finished with value: 0.31194151096669376 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 114, 'max_features': 0.5, 'min_samples_split': 23, 'min_samples_leaf': 8, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  84%|████████▍ | 42/50 [00:22<00:04,  1.76it/s]

[I 2025-11-12 14:27:11,301] Trial 41 finished with value: 0.31356620633631194 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 113, 'max_features': 0.5, 'min_samples_split': 23, 'min_samples_leaf': 8, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  86%|████████▌ | 43/50 [00:23<00:04,  1.68it/s]

[I 2025-11-12 14:27:11,966] Trial 42 finished with value: 0.30787977254264826 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 97, 'max_features': 0.5, 'min_samples_split': 29, 'min_samples_leaf': 6, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  88%|████████▊ | 44/50 [00:24<00:04,  1.40it/s]

[I 2025-11-12 14:27:12,945] Trial 43 finished with value: 0.2843216896831844 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 137, 'max_features': 0.5, 'min_samples_split': 50, 'min_samples_leaf': 7, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  90%|█████████ | 45/50 [00:24<00:03,  1.31it/s]

[I 2025-11-12 14:27:13,833] Trial 44 finished with value: 0.2965069049553209 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 168, 'max_features': 0.5, 'min_samples_split': 26, 'min_samples_leaf': 13, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  92%|█████████▏| 46/50 [00:25<00:02,  1.47it/s]

[I 2025-11-12 14:27:14,324] Trial 45 finished with value: 0.2932575142160845 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'int', 'max_leaf_nodes': 39, 'n_estimators': 111, 'max_features': 0.5, 'min_samples_split': 32, 'min_samples_leaf': 5, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  94%|█████████▍| 47/50 [00:25<00:01,  1.51it/s]

[I 2025-11-12 14:27:14,934] Trial 46 finished with value: 0.2818846466287571 and parameters: {'max_depth_type': 'int', 'max_depth': 11, 'max_leaf_nodes_type': 'none', 'n_estimators': 197, 'max_features': 0.3, 'min_samples_split': 12, 'min_samples_leaf': 6, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  96%|█████████▌| 48/50 [00:26<00:01,  1.52it/s]

[I 2025-11-12 14:27:15,592] Trial 47 finished with value: 0.30381803411860275 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 121, 'max_features': 0.5, 'min_samples_split': 19, 'min_samples_leaf': 11, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127:  98%|█████████▊| 49/50 [00:27<00:00,  1.59it/s]

[I 2025-11-12 14:27:16,143] Trial 48 finished with value: 0.3151909017059301 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 100, 'max_features': 0.5, 'min_samples_split': 17, 'min_samples_leaf': 8, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.


Best trial: 13. Best value: 0.324127: 100%|██████████| 50/50 [00:27<00:00,  1.80it/s]


[I 2025-11-12 14:27:16,667] Trial 49 finished with value: 0.31762794476035744 and parameters: {'max_depth_type': 'none', 'max_leaf_nodes_type': 'none', 'n_estimators': 86, 'max_features': 0.5, 'min_samples_split': 17, 'min_samples_leaf': 7, 'bootstrap': False}. Best is trial 13 with value: 0.3241267262388302.
Best RF Meta Validation Accuracy: 0.3241

STEP 3: Evaluation and Results
Output shape: (6156, 16)

Dataset distribution:
dataset
train         3693
test          1232
validation    1231
Name: count, dtype: int64

✅ Saved to '/Users/clarencemarvin/Downloads/regularized/meta_classifier_predictions.csv'

Output shape: (6156, 16)
Columns: ['district', 'bedroom_count', 'property_age', 'saleable_area', 'travel_time_to_cbd', 'walking_time_to_mtr', 'total_poi_within_1000m', 'category_Education_within_2000m', 'category_Medical_within_2000m', 'category_Public_Market_within_1000m', 'pet_policy_binary', 'dataset', 'LGBM_predict', 'XGB_predict', 'CB_predict', 'RF_predict']

First 10 rows:
    

In [39]:
output_df.head()

Unnamed: 0,district,bedroom_count,property_age,saleable_area,travel_time_to_cbd,walking_time_to_mtr,total_poi_within_1000m,category_Education_within_2000m,category_Medical_within_2000m,category_Public_Market_within_1000m,pet_policy_binary,dataset,LGBM_predict,XGB_predict,CB_predict,RF_predict
0,Ho Man Tin,3,36,978,26,16,838,484,64,3,1,train,RF,RF,RF,RF
1,Happy Valley,3,40,750,21,9,797,164,27,5,1,train,XGB,XGB,RF,RF
2,Tai Wai,1,37,282,45,18,391,115,6,10,1,train,XGB,XGB,CatBoost,CatBoost
3,Sai Ying Pun,3,48,737,14,9,760,145,25,7,1,train,LGBM,LGBM,RF,CatBoost
4,Tsing Yi,2,39,381,41,19,484,112,5,6,1,train,RF,RF,RF,RF


In [40]:
output_df.shape

(6156, 16)

In [44]:
all_consolidated_df.shape

(6156, 20)

In [46]:
all_consolidated_df_indexed = all_consolidated_df.reset_index(drop=False).rename(columns={'index': 'row_id'})
output_df_indexed = output_df.reset_index(drop=False).rename(columns={'index': 'row_id'})

combined_df = pd.merge(
    all_consolidated_df_indexed,
    output_df_indexed[['row_id', 'LGBM_predict', 'XGB_predict', 'CB_predict', 'RF_predict']],
    on='row_id',
    how='inner'
)

combined_df = combined_df.drop(columns=['row_id'])
print(combined_df.shape)

(6156, 24)


In [49]:
def calculate_ensemble_predicted(row):
    lgbm_choice = row['LGBM_predict']
    xgb_choice = row['XGB_predict']
    cb_choice = row['CB_predict']
    rf_choice = row['RF_predict']
    
    votes = pd.Series([lgbm_choice, xgb_choice, cb_choice, rf_choice]).value_counts()
    
    model_to_pred = {
        'LGBM': 'lgbm_predicted',
        'XGB': 'xgb_predicted',
        'CatBoost': 'catboost_predicted',
        'RF': 'rf_predicted'
    }
    
    # If all agree on same model, use only that model
    if len(votes) == 1:
        return row[model_to_pred[votes.index[0]]]
    
    # Otherwise, use weighted ensemble
    top_model = votes.index[0]
    remaining_models = votes.index[1:]
    
    ensemble = 0.5 * row[model_to_pred[top_model]]
    remaining_weight = 0.5 / len(remaining_models)
    
    for model in remaining_models:
        ensemble += remaining_weight * row[model_to_pred[model]]
    
    return ensemble

combined_df['ensemble_predicted'] = combined_df.apply(calculate_ensemble_predicted, axis=1)

In [108]:
combined_df.head()

Unnamed: 0,district,bedroom_count,property_age,saleable_area,travel_time_to_cbd,walking_time_to_mtr,total_poi_within_1000m,category_Education_within_2000m,category_Medical_within_2000m,category_Public_Market_within_1000m,...,absolute_error,lgbm_predicted,xgb_predicted,catboost_predicted,rf_predicted,LGBM_predict,XGB_predict,CB_predict,RF_predict,ensemble_predicted
0,Kowloon City,3,36,978,26,16,838,484,64,3,...,128949.1,12371050.0,12540288.0,14222190.0,12491920.0,RF,RF,RF,RF,12491920.0
1,Wan Chai,3,40,750,21,9,797,164,27,5,...,282398.9,13282400.0,12856448.0,12395820.0,12847520.0,XGB,XGB,RF,RF,12851980.0
2,Sha Tin,1,37,282,45,18,391,115,6,10,...,501891.4,3801891.0,3411503.0,4082155.0,3662088.0,XGB,XGB,CatBoost,CatBoost,3746829.0
3,Central and Western,3,48,737,14,9,760,145,25,7,...,25164.39,10654840.0,10797545.0,11854820.0,11252190.0,LGBM,LGBM,RF,CatBoost,11104170.0
4,Kwai Tsing,2,39,381,41,19,484,112,5,6,...,1227934.0,4127934.0,4054065.0,4219507.0,3968541.0,RF,RF,RF,RF,3968541.0


   Model       R²  MAPE (%)  MdAPE (%)
    LGBM 0.922421  9.858898   7.229400
     XGB 0.919888 10.167304   7.583526
CatBoost 0.923766  9.959270   7.389122
      RF 0.913071  9.786598   6.582805
Ensemble 0.931690  8.474010   5.712131


In [111]:
from sklearn.metrics import r2_score, mean_absolute_percentage_error, median_absolute_error
import numpy as np
import pandas as pd

def calculate_mdape(y_true, y_pred):
    return np.median(np.abs((y_true - y_pred) / y_true)) * 100

# Filter data to only include the test dataset
test_df = combined_df[combined_df['dataset'] == 'test']

models = {
    'LGBM': 'lgbm_predicted',
    'XGB': 'xgb_predicted',
    'CatBoost': 'catboost_predicted',
    'RF': 'rf_predicted',
    'Ensemble': 'ensemble_predicted'
}

results = []

for model_name, pred_col in models.items():
    r2 = r2_score(test_df['actual_price'], test_df[pred_col])
    mape = mean_absolute_percentage_error(test_df['actual_price'], test_df[pred_col]) * 100
    mdape = calculate_mdape(test_df['actual_price'], test_df[pred_col])
    
    results.append({
        'Model': model_name,
        'R²': r2,
        'MAPE (%)': mape,
        'MdAPE (%)': mdape
    })

results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))

   Model       R²  MAPE (%)  MdAPE (%)
    LGBM 0.842774 12.368314   8.582569
     XGB 0.845082 12.290859   8.743598
CatBoost 0.845423 12.402399   8.588082
      RF 0.837192 13.735604   9.023464
Ensemble 0.849676 12.287352   8.519365


In [114]:
from sklearn.metrics import r2_score, mean_absolute_percentage_error, median_absolute_error
import numpy as np
import pandas as pd

def calculate_mdape(y_true, y_pred):
    return np.median(np.abs((y_true - y_pred) / y_true)) * 100

# Filter data to only include the test dataset
test_df = combined_df[combined_df['dataset'] == 'test'].copy()

# Create price quartiles
test_df['price_quartile'] = pd.qcut(test_df['actual_price'], q=4, labels=['Q1', 'Q2', 'Q3', 'Q4'])

models = {
    'LGBM': 'lgbm_predicted',
    'XGB': 'xgb_predicted',
    'CatBoost': 'catboost_predicted',
    'RF': 'rf_predicted',
    'Ensemble': 'ensemble_predicted'
}

# Overall results
print("OVERALL PERFORMANCE:")
print("=" * 50)
results = []

for model_name, pred_col in models.items():
    r2 = r2_score(test_df['actual_price'], test_df[pred_col])
    mape = mean_absolute_percentage_error(test_df['actual_price'], test_df[pred_col]) * 100
    mdape = calculate_mdape(test_df['actual_price'], test_df[pred_col])
    
    results.append({
        'Model': model_name,
        'R²': r2,
        'MAPE (%)': mape,
        'MdAPE (%)': mdape
    })

results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))

# Error distribution across price quartiles
print("\n\nERROR DISTRIBUTION ACROSS PRICE QUARTILES:")
print("=" * 50)

# Show quartile ranges
quartile_ranges = test_df.groupby('price_quartile')['actual_price'].agg(['min', 'max', 'count'])
print("Price Quartile Ranges:")
print(quartile_ranges)

print("\nPerformance by Quartile:")
for quartile in ['Q1', 'Q2', 'Q3', 'Q4']:
    quartile_data = test_df[test_df['price_quartile'] == quartile]
    print(f"\n{quartile} ({len(quartile_data)} properties):")
    
    quartile_results = []
    for model_name, pred_col in models.items():
        r2 = r2_score(quartile_data['actual_price'], quartile_data[pred_col])
        mape = mean_absolute_percentage_error(quartile_data['actual_price'], quartile_data[pred_col]) * 100
        mdape = calculate_mdape(quartile_data['actual_price'], quartile_data[pred_col])
        
        quartile_results.append({
            'Model': model_name,
            'R²': r2,
            'MAPE (%)': mape,
            'MdAPE (%)': mdape
        })
    
    quartile_results_df = pd.DataFrame(quartile_results)
    print(quartile_results_df.to_string(index=False))

OVERALL PERFORMANCE:
   Model       R²  MAPE (%)  MdAPE (%)
    LGBM 0.842774 12.368314   8.582569
     XGB 0.845082 12.290859   8.743598
CatBoost 0.845423 12.402399   8.588082
      RF 0.837192 13.735604   9.023464
Ensemble 0.849676 12.287352   8.519365


ERROR DISTRIBUTION ACROSS PRICE QUARTILES:
Price Quartile Ranges:
                     min       max  count
price_quartile                           
Q1               1680000   5480000    308
Q2               5490000   7500000    317
Q3               7550000  10500000    301
Q4              10600000  48500000    306

Performance by Quartile:

Q1 (308 properties):
   Model        R²  MAPE (%)  MdAPE (%)
    LGBM -0.277309 16.761825  10.136453
     XGB -0.288110 16.597657  10.713573
CatBoost -0.174851 16.440753  10.611932
      RF -0.795768 20.025538  12.379256
Ensemble -0.324315 16.914691  10.650959

Q2 (317 properties):
   Model        R²  MAPE (%)  MdAPE (%)
    LGBM -1.310672  9.856102   7.469549
     XGB -1.062315  9.649193   6.99

In [112]:
# Check for missing values in predictions
print("Missing values in predictions:")
for model_name, pred_col in models.items():
    missing_count = test_df[pred_col].isna().sum()
    print(f"{model_name}: {missing_count} missing values")

# Check if predictions are reasonable
print("\nPrediction statistics:")
for model_name, pred_col in models.items():
    print(f"\n{model_name}:")
    print(f"  Min: {test_df[pred_col].min():,.0f}")
    print(f"  Max: {test_df[pred_col].max():,.0f}")
    print(f"  Mean: {test_df[pred_col].mean():,.0f}")

print(f"\nActual price statistics:")
print(f"  Min: {test_df['actual_price'].min():,.0f}")
print(f"  Max: {test_df['actual_price'].max():,.0f}")
print(f"  Mean: {test_df['actual_price'].mean():,.0f}")

# Check a sample of data
print("\nSample data (first 5 rows):")
sample_cols = ['actual_price'] + list(models.values())
print(test_df[sample_cols].head())

Missing values in predictions:
LGBM: 0 missing values
XGB: 0 missing values
CatBoost: 0 missing values
RF: 0 missing values
Ensemble: 0 missing values

Prediction statistics:

LGBM:
  Min: 2,511,342
  Max: 48,437,836
  Mean: 8,847,929

XGB:
  Min: 2,621,734
  Max: 49,212,504
  Mean: 8,850,445

CatBoost:
  Min: 2,745,827
  Max: 54,185,155
  Mean: 8,884,067

RF:
  Min: 2,799,780
  Max: 45,519,024
  Mean: 8,875,125

Ensemble:
  Min: 2,799,780
  Max: 54,185,155
  Mean: 8,877,571

Actual price statistics:
  Min: 1,680,000
  Max: 48,500,000
  Mean: 8,847,192

Sample data (first 5 rows):
      actual_price  lgbm_predicted  xgb_predicted  catboost_predicted  \
4924       8700000    8.181195e+06      8597520.0        8.867575e+06   
4925      23000000    1.989366e+07     20328726.0        1.916435e+07   
4926       7700000    6.030642e+06      6100254.5        6.194364e+06   
4927       5000000    4.399743e+06      4430923.5        4.510727e+06   
4928       3800000    4.638800e+06      5217821

In [84]:
import numpy as np
import pandas as pd
from scipy import stats

# ============================================================================
# UPDATED WorthinessScorer Class
# ============================================================================

class WorthinessScorer:
    def __init__(self):
        """Initialize the worthiness scorer with updated preference weights and mappings"""
        
        # District scores mapping (age_2 = "25-44", age_4 = "65+")
        self.district_scores = {
            'age_2': {  # 25-44
                'Central and Western': 1.000,
                'Wan Chai': 0.744,
                'Islands': 0.621,
                'Yau Tsim Mong': 0.602,
                'Southern': 0.586,
                'Eastern': 0.526,
                'Sha Tin': 0.396,
                'Kwun Tong': 0.394,
                'Tuen Mun': 0.394,
                'Tai Po': 0.386,
                'North': 0.384,
                'Yuen Long': 0.361,
                'Kowloon City': 0.335,
                'Kwai Tsing': 0.303,
                'Tsuen Wan': 0.286,
                'Sham Shui Po': 0.219,
                'Sai Kung': 0.201,
                'Wong Tai Sin': 0.000
            },
            'age_4': {  # 65+
                'Central and Western': 0.000,
                'Wan Chai': 0.256,
                'Islands': 0.379,
                'Yau Tsim Mong': 0.398,
                'Southern': 0.414,
                'Eastern': 0.474,
                'Sha Tin': 0.604,
                'Kwun Tong': 0.606,
                'Tuen Mun': 0.606,
                'Tai Po': 0.614,
                'North': 0.616,
                'Yuen Long': 0.639,
                'Kowloon City': 0.665,
                'Kwai Tsing': 0.697,
                'Tsuen Wan': 0.714,
                'Sham Shui Po': 0.781,
                'Sai Kung': 0.799,
                'Wong Tai Sin': 1.000
            }
        }
        
        # Walking time to MTR scores
        self.walking_time_scores = {
            'age_2': {  # 25-44
                0: 1.000, 1: 1.000, 2: 1.000, 3: 1.000, 4: 1.000, 5: 1.000,
                6: 1.000, 7: 1.000, 8: 1.000, 9: 1.000, 10: 1.000, 11: 1.000,
                12: 1.000, 13: 0.875, 14: 0.750, 15: 0.625, 16: 0.500,
                17: 0.375, 18: 0.250, 19: 0.125
            },
            'age_4': {  # 65+
                0: 0.800, 1: 0.800, 2: 0.800, 3: 0.800, 4: 0.800, 5: 0.800,
                6: 0.800, 7: 0.800, 8: 0.800, 9: 0.800, 10: 0.800, 11: 0.800,
                12: 0.800, 13: 0.675, 14: 0.550, 15: 0.425, 16: 0.300,
                17: 0.175, 18: 0.050, 19: 0.000
            }
        }
        
        # Bedroom count scores
        self.bedroom_scores = {
            'age_2': {  # 25-44
                'range_1_2': 0.522041,
                'range_3': 0.372007,
                'range_4': 0.890750,
                'range_5_plus': 0.771534
            },
            'age_4': {  # 65+
                'range_1_2': 0.620547,
                'range_3': 0.605019,
                'range_4': 0.044497,
                'range_5_plus': 0.266851
            }
        }
        
        # POI scores (MUF=Public_Market, SCH=Education, HNC=Medical)
        self.poi_scores = {
            'age_2': {  # 25-44
                'MUF': 0.029093,  # Municipal Facility (Public Market)
                'SCH': 0.665867,  # Educational
                'HNC': 1.000     # Health Care (Medical)
            },
            'age_4': {  # 65+
                'MUF': 0.890925,  # Municipal Facility (Public Market)
                'SCH': 0.300066,  # Educational
                'HNC': 0.101520   # Health Care (Medical)
            }
        }
        
        # Component weights for final multiplier
        # worthiness_multiplier = 0.08×bedroom + 0.02×area + 0.25×poi + 0.5×location + 0.15×walking_to_mtr
        self.component_weights = {
            'bedroom': 0.08,
            'area': 0.02,
            'poi': 0.25,
            'location': 0.50,
            'walking_to_mtr': 0.15
        }
        
        # Area benchmarks (keeping from original)
        self.benchmarks = {
            'saleable_area': {
                'median': 522.0,
                'q1': 429.0,
                'q3': 687.0,
                'mean': 558.09
            }
        }
    
    def _get_bedroom_score(self, bedroom_count, age_group):
        """Get bedroom score based on count and age group"""
        if bedroom_count <= 2:
            return self.bedroom_scores[age_group]['range_1_2']
        elif bedroom_count == 3:
            return self.bedroom_scores[age_group]['range_3']
        elif bedroom_count == 4:
            return self.bedroom_scores[age_group]['range_4']
        else:  # 5+
            return self.bedroom_scores[age_group]['range_5_plus']
    
    def _get_district_score(self, district, age_group):
        """Get district score based on district name and age group"""
        return self.district_scores[age_group].get(district, 0.0)
    
    def _get_walking_time_score(self, walking_time, age_group):
        """Get walking time score based on minutes to MTR"""
        walking_time = int(walking_time) if not pd.isna(walking_time) else 20
        
        if walking_time >= 20:
            return 0.0
        else:
            return self.walking_time_scores[age_group].get(walking_time, 0.0)
    
    def _calculate_bedroom_multiplier(self, bedroom_count, age_group):
        """Calculate bedroom preference multiplier"""
        score = self._get_bedroom_score(bedroom_count, age_group)
        # Normalize around 1.0 (using average of all bedroom scores as baseline)
        avg_score = np.mean(list(self.bedroom_scores[age_group].values()))
        multiplier = score / avg_score
        return multiplier
    
    def _calculate_area_multiplier(self, saleable_area, age_group):
        """Calculate area importance multiplier"""
        area_median = self.benchmarks['saleable_area']['median']
        area_ratio = saleable_area / area_median
        
        # Simple area multiplier: larger areas get slight bonus, smaller get slight penalty
        multiplier = 0.8 + (area_ratio * 0.4)  # Range roughly 0.8 to 1.2
        return max(0.5, min(1.5, multiplier))  # Cap between 0.5 and 1.5
    
    def _calculate_poi_multiplier(self, row, age_group):
        """Calculate POI accessibility multiplier"""
        # Get POI counts
        public_market_count = row.get('category_Public_Market_within_1000m', 0)
        education_count = row.get('category_Education_within_2000m', 0)
        medical_count = row.get('category_Medical_within_2000m', 0)
        
        # Get POI scores for age group
        muf_score = self.poi_scores[age_group]['MUF']
        sch_score = self.poi_scores[age_group]['SCH']
        hnc_score = self.poi_scores[age_group]['HNC']
        
        # Normalize POI counts (cap at reasonable levels)
        public_market_norm = min(public_market_count / 5, 1.0)
        education_norm = min(education_count / 10, 1.0)
        medical_norm = min(medical_count / 5, 1.0)
        
        # Calculate weighted POI score
        poi_score = (
            muf_score * public_market_norm +
            sch_score * education_norm +
            hnc_score * medical_norm
        )
        
        # Normalize to multiplier range (0.7 to 1.3)
        max_possible_score = max(muf_score, sch_score, hnc_score)
        normalized_score = poi_score / max_possible_score if max_possible_score > 0 else 0
        multiplier = 0.7 + (normalized_score * 0.6)
        
        return multiplier
    
    def _calculate_location_multiplier(self, district, age_group):
        """Calculate location quality multiplier based on district"""
        district_score = self._get_district_score(district, age_group)
        # Convert district score to multiplier (0.8 to 1.2 range)
        multiplier = 0.8 + (district_score * 0.4)
        return multiplier
    
    def _calculate_walking_multiplier(self, walking_time, age_group):
        """Calculate walking time to MTR multiplier"""
        walking_score = self._get_walking_time_score(walking_time, age_group)
        # Convert walking score to multiplier (0.8 to 1.2 range)
        multiplier = 0.8 + (walking_score * 0.4)
        return multiplier
    
    def calculate_worthiness_score(self, row, age_group):
        """Calculate worthiness score for a property"""
        
        # Use ensemble_predicted as base price
        if 'ensemble_predicted' not in row.index or pd.isna(row['ensemble_predicted']):
            raise ValueError("ensemble_predicted column missing or null")
        
        base_price = row['ensemble_predicted']
        actual_price = row['actual_price']
        
        # Calculate all multipliers
        bedroom_mult = self._calculate_bedroom_multiplier(row['bedroom_count'], age_group)
        area_mult = self._calculate_area_multiplier(row['saleable_area'], age_group)
        poi_mult = self._calculate_poi_multiplier(row, age_group)
        location_mult = self._calculate_location_multiplier(row['district'], age_group)
        walking_mult = self._calculate_walking_multiplier(row['walking_time_to_mtr'], age_group)
        
        # Combined multiplier using new weights
        # worthiness_multiplier = 0.08×bedroom + 0.02×area + 0.25×poi + 0.5×location + 0.15×walking_to_mtr
        combined_multiplier = (
            self.component_weights['bedroom'] * bedroom_mult +
            self.component_weights['area'] * area_mult +
            self.component_weights['poi'] * poi_mult +
            self.component_weights['location'] * location_mult +
            self.component_weights['walking_to_mtr'] * walking_mult
        )
        
        worthiness_price = base_price * combined_multiplier
        
        return {
            'age_group': age_group,
            'actual_price': actual_price,
            'base_price': base_price,
            'worthiness_price': worthiness_price,
            'worthiness_multiplier': combined_multiplier,
            'bedroom_multiplier': bedroom_mult,
            'area_multiplier': area_mult,
            'poi_multiplier': poi_mult,
            'location_multiplier': location_mult,
            'walking_multiplier': walking_mult,
            'price_difference_vs_base': worthiness_price - base_price,
            'price_difference_vs_base_pct': ((worthiness_price / base_price) - 1) * 100,
        }


# ============================================================================
# STEP 2: Define MultiplierAnalyzer Class (unchanged)
# ============================================================================

class MultiplierAnalyzer:
    """Analyze worthiness multiplier distributions before creating scores"""
    
    def __init__(self, scorer):
        self.scorer = scorer
    
    def calculate_all_multipliers(self, df):
        """Calculate worthiness multipliers for all properties in dataframe"""
        results = []
        
        print("Calculating multipliers for all properties...")
        print(f"Total properties: {len(df)}")
        total = len(df)
        errors = 0
        
        for idx, row in df.iterrows():
            try:
                age_2_result = self.scorer.calculate_worthiness_score(row, 'age_2')
                age_4_result = self.scorer.calculate_worthiness_score(row, 'age_4')
                
                results.append({
                    'index': idx,
                    'age_2_multiplier': age_2_result['worthiness_multiplier'],
                    'age_2_worthiness_price': age_2_result['worthiness_price'],
                    'age_2_premium_pct': age_2_result['price_difference_vs_base_pct'],
                    'age_4_multiplier': age_4_result['worthiness_multiplier'],
                    'age_4_worthiness_price': age_4_result['worthiness_price'],
                    'age_4_premium_pct': age_4_result['price_difference_vs_base_pct'],
                    'better_for': 'age_2' if age_2_result['worthiness_price'] > age_4_result['worthiness_price'] else 'age_4',
                    'worthiness_gap': abs(age_2_result['worthiness_price'] - age_4_result['worthiness_price'])
                })
                
                if len(results) % 500 == 0:
                    print(f"  Processed {len(results)}/{total} properties ({len(results)/total*100:.1f}%)...")
                    
            except Exception as e:
                errors += 1
                if errors <= 5:  # Only print first 5 errors
                    print(f"  ⚠️  Error at index {idx}: {str(e)[:50]}")
                continue
        
        if errors > 5:
            print(f"  ⚠️  ... and {errors - 5} more errors (total: {errors} errors)")
        
        print(f"✅ Completed! Processed {len(results)}/{total} properties (success rate: {len(results)/total*100:.1f}%)")
        
        return pd.DataFrame(results)
    
    def print_distribution_analysis(self, multipliers_df):
        """Print detailed distribution analysis for multipliers"""
        
        if len(multipliers_df) == 0:
            print("❌ No valid multipliers calculated!")
            return
        
        print("\n" + "="*100)
        print("WORTHINESS MULTIPLIER DISTRIBUTION ANALYSIS")
        print("="*100)
        
        for age_group in ['age_2', 'age_4']:
            col_name = f'{age_group}_multiplier'
            
            if col_name not in multipliers_df.columns:
                print(f"❌ Column {col_name} not found!")
                continue
                
            multipliers = multipliers_df[col_name].dropna()
            
            if len(multipliers) == 0:
                print(f"❌ No valid multipliers for {age_group}!")
                continue
            
            print(f"\n{'='*100}")
            print(f"AGE GROUP: {age_group.upper()} ({'25-44' if age_group == 'age_2' else '65+'})")
            print(f"{'='*100}")
            
            # Basic statistics
            print(f"\n📊 SUMMARY STATISTICS:")
            print(f"  • Count:        {len(multipliers):>10,}")
            print(f"  • Mean:         {multipliers.mean():>10.4f}")
            print(f"  • Std Dev:      {multipliers.std():>10.4f}")
            print(f"  • Min:          {multipliers.min():>10.4f}")
            print(f"  • Max:          {multipliers.max():>10.4f}")
            print(f"  • Range:        {multipliers.max() - multipliers.min():>10.4f}")
            
            # Percentile analysis
            percentiles = [0, 5, 10, 25, 50, 75, 90, 95, 100]
            values = np.percentile(multipliers, percentiles)
            
            print(f"\n📈 PERCENTILE BREAKDOWN:")
            print(f"  {'Percentile':<15} {'Multiplier':<15} {'Interpretation':<40}")
            print(f"  {'-'*70}")
            
            interpretations = [
                "Worst fit (minimum)",
                "Bottom 5% - Very poor fit",
                "Bottom 10% - Poor fit",
                "Q1 - Below average",
                "Median - Average fit",
                "Q3 - Above average",
                "Top 10% - Very good fit",
                "Top 5% - Excellent fit",
                "Best fit (maximum)"
            ]
            
            for p, v, interp in zip(percentiles, values, interpretations):
                print(f"  {str(p)+'th':<15} {v:<15.4f} {interp:<40}")
            
            # Distribution by ranges
            print(f"\n📊 DISTRIBUTION BY RANGE:")
            ranges = [
                (0, 0.85, "Very Poor (<0.85)"),
                (0.85, 0.90, "Poor (0.85-0.90)"),
                (0.90, 0.95, "Below Average (0.90-0.95)"),
                (0.95, 1.00, "Slightly Below (0.95-1.00)"),
                (1.00, 1.05, "Slightly Above (1.00-1.05)"),
                (1.05, 1.10, "Above Average (1.05-1.10)"),
                (1.10, 1.15, "Good (1.10-1.15)"),
                (1.15, 2.00, "Excellent (>1.15)")
            ]
            
            print(f"  {'Range':<30} {'Count':<10} {'Percentage':<15} {'Cumulative %':<15}")
            print(f"  {'-'*70}")
            
            cumulative = 0
            for low, high, label in ranges:
                count = ((multipliers >= low) & (multipliers < high)).sum()
                pct = (count / len(multipliers)) * 100
                cumulative += pct
                print(f"  {label:<30} {count:<10,} {pct:<14.2f}% {cumulative:<14.2f}%")
        
        # Comparison
        print(f"\n{'='*100}")
        print("COMPARISON: AGE_2 (25-44) vs AGE_4 (65+)")
        print(f"{'='*100}")
        
        age_2_mult = multipliers_df['age_2_multiplier']
        age_4_mult = multipliers_df['age_4_multiplier']
        
        print(f"\n📊 MEAN COMPARISON:")
        print(f"  • age_2 (25-44) mean:   {age_2_mult.mean():.4f}")
        print(f"  • age_4 (65+) mean:     {age_4_mult.mean():.4f}")
        print(f"  • Difference:           {age_2_mult.mean() - age_4_mult.mean():.4f}")
        
        correlation = multipliers_df['age_2_multiplier'].corr(multipliers_df['age_4_multiplier'])
        print(f"\n📊 CORRELATION:")
        print(f"  • Correlation coefficient: {correlation:.4f}")
        
        better_for_counts = multipliers_df['better_for'].value_counts()
        print(f"\n📊 WHICH AGE GROUP VALUES PROPERTIES MORE:")
        for age, count in better_for_counts.items():
            pct = (count / len(multipliers_df)) * 100
            age_label = "25-44" if age == "age_2" else "65+"
            print(f"  • Better for {age} ({age_label}): {count:>6,} properties ({pct:>5.1f}%)")
        
        print(f"\n{'='*100}")
    
    def suggest_score_tiers(self, multipliers_df):
        """Suggest tier boundaries based on actual data distribution"""
        print("\n" + "="*100)
        print("SUGGESTED SCORE TIERS")
        print("="*100)
        
        for age_group in ['age_2', 'age_4']:
            col_name = f'{age_group}_multiplier'
            multipliers = multipliers_df[col_name].dropna()
            age_label = "25-44" if age_group == "age_2" else "65+"
            
            print(f"\n{'─'*100}")
            print(f"AGE GROUP: {age_group.upper()} ({age_label})")
            print(f"{'─'*100}")
            
            p10 = np.percentile(multipliers, 10)
            p40 = np.percentile(multipliers, 40)
            p60 = np.percentile(multipliers, 60)
            p75 = np.percentile(multipliers, 75)
            p90 = np.percentile(multipliers, 90)
            
            print(f"\n💡 RECOMMENDED TIER STRUCTURE:")
            print(f"\n  {'Tier':<20} {'Score Range':<15} {'Multiplier Range':<30}")
            print(f"  {'-'*65}")
            print(f"  {'Poor Fit':<20} {'1-20':<15} {'< {:.4f}':<30}".format(p10))
            print(f"  {'Below Average':<20} {'21-40':<15} {'{:.4f} - {:.4f}':<30}".format(p10, p40))
            print(f"  {'Average':<20} {'41-60':<15} {'{:.4f} - {:.4f}':<30}".format(p40, p60))
            print(f"  {'Above Average':<20} {'61-75':<15} {'{:.4f} - {:.4f}':<30}".format(p60, p75))
            print(f"  {'Good Fit':<20} {'76-90':<15} {'{:.4f} - {:.4f}':<30}".format(p75, p90))
            print(f"  {'Excellent Fit':<20} {'91-100':<15} {'> {:.4f}':<30}".format(p90))
        
        print(f"\n{'='*100}")


# ============================================================================
# STEP 3: Run Analysis Function (unchanged)
# ============================================================================

def run_multiplier_analysis(df, scorer):
    """Run complete multiplier analysis"""
    analyzer = MultiplierAnalyzer(scorer)
    multipliers_df = analyzer.calculate_all_multipliers(df)
    analyzer.print_distribution_analysis(multipliers_df)
    analyzer.suggest_score_tiers(multipliers_df)
    return multipliers_df

In [100]:
combined_df_filtered.columns

Index(['district', 'bedroom_count', 'property_age', 'saleable_area',
       'travel_time_to_cbd', 'walking_time_to_mtr', 'total_poi_within_1000m',
       'category_Education_within_2000m', 'category_Medical_within_2000m',
       'category_Public_Market_within_1000m', 'pet_policy_binary', 'dataset',
       'actual_price', 'lgb_predicted_price', 'prediction_error',
       'absolute_error', 'lgbm_predicted', 'xgb_predicted',
       'catboost_predicted', 'rf_predicted', 'LGBM_predict', 'XGB_predict',
       'CB_predict', 'RF_predict', 'ensemble_predicted'],
      dtype='object')

In [92]:
# Get districts used in WorthinessScorer
scorer_districts = [
    'Central and Western',
    'Wan Chai',
    'Islands',
    'Yau Tsim Mong',
    'Southern',
    'Eastern',
    'Sha Tin',
    'Kwun Tong',
    'Tuen Mun',
    'Tai Po',
    'North',
    'Yuen Long',
    'Kowloon City',
    'Kwai Tsing',
    'Tsuen Wan',
    'Sham Shui Po',
    'Sai Kung',
    'Wong Tai Sin'
]

# Filter combined_df to only include scorer districts
combined_df_filtered = combined_df[combined_df['district'].isin(scorer_districts)].copy()

print(f"Original: {len(combined_df):,} properties")
print(f"Filtered: {len(combined_df_filtered):,} properties")

Original: 6,156 properties
Filtered: 267 properties


In [103]:
combined_df_filtered.head()

Unnamed: 0,district,bedroom_count,property_age,saleable_area,travel_time_to_cbd,walking_time_to_mtr,total_poi_within_1000m,category_Education_within_2000m,category_Medical_within_2000m,category_Public_Market_within_1000m,...,absolute_error,lgbm_predicted,xgb_predicted,catboost_predicted,rf_predicted,LGBM_predict,XGB_predict,CB_predict,RF_predict,ensemble_predicted
0,Kowloon City,3,36,978,26,16,838,484,64,3,...,128949.1,12371050.0,12540288.0,14222190.0,12491920.0,RF,RF,RF,RF,12491920.0
1,Wan Chai,3,40,750,21,9,797,164,27,5,...,282398.9,13282400.0,12856448.0,12395820.0,12847520.0,XGB,XGB,RF,RF,12851980.0
2,Sha Tin,1,37,282,45,18,391,115,6,10,...,501891.4,3801891.0,3411503.0,4082155.0,3662088.0,XGB,XGB,CatBoost,CatBoost,3746829.0
3,Central and Western,3,48,737,14,9,760,145,25,7,...,25164.39,10654840.0,10797545.0,11854820.0,11252190.0,LGBM,LGBM,RF,CatBoost,11104170.0
4,Kwai Tsing,2,39,381,41,19,484,112,5,6,...,1227934.0,4127934.0,4054065.0,4219507.0,3968541.0,RF,RF,RF,RF,3968541.0


In [101]:
scorer = WorthinessScorer()
multipliers_df = run_multiplier_analysis(combined_df_filtered, scorer)

Calculating multipliers for all properties...
Total properties: 6156
  Processed 500/6156 properties (8.1%)...
  Processed 1000/6156 properties (16.2%)...
  Processed 1500/6156 properties (24.4%)...
  Processed 2000/6156 properties (32.5%)...
  Processed 2500/6156 properties (40.6%)...
  Processed 3000/6156 properties (48.7%)...
  Processed 3500/6156 properties (56.9%)...
  Processed 4000/6156 properties (65.0%)...
  Processed 4500/6156 properties (73.1%)...
  Processed 5000/6156 properties (81.2%)...
  Processed 5500/6156 properties (89.3%)...
  Processed 6000/6156 properties (97.5%)...
✅ Completed! Processed 6156/6156 properties (success rate: 100.0%)

WORTHINESS MULTIPLIER DISTRIBUTION ANALYSIS

AGE GROUP: AGE_2 (25-44)

📊 SUMMARY STATISTICS:
  • Count:             6,156
  • Mean:             1.1459
  • Std Dev:          0.0739
  • Min:              0.8130
  • Max:              1.3495
  • Range:            0.5365

📈 PERCENTILE BREAKDOWN:
  Percentile      Multiplier      Interpretat

In [102]:
# Print percentiles for both age groups
print("="*100)
print("DETAILED PERCENTILE ANALYSIS (Every 10th Percentile)")
print("="*100)

for age_group in ['age_2', 'age_4']:
    col_name = f'{age_group}_multiplier'
    multipliers = multipliers_df[col_name].dropna()
    
    print(f"\n{'─'*100}")
    print(f"AGE GROUP: {age_group.upper()}")
    print(f"{'─'*100}")
    
    print(f"\n{'Percentile':<15} {'Multiplier':<15} {'Premium/Discount':<20}")
    print(f"{'-'*50}")
    
    # Calculate percentiles from 0 to 100 in steps of 10
    for percentile in range(0, 101, 10):
        value = np.percentile(multipliers, percentile)
        premium_pct = (value - 1.0) * 100
        
        print(f"{str(percentile)+'th':<15} {value:<15.4f} {premium_pct:>+6.2f}%")

# Print combined (both age groups together)
print(f"\n{'='*100}")
print("COMBINED (BOTH AGE GROUPS)")
print(f"{'='*100}")

all_multipliers = pd.concat([
    multipliers_df['age_2_multiplier'],
    multipliers_df['age_4_multiplier']
])

print(f"\n{'Percentile':<15} {'Multiplier':<15} {'Premium/Discount':<20}")
print(f"{'-'*50}")

for percentile in range(0, 101, 10):
    value = np.percentile(all_multipliers, percentile)
    premium_pct = (value - 1.0) * 100
    
    print(f"{str(percentile)+'th':<15} {value:<15.4f} {premium_pct:>+6.2f}%")

print(f"\n{'='*100}")

DETAILED PERCENTILE ANALYSIS (Every 10th Percentile)

────────────────────────────────────────────────────────────────────────────────────────────────────
AGE GROUP: AGE_2
────────────────────────────────────────────────────────────────────────────────────────────────────

Percentile      Multiplier      Premium/Discount    
--------------------------------------------------
0th             0.8130          -18.70%
10th            1.0749           +7.49%
20th            1.1137          +11.37%
30th            1.1283          +12.83%
40th            1.1393          +13.93%
50th            1.1453          +14.53%
60th            1.1583          +15.83%
70th            1.1699          +16.99%
80th            1.1916          +19.16%
90th            1.2303          +23.03%
100th           1.3495          +34.95%

────────────────────────────────────────────────────────────────────────────────────────────────────
AGE GROUP: AGE_4
───────────────────────────────────────────────────────────────

In [105]:
import numpy as np
import pandas as pd
from scipy.stats import percentileofscore

# ============================================================================
# STEP 1: Calculate Unified Percentile Thresholds (UPDATED)
# ============================================================================

def calculate_unified_thresholds(multipliers_df):
    """
    Calculate unified percentile thresholds from combined age group data
    
    Returns:
        dict: Percentile thresholds for scoring
    """
    # Combine multipliers from both age groups
    all_multipliers = pd.concat([
        multipliers_df['age_2_multiplier'],
        multipliers_df['age_4_multiplier']
    ])
    
    # Use your actual benchmark data
    actual_thresholds = {
        0: 0.8130,
        10: 1.0756,
        20: 1.1106,
        30: 1.1306,
        40: 1.1439,
        50: 1.1595,
        60: 1.1760,
        70: 1.2033,
        80: 1.2310,
        90: 1.2648,
        100: 1.3495
    }
    
    print("\n" + "="*100)
    print("UNIFIED PERCENTILE THRESHOLDS (Combined Age Groups) - ACTUAL DATA")
    print("="*100)
    print(f"\n{'Percentile':<15} {'Multiplier':<15} {'Premium/Discount':<20}")
    print(f"{'-'*50}")
    
    for percentile in [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
        value = actual_thresholds[percentile]
        premium_pct = (value - 1.0) * 100
        print(f"{str(percentile)+'th':<15} {value:<15.4f} {premium_pct:>+6.2f}%")
    
    print("="*100)
    
    return all_multipliers, actual_thresholds


# ============================================================================
# STEP 2: Convert Multiplier to Score (1-100) - UPDATED
# ============================================================================

def multiplier_to_score(multiplier, thresholds):
    """
    Convert multiplier to fine-grained score (1-100) based on actual percentile thresholds
    
    Args:
        multiplier: The worthiness multiplier for a property
        thresholds: Dictionary with actual percentile thresholds
    
    Returns:
        int: Score from 1 to 100
    """
    # Handle edge cases
    if multiplier <= thresholds[0]:
        return 1
    if multiplier >= thresholds[100]:
        return 100
    
    # Find which percentile range the multiplier falls into
    percentile_points = sorted(thresholds.keys())
    
    for i in range(len(percentile_points) - 1):
        lower_percentile = percentile_points[i]
        upper_percentile = percentile_points[i + 1]
        
        lower_value = thresholds[lower_percentile]
        upper_value = thresholds[upper_percentile]
        
        if lower_value <= multiplier < upper_value:
            # Linear interpolation between the two percentiles
            ratio = (multiplier - lower_value) / (upper_value - lower_value)
            score = lower_percentile + ratio * (upper_percentile - lower_percentile)
            return max(1, min(100, int(round(score))))
    
    # If we get here, multiplier is exactly at the highest threshold
    return 100


# ============================================================================
# STEP 3: Generate Complete Assessment DataFrame (UPDATED)
# ============================================================================

def generate_age_assessment(combined_df, scorer, multipliers_df, thresholds):
    """
    Generate complete age assessment for all properties
    
    Args:
        combined_df: Original dataframe with all property data
        scorer: WorthinessScorer instance
        multipliers_df: DataFrame with pre-calculated multipliers
        thresholds: Dictionary with actual percentile thresholds
    
    Returns:
        DataFrame: Original columns + age assessment columns
    """
    print("\n" + "="*100)
    print("GENERATING AGE ASSESSMENT FOR ALL PROPERTIES")
    print("="*100)
    
    results = []
    total = len(combined_df)
    errors = 0
    
    print(f"\nProcessing {total} properties...")
    
    for idx, row in combined_df.iterrows():
        try:
            # Calculate worthiness for both age groups
            age_2_result = scorer.calculate_worthiness_score(row, 'age_2')
            age_4_result = scorer.calculate_worthiness_score(row, 'age_4')
            
            # Convert multipliers to scores (1-100) using actual thresholds
            age_2_score = multiplier_to_score(
                age_2_result['worthiness_multiplier'], 
                thresholds
            )
            age_4_score = multiplier_to_score(
                age_4_result['worthiness_multiplier'], 
                thresholds
            )
            
            # Determine which age group values it more
            if age_2_result['worthiness_price'] > age_4_result['worthiness_price']:
                better_for = 'age_2'
            elif age_4_result['worthiness_price'] > age_2_result['worthiness_price']:
                better_for = 'age_4'
            else:
                better_for = 'equal'
            
            # Create result dictionary with all columns
            result = row.to_dict()
            
            # Add age_2 columns
            result['age_2_worthiness_price'] = age_2_result['worthiness_price']
            result['age_2_worthiness_score'] = age_2_score
            result['age_2_worthiness_multiplier'] = age_2_result['worthiness_multiplier']
            
            # Add age_4 columns
            result['age_4_worthiness_price'] = age_4_result['worthiness_price']
            result['age_4_worthiness_score'] = age_4_score
            result['age_4_worthiness_multiplier'] = age_4_result['worthiness_multiplier']
            
            # Add comparison column
            result['better_for'] = better_for
            
            results.append(result)
            
            if len(results) % 500 == 0:
                print(f"  Processed {len(results)}/{total} properties ({len(results)/total*100:.1f}%)...")
                
        except Exception as e:
            errors += 1
            if errors <= 5:
                print(f"  ⚠️  Error at index {idx}: {str(e)[:100]}")
            continue
    
    if errors > 5:
        print(f"  ⚠️  ... and {errors - 5} more errors (total: {errors} errors)")
    
    print(f"\n✅ Completed! Successfully processed {len(results)}/{total} properties")
    
    # Create DataFrame
    assessment_df = pd.DataFrame(results)
    
    return assessment_df


# ============================================================================
# STEP 4: Main Execution Function (FIXED)
# ============================================================================

def create_age_assessment_csv(combined_df, scorer, multipliers_df, output_path):
    """
    Complete pipeline to create age assessment CSV
    
    Args:
        combined_df: Input dataframe with property data
        scorer: WorthinessScorer instance
        multipliers_df: Pre-calculated multipliers (from run_multiplier_analysis)
        output_path: Path to save CSV file
    """
    print("\n" + "="*100)
    print("AGE ASSESSMENT PIPELINE")
    print("="*100)
    
    # Step 1: Calculate unified thresholds using actual data
    all_multipliers, thresholds = calculate_unified_thresholds(multipliers_df)
    
    # Step 2: Generate assessment dataframe
    assessment_df = generate_age_assessment(
        combined_df, 
        scorer, 
        multipliers_df, 
        thresholds  # Pass thresholds instead of all_multipliers
    )
    
    # Step 3: Save to CSV
    print(f"\n💾 Saving to: {output_path}")
    assessment_df.to_csv(output_path, index=False)
    
    # Step 4: Print summary statistics
    print("\n" + "="*100)
    print("SUMMARY STATISTICS")
    print("="*100)
    
    print(f"\n📊 DATAFRAME INFO:")
    print(f"  • Total properties: {len(assessment_df):,}")
    print(f"  • Total columns: {len(assessment_df.columns)}")
    
    print(f"\n📊 NEW COLUMNS ADDED:")
    new_cols = [
        'age_2_worthiness_price',
        'age_2_worthiness_score',
        'age_2_worthiness_multiplier',
        'age_4_worthiness_price',
        'age_4_worthiness_score',
        'age_4_worthiness_multiplier',
        'better_for'
    ]
    for col in new_cols:
        print(f"  • {col}")
    
    print(f"\n📊 SCORE DISTRIBUTION:")
    print(f"\n  AGE_2 Scores:")
    print(f"    • Min:    {assessment_df['age_2_worthiness_score'].min()}")
    print(f"    • Mean:   {assessment_df['age_2_worthiness_score'].mean():.1f}")
    print(f"    • Median: {assessment_df['age_2_worthiness_score'].median():.0f}")
    print(f"    • Max:    {assessment_df['age_2_worthiness_score'].max()}")
    
    print(f"\n  AGE_4 Scores:")
    print(f"    • Min:    {assessment_df['age_4_worthiness_score'].min()}")
    print(f"    • Mean:   {assessment_df['age_4_worthiness_score'].mean():.1f}")
    print(f"    • Median: {assessment_df['age_4_worthiness_score'].median():.0f}")
    print(f"    • Max:    {assessment_df['age_4_worthiness_score'].max()}")
    
    print(f"\n📊 BETTER FOR DISTRIBUTION:")
    better_for_counts = assessment_df['better_for'].value_counts()
    for group, count in better_for_counts.items():
        pct = (count / len(assessment_df)) * 100
        print(f"  • {group:10s}: {count:>6,} properties ({pct:>5.1f}%)")
    
    # FIXED: Correct bin edges and labels
    print(f"\n📊 SCORE TIER DISTRIBUTION (using actual thresholds):")
    
    # Fixed: 6 bin edges for 5 labels
    tier_bins = [0, 20, 40, 60, 80, 100]  # 6 edges
    tier_labels = ['Poor (1-20)', 'Below Avg (21-40)', 'Average (41-60)', 'Above Avg (61-80)', 'Excellent (81-100)']  # 5 labels
    
    print(f"\n  AGE_2 Score Tiers:")
    age_2_tiers = pd.cut(
        assessment_df['age_2_worthiness_score'],
        bins=tier_bins,
        labels=tier_labels
    )
    for tier, count in age_2_tiers.value_counts().sort_index().items():
        pct = (count / len(assessment_df)) * 100
        print(f"  • {tier:25s}: {count:>6,} ({pct:>5.1f}%)")
    
    print(f"\n  AGE_4 Score Tiers:")
    age_4_tiers = pd.cut(
        assessment_df['age_4_worthiness_score'],
        bins=tier_bins,
        labels=tier_labels
    )
    for tier, count in age_4_tiers.value_counts().sort_index().items():
        pct = (count / len(assessment_df)) * 100
        print(f"  • {tier:25s}: {count:>6,} ({pct:>5.1f}%)")
    
    print(f"\n📊 MULTIPLIER vs SCORE VALIDATION:")
    print(f"\n  Sample mappings (first 10 properties):")
    print(f"  {'Age_2 Multiplier':<18} {'Age_2 Score':<12} {'Age_4 Multiplier':<18} {'Age_4 Score':<12}")
    print(f"  {'-'*60}")
    for i in range(min(10, len(assessment_df))):
        print(f"  {assessment_df.iloc[i]['age_2_worthiness_multiplier']:<18.4f} "
              f"{assessment_df.iloc[i]['age_2_worthiness_score']:<12} "
              f"{assessment_df.iloc[i]['age_4_worthiness_multiplier']:<18.4f} "
              f"{assessment_df.iloc[i]['age_4_worthiness_score']:<12}")
    
    print("\n" + "="*100)
    print("✅ AGE ASSESSMENT CSV CREATED SUCCESSFULLY!")
    print("="*100)
    print(f"\nFile saved to: {output_path}")
    print(f"\nSample of first 5 rows:")
    print(assessment_df[['ensemble_predicted', 'actual_price', 
                         'age_2_worthiness_price', 'age_2_worthiness_score',
                         'age_4_worthiness_price', 'age_4_worthiness_score',
                         'better_for']].head())
    
    return assessment_df


# ============================================================================
# READY TO USE!
# ============================================================================

print("✅ Age Assessment Pipeline Ready! (Fixed bin edges)")
print("\nUsage:")
print("  assessment_df = create_age_assessment_csv(")
print("      combined_df=combined_df,")
print("      scorer=scorer,")
print("      multipliers_df=multipliers_df,")
print("      output_path='/Users/clarencemarvin/Downloads/regularized/age_assessment.csv'")
print("  )")

✅ Age Assessment Pipeline Ready! (Fixed bin edges)

Usage:
  assessment_df = create_age_assessment_csv(
      combined_df=combined_df,
      scorer=scorer,
      multipliers_df=multipliers_df,
      output_path='/Users/clarencemarvin/Downloads/regularized/age_assessment.csv'
  )


In [83]:
assessment_df.columns

Index(['district', 'bedroom_count', 'property_age', 'saleable_area',
       'travel_time_to_cbd', 'walking_time_to_mtr', 'total_poi_within_1000m',
       'category_Education_within_2000m', 'category_Medical_within_2000m',
       'category_Public_Market_within_1000m', 'pet_policy_binary', 'dataset',
       'actual_price', 'lgb_predicted_price', 'prediction_error',
       'absolute_error', 'lgbm_predicted', 'xgb_predicted',
       'catboost_predicted', 'rf_predicted', 'LGBM_predict', 'XGB_predict',
       'CB_predict', 'RF_predict', 'ensemble_predicted',
       'age_2_worthiness_price', 'age_2_worthiness_score',
       'age_2_worthiness_multiplier', 'age_4_worthiness_price',
       'age_4_worthiness_score', 'age_4_worthiness_multiplier', 'better_for'],
      dtype='object')

In [90]:
assessment_df = create_age_assessment_csv(
    combined_df=combined_df,
    scorer=scorer,
    multipliers_df=multipliers_df,
    output_path='/Users/clarencemarvin/Downloads/regularized/age_assessment.csv'
)
assessment_df.head()


AGE ASSESSMENT PIPELINE

UNIFIED PERCENTILE THRESHOLDS (Combined Age Groups) - ACTUAL DATA

Percentile      Multiplier      Premium/Discount    
--------------------------------------------------
0th             0.7692          -23.08%
10th            0.9690           -3.10%
20th            1.0223           +2.23%
30th            1.0492           +4.92%
40th            1.0731           +7.31%
50th            1.0829           +8.29%
60th            1.0949           +9.49%
70th            1.0978           +9.78%
80th            1.1107          +11.07%
90th            1.1136          +11.36%
100th           1.3166          +31.66%

GENERATING AGE ASSESSMENT FOR ALL PROPERTIES

Processing 6156 properties...
  Processed 500/6156 properties (8.1%)...
  Processed 1000/6156 properties (16.2%)...
  Processed 1500/6156 properties (24.4%)...
  Processed 2000/6156 properties (32.5%)...
  Processed 2500/6156 properties (40.6%)...
  Processed 3000/6156 properties (48.7%)...
  Processed 3500/6156 pr

Unnamed: 0,district,bedroom_count,property_age,saleable_area,travel_time_to_cbd,walking_time_to_mtr,total_poi_within_1000m,category_Education_within_2000m,category_Medical_within_2000m,category_Public_Market_within_1000m,...,CB_predict,RF_predict,ensemble_predicted,age_2_worthiness_price,age_2_worthiness_score,age_2_worthiness_multiplier,age_4_worthiness_price,age_4_worthiness_score,age_4_worthiness_multiplier,better_for
0,Ho Man Tin,3,36,978,26,16,838,484,64,3,...,RF,RF,12491920.0,13167300.0,32,1.054066,12823990.0,22,1.026583,age_2
1,Happy Valley,3,40,750,21,9,797,164,27,5,...,RF,RF,12851980.0,13922630.0,50,1.083306,14318110.0,90,1.114078,age_4
2,Tai Wai,1,37,282,45,18,391,115,6,10,...,CatBoost,CatBoost,3746829.0,3933851.0,30,1.049915,3990891.0,37,1.065138,age_4
3,Sai Ying Pun,3,48,737,14,9,760,145,25,7,...,RF,CatBoost,11104170.0,12027000.0,50,1.083107,12368700.0,90,1.113878,age_4
4,Tsing Yi,2,39,381,41,19,484,112,5,6,...,RF,RF,3968541.0,4142887.0,28,1.043932,4221161.0,36,1.063656,age_4


In [91]:
assessment_df.drop(columns=['better_for']).to_csv(
    '/Users/clarencemarvin/Downloads/regularized/age_df.csv', 
    index=False
)
print("✅ Done!")

✅ Done!
