In [1]:
import pandas as pd
import numpy as np

**Reading Transactions File**

In [2]:
df_txn=pd.read_csv('transactions.csv')

In [3]:
df_txn.head()

Unnamed: 0,doj,doi,srcid,destid,srcid_region,destid_region,srcid_tier,destid_tier,cumsum_seatcount,cumsum_searchcount,dbd
0,2023-03-01,2023-01-30,45,46,Karnataka,Tamil Nadu,Tier 1,Tier 1,8.0,76.0,30
1,2023-03-01,2023-01-30,46,45,Tamil Nadu,Karnataka,Tier 1,Tier 1,8.0,70.0,30
2,2023-03-01,2023-01-30,45,47,Karnataka,Andhra Pradesh,Tier 1,Tier 1,4.0,142.0,30
3,2023-03-01,2023-01-30,47,45,Andhra Pradesh,Karnataka,Tier 1,Tier 1,0.0,68.0,30
4,2023-03-01,2023-01-30,46,9,Tamil Nadu,Tamil Nadu,Tier 1,Tier2,9.0,162.0,30


**Filtering Out DBD=15 since we want to predict bookings 15 days prior to booking**

In [4]:
df_txn=df_txn[df_txn['dbd']==15]

In [5]:
df_txn.head()

Unnamed: 0,doj,doi,srcid,destid,srcid_region,destid_region,srcid_tier,destid_tier,cumsum_seatcount,cumsum_searchcount,dbd
1500,2023-03-01,2023-02-14,45,46,Karnataka,Tamil Nadu,Tier 1,Tier 1,16.0,480.0,15
1501,2023-03-01,2023-02-14,46,45,Tamil Nadu,Karnataka,Tier 1,Tier 1,34.0,352.0,15
1502,2023-03-01,2023-02-14,45,47,Karnataka,Andhra Pradesh,Tier 1,Tier 1,36.0,892.0,15
1503,2023-03-01,2023-02-14,47,45,Andhra Pradesh,Karnataka,Tier 1,Tier 1,18.0,1130.0,15
1504,2023-03-01,2023-02-14,46,9,Tamil Nadu,Tamil Nadu,Tier 1,Tier2,48.0,1023.0,15


In [6]:
df_txn.info()

<class 'pandas.core.frame.DataFrame'>
Index: 73100 entries, 1500 to 2264599
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   doj                 73100 non-null  object 
 1   doi                 73100 non-null  object 
 2   srcid               73100 non-null  int64  
 3   destid              73100 non-null  int64  
 4   srcid_region        73100 non-null  object 
 5   destid_region       73100 non-null  object 
 6   srcid_tier          73100 non-null  object 
 7   destid_tier         73100 non-null  object 
 8   cumsum_seatcount    73100 non-null  float64
 9   cumsum_searchcount  73100 non-null  float64
 10  dbd                 73100 non-null  int64  
dtypes: float64(2), int64(3), object(6)
memory usage: 8.7+ MB


In [7]:
df_txn['doj']=pd.to_datetime(df_txn['doj'])
df_txn['doi']=pd.to_datetime(df_txn['doi'])

In [8]:
for col in ['srcid', 'destid','srcid_region','destid_region','srcid_tier','destid_tier']:
    df_txn[col] = df_txn[col].astype('category')

In [9]:
df_txn.info()

<class 'pandas.core.frame.DataFrame'>
Index: 73100 entries, 1500 to 2264599
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   doj                 73100 non-null  datetime64[ns]
 1   doi                 73100 non-null  datetime64[ns]
 2   srcid               73100 non-null  category      
 3   destid              73100 non-null  category      
 4   srcid_region        73100 non-null  category      
 5   destid_region       73100 non-null  category      
 6   srcid_tier          73100 non-null  category      
 7   destid_tier         73100 non-null  category      
 8   cumsum_seatcount    73100 non-null  float64       
 9   cumsum_searchcount  73100 non-null  float64       
 10  dbd                 73100 non-null  int64         
dtypes: category(6), datetime64[ns](2), float64(2), int64(1)
memory usage: 5.8 MB


In [10]:
df_txn.describe()

Unnamed: 0,doj,doi,cumsum_seatcount,cumsum_searchcount,dbd
count,73100,73100,73100.0,73100.0,73100.0
mean,2024-02-28 23:59:59.999999744,2024-02-13 23:59:59.999999744,54.444241,1948.652298,15.0
min,2023-03-01 00:00:00,2023-02-14 00:00:00,0.0,0.0,15.0
25%,2023-08-30 00:00:00,2023-08-15 00:00:00,4.0,348.0,15.0
50%,2024-02-29 00:00:00,2024-02-14 00:00:00,16.0,780.0,15.0
75%,2024-08-30 00:00:00,2024-08-15 00:00:00,51.0,1684.0,15.0
max,2025-02-28 00:00:00,2025-02-13 00:00:00,4724.0,391464.0,15.0
std,,,135.714726,6834.956089,0.0


In [11]:
df_txn.rename(columns={
    'cumsum_seatcount': 'bookings_dbd_15',
    'cumsum_searchcount': 'searches_dbd_15'
}, inplace=True)

**Dropping dbd column as it is redundant now**

In [12]:
df_txn.drop(columns=['dbd'], inplace=True)

**Reading the Train File**

In [13]:
df_train=pd.read_csv('train.csv')

In [14]:
df_train.head()

Unnamed: 0,doj,srcid,destid,final_seatcount
0,2023-03-01,45,46,2838.0
1,2023-03-01,46,45,2298.0
2,2023-03-01,45,47,2720.0
3,2023-03-01,47,45,2580.0
4,2023-03-01,46,9,4185.0


In [15]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67200 entries, 0 to 67199
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   doj              67200 non-null  object 
 1   srcid            67200 non-null  int64  
 2   destid           67200 non-null  int64  
 3   final_seatcount  67200 non-null  float64
dtypes: float64(1), int64(2), object(1)
memory usage: 2.1+ MB


In [16]:
len(df_train)

67200

In [17]:
len(df_txn)

73100

In [18]:
for col in ['srcid', 'destid']:
    df_train[col] = df_train[col].astype('category')

In [19]:
df_train['doj']=pd.to_datetime(df_train['doj'])

In [20]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67200 entries, 0 to 67199
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   doj              67200 non-null  datetime64[ns]
 1   srcid            67200 non-null  category      
 2   destid           67200 non-null  category      
 3   final_seatcount  67200 non-null  float64       
dtypes: category(2), datetime64[ns](1), float64(1)
memory usage: 1.2 MB


In [21]:
df_train.describe()

Unnamed: 0,doj,final_seatcount
count,67200,67200.0
mean,2024-01-30 11:59:59.999999744,2001.729464
min,2023-03-01 00:00:00,2.0
25%,2023-08-15 18:00:00,1252.0
50%,2024-01-30 12:00:00,1685.0
75%,2024-07-16 06:00:00,2408.0
max,2024-12-31 00:00:00,13503.0
std,,1194.71114


In [22]:
df_test=pd.read_csv('test_redbus.csv')

In [23]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5900 entries, 0 to 5899
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   route_key  5900 non-null   object
 1   doj        5900 non-null   object
 2   srcid      5900 non-null   int64 
 3   destid     5900 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 184.5+ KB


In [24]:
df_test['doj']=pd.to_datetime(df_test['doj'])

In [25]:
for col in ['srcid', 'destid']:
    df_test[col] = df_test[col].astype('category')

In [26]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5900 entries, 0 to 5899
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   route_key  5900 non-null   object        
 1   doj        5900 non-null   datetime64[ns]
 2   srcid      5900 non-null   category      
 3   destid     5900 non-null   category      
dtypes: category(2), datetime64[ns](1), object(1)
memory usage: 106.6+ KB


In [27]:
df_test.describe()

Unnamed: 0,doj
count,5900
mean,2025-01-30 00:00:00
min,2025-01-01 00:00:00
25%,2025-01-15 00:00:00
50%,2025-01-30 00:00:00
75%,2025-02-14 00:00:00
max,2025-02-28 00:00:00


**Merging train and test data with transaction data**

In [28]:
# Merge with training data
df_train_merged = df_train.merge(
    df_txn, on=['srcid', 'destid', 'doj'], how='left'
)

# Merge with test data
df_test_merged = df_test.merge(
    df_txn, on=['srcid', 'destid', 'doj'], how='left'
)


In [29]:
route_counts = df_train_merged.groupby(['srcid', 'destid']).size().reset_index(name='route_freq')

df_train_merged = df_train_merged.merge(route_counts, on=['srcid', 'destid'], how='left')
df_test_merged = df_test_merged.merge(route_counts, on=['srcid', 'destid'], how='left')

# Force route_freq to be numeric and fill NaN only in test
df_test_merged['route_freq'] = pd.to_numeric(df_test_merged['route_freq'], errors='coerce').fillna(0)


  route_counts = df_train_merged.groupby(['srcid', 'destid']).size().reset_index(name='route_freq')


In [30]:
df_train_merged['is_intra_state'] = (df_train_merged['srcid_region'] == df_train_merged['destid_region']).astype(int)
df_test_merged['is_intra_state'] = (df_test_merged['srcid_region'] == df_test_merged['destid_region']).astype(int)


In [31]:
# Sanity check
print("Train shape:", df_train_merged.shape)
print("Test shape:", df_test_merged.shape)

# Check for missing txn info (e.g., no dbd=15 snapshot)
print("Missing in train:", df_train_merged.isna().sum())
print("Missing in test:", df_test_merged.isna().sum())


Train shape: (67200, 13)
Test shape: (5900, 13)
Missing in train: doj                0
srcid              0
destid             0
final_seatcount    0
doi                0
srcid_region       0
destid_region      0
srcid_tier         0
destid_tier        0
bookings_dbd_15    0
searches_dbd_15    0
route_freq         0
is_intra_state     0
dtype: int64
Missing in test: route_key          0
doj                0
srcid              0
destid             0
doi                0
srcid_region       0
destid_region      0
srcid_tier         0
destid_tier        0
bookings_dbd_15    0
searches_dbd_15    0
route_freq         0
is_intra_state     0
dtype: int64


In [32]:
df_train_merged.head()

Unnamed: 0,doj,srcid,destid,final_seatcount,doi,srcid_region,destid_region,srcid_tier,destid_tier,bookings_dbd_15,searches_dbd_15,route_freq,is_intra_state
0,2023-03-01,45,46,2838.0,2023-02-14,Karnataka,Tamil Nadu,Tier 1,Tier 1,16.0,480.0,672,0
1,2023-03-01,46,45,2298.0,2023-02-14,Tamil Nadu,Karnataka,Tier 1,Tier 1,34.0,352.0,672,0
2,2023-03-01,45,47,2720.0,2023-02-14,Karnataka,Andhra Pradesh,Tier 1,Tier 1,36.0,892.0,672,0
3,2023-03-01,47,45,2580.0,2023-02-14,Andhra Pradesh,Karnataka,Tier 1,Tier 1,18.0,1130.0,672,0
4,2023-03-01,46,9,4185.0,2023-02-14,Tamil Nadu,Tamil Nadu,Tier 1,Tier2,48.0,1023.0,672,1


**Feature Engineering**

In [33]:
# Create weekday (0 = Monday, 6 = Sunday)
df_train_merged['weekday_num'] = df_train['doj'].dt.dayofweek
df_test_merged['weekday_num'] = df_test['doj'].dt.dayofweek

In [34]:
df_train_merged.head()


Unnamed: 0,doj,srcid,destid,final_seatcount,doi,srcid_region,destid_region,srcid_tier,destid_tier,bookings_dbd_15,searches_dbd_15,route_freq,is_intra_state,weekday_num
0,2023-03-01,45,46,2838.0,2023-02-14,Karnataka,Tamil Nadu,Tier 1,Tier 1,16.0,480.0,672,0,2
1,2023-03-01,46,45,2298.0,2023-02-14,Tamil Nadu,Karnataka,Tier 1,Tier 1,34.0,352.0,672,0,2
2,2023-03-01,45,47,2720.0,2023-02-14,Karnataka,Andhra Pradesh,Tier 1,Tier 1,36.0,892.0,672,0,2
3,2023-03-01,47,45,2580.0,2023-02-14,Andhra Pradesh,Karnataka,Tier 1,Tier 1,18.0,1130.0,672,0,2
4,2023-03-01,46,9,4185.0,2023-02-14,Tamil Nadu,Tamil Nadu,Tier 1,Tier2,48.0,1023.0,672,1,2


In [46]:
df_test_merged.head()

Unnamed: 0,route_key,doj,srcid,destid,doi,srcid_region,destid_region,srcid_tier,destid_tier,bookings_dbd_15,searches_dbd_15,route_freq,is_intra_state,weekday_num,search_to_book_ratio,bookings_x_weekday
0,2025-02-11_46_45,2025-02-11,46,45,2025-01-27,Tamil Nadu,Karnataka,Tier 1,Tier 1,38.0,1082.0,672,0,1,28.473684,38.0
1,2025-01-20_17_23,2025-01-20,17,23,2025-01-05,East 1,East 1,Tier2,Tier 1,0.0,1175.0,672,1,0,11750.0,0.0
2,2025-01-08_02_14,2025-01-08,2,14,2024-12-24,Maharashtra and Goa,Maharashtra and Goa,Tier 1,Tier2,0.0,370.0,672,1,2,3700.0,0.0
3,2025-01-08_08_47,2025-01-08,8,47,2024-12-24,Andhra Pradesh,Andhra Pradesh,Tier2,Tier 1,0.0,120.0,672,1,2,1200.0,0.0
4,2025-01-08_09_46,2025-01-08,9,46,2024-12-24,Tamil Nadu,Tamil Nadu,Tier2,Tier 1,39.0,1230.0,672,1,2,31.538462,78.0


**Creating A New Variable Called Search/Bookings Ratio to measure demand**

Rows having 0 searches have been estimated to 0.1 to facilitate division

In [36]:
len(df_train_merged[df_train_merged['bookings_dbd_15']==0])

15756

In [37]:
df_train_merged['search_to_book_ratio'] = (
    df_train_merged['searches_dbd_15'] / df_train_merged['bookings_dbd_15'].replace(0, 0.1))

In [38]:
len(df_test_merged[df_test_merged['bookings_dbd_15']==0])

995

In [39]:
df_test_merged['search_to_book_ratio'] = (
    df_test_merged['searches_dbd_15'] / df_test_merged['bookings_dbd_15'].replace(0, 0.1))

In [44]:
df_train_merged['bookings_x_weekday'] = df_train_merged['bookings_dbd_15'] * df_train_merged['weekday_num']
df_test_merged['bookings_x_weekday'] = df_test_merged['bookings_dbd_15'] * df_test_merged['weekday_num']


In [45]:
df_train_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67200 entries, 0 to 67199
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   doj                   67200 non-null  datetime64[ns]
 1   srcid                 67200 non-null  category      
 2   destid                67200 non-null  category      
 3   final_seatcount       67200 non-null  float64       
 4   doi                   67200 non-null  datetime64[ns]
 5   srcid_region          67200 non-null  category      
 6   destid_region         67200 non-null  category      
 7   srcid_tier            67200 non-null  category      
 8   destid_tier           67200 non-null  category      
 9   bookings_dbd_15       67200 non-null  float64       
 10  searches_dbd_15       67200 non-null  float64       
 11  route_freq            67200 non-null  int64         
 12  is_intra_state        67200 non-null  int64         
 13  weekday_num     

**Data Modelling**

In [47]:
!pip install optuna
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import optuna



In [48]:
from lightgbm import LGBMRegressor, early_stopping, log_evaluation
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import optuna

# Feature set
features = [
    'srcid', 'destid',
    'srcid_region', 'destid_region',
    'srcid_tier', 'destid_tier',
    'bookings_dbd_15', 'searches_dbd_15',
    'search_to_book_ratio', 'weekday_num',
    'route_freq', 'is_intra_state'
]

X = df_train_merged[features]
y = df_train_merged['final_seatcount']
X_test = df_test_merged[features]

cat_features = [col for col in features if str(X[col].dtype) == 'category']

# Optuna objective
def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float("learning_rate", 0.01, 0.1),
        'num_leaves': trial.suggest_int("num_leaves", 20, 100),
        'max_depth': trial.suggest_int("max_depth", 3, 10),
        'feature_fraction': trial.suggest_float("feature_fraction", 0.6, 1.0),
        'bagging_fraction': trial.suggest_float("bagging_fraction", 0.6, 1.0),
        'bagging_freq': trial.suggest_int("bagging_freq", 1, 7),
        'min_child_samples': trial.suggest_int("min_child_samples", 10, 100)
    }

    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    rmse_scores = []

    for train_idx, val_idx in kf.split(X):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        model = LGBMRegressor(**params)
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            eval_metric="rmse",
            categorical_feature=cat_features,
            callbacks=[
                early_stopping(stopping_rounds=50),
                log_evaluation(0)
            ]
        )
        preds = model.predict(X_val)
        rmse = np.sqrt(mean_squared_error(y_val, preds))
        rmse_scores.append(rmse)

    return np.mean(rmse_scores)

# Run optimization
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30)

# Train final model
best_params = study.best_params
best_params.update({'objective': 'regression', 'metric': 'rmse'})

final_model = LGBMRegressor(**best_params)
final_model.fit(X, y, categorical_feature=cat_features)

# Predict
test_preds = final_model.predict(X_test)

# Submission
submission = df_test_merged[['route_key']].copy()
submission['final_seatcount'] = test_preds
submission.to_csv("submission_file.csv", index=False)
print(" submission_file.csv created")


[I 2025-06-21 03:10:20,473] A new study created in memory with name: no-name-c5953917-770a-4637-888f-2b7a640c27e8


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 571.306
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 571.217
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 566.985
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 571.913
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 563.464


[I 2025-06-21 03:10:37,321] Trial 0 finished with value: 568.9768225095546 and parameters: {'learning_rate': 0.01951734440842117, 'num_leaves': 76, 'max_depth': 7, 'feature_fraction': 0.764961087353285, 'bagging_fraction': 0.7584002799193388, 'bagging_freq': 3, 'min_child_samples': 17}. Best is trial 0 with value: 568.9768225095546.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 698.169
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 698.263
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 696.434
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 694.114
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:10:44,931] Trial 1 finished with value: 694.5841177751776 and parameters: {'learning_rate': 0.014340508119641979, 'num_leaves': 70, 'max_depth': 5, 'feature_fraction': 0.6518649386492948, 'bagging_fraction': 0.8220635730778151, 'bagging_freq': 7, 'min_child_samples': 26}. Best is trial 0 with value: 568.9768225095546.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 685.941
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 574.41
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 573.487
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 573.326
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 574.338
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:10:51,829] Trial 2 finished with value: 573.0862898753555 and parameters: {'learning_rate': 0.024653071722695347, 'num_leaves': 28, 'max_depth': 10, 'feature_fraction': 0.6007863543337042, 'bagging_fraction': 0.9426452250469826, 'bagging_freq': 2, 'min_child_samples': 86}. Best is trial 0 with value: 568.9768225095546.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 569.87
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 526.332
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 526.129
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 522.469
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 525.939
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:10:58,494] Trial 3 finished with value: 524.5469345487929 and parameters: {'learning_rate': 0.028389641479229774, 'num_leaves': 44, 'max_depth': 10, 'feature_fraction': 0.7587086435602127, 'bagging_fraction': 0.8751238661936501, 'bagging_freq': 5, 'min_child_samples': 32}. Best is trial 3 with value: 524.5469345487929.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 521.866
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 476.165
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 472.993
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 464.712
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[99]	valid_0's rmse: 472.614
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:11:04,411] Trial 4 finished with value: 471.5061848857693 and parameters: {'learning_rate': 0.09774742398616973, 'num_leaves': 41, 'max_depth': 7, 'feature_fraction': 0.9677736673529297, 'bagging_fraction': 0.9484284884076447, 'bagging_freq': 1, 'min_child_samples': 39}. Best is trial 4 with value: 471.5061848857693.


Did not meet early stopping. Best iteration is:
[98]	valid_0's rmse: 471.047
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 527.403
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 526.92
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 525.424
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 529.471
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:11:09,738] Trial 5 finished with value: 526.8026501333684 and parameters: {'learning_rate': 0.04276860204528327, 'num_leaves': 86, 'max_depth': 5, 'feature_fraction': 0.6682885911147682, 'bagging_fraction': 0.8869229025086831, 'bagging_freq': 5, 'min_child_samples': 69}. Best is trial 4 with value: 471.5061848857693.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 524.794
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 536.317
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 536.859
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 534.702
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 540.796
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:11:16,089] Trial 6 finished with value: 537.142421436061 and parameters: {'learning_rate': 0.038312172661529136, 'num_leaves': 92, 'max_depth': 5, 'feature_fraction': 0.6012152853250207, 'bagging_fraction': 0.9376689808942035, 'bagging_freq': 7, 'min_child_samples': 59}. Best is trial 4 with value: 471.5061848857693.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 537.038
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 701.53
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 704.353
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 708.51
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 698.15
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:11:19,848] Trial 7 finished with value: 701.1060814947273 and parameters: {'learning_rate': 0.021326585908986236, 'num_leaves': 76, 'max_depth': 3, 'feature_fraction': 0.7186539776807545, 'bagging_fraction': 0.9605487351324058, 'bagging_freq': 3, 'min_child_samples': 63}. Best is trial 4 with value: 471.5061848857693.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 692.988
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 551.745
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 544.4
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 546.034
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 549.025
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:11:24,342] Trial 8 finished with value: 547.3261361660674 and parameters: {'learning_rate': 0.028694776432665513, 'num_leaves': 77, 'max_depth': 5, 'feature_fraction': 0.9884358786289649, 'bagging_fraction': 0.8507889349815063, 'bagging_freq': 1, 'min_child_samples': 52}. Best is trial 4 with value: 471.5061848857693.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 545.426
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 708.765
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 711.448
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 714.112
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 707.991
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:11:30,962] Trial 9 finished with value: 707.9232329294572 and parameters: {'learning_rate': 0.011489320829681534, 'num_leaves': 38, 'max_depth': 8, 'feature_fraction': 0.7322314206914079, 'bagging_fraction': 0.6005527514873223, 'bagging_freq': 7, 'min_child_samples': 82}. Best is trial 4 with value: 471.5061848857693.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 697.3
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.828
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[99]	valid_0's rmse: 468.772
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 463.598
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 472.236
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 469.287


[I 2025-06-21 03:11:36,693] Trial 10 finished with value: 469.74428872601436 and parameters: {'learning_rate': 0.09989254161419701, 'num_leaves': 57, 'max_depth': 8, 'feature_fraction': 0.9174300044040247, 'bagging_fraction': 0.7166265837167286, 'bagging_freq': 1, 'min_child_samples': 41}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 475.213
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 467.872
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 464.662
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 472.515
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 469.592


[I 2025-06-21 03:11:43,243] Trial 11 finished with value: 469.9705383762709 and parameters: {'learning_rate': 0.09807431250553363, 'num_leaves': 54, 'max_depth': 8, 'feature_fraction': 0.9382954543085807, 'bagging_fraction': 0.7234981543013432, 'bagging_freq': 1, 'min_child_samples': 41}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.624
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[98]	valid_0's rmse: 468.704
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 464.482
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 471.63
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 469.807


[I 2025-06-21 03:11:49,053] Trial 12 finished with value: 469.8494021367692 and parameters: {'learning_rate': 0.09957192397687523, 'num_leaves': 57, 'max_depth': 9, 'feature_fraction': 0.8839292915481483, 'bagging_fraction': 0.7220135399675052, 'bagging_freq': 1, 'min_child_samples': 44}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 477.046
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 470.857
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 466.8
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.988
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 471.177


[I 2025-06-21 03:11:57,138] Trial 13 finished with value: 471.97352559047084 and parameters: {'learning_rate': 0.07893938392736174, 'num_leaves': 58, 'max_depth': 9, 'feature_fraction': 0.8731636591912073, 'bagging_fraction': 0.682378189995158, 'bagging_freq': 2, 'min_child_samples': 49}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 477.716
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 472.368
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 466.574
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 475.707
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 471.264


[I 2025-06-21 03:12:04,040] Trial 14 finished with value: 472.72581448773553 and parameters: {'learning_rate': 0.07576354028962873, 'num_leaves': 63, 'max_depth': 9, 'feature_fraction': 0.8570211738373968, 'bagging_fraction': 0.665768025071034, 'bagging_freq': 3, 'min_child_samples': 10}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 478.415
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.163
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 471.041
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 476.031
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.608


[I 2025-06-21 03:12:12,161] Trial 15 finished with value: 474.65165996312135 and parameters: {'learning_rate': 0.0814768185678432, 'num_leaves': 50, 'max_depth': 8, 'feature_fraction': 0.8822244450833012, 'bagging_fraction': 0.7696691146026192, 'bagging_freq': 2, 'min_child_samples': 72}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 495.968
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 493.01
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 488.165
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 493.512
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:12:17,328] Trial 16 finished with value: 492.3558685594676 and parameters: {'learning_rate': 0.06343404452051479, 'num_leaves': 20, 'max_depth': 9, 'feature_fraction': 0.8291027680420022, 'bagging_fraction': 0.6437232805206186, 'bagging_freq': 4, 'min_child_samples': 45}. Best is trial 10 with value: 469.74428872601436.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 491.125
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 476.17
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[98]	valid_0's rmse: 469.319
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 465.485
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.276
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 469.99


[I 2025-06-21 03:12:25,041] Trial 17 finished with value: 470.8479577659572 and parameters: {'learning_rate': 0.08895355009884226, 'num_leaves': 63, 'max_depth': 7, 'feature_fraction': 0.9277639539847914, 'bagging_fraction': 0.7198604360282596, 'bagging_freq': 1, 'min_child_samples': 27}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 491.251
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 486.195
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 484.084
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 487.73
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 487.531


[I 2025-06-21 03:12:32,029] Trial 18 finished with value: 487.35825041411334 and parameters: {'learning_rate': 0.06320523895202872, 'num_leaves': 32, 'max_depth': 6, 'feature_fraction': 0.9153500677710458, 'bagging_fraction': 0.7993755806527836, 'bagging_freq': 2, 'min_child_samples': 95}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.702
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 470.26
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 464.948
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 475.065
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 471.011


[I 2025-06-21 03:12:39,190] Trial 19 finished with value: 471.1971097105872 and parameters: {'learning_rate': 0.09061827265338633, 'num_leaves': 52, 'max_depth': 10, 'feature_fraction': 0.8090938053436569, 'bagging_fraction': 0.711832567170569, 'bagging_freq': 4, 'min_child_samples': 35}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 556.426
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 558.307
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 553.018
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 557.864
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:12:42,520] Trial 20 finished with value: 556.5328299687892 and parameters: {'learning_rate': 0.06572082793290174, 'num_leaves': 99, 'max_depth': 3, 'feature_fraction': 0.8963951620803587, 'bagging_fraction': 0.6328864711245493, 'bagging_freq': 6, 'min_child_samples': 20}. Best is trial 10 with value: 469.74428872601436.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 557.049
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.906
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[98]	valid_0's rmse: 467.63
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 465.022
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.113
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 471.126


[I 2025-06-21 03:12:49,202] Trial 21 finished with value: 470.159458228586 and parameters: {'learning_rate': 0.09907530769321643, 'num_leaves': 54, 'max_depth': 8, 'feature_fraction': 0.9393577397617661, 'bagging_fraction': 0.7362042538667786, 'bagging_freq': 1, 'min_child_samples': 43}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.782
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 468.123
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 467.973
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.093
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 470.837


[I 2025-06-21 03:12:55,396] Trial 22 finished with value: 470.9614804897895 and parameters: {'learning_rate': 0.0887893072346859, 'num_leaves': 66, 'max_depth': 8, 'feature_fraction': 0.9556058072678363, 'bagging_fraction': 0.7035219601976888, 'bagging_freq': 1, 'min_child_samples': 55}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.611
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 469.293
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 462.919
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.556
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 470.196


[I 2025-06-21 03:13:03,273] Trial 23 finished with value: 469.91480589204883 and parameters: {'learning_rate': 0.09910365877789738, 'num_leaves': 49, 'max_depth': 9, 'feature_fraction': 0.8404794288059174, 'bagging_fraction': 0.7722676203300951, 'bagging_freq': 2, 'min_child_samples': 40}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 476.768
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.17
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 467.438
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 475.307
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 472.756


[I 2025-06-21 03:13:10,792] Trial 24 finished with value: 473.08783331422603 and parameters: {'learning_rate': 0.07240913284828446, 'num_leaves': 45, 'max_depth': 9, 'feature_fraction': 0.8311544839748287, 'bagging_fraction': 0.7846468944520805, 'bagging_freq': 2, 'min_child_samples': 30}. Best is trial 10 with value: 469.74428872601436.


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 478.254
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.529
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 466.782
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 475.182
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:13:17,477] Trial 25 finished with value: 473.82092485709035 and parameters: {'learning_rate': 0.08464188536367777, 'num_leaves': 35, 'max_depth': 9, 'feature_fraction': 0.8529705586738969, 'bagging_fraction': 0.8266442447235612, 'bagging_freq': 3, 'min_child_samples': 46}. Best is trial 10 with value: 469.74428872601436.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.357
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.281
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 471.633
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 465.19
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.133
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:13:25,195] Trial 26 finished with value: 471.2440452732729 and parameters: {'learning_rate': 0.09369836098251165, 'num_leaves': 47, 'max_depth': 10, 'feature_fraction': 0.787888396178792, 'bagging_fraction': 0.7569247372357534, 'bagging_freq': 2, 'min_child_samples': 35}. Best is trial 10 with value: 469.74428872601436.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 470.984
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 493.91
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 484.539
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 486.961
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 489.903
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:13:30,637] Trial 27 finished with value: 488.9535473464175 and parameters: {'learning_rate': 0.05316635543707959, 'num_leaves': 58, 'max_depth': 6, 'feature_fraction': 0.9050377960355062, 'bagging_fraction': 0.6858027196300254, 'bagging_freq': 1, 'min_child_samples': 63}. Best is trial 10 with value: 469.74428872601436.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 489.455
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 475.036
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 471.457
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 464.687
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[99]	valid_0's rmse: 473.087
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 03:13:38,541] Trial 28 finished with value: 470.62396696785726 and parameters: {'learning_rate': 0.08380122571726926, 'num_leaves': 58, 'max_depth': 9, 'feature_fraction': 0.9992380071111908, 'bagging_fraction': 0.7384768730780258, 'bagging_freq': 4, 'min_child_samples': 21}. Best is trial 10 with value: 469.74428872601436.


Did not meet early stopping. Best iteration is:
[98]	valid_0's rmse: 468.852
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 477.361
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 471.366
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 468.804
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 473.949
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 474.1


[I 2025-06-21 03:13:45,806] Trial 29 finished with value: 473.1158531762079 and parameters: {'learning_rate': 0.07163725469903033, 'num_leaves': 68, 'max_depth': 7, 'feature_fraction': 0.7810552548862809, 'bagging_fraction': 0.7740247289865786, 'bagging_freq': 3, 'min_child_samples': 51}. Best is trial 10 with value: 469.74428872601436.


 submission_file.csv created


In [49]:
# Evaluate final model performance on training data (since we don't have test labels)
train_preds = final_model.predict(X)
rmse_final = np.sqrt(mean_squared_error(y, train_preds))

print(f" Final model training RMSE: {rmse_final:.2f}")


 Final model training RMSE: 440.33
