In [None]:
!pip install xgboost-ray scikit-learn pandas pyarrow kagglehub "ray[default]" --upgrade ray

In [14]:
import pandas as pd
import ray
from xgboost_ray import RayDMatrix, RayParams, train
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import time
import glob
import kagglehub
from ray.train.xgboost import XGBoostTrainer
from ray.train import ScalingConfig
from ray.data import from_pandas

In [3]:
# Fetch the NYC yellow-taxi trip dataset through kagglehub and report the
# directory the files were placed in.
dataset_handle = "elemento/nyc-yellow-taxi-trip-data"
path = kagglehub.dataset_download(dataset_handle)
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/elemento/nyc-yellow-taxi-trip-data?dataset_version_number=2...


100%|██████████| 1.78G/1.78G [00:07<00:00, 260MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/elemento/nyc-yellow-taxi-trip-data/versions/2


In [8]:
# Start the local Ray runtime with a budget of 16 logical CPUs.
# `ignore_reinit_error=True` is passed so that re-running this cell while Ray
# is already up does not raise.
ray_cpu_budget = 16
ray.init(num_cpus=ray_cpu_budget, ignore_reinit_error=True)

2025-03-02 12:53:36,006	INFO worker.py:1832 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


0,1
Python version:,3.11.11
Ray version:,2.43.0
Dashboard:,http://127.0.0.1:8265


In [4]:
# Build the glob pattern from the directory returned by the kagglehub download
# cell (`path`) instead of hard-coding one user's cache location.
local_path = f"{path}/*.csv"

# glob returns files in arbitrary order; sorting keeps the concatenated row
# order (and therefore the seeded train/test split) the same on every run.
csv_files = sorted(glob.glob(local_path))
if not csv_files:
    # Fail with an actionable message rather than pandas' generic
    # "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files matched {local_path!r}")

# Read every monthly CSV and stack them into a single frame with a fresh
# 0..n-1 index.
df = pd.concat((pd.read_csv(csv_file) for csv_file in csv_files), ignore_index=True)


#df = df.sample(frac=0.5, random_state=42)
#ds = ray.data.read_csv(csv_files)
#ds = ds.limit(int(ds.count() * 0.01))

2025-03-02 12:46:28,864	INFO worker.py:1832 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


In [16]:
# NOTE(review): second ray.init in this notebook, requesting 32 CPUs while the
# earlier init cell requests 16. With ignore_reinit_error=True this call is
# presumably ignored when Ray is already running, so which value takes effect
# depends on cell execution order -- confirm the intended CPU count and keep
# only one init cell.
ray.init(ignore_reinit_error=True, num_cpus=32)

2025-02-21 20:21:17,958	INFO worker.py:1832 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


0,1
Python version:,3.11.11
Ray version:,2.42.1
Dashboard:,http://127.0.0.1:8265


In [7]:
# NOTE(review): this shuts Ray down in the middle of the notebook, before the
# cells that call process_data.remote(...) and trainer.fit(). In a top-to-bottom
# run the runtime configured by the ray.init cells above would no longer be the
# one in use afterwards -- confirm whether this cell belongs at the very end.
ray.shutdown()

In [9]:
@ray.remote
def process_data(df: pd.DataFrame) -> pd.DataFrame:
    """Clean raw taxi-trip rows and add the time-based model features.

    Steps, in order:
      1. Drop rows whose ``trip_distance`` or pickup/drop-off timestamp is
         missing or equal to 0.
      2. Keep only rows whose ``trip_distance`` and ``fare_amount`` both lie
         inside their 1st-99th percentile range (bounds are computed on the
         rows that survived step 1).
      3. Parse both timestamp columns (unparseable values become ``NaT``) and
         derive ``pickup_hour``, ``day_of_week`` and ``is_weekend``.
      4. Derive ``trip_duration`` in minutes.
      5. Keep only rows with a positive ``fare_amount`` and no missing value
         in any of the modelling columns.

    Args:
        df: Frame containing at least ``trip_distance``, ``fare_amount``,
            ``tpep_pickup_datetime`` and ``tpep_dropoff_datetime``. The
            caller's frame is not modified.

    Returns:
        A new DataFrame holding the surviving rows (original index labels
        kept) with ``pickup_hour``, ``day_of_week``, ``is_weekend`` and
        ``trip_duration`` appended. An input with no usable rows yields an
        empty frame with the same columns instead of raising.
    """
    # 1. Filter rows with a missing or zero distance / timestamp.
    for column in ("trip_distance", "tpep_pickup_datetime", "tpep_dropoff_datetime"):
        df = df[df[column].notna() & (df[column] != 0)]

    # 2. Outlier handling (quantile-based). `between` is inclusive on both
    #    ends, matching the original >= / <= comparisons.
    distance_low, distance_high = df["trip_distance"].quantile([0.01, 0.99])
    fare_low, fare_high = df["fare_amount"].quantile([0.01, 0.99])
    in_range = (
        df["trip_distance"].between(distance_low, distance_high)
        & df["fare_amount"].between(fare_low, fare_high)
    )
    # Take an explicit copy so the column assignments below write to our own
    # frame rather than to a slice (avoids SettingWithCopyWarning).
    df = df[in_range].copy()

    # 3. Datetime features. errors="coerce" turns unparseable values into NaT,
    #    which propagates as NaN into the derived columns and is dropped below.
    pickup_time = pd.to_datetime(df["tpep_pickup_datetime"], errors="coerce")
    dropoff_time = pd.to_datetime(df["tpep_dropoff_datetime"], errors="coerce")
    df["tpep_pickup_datetime"] = pickup_time
    df["tpep_dropoff_datetime"] = dropoff_time

    # Vectorised .dt accessors replace the per-row Python lambda: same values,
    # far cheaper on tens of millions of rows, and safe on an empty frame.
    df["pickup_hour"] = pickup_time.dt.hour
    df["day_of_week"] = pickup_time.dt.dayofweek
    # dayofweek is Monday=0 ... Sunday=6, so 5 and 6 are Saturday/Sunday.
    df["is_weekend"] = df["day_of_week"].isin([5, 6]).astype(int)

    # 4. Trip duration in minutes.
    df["trip_duration"] = (dropoff_time - pickup_time).dt.total_seconds() / 60.0

    # 5. Keep only valid (strictly positive) fares.
    df = df[df["fare_amount"] > 0]

    # Finally drop any row still missing one of the modelling columns.
    return df.dropna(
        subset=["pickup_hour", "is_weekend", "trip_duration", "fare_amount", "trip_distance"]
    )

In [49]:
#processed_ds = ds.map_batches(process_data, batch_format="pandas")
#num_rows = processed_ds.count()
#print(f"Number of rows: {num_rows}")
#results = ray.get(process_data.remote(df))

2025-02-21 18:50:18,164	INFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-02-21_18-50-09_431668_10449/logs/ray-data
2025-02-21 18:50:18,165	INFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadCSV] -> TaskPoolMapOperator[MapBatches(process_data)] -> AggregateNumRows[AggregateNumRows]


Running 0: 0.00 row [00:00, ? row/s]

- ReadCSV->SplitBlocks(4) 1: 0.00 row [00:00, ? row/s]

- MapBatches(process_data) 2: 0.00 row [00:00, ? row/s]

- AggregateNumRows 3: 0.00 row [00:00, ? row/s]

Number of rows: 45833988


In [10]:
#num_chunks = 4
#chunk_size = len(df) // num_chunks

# Create chunks in one step
#chunks = [df[i * chunk_size : (i + 1) * chunk_size] for i in range(num_chunks - 1)]
#chunks.append(df[(num_chunks - 1) * chunk_size:])

#results = ray.get([process_data.remote(chunk) for chunk in chunks])
# Combine the processed chunks into a single DataFrame
#processed_df = pd.concat(results, ignore_index=True)


# Submit the cleaning step as one Ray task over the whole frame and block
# until its result is available.
cleaning_task = process_data.remote(df)
results = ray.get(cleaning_task)
processed_df = results

# Preview the first 5 rows of the processed dataset
print(processed_df.head(5))

   VendorID tpep_pickup_datetime tpep_dropoff_datetime  passenger_count  \
0         2  2015-01-15 19:05:39   2015-01-15 19:23:42                1   
1         1  2015-01-10 20:33:38   2015-01-10 20:53:28                1   
2         1  2015-01-10 20:33:38   2015-01-10 20:43:41                1   
3         1  2015-01-10 20:33:39   2015-01-10 20:35:31                1   
4         1  2015-01-10 20:33:39   2015-01-10 20:52:58                1   

   trip_distance  pickup_longitude  pickup_latitude  RateCodeID  \
0           1.59        -73.993896        40.750111         1.0   
1           3.30        -74.001648        40.724243         1.0   
2           1.80        -73.963341        40.802788         1.0   
3           0.50        -74.009087        40.713818         1.0   
4           3.00        -73.971176        40.762428         1.0   

  store_and_fwd_flag  dropoff_longitude  ...  mta_tax  tip_amount  \
0                  N         -73.974785  ...      0.5        3.25   
1       

In [None]:
#feature_columns = ["pickup_hour", "passenger_count", "is_weekend", "trip_duration" , "trip_distance"]
#target_column = "fare_amount"

#train_dataset = processed_ds.map_batches(
#    lambda df: pd.concat([pd.DataFrame(df[feature_columns], columns=feature_columns), df[target_column]], axis=1),
#    batch_format="pandas"
#)
#test_dataset = processed_ds.map_batches(
#    lambda df: pd.concat([pd.DataFrame(df[feature_columns], columns=feature_columns), df[target_column]], axis=1),
#    batch_format="pandas"
#)

In [11]:
# Columns fed to the model, and the column it learns to predict.
feature_columns = [
    "pickup_hour",
    "passenger_count",
    "is_weekend",
    "trip_duration",
    "trip_distance",
]
target_column = "fare_amount"

# Hold out 20% of the processed rows for evaluation; the fixed random_state
# keeps the split repeatable for a given input frame.
feature_frame = processed_df[feature_columns]
target_series = processed_df[target_column]
X_train, X_test, y_train, y_test = train_test_split(
    feature_frame,
    target_series,
    test_size=0.2,
    random_state=42,
)


In [12]:
# Display the first rows of the training features as a quick sanity check of
# the split (the index labels are the original row labels of processed_df).
X_train.head()

Unnamed: 0,pickup_hour,passenger_count,is_weekend,trip_duration,trip_distance
2445482,2,3,1,30.083333,5.59
20808674,17,1,0,32.533333,6.5
904211,17,5,0,13.45,2.67
14641190,19,1,0,21.616667,3.8
43236269,20,1,0,14.616667,2.7


In [15]:
def _as_ray_dataset(features, labels):
    """Join a feature frame with its label series column-wise and wrap the result as a Ray Dataset."""
    feature_frame = pd.DataFrame(features, columns=feature_columns)
    combined = pd.concat([feature_frame, labels], axis=1)
    return from_pandas(combined)


# One Ray Dataset per split, each holding the feature columns plus the target.
train_dataset = _as_ray_dataset(X_train, y_train)
test_dataset = _as_ray_dataset(X_test, y_test)

In [16]:
# Print the first 5 records of the training dataset to confirm that the
# feature columns and the fare_amount target were combined as expected.
train_dataset.show(5)

2025-03-02 12:59:50,097	INFO dataset.py:2787 -- Tip: Use `take_batch()` instead of `take() / show()` to return records in pandas or numpy batch format.
2025-03-02 12:59:50,103	INFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-02_12-53-34_086702_3020/logs/ray-data
2025-03-02 12:59:50,104	INFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> LimitOperator[limit=5]


Running 0: 0.00 row [00:00, ? row/s]

- limit=5 1: 0.00 row [00:00, ? row/s]

{'pickup_hour': 2, 'passenger_count': 3, 'is_weekend': 1, 'trip_duration': 30.083333333333332, 'trip_distance': 5.59, 'fare_amount': 22.5}
{'pickup_hour': 17, 'passenger_count': 1, 'is_weekend': 0, 'trip_duration': 32.53333333333333, 'trip_distance': 6.5, 'fare_amount': 25.0}
{'pickup_hour': 17, 'passenger_count': 5, 'is_weekend': 0, 'trip_duration': 13.45, 'trip_distance': 2.67, 'fare_amount': 12.0}
{'pickup_hour': 19, 'passenger_count': 1, 'is_weekend': 0, 'trip_duration': 21.616666666666667, 'trip_distance': 3.8, 'fare_amount': 16.5}
{'pickup_hour': 20, 'passenger_count': 1, 'is_weekend': 0, 'trip_duration': 14.616666666666667, 'trip_distance': 2.7, 'fare_amount': 12.5}


In [18]:
# Booster parameters: squared-error regression, reporting both RMSE and MAE
# for every dataset passed to the trainer.
xgboost_config = {
    "objective": "reg:squarederror",
    "max_depth": 6,
    "eta": 0.1,
    "subsample": 0.8,
    "eval_metric": ["rmse", "mae"],
}

# Training runs on 16 Ray workers; the "test" dataset is passed alongside
# "train" so its metrics are reported each boosting round.
worker_scaling = ScalingConfig(num_workers=16)
named_datasets = {"train": train_dataset, "test": test_dataset}

trainer = XGBoostTrainer(
    scaling_config=worker_scaling,
    datasets=named_datasets,
    label_column=target_column,
    params=xgboost_config,
    num_boost_round=50,
)

In [19]:
# Launch the distributed training run and keep the returned result object.
# The output below reports train/test RMSE and MAE after each boosting round.
result = trainer.fit()

2025-03-02 13:01:01,862	INFO tensorboardx.py:193 -- pip install "ray[tune]" to see TensorBoard files.



View detailed results here: /root/ray_results/XGBoostTrainer_2025-03-02_13-01-01

Training started without custom configuration.


[36m(XGBoostTrainer pid=18245)[0m Started distributed worker processes: 
[36m(XGBoostTrainer pid=18245)[0m - (node_id=8e63983b3dfc7afcaa77dcaf79dfbacd83bb24033ba06d9d8da679d1, ip=172.28.0.12, pid=18342) world_rank=0, local_rank=0, node_rank=0
[36m(XGBoostTrainer pid=18245)[0m - (node_id=8e63983b3dfc7afcaa77dcaf79dfbacd83bb24033ba06d9d8da679d1, ip=172.28.0.12, pid=18343) world_rank=1, local_rank=1, node_rank=0
[36m(XGBoostTrainer pid=18245)[0m - (node_id=8e63983b3dfc7afcaa77dcaf79dfbacd83bb24033ba06d9d8da679d1, ip=172.28.0.12, pid=18344) world_rank=2, local_rank=2, node_rank=0
[36m(XGBoostTrainer pid=18245)[0m - (node_id=8e63983b3dfc7afcaa77dcaf79dfbacd83bb24033ba06d9d8da679d1, ip=172.28.0.12, pid=18348) world_rank=3, local_rank=3, node_rank=0
[36m(XGBoostTrainer pid=18245)[0m - (node_id=8e63983b3dfc7afcaa77dcaf79dfbacd83bb24033ba06d9d8da679d1, ip=172.28.0.12, pid=18345) world_rank=4, local_rank=4, node_rank=0
[36m(XGBoostTrainer pid=18245)[0m - (node_id=8e63983b3dfc7afcaa

[2m[36m(pid=19678) [0mRunning 0: 0.00 row [00:00, ? row/s]

[2m[36m(pid=19678) [0m- split(16, equal=True) 1: 0.00 row [00:00, ? row/s]

[36m(SplitCoordinator pid=19678)[0m Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-02_12-53-34_086702_3020/logs/ray-data
[36m(SplitCoordinator pid=19678)[0m Execution plan of Dataset: InputDataBuffer[Input] -> OutputSplitter[split(16, equal=True)]
[36m(RayTrainWorker pid=18355)[0m [13:01:06] Task [xgboost.ray-rank=00000013]:6d0d25ae3eb4519ab7b8ad3f01000000 got rank 13[32m [repeated 15x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m


[2m[36m(pid=19679) [0mRunning 0: 0.00 row [00:00, ? row/s]

[2m[36m(pid=19679) [0m- split(16, equal=True) 1: 0.00 row [00:00, ? row/s]

[36m(SplitCoordinator pid=19679)[0m Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-02_12-53-34_086702_3020/logs/ray-data
[36m(SplitCoordinator pid=19679)[0m Execution plan of Dataset: InputDataBuffer[Input] -> OutputSplitter[split(16, equal=True)]
[36m(XGBoostTrainer pid=18245)[0m [13:02:01] [0]	train-rmse:7.75764	train-mae:5.29918	test-rmse:7.75327	test-mae:5.29684



Training finished iteration 1 at 2025-03-02 13:02:01. Total running time: 59s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      56.9025 |
| time_total_s          56.9025 |
| training_iteration          1 |
| test-mae              5.29684 |
| test-rmse             7.75327 |
| train-mae             5.29918 |
| train-rmse            7.75764 |
+-------------------------------+

Training finished iteration 2 at 2025-03-02 13:02:01. Total running time: 59s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.47831 |
| time_total_s          57.3808 |
| training_iteration          2 |
| test-mae              4.77578 |
| test-rmse             7.00268 |
| train-mae             4.77789 |
| train-rmse            7.00669 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:01] [1]	train-rmse:7.00669	train-mae:4.77789	test-rmse:7.00268	test-mae:4.77578



Training finished iteration 3 at 2025-03-02 13:02:02. Total running time: 1min 0s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.48021 |
| time_total_s           57.861 |
| training_iteration          3 |
| test-mae              4.30653 |
| test-rmse             6.32893 |
| train-mae             4.30847 |
| train-rmse            6.33263 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:01] [2]	train-rmse:6.33263	train-mae:4.30847	test-rmse:6.32893	test-mae:4.30653



Training finished iteration 4 at 2025-03-02 13:02:02. Total running time: 1min 0s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.48948 |
| time_total_s          58.3505 |
| training_iteration          4 |
| test-mae              3.88483 |
| test-rmse              5.7246 |
| train-mae              3.8866 |
| train-rmse            5.72801 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:02] [3]	train-rmse:5.72801	train-mae:3.88660	test-rmse:5.72460	test-mae:3.88483



Training finished iteration 5 at 2025-03-02 13:02:02. Total running time: 1min 1s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.45573 |
| time_total_s          58.8063 |
| training_iteration          5 |
| test-mae              3.50449 |
| test-rmse             5.18215 |
| train-mae             3.50609 |
| train-rmse            5.18533 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:02] [4]	train-rmse:5.18533	train-mae:3.50609	test-rmse:5.18215	test-mae:3.50449



Training finished iteration 6 at 2025-03-02 13:02:03. Total running time: 1min 1s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s       0.4612 |
| time_total_s          59.2675 |
| training_iteration          6 |
| test-mae              3.16212 |
| test-rmse             4.69617 |
| train-mae              3.1636 |
| train-rmse            4.69917 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:03] [5]	train-rmse:4.69917	train-mae:3.16360	test-rmse:4.69617	test-mae:3.16212



Training finished iteration 7 at 2025-03-02 13:02:03. Total running time: 1min 2s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.46356 |
| time_total_s           59.731 |
| training_iteration          7 |
| test-mae              2.85392 |
| test-rmse             4.26173 |
| train-mae             2.85526 |
| train-rmse            4.26456 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:03] [6]	train-rmse:4.26456	train-mae:2.85526	test-rmse:4.26173	test-mae:2.85392



Training finished iteration 8 at 2025-03-02 13:02:04. Total running time: 1min 2s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.46705 |
| time_total_s          60.1981 |
| training_iteration          8 |
| test-mae              2.57761 |
| test-rmse             3.87365 |
| train-mae             2.57882 |
| train-rmse            3.87633 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:04] [7]	train-rmse:3.87633	train-mae:2.57882	test-rmse:3.87365	test-mae:2.57761



Training finished iteration 9 at 2025-03-02 13:02:04. Total running time: 1min 3s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.50898 |
| time_total_s           60.707 |
| training_iteration          9 |
| test-mae              2.32848 |
| test-rmse             3.52767 |
| train-mae             2.32958 |
| train-rmse            3.53025 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:04] [8]	train-rmse:3.53025	train-mae:2.32958	test-rmse:3.52767	test-mae:2.32848



Training finished iteration 10 at 2025-03-02 13:02:05. Total running time: 1min 3s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.48328 |
| time_total_s          61.1903 |
| training_iteration         10 |
| test-mae              2.10524 |
| test-rmse             3.21962 |
| train-mae             2.10625 |
| train-rmse            3.22211 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:05] [9]	train-rmse:3.22211	train-mae:2.10625	test-rmse:3.21962	test-mae:2.10524



Training finished iteration 11 at 2025-03-02 13:02:05. Total running time: 1min 3s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.48509 |
| time_total_s          61.6754 |
| training_iteration         11 |
| test-mae              1.90418 |
| test-rmse             2.94578 |
| train-mae             1.90509 |
| train-rmse            2.94823 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:05] [10]	train-rmse:2.94823	train-mae:1.90509	test-rmse:2.94578	test-mae:1.90418



Training finished iteration 12 at 2025-03-02 13:02:06. Total running time: 1min 4s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.46841 |
| time_total_s          62.1438 |
| training_iteration         12 |
| test-mae               1.7239 |
| test-rmse             2.70365 |
| train-mae             1.72473 |
| train-rmse            2.70607 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:06] [11]	train-rmse:2.70607	train-mae:1.72473	test-rmse:2.70365	test-mae:1.72390



Training finished iteration 13 at 2025-03-02 13:02:06. Total running time: 1min 4s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.47879 |
| time_total_s          62.6226 |
| training_iteration         13 |
| test-mae              1.56155 |
| test-rmse             2.48984 |
| train-mae             1.56231 |
| train-rmse            2.49227 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:06] [12]	train-rmse:2.49227	train-mae:1.56231	test-rmse:2.48984	test-mae:1.56155



Training finished iteration 14 at 2025-03-02 13:02:07. Total running time: 1min 5s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.47024 |
| time_total_s          63.0929 |
| training_iteration         14 |
| test-mae              1.41616 |
| test-rmse             2.30139 |
| train-mae             1.41685 |
| train-rmse            2.30381 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:07] [13]	train-rmse:2.30381	train-mae:1.41685	test-rmse:2.30139	test-mae:1.41616



Training finished iteration 15 at 2025-03-02 13:02:07. Total running time: 1min 5s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.47541 |
| time_total_s          63.5683 |
| training_iteration         15 |
| test-mae              1.28568 |
| test-rmse             2.13635 |
| train-mae             1.28629 |
| train-rmse            2.13876 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:07] [14]	train-rmse:2.13876	train-mae:1.28629	test-rmse:2.13635	test-mae:1.28568
[36m(XGBoostTrainer pid=18245)[0m [13:02:08] [15]	train-rmse:1.99421	train-mae:1.16939	test-rmse:1.99178	test-mae:1.16883



Training finished iteration 16 at 2025-03-02 13:02:08. Total running time: 1min 6s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.46131 |
| time_total_s          64.0296 |
| training_iteration         16 |
| test-mae              1.16883 |
| test-rmse             1.99178 |
| train-mae             1.16939 |
| train-rmse            1.99421 |
+-------------------------------+

Training finished iteration 17 at 2025-03-02 13:02:08. Total running time: 1min 6s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.43544 |
| time_total_s           64.465 |
| training_iteration         17 |
| test-mae              1.06409 |
| test-rmse             1.86611 |
| train-mae             1.06459 |
| train-rmse            1.86856 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:08] [16]	train-rmse:1.86856	train-mae:1.06459	test-rmse:1.86611	test-mae:1.06409



Training finished iteration 18 at 2025-03-02 13:02:09. Total running time: 1min 7s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.47606 |
| time_total_s          64.9411 |
| training_iteration         18 |
| test-mae              0.97072 |
| test-rmse             1.75749 |
| train-mae             0.97117 |
| train-rmse            1.75998 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:09] [17]	train-rmse:1.75998	train-mae:0.97117	test-rmse:1.75749	test-mae:0.97072



Training finished iteration 19 at 2025-03-02 13:02:09. Total running time: 1min 7s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s       0.4441 |
| time_total_s          65.3852 |
| training_iteration         19 |
| test-mae              0.88737 |
| test-rmse             1.66419 |
| train-mae             0.88776 |
| train-rmse            1.66668 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:09] [18]	train-rmse:1.66668	train-mae:0.88776	test-rmse:1.66419	test-mae:0.88737



Training finished iteration 20 at 2025-03-02 13:02:10. Total running time: 1min 8s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.44423 |
| time_total_s          65.8294 |
| training_iteration         20 |
| test-mae              0.81336 |
| test-rmse              1.5845 |
| train-mae              0.8137 |
| train-rmse            1.58695 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:10] [19]	train-rmse:1.58695	train-mae:0.81370	test-rmse:1.58450	test-mae:0.81336



Training finished iteration 21 at 2025-03-02 13:02:10. Total running time: 1min 8s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.43199 |
| time_total_s          66.2614 |
| training_iteration         21 |
| test-mae              0.74797 |
| test-rmse             1.51648 |
| train-mae             0.74826 |
| train-rmse            1.51891 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:10] [20]	train-rmse:1.51891	train-mae:0.74826	test-rmse:1.51648	test-mae:0.74797



Training finished iteration 22 at 2025-03-02 13:02:10. Total running time: 1min 9s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.44591 |
| time_total_s          66.7073 |
| training_iteration         22 |
| test-mae               0.6904 |
| test-rmse             1.45876 |
| train-mae             0.69066 |
| train-rmse            1.46116 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:10] [21]	train-rmse:1.46116	train-mae:0.69066	test-rmse:1.45876	test-mae:0.69040



Training finished iteration 23 at 2025-03-02 13:02:11. Total running time: 1min 9s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.43514 |
| time_total_s          67.1424 |
| training_iteration         23 |
| test-mae              0.63959 |
| test-rmse                1.41 |
| train-mae             0.63982 |
| train-rmse            1.41245 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:11] [22]	train-rmse:1.41245	train-mae:0.63982	test-rmse:1.41000	test-mae:0.63959



Training finished iteration 24 at 2025-03-02 13:02:11. Total running time: 1min 9s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.43228 |
| time_total_s          67.5747 |
| training_iteration         24 |
| test-mae              0.59543 |
| test-rmse             1.36926 |
| train-mae             0.59564 |
| train-rmse            1.37169 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:11] [23]	train-rmse:1.37169	train-mae:0.59564	test-rmse:1.36926	test-mae:0.59543
[36m(XGBoostTrainer pid=18245)[0m [13:02:12] [24]	train-rmse:1.33755	train-mae:0.55707	test-rmse:1.33514	test-mae:0.55689



Training finished iteration 25 at 2025-03-02 13:02:12. Total running time: 1min 10s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.66498 |
| time_total_s          68.2397 |
| training_iteration         25 |
| test-mae              0.55689 |
| test-rmse             1.33514 |
| train-mae             0.55707 |
| train-rmse            1.33755 |
+-------------------------------+

Training finished iteration 26 at 2025-03-02 13:02:12. Total running time: 1min 10s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.28438 |
| time_total_s          68.5241 |
| training_iteration         26 |
| test-mae              0.52343 |
| test-rmse              1.3066 |
| train-mae             0.52359 |
| train-rmse            1.30901 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:12] [25]	train-rmse:1.30901	train-mae:0.52359	test-rmse:1.30660	test-mae:0.52343



Training finished iteration 27 at 2025-03-02 13:02:13. Total running time: 1min 11s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.50865 |
| time_total_s          69.0327 |
| training_iteration         27 |
| test-mae               0.4945 |
| test-rmse             1.28291 |
| train-mae             0.49463 |
| train-rmse            1.28532 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:13] [26]	train-rmse:1.28532	train-mae:0.49463	test-rmse:1.28291	test-mae:0.49450



Training finished iteration 28 at 2025-03-02 13:02:13. Total running time: 1min 11s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.50918 |
| time_total_s          69.5419 |
| training_iteration         28 |
| test-mae              0.46961 |
| test-rmse             1.26322 |
| train-mae             0.46972 |
| train-rmse            1.26563 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:13] [27]	train-rmse:1.26563	train-mae:0.46972	test-rmse:1.26322	test-mae:0.46961



Training finished iteration 29 at 2025-03-02 13:02:14. Total running time: 1min 12s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.46317 |
| time_total_s          70.0051 |
| training_iteration         29 |
| test-mae              0.44831 |
| test-rmse             1.24691 |
| train-mae              0.4484 |
| train-rmse            1.24934 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:14] [28]	train-rmse:1.24934	train-mae:0.44840	test-rmse:1.24691	test-mae:0.44831



Training finished iteration 30 at 2025-03-02 13:02:14. Total running time: 1min 12s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.44513 |
| time_total_s          70.4502 |
| training_iteration         30 |
| test-mae              0.43017 |
| test-rmse             1.23321 |
| train-mae             0.43024 |
| train-rmse            1.23564 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:14] [29]	train-rmse:1.23564	train-mae:0.43024	test-rmse:1.23321	test-mae:0.43017



Training finished iteration 31 at 2025-03-02 13:02:15. Total running time: 1min 13s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.51567 |
| time_total_s          70.9659 |
| training_iteration         31 |
| test-mae              0.41474 |
| test-rmse             1.22208 |
| train-mae              0.4148 |
| train-rmse            1.22448 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:15] [30]	train-rmse:1.22448	train-mae:0.41480	test-rmse:1.22208	test-mae:0.41474



Training finished iteration 32 at 2025-03-02 13:02:15. Total running time: 1min 13s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.48416 |
| time_total_s          71.4501 |
| training_iteration         32 |
| test-mae              0.40163 |
| test-rmse             1.21273 |
| train-mae             0.40168 |
| train-rmse            1.21516 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:15] [31]	train-rmse:1.21516	train-mae:0.40168	test-rmse:1.21273	test-mae:0.40163



Training finished iteration 33 at 2025-03-02 13:02:16. Total running time: 1min 14s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s       0.4866 |
| time_total_s          71.9367 |
| training_iteration         33 |
| test-mae              0.39042 |
| test-rmse             1.20476 |
| train-mae             0.39046 |
| train-rmse             1.2072 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:16] [32]	train-rmse:1.20720	train-mae:0.39046	test-rmse:1.20476	test-mae:0.39042



Training finished iteration 34 at 2025-03-02 13:02:16. Total running time: 1min 14s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.50518 |
| time_total_s          72.4418 |
| training_iteration         34 |
| test-mae              0.38113 |
| test-rmse             1.19824 |
| train-mae             0.38116 |
| train-rmse            1.20069 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:16] [33]	train-rmse:1.20069	train-mae:0.38116	test-rmse:1.19824	test-mae:0.38113



Training finished iteration 35 at 2025-03-02 13:02:17. Total running time: 1min 15s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.49043 |
| time_total_s          72.9323 |
| training_iteration         35 |
| test-mae              0.37319 |
| test-rmse             1.19276 |
| train-mae             0.37323 |
| train-rmse             1.1952 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:17] [34]	train-rmse:1.19520	train-mae:0.37323	test-rmse:1.19276	test-mae:0.37319



Training finished iteration 36 at 2025-03-02 13:02:17. Total running time: 1min 15s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.47146 |
| time_total_s          73.4037 |
| training_iteration         36 |
| test-mae              0.36656 |
| test-rmse             1.18834 |
| train-mae             0.36659 |
| train-rmse            1.19077 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:17] [35]	train-rmse:1.19077	train-mae:0.36659	test-rmse:1.18834	test-mae:0.36656



Training finished iteration 37 at 2025-03-02 13:02:18. Total running time: 1min 16s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.49805 |
| time_total_s          73.9018 |
| training_iteration         37 |
| test-mae              0.36104 |
| test-rmse             1.18466 |
| train-mae             0.36106 |
| train-rmse             1.1871 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:18] [36]	train-rmse:1.18710	train-mae:0.36106	test-rmse:1.18466	test-mae:0.36104



Training finished iteration 38 at 2025-03-02 13:02:18. Total running time: 1min 16s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.45949 |
| time_total_s          74.3613 |
| training_iteration         38 |
| test-mae              0.35632 |
| test-rmse             1.18147 |
| train-mae             0.35633 |
| train-rmse            1.18387 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:18] [37]	train-rmse:1.18387	train-mae:0.35633	test-rmse:1.18147	test-mae:0.35632



Training finished iteration 39 at 2025-03-02 13:02:19. Total running time: 1min 17s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.48268 |
| time_total_s           74.844 |
| training_iteration         39 |
| test-mae               0.3524 |
| test-rmse             1.17893 |
| train-mae             0.35241 |
| train-rmse            1.18131 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:19] [38]	train-rmse:1.18131	train-mae:0.35241	test-rmse:1.17893	test-mae:0.35240



Training finished iteration 40 at 2025-03-02 13:02:19. Total running time: 1min 17s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.45853 |
| time_total_s          75.3025 |
| training_iteration         40 |
| test-mae              0.34906 |
| test-rmse             1.17663 |
| train-mae             0.34906 |
| train-rmse            1.17897 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:19] [39]	train-rmse:1.17897	train-mae:0.34906	test-rmse:1.17663	test-mae:0.34906



Training finished iteration 41 at 2025-03-02 13:02:20. Total running time: 1min 18s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.46584 |
| time_total_s          75.7683 |
| training_iteration         41 |
| test-mae              0.34626 |
| test-rmse             1.17472 |
| train-mae             0.34626 |
| train-rmse            1.17702 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:20] [40]	train-rmse:1.17702	train-mae:0.34626	test-rmse:1.17472	test-mae:0.34626



Training finished iteration 42 at 2025-03-02 13:02:20. Total running time: 1min 18s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.45455 |
| time_total_s          76.2229 |
| training_iteration         42 |
| test-mae               0.3438 |
| test-rmse             1.17317 |
| train-mae             0.34379 |
| train-rmse             1.1754 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:20] [41]	train-rmse:1.17540	train-mae:0.34379	test-rmse:1.17317	test-mae:0.34380
[36m(XGBoostTrainer pid=18245)[0m [13:02:21] [42]	train-rmse:1.17395	train-mae:0.34169	test-rmse:1.17174	test-mae:0.34170



Training finished iteration 43 at 2025-03-02 13:02:21. Total running time: 1min 19s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.45799 |
| time_total_s          76.6809 |
| training_iteration         43 |
| test-mae               0.3417 |
| test-rmse             1.17174 |
| train-mae             0.34169 |
| train-rmse            1.17395 |
+-------------------------------+

Training finished iteration 44 at 2025-03-02 13:02:21. Total running time: 1min 19s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.48024 |
| time_total_s          77.1611 |
| training_iteration         44 |
| test-mae              0.33996 |
| test-rmse             1.17056 |
| train-mae             0.33995 |
| train-rmse            1.17278 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:21] [43]	train-rmse:1.17278	train-mae:0.33995	test-rmse:1.17056	test-mae:0.33996



Training finished iteration 45 at 2025-03-02 13:02:21. Total running time: 1min 20s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.45103 |
| time_total_s          77.6121 |
| training_iteration         45 |
| test-mae              0.33852 |
| test-rmse             1.16958 |
| train-mae              0.3385 |
| train-rmse            1.17175 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:21] [44]	train-rmse:1.17175	train-mae:0.33850	test-rmse:1.16958	test-mae:0.33852



Training finished iteration 46 at 2025-03-02 13:02:22. Total running time: 1min 20s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.48068 |
| time_total_s          78.0928 |
| training_iteration         46 |
| test-mae              0.33722 |
| test-rmse             1.16882 |
| train-mae             0.33719 |
| train-rmse            1.17096 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:22] [45]	train-rmse:1.17096	train-mae:0.33719	test-rmse:1.16882	test-mae:0.33722



Training finished iteration 47 at 2025-03-02 13:02:22. Total running time: 1min 21s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.42343 |
| time_total_s          78.5162 |
| training_iteration         47 |
| test-mae              0.33616 |
| test-rmse             1.16807 |
| train-mae             0.33614 |
| train-rmse            1.17022 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:22] [46]	train-rmse:1.17022	train-mae:0.33614	test-rmse:1.16807	test-mae:0.33616



Training finished iteration 48 at 2025-03-02 13:02:23. Total running time: 1min 21s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.43369 |
| time_total_s          78.9499 |
| training_iteration         48 |
| test-mae              0.33522 |
| test-rmse             1.16745 |
| train-mae             0.33519 |
| train-rmse            1.16959 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:23] [47]	train-rmse:1.16959	train-mae:0.33519	test-rmse:1.16745	test-mae:0.33522



Training finished iteration 49 at 2025-03-02 13:02:23. Total running time: 1min 21s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.44018 |
| time_total_s          79.3901 |
| training_iteration         49 |
| test-mae              0.33449 |
| test-rmse             1.16699 |
| train-mae             0.33446 |
| train-rmse            1.16914 |
+-------------------------------+


[36m(XGBoostTrainer pid=18245)[0m [13:02:23] [48]	train-rmse:1.16914	train-mae:0.33446	test-rmse:1.16699	test-mae:0.33449



Training finished iteration 50 at 2025-03-02 13:02:24. Total running time: 1min 22s
+-------------------------------+
| Training result               |
+-------------------------------+
| checkpoint_dir_name           |
| time_this_iter_s      0.41405 |
| time_total_s          79.8042 |
| training_iteration         50 |
| test-mae              0.33377 |
| test-rmse             1.16643 |
| train-mae             0.33374 |
| train-rmse            1.16855 |
+-------------------------------+

Training finished iteration 51 at 2025-03-02 13:02:24. Total running time: 1min 22s
+-----------------------------------------+
| Training result                         |
+-----------------------------------------+
| checkpoint_dir_name   checkpoint_000000 |
| time_this_iter_s                0.00728 |
| time_total_s                   79.81144 |
| training_iteration                   51 |
| test-mae                        0.33377 |
| test-rmse                       1.16643 |
| train-mae               

[36m(XGBoostTrainer pid=18245)[0m [13:02:24] [49]	train-rmse:1.16855	train-mae:0.33374	test-rmse:1.16643	test-mae:0.33377
[36m(RayTrainWorker pid=18342)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/XGBoostTrainer_2025-03-02_13-01-01/XGBoostTrainer_649e6_00000_0_2025-03-02_13-01-01/checkpoint_000000)
2025-03-02 13:02:25,474	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/XGBoostTrainer_2025-03-02_13-01-01' in 0.0056s.



Training completed after 51 iterations at 2025-03-02 13:02:25. Total running time: 1min 23s

