In [1]:
import sys
import os

# Einen Ordner hochgehen (du bist in /notebooks, Modell liegt eine Ebene dar√ºber)
os.chdir("..")

# src/ zum Python-Pfad hinzuf√ºgen
sys.path.append(os.getcwd())

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error
from lightgbm import LGBMRegressor
import itertools
import os
import joblib

In [3]:
# ============================================
# üî• FULL WORKING LIGHTGBM TUNING PIPELINE üî•
# Kein Preprocessor n√∂tig ‚Äì alles hier drin.
# ============================================

from src.data.load_data import load_data
from sklearn.metrics import mean_absolute_error
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from lightgbm import LGBMRegressor

import pandas as pd
import numpy as np
import itertools
import joblib
import os

# =============================
# Konstanten
# =============================
TARGET_END = "Auftragsende_IST"
START_COL = "Auftragseingang"
DATE_COLS = [
    "Auftragseingang",
    "Auftragsende_SOLL",
    "AFO_Start_SOLL",
    "AFO_Ende_SOLL",
    "AFO_Start_IST",
    "AFO_Ende_IST",
]

# =============================
# 1) Daten laden
# =============================
data = load_data()

# Datumsfelder ‚Üí Datetime
for col in DATE_COLS + [TARGET_END]:
    data[col] = pd.to_datetime(data[col], errors="coerce")

# g√ºltige rows filtern
mask = (~data[TARGET_END].isna()) & (~data[START_COL].isna())
data = data[mask].copy()

start_dt = data[START_COL]

# Dauer in Tagen als Target
y = (data[TARGET_END] - start_dt).dt.total_seconds() / 86400.0
y = y.astype("float32")

# =============================
# 2) Date-Features erstellen
# =============================
for col in DATE_COLS:
    data[f"{col}_dow"] = data[col].dt.dayofweek
    data[f"{col}_hour"] = data[col].dt.hour
    data[f"{col}_day"]  = data[col].dt.day
    data[f"{col}_month"] = data[col].dt.month
    data[f"{col}_week"] = data[col].dt.isocalendar().week.astype(int)

# Originaldatum remove
data = data.drop(columns=DATE_COLS + [TARGET_END])

# IDs entfernen
for col in ["AuftragsID", "BauteilID", "MaschinenID"]:
    if col in data.columns:
        data = data.drop(columns=[col])

# =============================
# 3) Preprocessor definieren
# =============================
categorical = data.select_dtypes(include=["object"]).columns.tolist()
numeric     = data.select_dtypes(include=[np.number]).columns.tolist()

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", Pipeline([
            ("imputer", SimpleImputer(strategy="most_frequent")),
            ("onehot", OneHotEncoder(handle_unknown="ignore")),
        ]), categorical),

        ("num", Pipeline([
            ("imputer", SimpleImputer(strategy="median")),
        ]), numeric),
    ]
)

# =============================
# 4) Train/Test Split
# =============================
X_train, X_test, y_train, y_test = train_test_split(
    data, y, test_size=0.35, random_state=32
)

# =============================
# 5) GRID-TUNING
# =============================
param_grid = {
    "n_estimators": [350, 470],         # 2 Werte
    "num_leaves": [96, 128, 256],       # 3 Werte
    "min_child_samples": [30, 60],      # 2 Werte
    "subsample": [0.85, 0.9],           # 2 Werte
    "colsample_bytree": [0.85, 0.9],    # 2 Werte
}

combos = list(itertools.product(
    param_grid["n_estimators"],
    param_grid["num_leaves"],
    param_grid["min_child_samples"],
    param_grid["subsample"],
    param_grid["colsample_bytree"],
))

print(f"\nüîç Starte Tuning √ºber {len(combos)} Kombinationen...\n")

best_mae = 99999
best_model = None
best_params = None

for i, (N, L, M, SUB, COL) in enumerate(combos, 1):

    print(f"üîÅ Test {i}/{len(combos)}")

    model = LGBMRegressor(
        n_estimators=N,
        learning_rate=0.03,
        num_leaves=L,
        min_child_samples=M,
        subsample=SUB,
        colsample_bytree=COL,
        n_jobs=-1,
        random_state=42
    )

    pipe_tmp = Pipeline([
        ("prep", preprocessor),
        ("model", model)
    ])

    pipe_tmp.fit(X_train, y_train)
    preds = pipe_tmp.predict(X_test)
    mae = mean_absolute_error(y_test, preds)

    print(f"‚û°Ô∏è MAE {mae:.3f} | n={N}, leaves={L}, min_child={M}, subs={SUB}, col={COL}")

    if mae < best_mae:
        best_mae = mae
        best_params = (N, L, M, SUB, COL)
        best_model = pipe_tmp

# =============================
# 6) Ergebnis
# =============================
print("\n==============================")
print("üèÜ BESTE PARAMETER GEFUNDEN")
print("==============================")
print("MAE:", best_mae)
print("n_estimators:", best_params[0])
print("num_leaves:", best_params[1])
print("min_child_samples:", best_params[2])
print("subsample:", best_params[3])
print("colsample_bytree:", best_params[4])

# Speichern
os.makedirs("models/lightgbm/pipeline", exist_ok=True)
joblib.dump(best_model, "models/lightgbm/pipeline/best_lgbm_pipeline.pkl")

print("\nüì¶ Modells gespeichert!")

Spalten im DataFrame:
['AuftragsID', 'BauteilID', 'Bauteilbezeichnung', 'Auftragseingang', 'Priorit√§t', 'Auftragsende_SOLL', 'Arbeitsschritt', 'Arbeitsschrittbezeichnung', 'AFO_Start_SOLL', 'AFO_Ende_SOLL', 'AFO_Start_IST', 'AFO_Ende_IST', 'MaschinenID', 'Maschinenbezeichnung', 'Auftragsende_IST']

üîç Starte Tuning √ºber 48 Kombinationen...

üîÅ Test 1/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014034 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 31.901 | n=350, leaves=96, min_child=30, subs=0.85, col=0.85
üîÅ Test 2/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013615 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 32.099 | n=350, leaves=96, min_child=30, subs=0.85, col=0.9
üîÅ Test 3/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013703 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 31.901 | n=350, leaves=96, min_child=30, subs=0.9, col=0.85
üîÅ Test 4/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013885 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 32.099 | n=350, leaves=96, min_child=30, subs=0.9, col=0.9
üîÅ Test 5/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 32.638 | n=350, leaves=96, min_child=60, subs=0.85, col=0.85
üîÅ Test 6/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014153 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 32.208 | n=350, leaves=96, min_child=60, subs=0.85, col=0.9
üîÅ Test 7/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013277 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 32.638 | n=350, leaves=96, min_child=60, subs=0.9, col=0.85
üîÅ Test 8/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014335 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 32.208 | n=350, leaves=96, min_child=60, subs=0.9, col=0.9
üîÅ Test 9/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013664 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.836 | n=350, leaves=128, min_child=30, subs=0.85, col=0.85
üîÅ Test 10/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014028 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.484 | n=350, leaves=128, min_child=30, subs=0.85, col=0.9
üîÅ Test 11/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013472 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.836 | n=350, leaves=128, min_child=30, subs=0.9, col=0.85
üîÅ Test 12/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013845 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.484 | n=350, leaves=128, min_child=30, subs=0.9, col=0.9
üîÅ Test 13/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013460 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.986 | n=350, leaves=128, min_child=60, subs=0.85, col=0.85
üîÅ Test 14/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014004 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.909 | n=350, leaves=128, min_child=60, subs=0.85, col=0.9
üîÅ Test 15/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013009 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.986 | n=350, leaves=128, min_child=60, subs=0.9, col=0.85
üîÅ Test 16/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.909 | n=350, leaves=128, min_child=60, subs=0.9, col=0.9
üîÅ Test 17/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012640 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 27.515 | n=350, leaves=256, min_child=30, subs=0.85, col=0.85
üîÅ Test 18/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014730 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 27.441 | n=350, leaves=256, min_child=30, subs=0.85, col=0.9
üîÅ Test 19/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012788 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 27.515 | n=350, leaves=256, min_child=30, subs=0.9, col=0.85
üîÅ Test 20/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014422 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 27.441 | n=350, leaves=256, min_child=30, subs=0.9, col=0.9
üîÅ Test 21/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014384 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 27.473 | n=350, leaves=256, min_child=60, subs=0.85, col=0.85
üîÅ Test 22/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013993 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 27.128 | n=350, leaves=256, min_child=60, subs=0.85, col=0.9
üîÅ Test 23/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013016 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 27.473 | n=350, leaves=256, min_child=60, subs=0.9, col=0.85
üîÅ Test 24/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014587 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 27.128 | n=350, leaves=256, min_child=60, subs=0.9, col=0.9
üîÅ Test 25/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013365 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 29.985 | n=470, leaves=96, min_child=30, subs=0.85, col=0.85
üîÅ Test 26/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014003 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.248 | n=470, leaves=96, min_child=30, subs=0.85, col=0.9
üîÅ Test 27/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012889 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 29.985 | n=470, leaves=96, min_child=30, subs=0.9, col=0.85
üîÅ Test 28/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013890 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 30.248 | n=470, leaves=96, min_child=30, subs=0.9, col=0.9
üîÅ Test 29/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013448 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 29.992 | n=470, leaves=96, min_child=60, subs=0.85, col=0.85
üîÅ Test 30/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013635 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 29.987 | n=470, leaves=96, min_child=60, subs=0.85, col=0.9
üîÅ Test 31/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012763 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 29.992 | n=470, leaves=96, min_child=60, subs=0.9, col=0.85
üîÅ Test 32/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013386 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 29.987 | n=470, leaves=96, min_child=60, subs=0.9, col=0.9
üîÅ Test 33/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013295 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 28.918 | n=470, leaves=128, min_child=30, subs=0.85, col=0.85
üîÅ Test 34/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013400 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 28.825 | n=470, leaves=128, min_child=30, subs=0.85, col=0.9
üîÅ Test 35/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012386 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 28.918 | n=470, leaves=128, min_child=30, subs=0.9, col=0.85
üîÅ Test 36/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014387 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 28.825 | n=470, leaves=128, min_child=30, subs=0.9, col=0.9
üîÅ Test 37/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012854 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 28.838 | n=470, leaves=128, min_child=60, subs=0.85, col=0.85
üîÅ Test 38/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013751 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 28.815 | n=470, leaves=128, min_child=60, subs=0.85, col=0.9
üîÅ Test 39/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012816 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 28.838 | n=470, leaves=128, min_child=60, subs=0.9, col=0.85
üîÅ Test 40/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013904 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 28.815 | n=470, leaves=128, min_child=60, subs=0.9, col=0.9
üîÅ Test 41/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012328 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 25.511 | n=470, leaves=256, min_child=30, subs=0.85, col=0.85
üîÅ Test 42/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014248 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 25.036 | n=470, leaves=256, min_child=30, subs=0.85, col=0.9
üîÅ Test 43/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012771 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 25.511 | n=470, leaves=256, min_child=30, subs=0.9, col=0.85
üîÅ Test 44/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013204 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 25.036 | n=470, leaves=256, min_child=30, subs=0.9, col=0.9
üîÅ Test 45/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012812 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 25.263 | n=470, leaves=256, min_child=60, subs=0.85, col=0.85
üîÅ Test 46/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013365 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 24.882 | n=470, leaves=256, min_child=60, subs=0.85, col=0.9
üîÅ Test 47/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012529 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 25.263 | n=470, leaves=256, min_child=60, subs=0.9, col=0.85
üîÅ Test 48/48
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012978 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 734
[LightGBM] [Info] Number of data points in the train set: 914443, number of used features: 52
[LightGBM] [Info] Start training from score 265.735938




‚û°Ô∏è MAE 24.882 | n=470, leaves=256, min_child=60, subs=0.9, col=0.9

üèÜ BESTE PARAMETER GEFUNDEN
MAE: 24.881507171181184
n_estimators: 470
num_leaves: 256
min_child_samples: 60
subsample: 0.85
colsample_bytree: 0.9

üì¶ Modells gespeichert!
