In [11]:
import pandas as pd
import numpy as np

def engineer_features(df):
    # 1. Garage-related
    df["GarageScore"] = df["GarageCars"] * df["GarageArea"]
    df["GarageAgeDiff"] = df["YrSold"] - df["GarageYrBlt"]
    df["GarageAgeDiff"] = df["GarageAgeDiff"].replace({np.nan: -1})
    df["GarageStyleCombo"] = df["GarageType"].fillna("None") + "_" + \
                             df["GarageFinish"].fillna("None") + "_" + \
                             df["GarageQual"].fillna("None") + "_" + \
                             df["GarageCond"].fillna("None")

    # 2. Bathroom combination
    df["TotalBath"] = (
        df["FullBath"] + 0.5 * df["HalfBath"] +
        df["BsmtFullBath"] + 0.5 * df["BsmtHalfBath"]
    )

    # 3. House age and remodel
    df["HouseAge"] = df["YrSold"] - df["YearBuilt"]
    df["RemodelAge"] = df["YrSold"] - df["YearRemodAdd"]
    df["IsRemodeled"] = (df["YearBuilt"] != df["YearRemodAdd"]).astype(int)

    # 4. Total square footage
    df["TotalSqFeet"] = df["GrLivArea"] + df["TotalBsmtSF"]

    # 5. Porch square footage
    df["TotalPorchSF"] = (
        df["OpenPorchSF"] + df["EnclosedPorch"] +
        df["3SsnPorch"] + df["ScreenPorch"]
    )

    # 6. Overall score
    df["OverallScore"] = df["OverallQual"] * df["OverallCond"]

    # 7. Total floors and rooms
    df["TotalFloors"] = df["1stFlrSF"] + df["2ndFlrSF"]
    df["TotalRooms"] = df["TotRmsAbvGrd"] + df["BedroomAbvGr"]

    return df


In [12]:
import pandas as pd
import numpy as np
# train.csv ve test.csv dosyalarƒ±nƒ± y√ºklediysen:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")




In [14]:
from sklearn.preprocessing import MinMaxScaler

def clean_and_encode(train, test):
    # SalePrice hedef deƒüi≈ükeni ayrƒ±lƒ±yor
    y = train["SalePrice"]
    train = train.drop("SalePrice", axis=1)

    # Yeni feature engineering uygulanƒ±yor
    train = engineer_features(train)
    test = engineer_features(test)

    # Train ve test birle≈üiyor
    all_data = pd.concat([train, test], axis=0)

    # Kullanƒ±cƒ±nƒ±n i≈üaretlediƒüi + otomatik belirlenen kategorik sayƒ±sallar
    pseudo_categoricals = [
        'OverallQual', 'GarageCars', 'FullBath', 'TotRmsAbvGrd', 'Fireplaces',
        'HalfBath', 'RsmtCullBath', 'BedroomAbvGr', 'KitchenAbvGr',
        'MSSubClass', 'OverallCond', 'BsmtFullBath', 'BsmtHalfBath',
        'PoolArea', 'MoSold', 'YrSold'
    ]

    none_fill = [
        'PoolQC', 'MiscFeature', 'Alley', 'Fence', 'FireplaceQu',
        'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond',
        'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
        'MasVnrType'
    ]
    for col in none_fill:
        if col in all_data.columns:
            all_data[col] = all_data[col].fillna("None")

    zero_fill = [
        'GarageYrBlt', 'GarageArea', 'GarageCars',
        'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
        'BsmtFullBath', 'BsmtHalfBath', 'MasVnrArea'
    ]
    for col in zero_fill:
        if col in all_data.columns:
            all_data[col] = all_data[col].fillna(0)

    # LotFrontage gruba g√∂re dolduruluyor
    if "LotFrontage" in all_data.columns and "Neighborhood" in all_data.columns:
        all_data["LotFrontage"] = all_data.groupby("Neighborhood")["LotFrontage"].transform(
            lambda x: x.fillna(x.median())
        )

    # Mod ile doldurulacaklar
    mode_fill = ['MSZoning', 'Electrical', 'KitchenQual', 'Exterior1st', 'Exterior2nd', 'SaleType', 'Functional']
    for col in mode_fill:
        if col in all_data.columns:
            all_data[col] = all_data[col].fillna(all_data[col].mode()[0])

    # Kalan eksikler ortalama ile
    all_data = all_data.fillna(all_data.mean(numeric_only=True))

    # Gereksiz kolon
    all_data = all_data.drop(columns=['Utilities'], errors='ignore')

    # Pseudo-kategorikleri string yap
    for col in pseudo_categoricals:
        if col in all_data.columns:
            all_data[col] = all_data[col].astype(str)

    # One-hot encoding
    all_data = pd.get_dummies(all_data)

    # MinMaxScaler ile normalizasyon
    scaler = MinMaxScaler()
    all_data_scaled = pd.DataFrame(
        scaler.fit_transform(all_data),
        columns=all_data.columns,
        index=all_data.index
    )

    # Geri ayƒ±r
    X_clean = all_data_scaled.iloc[:len(y)].copy()
    X_test_clean = all_data_scaled.iloc[len(y):].copy()

    return X_clean, X_test_clean, y



In [15]:
import pandas as pd

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

In [16]:
train = engineer_features(train)
test = engineer_features(test)


In [18]:
print("X_train temiz mi?", X_clean.isnull().sum().sum() == 0)
print("X_test temiz mi?", X_test_clean.isnull().sum().sum() == 0)
print("y_train temiz mi?", y_clean.isnull().sum() == 0)


X_train temiz mi? True
X_test temiz mi? True
y_train temiz mi? True


In [17]:
print("Kategorik (object) veri kaldƒ± mƒ±?", (X_clean.dtypes == 'object').sum())


Kategorik (object) veri kaldƒ± mƒ±? 0


In [19]:
print("T√ºm s√ºtunlar sayƒ±sal mƒ±?", all(np.issubdtype(dtype, np.number) for dtype in X_clean.dtypes))


T√ºm s√ºtunlar sayƒ±sal mƒ±? False


In [20]:
print("X ve y uzunluƒüu e≈üit mi?", len(X_clean) == len(y_clean))


X ve y uzunluƒüu e≈üit mi? True


S√ºtunlarƒ± sayƒ±sal hale getirelim 

In [21]:
non_numeric = X_clean.select_dtypes(exclude=[np.number])
print("Sayƒ±sal olmayan s√ºtunlar:", non_numeric.columns.tolist())


Sayƒ±sal olmayan s√ºtunlar: ['MSZoning_C (all)', 'MSZoning_FV', 'MSZoning_RH', 'MSZoning_RL', 'MSZoning_RM', 'Street_Grvl', 'Street_Pave', 'Alley_Grvl', 'Alley_None', 'Alley_Pave', 'LotShape_IR1', 'LotShape_IR2', 'LotShape_IR3', 'LotShape_Reg', 'LandContour_Bnk', 'LandContour_HLS', 'LandContour_Low', 'LandContour_Lvl', 'LotConfig_Corner', 'LotConfig_CulDSac', 'LotConfig_FR2', 'LotConfig_FR3', 'LotConfig_Inside', 'LandSlope_Gtl', 'LandSlope_Mod', 'LandSlope_Sev', 'Neighborhood_Blmngtn', 'Neighborhood_Blueste', 'Neighborhood_BrDale', 'Neighborhood_BrkSide', 'Neighborhood_ClearCr', 'Neighborhood_CollgCr', 'Neighborhood_Crawfor', 'Neighborhood_Edwards', 'Neighborhood_Gilbert', 'Neighborhood_IDOTRR', 'Neighborhood_MeadowV', 'Neighborhood_Mitchel', 'Neighborhood_NAmes', 'Neighborhood_NPkVill', 'Neighborhood_NWAmes', 'Neighborhood_NoRidge', 'Neighborhood_NridgHt', 'Neighborhood_OldTown', 'Neighborhood_SWISU', 'Neighborhood_Sawyer', 'Neighborhood_SawyerW', 'Neighborhood_Somerst', 'Neighborhood

In [10]:
from sklearn.preprocessing import MinMaxScaler

def clean_and_encode(train, test):
    y = train["SalePrice"]
    train = train.drop("SalePrice", axis=1)

    all_data = pd.concat([train, test], axis=0)

    # Kullanƒ±cƒ±nƒ±n i≈üaretlediƒüi + otomatik belirlenen kategorik sayƒ±sallar
    pseudo_categoricals = [
        'OverallQual', 'GarageCars', 'FullBath', 'TotRmsAbvGrd', 'Fireplaces',
        'HalfBath', 'RsmtCullBath', 'BedroomAbvGr', 'KitchenAbvGr',
        'MSSubClass', 'OverallCond', 'BsmtFullBath', 'BsmtHalfBath',
        'PoolArea', 'MoSold', 'YrSold'
    ]

    none_fill = [
        'PoolQC', 'MiscFeature', 'Alley', 'Fence', 'FireplaceQu',
        'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond',
        'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
        'MasVnrType'
    ]
    for col in none_fill:
        all_data[col] = all_data[col].fillna("None")

    zero_fill = [
        'GarageYrBlt', 'GarageArea', 'GarageCars',
        'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
        'BsmtFullBath', 'BsmtHalfBath', 'MasVnrArea'
    ]
    for col in zero_fill:
        all_data[col] = all_data[col].fillna(0)

    # LotFrontage gruba g√∂re dolduruluyor
    all_data["LotFrontage"] = all_data.groupby("Neighborhood")["LotFrontage"].transform(
        lambda x: x.fillna(x.median())
    )

    mode_fill = ['MSZoning', 'Electrical', 'KitchenQual', 'Exterior1st', 'Exterior2nd', 'SaleType', 'Functional']
    for col in mode_fill:
        all_data[col] = all_data[col].fillna(all_data[col].mode()[0])

    all_data = all_data.fillna(all_data.mean(numeric_only=True))
    all_data = all_data.drop(columns=['Utilities'], errors='ignore')

    # Pseudo-kategorikleri string yap
    for col in pseudo_categoricals:
        if col in all_data.columns:
            all_data[col] = all_data[col].astype(str)

    # One-hot encoding
    all_data = pd.get_dummies(all_data)

    # MinMaxScaler ile normalizasyon (sadece sayƒ±sallar kaldƒ±)
    scaler = MinMaxScaler()
    all_data_scaled = pd.DataFrame(
        scaler.fit_transform(all_data),
        columns=all_data.columns,
        index=all_data.index
    )

    X_clean = all_data_scaled.iloc[:len(y)].copy()
    X_test_clean = all_data_scaled.iloc[len(y):].copy()

    return X_clean, X_test_clean, y



In [22]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

X_clean, X_test_clean, y_clean = clean_and_encode(train, test)


In [23]:
print("T√ºm s√ºtunlar sayƒ±sal mƒ±?", all(np.issubdtype(dtype, np.number) for dtype in X_clean.dtypes))

T√ºm s√ºtunlar sayƒ±sal mƒ±? True


In [24]:
import optuna
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_log_error
from xgboost import XGBRegressor
import numpy as np

# RMSLE fonksiyonu
def rmsle_func(y_true, y_pred):
    return np.sqrt(mean_squared_log_error(y_true, np.maximum(0, y_pred)))

# Objective fonksiyonu
def objective(trial):
    params = {
    "n_estimators": trial.suggest_int("n_estimators", 221, 223),
    "max_depth": trial.suggest_int("max_depth", 4, 4),  # zaten sabit
    "learning_rate": trial.suggest_float("learning_rate", 0.06370142103, 0.06382895140),
    "subsample": trial.suggest_float("subsample", 0.822180468, 0.823826475),
    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.892465403, 0.894252120),
    "gamma": trial.suggest_float("gamma", 0.00123973, 0.00124222),
    "random_state": 42,
    "tree_method": "hist"
}

    model = XGBRegressor(**params)

    # KFold ile manuel CV (3 katlƒ±)
    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    rmsle_scores = []

    for train_idx, val_idx in kf.split(X_clean):
        X_train, X_val = X_clean.iloc[train_idx], X_clean.iloc[val_idx]
        y_train, y_val = y_clean.iloc[train_idx], y_clean.iloc[val_idx]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        rmsle_scores.append(rmsle_func(y_val, y_pred))

    return np.mean(rmsle_scores)  # Artƒ±k POZƒ∞Tƒ∞F skor d√∂ner, direction="minimize" ile uyumlu

# Optuna √ßalƒ±≈ütƒ±rma
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# En iyi sonucu yaz
print("\n‚úÖ En iyi parametreler:", study.best_params)
print(f"üîç En d√º≈ü√ºk RMSLE skoru: {study.best_value:.5f}")


[I 2025-06-19 14:25:31,816] A new study created in memory with name: no-name-33e52d13-d505-4322-98a6-fca9f88c36f9
[I 2025-06-19 14:25:35,791] Trial 0 finished with value: 0.13331047502066826 and parameters: {'n_estimators': 223, 'max_depth': 4, 'learning_rate': 0.06374940324936736, 'subsample': 0.8229350976298081, 'colsample_bytree': 0.8942111689963909, 'gamma': 0.0012409690597274883}. Best is trial 0 with value: 0.13331047502066826.
[I 2025-06-19 14:25:39,724] Trial 1 finished with value: 0.13366570923519708 and parameters: {'n_estimators': 221, 'max_depth': 4, 'learning_rate': 0.06376963244267374, 'subsample': 0.8235633289565751, 'colsample_bytree': 0.8942116118070733, 'gamma': 0.001241296147538519}. Best is trial 0 with value: 0.13331047502066826.
[I 2025-06-19 14:25:43,570] Trial 2 finished with value: 0.13194569087892125 and parameters: {'n_estimators': 222, 'max_depth': 4, 'learning_rate': 0.06380494896449357, 'subsample': 0.822581777292526, 'colsample_bytree': 0.8928531009758691


‚úÖ En iyi parametreler: {'n_estimators': 222, 'max_depth': 4, 'learning_rate': 0.0637767126620895, 'subsample': 0.8228772661355781, 'colsample_bytree': 0.8931557375467725, 'gamma': 0.0012416662836039147}
üîç En d√º≈ü√ºk RMSLE skoru: 0.13139


In [25]:
study.optimize(objective, n_trials=250)

[I 2025-06-19 14:29:36,827] Trial 50 finished with value: 0.132343301392711 and parameters: {'n_estimators': 221, 'max_depth': 4, 'learning_rate': 0.06379129150926857, 'subsample': 0.8231264630184596, 'colsample_bytree': 0.8928797394330408, 'gamma': 0.0012421373903024476}. Best is trial 27 with value: 0.13139034756777287.
[I 2025-06-19 14:29:40,643] Trial 51 finished with value: 0.13221986267290473 and parameters: {'n_estimators': 221, 'max_depth': 4, 'learning_rate': 0.06376502700803577, 'subsample': 0.822746354382372, 'colsample_bytree': 0.8937609985996064, 'gamma': 0.0012419129894396157}. Best is trial 27 with value: 0.13139034756777287.
[I 2025-06-19 14:29:44,455] Trial 52 finished with value: 0.13212616357839327 and parameters: {'n_estimators': 222, 'max_depth': 4, 'learning_rate': 0.06380095637828798, 'subsample': 0.8227975020000498, 'colsample_bytree': 0.8938983803971663, 'gamma': 0.0012418133222281131}. Best is trial 27 with value: 0.13139034756777287.
[I 2025-06-19 14:29:48,36

In [26]:
study.optimize(objective, n_trials=250)

[I 2025-06-19 20:32:26,208] Trial 300 finished with value: 0.13131142929916875 and parameters: {'n_estimators': 222, 'max_depth': 4, 'learning_rate': 0.06373403055812574, 'subsample': 0.8227237778518471, 'colsample_bytree': 0.8939234010491445, 'gamma': 0.0012422145426129164}. Best is trial 171 with value: 0.1307258902368719.
[I 2025-06-19 20:32:30,046] Trial 301 finished with value: 0.13129151289370902 and parameters: {'n_estimators': 222, 'max_depth': 4, 'learning_rate': 0.06373251826542736, 'subsample': 0.8226897043875767, 'colsample_bytree': 0.8938301790562656, 'gamma': 0.001242061356762015}. Best is trial 171 with value: 0.1307258902368719.
[I 2025-06-19 20:32:33,868] Trial 302 finished with value: 0.13171237275041736 and parameters: {'n_estimators': 222, 'max_depth': 4, 'learning_rate': 0.0637303060355993, 'subsample': 0.8226501130123555, 'colsample_bytree': 0.894007877990979, 'gamma': 0.001242132931270385}. Best is trial 171 with value: 0.1307258902368719.
[I 2025-06-19 20:32:37,

In [29]:
import pandas as pd

threshold = 0.14

# 0.14'ten d√º≈ü√ºk skora sahip denemeleri al
good_trials = [t for t in study.trials if t.value is not None and t.value < threshold]
good_trials = sorted(good_trials, key=lambda t: t.value)

# üî¢ Listeyi DataFrame'e √ßevir
trials_df = pd.DataFrame([{"RMSLE": t.value, **t.params} for t in good_trials])

# üìä Her parametrenin min ve max deƒüerini i√ßeren √∂zet obje
min_max_summary = {}

for col in trials_df.columns:
    if col == "RMSLE":
        continue
    min_max_summary[col] = {
        "min": trials_df[col].min(),
        "max": trials_df[col].max()
    }

# üñ®Ô∏è Yazdƒ±r
print("\n‚úÖ Parametre Min/Max √ñzeti (RMSLE < 0.14 olanlar i√ßin):")
for param, values in min_max_summary.items():
    print(f"{param}: min = {values['min']}, max = {values['max']}")

# üßæ ƒ∞steƒüe baƒülƒ±: Denemeleri ve √∂zeti g√∂rmek i√ßin DataFrame d√∂nd√ºr
trials_df  # veya min_max_summary





‚úÖ Parametre Min/Max √ñzeti (RMSLE < 0.14 olanlar i√ßin):
n_estimators: min = 128, max = 694
max_depth: min = 3, max = 9
learning_rate: min = 0.019165137001864868, max = 0.1816767239307247
subsample: min = 0.5001709217810926, max = 0.7870808209826683
colsample_bytree: min = 0.5035535801926848, max = 0.8384171026743712


Unnamed: 0,RMSLE,n_estimators,max_depth,learning_rate,subsample,colsample_bytree
0,0.134232,489,6,0.030109,0.547974,0.538904
1,0.134666,496,6,0.025978,0.548746,0.543852
2,0.13477,487,4,0.02919,0.571174,0.546106
3,0.135025,364,4,0.056816,0.525279,0.770995
4,0.135235,467,5,0.035978,0.540308,0.781558
5,0.135315,472,5,0.028575,0.530589,0.793831
6,0.135339,500,6,0.027989,0.503265,0.563586
7,0.135586,493,6,0.023439,0.568984,0.54496
8,0.135625,435,3,0.060236,0.506858,0.604906
9,0.136331,375,4,0.046942,0.529468,0.75412


In [30]:
# RMSLE fonksiyonu
def rmsle_func2(y_true, y_pred):
    return np.sqrt(mean_squared_log_error(y_true, np.maximum(0, y_pred)))

# Objective fonksiyonu
def objective2(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 700),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.10),
        "subsample": trial.suggest_float("subsample", 0.5, 0.8),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 0.8),
        "random_state": 42,
        "tree_method": "hist"
    }

    model = XGBRegressor(**params)

    # KFold ile manuel CV (3 katlƒ±)
    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    rmsle_scores2 = []

    for train_idx, val_idx in kf.split(X_clean):
        X_train, X_val = X_clean.iloc[train_idx], X_clean.iloc[val_idx]
        y_train, y_val = y_clean.iloc[train_idx], y_clean.iloc[val_idx]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        rmsle_scores2.append(rmsle_func2(y_val, y_pred))

    return np.mean(rmsle_scores2)  # Artƒ±k POZƒ∞Tƒ∞F skor d√∂ner, direction="minimize" ile uyumlu

# Optuna √ßalƒ±≈ütƒ±rma
study = optuna.create_study(direction="minimize")
study.optimize(objective2, n_trials=50)

# En iyi sonucu yaz
print("\n‚úÖ En iyi parametreler:", study.best_params)
print(f"üîç En d√º≈ü√ºk RMSLE skoru: {study.best_value:.5f}")

[I 2025-06-19 12:07:11,336] A new study created in memory with name: no-name-35db12f1-fec7-40a2-9e96-32f91a7efe18
[I 2025-06-19 12:07:16,965] Trial 0 finished with value: 0.1466637815916251 and parameters: {'n_estimators': 189, 'max_depth': 10, 'learning_rate': 0.08666234673329146, 'subsample': 0.7511966192433286, 'colsample_bytree': 0.6505718695954363}. Best is trial 0 with value: 0.1466637815916251.
[I 2025-06-19 12:07:28,582] Trial 1 finished with value: 0.14151691539238256 and parameters: {'n_estimators': 524, 'max_depth': 8, 'learning_rate': 0.07453878990356756, 'subsample': 0.5678035858577171, 'colsample_bytree': 0.7841194833387289}. Best is trial 1 with value: 0.14151691539238256.
[I 2025-06-19 12:07:45,566] Trial 2 finished with value: 0.14310003274610097 and parameters: {'n_estimators': 640, 'max_depth': 10, 'learning_rate': 0.0525988882862962, 'subsample': 0.6289074103227212, 'colsample_bytree': 0.6151507082923544}. Best is trial 1 with value: 0.14151691539238256.
[I 2025-06-


‚úÖ En iyi parametreler: {'n_estimators': 698, 'max_depth': 3, 'learning_rate': 0.04693288882188203, 'subsample': 0.6042586435209938, 'colsample_bytree': 0.7629615716137506}
üîç En d√º≈ü√ºk RMSLE skoru: 0.13257


In [32]:
study.optimize(objective2, n_trials=100)

[I 2025-06-19 12:27:30,183] Trial 50 finished with value: 0.14510117353766236 and parameters: {'n_estimators': 597, 'max_depth': 10, 'learning_rate': 0.06206934185448353, 'subsample': 0.6720248884465294, 'colsample_bytree': 0.6827623087728693}. Best is trial 23 with value: 0.13316829146922565.
[I 2025-06-19 12:27:42,316] Trial 51 finished with value: 0.14093612349168547 and parameters: {'n_estimators': 616, 'max_depth': 8, 'learning_rate': 0.03521693911783199, 'subsample': 0.6722438076899052, 'colsample_bytree': 0.5247201193987633}. Best is trial 23 with value: 0.13316829146922565.
[I 2025-06-19 12:27:52,522] Trial 52 finished with value: 0.13985899235271637 and parameters: {'n_estimators': 575, 'max_depth': 7, 'learning_rate': 0.05981961378019297, 'subsample': 0.5011812349177048, 'colsample_bytree': 0.5819274369128947}. Best is trial 23 with value: 0.13316829146922565.
[I 2025-06-19 12:28:00,326] Trial 53 finished with value: 0.13988093346989686 and parameters: {'n_estimators': 534, '

In [31]:
# RMSLE fonksiyonu
def rmsle_func3(y_true, y_pred):
    return np.sqrt(mean_squared_log_error(y_true, np.maximum(0, y_pred)))

# Objective fonksiyonu
def objective3(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 650, 750),
        "max_depth": trial.suggest_int("max_depth", 3, 6),
        "learning_rate": trial.suggest_float("learning_rate", 0.042, 0.052),
        "subsample": trial.suggest_float("subsample", 0.55, 0.65),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.71, 0.81),
        "random_state": 42,
        "tree_method": "hist"
    }

    model = XGBRegressor(**params)

    # KFold ile manuel CV (3 katlƒ±)
    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    rmsle_scores3 = []

    for train_idx, val_idx in kf.split(X_clean):
        X_train, X_val = X_clean.iloc[train_idx], X_clean.iloc[val_idx]
        y_train, y_val = y_clean.iloc[train_idx], y_clean.iloc[val_idx]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        rmsle_scores3.append(rmsle_func3(y_val, y_pred))

    return np.mean(rmsle_scores3)  # Artƒ±k POZƒ∞Tƒ∞F skor d√∂ner, direction="minimize" ile uyumlu

# Optuna √ßalƒ±≈ütƒ±rma
study = optuna.create_study(direction="minimize")
study.optimize(objective3, n_trials=50)

# En iyi sonucu yaz
print("\n‚úÖ En iyi parametreler:", study.best_params)
print(f"üîç En d√º≈ü√ºk RMSLE skoru: {study.best_value:.5f}")

[I 2025-06-19 12:17:27,020] A new study created in memory with name: no-name-21664ed2-9c32-40d1-9f4f-f53ecf35da6c
[I 2025-06-19 12:17:37,772] Trial 0 finished with value: 0.13653374025254683 and parameters: {'n_estimators': 727, 'max_depth': 5, 'learning_rate': 0.04318566753166336, 'subsample': 0.5825565869731222, 'colsample_bytree': 0.7790839294303251}. Best is trial 0 with value: 0.13653374025254683.
[I 2025-06-19 12:17:48,854] Trial 1 finished with value: 0.13650602048633134 and parameters: {'n_estimators': 674, 'max_depth': 6, 'learning_rate': 0.04351295110212105, 'subsample': 0.607960905470717, 'colsample_bytree': 0.8057138478828815}. Best is trial 1 with value: 0.13650602048633134.
[I 2025-06-19 12:17:58,923] Trial 2 finished with value: 0.1357366576589806 and parameters: {'n_estimators': 698, 'max_depth': 5, 'learning_rate': 0.04834662146796772, 'subsample': 0.5992048770103681, 'colsample_bytree': 0.784418088491908}. Best is trial 2 with value: 0.1357366576589806.
[I 2025-06-19 


‚úÖ En iyi parametreler: {'n_estimators': 685, 'max_depth': 4, 'learning_rate': 0.04938412803048766, 'subsample': 0.6066477170565708, 'colsample_bytree': 0.7721560729268948}
üîç En d√º≈ü√ºk RMSLE skoru: 0.13317
