In [None]:
pip install optuna

In [None]:
import pandas as pd
import optuna
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

# ---------- data ----------
# Read the full table from the CSV file.
df = pd.read_csv('/content/drive/MyDrive/ML_22/data_all_2020.csv')

# Features: every column except the label, restricted to numeric dtypes
# (non-numeric feature columns are silently left out by select_dtypes).
X = df.drop(columns='crop_types')
X = X.select_dtypes(include='number')

# Target: 'crop_types' mapped to integer class ids. The fitted encoder stays
# in `le` so ids can be translated back to the original labels later.
le = LabelEncoder()
y = le.fit_transform(df['crop_types'])

# ---------- objective ----------
def objective(trial, data=X, target=y):
    """Optuna objective: fit an XGBoost classifier and score it on a hold-out split.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        data: Feature table; defaults to the module-level ``X``.
        target: Integer class ids aligned with ``data``; defaults to the
            module-level ``y``.

    Returns:
        The misclassification rate on the hold-out split, i.e. the fraction of
        hold-out rows whose predicted class differs from the true class
        (``1 - accuracy``). Lower is better.
    """
    # The seed is fixed, so every trial is scored on the same stratified
    # 67/33 split and trial values are directly comparable.
    Xtr, Xte, ytr, yte = train_test_split(
        data, target, test_size=0.33, stratify=target, shuffle=True, random_state=42
    )

    params = {
        'tree_method': 'hist',   # use 'gpu_hist' if your xgboost build supports CUDA
        # L2 / L1 regularisation strengths, sampled on a log scale.
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree',
                                                      [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),
        'subsample': trial.suggest_categorical('subsample',
                                               [0.4, 0.5, 0.6, 0.7, 0.8, 1.0]),
        'learning_rate': trial.suggest_categorical('learning_rate',
                                                   [0.008, 0.01, 0.012, 0.014, 0.016, 0.018, 0.02]),
        'max_depth': trial.suggest_categorical('max_depth',
                                               [5, 7, 9, 11, 13, 15, 17]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        'n_estimators': 500,     # fixed since early stopping isn't available
        'objective': 'multi:softmax',
        'num_class': int(np.unique(target).size),
        'eval_metric': 'mlogloss',
        'n_jobs': -1,
    }

    model = xgb.XGBClassifier(**params)
    # No eval_set: nothing consumes the per-round validation scores (there is
    # no early stopping and verbose is off), so computing them for all 500
    # rounds would only slow every trial down.
    model.fit(Xtr, ytr, verbose=False)

    preds = model.predict(Xte)
    # Class ids are nominal (by default they come from a LabelEncoder), so the
    # numeric distance between two ids carries no meaning. An RMSE over them
    # would penalise "class 0 vs class 9" more than "class 0 vs class 1" for no
    # reason; count wrong predictions instead.
    error_rate = float(np.mean(np.asarray(preds) != np.asarray(yte)))
    return error_rate


# The objective returns an error rate, so smaller values are better.
study = optuna.create_study(direction='minimize')
# NOTE(review): 3 trials is only a smoke run; raise n_trials for a real search.
study.optimize(objective, n_trials=3)

print('Number of finished trials:', len(study.trials))
print('Best trial params:', study.best_trial.params)
print('Best error rate:', study.best_trial.value)
