In [2]:
# Show the installed LightGBM version; the bare last expression is rendered
# as the cell's output.
import lightgbm
lightgbm.__version__

'4.3.0'

## LightGBM: Python API

In [9]:
import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris data: feature matrix X and class labels y.
iris = load_iris()
X, y = iris.data, iris.target
# X.shape, y.shape

# Split the dataset: 20% held out for testing, fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# X_train.shape, X_test.shape, y_train.shape, y_test.shape

# Key step of the native API: wrap the arrays in lgb.Dataset objects.
# The validation Dataset is created with `reference=train_data`, as the
# LightGBM documentation recommends, so it is aligned with the training data.
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# type(train_data)
params = {
    'objective': 'multiclass',
    'num_class': 3,  # Number of classes in the dataset
    'metric': 'multi_logloss',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

# Train the model.
# num_round is the number of boosting iterations passed to lgb.train; the
# step size of each iteration is the 'learning_rate' entry in params.
# The held-out split is supplied as the validation set; no early-stopping
# callback is passed in this call.
num_round = 5
model = lgb.train(params, train_data, num_round, valid_sets=[test_data])

# Predict. The result holds one score per class for every row, so the
# predicted label is the column index of the largest score (first one on ties).
y_pred = model.predict(X_test)
y_pred_max = y_pred.argmax(axis=1)

# Evaluate the model.
accuracy = accuracy_score(y_test, y_pred_max)
print("Accuracy:", accuracy)

Accuracy: 0.9666666666666667


## LightGBM: Scikit-Learn API

In [11]:
import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Iris features (X) and class labels (y).
iris = load_iris()
X = iris.data
y = iris.target
# X.shape, y.shape

# Hold out 20% of the samples for testing, using a seeded split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# X_train.shape, X_test.shape, y_train.shape, y_test.shape

# Configure the scikit-learn style estimator, then train it on the raw arrays
# (no lgb.Dataset wrapping is done in this cell).
lgb_clf = lgb.LGBMClassifier(
    objective='multiclass',
    num_class=3,  # Number of classes in the dataset
    num_leaves=31,
    learning_rate=0.05,
    feature_fraction=0.9,
    bagging_fraction=0.8,
    bagging_freq=5,
)

lgb_clf.fit(X_train, y_train)

# Score the predictions on the held-out split; the bare `accuracy`
# expression on the last line is what the cell displays.
y_pred = lgb_clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000029 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 91
[LightGBM] [Info] Number of data points in the train set: 120, number of used features: 4
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.073920
[LightGBM] [Info] Start training from score -1.123930


1.0

In [2]:
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Iris features (X) and class labels (y).
iris = load_iris()
X = iris.data
y = iris.target

# Seeded 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scikit-learn style XGBoost estimator with its hyperparameters.
xgb_classifier = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=3,  # Number of classes in the dataset
    max_depth=6,
    learning_rate=0.1,
    n_estimators=100,
    subsample=0.9,
    colsample_bytree=0.9,
    gamma=0.1,
)

# Fit on the training split.
xgb_classifier.fit(X_train, y_train)

# Predict labels for the held-out split.
y_pred = xgb_classifier.predict(X_test)

# Report accuracy on the held-out split.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [3]:
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split data into training and testing sets (20% held out, seeded split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the dataset into DMatrix format, the container xgb.train and
# Booster.predict are given below
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Set parameters for XGBoost
param = {
    'objective': 'multi:softmax',  # Multiclass classification
    'num_class': 3,  # Number of classes in the dataset
    'max_depth': 6,
    'learning_rate': 0.1,
    'subsample': 0.9,
    'colsample_bytree': 0.9,
    'gamma': 0.1
}

# Train the model for num_round boosting rounds
num_round = 100
bst = xgb.train(param, dtrain, num_round)

# Predict
y_pred = bst.predict(dtest)

# Convert float predictions to integers in one vectorized step
# (round first, then cast, so the result matches per-element int(round(...)))
y_pred = y_pred.round().astype(int)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0
