In [2]:
import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [3]:
# Synthetic binary-classification problem: 1000 rows, 20 features, fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Wrap the train/test arrays in LightGBM Dataset objects.
# The held-out Dataset is created with reference=train_data so that it is
# tied to the training Dataset it will be evaluated against.
train_data = lgb.Dataset(data=X_train, label=y_train)
test_data = lgb.Dataset(data=X_test, label=y_test, reference=train_data)

In [5]:
# Training configuration for the binary classifier (passed to lgb.train).
params = dict(
    objective='binary',
    metric='binary_logloss',
    boosting_type='gbdt',
    num_leaves=31,
    learning_rate=0.05,
    feature_fraction=0.9,
)

In [6]:
# Fit the booster for a fixed number of rounds, with test_data supplied as
# the validation set.
num_round = 100
bst = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=num_round,
    valid_sets=[test_data],
)

[LightGBM] [Info] Number of positive: 393, number of negative: 407
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000822 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5100
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.491250 -> initscore=-0.035004
[LightGBM] [Info] Start training from score -0.035004


In [7]:
# Score the held-out rows with the trained booster.
y_pred = bst.predict(data=X_test)

In [8]:
from sklearn.metrics import accuracy_score

# Threshold the predicted probabilities at 0.5 to get hard 0/1 labels.
# A vectorised comparison replaces the element-by-element Python list
# comprehension; np.asarray makes this work even if y_pred is a plain list.
y_pred_binary = (np.asarray(y_pred) >= 0.5).astype(int)

# Fraction of test rows whose thresholded prediction equals the true label.
accuracy = accuracy_score(y_test, y_pred_binary)
print(f"Accuracy: {accuracy}")

Accuracy: 0.89


In [9]:
# Persist the trained booster to a text file in the working directory ...
bst.save_model(filename='model.txt')

# ... and rebuild a Booster object from that same file.
loaded_bst = lgb.Booster(model_file='model.txt')

In [10]:
import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Synthetic regression problem: 1000 rows, 20 features, small additive noise.
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Wrap both partitions as LightGBM Datasets; the held-out one references
# the training Dataset.
train_data = lgb.Dataset(data=X_train, label=y_train)
test_data = lgb.Dataset(data=X_test, label=y_test, reference=train_data)

# Training configuration for the regression model with linear models in the leaves.
params = dict(
    objective='regression',
    metric='mse',
    boosting_type='gbdt',
    num_leaves=31,
    learning_rate=0.05,
    feature_fraction=0.9,
    bagging_fraction=0.8,
    bagging_freq=5,
    verbose=0,
    linear_tree=True,              # turn on linear leaf models
    linear_lambda=0.01,            # L2 penalty applied to the linear leaf models
    max_depth=-1,                  # -1 means tree depth is not capped
    min_data_in_leaf=20,
    min_sum_hessian_in_leaf=1e-3,
)

# Fit the booster for a fixed number of rounds, with test_data supplied as
# the validation set.
num_round = 100
bst = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=num_round,
    valid_sets=[test_data],
)

# Score the held-out rows.
y_pred = bst.predict(data=X_test)

# Report mean squared error between true and predicted targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

# Print one "feature_<index>: <importance>" line per input column.
# Names are generated from the column count of X; the importance values come
# from the trained booster. The two sequences are walked together with zip
# rather than by indexing one of them with a loop counter.
feature_importance = bst.feature_importance()
feature_names = [f'feature_{i}' for i in range(X.shape[1])]
for name, importance in zip(feature_names, feature_importance):
    print(f"{name}: {importance}")

# Write the fitted linear-leaf model to a text file in the working directory.
bst.save_model(filename='linear_leaf_tree_model.txt')

# Read it back into a fresh Booster (only needed when the model is reused later).
loaded_bst = lgb.Booster(model_file='linear_leaf_tree_model.txt')

# Optional plots of the first tree and of feature importance.
# These may be less informative for linear leaf trees.
# lgb.plot_tree(bst, tree_index=0)
# lgb.plot_importance(bst, max_num_features=20)

print("Model training and evaluation complete.")

Mean Squared Error: 1115.83424955278
feature_0: 254
feature_1: 274
feature_2: 46
feature_3: 27
feature_4: 315
feature_5: 16
feature_6: 311
feature_7: 18
feature_8: 35
feature_9: 12
feature_10: 168
feature_11: 235
feature_12: 18
feature_13: 40
feature_14: 18
feature_15: 211
feature_16: 22
feature_17: 309
feature_18: 29
feature_19: 18
Model training and evaluation complete.
