In [1]:
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor, Booster
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Show which LightGBM release is installed in this kernel, so the outputs
# below can be tied to a specific library version.
print(f"lightgbm version: {lgb.__version__}")

lightgbm version: 3.2.1


In [2]:

# Load the data
# Everything a reader is likely to change lives in these three constants:
# point the paths at your own CSV files and set TARGET_COLUMN to the name of
# the column you want to predict.
TRAIN_PATH = 'data/train.csv'
TEST_PATH = 'data/test.csv'
TARGET_COLUMN = 'target'

train_data = pd.read_csv(TRAIN_PATH)
test_data = pd.read_csv(TEST_PATH)

# Fail fast, and name the offending file, if the target column is missing.
# Both frames are indexed by this column later in the notebook, so a missing
# column would otherwise surface as a less informative KeyError further down.
files_missing_target = [
    path
    for path, frame in ((TRAIN_PATH, train_data), (TEST_PATH, test_data))
    if TARGET_COLUMN not in frame.columns
]
if files_missing_target:
    raise KeyError(
        f"column {TARGET_COLUMN!r} not found in: {', '.join(files_missing_target)}"
    )

# Separate the features (every column except the target) from the target.
X = train_data.drop(TARGET_COLUMN, axis=1)
y = train_data[TARGET_COLUMN]

# Hold out 20% of the training rows for validation; the fixed random_state
# makes the split identical on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:

def report_regression_metrics(label, y_true, y_pred):
    """Print and return RMSE and R² for one data split.

    Parameters
    ----------
    label : str
        Prefix for the printed lines, e.g. "Validation" or "Test".
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Model predictions for the same rows, in the same order.

    Returns
    -------
    tuple
        ``(rmse, r2)`` where ``rmse`` is the square root of
        ``mean_squared_error`` and ``r2`` comes from ``r2_score``.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    print(f"{label} RMSE: {rmse:.4f}")
    print(f"{label} R²: {r2:.4f}")
    return rmse, r2


# Initialize LightGBM model
model = LGBMRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    num_leaves=31,
    random_state=42
)

# Train the model; the validation split is passed as eval_set so the RMSE on
# it is logged after every boosting iteration.
model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    eval_metric='rmse',
)

# Score the validation split
val_predictions = model.predict(X_val)
val_rmse, val_r2 = report_regression_metrics("Validation", y_val, val_predictions)

# Split the held-out test frame into features and target
X_test = test_data.drop('target', axis=1)
y_test = test_data['target']

# LightGBM's predict does not validate DataFrame column names by default, so
# a test file whose columns are ordered differently from the training data
# would be scored silently against the wrong features. Stop here instead.
if list(X_test.columns) != list(X_train.columns):
    raise ValueError(
        "Test feature columns do not match the training feature columns "
        f"(train: {list(X_train.columns)}, test: {list(X_test.columns)})"
    )

# Score the test split
test_predictions = model.predict(X_test)

print()
test_rmse, test_r2 = report_regression_metrics("Test", y_test, test_predictions)



[1]	valid_0's rmse: 146.933	valid_0's l2: 21589.4
[2]	valid_0's rmse: 138.5	valid_0's l2: 19182.4
[3]	valid_0's rmse: 131.029	valid_0's l2: 17168.5
[4]	valid_0's rmse: 124.401	valid_0's l2: 15475.6
[5]	valid_0's rmse: 118.509	valid_0's l2: 14044.3
[6]	valid_0's rmse: 113.109	valid_0's l2: 12793.7
[7]	valid_0's rmse: 108.017	valid_0's l2: 11667.7
[8]	valid_0's rmse: 103.255	valid_0's l2: 10661.6
[9]	valid_0's rmse: 99.1064	valid_0's l2: 9822.07
[10]	valid_0's rmse: 95.4278	valid_0's l2: 9106.46
[11]	valid_0's rmse: 91.9335	valid_0's l2: 8451.78
[12]	valid_0's rmse: 88.5862	valid_0's l2: 7847.52
[13]	valid_0's rmse: 85.5227	valid_0's l2: 7314.14
[14]	valid_0's rmse: 82.6334	valid_0's l2: 6828.28
[15]	valid_0's rmse: 79.9964	valid_0's l2: 6399.43
[16]	valid_0's rmse: 77.7225	valid_0's l2: 6040.78
[17]	valid_0's rmse: 75.6129	valid_0's l2: 5717.31
[18]	valid_0's rmse: 73.5668	valid_0's l2: 5412.08
[19]	valid_0's rmse: 71.6545	valid_0's l2: 5134.37
[20]	valid_0's rmse: 69.5701	valid_0's l2:

In [4]:
# Persist the trained model: take the Booster held by the fitted regressor
# and write it to 'model.txt' in the current working directory.
fitted_booster = model.booster_
fitted_booster.save_model('model.txt')
print("Model saved as model.txt")

Model saved as model.txt


In [5]:
# Rebuild a Booster from the model file on disk
model2 = Booster(model_file='model.txt')

# Separate the test target from the test features
y_test = test_data['target']
X_test = test_data.drop(columns='target')

# Score the test set with the reloaded booster
test_predictions = model2.predict(X_test)

# Compute RMSE and R² for the reloaded booster's predictions
test_r2 = r2_score(y_test, test_predictions)
test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

# One blank line, then the two metric lines
print(
    "",
    f"Test RMSE: {test_rmse:.4f}",
    f"Test R²: {test_r2:.4f}",
    sep="\n",
)




Test RMSE: 26.7448
Test R²: 0.9727
