# Import Libraries

In [1]:
import pandas as pd 
import numpy as np 
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pickle

# Model

In [2]:
directory = '../data/'

Mounted at /content/drive


In [3]:
# Load the data split from file
with open(directory + 'data_split.pkl', 'rb') as f:
    X_train, X_test, y_train, y_test = pickle.load(f)

In [4]:
# Create the LightGBM datasets
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
# Define the model parameters
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'learning_rate': 0.05,
    'num_leaves': 31,
    'max_depth': -1,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

# Train the model
model = lgb.train(params, train_data, valid_sets=[valid_data], num_boost_round=1000)

# Model Evaluation
# Make predictions on the test set
y_pred = model.predict(X_test, num_iteration=model.best_iteration)

In [7]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f' Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')
print(f'R-squared error: {r2}')

 Mean Squared Error: 0.2284191969094807
Mean Absolute Error: 0.09534924616511659
R-squared error: 0.7584068025549024


# Export

In [6]:
# Save predictions and metrics
results = {
    'y_pred': y_pred,
    'mse': mse,
    'mae': mae,
    'r2': r2
}

# Save the predictions
with open(directory + 'lightgbm_results.pkl', 'wb') as file:
    pickle.dump(results, file)