In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.metrics import mean_squared_error


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [3]:
# Step 1: Generate synthetic data
num_nodes = 100
data = {
    "node_id": np.arange(1, num_nodes + 1),
    "node_state": np.random.choice(["active", "inactive", "sleep"], num_nodes),
    "traffic_load": np.random.uniform(0, 100, num_nodes),
    "energy_level": np.random.uniform(0, 100, num_nodes),
    "transmission_delay": np.random.uniform(0, 10, num_nodes)
}
df = pd.DataFrame(data)

In [4]:
# Step 2: Preprocess the data
# Encode categorical variables (node_state)
df = pd.get_dummies(df, columns=["node_state"], drop_first=True)

# Define features (X) and target (y)
X = df.drop(columns=["transmission_delay"])  # Features
y = df["transmission_delay"]  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
# Step 3: Train a LightGBM model
# Create Dataset for LightGBM
train_data = lgb.Dataset(X_train, label=y_train)

# Set hyperparameters
params = {
    "objective": "regression",  # Regression task
    "metric": "mse",  # Mean Squared Error
    "boosting_type": "gbdt",  # Gradient Boosting Decision Tree
    "learning_rate": 0.1,
    "num_leaves": 31,
    "max_depth": 3,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "seed": 42
}

# Train the model
num_boost_round = 100
model = lgb.train(params, train_data, num_boost_round=num_boost_round)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026077 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 88
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 5
[LightGBM] [Info] Start training from score 5.122614


In [6]:
# Step 4: Make predictions
y_pred = model.predict(X_test)

In [7]:
# Step 5: Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse}")


Mean Squared Error (MSE): 8.612700561296869
