In [None]:
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split

## Placeholder data: random stand-ins only. Replace every frame/series below with your actual dataset.
## Features are uniform floats in [0, 1) with 10 columns; labels are random binary (0 or 1) integers.
X_train_df = pd.DataFrame(np.random.random_sample(size=(100, 10)))
y_train_df = pd.Series(np.random.randint(low=0, high=2, size=100))
X_val_df = pd.DataFrame(np.random.random_sample(size=(50, 10)))
y_val_df = pd.Series(np.random.randint(low=0, high=2, size=50))
X_test_df = pd.DataFrame(np.random.random_sample(size=(50, 10)))
y_test_df = pd.Series(np.random.randint(low=0, high=2, size=50))


## This works for any ensemble model: just replace the model below with the model of interest.
def run_experiment(random_state, X_train, y_train, X_val, y_val, X_test, y_test,
                   *, device_type='gpu'):
    """
    Train a LightGBM classifier and score it on the train, validation and test splits.

    The model is fitted on the training split only. Accuracy and weighted
    F1-score are then computed for each of the three splits, and the confusion
    matrix of the test split is printed (it is not returned).

    Args:
        random_state: Seed forwarded to ``lgb.LGBMClassifier``; also shown in
            the printed confusion-matrix header.
        X_train, y_train: Features and labels the model is fitted on.
        X_val, y_val: Features and labels of the validation split.
        X_test, y_test: Features and labels of the test split.
        device_type: Keyword-only. Passed unchanged to ``lgb.LGBMClassifier``.
            Defaults to ``'gpu'`` (the previous hard-coded value); pass
            ``'cpu'`` when no GPU is available.

    Returns:
        A 6-tuple ``(acc_train, acc_val, acc_test, f1_train, f1_val, f1_test)``.
    """
    model = lgb.LGBMClassifier(
        device_type=device_type,
        n_estimators=6,
        max_depth=12,
        learning_rate=0.3,
        random_state=random_state,
        num_leaves=50,
    )

    model.fit(X_train, y_train)

    ## Evaluate the splits in a fixed train -> val -> test order so the
    ## returned tuple keeps its documented layout.
    splits = ((X_train, y_train), (X_val, y_val), (X_test, y_test))
    predictions = [model.predict(features) for features, _ in splits]

    ## Calculate performance metrics
    accuracies = [
        accuracy_score(labels, predicted)
        for (_, labels), predicted in zip(splits, predictions)
    ]
    f1_scores = [
        f1_score(labels, predicted, average='weighted')
        for (_, labels), predicted in zip(splits, predictions)
    ]

    # The confusion matrix: useful for evaluating class-wise performance on the test split.
    predictions_test = predictions[-1]
    print(f"Confusion Matrix for seed {random_state}:\n{confusion_matrix(y_test, predictions_test)}\n")
    ## The confusion matrix is only printed; return it as well if callers need it.
    return (*accuracies, *f1_scores)

## Repeat the experiment once per random seed. The seed values and how many you use are up to you.
seeds = [20, 42, 100]
results = []
## The same six data splits are reused for every seed; only the model seed changes.
datasets = (X_train_df, y_train_df, X_val_df, y_val_df, X_test_df, y_test_df)
for seed in seeds:
    ## Each run yields (acc_train, acc_val, acc_test, f1_train, f1_val, f1_test)
    experiment_results = run_experiment(seed, *datasets)
    results.append(experiment_results)

## One row per seed, one column per metric; average each metric over the seeds.
results_np = np.array(results)
mean_results = np.mean(results_np, axis=0)

# Report the seed-averaged metrics. Compute the std (e.g. results_np.std(axis=0)) if required.
mean_acc_train, mean_acc_val, mean_acc_test = mean_results[:3]
mean_f1_train, mean_f1_val, mean_f1_test = mean_results[3:6]

print("--- Final Results ---")
print(f"Mean Accuracy (Train | Val | Test): {mean_acc_train:.4f} | {mean_acc_val:.4f} | {mean_acc_test:.4f}")
print(f"Mean F1-Score (Train | Val | Test): {mean_f1_train:.4f} | {mean_f1_val:.4f} | {mean_f1_test:.4f}")
