In [None]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, cohen_kappa_score, confusion_matrix, ConfusionMatrixDisplay
)
import matplotlib.pyplot as plt
from xgboost import XGBClassifier

# ---------- Evaluation ----------
def evaluate(y_true, y_pred, title="", average="binary"):
    """Print classification metrics and plot the confusion matrix.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_true``.
    title : str, optional
        Name of the evaluated split. It is printed in the report header and,
        when non-empty, also used as the title of the confusion-matrix figure
        so that several figures in one notebook can be told apart.
    average : str, optional
        Averaging mode forwarded to precision, recall and F1. The default
        ``"binary"`` reproduces scikit-learn's default behaviour; pass e.g.
        ``"macro"`` or ``"weighted"`` when the target has more than two
        classes.

    Returns
    -------
    None
        Results are printed and plotted; nothing is returned.
    """
    acc = accuracy_score(y_true, y_pred)
    pre = precision_score(y_true, y_pred, average=average)
    rec = recall_score(y_true, y_pred, average=average)
    f1 = f1_score(y_true, y_pred, average=average)
    kappa = cohen_kappa_score(y_true, y_pred)

    print(f"\n--- {title} ---")
    print("Accuracy :", round(acc, 4))
    print("Precision:", round(pre, 4))
    print("Recall   :", round(rec, 4))
    print("F1 Score :", round(f1, 4))
    print("Kappa    :", round(kappa, 4))

    cm = confusion_matrix(y_true, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap="Blues")
    if title:
        # Label the figure so it stands alone when the notebook is skimmed.
        disp.ax_.set_title(title)
    plt.show()

In [None]:
# ---------- Load data ----------
df = pd.read_excel(r".....xlsx")
target_col = ".."
target = df[target_col]
# Skip the first column (assumed to be an ID — TODO confirm) and also remove
# the target column, so the label cannot leak into the feature matrix.
# errors="ignore" keeps this working when the target *is* the first column and
# has therefore already been removed by the positional slice.
data = df.iloc[:, 1:].drop(columns=[target_col], errors="ignore")

In [None]:
# ---------- Hold-out split ----------
# 20% of the rows go to the test partition; the split is seeded and
# stratified on `target`.
xtrain, xtest, ytrain, ytest = train_test_split(
    data,
    target,
    stratify=target,
    test_size=0.2,
    random_state=28,
)

# ---------- Feature scaling ----------
# The scaler is fitted on the training partition only; the same fitted
# scaler is then applied to both partitions.
scaler = StandardScaler().fit(xtrain)
xtrains = scaler.transform(xtrain)
xtests = scaler.transform(xtest)
# Persist the fitted scaler to disk.
joblib.dump(scaler, r"....pkl")

In [None]:
# ---------- Fit the classifier (XGBoost used as an example) ----------
# Hyperparameters are gathered in one mapping so they can be read at a glance.
xgb_params = {
    "n_estimators": 100,
    "max_depth": 6,
    "learning_rate": 0.1,
    "subsample": 0.8,
    "random_state": 42,
    "n_jobs": -1,
    # NOTE(review): this flag is reportedly deprecated in recent XGBoost
    # releases — confirm against the installed version before removing it.
    "use_label_encoder": False,
    "eval_metric": "logloss",
}
model = XGBClassifier(**xgb_params)

# Train on the standardized training features.
model.fit(xtrains, ytrain)

# Serialize the fitted model.
# NOTE(review): absolute, machine-specific path — consider a configurable
# output directory.
joblib.dump(model, r"D:\Desktop\global\model\co2\CO2.pkl")

In [None]:
# ---------- Performance on both partitions ----------
# Predict for the train and test features first, then report each split in
# the same order as before (train, then test).
ypred_train, ypred_test = (
    model.predict(features) for features in (xtrains, xtests)
)

evaluate(ytrain, ypred_train, title="Train Set")
evaluate(ytest, ypred_test, title="Test Set")
