# 1- Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import log_loss



In [2]:
# Set SEED
SEED = 42


# 2- Load data

In [3]:
train_data = pd.read_csv("/kaggle/input/playground-series-s3e26/train.csv", index_col="id")
test_data = pd.read_csv("/kaggle/input/playground-series-s3e26/test.csv", index_col="id")


# 3- Feature Engineering

# Define target

In [4]:
target = "Status"


# Separate features and target

In [5]:
X = train_data.drop(columns=[target])
y = train_data[target]

# Encode target variable

In [6]:
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

# One-hot encode categorical features

In [7]:
X = pd.get_dummies(X)

# 4- Model Building

# Initialize models

In [8]:
xgb_clf = XGBClassifier(random_state=SEED, n_jobs=-1)
lgb_clf = LGBMClassifier(random_state=SEED, n_jobs=-1)


In [9]:
rf_clf = RandomForestClassifier(random_state=SEED, n_jobs=-1)
svm_clf = SVC(probability=True, random_state=SEED)
nn_clf = MLPClassifier(random_state=SEED)

# Advanced ensemble techniques

In [10]:
stacking_clf = StackingClassifier(
    estimators=[('xgb', xgb_clf), ('lgb', lgb_clf)],
    final_estimator=GradientBoostingClassifier(random_state=SEED)
)


# Ensemble with voting

In [11]:
voting_clf = VotingClassifier(
    estimators=[("xgb", xgb_clf), ("lgb", lgb_clf), ("rf", rf_clf), ("svm", svm_clf), ("nn", nn_clf)],
    voting="soft"
)


# Standardize data for models that benefit from it

In [12]:
scaler = StandardScaler()

# Advanced ensemble with stacking, voting, and feature scaling

In [13]:
advanced_eclf = StackingClassifier(
    estimators=[('xgb', xgb_clf), ('lgb', lgb_clf), ('rf', rf_clf), ('svm', svm_clf), ('nn', nn_clf)],
    final_estimator=GradientBoostingClassifier(random_state=SEED)
)


# 5- Model Evaluation

In [14]:
models = [xgb_clf, lgb_clf, stacking_clf, voting_clf, advanced_eclf]
for model in models:
    print(f"\nModel name: {type(model).__name__}.")
    try:
        scores = cross_val_score(model, X, y_encoded, cv=5, scoring='neg_log_loss')
        mean_score = np.mean(scores)
        std_score = np.std(scores)
        print(f"Mean log_loss: {mean_score:.4f} +/- {std_score:.4f}")
    except Exception as e:
        print(f"Error: {e}")


Model name: XGBClassifier.
Mean log_loss: -0.5154 +/- 0.0395

Model name: LGBMClassifier.
Mean log_loss: -0.4704 +/- 0.0330

Model name: StackingClassifier.
Mean log_loss: -0.4677 +/- 0.0252

Model name: VotingClassifier.
Mean log_loss: -0.5023 +/- 0.0255

Model name: StackingClassifier.
Mean log_loss: -0.4645 +/- 0.0212


# Fit the final advanced ensemble model

In [15]:
advanced_eclf.fit(X, y_encoded)


# Make predictions on the test set

In [16]:

X_test = pd.get_dummies(test_data)
test_predictions = advanced_eclf.predict_proba(X_test)


# 6- Submission File

In [17]:
# Prepare submission
submission_df = pd.DataFrame({
    'id': test_data.index,
    'Status_C': test_predictions[:, 0],
    'Status_CL': test_predictions[:, 1],
    'Status_D': test_predictions[:, 2]
})

# Save submission file
submission_df.to_csv('advanced_submission.csv', index=False)
print("Your advanced submission was successfully saved!")

Your advanced submission was successfully saved!
