In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, GradientBoostingClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from scipy.stats import randint, uniform


In [7]:
# Load dataset
df = pd.read_csv('/kaggle/input/finaldata/Final.csv')

# Encoding categorical labels
# The fitted encoder stays available as `le`, so integer codes can be mapped
# back to the original label values later via `le.inverse_transform`.
le = LabelEncoder()
df = df.assign(label=le.fit_transform(df['label']))

# Feature-target split
# Target is the encoded `label` column; features are every other column.
y = df['label']
X = df.drop('label', axis=1)

In [8]:
# Splitting dataset into training and testing sets
# The split happens BEFORE any resampling so the held-out test set contains
# only real, untouched rows. Oversampling first would let synthetic points
# interpolated from future test rows leak into training and inflate the
# reported test accuracy.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Handle class imbalance
# SMOTE is fitted on the training portion only; the test set keeps the
# original class distribution.
# NOTE(review): the cross-validation folds inside the later randomized
# searches still contain synthetic rows; wrap SMOTE and the estimator in an
# `imblearn.pipeline.Pipeline` if unbiased CV scores are needed.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_raw, y_train_raw)

# Downstream cells train on `X_train` / `y_train`, so point them at the
# balanced training data.
X_train, y_train = X_resampled, y_resampled

In [9]:

# Feature scaling
# Mean/variance statistics are learned from the training features only and
# then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyperparameter tuning for Random Forest
# Candidate values per hyperparameter; the randomized search draws
# combinations from these lists.
rf_params = dict(
    n_estimators=[400, 500, 600],
    max_depth=[30, 40, 50],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 5],
)

In [11]:
# Randomized search over `rf_params`: 7 sampled configurations, each scored
# by 3-fold cross-validated accuracy, using all available cores.
rf = RandomForestClassifier(random_state=42)
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=rf_params,
    n_iter=7,
    scoring='accuracy',
    n_jobs=-1,
    cv=3,
    random_state=42,
)
rf_random.fit(X_train_scaled, y_train)

# Best configuration found by the search.
best_rf = rf_random.best_estimator_




In [13]:
# Hyperparameter tuning for LightGBM
# Candidate values per hyperparameter; the randomized search draws
# combinations from these lists.
lgbm_params = dict(
    n_estimators=[400, 500, 600],
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[10, 15, 20],
    num_leaves=[31, 40, 50],
)

In [14]:
# Randomized search over `lgbm_params`: 7 sampled configurations, each scored
# by 3-fold cross-validated accuracy, using all available cores.
lgbm = LGBMClassifier()
lgbm_random = RandomizedSearchCV(
    estimator=lgbm,
    param_distributions=lgbm_params,
    n_iter=7,
    scoring='accuracy',
    n_jobs=-1,
    cv=3,
    random_state=42,
)
lgbm_random.fit(X_train_scaled, y_train)

# Best configuration found by the search.
best_lgbm = lgbm_random.best_estimator_


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010818 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 78960, number of used features: 15
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585


In [15]:
# Hyperparameter tuning for XGBoost
# Candidate values per hyperparameter; the randomized search draws
# combinations from these lists.
xgb_params = dict(
    n_estimators=[400, 500, 600],
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[10, 15, 20],
    subsample=[0.7, 0.8, 0.9],
)

# Randomized search over `xgb_params`: 7 sampled configurations, each scored
# by 3-fold cross-validated accuracy, using all available cores.
xgb = XGBClassifier(
    use_label_encoder=False,
    eval_metric='mlogloss',
)
xgb_random = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=xgb_params,
    n_iter=7,
    scoring='accuracy',
    n_jobs=-1,
    cv=3,
    random_state=42,
)
xgb_random.fit(X_train_scaled, y_train)

# Best configuration found by the search.
best_xgb = xgb_random.best_estimator_




In [None]:
# Building MLP Neural Network Model
# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation, dropout masks and batch shuffling are repeatable, matching
# the fixed random_state=42 used by the scikit-learn steps in this notebook.
tf.keras.utils.set_random_seed(42)

# Network dimensions are derived from the data: one input per scaled feature
# column and one softmax output per distinct training label.
n_features = X_train_scaled.shape[1]
n_classes = len(np.unique(y_train))

mlp_model = Sequential([
    # Explicit Input layer instead of the legacy `input_shape=` argument on
    # the first Dense layer.
    tf.keras.Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(n_classes, activation='softmax')
])

# Sparse categorical cross-entropy is used because the labels are integer
# class ids rather than one-hot vectors.
mlp_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): the test split is passed as validation data purely to monitor
# per-epoch metrics. Do not use these curves for model selection or early
# stopping, or the final test score stops being an unbiased estimate.
mlp_model.fit(X_train_scaled, y_train, epochs=30, batch_size=32, validation_data=(X_test_scaled, y_test), verbose=1)


In [None]:
# Predict with MLP model
# The predicted class is the index of the largest softmax output in each row.
mlp_pred = mlp_model.predict(X_test_scaled).argmax(axis=1)

# Stacking Classifier with stronger meta-classifier
# The three tuned tree ensembles are the base learners; a gradient-boosting
# model is the final estimator that combines their outputs.
stacking_clf = StackingClassifier(
    estimators=[
        ('rf', best_rf),
        ('lgbm', best_lgbm),
        ('xgb', best_xgb),
    ],
    final_estimator=GradientBoostingClassifier(
        n_estimators=300,
        learning_rate=0.05,
        max_depth=5,
        random_state=42,
    ),
)

# Fit on the scaled training data, then predict the held-out test rows.
stacking_pred = stacking_clf.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Print accuracy
print("Stacking Classifier Accuracy:", accuracy_score(y_true=y_test, y_pred=stacking_pred))
print("MLP Model Accuracy:", accuracy_score(y_true=y_test, y_pred=mlp_pred))

# Print classification report
print("Stacking Classifier Report:\n", classification_report(y_true=y_test, y_pred=stacking_pred))
print("MLP Model Report:\n", classification_report(y_true=y_test, y_pred=mlp_pred))