# In [None]:
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from catboost import CatBoostClassifier
from imblearn.combine import SMOTEENN
from imblearn.over_sampling import ADASYN
from lightgbm import LGBMClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout, LeakyReLU
from tensorflow.keras.models import Sequential
from xgboost import XGBClassifier

# Load Dataset
data = pd.read_csv(r"C:\Users\abdul\Desktop\research work\motherDataset.csv")

# Encode target variable (string risk labels -> integer class ids)
label_encoder = LabelEncoder()
data['RiskLevel'] = label_encoder.fit_transform(data['RiskLevel'])

# Split features and target
X = data.drop('RiskLevel', axis=1)
y = data['RiskLevel']

# Train-Test Split
# The hold-out set is carved out BEFORE any resampling so that it contains
# only real, untouched records. Resampling the full dataset first would put
# synthetic (SMOTE/ADASYN) and ENN-filtered samples into the test partition
# and make every reported test metric optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)

# Handle Imbalanced Dataset (training partition only)
smoteenn = SMOTEENN(random_state=42)
X_resampled, y_resampled = smoteenn.fit_resample(X_train, y_train)

adasyn = ADASYN(random_state=42)
X_resampled, y_resampled = adasyn.fit_resample(X_resampled, y_resampled)

# From here on the training data is the resampled training partition.
X_train, y_train = X_resampled, y_resampled

# Scale Features
# The scaler is fitted on the training data only and then applied to the
# test data, so no test-set statistics leak into the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyperparameter Tuning with RandomizedSearch for XGBoost
# 50 random draws from the grid below, scored by 3-fold CV accuracy.
xgb_param_grid = {
    'n_estimators': [100, 200, 300, 500],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [6, 8, 10],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9],
}
xgb = XGBClassifier(random_state=42)
random_search_xgb = RandomizedSearchCV(
    xgb,
    xgb_param_grid,
    n_iter=50,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
    random_state=42,
)
random_search_xgb.fit(X_train_scaled, y_train)
xgb_best = random_search_xgb.best_estimator_

# Hyperparameter Tuning for LightGBM
lgbm_param_grid = {
    'n_estimators': [100, 200, 300, 500],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [6, 8, 10],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9],
}
# LightGBM only performs row subsampling (bagging) when `subsample_freq` is
# non-zero. With the default of 0 the `subsample` values sampled by the
# search would be ignored, so bagging is enabled on every iteration here.
lgbm = LGBMClassifier(random_state=42, subsample_freq=1)
random_search_lgbm = RandomizedSearchCV(
    lgbm,
    lgbm_param_grid,
    n_iter=50,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
    random_state=42,
)
random_search_lgbm.fit(X_train_scaled, y_train)
lgbm_best = random_search_lgbm.best_estimator_

# Add CatBoost Classifier
# Left unfitted with fixed hyperparameters; it is not part of the searches above.
catboost = CatBoostClassifier(verbose=0, random_state=42, learning_rate=0.005, iterations=300)

# Enhanced Neural Network with RMSProp Optimizer
def build_nn_with_rmsprop(input_dim, num_classes=3, learning_rate=0.5):
    """Build and compile the feed-forward classifier trained with RMSprop.

    The network stacks three hidden blocks of 256, 128 and 64 units. Each
    block is Dense -> BatchNormalization -> LeakyReLU(0.1) -> Dropout, with
    dropout rates 0.4, 0.3 and 0.2 respectively, followed by a softmax
    output layer.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Width of the softmax output layer. Defaults to 3,
            the value that was previously hard-coded.
        learning_rate: Initial RMSprop learning rate. Defaults to 0.5,
            the value that was previously hard-coded.

    Returns:
        A compiled ``Sequential`` model using the
        ``sparse_categorical_crossentropy`` loss (targets are integer class
        indices) and reporting accuracy.
    """
    # NOTE(review): 0.5 is far above the Keras RMSprop default (0.001) and
    # may make training unstable; the default is kept only for backward
    # compatibility -- confirm it is intentional.
    model = Sequential([
        Dense(256, input_dim=input_dim),
        BatchNormalization(),
        LeakyReLU(alpha=0.1),
        Dropout(0.4),
        Dense(128),
        BatchNormalization(),
        LeakyReLU(alpha=0.1),
        Dropout(0.3),
        Dense(64),
        BatchNormalization(),
        LeakyReLU(alpha=0.1),
        Dropout(0.2),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Callbacks: stop once val_loss has not improved for 10 epochs (restoring the
# best weights) and halve the learning rate after 5 epochs without improvement.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)

# Validation data for the callbacks is taken from the TRAINING partition.
# Monitoring the test set here would let early stopping / best-weight
# restoration select the model on the very data used for the final report.
X_nn_train, X_nn_val, y_nn_train, y_nn_val = train_test_split(
    X_train_scaled, y_train, test_size=0.2, stratify=y_train, random_state=42
)

# Measure training time for Neural Network
nn_model = build_nn_with_rmsprop(X_train_scaled.shape[1])
# perf_counter() is monotonic and high-resolution, which makes it the
# appropriate clock for measuring elapsed durations.
start_time = time.perf_counter()
nn_model.fit(X_nn_train, y_nn_train, validation_data=(X_nn_val, y_nn_val),
             epochs=100, batch_size=64, verbose=1, callbacks=[early_stopping, reduce_lr])
nn_train_time = time.perf_counter() - start_time

# Measure training time for Stacking Classifier
# Base learners are the tuned XGBoost / LightGBM models plus CatBoost; a
# logistic regression combines their 5-fold cross-validated predictions.
estimators = [('xgb', xgb_best), ('lgbm', lgbm_best), ('catboost', catboost)]
stacking_model = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), cv=5)
start_time = time.perf_counter()
stacking_model.fit(X_train_scaled, y_train)
stacking_train_time = time.perf_counter() - start_time

# Measure testing time for Neural Network predictions
# perf_counter() is used for all timings: it is monotonic and high-resolution.
start_time = time.perf_counter()
nn_probs = nn_model.predict(X_test_scaled)
nn_test_time = time.perf_counter() - start_time

# Measure testing time for Stacking Classifier predictions
start_time = time.perf_counter()
stacking_probs = stacking_model.predict_proba(X_test_scaled)
stacking_test_time = time.perf_counter() - start_time

# Combine predictions for hybrid model
# Weighted soft vote over the two probability matrices.
# NOTE(review): this assumes both matrices order their class columns
# identically (encoded labels 0..k-1) -- confirm against
# stacking_model.classes_.
STACKING_WEIGHT = 0.4
NN_WEIGHT = 0.6
start_time = time.perf_counter()
final_probs = (STACKING_WEIGHT * stacking_probs + NN_WEIGHT * nn_probs)
final_preds = np.argmax(final_probs, axis=1)
# Covers only the blending step, not the two model predictions timed above.
hybrid_test_time = time.perf_counter() - start_time

# Evaluate Hybrid Model
# precision_score / recall_score / f1_score must be imported from
# sklearn.metrics at the top of the file; macro averaging weights every
# class equally regardless of its support.
hybrid_accuracy = accuracy_score(y_test, final_preds)
hybrid_macro_precision = precision_score(y_test, final_preds, average='macro')
hybrid_macro_recall = recall_score(y_test, final_preds, average='macro')
hybrid_macro_f1 = f1_score(y_test, final_preds, average='macro')

# Print Hybrid Model Metrics
print(f"\nHybrid Model - Macro Precision: {hybrid_macro_precision:.4f}")
print(f"Hybrid Model - Macro Recall: {hybrid_macro_recall:.4f}")
print(f"Hybrid Model - Macro F1 Score: {hybrid_macro_f1:.4f}")
print(f"Hybrid Model - Accuracy: {hybrid_accuracy:.4f}")