In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten, Embedding
from scikeras.wrappers import KerasClassifier
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM

from sklearn.metrics import accuracy_score

In [8]:
# Example dataset loading (replace with actual data loading code).
# `X` is the feature matrix and `y` the target vector. The synthetic stand-ins
# below are drawn from a seeded generator so that a fresh kernel followed by
# "Run All" reproduces exactly the same data.
SEED = 42
rng = np.random.default_rng(SEED)
X = rng.random((1000, 100))  # Example feature matrix (1000 samples, 100 features)
y = rng.integers(0, 10, size=1000)  # Example target variable (10 classes, labels 0-9)

# Encode target labels as consecutive integers (required if they are categorical)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Feature scaling (optional, depends on the model).
# NOTE: this scaler is fit on every row, including rows that later serve as
# validation folds, so its statistics are not fold-independent.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define the number of folds for cross-validation; the split reuses SEED so the
# fold assignment is reproducible as well.
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=SEED)

# Results dictionary: model name -> list of per-fold validation accuracies
results = {
    'RandomForest': [],
    'SVM': [],
    'MLP': [],
    'CNN': [],
    'LSTM': [],
    'CNN_LSTM': []
}

In [9]:
def build_mlp(input_shape, num_classes=10):
    """Build and compile a multi-layer perceptron classifier.

    The network is Dense(128) -> Dropout(0.5) -> Dense(64) -> Dropout(0.5)
    -> softmax output, compiled with Adam and sparse categorical
    cross-entropy, so targets are expected as integer class ids.

    Args:
        input_shape: Number of features per sample (an int; it is wrapped
            into the 1-tuple ``(input_shape,)`` for the first layer).
        num_classes: Number of units in the softmax output layer. Defaults
            to 10, the value that used to be hard-coded.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        Dense(128, input_shape=(input_shape,), activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


In [None]:
def build_cnn(input_shape, num_classes=10):
    """Build and compile a 1-D convolutional classifier.

    The features of a sample are treated as a sequence of ``input_shape``
    steps with a single channel: Conv1D(64, kernel 3) -> MaxPooling1D(2)
    -> Flatten -> Dense(100) -> Dropout(0.5) -> softmax output. The model is
    compiled with Adam and sparse categorical cross-entropy, so targets are
    expected as integer class ids.

    Args:
        input_shape: Number of features (sequence length) per sample.
        num_classes: Number of units in the softmax output layer. Defaults
            to 10, the value that used to be hard-coded.

    Returns:
        The compiled ``Sequential`` model.
    """
    # NOTE(review): the declared input is (input_shape, 1); confirm that the
    # installed Keras adds the trailing channel axis to 2-D (samples, features)
    # arrays, otherwise reshape X before fitting.
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(input_shape, 1)),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [6]:
def build_lstm(input_shape, num_classes=10):
    """Build and compile a stacked-LSTM classifier for numeric features.

    Each sample is read as a sequence of ``input_shape`` timesteps carrying
    one value each: LSTM(64, return_sequences=True) -> Dropout(0.5)
    -> LSTM(64) -> softmax output. The model is compiled with Adam and sparse
    categorical cross-entropy, so targets are expected as integer class ids.

    The previous version started with an ``Embedding`` layer. Embedding looks
    up rows by non-negative integer token id, but this notebook feeds the
    model standardized floats (negative and fractional values), which are not
    valid indices. The features are therefore passed to the LSTM directly,
    using the same ``(input_shape, 1)`` layout that ``build_cnn`` declares.

    Args:
        input_shape: Number of features (sequence length) per sample.
        num_classes: Number of units in the softmax output layer. Defaults
            to 10, the value that used to be hard-coded.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # NOTE(review): as in build_cnn, the declared input is (input_shape, 1);
    # confirm that the installed Keras adds the trailing axis to 2-D
    # (samples, features) arrays, otherwise reshape X before fitting.
    model.add(LSTM(64, return_sequences=True, input_shape=(input_shape, 1)))
    model.add(Dropout(0.5))
    model.add(LSTM(64))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
def build_cnn_lstm(input_shape, num_classes=10):
    """Build and compile a Conv1D + LSTM classifier for numeric features.

    Each sample is read as a sequence of ``input_shape`` steps with a single
    channel: Conv1D(64, kernel 3) -> MaxPooling1D(2) -> LSTM(64)
    -> Dropout(0.5) -> softmax output. The model is compiled with Adam and
    sparse categorical cross-entropy, so targets are expected as integer
    class ids.

    The previous version started with an ``Embedding`` layer. Embedding looks
    up rows by non-negative integer token id, but this notebook feeds the
    model standardized floats (negative and fractional values), which are not
    valid indices. The convolution now consumes the features directly, using
    the same ``(input_shape, 1)`` layout that ``build_cnn`` declares.

    Args:
        input_shape: Number of features (sequence length) per sample.
        num_classes: Number of units in the softmax output layer. Defaults
            to 10, the value that used to be hard-coded.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # NOTE(review): as in build_cnn, the declared input is (input_shape, 1);
    # confirm that the installed Keras adds the trailing axis to 2-D
    # (samples, features) arrays, otherwise reshape X before fitting.
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(input_shape, 1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(LSTM(64))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Model Instances
# Settings shared by all four Keras-backed estimators. `input_shape` is not a
# KerasClassifier option of its own: scikeras forwards constructor keywords to
# the build function when that function names them as parameters.
# `random_state` makes the wrapper seed TensorFlow before each fit.
keras_params = dict(
    input_shape=X_scaled.shape[1],
    epochs=10,
    batch_size=32,
    verbose=0,
    random_state=42,
)

# The build functions are passed via `model=`; `build_fn=` is the deprecated
# spelling of the same argument in scikeras. Every estimator is seeded so that
# re-running the notebook reproduces the reported accuracies (for SVC the seed
# only matters because probability=True triggers an internal shuffled CV).
models = {
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(kernel='rbf', probability=True, random_state=42),
    'MLP': KerasClassifier(model=build_mlp, **keras_params),
    'CNN': KerasClassifier(model=build_cnn, **keras_params),
    'LSTM': KerasClassifier(model=build_lstm, **keras_params),
    'CNN_LSTM': KerasClassifier(model=build_cnn_lstm, **keras_params)
}

In [None]:
# Cross-Validation and Training
# Names in `models` that are Keras-backed and therefore take the extra fit arguments.
keras_model_names = {'MLP', 'CNN', 'LSTM', 'CNN_LSTM'}

# Work from the unscaled features: the scaler is re-fit inside every fold so
# that no statistics from the validation rows leak into training (the global
# X_scaled was standardized using all rows).
X_unscaled = np.asarray(X)

for train_index, val_index in kf.split(X_unscaled):
    fold_scaler = StandardScaler().fit(X_unscaled[train_index])
    X_train = fold_scaler.transform(X_unscaled[train_index])
    X_val = fold_scaler.transform(X_unscaled[val_index])
    y_train, y_val = y_encoded[train_index], y_encoded[val_index]

    # Train and evaluate each model on this fold
    for model_name, model in models.items():
        if model_name in keras_model_names:
            # NOTE(review): early stopping monitors the same fold that is scored
            # below, which biases the reported accuracy upward. scikeras also
            # documents fit-time keyword arguments as deprecated; confirm that
            # `callbacks` passed here is honoured by the installed version, or
            # move it to the KerasClassifier constructor.
            model.fit(
                X_train,
                y_train,
                validation_data=(X_val, y_val),
                callbacks=[EarlyStopping(monitor='val_loss', patience=3)],
                verbose=0,
            )
        else:
            model.fit(X_train, y_train)

        # KerasClassifier.predict already returns class labels, exactly like the
        # scikit-learn estimators; the old unconditional argmax(axis=1) failed
        # on that 1-D array. Only a genuine (n_samples, n_classes) probability
        # matrix is reduced to labels here.
        y_pred = np.asarray(model.predict(X_val))
        if y_pred.ndim == 2 and y_pred.shape[1] > 1:
            y_pred = np.argmax(y_pred, axis=1)
        else:
            y_pred = y_pred.ravel()

        accuracy = accuracy_score(y_val, y_pred)
        results[model_name].append(accuracy)

In [None]:
# Summarise each model's cross-validation run as "mean (+/- std)" of its
# per-fold accuracies, one line per model in insertion order of `results`.
for model_name in results:
    accuracies = results[model_name]
    mean_accuracy = np.mean(accuracies)
    std_accuracy = np.std(accuracies)
    print(f'{model_name} Mean Accuracy: {mean_accuracy:.4f} (+/- {std_accuracy:.4f})')