In [57]:
# Required imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.ensemble import VotingClassifier
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasClassifier


# Read the features dataset
features_df = pd.read_csv('images.csv')

# Preprocessing MLP (tabular data) part
X = features_df.drop(columns=['Image_Name', 'Target', 'Path'])
y = features_df['Target']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

# MLP pipeline with scaling
mlp_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Scale the features
    ('mlp', MLPClassifier(
        activation='tanh', 
        alpha=0.0001, 
        hidden_layer_sizes=(100, 50), 
        learning_rate_init=0.01, 
        max_iter=500, 
        solver='adam', 
        random_state=42
    ))
])

# Fit the MLP model
mlp_pipeline.fit(X_train, y_train)

# Evaluate MLP model
mlp_predictions = mlp_pipeline.predict(X_test)
mlp_accuracy = accuracy_score(y_test, mlp_predictions)
print(f'MLP Test Accuracy: {mlp_accuracy}')

# -----------------------------------
# CNN model for images
def preprocess_images(df, target_size=(128, 128)):
    images = []
    labels = []
    
    for index, row in df.iterrows():
        # Load image and resize
        image = load_img(row['Path'], target_size=target_size)
        image = img_to_array(image)  # Convert to numpy array
        image = image / 255.0  # Normalize pixel values to [0, 1]
        
        # Append the image and its corresponding label
        images.append(image)
        labels.append(row['Target'])

    return np.array(images), np.array(labels)

# Load the image dataset and preprocess images
df_images = pd.read_csv('labelled_images.csv')
X_img, y_img = preprocess_images(df_images)

# Split the image data into train and test sets
X_train_img, X_test_img, y_train_img, y_test_img = train_test_split(X_img, y_img, test_size=0.2, random_state=42)

# Encode labels for CNN
label_encoder = LabelEncoder()
y_train_img_encoded = label_encoder.fit_transform(y_train_img)
y_test_img_encoded = label_encoder.transform(y_test_img)

# CNN model function
def create_cnn_model():
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(np.unique(y_img)), activation='softmax')  # Output layer based on unique labels
    ])
    
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='sparse_categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

# Wrapping CNN model with KerasClassifier
cnn_model = KerasClassifier(build_fn=create_cnn_model, epochs=5, batch_size=32, verbose=1)

# Train CNN model
cnn_model.fit(X_train_img, y_train_img_encoded)

# Evaluate CNN model
cnn_accuracy = cnn_model.score(X_test_img, y_test_img_encoded)
print(f'CNN Test Accuracy: {cnn_accuracy}')

# -----------------------------------
# Ensemble model (Voting Classifier)
# Create an ensemble of both MLP and CNN models
ensemble_model = VotingClassifier(estimators=[
    ('mlp', mlp_pipeline),
    ('cnn', cnn_model)
], voting='soft')

# Fit ensemble model
ensemble_model.fit(X_train, y_train)

# Evaluate ensemble model
ensemble_predictions = ensemble_model.predict(X_test)
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)
print(f'Ensemble Test Accuracy: {ensemble_accuracy}')


MLP Test Accuracy: 1.0


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 145ms/step - accuracy: 0.4445 - loss: 0.8596
Epoch 2/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 133ms/step - accuracy: 0.5466 - loss: 0.6910
Epoch 3/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 131ms/step - accuracy: 0.6091 - loss: 0.6556
Epoch 4/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 142ms/step - accuracy: 0.5942 - loss: 0.6464
Epoch 5/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 133ms/step - accuracy: 0.6972 - loss: 0.6010
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
CNN Test Accuracy: 0.5360824742268041
Epoch 1/5


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("sequential_5_1/Cast:0", shape=(None, 18), dtype=float32). Expected shape (None, 128, 128, 3), but input has incompatible shape (None, 18)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 18), dtype=int64)
  • training=True
  • mask=None

In [58]:
# Required imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasClassifier
from sklearn.ensemble import VotingClassifier

# Read the features dataset
features_df = pd.read_csv('images.csv')

# Preprocessing MLP (tabular data) part
X = features_df.drop(columns=['Image_Name', 'Target', 'Path'])
y = features_df['Target']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

# MLP pipeline with scaling
mlp_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Scale the features
    ('mlp', MLPClassifier(
        activation='tanh', 
        alpha=0.0001, 
        hidden_layer_sizes=(100, 50), 
        learning_rate_init=0.01, 
        max_iter=500, 
        solver='adam', 
        random_state=42
    ))
])

# Fit the MLP model
mlp_pipeline.fit(X_train, y_train)

# Evaluate MLP model
mlp_predictions = mlp_pipeline.predict(X_test)
mlp_accuracy = accuracy_score(y_test, mlp_predictions)
print(f'MLP Test Accuracy: {mlp_accuracy}')

# -----------------------------------
# CNN model for images
def preprocess_images(df, target_size=(128, 128)):
    images = []
    labels = []
    
    for index, row in df.iterrows():
        # Load image and resize
        image = load_img(row['Path'], target_size=target_size)
        image = img_to_array(image)  # Convert to numpy array
        image = image / 255.0  # Normalize pixel values to [0, 1]
        
        # Append the image and its corresponding label
        images.append(image)
        labels.append(row['Target'])

    return np.array(images), np.array(labels)

# Load the image dataset and preprocess images
df_images = pd.read_csv('labelled_images.csv')
X_img, y_img = preprocess_images(df_images)

# Split the image data into train and test sets
X_train_img, X_test_img, y_train_img, y_test_img = train_test_split(X_img, y_img, test_size=0.2, random_state=42)

# Encode labels for CNN
label_encoder = LabelEncoder()
y_train_img_encoded = label_encoder.fit_transform(y_train_img)
y_test_img_encoded = label_encoder.transform(y_test_img)

# CNN model function
def create_cnn_model():
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(np.unique(y_img)), activation='softmax')  # Output layer based on unique labels
    ])
    
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='sparse_categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

# Wrapping CNN model with KerasClassifier
cnn_model = KerasClassifier(build_fn=create_cnn_model, epochs=5, batch_size=32, verbose=1)

# Train CNN model
cnn_model.fit(X_train_img, y_train_img_encoded)

# Evaluate CNN model
cnn_accuracy = cnn_model.score(X_test_img, y_test_img_encoded)
print(f'CNN Test Accuracy: {cnn_accuracy}')

# -----------------------------------
# Ensemble model (Voting Classifier)
# Since VotingClassifier doesn't support Keras models, let's handle them separately

# Create a custom voting ensemble
class CustomVotingClassifier:
    def __init__(self, estimators):
        self.estimators = estimators
    
    def fit(self, X, y):
        for name, model in self.estimators:
            model.fit(X, y)
    
    def predict(self, X):
        # Collect predictions from each model
        predictions = np.zeros((X.shape[0], len(self.estimators)))
        for i, (name, model) in enumerate(self.estimators):
            predictions[:, i] = model.predict(X)
        # Majority vote
        return np.argmax(np.bincount(predictions.astype(int), axis=1), axis=1)

# Create the ensemble model
ensemble_model = CustomVotingClassifier(estimators=[
    ('mlp', mlp_pipeline),
    ('cnn', cnn_model)
])

# Fit ensemble model (using X_train and y_train)
ensemble_model.fit(X_train, y_train)

# Evaluate ensemble model
ensemble_predictions = ensemble_model.predict(X_test)
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)
print(f'Ensemble Test Accuracy: {ensemble_accuracy}')


MLP Test Accuracy: 1.0
Epoch 1/5


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 137ms/step - accuracy: 0.5072 - loss: 0.7548
Epoch 2/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 134ms/step - accuracy: 0.5347 - loss: 0.6901
Epoch 3/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 129ms/step - accuracy: 0.5891 - loss: 0.6526
Epoch 4/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 128ms/step - accuracy: 0.6292 - loss: 0.6437
Epoch 5/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 183ms/step - accuracy: 0.7505 - loss: 0.5835
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
CNN Test Accuracy: 0.6494845360824743
Epoch 1/5


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("sequential_7_1/Cast:0", shape=(None, 18), dtype=float32). Expected shape (None, 128, 128, 3), but input has incompatible shape (None, 18)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 18), dtype=int64)
  • training=True
  • mask=None

In [None]:
# Read the features dataset
features_df = pd.read_csv('images.csv')

# Preprocessing MLP (tabular data) part
X = features_df.drop(columns=['Image_Name', 'Target', 'Path'])
y = features_df['Target']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [None]:
# Load the image dataset and preprocess images
df_images = pd.read_csv('labelled_images.csv')
X_img, y_img = preprocess_images(df_images)

# Split the image data into train and test sets
X_train_img, X_test_img, y_train_img, y_test_img = train_test_split(X_img, y_img, test_size=0.2, random_state=42)

# Encode labels for CNN
label_encoder = LabelEncoder()
y_train_img_encoded = label_encoder.fit_transform(y_train_img)
y_test_img_encoded = label_encoder.transform(y_test_img)

In [59]:
# Import necessary libraries
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

# Assuming X_train, X_test, y_train, y_test are already defined (for tabular data)
# Assuming X_train_img, X_test_img, y_train_img_encoded, y_test_img_encoded are already defined (for image data)

# 1. Train MLP model (for tabular data)
mlp_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Example: scale the data if necessary
    ('mlp', MLPClassifier(hidden_layer_sizes=(128,), max_iter=1000))
])

mlp_pipeline.fit(X_train, y_train)
mlp_predictions_train = mlp_pipeline.predict(X_train)
mlp_predictions_test = mlp_pipeline.predict(X_test)
mlp_proba_train = mlp_pipeline.predict_proba(X_train)
mlp_proba_test = mlp_pipeline.predict_proba(X_test)

print(f'MLP Accuracy: {accuracy_score(y_test, mlp_predictions_test)}')

# 2. Train CNN model (for image data)
cnn_model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')  # Adjust output units to match number of classes
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(X_train_img, y_train_img_encoded, epochs=10, batch_size=32)

cnn_predictions_train = cnn_model.predict(X_train_img)
cnn_predictions_test = cnn_model.predict(X_test_img)

cnn_proba_train = cnn_predictions_train  # Assuming CNN outputs probabilities (not just labels)
cnn_proba_test = cnn_predictions_test

print(f'CNN Accuracy: {accuracy_score(y_test_img_encoded, np.argmax(cnn_proba_test, axis=1))}')

# 3. Stack predictions from both models
# Stack predicted probabilities from both models for the meta-model training
X_train_meta = np.hstack([mlp_proba_train, cnn_proba_train])
X_test_meta = np.hstack([mlp_proba_test, cnn_proba_test])

# 4. Train the meta-model (Logistic Regression)
meta_model = LogisticRegression()
meta_model.fit(X_train_meta, y_train)  # Train on stacked predictions

# 5. Make predictions with the meta-model
meta_predictions = meta_model.predict(X_test_meta)

# 6. Evaluate the ensemble model
ensemble_accuracy = accuracy_score(y_test, meta_predictions)
print(f'Ensemble Model Accuracy: {ensemble_accuracy}')


MLP Accuracy: 1.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 84ms/step - accuracy: 0.3909 - loss: 5.9824
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - accuracy: 0.5139 - loss: 1.8150
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - accuracy: 0.6991 - loss: 0.6445
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - accuracy: 0.8395 - loss: 0.4575
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - accuracy: 0.9094 - loss: 0.3052
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 86ms/step - accuracy: 0.9245 - loss: 0.2472
Epoch 7/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step - accuracy: 0.9833 - loss: 0.1492
Epoch 8/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step - accuracy: 0.9844 - loss: 0.0956
Epoch 9/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━

In [63]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.base import BaseEstimator, ClassifierMixin

class KerasClassifierCustom(BaseEstimator, ClassifierMixin):
    def __init__(self, build_fn=None, epochs=10, batch_size=32, verbose=0):
        self.build_fn = build_fn
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose

    def fit(self, X, y):
        model = self.build_fn()
        model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose)
        self.model_ = model
        return self

    def predict(self, X):
        return self.model_.predict(X)

    def score(self, X, y):
        return self.model_.evaluate(X, y)


from sklearn.model_selection import GridSearchCV

# Function to create the model
def create_model():
    model = Sequential()
    model.add(Dense(64, input_dim=8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Create the custom classifier
keras_classifier = KerasClassifierCustom(build_fn=create_model)

# Parameter grid for GridSearchCV
param_grid = {
    'epochs': [10, 20],
    'batch_size': [32, 64]
}

# Grid search
grid_search = GridSearchCV(estimator=keras_classifier, param_grid=param_grid, cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best parameters and score
print(f"Best parameters: {grid_search.best_params_}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: 
All the 12 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
8 fits failed with the following error:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/var/folders/sc/9ntffqn97434sv_965czfhhc0000gn/T/ipykernel_38542/2112356618.py", line 14, in fit
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/keras/src/layers/input_spec.py", line 227, in assert_input_compatibility
    raise ValueError(
ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 8, but received input with shape (None, 18)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 18), dtype=int64)
  • training=True
  • mask=None

--------------------------------------------------------------------------------
4 fits failed with the following error:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/var/folders/sc/9ntffqn97434sv_965czfhhc0000gn/T/ipykernel_38542/2112356618.py", line 14, in fit
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/keras/src/layers/input_spec.py", line 227, in assert_input_compatibility
    raise ValueError(
ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_2" is incompatible with the layer: expected axis -1 of input shape to have value 8, but received input with shape (None, 18)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 18), dtype=int64)
  • training=True
  • mask=None
