<a href="https://colab.research.google.com/github/jced226/CS498Project/blob/main/CS460_group.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount your Google Drive at /content/drive so the notebook can reach files stored there
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd '/content/drive/MyDrive/Final project' #Create a new folder and add the scoliosis data google sheet and this notebook to that folder

In [None]:
# Install necessary packages
# NOTE(review): tensorflow-addons is not imported by any cell visible in this notebook — confirm it is still needed.

!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install tensorflow
!pip install tensorflow-addons
!pip install gspread

***The Ensemble Classifier***

In [None]:
import os
import pickle
import random
import numpy as np
import tensorflow
import tensorflow.keras.backend as K
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, ReLU, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.random import set_seed
from sklearn.metrics import balanced_accuracy_score
from tensorflow.keras.callbacks import LearningRateScheduler


# Seed value shared by every random number generator seeded below
# Apparently you may use different seed values at each stage
seed_value= 0
# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this presumably only
# affects subprocesses launched later, not the already-running kernel — confirm.
os.environ['PYTHONHASHSEED']=str(seed_value)
# 2. Set the `python` built-in pseudo-random generator at a fixed value
random.seed(seed_value)
# 3. Set the `numpy` pseudo-random generator at a fixed value
np.random.seed(seed_value)
# 4. Set the `tensorflow` pseudo-random generator at a fixed value
set_seed(seed_value)


class EnsembleNNClassifier():
    """Ensemble of identically structured feed-forward networks for binary classification.

    Every member is a Keras functional ``Model`` made of
    ``Dense -> ReLU -> BatchNormalization`` blocks followed by a single
    sigmoid unit. The ensemble output is the element-wise median of the
    members' outputs.
    """
    def __init__(self,
                 input_shape,
                 layers_units,
                 n_members=5):
        """
        Args:
            input_shape (Tuple): Shape of the input data, e.g. (27,).
            layers_units (Tuple): Number of units in each hidden layer, e.g. (64, 64,).
            n_members (int, optional): Number of members in the ensemble model. Defaults to 5.
        """
        self.input_shape = input_shape
        self.layers_units = layers_units
        self.num_hidden_layers = len(layers_units)
        self.n_members = n_members
        self.members = []
        self.score = 0
        # A freshly constructed ensemble only holds untrained members;
        # fit() and load() are the only places that mark it as fitted.
        self._fitted = False
        self._build_ensemble()

    def cosine_annealing(self, epoch, max_lr, min_lr, T_max):
        """Cosine-annealed learning rate for a given epoch.

        Args:
            epoch (int): Index of the current epoch.
            max_lr (float): Learning rate returned at epoch 0.
            min_lr (float): Learning rate returned at epoch ``T_max``.
            T_max (int): Number of epochs in half a cosine period.

        Returns:
            The learning rate for ``epoch``.
        """
        lr = min_lr + (max_lr - min_lr) * (1 + np.cos(np.pi * epoch / T_max)) / 2
        return lr

    def _compile_model(self, model, learning_rate=0.001, min_lr=0.0001, T_max=100):
        """Compiles one member with Adam, binary cross-entropy and binary accuracy.

        The learning-rate schedule is a fit-time callback, so it is attached
        in `_fit_model` rather than here.

        Args:
            model: Keras model to compile.
            learning_rate (float, optional): Initial Adam learning rate. Defaults to 0.001.
            min_lr (float, optional): Unused; kept so existing callers keep working.
            T_max (int, optional): Unused; kept so existing callers keep working.
        """
        optimizer = Adam(learning_rate=learning_rate)
        accuracy = BinaryAccuracy()
        model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[accuracy])

    def _build_ensemble(self):
        """Builds the ensemble model by appending `n_members` new members.
        """
        for model_index in range(self.n_members):
            model_number = model_index + 1
            self._build_model(model_number)

    def compile_ensemble(self):
        """Compiles all models in the ensemble.
        """
        for member in self.members:
            self._compile_model(member)

    def fit_evaluate(self,x_train,y_train, x_test, y_test, epochs=100, batch_size=8, verbose=0):
        """Trains all the models in the ensemble for a fixed number of epochs on the training data,
        and evaluates its performance on the test data. The result is stored in `self.score`.

        Args:
            x_train: Input data for training.
            y_train: Target data for training.
            x_test: Input data for testing.
            y_test: Target data for testing.
            epochs (int, optional): number of training epochs. Defaults to 100.
            batch_size (int, optional): batch size. Defaults to 8.
            verbose (int, optional): Verbosity mode; any value > 0 prints the score. Defaults to 0.

        Raises:
            Exception: If the ensemble is already fitted (raised by `fit`).
        """
        # fit() rebuilds and compiles the members itself, so doing it here as
        # well would only construct a set of models that is thrown away.
        self.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)

        #evaluate ensemble
        score = self.evaluate(x_test, y_test)
        if verbose>0:
            print('> %.3f' % score)
        self.score = score

    def fit(self, x_train, y_train, epochs=100, batch_size=8):
        """Rebuilds, compiles and trains every member of the ensemble.

        Args:
            x_train: Training inputs, in any form accepted by Keras `Model.fit`.
            y_train: Training targets, in any form accepted by Keras `Model.fit`.
            epochs (int, optional): Maximum number of epochs per member. Defaults to 100.
            batch_size (int, optional): Number of samples per gradient update. Defaults to 8.

        Raises:
            Exception: If the ensemble is already fitted.
        """
        if self._fitted:
            raise Exception('The model is already fitted')

        # Start from fresh, untrained members on every fit.
        self.members = []
        self._build_ensemble()
        self.compile_ensemble()
        for model in self.members:
            self._fit_model(model,  x_train, y_train,
                            epochs=epochs,
                            batch_size=batch_size)
        self._fitted = True

    def predict(self, X):
        """Generates output predictions for the input samples.

        Args:
            X: Input samples.

        Returns:
            Numpy array of the ensemble outputs rounded with `np.round`.
        """
        return np.round(self.predict_members(X))

    #evaluate ensemble model
    def evaluate(self, x_test, y_test):
        """Evaluates the ensemble model's predictions
        using balanced_accuracy_score.

        Args:
            x_test: Input data for testing.
            y_test: Target data for testing.

        Returns:
            float: balanced_accuracy_score
        """
        #make prediction
        y_pred = self.predict(x_test)
        #calculate accuracy
        return balanced_accuracy_score(y_test, y_pred)

    def predict_members(self, X):
        """Generates the class probabilities of the input samples X.

        Args:
            X: Input samples.

        Returns:
            Numpy array with the element-wise median of the members' outputs.

        Raises:
            Exception: If the ensemble has not been fitted or loaded.
        """
        if self._fitted:
            y_hats = [model.predict(X) for model in self.members]
            y_hats = np.array(y_hats)
            # median (not mean) across members, taken along the member axis
            predictions = np.median(y_hats, axis=0)
            return predictions
        raise Exception("The model should be fitted to make predictions")


    def _build_model(self, model_number=1):
        """Builds a single feed forward neural network and appends it to `self.members`.

        Args:
            model_number (int, optional): The number of the model. Defaults to 1.
        """
        # add the input layer
        inputs = self._add_model_input(model_number)
        # add hidden layers
        hidden_layers = self._add_hidden_layers(inputs)
        # add the output layer
        outputs = self._add_model_outputs(hidden_layers)
        # add the model to the ensemble
        self.members.append(Model(inputs=inputs, outputs=outputs, name=f"member_{model_number}"))

    def _add_model_input(self, model_number):
        """Instantiates a Keras input tensor with shape `self.input_shape`.

        Args:
            model_number (int): The number of the model.

        Returns:
             A `tensor`.
        """
        return Input(shape=self.input_shape, name=f"model_{model_number}_input")

    def _add_hidden_layers(self, inputs):
        """Creates all the neural blocks in the neural network.

        Args:
            inputs (tensor): The input layer.

        Returns:
            tensor: Output of the last hidden block.
        """
        x = inputs
        for layer_index in range(self.num_hidden_layers):
            x = self._add_hidden_layer(layer_index, x)
        return x

    def _add_hidden_layer(self, layer_index, x):
        """Adds a neural block to the graph of layers, consisting of a dense
        layer + ReLU + batch normalization.

        Args:
            layer_index (int): index of the layer to create.
            x (tensor): the graph of layers already in the neural network.

        Returns:
            tensor: the graph of layers in the neural network
            including the newly added layer.
        """
        layer_number = layer_index + 1
        hidden_layer = Dense(self.layers_units[layer_index], name=f"dense_layer_{layer_number}")
        x = hidden_layer(x)
        x = ReLU(name=f"relu_{layer_number}")(x)
        x = BatchNormalization(name=f"batch_normalization_{layer_number}")(x)
        return x

    def _add_model_outputs(self, x):
        """Adds an output layer to the graph of layers in the network.

        Args:
            x (tensor): the graph of layers in the network.

        Returns:
            tensor: The graph of layers in the network plus the output layer.
        """
        logits = Dense(units=1, name="model_logits")(x)
        output = Activation('sigmoid', name="sigmoid_layer")(logits)
        return output

    def _fit_model(self, model, x_train, y_train, epochs=100, batch_size=8,
                   max_lr=0.001, min_lr=0.0001, T_max=100):
        """Trains one member with cosine-annealed learning rate and early stopping.

        Args:
            model: Model to train.
            x_train: Training inputs, in any form accepted by Keras `Model.fit`.
            y_train: Training targets, in any form accepted by Keras `Model.fit`.
            epochs (int, optional): Maximum number of epochs. Defaults to 100.
            batch_size (int, optional): Number of samples per gradient update. Defaults to 8.
            max_lr (float, optional): Learning rate at epoch 0. Defaults to 0.001.
            min_lr (float, optional): Learning rate at epoch `T_max`. Defaults to 0.0001.
            T_max (int, optional): Half-period of the cosine schedule, in epochs.
                Defaults to 100.

        Returns:
            The trained model (the same object that was passed in).
        """
        # Define the Learning Rate Scheduler and Early Stopping
        lr_scheduler = LearningRateScheduler(
            lambda epoch: self.cosine_annealing(epoch, max_lr, min_lr, T_max))
        # Stops once the training binary accuracy has not improved by 1e-3 for 5 epochs.
        monitor = EarlyStopping(monitor='binary_accuracy', min_delta=1e-3, patience=5, verbose=1, mode='auto')
        callbacks = [monitor, lr_scheduler]

        model.fit(x_train, y_train,
                  epochs=epochs,
                  batch_size=batch_size,
                  shuffle=True,
                  callbacks=callbacks,
                  verbose=0)
        return model

    def _create_folder_if_it_doesnt_exist(self, folder_path):
        """Creates a folder if it does not exist in the given path.

        Args:
            folder_path (str): Path of the folder.
        """
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

    def _save_parameters(self, folder_path):
        """Pickles the constructor arguments to `parameters.pkl` in `folder_path`.

        Args:
            folder_path (str): Path of the folder where the parameters will be saved.
        """
        # Stored in constructor order so load() can call cls(*parameters).
        parameters = [
            self.input_shape,
            self.layers_units,
            self.n_members
        ]
        save_path = os.path.join(folder_path, "parameters.pkl")
        with open(save_path, "wb") as f:
            pickle.dump(parameters, f)

    def _save_weights(self, folder_path):
        """Saves each member's weights to `weights_<idx>.h5` in `folder_path`.

        Args:
            folder_path (str):  Path of the folder where the weights will be saved.
        """
        # NOTE(review): Keras 3 appears to require weight files to end in
        # `.weights.h5`; confirm the installed Keras accepts this legacy name.
        for idx, model in zip(range(self.n_members), self.members):
            file_path = os.path.join(folder_path, f"weights_{idx}.h5")
            model.save_weights(file_path)

    def save(self, folder_path="."):
        """Creates a folder if it does not exist in the given path.
        And, saves the parameters and weights of the model in `folder_path`.

        Args:
            folder_path (str, optional): Path of the folder.
            Defaults to the path of the current directory.
        """
        self._create_folder_if_it_doesnt_exist(folder_path)
        self._save_parameters(folder_path)
        self._save_weights(folder_path)

    def load_weights(self, weights_path):
        """Loads one weights file into every member of the ensemble.

        Args:
            weights_path (str): Path of the file where the weights are saved.
        """
        # The same file is applied to all members; use `load` to restore
        # per-member weights written by `save`.
        for model in self.members:
            model.load_weights(weights_path)

    @classmethod
    def load(cls, folder_path="."):
        """Loads the model from the given folder `folder_path`.

        Args:
            folder_path (str, optional): Path of the folder.
            Defaults to the path of the current directory.

        Returns:
            EnsembleNNClassifier: The model saved in the given folder, marked as fitted.
        """
        parameters_path = os.path.join(folder_path, "parameters.pkl")
        # SECURITY: pickle.load can execute arbitrary code; only load folders you trust.
        with open(parameters_path, "rb") as f:
            parameters = pickle.load(f)
        ensemble_nn = cls(*parameters)

        for idx, model in zip(range(ensemble_nn.n_members), ensemble_nn.members):
            weights_path = os.path.join(folder_path, f"weights_{idx}.h5")
            model.load_weights(weights_path)
        ensemble_nn._fitted = True
        return ensemble_nn


**The gspread package (installed at the top of this notebook) lets the code read from a Google Sheets file**

**Train Ensemble**

This trains the model and uses the EnsembleNNClassifier

In [None]:
from pandas import DataFrame
from sklearn.model_selection import train_test_split
from sklearn import metrics
from numpy import max, random, array, round
from google.colab import auth
import gspread
from google.auth import default, iam

# Authenticate this Colab session with Google before load_scoliosis_data() asks
# google.auth.default() for credentials.
auth.authenticate_user()

def load_scoliosis_data():
    """Read the scoliosis worksheet from Google Sheets and split it into train/test sets.

    The first worksheet row supplies the column names. The 'Patients' and 'y'
    columns are removed from the features, and 'y' is used as the target.
    Values are returned exactly as delivered by `get_all_values()`; callers
    cast them to numeric types.

    Returns:
        tuple: (X_train, y_train, X_test, y_test) from a 70/30 split with
        random_state=50.
    """
    credentials, _ = default()
    scoped_credentials = credentials.with_scopes([
        'https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive',
    ])
    sheet = (
        gspread.authorize(scoped_credentials)
        .open('scoliosis_data')
        .worksheet('health-scoliotic patients')
    )
    rows = sheet.get_all_values()
    frame = DataFrame(rows[1:], columns=rows[0])

    features = frame.drop(['Patients', 'y'], axis=1)
    labels = frame['y']
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3, random_state=50)

    return X_train, y_train, X_test, y_test

if __name__ == "__main__":
    X_train, y_train, X_test, y_test = load_scoliosis_data()

    # Cast X_train and X_test to floats
    X_train = X_train.astype(float)
    X_test = X_test.astype(float)

    # Cast y_train and y_test to floats
    y_train = y_train.astype(float)
    y_test = y_test.astype(float)

    # One input feature per column of X_train; four members with two hidden layers of 64 units.
    clf = EnsembleNNClassifier(input_shape=X_train.iloc[0].shape, layers_units=(64,64,), n_members=4)

    print('Start training...')
    clf.fit(X_train, y_train,
                     epochs= 100,
                     batch_size= 8)
    print('End training')
    acc = clf.evaluate(X_test, y_test)
    # Format the percentage directly: rounding first and multiplying afterwards can
    # print float artefacts, and `round` is shadowed by numpy's version in this cell.
    print(f'Balanced accuracy of the model: {acc * 100:.1f}')
    print('Saving the model')
    clf.save('model')
    print('Loading the model')
    loaded_clf = EnsembleNNClassifier.load('model')
    # Round-trip check: the reloaded ensemble is scored on the same test set.
    print('Evaluating the loaded model:', loaded_clf.evaluate(X_test, y_test))

    # predict() returns one single-element row per sample; keep just the label.
    ensemble_predictions = [x[0] for x in loaded_clf.predict(X_test.iloc[:10])]

    print('Making predictions using the loaded model',ensemble_predictions )


**This code visualizes the predictions for the first 10 patients in the test set, showing the predicted probability that each patient has scoliosis: values near 1 mean the patient has scoliosis, and values near 0 mean the patient does not.**

In [None]:
import matplotlib.pyplot as plt

# Load the model
loaded_clf = EnsembleNNClassifier.load('model')

# Load X_test (the last two items returned are X_test and y_test)
X_test, _ = load_scoliosis_data()[-2:]

# Cast X_test to float
X_test = X_test.astype(float)


# Ensemble probabilities for the first 10 rows in the test set.
# predict() rounds to hard 0/1 labels, so predict_members() is used to get
# the un-rounded sigmoid outputs that the chart is labelled with.
predictions = loaded_clf.predict_members(X_test.iloc[:10])

# Extract predicted probabilities of having scoliosis
probabilities = predictions[:, 0]

# Size the axis from the data so a test set with fewer than 10 rows still plots.
patient_positions = range(len(probabilities))

# Create a bar graph to visualize the predictions
plt.figure(figsize=(10, 6))
plt.bar(patient_positions, probabilities, color='blue')
plt.ylim(0, 1)
plt.xlabel('Patients')
plt.ylabel('Probabilities Patient has Scoliosis')
plt.title('Predicted Probabilities for the First 10 Patients')
plt.xticks(patient_positions, [f'Patient {i}' for i in range(1, len(probabilities) + 1)])
plt.show()


**Main**

Loads the data, reloads the saved ensemble, and uses it to make predictions.

In [None]:
from pandas import DataFrame
from sklearn.model_selection import train_test_split
from sklearn import metrics
from google.colab import auth
import gspread
from google.auth import default, iam

# Authenticate this Colab session with Google before load_scoliosis_data() asks
# google.auth.default() for credentials.
auth.authenticate_user()

def load_scoliosis_data():
    """Fetch the 'health-scoliotic patients' worksheet and return a train/test split.

    The header row becomes the column names; 'Patients' and 'y' are dropped
    from the feature frame and 'y' is the target. No dtype conversion is done
    here, so the caller is expected to cast the values.

    Returns:
        tuple: (X_train, y_train, X_test, y_test), 30% of the rows held out
        for testing with random_state=50.
    """
    scopes = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']
    creds, _ = default()
    gc = gspread.authorize(creds.with_scopes(scopes))
    table = gc.open('scoliosis_data').worksheet('health-scoliotic patients').get_all_values()
    header, *records = table
    dataset = DataFrame(records, columns=header)

    target = dataset['y']
    inputs = dataset.drop(['Patients','y'], axis=1)
    X_train, X_test, y_train, y_test = train_test_split(inputs, target, test_size=0.3, random_state=50)
    return X_train, y_train, X_test, y_test

if __name__ == "__main__":
    X_train, y_train, X_test, y_test = load_scoliosis_data()

    # Cast X_train and X_test to floats
    X_train = X_train.astype(float)
    X_test = X_test.astype(float)

    # Cast y_train and y_test to floats
    y_train = y_train.astype(float)
    y_test = y_test.astype(float)

    print("Shape of X_train:", X_train.shape)
    print("Shape of y_train:", y_train.shape)
    print("Shape of X_test:", X_test.shape)
    print("Shape of y_test:", y_test.shape)


    print('Loading the model')
    # Assuming you have already loaded the EnsembleNNClassifier
    loaded_clf = EnsembleNNClassifier.load('model')
    # Maps the rounded ensemble output to a human-readable status.
    status_dict = {1: 'has Scoliosis', 0: 'is Healthy'}

    # feature_names = 'x1','x2','x3','x4','x5','x6','x7','x8','x9','x10','x11','x12','x13','x14','x15','x16','x17','x18','x19','x20','x21','x22','x23','x24','x25','x26','x27'

    # Making predictions for the first 10 rows in the test set
    ensemble_predictions = [f'Subject {i} ' + status_dict[int(x[0])] for i, x in enumerate(loaded_clf.predict(X_test.iloc[:10]))]
    print()
    print('-------------------------------------------------------------------------' )
    print()
    print('Making predictions for the first 10 rows in the test set' )
    print(ensemble_predictions)
    print()
    print('-------------------------------------------------------------------------' )
    print()
    # Making predictions for two candidate subjects
    candidate_subject = [30.525,-12.75,-3,1.66,12.325,0.395,-0.000718327,-0.3780369,-0.672629766,-0.810873309,-0.909343173,67.085,26.475,41.875,32.18,17.385,3.95,-6.335,0.72,-6.335,7.055,-3.345,2.95,6.23,6.23,-3.33,9.56]
    candidate_subject_1 = [8.37,-12,6,2.12,12.93,3.35,0.007217427,-0.329721479,-0.562601289,-0.787001093,-0.932781819,63.14,37.33,45.21,26.18,10.71,2.17,5.28,5.28,-1.32,6.61,0.91,1.58,-3.13,1.94,-3.13,5.07]
    # Hand the model a (1, n_features) float array rather than a nested Python
    # list, which not every Keras version accepts as input data.
    prediction = loaded_clf.predict(np.array([candidate_subject], dtype=float))[0]
    # `prediction` is a one-element row; index it explicitly instead of relying
    # on int() to collapse an array to a scalar.
    status = status_dict[int(prediction[0])]
    print(f'The candidate subject {status}')
    print()
    print('-------------------------------------------------------------------------' )
    print()
    prediction = loaded_clf.predict(np.array([candidate_subject_1], dtype=float))[0]
    status = status_dict[int(prediction[0])]
    print(f'The candidate subject {status}')
    print()
    print('-------------------------------------------------------------------------' )
    print()

    # Randomly select a candidate subject.
    # np.random is named explicitly: the bare name `random` depends on which
    # earlier cell ran last, and the stdlib and NumPy `randint` disagree on
    # whether the upper bound is included. randint(n) draws from 0..n-1, so
    # every test row can be selected.
    random_index = np.random.randint(len(X_test))
    # NOTE(review): this is the DataFrame index label, not a value from the
    # dropped 'Patients' column — confirm that is the intended patient number.
    random_patient_number = X_test.index[random_index]  # Extracting the patient number
    random_candidate = X_test.iloc[random_index]
    random_candidate_label = y_test.iloc[random_index]

    # Reshape the random candidate to match the input shape of the model
    random_candidate = np.array(random_candidate).reshape(1, -1)

    # Make predictions for the random candidate
    prediction = loaded_clf.predict(random_candidate)[0]
    status = status_dict[int(prediction[0])]
    print(f'The randomly selected candidate subject {status}.')
    print(f'Patient number: {random_patient_number}')  # Printing the patient number



**Creates .png files showing the architecture of each one of the models**

In [None]:
from tensorflow.keras.utils import plot_model

# Rebuild the ensemble from the 'model' folder (parameters.pkl plus one weights file per member)
loaded_clf = EnsembleNNClassifier.load('model')

# Write one architecture diagram per member: model_0_architecture.png, model_1_architecture.png, ...
for i, model in enumerate(loaded_clf.members):
    plot_model(model, to_file=f'model_{i}_architecture.png', show_shapes=True)


**Bagging Method**

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Define base classifier
base_classifier = DecisionTreeClassifier()

# Create bagging classifier: 10 decision trees with a fixed seed
bagging_clf = BaggingClassifier(base_classifier, n_estimators=10, random_state=42)

# Fit the BaggingClassifier on training data
# (X_train / y_train / X_test / y_test come from the most recently run data-loading cell)
bagging_clf.fit(X_train, y_train)

# Evaluate the BaggingClassifier
accuracy = bagging_clf.score(X_test, y_test)
# round() works for both Python floats and NumPy scalars; the `.round` method
# only exists on NumPy scalars and fails when score() returns a plain float.
print("Bagging Classifier Accuracy:", round(accuracy * 100, 3))


**Boosting Method**

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Create AdaBoost classifier: 50 boosting rounds with a fixed seed
adaboost_clf = AdaBoostClassifier(n_estimators=50, random_state=42)

# Fit the AdaBoostClassifier on training data
# (X_train / y_train / X_test / y_test come from the most recently run data-loading cell)
adaboost_clf.fit(X_train, y_train)

# Evaluate the AdaBoostClassifier
accuracy = adaboost_clf.score(X_test, y_test)
# round() works for both Python floats and NumPy scalars; the `.round` method
# only exists on NumPy scalars and fails when score() returns a plain float.
print("AdaBoost Classifier Accuracy:", round(accuracy * 100, 3))


**Stacking Method**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

# Define base classifiers
base_classifiers = [
    RandomForestClassifier(n_estimators=100, random_state=42),
    GradientBoostingClassifier(n_estimators=100, random_state=42)
]

# Out-of-fold class probabilities from each base classifier (5-fold CV), so the
# meta-model is trained on predictions for rows the base model did not see.
# The loop variable is not called `clf`, so the ensemble classifier held in the
# notebook-level `clf` is not overwritten.
base_predictions = []
for base_clf in base_classifiers:
    predictions = cross_val_predict(base_clf, X_train, y_train, cv=5, method='predict_proba')
    base_predictions.append(predictions)

# Concatenate base predictions column-wise into the meta-model's training features
X_meta_train = np.concatenate(base_predictions, axis=1)

# Train meta-model
meta_model = LogisticRegression()
meta_model.fit(X_meta_train, y_train)

# Generate meta features for test set: refit each base classifier on the full
# training set, then take its class probabilities on the test set.
base_test_predictions = []
for base_clf in base_classifiers:
    base_clf.fit(X_train, y_train)
    predictions = base_clf.predict_proba(X_test)
    base_test_predictions.append(predictions)

X_meta_test = np.concatenate(base_test_predictions, axis=1)

# Make predictions using meta-model
ensemble_predictions = meta_model.predict(X_meta_test)
accuracy = metrics.accuracy_score(y_test, ensemble_predictions)
# round() works for both Python floats and NumPy scalars; the `.round` method
# only exists on NumPy scalars and fails when the metric returns a plain float.
print("Stacking Ensemble Accuracy:", round(accuracy * 100, 3))



**MLP Classifier**

In [None]:
import os
import pickle
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import balanced_accuracy_score

class MLPModelClassifier:
    """Ensemble of independently trained scikit-learn `MLPClassifier` models.

    The ensemble prediction is the element-wise median of the members'
    predicted labels.
    """

    def __init__(self, input_shape, hidden_layer_sizes=(64, 64), n_estimators=5):
        """
        Args:
            input_shape (tuple): Shape of one input sample. Stored and saved with
                the model; not used by the classifiers themselves.
            hidden_layer_sizes (tuple, optional): Passed to every `MLPClassifier`.
                Defaults to (64, 64).
            n_estimators (int, optional): Number of classifiers to train. Defaults to 5.
        """
        self.input_shape = input_shape
        self.hidden_layer_sizes = hidden_layer_sizes
        self.n_estimators = n_estimators
        self.classifiers = []

    def fit(self, X_train, y_train):
        """Trains `n_estimators` new classifiers on the same training data.

        Args:
            X_train: Training inputs.
            y_train: Training targets.
        """
        # Build the new members in a separate list and swap it in at the end, so
        # refitting replaces the previous members instead of appending to them.
        fitted = []
        for _ in range(self.n_estimators):
            classifier = MLPClassifier(hidden_layer_sizes=self.hidden_layer_sizes)
            classifier.fit(X_train, y_train)
            fitted.append(classifier)
        self.classifiers = fitted

    def predict(self, X):
        """Returns the element-wise median of the members' predicted labels.

        Args:
            X: Input samples.

        Returns:
            Numpy array with one value per sample.

        Raises:
            RuntimeError: If the ensemble holds no trained classifiers.
        """
        if not self.classifiers:
            # Without this guard np.median of an empty list silently yields NaN.
            raise RuntimeError("The model should be fitted to make predictions")
        # NOTE(review): presumably labels are 0/1; with an even number of members a
        # tied vote then yields 0.5, which is not a valid label — confirm n_estimators is odd.
        predictions = [classifier.predict(X) for classifier in self.classifiers]
        return np.median(predictions, axis=0)

    def evaluate(self, X_test, y_test):
        """Scores the ensemble predictions with `balanced_accuracy_score`.

        Args:
            X_test: Input data for testing.
            y_test: Target data for testing.

        Returns:
            float: balanced accuracy of the predictions.
        """
        y_pred = self.predict(X_test)
        return balanced_accuracy_score(y_test, y_pred)

    def save(self, folder_path="."):
        """Pickles the constructor parameters and every classifier into `folder_path`.

        Writes `parameters.pkl` plus one `classifier_<idx>.pkl` per member,
        creating the folder if needed.

        Args:
            folder_path (str, optional): Destination folder. Defaults to the
                current directory.
        """
        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
        os.makedirs(folder_path, exist_ok=True)
        parameters = {
            "input_shape": self.input_shape,
            "hidden_layer_sizes": self.hidden_layer_sizes,
            "n_estimators": self.n_estimators
        }
        with open(os.path.join(folder_path, "parameters.pkl"), "wb") as f:
            pickle.dump(parameters, f)
        for idx, classifier in enumerate(self.classifiers):
            with open(os.path.join(folder_path, f"classifier_{idx}.pkl"), "wb") as f:
                pickle.dump(classifier, f)

    @classmethod
    def load(cls, folder_path="."):
        """Restores an ensemble previously written by `save`.

        Args:
            folder_path (str, optional): Folder to read from. Defaults to the
                current directory.

        Returns:
            MLPModelClassifier: Ensemble holding the unpickled classifiers.
        """
        # SECURITY: pickle.load can execute arbitrary code; only load folders you trust.
        with open(os.path.join(folder_path, "parameters.pkl"), "rb") as f:
            parameters = pickle.load(f)
        ensemble = cls(parameters["input_shape"], parameters["hidden_layer_sizes"], parameters["n_estimators"])
        # Expects one classifier file per estimator recorded in the parameters.
        for idx in range(ensemble.n_estimators):
            with open(os.path.join(folder_path, f"classifier_{idx}.pkl"), "rb") as f:
                classifier = pickle.load(f)
                ensemble.classifiers.append(classifier)
        return ensemble


**Main that Runs the MLP Classifier**

In [None]:
from pandas import DataFrame
from sklearn.model_selection import train_test_split
from sklearn import metrics
from google.colab import auth
import gspread
from google.auth import default, iam
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt

# Authenticate this Colab session with Google before load_scoliosis_data() asks
# google.auth.default() for credentials.
auth.authenticate_user()

def load_scoliosis_data():
    """Read the scoliosis worksheet from Google Sheets and return a numeric train/test split.

    The header row becomes the column names; 'Patients' and 'y' are dropped
    from the features and 'y' is the target. Features are cast to float and
    the target to int here, so the result can be fed to an estimator directly.

    Returns:
        tuple: (X_train, y_train, X_test, y_test) from a 70/30 split with
        random_state=50.

    Raises:
        ValueError: If a feature or target cell cannot be converted to a number.
    """
    creds, project = default()
    client = gspread.authorize(creds.with_scopes(['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']))
    spreadsheet = client.open('scoliosis_data')
    worksheet = spreadsheet.worksheet('health-scoliotic patients')
    data = worksheet.get_all_values()
    dataset = DataFrame(data[1:], columns=data[0])

    # get_all_values() yields cell text, so convert explicitly rather than
    # relying on the estimator to coerce string columns.
    X = dataset.drop(['Patients','y'], axis=1).astype(float)
    y = dataset['y'].astype(int) #KN added .astype(int). This is to ensure y is of integer type.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=50)
    return X_train, y_train, X_test, y_test

if __name__ == "__main__":
    X_train, y_train, X_test, y_test = load_scoliosis_data()

    print("Shape of X_train:", X_train.shape)
    print("Shape of y_train:", y_train.shape)
    print("Shape of X_test:", X_test.shape)
    print("Shape of y_test:", y_test.shape)

    # Initialize the MLP ensemble: 5 classifiers, each with two hidden layers of 64 units
    clf = MLPModelClassifier(input_shape=(27,), hidden_layer_sizes=(64, 64), n_estimators=5)

    # KN: the ensemble must be fitted before predicting; this replaces an earlier bare clf.predict() call.

    # Fit the model with training data
    clf.fit(X_train, y_train)

    # Predict using the trained model on the test data
    predictions = clf.predict(X_test)
    print("Predictions:", predictions)

    # Evaluate the model (balanced accuracy, printed as a percentage)
    accuracy = clf.evaluate(X_test, y_test)
    print("Balanced accuracy of the model:", accuracy*100)

