In [936]:
import os
import pandas as pd

In [937]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, balanced_accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
## PCA
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import SMOTE

## svm
from sklearn.svm import SVC
import pandas as pd


In [938]:
ML_CLASSIFIER = ['MLP'] # "ELM"

def classify(X_train, y_train, X_test, y_test, classifier, search_type='grid'):
    """
    Classify the data using a Random Forest Classifier
    :param X_train: Training data
    :param y_train: Training labels
    :param X_test: Testing data
    :param y_test: Testing labels
    :param search_type: Type of search to perform
    :return: Accuracy of the classifier
    """
    # Create a Random Forest Classifier
    clf = RandomForestClassifier()

   
    # clf = MLPClassifier(activation='logistic', hidden_layer_sizes=(100,), max_iter=500, solver='sgd') ### getting error using this

    # Define the parameters to search
    # param_dict = {
    #     'hidden_layer_sizes': [(50,50,50),(100,100, 50), (150,100,50),],
    #     'activation': ['identity', 'logistic', 'tanh', 'relu'],
    #     'solver': ['lbfgs', 'sgd', 'adam'],
    #     'max_iter': [500, 1000],
        
    # }
    
    param_dict = {
    'n_estimators': [200],
    'criterion': ['gini'],
    'max_depth': [10],
    'min_samples_split': [5],
    # 'min_samples_leaf': [1],
    'min_weight_fraction_leaf': [0.4],
    'max_features': ['sqrt'],
    # 'max_leaf_nodes': [None],
    'min_impurity_decrease': [0.4],
    'bootstrap': [True],
    'oob_score': [False],
    # 'n_jobs': [-1],
    'random_state': [42],
    # 'verbose': [1],
    # 'warm_start': [True],
    # 'class_weight': [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}],
    'ccp_alpha': [0.45],
    # 'max_samples': [0.99],
    # 'monotonic_cst': [0],
} ### Zahid
    
    

    # Perform the search
    if search_type == 'grid':
        search = GridSearchCV(clf, param_grid=param_dict, cv=3, n_jobs=-1, scoring='balanced_accuracy')
    else:
        search = RandomizedSearchCV(clf, param_distributions=param_dict, cv=3, n_iter=50)
   
    search.fit(X_train, y_train)

    # Get the best parameters
    best_params = search.best_params_
    # print("Best parameters for MLP: ", best_params)

    ## best parameters for MLP: {'activation': 'logistic', 'hidden_layer_sizes': (100,), 'max_iter': 500, 'solver': 'sgd'}
    # Fit the model
    clf.fit(X_train, y_train)

    # Predict the test data
    y_pred = clf.predict(X_test)

    # Calculate the accuracy
    accuracy = balanced_accuracy_score(y_test, y_pred)
    
    return accuracy


In [939]:
data = pd.read_csv('BT-large-4c-dataset_results.csv')
data.head()

Unnamed: 0,Model,XGBoost,MLP,GaussianNB,Adaboost,KNN,RFClassifier,SVM_linear,SVM_sigmoid,SVM_RBF
0,resnet50,0.757914,0.76661,0.474908,0.37547,0.529114,0.378158,0.785267,0.240452,0.751452
1,resnet101,0.734727,0.742036,0.337184,0.415615,0.623156,0.353422,0.736031,0.274348,0.701952
2,densenet121,0.764345,0.726575,0.438129,0.541186,0.691637,0.375494,0.731575,0.646214,0.701006
3,densenet169,0.750967,0.722318,0.429917,0.469985,0.643436,0.385585,0.716531,0.65303,0.693462
4,vgg16,0.770259,0.747759,0.604003,0.522462,0.583584,0.356552,0.774233,0.623185,0.721531


In [940]:
## taking row-wise average of all the data 
data['average'] = data.drop("Model", axis=1).mean(axis=1)


In [941]:
data.drop("Model", axis=1).mean(axis=0)

XGBoost         0.744580
MLP             0.747059
GaussianNB      0.490830
Adaboost        0.474236
KNN             0.651222
RFClassifier    0.420770
SVM_linear      0.737906
SVM_sigmoid     0.546750
SVM_RBF         0.699480
average         0.612537
dtype: float64

In [942]:
## add coumn-wise average of all the data
#data.loc['average'] = data.drop("Model", axis=1).mean(axis=0)

In [943]:
data

Unnamed: 0,Model,XGBoost,MLP,GaussianNB,Adaboost,KNN,RFClassifier,SVM_linear,SVM_sigmoid,SVM_RBF,average
0,resnet50,0.757914,0.76661,0.474908,0.37547,0.529114,0.378158,0.785267,0.240452,0.751452,0.562149
1,resnet101,0.734727,0.742036,0.337184,0.415615,0.623156,0.353422,0.736031,0.274348,0.701952,0.546497
2,densenet121,0.764345,0.726575,0.438129,0.541186,0.691637,0.375494,0.731575,0.646214,0.701006,0.624018
3,densenet169,0.750967,0.722318,0.429917,0.469985,0.643436,0.385585,0.716531,0.65303,0.693462,0.607248
4,vgg16,0.770259,0.747759,0.604003,0.522462,0.583584,0.356552,0.774233,0.623185,0.721531,0.63373
5,vgg19,0.759245,0.753919,0.529857,0.46964,0.587515,0.304348,0.757488,0.551889,0.711272,0.602797
6,alexnet,0.738231,0.740731,0.533478,0.464235,0.637101,0.354683,0.771137,0.460598,0.669762,0.596662
7,resnext50_32x4d,0.730279,0.688534,0.411075,0.376697,0.523486,0.347885,0.662356,0.233913,0.643613,0.513093
8,resnext101_32x8d,0.734357,0.720009,0.396856,0.394528,0.629681,0.342444,0.670952,0.227758,0.713753,0.536704
9,shufflenet_v2_x1_0,0.734401,0.756058,0.546385,0.40859,0.664128,0.385223,0.753649,0.680268,0.728541,0.628583


In [944]:
## sort the data by average
data = data.sort_values(by='average', ascending=False)
data

Unnamed: 0,Model,XGBoost,MLP,GaussianNB,Adaboost,KNN,RFClassifier,SVM_linear,SVM_sigmoid,SVM_RBF,average
24,vit_base_patch32_384,0.764706,0.767432,0.496328,0.529875,0.703555,0.519078,0.777297,0.644541,0.681181,0.653777
15,vit_small_patch32_224,0.803164,0.771137,0.549337,0.519842,0.679366,0.45634,0.760776,0.599472,0.724225,0.651518
19,vit_small_patch16_224,0.774932,0.779515,0.531658,0.512069,0.715422,0.523732,0.736022,0.584319,0.694293,0.650218
20,vit_base_patch16_384,0.754515,0.747432,0.492955,0.51577,0.749045,0.528537,0.74188,0.617693,0.696222,0.649339
17,vit_base_patch8_224,0.744527,0.758919,0.491719,0.479298,0.704191,0.543143,0.764797,0.566646,0.705601,0.639871
13,vit_base_patch32_224,0.745541,0.719988,0.485991,0.550373,0.686108,0.514329,0.740405,0.630315,0.680065,0.639235
23,vit_small_patch16_384,0.699257,0.77215,0.508307,0.50074,0.722403,0.503193,0.748096,0.62602,0.671933,0.639122
12,vit_base_patch16_224,0.740922,0.778311,0.494005,0.532313,0.661403,0.482346,0.752162,0.613363,0.688947,0.638197
22,vit_small_patch32_384,0.74411,0.771419,0.48585,0.512402,0.722052,0.43762,0.745088,0.588813,0.718226,0.636176
4,vgg16,0.770259,0.747759,0.604003,0.522462,0.583584,0.356552,0.774233,0.623185,0.721531,0.63373


In [945]:
list(data.head(3)['Model'])

['vit_base_patch32_384', 'vit_small_patch32_224', 'vit_small_patch16_224']

### Top 3 performance model

In [946]:
top_model_combinations = [
                          ['vit_base_patch32_384', 'vit_small_patch32_224'],
                          ['vit_base_patch32_384', 'vit_small_patch16_224'],
                          ['vit_small_patch32_224', 'vit_small_patch16_224'],
                          ['vit_base_patch32_384', 'vit_small_patch32_224', 'vit_small_patch16_224']
                          ]

Ensemble vgg16 with Top 2 networks ['vit_base_patch32_384', 'vit_small_patch32_224', 'vgg16']

In [947]:



# top_model_combinations = [
#                           ['vit_base_patch32_384', 'vit_small_patch32_224'],
#                           ['vit_base_patch32_384', 'vgg16'],
#                           ['vit_small_patch32_224', 'vgg16'],
#                           ['vit_base_patch32_384', 'vit_small_patch32_224', 'vgg16']
#                           ]

In [948]:
#columns = ['Model', "XGBoost", 'MLP', 'GaussianNB', "Adaboost", "KNN", "RFClassifier", "SVM_linear", "SVM_sigmoid", "SVM_RBF", ]

# dataframe = pd.DataFrame(columns=columns)
# # add 12 rows to the dataframe with zero values 
# for model_list in top_model_combinations:
#     model_name = ' + '.join(model_list)
#     new_row = {'Model': model_name, "XGBoost":0, 'MLP': 0, 'GaussianNB': 0, "Adaboost": 0, "KNN": 0, "RFClassifier": 0, "SVM_linear": 0, "SVM_sigmoid": 0, "SVM_RBF": 0} 
#     dataframe.loc[len(dataframe)] = new_row

main_path = 'extracted_features_BT-large-4c'
for ml_classifier in ML_CLASSIFIER:
    for model_list in top_model_combinations:
        print('Model List:', model_list)
        ensemble_X_train = []
        ensemble_X_test = []

        for model in model_list:
            sub_dir = os.path.join(main_path, model)
            # Load the data
            X_train = np.load(os.path.join(sub_dir, 'train_data_array_features.npy'))
            y_train = np.load(os.path.join(sub_dir, 'train_data_array_labels.npy'))
            X_test = np.load(os.path.join(sub_dir, 'test_data_array_features.npy'))
            y_test = np.load(os.path.join(sub_dir, 'test_data_array_labels.npy'))


            ## squeeze the dimensions 1 from the features 
            X_train = np.squeeze(X_train, axis=1)
            X_test = np.squeeze(X_test, axis=1)

            ## concatenate the features on axis 1
            ensemble_X_train.append(X_train)
            ensemble_X_test.append(X_test)
            

        X_train = np.concatenate(ensemble_X_train, axis=1)
        y_train = y_train
        X_test = np.concatenate(ensemble_X_test, axis=1)
        y_test = y_test

        ## apply the standard scaler and PCA
        scaler = StandardScaler()
        X_train_normalized = scaler.fit_transform(X_train)
        X_test_normalized = scaler.transform(X_test)

        # Step 2 & 3: Apply PCA
        n_components = int(0.50 * X_train.shape[1])
        pca = PCA(n_components=n_components)
        X_train = pca.fit_transform(X_train_normalized)
        X_test = pca.transform(X_test_normalized)

        ## no of example in each class
        print("no of example in each class before SMOTE:", np.bincount(y_train))


        ## over sample the data
        smote = SMOTE(sampling_strategy='minority')
        X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

        ## no of example in each class
        print("no of example in each class after SMOTE:", np.bincount(y_resampled))


        print("no of features in X_train:", X_train.shape)

        
        # Classify the data
        accuracy = classify(X_train, y_train, X_test, y_test, ml_classifier)
        print('Accuracy:', accuracy)
        # model_name = ' + '.join(model_list)
        break

    #     dataframe.loc[dataframe['Model'] == model_name, ml_classifier] = accuracy

    # print(dataframe)
    # dataframe.to_csv('BT-large-4c-dataset_results_top_three_normalized_PCA.csv', index=False)


Model List: ['vit_base_patch32_384', 'vit_small_patch32_224']
no of example in each class before SMOTE: [2475 2441 1124 2511]
no of example in each class after SMOTE: [2475 2441 2511 2511]
no of features in X_train: (8551, 576)


ValueError: Invalid parameter 'bootstrap' for estimator MLPClassifier(activation='logistic', max_iter=500, solver='sgd'). Valid parameters are: ['activation', 'alpha', 'batch_size', 'beta_1', 'beta_2', 'early_stopping', 'epsilon', 'hidden_layer_sizes', 'learning_rate', 'learning_rate_init', 'max_fun', 'max_iter', 'momentum', 'n_iter_no_change', 'nesterovs_momentum', 'power_t', 'random_state', 'shuffle', 'solver', 'tol', 'validation_fraction', 'verbose', 'warm_start'].

In [933]:
dataframe

NameError: name 'dataframe' is not defined

In [22]:
dataframe

Unnamed: 0,Model,XGBoost,MLP,GaussianNB,Adaboost,KNN,RFClassifier,SVM_linear,SVM_sigmoid,SVM_RBF
0,vit_base_patch32_384 + vit_small_patch32_224,0.77188,0.767432,0.325871,0.60863,0.698297,0.439245,0.787015,0.682358,0.746419
1,vit_base_patch32_384 + vit_small_patch16_224,0.748502,0.772297,0.331926,0.607455,0.689115,0.445041,0.774797,0.657906,0.732488
2,vit_small_patch32_224 + vit_small_patch16_224,0.753231,0.757759,0.410204,0.545602,0.734597,0.392153,0.719953,0.61969,0.748621
3,vit_base_patch32_384 + vit_small_patch32_224 +...,0.742297,0.767432,0.328057,0.664034,0.685562,0.408848,0.761419,0.674428,0.739988


In [26]:
dataframe

Unnamed: 0,Model,XGBoost,MLP,GaussianNB,Adaboost,KNN,RFClassifier,SVM_linear,SVM_sigmoid,SVM_RBF
0,vit_base_patch32_384 + vit_small_patch32_224,0.77215,0.770259,0.268695,0.53974,0.742775,0.444307,0.800811,0.682358,0.746419
1,vit_base_patch32_384 + vit_small_patch16_224,0.752623,0.769932,0.281045,0.515953,0.730466,0.44948,0.787297,0.670613,0.732488
2,vit_small_patch32_224 + vit_small_patch16_224,0.75661,0.769189,0.309645,0.494572,0.747957,0.455855,0.753919,0.620195,0.746002
3,vit_base_patch32_384 + vit_small_patch32_224 +...,0.756554,0.774189,0.249423,0.546492,0.740222,0.381939,0.769054,0.653766,0.734988


: 

: 