In [None]:
# Make Google Drive visible inside the Colab VM under /content/drive.
from google.colab import drive

drive.mount("/content/drive")

In [None]:
# Copy the dataset archive from Drive into /content and extract it there.
# -o overwrites existing files without asking, so re-running this cell never
# blocks on unzip's interactive "replace?" prompt; the absolute paths make the
# cell independent of the current working directory.
!cp "/content/drive/My Drive/Data/dataset.zip" /content/
!unzip -o /content/dataset.zip -d /content/

In [None]:
# %pip (instead of !pip) installs into the environment of the running kernel.
# One package per line so a failed build of one does not block the others.
# NOTE(review): versions are not pinned — pin them once known-good versions are confirmed.
%pip install -q spectral
%pip install -q pysptools
%pip install -q scikit-plot
%pip install -q pygco

In [None]:
# python3-pip is the Python 3 packaging of pip (python-pip is the Python 2 one).
!sudo apt-get -y install python3-pip
# Upgrade through %pip so the packages land in the kernel's own environment.
# NOTE(review): upgrading numpy/pandas/scikit-learn may require a runtime restart
# before the new versions are picked up — confirm on the target Colab image.
%pip install -q -U setuptools pandas numpy scipy scikit-learn

In [None]:
import scipy.io
import numpy as np
import os
import time
import math
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import spectral as spy
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
import time
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score
import shutil
import itertools
import xgboost as xgb
import scikitplot as skplt
import seaborn as sns

In [None]:
def list2array(X, isdata=True):
    """Stack the items of a list row-wise into a single array.

    Parameters
    ----------
    X : sequence
        ``isdata=True``: 2-D arrays that all have as many columns as ``X[0]``.
        ``isdata=False``: scalars (for example sample indices).
    isdata : bool, default True
        Selects which of the two input layouts above is expected.

    Returns
    -------
    numpy.ndarray
        ``isdata=True``: array of shape ``(total_rows, n_columns)``.
        ``isdata=False``: column vector of shape ``(len(X), 1)``; an empty ``X``
        yields an empty array of shape ``(0,)``.
        The values are promoted against a float64 seed row, so integer input
        comes back as float64.

    Raises
    ------
    IndexError
        If ``isdata=True`` and ``X`` is empty (``X[0]`` is needed for the width).
    """
    if isdata:
        seed_row = np.zeros(shape=(1, X[0].shape[1]))
    else:
        if len(X) == 0:
            return np.zeros(shape=(0,))
        seed_row = np.zeros(shape=(1,))
    # One vstack over all items instead of growing the array item by item
    # (which re-copies the whole result on every step). The float seed row
    # fixes width and dtype promotion and is sliced off again.
    return np.vstack([seed_row, *X])[1:]

# Class names, looked up by integer label (position in the list = label value).
indianpines_class_names = [
    'background',              # 0
    'alfalfa',                 # 1
    'corn-notill',             # 2
    'corn-min',                # 3
    'corn',                    # 4
    'grass/pasture',           # 5
    'grass/trees',             # 6
    'grass/pasture-mowed',     # 7
    'hay-windrowed',           # 8
    'oats',                    # 9
    'soybeans-notill',         # 10
    'soybeans-min',            # 11
    'soybean-clean',           # 12
    'wheat',                   # 13
    'woods',                   # 14
    'bldg-grass-tree-drives',  # 15
    'stone-steel towers',      # 16
]

def bar_plot(df):
    """Draw a count plot of ``df['class']`` and label every bar with its share of all rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'class'`` column.
    """
    total_rows = df.shape[0]
    plt.figure(figsize=(14, 8))
    axes = sns.countplot(x='class', data=df[['class']])
    for bar in axes.patches:
        count = bar.get_height()
        share_text = '{:.1f}%'.format(100 * count / total_rows)
        # Place the percentage slightly right of the bar's left edge, just above its top.
        axes.annotate(share_text, (bar.get_x() + 0.1, count + 5))
    plt.title('Bar Plot', fontsize=16)
    plt.xlabel('class', fontsize=14)
    plt.ylabel('Class count with percentage', fontsize=14)
    plt.show()

def box_plot(n, df):
    """Show a box plot of column ``band-<n>`` grouped by the ``class`` column.

    Parameters
    ----------
    n : int
        Band number; the column read is ``'band-' + str(n)``.
    df : pandas.DataFrame
        Must contain the ``'class'`` column and the requested band column.
    """
    band_column = 'band-' + str(n)
    plt.figure(figsize=(16, 6))
    sns.boxplot(x=df["class"], y=df[band_column], width=0.3)
    plt.xlabel('Class', fontsize=14)
    plt.ylabel(f'Band-{n}', fontsize=14)
    plt.title('Box Plot', fontsize=16)
    plt.show()

def distribution_plot(n, df):
    """Show the value distribution (100-bin histogram via ``sns.distplot``) of column ``band-<n>``.

    Parameters
    ----------
    n : int
        Band number; the column read is ``'band-' + str(n)``.
    df : pandas.DataFrame
        Must contain the requested band column.
    """
    band_label = str(n)
    band_values = df['band-' + band_label]
    plt.figure(figsize=(16, 6))
    # NOTE(review): distplot is deprecated in recent seaborn releases — confirm
    # the installed version before relying on it.
    sns.distplot(band_values, color='mediumSpringGreen', bins=100, hist_kws={'alpha': 0.4})
    plt.title('Distribution Plot of Band - ' + band_label, fontsize=16)
    plt.xlabel('Band - ' + band_label, fontsize=14)
    plt.show()

def plot_confusion_matrix(y_true, y_pred, classes,
                          title='Confusion Matrix',
                          normalize=False,
                          cmap=plt.cm.Blues):
    """Plot, save and show the confusion matrix of ``y_pred`` against ``y_true``.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels, passed straight to ``confusion_matrix``.
    classes : sequence of str
        Tick labels for both axes, in matrix order.
    title : str
        Figure title; the figure is also saved as ``'<title>.png'``.
    normalize : bool
        If True every row is divided by its sum (each row then sums to 1).
    cmap : matplotlib colormap
        Colormap used by ``imshow``.
    """
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A label that is predicted but never true has an all-zero row; divide
        # such rows by 1 so they stay 0 instead of becoming NaN (a NaN would
        # also poison the text-colour threshold below).
        safe_totals = np.where(row_totals == 0, 1, row_totals)
        cm = cm.astype('float') / safe_totals
        print("Normalized Confusion Matrix")
    else:
        print('Confusion Matrix')

    plt.figure(figsize=(15, 15))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out only after the axis labels exist so they are part of the fitted
    # area in the saved image.
    plt.tight_layout()
    plt.savefig(f'{title}.png')
    plt.show()



def _classifier_specs():
    """Build the per-classifier configuration table used by ``classification_pipeline``.

    Every entry holds: an estimator factory (``make``), the grid searched when
    model selection is on (``search_grid``), the single-point grid used
    otherwise (``fixed_grid``), and the exact console labels, figure titles and
    file names of that classifier.
    """
    svm_costs = [2 ** k for k in range(-9, 10)]  # 2^-9 ... 2^9
    # The keys are forwarded to XGBClassifier through GridSearchCV, so they must
    # be real parameter names: 'n_estimators' / 'n_jobs' (the former spellings
    # 'n_estimator' / 'nthreads' are not XGBClassifier parameters).
    # NOTE(review): 3000 trees of depth 10 under 10-fold CV is expensive —
    # confirm this budget is intended.
    xgb_grid = {'objective': ['multi:softmax'],
                'num_class': [16],
                'tree_method': ['auto'],
                'eta': [0.1],
                'gamma': [0],
                'min_child_weight': [10],
                'colsample_bytree': [1],
                'subsample': [1],
                'max_depth': [10],
                'n_estimators': [3000],
                'n_jobs': [-1],
                }
    return {
        "KNN": {
            'make': KNeighborsClassifier,
            'search_grid': {'n_neighbors': [3, 5, 7, 9]},
            'fixed_grid': {'n_neighbors': [9]},
            'header': "KNN",
            'log_fmt': '(KNN) Train_Acc=%.3f, Test_Cla_Acc=%.3f, (Time_cost=%.3f)',
            'report': "KNN Class Report: \n",
            'cm_title': 'KNN Confusion Matrix',
            'roc_title': 'KNN ROC Curves',
            'map_title': 'Classification Map (KNN)',
            'map_file': 'KNN_classification_map.png',
        },
        "RBF-SVM": {
            'make': lambda: SVC(probability=True),
            'search_grid': {'C': svm_costs + [100], 'gamma': ['scale']},
            'fixed_grid': {'C': [2 ** 5], 'gamma': ['scale']},
            'header': "RBF_SVM",
            'log_fmt': '(RBF_SVM) Train_Acc=%.3f, Test_Cla_Acc=%.3f,(Time_cost=%.3f)',
            'report': "RBF_SVM Class Report: \n",
            'cm_title': 'RBF_SVM Confusion_Matrix',
            'roc_title': 'RBF_SVM ROC Curves',
            'map_title': 'Classification Map (RBF_SVM)',
            'map_file': 'RBF-SVM_classification_map.png',
        },
        "Poly-SVM": {
            'make': lambda: SVC(probability=True),
            'search_grid': {'C': svm_costs,
                            'gamma': ['scale'],
                            'kernel': ['poly'],
                            'degree': [2, 3, 4, 5, 6, 7, 8, 9, 10]},
            'fixed_grid': {'C': [1000],
                           'gamma': ['scale'],
                           'kernel': ['poly'],
                           'degree': [2]},
            'header': "Poly_SVM",
            'log_fmt': '(Poly_SVM) Train_Acc=%.3f, Test_Cla_Acc=%.3f,(Time_cost=%.3f)',
            'report': "Poly SVM Class Report: \n",
            'cm_title': 'Poly_SVM Confusion Matrix',
            'roc_title': 'Poly_SVM ROC Curves',
            'map_title': 'Classification Map (Poly SVM)',
            'map_file': 'Poly-SVM_classification_map.png',
        },
        "Xgboost": {
            'make': xgb.XGBClassifier,
            'search_grid': xgb_grid,
            'fixed_grid': xgb_grid,
            'header': "Xgboost",
            'log_fmt': '(Xgboost) Train_Acc=%.3f, Test_Cla_Acc=%.3f, (Time_cost=%.3f)',
            'report': "Xgboost Class Report: \n",
            'cm_title': 'Xgboost Confusion Matrix',
            'roc_title': 'Xgboost ROC Curves',
            'map_title': 'Classification Map (XgBoost)',
            'map_file': 'XgBoost_classification_map.png',
        },
        "RF": {
            'make': RandomForestClassifier,
            'search_grid': {'n_estimators': [5, 10, 20, 50, 100, 200, 300, 400, 500, 600, 700],
                            'min_samples_split': [2, 5, 10, 15, 20, 25, 30, 35, 40, 100],
                            'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
            'fixed_grid': {'n_estimators': [300],
                           'min_samples_split': [20],
                           'min_samples_leaf': [6]},
            'header': "Random Forest",
            'log_fmt': '(Random Forest) Train_Acc=%.3f, Test_Cla_Acc=%.3f, (Time_cost=%.3f)',
            'report': "Random Forest Class Report: \n",
            'cm_title': 'Random Forest Confusion Matrix',
            'roc_title': 'Random Forest ROC Curves',
            'map_title': 'Classification Map (Random Forest)',
            'map_file': 'RF_classification_map.png',
        },
        "GB": {
            'make': GradientBoostingClassifier,
            'search_grid': {'n_estimators': [10, 50, 100, 200, 300]},
            'fixed_grid': {'n_estimators': [300]},
            'header': "GB",
            'log_fmt': '(Gradient Boosting) Train_Acc=%.3f, Test_Cla_Acc=%.3f,(Time_cost=%.3f)',
            'report': "Gradient Boosting Class Report: \n",
            'cm_title': 'Gradient Boosting Confusion Matrix',
            'roc_title': 'GB ROC Curves',
            'map_title': 'Classification Map (GB)',
            'map_file': 'GB_classification_map.png',
        },
        "MLR": {
            'make': MLPClassifier,
            'search_grid': {'hidden_layer_sizes': [[50, 50], [50, 100], [50, 200], [100, 100],
                                                   [100, 200], [200, 100], [200, 200], [200, 300],
                                                   [200, 500], [300, 300], [300, 400], [300, 500],
                                                   [400, 500], [500, 500]]},
            # One candidate: a network with two hidden layers (400 and 500 units).
            # A flat [400, 500] would be read as two separate one-layer candidates.
            'fixed_grid': {'hidden_layer_sizes': [(400, 500)]},
            'header': "MLR",
            'log_fmt': '(MLP) Train_Acc=%.3f, Test_Cla_Acc=%.3f,(Time_cost=%.3f)',
            'report': "MLR Class Report: \n",
            'cm_title': 'MLR Confusion Matrix',
            'roc_title': 'MLR ROC Curves',
            'map_title': 'Classification Map (MLP)',
            'map_file': 'MLP_classification_map.png',
        },
    }


def _plot_classification_map(y_train, y_pred, train_indices, test_indices,
                             n_pixels, height, width, title, filename):
    """Show and save a label image: true labels at training pixels, predictions at test pixels."""
    # Accept plain lists as well as (n, 1) index arrays.
    train_pos = np.asarray(train_indices).astype(int).ravel()
    test_pos = np.asarray(test_indices).astype(int).ravel()
    # Pixels that belong to neither split keep the value 0.
    label_image = np.zeros(n_pixels, dtype=np.asarray(y_train).dtype)
    label_image[train_pos] = np.asarray(y_train).ravel()
    label_image[test_pos] = np.asarray(y_pred).ravel()

    plt.figure(figsize=(10, 10))
    plt.imshow(label_image.reshape((height, width)), cmap='jet')
    plt.colorbar()
    plt.axis('off')
    plt.title(title)
    plt.savefig(filename)
    plt.show()


def classification_pipeline(classifier,X_train,y_train,X_test,y_test,data_all, width,height,num_classes,test_indices, train_indices, num_train_each_class, model_selection=False):
    """Train one classifier, report its test performance and plot its results.

    The features are min-max scaled (scaler fitted on ``X_train`` only), the
    estimator is fitted through ``param_selection`` (10-fold grid search), and
    the function then prints train/test accuracy and a classification report,
    draws the normalised confusion matrix, the ROC curves and a classification
    map, and saves the confusion matrix and the map as PNG files.

    Parameters
    ----------
    classifier : str
        One of "KNN", "RBF-SVM", "Poly-SVM", "Xgboost", "RF", "GB", "MLR".
    X_train, y_train, X_test, y_test : array-like
        Train / test features and labels.
    data_all : array-like
        Features of every pixel; its row count must equal ``height * width``.
    width, height : int
        Image dimensions.
    num_classes, num_train_each_class
        Not used by this function; kept so existing calls keep working.
    test_indices, train_indices : sequence of int
        Row positions (within ``data_all``) of the test / training samples, in
        the same order as ``y_test`` / ``y_train``.
    model_selection : bool, default False
        True: search the full hyper-parameter grid and print the best setting.
        False: fit the single predefined setting.

    Returns
    -------
    (Cla_Map, cla_accuracy)
        ``Cla_Map``: integer predictions for every row of ``data_all`` reshaped
        to ``(width, height)`` and then transposed.
        ``cla_accuracy``: accuracy of the fitted model on the test split.

    Raises
    ------
    ValueError
        If ``classifier`` is not one of the supported names.
    """
    specs = _classifier_specs()
    if classifier not in specs:
        raise ValueError('Unknown classifier %r; expected one of %s'
                         % (classifier, sorted(specs)))
    spec = specs[classifier]

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test  = scaler.transform(X_test)
    data_all = scaler.transform(data_all)

    start_time = time.time()
    nfolds = 10
    param_grid = spec['search_grid'] if model_selection else spec['fixed_grid']
    # param_selection returns (best_params, fitted GridSearchCV). The search
    # object is used directly as the model, so every selected hyper-parameter
    # (not just a subset of them) is in effect.
    best_params, model = param_selection(spec['make'](), X_train, y_train, param_grid, nfolds)
    if model_selection:
        print(spec['header'])
        print("The parameter grid is:")
        print(param_grid)
        print("The best parameter is:")
        print(best_params)

    # NOTE(review): reshape(width, height) followed by a transpose treats the
    # flat pixel order as column-major, whereas the classification map below is
    # reshaped row-major — confirm which orientation callers expect.
    Cla_Map = model.predict(data_all).reshape(width,height).astype(int).transpose(1,0)
    cla_accuracy = model.score(X_test, y_test)
    print(spec['log_fmt'] % (model.score(X_train, y_train), cla_accuracy,
                             (time.time() - start_time)))

    y_pred = model.predict(X_test)
    # accuracy_score returns a fraction; scale it so the '%' sign is correct.
    print(f'Accuracy: {100 * accuracy_score(y_test, y_pred):.2f}%')
    print(spec['report'], classification_report(y_test, y_pred))
    plot_confusion_matrix(y_test, y_pred, classes=indianpines_class_names,
                          normalize=True, title=spec['cm_title'])
    skplt.metrics.plot_roc(y_test, model.predict_proba(X_test),
                           title=spec['roc_title'], figsize=(15, 15))

    _plot_classification_map(y_train, y_pred, train_indices, test_indices,
                             data_all.shape[0], height, width,
                             spec['map_title'], spec['map_file'])

    return Cla_Map, cla_accuracy

def param_selection(Clf, X_train, y_train, param_grid, nfolds):
    """Run a cross-validated grid search and return its outcome.

    Parameters
    ----------
    Clf : estimator
        Unfitted estimator handed to ``GridSearchCV``.
    X_train, y_train : array-like
        Training features and labels.
    param_grid : dict
        Grid of candidate parameter values.
    nfolds : int
        Number of cross-validation folds (``cv`` argument).

    Returns
    -------
    (best_params, grid_search)
        The ``best_params_`` dict and the fitted ``GridSearchCV`` object.
    """
    search = GridSearchCV(Clf, param_grid, cv=nfolds).fit(X_train, y_train)
    return search.best_params_, search

# Load Data

In [None]:
df_soil = pd.read_csv("/content/dataset/Dataset_hyper.csv")
# Printed explicitly: only the last expression of a cell is displayed automatically.
print(df_soil.loc[:,'class'].value_counts())
# Features are all columns but the last; the last column is used as the label.
# NOTE(review): assumes 'class' is the last column of the CSV — confirm.
X = df_soil.iloc[:, :-1].values
y = df_soil.iloc[:, -1].values
label_all = y
data_all = X
data = X
label = y
print(X.shape, y.shape)
height = 145
width = 145
band = 220
num_classes = 16
# Fail early with a clear message instead of a bare reshape error.
assert X.shape[0] == height * width, (
    f"expected {height * width} pixels ({height}x{width}), found {X.shape[0]}")
Label = label.reshape(height, width)
df_soil.head()

# Visualize Data

## Plot Bands

In [None]:
def plot_band(dataset, band_no=None, height=145, width=145):
    """Show one column of ``dataset`` as a ``height`` x ``width`` image.

    Parameters
    ----------
    dataset : pandas.DataFrame
        One row per pixel; a column named ``'class'``, if present, is treated
        as the label and is never chosen at random.
    band_no : int, optional
        0-based column position to show. Chosen at random among the non-label
        columns when omitted.
    height, width : int
        Image dimensions; ``height * width`` must equal the number of rows.
    """
    if band_no is None:
        # Exclude the label column so the "band" shown is always spectral data.
        band_positions = [pos for pos, col in enumerate(dataset.columns) if col != 'class']
        band_no = band_positions[np.random.randint(len(band_positions))]
    # Reshape only the selected column rather than the whole frame.
    image = dataset.iloc[:, band_no].values.reshape(height, width)
    plt.figure(figsize=(8, 6))
    plt.imshow(image, cmap='jet')
    plt.title(f'Band-{band_no}', fontsize=14)
    plt.axis('off')
    plt.colorbar()
    plt.show()

In [None]:
# Display one randomly chosen band of the scene.
plot_band(dataset=df_soil)

## Visualizing ground truth of the image.

In [None]:
plt.figure(figsize=(8, 6))
# height / width come from the data-loading cell.
ground_truth = df_soil['class'].values.reshape(height, width)
plt.imshow(ground_truth)
plt.axis('off')
# One tick per entry of indianpines_class_names, so the last label also gets a tick.
# NOTE(review): assumes the labels are the positions in indianpines_class_names — confirm.
plt.colorbar(ticks=range(len(indianpines_class_names)))
plt.show()

## Visualizing Spectral Signatures


In [None]:
def plot_signature(df, pixel_no=None):
    """Plot the values of all feature columns (every column but the last) of one row.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pixel; the last column is shown in the legend as the class.
    pixel_no : int, optional
        Row position to plot. Chosen at random when omitted.
    """
    if pixel_no is None:
        pixel_no = np.random.randint(df.shape[0])
    print("Pixel No: ",pixel_no)
    spectrum = df.iloc[pixel_no, :-1].values.tolist()
    plt.figure(figsize=(12, 6))
    # The x axis follows the actual number of feature columns (1-based band
    # numbers) instead of assuming exactly 220 bands.
    plt.plot(range(1, len(spectrum) + 1), spectrum, 'b--', label= f'Class - {df.iloc[pixel_no, -1]}')
    plt.legend()
    plt.title(f'Pixel({pixel_no}) signature', fontsize=14)
    plt.xlabel('Band Number', fontsize=14)
    plt.ylabel('Pixel Intensity', fontsize=14)
    plt.show()

In [None]:
# Spectral signature of one randomly chosen pixel.
plot_signature(df=df_soil)

In [None]:
# Per-class spread of band 50.
box_plot(n=50, df=df_soil)

# Data Pre-processing

## Detecting outliers using the Interquartile Range (IQR)

In [None]:
def detect_outliers_iqr(data, lower_pct=30, upper_pct=70, whisker=1.5):
    """Return the values of ``data`` lying outside the percentile-range fences.

    The fences are ``lower - whisker * spread`` and ``upper + whisker * spread``
    where ``lower`` / ``upper`` are the ``lower_pct`` / ``upper_pct`` percentiles
    and ``spread = upper - lower``.

    NOTE(review): the defaults are the 30th/70th percentiles; the classic IQR
    rule uses the quartiles (25/75). Pass ``lower_pct=25, upper_pct=75`` if the
    textbook rule is intended — confirm.

    Parameters
    ----------
    data : array-like of numbers
    lower_pct, upper_pct : float
        Percentiles bounding the central range.
    whisker : float
        Multiple of the spread added on each side.

    Returns
    -------
    list
        The outlying values in ascending order (duplicates kept). A new list is
        returned on every call, so results of separate calls never mix.
        Empty input yields an empty list.
    """
    values = np.sort(np.asarray(data))
    if values.size == 0:
        return []
    lower, upper = np.percentile(values, [lower_pct, upper_pct])
    spread = upper - lower
    lwr_bound = lower - (whisker * spread)
    upr_bound = upper + (whisker * spread)
    return values[(values < lwr_bound) | (values > upr_bound)].tolist()


In [None]:
# A separate name keeps the band count stored in `band` (data-loading cell) intact.
outlier_band = 50
band_values = df_soil[f'band-{outlier_band}']
sample_outliers = detect_outliers_iqr(band_values)
median = np.median(band_values)  # Replace with median
# Swap every outlying value for the band median in a single vectorised pass;
# mask() returns a new Series, so df_soil itself is left untouched.
d_band = band_values.mask(band_values.isin(sample_outliers), median)
sns.boxplot(x=df_soil["class"], y=d_band, width=0.5);

## Splitting and Training Data

In [None]:
# Stratified 80/20 split; the row positions are split alongside the data so
# every sample can be traced back to its pixel.
(X_train, X_test,
 y_train, y_test,
 train_indexes, test_indexes) = train_test_split(
    X, y, range(X.shape[0]),
    train_size=0.8, random_state=123, stratify=y,
)
# Column-vector copies of the index lists.
test_indices = list2array(test_indexes, isdata=False)
train_indices = list2array(train_indexes, isdata=False)

In [None]:
def print_data_summary_1(y_train,y_test,y,num_classes):
    """Print how many samples of each class are in the train split, the test split and overall.

    Parameters
    ----------
    y_train, y_test, y : array-like of int
        Labels of the training split, the test split and the full data set.
    num_classes : int
        Labels ``0 .. num_classes - 1`` are summarised; the row names are taken
        from ``indianpines_class_names``.
    """
    labels = range(0, num_classes)

    def _count_per_class(values):
        values = np.asarray(values)  # lets plain lists compare element-wise
        return [int(np.sum(values == i)) for i in labels]

    # Each column is counted from its own label array, so the 'Test' column is
    # right even when `y` is not exactly the union of the two splits.
    df = pd.DataFrame({'Train': _count_per_class(y_train),
                       'Test': _count_per_class(y_test),
                       'Total': _count_per_class(y)},
                      index=[indianpines_class_names[i] for i in labels])
    print('Summary of training and testing samples:')
    print(df)
    print("Training samples: %d" % len(y_train))
    print("Test samples: %d" % len(y_test))


In [None]:
print_data_summary_1(y_train, y_test, y, num_classes)

# Number of training samples for each of the labels 1 .. num_classes.
num_train_each_class = np.array(
    [np.sum(y_train == label) for label in range(1, num_classes + 1)]
)
print(num_train_each_class.shape)

# Names understood by classification_pipeline, in evaluation order.
classifiers = ["KNN", "RBF-SVM", "Poly-SVM", "Xgboost", "RF", "GB", "MLR"]
model_selection = False

In [None]:
# One row per classifier, a single column for its test accuracy.
Cla_accuracy = np.zeros((len(classifiers), 1))
        

In [None]:
# Train and evaluate every classifier in turn, recording its test accuracy.
for i, classifier in enumerate(classifiers):
    Cla_Map, cla_accuracy = classification_pipeline(
        classifier, X_train, y_train, X_test, y_test, data_all,
        width, height, num_classes, test_indexes, train_indexes,
        num_train_each_class, model_selection,
    )
    Cla_accuracy[i, 0] = cla_accuracy

In [None]:
# Mean over the (single) accuracy column: one value per classifier.
Cla_Acc_Mean = Cla_accuracy.mean(axis=1)

In [None]:
# Build the table directly from the accuracies (no placeholder values needed).
df_result = pd.DataFrame({'Cla_Acc': Cla_Acc_Mean}, index=classifiers)
print(df_result)

print('The best classifier is ' + str(classifiers[Cla_Acc_Mean.argmax()]) + '.')