# Classification Performance

In [1]:
# Import libraries
import os
import pandas as pd
import numpy as np

import pipeline as pipe

from sklearn import preprocessing
from sklearn.pipeline import Pipeline
import xgboost as xgb
from sklearn.svm import SVC
from sklearn.model_selection  import GridSearchCV

import warnings
# Silence every Python warning for the rest of the session. This keeps the
# cell outputs compact, but it also hides deprecation and convergence warnings
# raised by the libraries used below; comment it out when debugging.
warnings.filterwarnings('ignore')

In [2]:
# Input directory
# The data folder can be overridden through the MDPI_DATA_DIR environment
# variable so the notebook can run on machines other than the author's. When
# the variable is unset, the original hard-coded location is used unchanged.
inDir = os.environ.get(
    'MDPI_DATA_DIR',
    r'C:\Users\crist\OneDrive - Pontificia Universidad Javeriana Cali\MDPI',
)
# Relative file names used later (e.g. 'NDSI.csv') resolve against this folder.
os.chdir(inDir)

In [3]:
# Data pre-processing
# Load the ground-truth table (the first CSV column becomes the index) and
# discard the ID, SCORE and SEASON columns.
GTdata = (
    pd.read_csv('NDSI.csv', index_col=0)
      .drop(columns=['ID', 'SCORE', 'SEASON'])
)

# Replace the CLASS labels with integer codes; the fitted encoder stays
# available in `le` for mapping the codes back to the original labels.
le = preprocessing.LabelEncoder()
le.fit(GTdata['CLASS'].unique())
GTdata['CLASS'] = le.transform(GTdata['CLASS'])

# XGBoost CV - Normalized Difference Spectral Indices (NDSI)

In [4]:
# NDSI features
# Column-name prefixes to keep: the spectral indices themselves plus the
# DATASET / CLASS / TYPE columns that the following cells rely on.
ndsi_prefixes = (
    "NDRE", "NDVI", "GNDVI", "BNDVI", "ERVI", "EGVI", "EBVI", "GRVI", "GBVI",
    "DATASET", "CLASS", "TYPE",
)
NDSI_col = [name for name in GTdata.columns if name.startswith(ndsi_prefixes)]

In [5]:
# Data standardization
# Every DATASET group is standardized on its own: the scaler is re-fitted on
# each group, so feature means/variances are computed per dataset rather than
# over the whole table. After the loop `scaler` holds the fit of the last group.
scaler = preprocessing.StandardScaler()
features = list(GTdata[NDSI_col].drop(columns=['TYPE', 'DATASET', 'CLASS']).columns)

scaled_groups = []
for name, group in GTdata[NDSI_col].groupby('DATASET'):
    # Work on an explicit copy so the assignment never targets a slice of GTdata.
    group = group.copy()
    group[features] = scaler.fit_transform(group[features])
    scaled_groups.append(group)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copied the accumulated frame on every iteration.
# pd.concat raises on an empty list, so fall back to an empty frame.
NDSI_scaled = (
    pd.concat(scaled_groups, ignore_index=True) if scaled_groups else pd.DataFrame()
)

In [6]:
# Train set
# Keep only the rows whose TYPE equals 'variety', then remove the TYPE and
# DATASET columns so that only the features and the CLASS target remain.
is_variety = NDSI_scaled['TYPE'] == 'variety'
train = NDSI_scaled.loc[is_variety].drop(columns=['TYPE', 'DATASET'])

# Separate the target from the predictors.
y_train = train['CLASS']
X_train = train.drop(columns='CLASS')

In [7]:
# Define pipeline and hyperparameters
# Single-step pipeline wrapping the XGBoost classifier (no scaling step here).
pipe_xgb_global = Pipeline([
    ('clf', xgb.XGBClassifier(
        nthread=-1,
        random_state=2019,
        eval_metric='auc',
        fpreproc=pipe.fpreproc,
    )),
])

# Search grid; the `clf__` prefix routes each entry to the 'clf' pipeline step.
# NOTE(review): `min_samples_leaf` and `fpreproc` do not appear to be
# XGBClassifier parameters (XGBoost's leaf-size control is `min_child_weight`,
# and `fpreproc` is an argument of `xgb.cv`) — confirm they have any effect;
# if not, the min_samples_leaf axis only doubles the number of fits.
xgb_params_global = {
    'clf__max_depth': [2, 3, 4],
    'clf__n_estimators': [1, 2, 3, 4],
    'clf__min_samples_leaf': [5, 6],
    'clf__colsample_bytree': list(np.arange(0.1, 1.1, 0.1)),
}

# Exhaustive search with 10-fold cross-validation, ranked by ROC AUC.
grid_xgb_global = GridSearchCV(
    pipe_xgb_global,
    param_grid=xgb_params_global,
    cv=10,
    n_jobs=-1,
    scoring='roc_auc',
    return_train_score=True,
)

In [8]:
# Fit the model
# Run the grid search on the training split. `fit` returns the search object
# itself, so `xgb_model_global` is the same (now fitted) GridSearchCV instance.
grid_xgb_global.fit(X_train, y_train)
xgb_model_global = grid_xgb_global

In [9]:
# Results
# Pass the per-dataset standardized table and the already fitted grid search
# to the project helper `pipe.ML`.
# NOTE(review): the table shown under this cell is presumably the helper's
# per-DATASET summary (AUC, TN, TP, AVG) — confirm against pipeline.py.
results_xgb_global = pipe.ML(NDSI_scaled, xgb_model_global)

DATASET,AUC,TN,TP,AVG
2017B-1,0.91,0.89,0.71,0.87
2017B-2,0.91,0.87,0.73,0.86
2017B-3,0.91,0.87,0.67,0.85
2017B-4,0.91,0.87,0.67,0.86
2018B-1,0.91,0.89,0.32,0.83
2019A-1,0.91,0.82,0.57,0.81
2019A-2,0.91,0.83,0.69,0.82
2019A-3,0.91,0.83,0.62,0.82
2019A-4,0.91,0.86,0.47,0.84
2019B-1,0.91,0.92,0.42,0.87


# XGBoost Local - NDSI

In [10]:
# Define pipeline and hyperparameters
# Two-step pipeline: standardize the features, then classify with XGBoost.
# Keeping the scaler inside the pipeline means it is re-fitted whenever the
# pipeline itself is fitted.
pipe_xgb_local = Pipeline([
    ('scale', preprocessing.StandardScaler()),
    ('clf', xgb.XGBClassifier(
        nthread=-1,
        random_state=2019,
        eval_metric='auc',
        fpreproc=pipe.fpreproc,
    )),
])

# Search grid; the `clf__` prefix routes each entry to the 'clf' pipeline step.
# NOTE(review): `min_samples_leaf` and `fpreproc` do not appear to be
# XGBClassifier parameters (XGBoost's leaf-size control is `min_child_weight`,
# and `fpreproc` is an argument of `xgb.cv`) — confirm they have any effect.
xgb_params_local = {
    'clf__max_depth': [2, 3, 4],
    'clf__n_estimators': [1, 2, 3, 4],
    'clf__min_samples_leaf': [5, 6],
    'clf__colsample_bytree': list(np.arange(0.1, 1.1, 0.1)),
}

# Exhaustive search with 10-fold cross-validation, ranked by ROC AUC.
grid_xgb = GridSearchCV(
    pipe_xgb_local,
    param_grid=xgb_params_local,
    cv=10,
    n_jobs=-1,
    scoring='roc_auc',
    return_train_score=True,
)

In [11]:
# Results
# Here the raw (not pre-standardized) NDSI columns and the still unfitted grid
# search are handed to `pipe.ML`, together with train=1.
# NOTE(review): train=1 presumably tells pipe.ML to fit the search itself,
# separately for each DATASET ("local" models) — confirm in pipeline.py.
results_xgb_local = pipe.ML(GTdata[NDSI_col], grid_xgb, train=1)

DATASET,AUC,TN,TP,AVG
2017B-1,0.93,0.86,0.74,0.85
2017B-2,0.93,0.81,0.66,0.8
2017B-3,0.92,0.86,0.69,0.85
2017B-4,0.93,0.85,0.77,0.84
2018B-1,0.89,0.85,0.41,0.8
2019A-1,0.95,0.84,0.6,0.83
2019A-2,0.92,0.82,0.56,0.8
2019A-3,0.95,0.87,0.53,0.85
2019A-4,0.93,0.91,0.46,0.89
2019B-1,0.91,0.83,0.47,0.8


# SVM Local - NDSI

In [12]:
# Define pipeline and hyperparameters
# Two-step pipeline: standardize the features, then a class-weight-balanced SVM.
svm_steps = [
    ('scale', preprocessing.StandardScaler()),
    ('clf', SVC(class_weight='balanced', random_state=2019)),
]
pipe_svm = Pipeline(svm_steps)

# Search grid over regularization strength, kernel type and kernel coefficient.
# Note: scikit-learn documents `gamma` as applying to the 'rbf', 'poly' and
# 'sigmoid' kernels, so for 'linear' the four gamma values repeat the same fit.
svm_params = {
    'clf__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'clf__kernel': ['rbf', 'linear', 'sigmoid'],
    'clf__gamma': [1e-2, 1e-3, 1e-4, 1e-5],
}

# Exhaustive search with 10-fold cross-validation, ranked by ROC AUC.
grid_svm = GridSearchCV(
    pipe_svm,
    param_grid=svm_params,
    cv=10,
    n_jobs=-1,
    scoring='roc_auc',
    return_train_score=True,
)

In [13]:
# Results
# Evaluate the SVM grid search on the raw NDSI columns through `pipe.ML`
# (train=1, as in the other "local" runs).
results_svm_ndsi = pipe.ML(GTdata[NDSI_col], grid_svm, train=1)
# Persist the result table, index included, as 'SVM_NDSI.csv' in the current
# working directory.
results_svm_ndsi.to_csv('SVM_NDSI.csv', index = True)

DATASET,AUC,TN,TP,AVG
2017B-1,0.98,0.87,0.79,0.86
2017B-2,0.98,0.87,0.77,0.86
2017B-3,0.97,0.81,0.84,0.81
2017B-4,0.97,0.86,0.79,0.85
2018B-1,0.91,0.89,0.44,0.85
2019A-1,0.98,0.86,0.68,0.85
2019A-2,0.97,0.81,0.7,0.8
2019A-3,0.98,0.84,0.67,0.83
2019A-4,0.96,0.9,0.52,0.88
2019B-1,0.94,0.89,0.5,0.85


# SVM Local - Soil Based Vegetation Indices (SBVI)

In [14]:
# SBVI features
# Column-name prefixes to keep: the soil-based indices plus the
# DATASET / CLASS / TYPE columns that `pipe.ML` receives alongside them.
sbvi_prefixes = ("WDVI", "PVI", "MSAVI2", "DATASET", "CLASS", "TYPE")
SBVI_col = [name for name in GTdata.columns if name.startswith(sbvi_prefixes)]

In [15]:
# Results
# Same SVM grid-search object as in the NDSI run, now given only the SBVI
# columns (plus DATASET / CLASS / TYPE).
# NOTE(review): this relies on pipe.ML re-fitting `grid_svm` for the new
# feature set rather than reusing an earlier fit — confirm in pipeline.py.
results_svm_sbvi = pipe.ML(GTdata[SBVI_col], grid_svm, train=1)

DATASET,AUC,TN,TP,AVG
2017B-1,0.94,0.92,0.51,0.89
2017B-2,0.94,0.9,0.51,0.86
2017B-3,0.94,0.93,0.47,0.89
2017B-4,0.95,0.93,0.57,0.9
2018B-1,0.88,0.84,0.37,0.79
2019A-1,0.97,0.88,0.49,0.86
2019A-2,0.95,0.86,0.5,0.85
2019A-3,0.96,0.87,0.55,0.86
2019A-4,0.95,0.92,0.43,0.89
2019B-1,0.94,0.83,0.47,0.79


# SVM Local - NDSI + SBVI

In [16]:
# Results
# The whole pre-processed table is passed here, i.e. every column left in
# GTdata after ID / SCORE / SEASON were dropped.
# NOTE(review): this equals "NDSI + SBVI" only if GTdata holds no columns
# besides those two feature families and DATASET / CLASS / TYPE — verify.
results_svm_all = pipe.ML(GTdata, grid_svm, train=1)

DATASET,AUC,TN,TP,AVG
2017B-1,0.98,0.91,0.75,0.89
2017B-2,0.98,0.92,0.7,0.9
2017B-3,0.97,0.91,0.73,0.89
2017B-4,0.97,0.92,0.68,0.9
2018B-1,0.91,0.86,0.39,0.81
2019A-1,0.98,0.87,0.64,0.86
2019A-2,0.99,0.85,0.67,0.84
2019A-3,0.97,0.87,0.59,0.86
2019A-4,0.96,0.93,0.44,0.9
2019B-1,0.96,0.84,0.52,0.8
