# Evaluation

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    The matrix is drawn on the current matplotlib figure; the caller decides
    when to create or show the figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix; rows are labelled 'True label' and columns
        'Predicted label' on the plot. Nested lists are accepted.
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, optional
        If True, every row is divided by its row sum before printing and
        plotting. Rows whose sum is zero are shown as all zeros.
    title : str, optional
        Title of the plot.
    cmap : matplotlib colormap, optional
        Colormap passed to ``plt.imshow``.
    """
    # Accept nested lists as well as arrays; .sum/.max/.shape are used below.
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row has samples, so an empty row yields zeros
        # instead of a divide-by-zero warning and NaN cells.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Normalized cells are fractions, raw cells are integer counts.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    
    

def show_roc_curve(labels, scores):
    """Plot one ROC curve per label column and show the figure.

    Reads the module-level ``Colours`` (flat list of 0-255 RGB values, three
    entries per class) and ``classes`` (class names used in the legend).

    Parameters
    ----------
    labels : 2-D array
        Column ``i`` holds the ground-truth indicator passed to ``roc_curve``
        for class ``i``.
    scores : 2-D array
        Column ``i`` holds the scores passed to ``roc_curve`` for class ``i``.
    """
    # Bound before the loop: the chance line below needs it even when
    # ``labels`` has no columns.
    lw = 2

    for i in range(labels.shape[1]):
        cur_labels = labels[:, i]
        cur_scores = scores[:, i]

        # ROC curve and area under it for this class
        fpr, tpr, _ = roc_curve(cur_labels, cur_scores)
        roc_auc = auc(fpr, tpr)

        # Class i uses entries 3*i, 3*i+1, 3*i+2 of Colours, scaled to 0-1
        # as matplotlib expects.
        r = Colours[i*3+0]/255
        g = Colours[i*3+1]/255
        b = Colours[i*3+2]/255
        plt.plot(fpr, tpr, color=(r, g, b), lw=lw,
                 label='ROC-AUC: ' + classes[i] + '(area = %0.2f)' % roc_auc)

    # Diagonal reference line
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC AUC for all classes')
    plt.legend(loc="lower right")
    # No plt.figure() after show(): it only left an empty figure behind.
    plt.show()
    

def image_grid(array, ncols):
    """Tile a stack of equally sized images into one grid image.

    Images are placed row by row, ``ncols`` per row. If the number of images
    is not a multiple of ``ncols``, the last row is completed with all-zero
    images.

    Parameters
    ----------
    array : array-like of shape (n_images, height, width, channels)
        Stack of images to tile.
    ncols : int
        Number of images per grid row; must be positive.

    Returns
    -------
    numpy.ndarray of shape (height * nrows, width * ncols, channels)

    Raises
    ------
    ValueError
        If ``ncols`` is not positive or ``array`` is not 4-dimensional.
    """
    if ncols <= 0:
        raise ValueError("ncols must be a positive integer")

    array = np.asarray(array)
    index, height, width, channels = array.shape
    nrows = -(-index // ncols)  # ceiling division

    # Complete the last row with blank images so the reshape below is valid.
    missing = nrows * ncols - index
    if missing:
        filler = np.zeros((missing, height, width, channels), dtype=array.dtype)
        array = np.concatenate([array, filler], axis=0)

    # (row, col, h, w, c) -> (row, h, col, w, c) -> (row*h, col*w, c)
    img_grid = (array.reshape(nrows, ncols, height, width, channels)
                .swapaxes(1, 2)
                .reshape(height*nrows, width*ncols, channels))

    return img_grid

In [None]:

# Refit the whole pipeline (scaling -> PCA -> SVM) with the chosen parameters.
# The number of PCA components is one fifth of len(cols), truncated to an int.
n_components = int(len(cols)/5)

# Standardize: the scaler is fitted on the training features only and the same
# fitted transform is then applied to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_tr = scaler.transform(X_train)
X_test_tr = scaler.transform(X_test)

# Dimensionality reduction with PCA (fitted on the scaled training data only,
# then applied to both sets). Note this projects onto components rather than
# selecting a subset of the original features.
pca = PCA(n_components = n_components)
pca.fit(X_tr)
X_tr = pca.transform(X_tr)
X_test_tr = pca.transform(X_test_tr)

# Train classifier
# NOTE(review): per the scikit-learn SVC documentation `degree` only applies to
# the 'poly' kernel, so degree = 1 should have no effect with kernel = 'rbf'
# -- confirm and consider dropping it.
clf = svm.SVC(C = 5, class_weight = None, decision_function_shape = 'ovr', degree = 1, kernel = 'rbf')
# Alternative classifier kept for reference (disabled):
#clf = DecisionTreeClassifier(random_state = 42, class_weight = None, max_depth = 15, max_features = 'auto'
                             #, min_samples_split = 2, splitter = 'best')
clf.fit(X_tr, y_train)

# Predict on both the training and the test features
y_pred_tr = clf.predict(X_tr)
y_pred_test = clf.predict(X_test_tr)

# Accuracy of training set
print('Training set accuracy: ', accuracy_score(y_train, y_pred_tr))

# Accuracy of test set
print('Test set accuracy: ', accuracy_score(y_test, y_pred_test))

# Confusion matrix of test set (raw counts, not normalized)
print('Confusion matrix')
plot_confusion_matrix(confusion_matrix(y_test, y_pred_test), classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues)

# Classification report of test set
print('Classification report')
print(classification_report(y_test, y_pred_test))

In [None]:


# Binarize labels: one indicator column per entry of `classes`
y_tr_bin = label_binarize(y_train, classes=classes)
y_test_bin = label_binarize(y_test, classes=classes)

# Get the classifier's decision-function scores for the test set
y_score = clf.decision_function(X_test_tr)

# Compute a single ROC curve and its area over all classes at once.
# NOTE(review): ravel() pools every (sample, class) pair into one curve, which
# is what scikit-learn's examples call the *micro*-average, although the
# variables are named "Macro" -- confirm which average is intended.
fprMacro, tprMacro, _ = roc_curve(y_test_bin.ravel(), y_score.ravel())
roc_auc = auc(fprMacro,tprMacro)

# Line width shared by the ROC curve and the diagonal reference line
lw = 2
plt.figure()

# Plot the curve together with the diagonal reference line
plt.plot(fprMacro, tprMacro, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')

# Limit axis
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])

# Add labels
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")

# Display ROC curve for all classes combined
plt.show()

# Display ROC curve for each class.
# Colours is a flat list of 0-255 RGB values, three consecutive entries per
# class (six colours here); show_roc_curve reads it as a global.
Colours = [
           255,0,0,
            0,255,0,
           0,0,255,
           0,255,255,
           255,0,255,
           255,255,0,
          ]
show_roc_curve(y_test_bin, y_score)

In [None]:

# Per-class precision-recall curves; results are stored keyed by class index.
precision = dict()
recall = dict()
average_precision = dict()
for i in range(len(classes)):
    # Colour of class i: entries 3*i .. 3*i+2 of the flat Colours list,
    # scaled from 0-255 to the 0-1 range used for the plot colour.
    r = Colours[i*3+0]/255
    g = Colours[i*3+1]/255
    b = Colours[i*3+2]/255
    precision[i], recall[i], _ = precision_recall_curve(y_test_bin[:, i],y_score[:, i])
    average_precision[i] = average_precision_score(y_test_bin[:, i], y_score[:, i])
    plt.plot(recall[i], precision[i], color=(r, g, b), label='Pre-Rec: '+classes[i] + ' = ' + str(round(average_precision[i], 2)))
    
# Aggregate entries stored under the key "macro". They are computed here but
# not plotted in this cell.
# NOTE(review): the curve is built from ravel()-ed (pooled) labels and scores,
# while the score uses average="macro" -- the two are not the same kind of
# average; confirm which one is intended.
precision["macro"], recall["macro"], _ = precision_recall_curve(y_test_bin.ravel(),y_score.ravel())
average_precision["macro"] = average_precision_score(y_test_bin, y_score,average="macro")

# Finish and show the precision-recall plot
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall')
plt.legend(loc="lower left")
plt.show()

## Error Analysis

In [None]:


# Class names, in the row/column order of the confusion matrices below
classes = ['Bacterial Spot','Early Blight','Healthy','Late Blight','Septorial Leaf Spot','Yellow Curl Virus']
# Algorithm names, in the same order as the matrices collected in `cms`
algos = ['KNN','DT','RF','SVM']

# Hard-coded confusion matrices, one per algorithm (cm[a][b] is the count in
# row a, column b).
cmKNN = [[54,1,0,2,0,0],
     [5,55,4,3,1,1],
     [0,1,72,1,1,0],
     [3,5,1,23,3,3],
     [3,3,5,4,33,4],
     [2,1,1,1,1,34]]

cmDT = [[45,3,4,1,0,4],
     [6,46,4,4,3,6],
     [2,5,58,5,5,0],
     [2,6,1,22,2,5],
     [6,7,5,7,24,3],
     [3,1,1,1,2,32]]

cmRF = [[51,3,0,1,1,1],
     [4,57,2,1,4,1],
     [0,2,73,0,0,0],
     [1,7,0,24,4,2],
     [2,0,3,2,43,2],
     [1,1,0,0,0,38]]

cmSVM = [[55,1,0,1,0,0],
     [3,57,1,5,1,2],
     [0,1,74,0,0,0],
     [2,5,0,24,3,4],
     [1,0,2,4,43,2],
     [1,0,0,0,1,38]]

cms = [cmKNN, cmDT, cmRF, cmSVM]
# One empty list per class; not filled or read by the code in this cell.
saveMR = [[],
         [],
         [],
         [],
         [],
         []]

# Pair every algorithm name with its matrix once; the average below divides by
# the number of pairs actually evaluated instead of a hard-coded 4.
algo_cms = list(zip(algos, cms))
n_classes = len(classes)

# For every unordered pair of classes (i, j), restrict each confusion matrix
# to the 2x2 sub-matrix of those two classes and report the share of its
# samples that were confused between the two.
for i in range(n_classes):
    for j in range(i + 1, n_classes):
        print(classes[i],'   ', classes[j])
        avgRate = 0
        for algo, cm in algo_cms:
            ii = cm[i][i]
            jj = cm[j][j]
            ij = cm[i][j]
            ji = cm[j][i]
            total = ii + jj + ij + ji
            falseLabels = ij + ji
            # A pair with no samples at all has nothing misclassified.
            misclassificationRate = falseLabels/total if total else 0.0
            avgRate += misclassificationRate
            print(algo,': ',round(misclassificationRate, 4))
        print('Avg: ',round(avgRate/len(algo_cms), 4))
        print('\n')

In [None]:

    
# Collect the wrongly classified test images, grouped by their true class.
# The full frame is passed as both X and y so that the split keeps every
# column of dfFinal; column 0 of the test part is read as the file name below.
X_temp = dfFinal
y_temp = dfFinal

# Split the dataset into train and test sets.
# NOTE(review): this presumably repeats the split that produced y_test (same
# random_state and test size) so that positions line up -- verify.
X_train_temp, X_test_temp, y_train_temp, y_test_temp = train_test_split(X_temp, y_temp, random_state = 42)

# Positions in the test set where the prediction differs from the true label
indices = [pos for pos, (actual, predicted) in enumerate(zip(y_test, y_pred_test))
           if actual != predicted]
wrong_prediction_fname = y_test_temp.iloc[indices,0]
wrong_prediction_cat = y_test.iloc[indices]

# One list of RGB images per true class
bs = []
eb = []
hl = []
lb = []
sls = []
ycv = []

# True class name -> (path prefix of its segmented images, list to fill)
segmented_sources = {
    'Bacterial Spot': (segBS, bs),
    'Early Blight': (segEB, eb),
    'Healthy': (segHL, hl),
    'Late Blight': (segLB, lb),
    'Septorial Leaf Spot': (segSLS, sls),
    'Yellow Curl Virus': (segYCV, ycv),
}

for fname, category in zip(wrong_prediction_fname, wrong_prediction_cat):
    # Labels outside the six known classes are ignored.
    if category not in segmented_sources:
        continue
    folder, bucket = segmented_sources[category]

    image = cv2.imread(folder + fname)
    # cv2.imread signals an unreadable file by returning None; fail with the
    # offending path instead of an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError('Could not read image: ' + folder + fname)

    # OpenCV loads images as BGR; convert to RGB for matplotlib.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    bucket.append(image)

In [None]:

# Show the misclassified images of every true class as a single-row grid,
# one figure per class.
misclassified_by_class = [
    ("Bacterial Spot", bs),
    ("Early Blight", eb),
    ("Healthy", hl),
    ("Late Blight", lb),
    ("Septorial Leaf Spot", sls),
    ("Yellow Curl Virus", ycv),
]

for class_title, images in misclassified_by_class:
    # A class without misclassified images has nothing to tile; an empty list
    # cannot be turned into a 4-D image stack for image_grid.
    if not images:
        print('No misclassified images for', class_title)
        continue

    # ncols == number of images, so all images end up in one row.
    # NOTE(review): np.array(images) assumes all images of a class share the
    # same height, width and channel count -- confirm.
    result = image_grid(np.array(images), len(images))
    fig = plt.figure(figsize=(20., 20.))
    plt.title(class_title)
    plt.imshow(result)