# In [None]:
import numpy as np
import os
import cv2
from imutils import paths
import shutil
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.cm import coolwarm
import matplotlib
import matplotlib.pyplot as plt

from keras.preprocessing import image
from keras.applications import vgg16
from keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model

import itertools

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from Utils import cmap_map
from sklearn.model_selection import KFold, StratifiedKFold

# Pre-trained VGG16 (ImageNet weights) used purely as a feature extractor:
# the classifier head is dropped and the convolutional output is globally
# average-pooled.
model = vgg16.VGG16(
    weights='imagenet',
    pooling='avg',
    include_top=False,
)

# Images are read from <OUTPUT_DATASET>/<split>; results are written under
# <FOL>/<RESULTS>/<split>.
OUTPUT_DATASET = "SEN_DGB"
FOL = "SEN_DGBR"
RESULTS = "Results_Classification_Cross_Validation"

# Sub-folders of OUTPUT_DATASET, in the order they are processed.
NImages = (
    ["NX_NY_Images_1", "length_125", "length_100"]
    + ["NX_NY_Images_{}".format(k) for k in range(2, 11)]
)

# Length-scale label reported for the folder at the same index in NImages.
Length_Scale = "150 125 100 75 50 37.5 30 25 21.4 18.75 16.67 15".split()

# ---------------------------------------------------------------------------
# Cross-validated classification of VGG16 features.
#
# For every image folder in NImages:
#   1. extract one 512-d VGG16 feature vector per image (once per folder),
#   2. for each of the K folds, reduce train+test features with PCA and then
#      t-SNE (for several perplexities),
#   3. cluster the training embedding with k-Means, name each cluster after
#      its majority ground-truth class and keep only the training points whose
#      cluster name agrees with their ground truth,
#   4. fit a k-NN classifier on those points, score it on the test embedding,
#      append the accuracies to Accuracy.txt and save a decision-boundary plot.
# ---------------------------------------------------------------------------

# (material, specimen type) filename prefix -> integer class id.  The ids index
# into _CLASS_NAMES, which is also the legend order of the plots.
_CLASS_IDS = {
    ("DF140T", "DGB"): 0,
    ("DF140T", "SEN"): 1,
    ("DP980", "DGB"): 2,
    ("DP980", "SEN"): 3,
}
_CLASS_NAMES = ['DF140TDGB', 'DF140TSEN', 'DP980DGB', 'DP980SEN']

_N_FOLDS = 5
_N_NEIGHBORS = 10           # k-nearest-neighbours parameter
_KNN_WEIGHTS = 'uniform'
_N_CLUSTERS = 4             # k-Means parameter (one cluster per class)
_MAX_PCA_COMPONENTS = 50

# scikit-learn renamed TSNE's ``n_iter`` to ``max_iter``; use whichever keyword
# the installed version accepts.
_TSNE_ITER_KWARG = "max_iter" if "max_iter" in TSNE().get_params() else "n_iter"


def _class_from_filename(filename):
    """Return the class id encoded in ``<material>_<type>_...`` or None."""
    parts = filename.split('_')
    if len(parts) < 2:
        return None
    return _CLASS_IDS.get((parts[0], parts[1]))


def _extract_features(img_dir):
    """Run every labelled image file in *img_dir* through the VGG16 model.

    Directory entries are visited in sorted order so that the seeded K-fold
    split is reproducible.  Entries that are not regular files, or whose name
    does not encode a known class, are skipped.

    Returns a ``(features, labels)`` pair: a float64 array with one 512-d row
    per image and an int8 array of class ids in the same order.
    """
    feats, labels = [], []
    for item in sorted(os.listdir(img_dir)):
        img_path = os.path.join(img_dir, item)
        if not os.path.isfile(img_path):
            continue
        label = _class_from_filename(item)
        if label is None:
            print("[WARN] skipping '{}': unrecognised class prefix".format(item))
            continue
        img = image.load_img(img_path, target_size=(224, 224))
        x = np.expand_dims(image.img_to_array(img), axis=0)
        x = preprocess_input(x)
        feats.append(model.predict(x)[0])
        labels.append(label)
    return np.asarray(feats, dtype=np.float64), np.asarray(labels, dtype=np.int8)


def _majority_class_per_cluster(cluster_ids, true_labels, n_clusters):
    """Replace each cluster id by the most frequent true label in that cluster."""
    mapped = np.zeros(cluster_ids.shape, dtype=true_labels.dtype)
    for c in range(n_clusters):
        members = cluster_ids == c
        if members.any():
            mapped[members] = np.argmax(np.bincount(true_labels[members]))
    return mapped


def _save_decision_boundary_plot(knn, X_train_ok, X_test, y_test, cmap, out_path):
    """Draw the k-NN decision regions with the test points on top and save it."""
    # Evaluate the classifier on a unit-spaced grid covering the training
    # points plus a margin of 20 on every side.
    x_min, x_max = X_train_ok[:, 0].min() - 20, X_train_ok[:, 0].max() + 20
    y_min, y_max = X_train_ok[:, 1].min() - 20, X_train_ok[:, 1].max() + 20
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 1),
                         np.arange(y_min, y_max, 1))
    zz = knn.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.figure(figsize=(6, 4))
    ax = plt.axes(frameon=False)
    plt.setp(ax, xticks=(), yticks=())
    plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=0.9,
                        wspace=0.0, hspace=0.0)
    # Fix the colour range so regions, points and legend use the same colour
    # for a class even when some class is absent from the mesh or test set.
    v_max = _N_CLUSTERS - 1
    mesh = plt.pcolormesh(xx, yy, zz, cmap=cmap, vmin=0, vmax=v_max)
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, marker="D",
                edgecolors='k', cmap=cmap, vmin=0, vmax=v_max)
    handles = [plt.plot([], color=mesh.get_cmap()(mesh.norm(c)), ls="", marker="o")[0]
               for c in range(_N_CLUSTERS)]
    plt.tight_layout()
    mesh.set_clip_on(False)
    ax.legend(handles, _CLASS_NAMES, loc='best', frameon=True, framealpha=0.5)

    plt.savefig(out_path)
    plt.close()


results_root = os.path.sep.join([FOL, RESULTS])
os.makedirs(results_root, exist_ok=True)
output_txt = os.path.sep.join([results_root, "Accuracy.txt"])

# The file is opened in append mode, so every run adds a new header line.
with open(output_txt, "a") as text_file:
    text_file.write('{:s}, {:s}, {:s} ,{:s}, {:s} \n'.format('Length_Scale', 'CV fold', 'Perplexity',
                  'Classification Accuracy (Test)',
                  'Clustering Accuracy (Train)'))

# Lightened colormap shared by every plot.
light_brg = cmap_map(lambda x: x/2 + 0.5, matplotlib.cm.brg)

for L, split in enumerate(NImages):
    print("[INFO] processing '{} folder'...".format(split))
    img_DIR = os.path.sep.join([OUTPUT_DATASET, split])
    results_DIR = os.path.sep.join([FOL, RESULTS, split])
    os.makedirs(results_DIR, exist_ok=True)

    # Feature extraction does not depend on the fold, so do it once per folder.
    features, target = _extract_features(img_DIR)
    n_images = len(target)
    if n_images < _N_FOLDS:
        print("[WARN] skipping '{}': only {} labelled image(s), need at least {}".format(
            split, n_images, _N_FOLDS))
        continue

    n_comp = min(_MAX_PCA_COMPONENTS, n_images)
    kf = KFold(n_splits=_N_FOLDS, random_state=7, shuffle=True)

    for fold, (train_index, test_index) in enumerate(kf.split(features), start=1):
        train_images = len(train_index)

        # Stack training rows first and test rows second so that a single
        # slice at `train_images` separates them after the embedding.
        fold_features = np.concatenate([features[train_index], features[test_index]])
        target_array = np.concatenate([target[train_index], target[test_index]])
        target_train = target_array[:train_images]
        target_test = target_array[train_images:]

        # Dimensionality reduction to (at most) 50 components using PCA
        X_pca_50 = PCA(n_components=n_comp).fit_transform(fold_features)

        # Try several t-SNE perplexities to find the best projection
        for p in range(5, 50, 5):
            if p >= n_images:
                # t-SNE requires the perplexity to be below the sample count.
                print('[WARN] skipping perplexity {}: only {} samples'.format(p, n_images))
                continue

            print('\n')
            print('Perplexity = {}'.format(p))
            print('\n')

            # Embed train and test points together (t-SNE cannot transform
            # unseen points), then split the embedding back apart.
            X_tsne = TSNE(n_components=2, perplexity=p, random_state=7, verbose=0,
                          **{_TSNE_ITER_KWARG: 3000}).fit_transform(X_pca_50)
            X_tsne_train = X_tsne[:train_images]
            X_tsne_test = X_tsne[train_images:]

            # ---------------------------------------------------------------
            #   k-Means labeling of the training embedding
            # ---------------------------------------------------------------
            kmeans = KMeans(n_clusters=_N_CLUSTERS, init='k-means++', n_init=30,
                            max_iter=1000, tol=0.0001, verbose=0, random_state=7,
                            copy_x=True)
            kmeans.fit(X_tsne_train)
            correspond_labels = _majority_class_per_cluster(
                kmeans.labels_, target_train, _N_CLUSTERS)

            # Keep only the training points whose cluster-derived label agrees
            # with the ground truth.
            cor_idx = correspond_labels == target_train
            cor_labels = target_train[cor_idx]
            X_tsne_train_c = X_tsne_train[cor_idx]

            # ---------------------------------------------------------------
            #   K Nearest Neighbors Classifier
            # ---------------------------------------------------------------
            # Cap k at the number of available training points so prediction
            # does not fail on very small folds.
            kmeans_neigh = KNeighborsClassifier(
                n_neighbors=min(_N_NEIGHBORS, len(cor_labels)),
                weights=_KNN_WEIGHTS)
            kmeans_neigh.fit(X_tsne_train_c, cor_labels)

            # Predict the response for the test dataset
            y_pred = kmeans_neigh.predict(X_tsne_test)

            with open(output_txt, "a") as text_file:
                text_file.write('{:s}, {:d}, {:d} ,{:.3f}, {:.3f} \n'.format(Length_Scale[L], fold, p,
                             accuracy_score(target_test, y_pred),
                             accuracy_score(target_train, correspond_labels)))

            print('\n')
            print('Plotting Decision Boundary')
            print('\n')
            # Save the figure for this fold and perplexity
            out_name = 'KNN_fold_' + str(fold) + '_perp_' + str(p) + '.png'
            _save_decision_boundary_plot(
                kmeans_neigh, X_tsne_train_c, X_tsne_test, target_test,
                light_brg, os.path.sep.join([results_DIR, out_name]))
            
 