In [None]:
from skimage.io import imread
from skimage.color import gray2rgb,rgb2hsv
from skimage.transform import resize
import matplotlib.pyplot as plt
import numpy as np
import os
import os.path
from scipy.spatial.distance import euclidean
from sklearn.neighbors import NearestNeighbors
from scipy.stats import pearsonr
from scipy.stats import mode
from sklearn.metrics import confusion_matrix, accuracy_score
from scipy.sparse.linalg import svds
import pandas as pd
from sklearn.decomposition import TruncatedSVD

This block reads in all images, resizes them, and creates an N x 64 x 64 x 3 NumPy array. It also calculates RGB and HSV histograms from the original (not resized) images.

In [None]:
path='./Data/'
folders=['Animal','Fungus','Geological','Person','plant','Sport']
#labels are 0,1,2,3,4,5 respectively
#Data is inside ./Data/Folder_Name, Eg, ./Data/Animal

##Creating histograms here itself, so I don't have to read in the images again
bins1=np.arange(0,257,1) #for RGB the range is 0,255 per channel
bins2=np.arange(0,1+(1.0/256),1.0/256)  #for HSV range is 0,1 per channel

# Per-image results are collected in lists and stacked once after the loop,
# so the number of images does not have to be known (or hard-coded) up front.
rgb_hists=[]        # one 256*3 RGB histogram vector per image
hsv_hists=[]        # one 256*3 HSV histogram vector per image
resized_images=[]   # one 64X64X3 resized image per image (for Autoencoder and SVD)
label_list=[]       # folder index (0..5) of every image

num=0  # running count of images read so far
for i, f in enumerate(folders):
    folder_path=os.path.join(path,f)
    # sorted() makes the image order independent of the filesystem's listing order
    temp_list=sorted(os.listdir(folder_path))
    for each in temp_list:
        img = imread(os.path.join(folder_path,each))  #original image is 0-255
        if len(img.shape) == 2:
            img= gray2rgb(img)     #convert to RGB if in grayscale

        resized_images.append(resize(img, (64,64,3)))  #64X64X3 resized images

        #RGB histogram, using original images.
        #Each channel is histogrammed separately (img[:, :, ch]); passing the whole
        #image would produce three identical copies of the same histogram.
        single_hist1 = [np.histogram(img[:, :, ch], bins=bins1)[0] for ch in range(0, 3)]
        rgb_hists.append(np.concatenate(single_hist1, axis=0))

        #HSV histogram, again one histogram per channel (H, S, V)
        img_hsv=rgb2hsv(img)
        single_hist2 = [np.histogram(img_hsv[:, :, ch], bins=bins2)[0] for ch in range(0, 3)]
        hsv_hists.append(np.concatenate(single_hist2, axis=0))

        label_list.append(i)
        num += 1

total=num  # number of images actually read
RGB_all=np.array(rgb_hists,dtype=np.float64)      # shape (total, 256*3)
HSV_all=np.array(hsv_hists,dtype=np.float64)      # shape (total, 256*3)
input_resized=np.array(resized_images)            # shape (total, 64, 64, 3)
labels=np.array(label_list,dtype=np.float64)      # shape (total,), values 0..5

Create test, train sets

In [None]:
#splitting the dataset into train and test data
#80% of the images go to the train set, the remaining 20% to the test set
#(for total=8987 this gives a train set of 7190 and a test set of 1797)

SPLIT_SEED=0        # fixed seed so the split is the same on every Restart & Run All
TEST_FRACTION=0.2   # fraction of the images held out for testing

total=input_resized.shape[0]
num_test=int(TEST_FRACTION*total)
num_train=total-num_test

#train and test sets for Autoencoder, SVD of resized images
#a local RandomState is used so the global NumPy random state is left untouched
split_rng=np.random.RandomState(SPLIT_SEED)
indices=split_rng.permutation(total)
indices_train=indices[:num_train]
indices_test=indices[num_train:]
x_train=input_resized[indices_train]
labels_train=labels[indices_train]
x_test=input_resized[indices_test]
labels_test=labels[indices_test]
total_train=x_train.shape[0]
total_test=x_test.shape[0]

#also split the RGB, HSV histogram vectors by the same indices
RGB_train=RGB_all[indices_train]
RGB_test=RGB_all[indices_test]
HSV_train=HSV_all[indices_train]
HSV_test=HSV_all[indices_test]

Common function for Euclidean and Correlation metrics

In [None]:
##KNN code

#For Euclidean distance: call with algorithm='ball_tree',metric='euclidean'
#For Pearson Coefficient: call with algorithm='brute', metric='correlation'

def KNN_5(test_set,train_set,labels_train,algorithm, metric, n_neighbors=5):
    """Classify each test vector by majority vote among its nearest train vectors.

    Parameters
    ----------
    test_set : array-like, one feature vector per row
        Vectors to classify.
    train_set : array-like, one feature vector per row
        Vectors the nearest-neighbour index is fitted on.
    labels_train : array-like
        Label of every row of ``train_set``.
    algorithm, metric : str
        Passed straight to ``NearestNeighbors``.
    n_neighbors : int, optional
        Number of neighbours that vote (default 5).

    Returns
    -------
    numpy.ndarray of float, one entry per row of ``test_set``
        The most frequent label among the neighbours; ties are resolved in
        favour of the smallest label.
    """
    labels_train=np.asarray(labels_train)
    # for each image in test set, find the closest n_neighbors from train set
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm=algorithm,metric=metric).fit(train_set)
    _, indices = nbrs.kneighbors(test_set)
    pred=np.zeros(len(indices))
    #get the corresponding neighbour labels, choose majority
    for i, neighbour_idx in enumerate(indices):
        # np.unique returns the distinct labels sorted ascending, so argmax
        # (first maximum) picks the smallest label when several are tied.
        # This avoids indexing the result of scipy.stats.mode, whose shape
        # differs between SciPy versions.
        values, counts = np.unique(labels_train[neighbour_idx], return_counts=True)
        pred[i]=values[np.argmax(counts)]
    return pred

Convert input images to vectors: Autoencoder 

In [None]:
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
import matplotlib.pyplot as plt
from keras.callbacks import TensorBoard

# Options shared by every hidden convolution layer
conv_opts = dict(activation='relu', padding='same')

input_img = Input(shape=(64, 64, 3))

# Encoder: two (conv -> 2x2 max-pool) stages, 64X64X3 -> 16X16X8
x = Conv2D(8, (3, 3), **conv_opts)(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), **conv_opts)(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# Decoder: two (conv -> 2x2 upsample) stages, then a sigmoid conv back to 3 channels
x = Conv2D(8, (3, 3), **conv_opts)(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), **conv_opts)(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

# Two models sharing the same layers: the encoder half alone, and the full autoencoder
encoder = Model(input_img, encoded)
autoencoder = Model(input_img, decoded)
autoencoder.summary()
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['mse'])

# Train the network to reproduce its own input; the test images are used
# as validation data during fitting.
autoencoder.fit(
    x_train, x_train,
    epochs=10,
    batch_size=128,
    shuffle=True,
    validation_data=(x_test, x_test),
)

# Encode the resized images with the trained encoder half
Auto_train_temp = encoder.predict(x_train)
Auto_test_temp = encoder.predict(x_test)

# Flatten every 16X16X8 code into one row of length 2048 (row-major order, float64)
code_length = 16 * 16 * 8
Auto_train = np.reshape(Auto_train_temp, (total_train, code_length)).astype(np.float64)
Auto_test = np.reshape(Auto_test_temp, (total_test, code_length)).astype(np.float64)

I reduced the number of filters per layer to get a shorter feature vector. The feature length per image is now 16*16*8=2048. The blog used for Part 2 of the homework used grayscale images as input. To make the architecture work for RGB images, I tweaked the decoder half to produce 64X64X3 images.

Convert input images to vectors: SVD

In [None]:
# Reduce the resized images to k-dimensional vectors with a truncated SVD.
k = 256  # number of singular vectors to retain

# Flatten every 64X64X3 image into one row (row-major order, float64 copy)
flat_length = 64 * 64 * 3
svd_input_train = np.reshape(x_train, (total_train, flat_length)).astype(np.float64)
svd_input_test = np.reshape(x_test, (total_test, flat_length)).astype(np.float64)

# Fit on the train images only, then project both sets with the same components
svd = TruncatedSVD(n_components=k).fit(svd_input_train)
SVD_train = svd.transform(svd_input_train)
SVD_test = svd.transform(svd_input_test)

I also tried scipy.sparse.linalg.svds: I calculated the vectors with svds and then split them into test and train sets. It gave slightly lower accuracy than TruncatedSVD, hence TruncatedSVD is used here.

In [None]:
#method to plot confusion matrix
def plot_cm(conf_matrix, class_names=None, title=None):
    """Draw a confusion matrix as a colour-coded grid and show it.

    Parameters
    ----------
    conf_matrix : 2-D array-like, square
        Confusion matrix to draw. The axis labels assume rows are true classes
        and columns are predicted classes, which is the layout returned by
        ``sklearn.metrics.confusion_matrix(y_true, y_pred)``.
    class_names : list of str, optional
        Tick labels, one per class. Defaults to the six dataset folders.
    title : str, optional
        Figure title; omitted when None.
    """
    if class_names is None:
        class_names=['Animal','Fungus','Geological','Person','plant','Sport']
    df = pd.DataFrame(conf_matrix, index =class_names,columns = class_names)
    plt.pcolor(df)
    # ticks are placed at cell centres (0.5, 1.5, ...) so labels line up with cells
    plt.yticks(np.arange(0.5, len(df.index), 1), df.index)
    plt.xticks(np.arange(0.5, len(df.columns), 1), df.columns)
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.colorbar()  # maps cell colour to number of test images
    if title is not None:
        plt.title(title)
    plt.show()

 Autoencoder - Confusion matrix and accuracy

In [None]:
#Call for predicting, Confusion matrix calculation and accuracy calculation

# 5-NN on the autoencoder feature vectors, Euclidean distance
print ("Results for Autoencoder- KNN+Euclidean distance")
knn_pred_Autoencoder=KNN_5(Auto_test,Auto_train, labels_train,algorithm='ball_tree',metric='euclidean')
print ("Confusion Matrix ")
conf_matrix=confusion_matrix(labels_test, knn_pred_Autoencoder)
print (conf_matrix)
print ("ACCURACY = ",accuracy_score(labels_test,knn_pred_Autoencoder))
plot_cm(conf_matrix)

# 5-NN on the autoencoder feature vectors, correlation distance
print("Results for Autoencoder- KNN+Pearson Correlation")
pear_pred_Autoencoder=KNN_5(Auto_test,Auto_train, labels_train,algorithm='brute', metric='correlation')
print ("Confusion Matrix ")
conf_matrix=confusion_matrix(labels_test, pear_pred_Autoencoder)
print (conf_matrix)
print ("ACCURACY = ",accuracy_score(labels_test,pear_pred_Autoencoder))
plot_cm(conf_matrix)

<img src="cf1.png">

<img src="cf2.png">

For the Autoencoder-reduced vectors, accuracy is ~33.5% when using 5-NN with Euclidean distance and ~37% when using the Pearson coefficient. Accuracy using the 2 metrics is comparable. From the confusion matrix, we can see that the Geological category is predicted well by this method.

 SVD - Confusion matrix and accuracy

In [None]:
# 5-NN on the SVD feature vectors, Euclidean distance
print("Results for SVD- KNN+Euclidean distance")
print ("Results for SVD")
knn_pred_SVD=KNN_5(SVD_test,SVD_train, labels_train,algorithm='ball_tree',metric='euclidean')
print ("Confusion Matrix ")
conf_matrix=confusion_matrix(labels_test, knn_pred_SVD)
print (conf_matrix)
print ("ACCURACY = ",accuracy_score(labels_test,knn_pred_SVD))
plot_cm(conf_matrix)

# 5-NN on the SVD feature vectors, correlation distance
print("Results for SVD- KNN+Pearson Correlation")
pear_pred_SVD=KNN_5(SVD_test,SVD_train, labels_train,algorithm='brute', metric='correlation')
print ("Confusion Matrix ")
conf_matrix=confusion_matrix(labels_test, pear_pred_SVD)
print (conf_matrix)
print ("ACCURACY = ",accuracy_score(labels_test,pear_pred_SVD))
plot_cm(conf_matrix)

<img src="cf3.png">

<img src="cf4.png">

I tried small k values such as 10, 25, etc. As expected, the accuracy is very low for smaller k values. I chose 256 because it gave good results, and a k value much higher than 256 takes longer to run and didn't seem to improve accuracy noticeably. For the SVD-reduced vectors with feature length 256, accuracy is ~35.5% when using 5-NN with Euclidean distance and ~38% when using the Pearson coefficient. Accuracy using the 2 metrics is comparable. It is slightly higher than that for the Autoencoder-reduced images. From the confusion matrix, we can see that the Geological category is predicted well by this method also.

 RGB - Confusion matrix and accuracy

In [None]:
# 5-NN on the RGB histogram vectors, Euclidean distance
print("Results for RGB Histogram- KNN+Euclidean distance")
knn_pred_RGB=KNN_5(RGB_test,RGB_train, labels_train,algorithm='ball_tree',metric='euclidean')
print ("Confusion Matrix ")
conf_matrix=confusion_matrix(labels_test, knn_pred_RGB)
print (conf_matrix)
print ("ACCURACY = ",accuracy_score(labels_test,knn_pred_RGB))
plot_cm(conf_matrix)

# 5-NN on the RGB histogram vectors, correlation distance
print("Results for RGB Histogram- Pearson Correlation")
pear_pred_RGB=KNN_5(RGB_test,RGB_train, labels_train,algorithm='brute', metric='correlation')  #CHANGE
print ("Confusion Matrix ")
conf_matrix=confusion_matrix(labels_test, pear_pred_RGB)
print (conf_matrix)
print ("ACCURACY = ",accuracy_score(labels_test,pear_pred_RGB))
plot_cm(conf_matrix)

<img src="cf5_2.png">

<img src="cf6.png">

For RGB histogram reduced vector, feature length is 256*3, 256 bins per channel (R,G,B) of the original images. Accuracy is ~34.5% when using 5-NN with Euclidean distance and ~31% when using Pearson coefficient. Accuracy using the 2 metrics is comparable. From the confusion matrix, we can see that the predictions are more balanced for the categories as compared to those of Autoencoder, SVD predictions.

 HSV - Confusion matrix and accuracy

In [None]:
# 5-NN on the HSV histogram vectors, Euclidean distance
print("Results for HSV Histogram- KNN+Euclidean distance")
knn_pred_HSV=KNN_5(HSV_test,HSV_train, labels_train,algorithm='ball_tree',metric='euclidean')
print ("Confusion Matrix ")
conf_matrix=confusion_matrix(labels_test, knn_pred_HSV)
print (conf_matrix)
print ("ACCURACY = ",accuracy_score(labels_test,knn_pred_HSV))
plot_cm(conf_matrix)

# 5-NN on the HSV histogram vectors, correlation distance
print("Results for HSV Histogram- KNN+Pearson Correlation")
pear_pred_HSV=KNN_5(HSV_test,HSV_train, labels_train,algorithm='brute', metric='correlation')
print ("Confusion Matrix ")
conf_matrix=confusion_matrix(labels_test, pear_pred_HSV)
print (conf_matrix)
print ("ACCURACY = ",accuracy_score(labels_test,pear_pred_HSV))
plot_cm(conf_matrix)

<img src="cf7.png">

<img src="cf8.png">

For HSV histogram reduced vector, I used feature length 256*3, 256 bins per channel (H,S,V). Accuracy is ~42% when using 5-NN with Euclidean distance and ~40.5% when using Pearson coefficient. From the confusion matrix, we can see that the predictions are more balanced for the categories as compared to those of Autoencoder, SVD predictions.

Overall accuracy is highest for the HSV histogram vectors with 5-NN using Euclidean distance. The next best is the SVD-reduced vectors, for this autoencoder architecture and dataset. However, the confusion matrices reflect more balanced predictions for the HSV and RGB histograms than for the Autoencoder- and SVD-reduced images. All accuracy values range from 31% to 42%. Also, for Autoencoder and SVD the Pearson coefficient seems to work better, whereas for the RGB and HSV histograms, predictions using Euclidean distance are better.