# Tasks
- One simple feature representation (color histogram)
- One more advanced feature extractor based on SIFT
- https://en.wikipedia.org/wiki/Scale-invariant_feature_transform
    - and subsequent visual bag of words
    - https://kushalvyas.github.io/BOV.html


- try a few algorithms
- especially MLPs
- try different parameter settings
- -> this is the baseline


- then try a deep convolutional neural network
    - Tensorflow
    
    
- detailed comparison and analysis
    - confusion matrix
    - other patterns
- log everything

## TODO
- use paths relative to the script location so the code can run standalone
- MLP tuning
- find example for CNN (check how many input nodes we need, number of layers)


    -> understand and create CNN
    -> improve KNN and MLP

In [None]:
import glob, os
import datetime

import numpy as np
import pandas as pd
# set pandas to show more columns
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 800)

from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing
from sklearn.cluster import MiniBatchKMeans
from sklearn import neighbors
from sklearn.neural_network import MLPClassifier

#from matplotlib import pyplot as plt
#%matplotlib inline

import cv2
from PIL import Image

In [None]:
# cross validation
def run_cv(classifier, train_data, train_target, num_cv = 10):
    """Cross-validate ``classifier`` and print mean/std of accuracy and macro F1.

    Args:
        classifier: scikit-learn estimator handed to the cross-validation helper.
        train_data: feature vectors, one row per sample.
        train_target: class label per sample.
        num_cv: value passed as ``cv`` (number of folds when it is an int).

    Returns:
        None. The summary line is printed.
    """
    # A single cross_validate call scores both metrics on the same fitted
    # models, instead of repeating the whole cross-validation once per metric.
    scores = model_selection.cross_validate(
        classifier, train_data, train_target,
        cv=num_cv, scoring=('accuracy', 'f1_macro'), n_jobs=-1)
    scores_acc = scores['test_accuracy']
    scores_f1 = scores['test_f1_macro']
    print(f"Accuracy: {scores_acc.mean():.3f} (+/- {scores_acc.std():.3f}), F1: {scores_f1.mean():.3f} (+/- {scores_f1.std():.3f})")

# Print confusion matrix
def conf_matrix(true, pred, classes):
    """Display the confusion matrix of ``true`` vs. ``pred`` as a DataFrame.

    ``classes`` is used for both the row and the column labels.
    """
    matrix = metrics.confusion_matrix(true, pred)
    table = pd.DataFrame(matrix, index=classes, columns=classes)
    display(table)
    
# Print result comparison
def comp_labels(true, pred, num=None):
    """Display the first ``num`` true and predicted labels side by side.

    Args:
        true: sequence of true labels.
        pred: sequence of predicted labels.
        num: number of leading entries to show; ``None`` shows ``len(true)``.
    """
    # Identity check: ``== None`` would invoke a custom ``__eq__`` if one exists.
    if num is None:
        num = len(true)
    comparison = pd.DataFrame(
        [pd.Series(true[:num]), pd.Series(pred[:num])],
        index=['true', 'pred']).T
    display(comparison)

In [None]:
# use for packaged python file?
#os.path.dirname(os.path.abspath(__file__))

In [None]:
# import fruit dataset
def get_fruit_labels():
    """Collect image paths and class labels from the FIDS30 fruit dataset.

    Images are looked up as ``FIDS30/<label>/<name>.jpg`` relative to the
    current working directory, so this must be called with the project folder
    as the working directory.

    Returns:
        A tuple ``(file_names_trunc, target_labels)``: the image paths relative
        to the ``FIDS30`` folder, and for each path the name of its sub-folder.
    """
    print(f"Getting fruit labels - {datetime.datetime.now()}")
    dataset_dir = 'FIDS30'
    # Sorted so the sample order does not depend on the order in which the
    # file system happens to list the files.
    file_names = sorted(glob.glob(os.path.join(dataset_dir, '*', '*.jpg')))
    print(f"Found {len(file_names)} files\n")

    # get target labels from file path
    file_names_trunc = []
    target_labels = []
    for file_name in file_names:
        # os.path handles the platform's path separator, so no fixed-length
        # prefix slicing or trial-and-error separator search is needed.
        relative_name = os.path.relpath(file_name, dataset_dir)
        file_names_trunc.append(relative_name)
        target_labels.append(os.path.dirname(relative_name))
    print(f"Done getting labels - {datetime.datetime.now()}\n")
    return file_names_trunc, target_labels

def int_encode_labels(target_labels):
    """Encode class names as integers with a ``LabelEncoder``.

    Returns:
        A tuple ``(target, label_encoder)``: the encoded labels and the fitted
        encoder, which can map the integers back to the class names.
    """
    # The encoder learns the unique class names and maps them to numbers in one step.
    encoder = preprocessing.LabelEncoder()
    encoded = encoder.fit_transform(target_labels)
    print(f"Classes transformed: {encoder.classes_}\n")
    return encoded, encoder

In [None]:
def get_car_labels():
    """Collect file names and class labels for the car train and test images.

    Train images are read from ``CarData/TrainImages/*.pgm``; a file whose name
    starts with ``neg`` gets class 0, every other file class 1. Test images are
    read from ``CarData/TestImages/*.pgm`` and are all assigned class 1.

    Only names and labels are returned, so the image files are not opened here.

    Returns:
        A tuple ``(image_names, classes, test_image_names, test_classes)``.
    """
    print(f"Getting car labels - {datetime.datetime.now()}")
    # Sorted so the sample order does not depend on file system listing order.
    train_files = sorted(glob.glob(os.path.join('CarData/TrainImages', '*.pgm')))
    print("Found", len(train_files), "train files")

    image_names = [os.path.basename(filename) for filename in train_files]
    classes = [0 if name.startswith('neg') else 1 for name in image_names]

    test_files = sorted(glob.glob(os.path.join('CarData/TestImages', '*.pgm')))
    print("Found", len(test_files), "test files")

    test_image_names = [os.path.basename(filename) for filename in test_files]
    # Every test image is labelled as the positive class.
    test_classes = [1] * len(test_files)
    print(f"Done getting labels - {datetime.datetime.now()}\n")
    return image_names, classes, test_image_names, test_classes

In [None]:
def pil_extraction(images_path, file_names):
    """Extract per-channel colour histograms using PIL/Pillow.

    Args:
        images_path: folder that contains the images.
        file_names: image paths relative to ``images_path``.

    Returns:
        A list with one histogram per image, as returned by
        ``Image.histogram()`` after conversion to RGB.
    """
    print(f"Starting PIL extraction - {datetime.datetime.now()}")

    # The simplest approach is via the PIL/PILLOW package; here we get a histogram over each RGB channel
    # Note: this doesn't really represent colours, as a colour is made up of the combination of the three channels!
    train_histo = []
    for file_name in file_names:
        # The context manager closes the file handle as soon as the histogram
        # is computed instead of leaving it open until garbage collection.
        with Image.open(os.path.join(images_path, file_name)) as image_PIL:
            feature_vector = image_PIL.convert('RGB').histogram()

        if len(feature_vector) != 768: # just a sanity check; with the transformation to RGB, this should never happen
            print("Unexpected length of feature vector: " + str(len(feature_vector)) + " in file: " + file_name)

        train_histo.append(feature_vector)
    print(f"Done - {datetime.datetime.now()}")
    return train_histo

def opencv_extraction(image_path, file_names):
    """Extract 1D, 2D and 3D colour histograms with OpenCV.

    Args:
        image_path: folder that contains the images.
        file_names: image paths relative to ``image_path``.

    Returns:
        A tuple ``(data_opencv_1D, data_opencv_2D, data_opencv_3D)`` of lists
        with one feature vector per image:
        - 1D: a 64-bin histogram per channel, concatenated;
        - 2D: a 16x16-bin joint histogram per channel pair, concatenated;
        - 3D: an 8x8x8-bin joint histogram over all three channels, flattened.
    """
    print(f"Starting OpenCV extraction - {datetime.datetime.now()}")

    data_opencv_1D, data_opencv_2D, data_opencv_3D = [], [], []

    bins_1D = 64
    bins_2D = 16
    # Channel index pairs in B/G/R order: (G, B), (G, R), (B, R)
    channel_pairs = ((1, 0), (1, 2), (0, 2))

    for file_name in file_names:
        # The image is loaded through PIL so it can be converted to RGB
        # first, and only then handed to OpenCV as an array. The context
        # manager closes the file handle once the pixel data has been copied.
        with Image.open(os.path.join(image_path, file_name)) as image_PIL:
            image_rgb = np.array(image_PIL.convert('RGB'))
        # Convert RGB to BGR
        image_opencv = image_rgb[:, :, ::-1].copy()

        # Now we split the image in the three channels, B / G / R
        channels = cv2.split(image_opencv)

        # Features per channel, aggregated into fewer bins than the 256 possible values
        feature_vector_opencv_1D = []
        for channel in channels:
            hist_opencv = cv2.calcHist([channel], [0], None, [bins_1D], [0, 256])
            feature_vector_opencv_1D.extend(hist_opencv.ravel())
        data_opencv_1D.append(feature_vector_opencv_1D)

        if len(feature_vector_opencv_1D) != bins_1D*3: # sanity check, in case we had a wrong number of channels...
            print(f"Unexpected length of feature vector: {str(len(feature_vector_opencv_1D))} in file: {file_name}")

        # Features that look at two channels at the same time, e.g. when green
        # and blue both have "high values". Fewer bins keep the vector short.
        feature_vector_opencv_2D = []
        for first, second in channel_pairs:
            hist_opencv = cv2.calcHist([channels[first], channels[second]], [0, 1], None,
                                       [bins_2D, bins_2D], [0, 256, 0, 256])
            feature_vector_opencv_2D.extend(hist_opencv.ravel())
        data_opencv_2D.append(feature_vector_opencv_2D)

        # Finally, all three channels at the same time, with even fewer bins
        # per channel because the number of cells grows cubically.
        features_opencv_3D = cv2.calcHist([image_opencv], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
        data_opencv_3D.append(features_opencv_3D.flatten())

    print(f"Done - {datetime.datetime.now()}")
    return data_opencv_1D, data_opencv_2D, data_opencv_3D

In [None]:
# SIFT feature extraction

def sift_extraction(image_path, file_names, n_clusters=8):
    """Build visual bag-of-words histograms from SIFT descriptors.

    SIFT descriptors are extracted from every image, clustered with
    ``MiniBatchKMeans`` into ``n_clusters`` visual words, and each image is
    described by the count of its descriptors per word.

    Every call fits its own codebook on the images it is given, so histograms
    from two separate calls do not share a vocabulary.

    Args:
        image_path: folder that contains the images.
        file_names: image paths relative to ``image_path``.
        n_clusters: size of the codebook (number of visual words).

    Returns:
        Array with one row per entry of ``file_names`` and ``n_clusters``
        columns. Images that could not be read or yielded no descriptors get
        an all-zero row, so rows stay aligned with ``file_names``.

    Raises:
        ValueError: if no descriptors were extracted at all, or if the
            descriptors do not have 128 features.
    """
    print(f"Starting SIFT extraction - {datetime.datetime.now()}")

    # Newer OpenCV builds expose SIFT in the main module; older contrib builds
    # only have it in xfeatures2d. Create the detector once and reuse it.
    sift_factory = getattr(cv2, 'SIFT_create', None)
    if sift_factory is None:
        sift_factory = cv2.xfeatures2d.SIFT_create
    sift = sift_factory()

    # Placeholder for images without descriptors: zero rows, so it adds
    # nothing to the clustering (unlike an uninitialised np.empty row).
    no_descriptors = np.zeros((0, 128), dtype=np.float32)

    sift_desc = []
    for file_name in file_names:
        path = os.path.join(image_path, file_name)
        img = cv2.imread(path)
        # Check if img has been read successfully
        if img is None:
            print(path)
            # Keep a slot for this image so the output stays aligned with file_names.
            sift_desc.append(no_descriptors)
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, desc = sift.detectAndCompute(gray, None)
        sift_desc.append(desc if desc is not None else no_descriptors)

    # cluster features into visual bag of words
    cluster_model = MiniBatchKMeans(n_clusters=n_clusters)

    # Concatenate all descriptors in the training set together
    described = [desc for desc in sift_desc if len(desc)]
    if not described:
        raise ValueError('No SIFT descriptors could be extracted from the given images')
    all_train_descriptors = np.vstack(described)

    if all_train_descriptors.shape[1] != 128:
        raise ValueError('Expected SIFT descriptors to have 128 features, got', all_train_descriptors.shape[1])

    print('%i descriptors before clustering' % all_train_descriptors.shape[0])

    # Cluster descriptors to get codebook
    print('Using clustering model %s...' % repr(cluster_model))
    print('Clustering on training set to get codebook of %i words' % n_clusters)

    cluster_model.fit(all_train_descriptors)
    print('Done clustering. Using clustering model to generate BoW histograms for each image.')

    # Histogram of visual-word counts per image; these are the final features.
    img_bow_hist = np.array([
        np.bincount(cluster_model.predict(desc), minlength=n_clusters)
        if len(desc) else np.zeros(n_clusters, dtype=np.intp)
        for desc in sift_desc])

    print('Done generating BoW histograms.')
    print(f"Done - {datetime.datetime.now()}")
    return img_bow_hist

In [None]:
def train_test_split(X, y, test_size=0.33, random_state=42):
    """Split features and labels into a train and a test part.

    Thin wrapper around ``model_selection.train_test_split``.

    Args:
        X: feature vectors, one per sample.
        y: label per sample.
        test_size: forwarded to sklearn; defaults to the previously fixed 0.33.
        random_state: forwarded to sklearn; defaults to the previously fixed 42.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    return X_train, X_test, y_train, y_test

def run_knn(X_train, X_test, y_train, y_test, classes, k = 11):
    """Cross-validate, fit and evaluate a k-nearest-neighbours classifier.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels for the accuracy report.
        classes: class names; only needed by the optional comparison output
            that is currently commented out.
        k: number of neighbours.

    Returns:
        The fitted ``KNeighborsClassifier``.
    """
    print(f"Run KNN - {datetime.datetime.now()}\n")
    # The k argument is honoured; it used to be overwritten by a constant 11.
    knn = neighbors.KNeighborsClassifier(n_neighbors = k)
    # cross validation
    run_cv(knn, X_train, y_train)

    knn.fit(X_train, y_train)
    knnresult = knn.predict(X_test)

    acc = metrics.accuracy_score(y_test, knnresult)
    print(f"[Test data] Accuracy: {acc:.3f}")

    # compare results
    #comp_labels(y_test, knnresult, num=10)
    #conf_matrix(y_test, knnresult, classes)
    print(f"Done - {datetime.datetime.now()}\n")
    return knn

In [None]:
def run_mlp(X_train, X_test, y_train, y_test, classes,
            solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(3, 2), random_state=1):
    """Cross-validate, fit and evaluate a multi-layer perceptron classifier.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels for the accuracy report.
        classes: class names; only needed by the optional comparison output
            that is currently commented out.
        solver, alpha, hidden_layer_sizes, random_state: forwarded to
            ``MLPClassifier``.

    Returns:
        The fitted ``MLPClassifier``.
    """
    print(f"Run MLP - {datetime.datetime.now()}\n")
    # The hyper-parameters from the signature are forwarded; they used to be
    # ignored in favour of hard-coded literals with the same values.
    clf = MLPClassifier(solver=solver, alpha=alpha,
                        hidden_layer_sizes=hidden_layer_sizes, random_state=random_state)
    run_cv(clf, X_train, y_train)

    clf.fit(X_train, y_train)
    clfresult = clf.predict(X_test)

    acc = metrics.accuracy_score(y_test, clfresult)
    print(f"[Test data] Accuracy: {acc:.3f}")

    # compare results
    #comp_labels(y_test, clfresult, num=10)
    #conf_matrix(y_test, clfresult, classes)
    print(f"Done - {datetime.datetime.now()}\n")
    return clf

In [None]:
# Fruit Data
# Pipeline: collect file names and labels -> encode labels as integers ->
# extract features -> split -> train and evaluate KNN and MLP.
fileNames, fruit_labels = get_fruit_labels()
# fruit_labels is rebound from class names to their integer encoding here.
fruit_labels, labelEncoder = int_encode_labels(fruit_labels)
print(f"Test inverse transform: 0, 1, 18 => {list(labelEncoder.inverse_transform([0, 1, 18]))}\n")

# Feature extraction; all paths in fileNames are relative to 'FIDS30/'.
train_histo = pil_extraction('FIDS30/', fileNames)
data_opencv_1D, data_opencv_2D, data_opencv_3D = opencv_extraction('FIDS30/', fileNames)

train_bow = sift_extraction('FIDS30/', fileNames)

# Only the PIL histograms are used for classification below; the OpenCV and
# SIFT features are computed but not passed to a classifier in this cell.
X_train, X_test, y_train, y_test = train_test_split(train_histo, fruit_labels)

knn_fruits = run_knn(X_train, X_test, y_train, y_test, labelEncoder.classes_)

mlp_fruits = run_mlp(X_train, X_test, y_train, y_test, labelEncoder.classes_)

In [None]:
# Car Data
# The car dataset comes with separate train and test folders, so no
# train_test_split is applied here.
car_images_train, car_labels_train, car_images_test, car_labels_test = get_car_labels()

# Feature extraction, done separately for the train and the test folder.
car_histo_train = pil_extraction('CarData/TrainImages/', car_images_train)
car_histo_test = pil_extraction('CarData/TestImages/', car_images_test)

car_data_opencv_1D_train, car_data_opencv_2D_train, car_data_opencv_3D_train = opencv_extraction('CarData/TrainImages/', car_images_train)
car_data_opencv_1D_test, car_data_opencv_2D_test, car_data_opencv_3D_test = opencv_extraction('CarData/TestImages/', car_images_test)

# NOTE(review): each sift_extraction call fits its own clustering model, so
# car_bow_train and car_bow_test are built from two different codebooks.
# Confirm this is intended before using them together in a classifier.
car_bow_train = sift_extraction('CarData/TrainImages/', car_images_train)
car_bow_test = sift_extraction('CarData/TestImages/', car_images_test)

# Only the PIL histograms are used for classification; [0,1] are the class labels.
knn_car = run_knn(car_histo_train, car_histo_test, car_labels_train, car_labels_test, [0,1])
mlp_car = run_mlp(car_histo_train, car_histo_test, car_labels_train, car_labels_test, [0,1])

In [None]:
def resize_and_crop(img,target_width=224,target_height=224):
    """Scale ``img`` to cover the target size, then centre-crop to exactly that size.

    The aspect ratio is preserved: the image is resized so that one side
    matches the target and the other is at least as large, and the surplus on
    that other side is cropped away equally from both ends.

    Args:
        img: PIL image.
        target_width: width of the result in pixels.
        target_height: height of the result in pixels.

    Returns:
        A new PIL image of size ``(target_width, target_height)``.
    """
    width, height = img.size

    img_ratio = width / float(height)
    target_ratio = target_width / float(target_height)

    # 1) compare ratios and resize the larger side proportional to the target of the smaller side
    new_width, new_height = (target_width, target_height)

    if target_ratio > img_ratio:
        new_height = int(round(height * (target_width / float(width))))
    else:
        new_width = int(round(width * (target_height / float(height))))

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
    # high-quality downsampling filter under its current name. Fall back to
    # the old name only on Pillow versions that predate LANCZOS.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS
    img_new = img.resize((new_width, new_height), resample)

    # 2) crop to target size
    # offset to half of the remaining padding (one of them is 0); integer
    # offsets keep the crop box exactly target-sized, whereas half-pixel float
    # offsets leave the final size to how the coordinates get rounded.
    width_offset  = (new_width - target_width) // 2
    height_offset = (new_height - target_height) // 2

    # The box is a 4-tuple defining the left, upper, right, and lower pixel coordinate.
    img_new = img_new.crop((width_offset, height_offset, width_offset+target_width, height_offset+target_height))
    return img_new