Install these libraries from a terminal if you haven't installed them yet:

pip install matplotlib tensorflow opencv-python scikit-learn scikit-image pillow pandas


For Windows Subsystem for Linux (WSL) users:
1. Displaying plots from matplotlib requires a running Xming server.
2. Download [Xming](https://sourceforge.net/projects/xming/)
3. Open .bashrc for editing
    vim ~/.bashrc
4. Append the following line at the end of the file
    export DISPLAY=:0



In [3]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
import os
import cv2
import pickle
import csv

from PIL import Image
from skimage.feature import hog
from skimage.color import rgb2grey
from skimage.filters import prewitt_h,prewitt_v, sobel
from skimage.data import camera

from itertools import combinations
# Libs for Scikit
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# Libs for LIBSVM
from libsvm.svmutil import *
# libs for tf and keras
import tensorflow as tf
from tensorflow import keras

ModuleNotFoundError: No module named 'libsvm'

In [None]:
# Headless fallback for matplotlib (check for tkinter / an X display).
# This block DOES NOT need to run on jupyter.
# An unset or empty DISPLAY variable means there is no display to draw on,
# so switch matplotlib to the non-interactive Agg backend.
if not os.environ.get('DISPLAY'):
    print('no display found. Using non-interactive Agg backend')
    mpl.use('Agg')

In [None]:
# Global configuration.
# parent_dir (dataset location) and model_dir (where trained models are saved)
# should be changed to match the user's machine.
# 101 is the main dataset, 102 is for testing with a smaller dataset.
parent_dir = "/mnt/c/Users/nhmin/Downloads/food-101/"
bin_n = 16  # Number of bins
project_dir = os.getcwd()
model_dir = "/mnt/c/Users/nhmin/Downloads/"
# Map each class name to its integer label (its position in the tuple below).
class_label = {
    name: index
    for index, name in enumerate(
        (
            "pad_thai",
            "pho",
            "ramen",
            "spaghetti_bolognese",
            "spaghetti_carbonara",
        )
    )
}

`get_image(path)` takes an image path (without the `.jpg` extension) and returns the image, resized to 227x227, as a NumPy array.
`load_json(path)` returns a dictionary with the classes as keys and a list of image paths as each value. It is loaded from **train.json** and **test.json** in the **meta** folder of the dataset.

In [None]:
# Return np img size 227x227
def get_image(path):
    """Load ``path + ".jpg"`` and return it resized to 227x227 as a NumPy array.

    Args:
        path: Image path without the ``.jpg`` extension.

    Returns:
        ``np.ndarray`` built from the resized image.
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(path + ".jpg") as img:
        # Image.LANCZOS is the same filter as the old Image.ANTIALIAS alias,
        # which was removed in Pillow 10.
        resized_image = img.resize((227, 227), Image.LANCZOS)
        return np.array(resized_image)

Load json

In [None]:
def load_json(path):
    """Read a dataset JSON file and keep only the classes in ``class_label``.

    Args:
        path: Path to the JSON file (class label -> list of image paths).

    Returns:
        dict mapping every label in ``class_label`` to the value stored for it
        in the file, or ``None`` when the file has no entry for that label.
    """
    with open(path, 'r') as handle:
        parsed = json.load(handle)
    # Only keep the information for the classes we need.
    return {name: parsed.get(name) for name in class_label}

#=========================
# Exclusive block for showing a sample of how json load data
data_head = 5
sample_data = load_json(os.path.join(parent_dir, "meta", "test.json"))
# Iterate over (label, paths) pairs; iterating the dict directly yields only
# the keys, so unpacking into two names would fail.
for key, value in sample_data.items():
    print(key)
    # load_json stores None for a label missing from the file, and a class may
    # have fewer than data_head entries, so slice instead of indexing.
    for image_path in (value or [])[:data_head]:
        print(image_path)
#=========================


I started off by loading data from **train.json** into `dictionary{label: [image_dir]}`. We will use the [Histogram of Oriented Gradients (HOG)](https://scikit-image.org/docs/dev/api/skimage.feature.html#skimage.feature.hog) of every image, flattened into a 1-D array, to form the new return data as `dictionary{label: [image_name, [hog_array]]}`.

In [None]:
def label_processing(data, label_dictionary):
    """Build the label vector matching the samples in ``data``.

    Args:
        data: dict mapping a class name to an iterable of its samples.
        label_dictionary: dict mapping a class name to its label value.

    Returns:
        ``np.ndarray`` holding, for each class in ``label_dictionary`` (in its
        iteration order), that class's label value repeated once per sample.
    """
    encoded = [
        code
        for name, code in label_dictionary.items()
        for _ in data.get(name)
    ]
    return np.array(encoded)

In [None]:
def load_HOG_data(data):
    """Compute the HOG feature vector of every image listed in ``data``.

    Args:
        data: Iterable of image paths relative to ``parent_dir + "images/"``,
            without the ``.jpg`` extension.

    Returns:
        ``np.ndarray`` with one HOG feature vector per image, in input order.
    """
    feature_lists = []
    for image in data:
        file_image = get_image(parent_dir + "images/" + image)
        # 16x16-pixel cells grouped into 8x8-cell blocks. Only the feature
        # vector is requested: the HOG visualisation image was never used.
        image_feature = hog(file_image, orientations=8, pixels_per_cell=(16, 16),
                            cells_per_block=(8, 8), block_norm='L2-Hys',
                            multichannel=True)
        feature_lists.append(image_feature)
    return np.array(feature_lists)

def feature_format(data):
    """Standardize the HOG features of ``data`` and project them with PCA.

    Args:
        data: Iterable of image paths, passed to ``load_HOG_data``.

    Returns:
        The PCA-transformed, standardized feature matrix (one row per image).
    """
    feature_matrix = load_HOG_data(data)
    ss = StandardScaler()
    food_stand = ss.fit_transform(feature_matrix)
    # PCA cannot keep more components than min(n_samples, n_features); this
    # equals the sample count whenever there are fewer samples than features.
    pca = PCA(n_components=min(feature_matrix.shape))
    # Apply the PCA projection; the scaler was previously applied a second
    # time here, which left the PCA object unused.
    food_pca = pca.fit_transform(food_stand)
    return food_pca

def pre_process_data(data_json):
    """Run ``feature_format`` on the image list of every class.

    Args:
        data_json: dict mapping a class label to its list of image paths.

    Returns:
        dict mapping each class label to the output of ``feature_format`` for
        that class's images.
    """
    return {
        name: feature_format(paths)
        for name, paths in data_json.items()
    }

In [None]:
def train_dataset(train_json):
    """Train one linear SVM per pair of classes and pickle each model.

    For every 2-combination of the labels in ``train_json`` a binary SVC is
    fitted (first class labelled 1, second class labelled -1) and saved as
    ``<model_dir>/models_save/<class_a>_<class_b>_model.sav``.

    Args:
        train_json: dict mapping a class label to its list of image paths.
    """
    # Local import: SVC is not provided by any of the file-level imports.
    from sklearn.svm import SVC

    # Getting all the labels and every pairwise combination of labels
    labels = list(train_json.keys())
    class_combinations = list(combinations(labels, 2))

    # Get the PCA features of every class once, up front
    train_pca_dict = pre_process_data(train_json)

    # Make sure the output directory exists before writing any model.
    save_dir = os.path.join(model_dir, "models_save")
    os.makedirs(save_dir, exist_ok=True)

    for combination in class_combinations:
        # SVM classification
        svm = SVC(gamma='auto', kernel='linear', probability=True)
        combine_train_data = np.vstack((train_pca_dict.get(combination[0]), train_pca_dict.get(combination[1])))
        # Key order matches the vstack order above, so labels line up with rows.
        label_dict = {combination[0] : 1, combination[1] : -1}
        label_lists_train = label_processing(train_json, label_dict)

        x_train = pd.DataFrame(combine_train_data)
        y_train = pd.Series(label_lists_train)

        svm.fit(x_train, y_train)
        filename = combination[0] + "_" + combination[1] + "_model.sav"
        model_filename = os.path.join(save_dir, filename)
        # Context manager guarantees the model file is flushed and closed.
        with open(model_filename, 'wb') as model_file:
            pickle.dump(svm, model_file)