In [27]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
import os
import cv2
import pickle
import csv

from PIL import Image
from skimage.feature import hog
from skimage.color import rgb2grey

# Library for scikit-learn
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Library for LIBSVM
from libsvm.svmutil import *
from itertools import combinations
from skimage.data import camera

# Libraries for TensorFlow and Keras
import tensorflow as tf
import tensorflow.keras
import keras
from keras.preprocessing.image import load_img, img_to_array
from keras.applications import imagenet_utils
from keras.models import Model, Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
import h5py

In [4]:
# Dataset root. "101" is the main dataset; "102" is a smaller dataset used for testing.
data_dir = "/mnt/c/Users/nhmin/Downloads/food-101/"

# Number of bins.
bin_n = 16

# Working directory at the time this cell runs.
project_dir = os.getcwd()

# Directory that holds the saved model files.
model_dir = "/mnt/c/Users/nhmin/Downloads/"

# Class name -> integer label used throughout the notebook.
class_label = {
    "pad_thai": 0,
    "pho": 1,
    "ramen": 2,
    "spaghetti_bolognese": 3,
    "spaghetti_carbonara": 4,
}

In [4]:
def alexnet_model(num_classes=2):
    """Build and compile an AlexNet-style Sequential CNN for 227x227 RGB input.

    The network stacks five convolution + ReLU stages (stages 1, 2 and 5 are
    followed by 3x3 / stride-2 max pooling, stages 1 and 2 additionally by
    batch normalisation), then three ReLU dense layers with dropout and a
    softmax output layer.  Every convolution uses 'valid' padding, so the
    feature map entering ``Flatten`` is 2x2x256 (1024 values).

    Parameters
    ----------
    num_classes : int, optional
        Width of the softmax output layer.  Defaults to 2, the value that was
        previously hard-coded; pass ``len(class_label)`` to cover every class
        defined in this notebook.

    Returns
    -------
    keras.models.Sequential
        The model, compiled with categorical cross-entropy loss, the Adam
        optimizer and the accuracy metric.

    Raises
    ------
    ValueError
        If ``num_classes`` is smaller than 1.
    """
    if num_classes < 1:
        raise ValueError("num_classes must be a positive integer, got %r" % (num_classes,))

    # Start off the model
    model = Sequential()
    # NOTE(review): the flag is set before any layer is added, exactly as in
    # the original code.  Presumably this freezes the whole model -- confirm
    # that is intended if the model is ever passed to fit().
    model.trainable = False

    # 1st convolution layer: 227x227x3 -> 55x55x96
    model.add(Conv2D(input_shape=(227, 227, 3), filters=96, kernel_size=(11, 11),
                     strides=(4, 4), padding='valid'))
    model.add(Activation('relu'))
    # Max pooling for 1st layer: -> 27x27x96
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid'))
    model.add(BatchNormalization())

    # 2nd convolution layer: -> 23x23x256
    model.add(Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), padding='valid'))
    model.add(Activation('relu'))
    # Max pooling for 2nd layer: -> 11x11x256
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid'))
    model.add(BatchNormalization())

    # 3rd convolution layer: -> 9x9x384
    model.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='valid'))
    model.add(Activation('relu'))

    # 4th convolution layer: -> 7x7x384
    model.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='valid'))
    model.add(Activation('relu'))

    # 5th convolution layer: -> 5x5x256
    model.add(Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='valid'))
    model.add(Activation('relu'))
    # Max pooling for 5th layer: -> 2x2x256
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid'))

    # Dense part
    model.add(Flatten())
    # 1st dense layer.  No input_shape here: the input size is inferred from
    # Flatten (2*2*256), and the former 6*6*256 hint did not match it.
    model.add(Dense(4096))
    model.add(Activation('relu'))
    # Dropout to prevent overfitting
    model.add(Dropout(0.4))

    # 2nd dense layer
    model.add(Dense(4096))
    model.add(Activation('relu'))
    # Dropout to prevent overfitting
    model.add(Dropout(0.4))
    model.add(BatchNormalization())

    # 3rd dense layer
    model.add(Dense(1000))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))

    # Output layer: one softmax unit per class
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # Uncomment line below to see architecture detail
    # model.summary()

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [4]:
def get_image(path):
    """Load an image file and return it as a 227x227 NumPy array.

    Parameters
    ----------
    path : str or path-like
        Location of the image file; passed to ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        Pixel data of the image resized to 227x227.  The image mode is not
        converted, so the channel layout follows the source file.
    """
    # Image.LANCZOS is the filter that Image.ANTIALIAS was an alias for; the
    # ANTIALIAS name itself was removed in Pillow 10.
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(path) as img:
        resized_image = img.resize((227, 227), Image.LANCZOS)
    return np.array(resized_image)

In [5]:
def load_json(path):
    """Read a JSON metadata file and keep only the classes in ``class_label``.

    Returns a dict with one entry per key of ``class_label`` (in that order).
    Each value is whatever the JSON object stores under that key, or ``None``
    when the key is absent from the file.
    """
    with open(path, 'r') as handle:
        parsed = json.load(handle)
    # Restrict the metadata to the classes this notebook works with.
    return {name: parsed.get(name) for name in class_label}

#=========================
# Optional block: shows a sample of the data returned by load_json
# data_head = 5
# sample_data = load_json(parent_dir + "/meta/test.json")
# for key, value in sample_data:
#     print(key)
#     print(value[i] for i in range(0,data_head))
#     for i in range(0, data_head):
#         print(value[i])   
#=========================

In [33]:
# def HOG_image(image):
#     np_image = get_image(parent_dir + "images/" + image)
#     # given 32x32 cell
#     image_feature, image_hog = hog(np_image, orientations=8, pixels_per_cell=(8, 8),
#         cells_per_block=(8, 8), block_norm = 'L2-Hys', visualize=True, multichannel=True)
#     return np.array(image_feature)

# def pca_transform(image):
#     ss = StandardScaler()
#     image_ss = ss.fit_transform(image)
#     # Keep 85% of variance (matches PCA(0.85) below)
#     pca = PCA(0.85)
#     image_pca = pca.fit_transform(image_ss)
#     return image_pca

def get_key(val):
    """Reverse lookup in ``class_label``.

    Returns the first class name whose integer label equals ``val``, or
    ``None`` when no class carries that label.
    """
    matching_names = (name for name, idx in class_label.items() if val == idx)
    return next(matching_names, None)

In [39]:
def create_data(json_data):
    """Build a DataFrame of image file names and integer labels for one split.

    For every class in ``json_data`` the directory ``<data_dir>/images/<label>``
    is listed and only the files named by that class's metadata entries are
    kept.  Previously the entries were ignored and every file in the directory
    was returned, so the frames built from the train and the test metadata
    were identical.

    Parameters
    ----------
    json_data : dict
        Maps class name -> list of image entries (as returned by
        ``load_json``).  A value of ``None`` means "no metadata for this
        class"; in that case every file in the class directory is used.

    Returns
    -------
    pandas.DataFrame
        Columns 'filename' (file name inside the class directory) and 'label'
        (integer from ``class_label``), with a default 0..n-1 index.  A
        train/validation split, if wanted, is left to the caller.
    """
    file_names_list = []
    label_list = []
    for label, entries in json_data.items():
        class_dir = os.path.join(data_dir, 'images', label)
        # Sorted so the row order does not depend on the file system.
        file_names = sorted(os.listdir(class_dir))
        if entries is not None:
            # Assumes entries look like '<label>/<image id>' (Food-101 meta
            # format) -- TODO confirm.  Matching on the extension-less base
            # name also accepts a bare id or a full file name.
            wanted = {os.path.splitext(os.path.basename(entry))[0] for entry in entries}
            file_names = [name for name in file_names
                          if os.path.splitext(name)[0] in wanted]
        file_names_list.extend(file_names)
        label_list.extend([class_label.get(label)] * len(file_names))

    # Create dataframe
    return pd.DataFrame({
        'filename': file_names_list,
        'label': label_list,
    })

In [34]:
def create_feature(data_df, pre_model):
    """Extract one flat feature vector per image with a pre-trained network.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with a 'filename' column (image file name) and a 'label' column
        (integer label from ``class_label``).  The label is only used to
        locate the class directory of each image.
    pre_model : object with a ``predict(x, batch_size=...)`` method
        Network applied to the preprocessed 227x227 images.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per row of ``data_df``; each row is the
        flattened output of ``pre_model`` for that image.
    """
    x_tmp = []
    # Walk the columns positionally so frames with a non-default index
    # (e.g. after a split without reset_index) work too.
    for image, label in zip(data_df['filename'], data_df['label']):
        path = os.path.join(data_dir, 'images', get_key(label), image)
        image_np = load_img(path, target_size=(227, 227))
        image_np = img_to_array(image_np)
        # Add the batch axis expected by preprocess_input / predict.
        image_np = np.expand_dims(image_np, axis=0)
        image_np = imagenet_utils.preprocess_input(image_np)
        x_tmp.append(image_np)
    # NOTE: all images are held in memory at once before prediction.
    x = np.vstack(x_tmp)
    features = pre_model.predict(x, batch_size=32)
    # Flatten whatever the extractor emits; the former hard-coded 2*2*256
    # only worked for one specific output shape.
    features_flatten = features.reshape((features.shape[0], -1))
    return features_flatten

In [None]:
# Load the training metadata and build the (filename, label) frame.
train_json = load_json(os.path.join(data_dir, 'meta', 'train.json'))
train_df = create_data(train_json)

# Alternative checkpoint kept for reference:
#model = tf.keras.models.load_model(model_dir + '227-imgsz-32-bsz-0.01-lr-30-ep/saved_model.pb')
model = tf.keras.models.load_model(
    os.path.join(model_dir, '227-imgsz-32-bsz-0.01-lr-30-ep.ckpt.02-1.64.hdf5'))

# Keep everything except the last `drop_layer` layers and freeze the result,
# so the truncated network acts as a fixed feature extractor.
drop_layer = 12
new_model = tf.keras.Sequential()
for layer in model.layers[:-drop_layer]:
    new_model.add(layer)
new_model.trainable = False
new_model.summary()

# TODO (original note): change optimizer when get from flo
# model.compile(loss='categorical_crossentropy',
#               optimizer='adam',
#               metrics=['accuracy'])
train_feature = create_feature(train_df, new_model)
train_labels = train_df['label'].tolist()
# Every image must have produced exactly one feature row.
assert len(train_labels) == len(train_feature), "feature/label count mismatch"

# LIBSVM options: -s 0 C-SVC, -t 0 linear kernel, -c 2.3 cost,
# -b 1 train a model that can give probability estimates.
train_model = svm_train(train_labels, train_feature, '-s 0 -t 0 -c 2.3 -b 1')
svm_save_model('all_food_classification.model', train_model)


Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_30 (Conv2D)           (None, 55, 55, 96)        34944     
_________________________________________________________________
activation_54 (Activation)   (None, 55, 55, 96)        0         
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 27, 27, 96)        0         
_________________________________________________________________
batch_normalization_18 (Batc (None, 27, 27, 96)        384       
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 25, 25, 256)       221440    
_________________________________________________________________
activation_55 (Activation)   (None, 25, 25, 256)       0         
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 12, 12, 256)     

In [None]:
def test_dataset():
    """Evaluate the saved SVM on the test metadata and print misclassified images.

    Loads 'all_food_classification.model', builds the test frame from
    'meta/test.json', extracts features with the module-level ``new_model``,
    then calls ``svm_predict`` / ``evaluations``, prints the accuracy and, for
    each prediction that differs from the true label, the file name together
    with the true and predicted class names.

    NOTE(review): ``test_label`` and ``test_data`` are not defined in this
    function or anywhere in the visible source, while ``test_feature`` is
    computed but never used.  Presumably the prediction should use
    ``test_df['label'].tolist()`` and ``test_feature`` -- confirm; as written
    the call is expected to fail with a NameError unless those names exist
    elsewhere.
    """
    model = svm_load_model('all_food_classification.model')
    test_json = load_json(data_dir + 'meta/test.json')
    test_df = create_data(test_json)
    # new_model is not a parameter; it is read from module scope, so the
    # training cell must have been run first.
    test_feature = create_feature(test_df, new_model)
    
    # Predict
    # NOTE(review): '-b 0' is passed here although the training cell uses
    # '-b 1' (commented there as giving probability estimates) -- confirm
    # that plain label prediction is intended.
    p_label, p_acc, p_val = svm_predict(test_label, test_data, model, '-b 0')
    acc, mse, scc = evaluations(test_label, p_label)
    print("Test acc: ", acc)
    # Print wrongly classified images
    # NOTE(review): counter is initialised but not updated in the lines visible here.
    counter = 0
    for i in range(0, len(p_label)):
        if(p_label[i] != test_df['label'][i]):
            print(test_df['filename'][i])
            print('True label: ', get_key(test_df['label'][i]), " Predicted label: ", get_key(p_label[i]))