In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image

path = Path(os.path.join('C:/', 'Users', 'ale19', 'Downloads', 'Food-101'))
path_h5 = path
path_img = path/'images'
path_meta = path/'meta/meta'
path_working = '/kaggle/working/'

# Modify the from folder function in fast.ai to use the dictionary mapping from folder to space seperated labels
def label_from_folder_map(class_to_label_map):
    return lambda o: class_to_label_map[(o.parts if isinstance(o, Path) else o.split(os.path.sep))[-2]]

# Develop dictionary mapping from classes to labels
classes = pd.read_csv(path_meta/'classes.txt', header=None, index_col=0)
classes_list = classes.index.tolist()
labels = pd.read_csv(path_meta/'labels.txt', header=None)
classes['map'] = labels[0].values
classes_to_labels_map = classes['map'].to_dict()
label_from_folder_food_func = label_from_folder_map(classes_to_labels_map)

# Setup the training set of images
train_df = pd.read_csv(path_meta/'train.txt', header=None).apply(lambda x : x + '.jpg')
train_set = dict((c, []) for c in classes_list)
for i in range(len(train_df)):
    train_set[Path(train_df[0][i]).parts[-2]].append(train_df[0][i])

# Setup the testing set of images
test_df = pd.read_csv(path_meta/'test.txt', header=None).apply(lambda x : x + '.jpg')
test_set = dict((c, []) for c in classes_list)
for i in range(len(test_df)):
    test_set[Path(test_df[0][i]).parts[-2]].append(train_df[0][i])
    
#img = Image.open(os.path.join(path_img, test_set['apple_pie'][1]))
#plt.imshow(np.array(img))


## Extracting the features

In [4]:
from resnet import FeaturesExtractor
import cv2

def extractFeatures(t_set, n_images):

    # Extractor initialization
    extractor = FeaturesExtractor()

    # Infer from all images (or until one is reached)
    n_features = 2048
    features = np.zeros((n_images, n_features+1), dtype=object) # in the first column there is the image's name
    idx = -1
    for c in t_set:
        print(c)
        img_list = []
        for name in t_set[c]:
            #print(idx)
            idx += 1
            features[idx][0] = name
            img = cv2.imread(os.path.join(path_img, name))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img,(128,128))
            # img_list size has to be (N,W,H,C), output has size (N,2048)
            features[idx,1:] = extractor.getFeaturesOfList(np.stack(img_list))
        if c == 'apple_pie': # ONLY THE FIRST 3 CLASSES
            break

    print(features[:10,:])
            
    return features


## Detect the features for train and test set

In [5]:
n_features = 2048

n_images_per_class = len(list(train_set.keys())[0])
n_images = len(train_set) * n_images_per_class     
if os.path.isfile('train_features.txt'):
    idx = 0
    train_feat = np.zeros((n_images, n_features+1), dtype=object)
    with open('train_features.txt', 'r') as file:
        for line in file:
            train_feat[idx,0] = line.split()[0]
            train_feat[idx,1:] = line.split()[1:]
            idx += 1
else:
    train_feat = extractFeatures(train_set, n_images)
    
n_images_per_class = len(list(train_set.keys())[0])
n_images = len(train_set) * n_images_per_class     
if os.path.isfile('test_features.txt'):
    idx = 0
    n_images_per_class = len(list(test_set.keys())[0])
    n_images = len(test_set) * n_images_per_class 
    test_feat = np.zeros((n_images, n_features+1), dtype=object)
    with open('test_features.txt', 'r') as file:
        for line in file:
            test_feat[idx,0] = line.split()[0]
            test_feat[idx,1:] = line.split()[1:]
            idx += 1
else:
    test_feat = extractFeatures(test_set, n_images)

NameError: name 'n_images' is not defined

## Classification with KNN

In [None]:
from scipy.spatial.distance import cdist
from scipy import stats

# 1. Set the parameters: K is the number of clostes neighbours to consider and fun is the metric chosen
K = 1
fun = 'euclidean'

# 2. Calculate the distance between train objects and test objects 
D = cdist(x_train, x_test, metric=fun)

#print(D.shape)

# 3. Per ogni dato di test (argomento axis=0), ordino le distanze dalla più piccola alla più grande 
#    e trovo gli indici di train dei più vicini
# Attenzione: Tengo solo i primi K!
k_neighbors = np.argsort(D, axis=0)[:K,:]
#print(k_neighbors.shape)

# 4. Controllo le etichette di questi K punti: devo trovare la più frequente:
#     - Ottengo le etichette dei punti vicini
#     - Trovo l'etichetta più frequente! Utilizzo la moda!
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mode.html


neighbors_labels = y_train[k_neighbors]
prediction = stats.mode(neighbors_labels, axis=0)[0]
#print(prediction.shape)

# 5. Calcolo l'accuratezza
accurancy = np.sum(prediction == y_test) / len(y_test)
print('Accuratezza del classificatore: ' + '{0:.2f}'.format(accurancy * 100) + '%')