# Transform images to feature and label vectors

In [None]:
import _pickle as pickle
import glob
import os
import numpy as np
import cv2

In [None]:
class data_manager(object):
    """Load one image set from disk into feature and one-hot label arrays.

    Images are read from ``<data_dir>/<set_name>/*.png``. The class label of
    an image is the part of its file name before the first underscore; files
    whose label is not listed in ``class_labels`` are ignored.

    Attributes set by the constructor:
        data_dir: root directory holding the set sub-directories.
        class_labels: ordered list of label names; the position of a name is
            its one-hot index.
        num_class: number of entries in ``class_labels``.
        image_size: side length (pixels) images are resized to.
        set_name: sub-directory of ``data_dir`` to load.
        X: stacked resized images, first axis is the image index.
        Y: stacked one-hot labels, shape ``(n_images, num_class)``.
    """

    def __init__(self, data_dir, class_labels, image_size, set_name):
        """Store the configuration and immediately load the set into X / Y.

        Raises:
            ValueError: if no usable image is found (see ``load_set``).
        """
        self.data_dir = data_dir
        self.class_labels = class_labels
        self.num_class = len(self.class_labels)
        self.image_size = image_size
        self.set_name = set_name

        self.load_set()

    def compute_label(self, label):
        """Return the one-hot vector (length ``num_class``) for ``label``.

        Raises:
            ValueError: if ``label`` is not in ``class_labels``.
        """
        one_hot = np.zeros(self.num_class)
        one_hot[self.class_labels.index(label)] = 1.0
        return one_hot

    def compute_feature(self, image):
        """Resize ``image`` to ``image_size`` x ``image_size`` pixels.

        Pixel values are returned exactly as ``cv2.resize`` produces them;
        rescaling from [0, 255] to [-1, 1] is currently disabled.
        """
        image = cv2.resize(image, (self.image_size, self.image_size))

        # image = (image / 255.0) * 2.0 - 1.0

        return image

    def load_set(self):
        """Read every matching PNG of the set and populate ``self.X`` / ``self.Y``.

        Files are visited in a random order (``np.random.shuffle``), so the
        row order of X and Y differs between runs unless the NumPy global
        seed is fixed by the caller.

        Raises:
            ValueError: if no image with a known label could be loaded.
        """
        import warnings

        pattern = os.path.join(self.data_dir, self.set_name, '*.png')
        data_paths = glob.glob(pattern)

        order = np.arange(len(data_paths))
        np.random.shuffle(order)

        features = []
        labels = []

        for i in order:
            data_path = data_paths[i]

            # File names look like "<label>_<anything>.png".
            img_label = os.path.basename(data_path).split("_")[0]
            if img_label not in self.class_labels:
                continue

            img = cv2.imread(data_path)
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None instead of raising; skip it so X and Y stay aligned.
                warnings.warn("Skipping unreadable image: {}".format(data_path))
                continue

            labels.append(self.compute_label(img_label))
            features.append(self.compute_feature(img))

        if not features:
            raise ValueError(
                "No images with a known class label found matching "
                "{!r}".format(pattern)
            )

        # Stack along a new leading axis: one row per image.
        self.X = np.stack(features)
        self.Y = np.stack(labels)
        

In [None]:
# Root directory of the dataset; built with os.path.join so the separator
# matches the host OS (yields ".\\data" on Windows, "./data" elsewhere).
data_dir = os.path.join(".", "data")

# Class names recognised by the loader. The position of a name in this list
# is its index in the one-hot label vector, so the order must stay stable.
CLASS_LABELS = ['apple','banana','nectarine','plum','peach','watermelon','pear','mango','grape',
                'orange','strawberry','pineapple','radish','carrot','potato','tomato','bellpepper',
                'broccoli','cabbage','cauliflower','celery','eggplant','garlic','spinach','ginger']

# Side length, in pixels, that every image is resized to.
image_size = 150

In [None]:
# Build the training arrays, then print both shapes as a quick sanity check.
dm_train = data_manager(
    data_dir=data_dir,
    class_labels=CLASS_LABELS,
    image_size=image_size,
    set_name='train',
)
print(dm_train.X.shape, dm_train.Y.shape, sep="\n")

In [None]:
# Persist the training arrays next to the raw data. Paths are joined from
# data_dir so they resolve correctly on any OS, not just Windows.
np.save(os.path.join(data_dir, "X_train.npy"), dm_train.X)
np.save(os.path.join(data_dir, "Y_train.npy"), dm_train.Y)

In [None]:
# Build the held-out arrays from the 'val' sub-directory and print both
# shapes as a quick sanity check.
dm_test = data_manager(
    data_dir=data_dir,
    class_labels=CLASS_LABELS,
    image_size=image_size,
    set_name='val',
)
print(dm_test.X.shape, dm_test.Y.shape, sep="\n")

In [None]:
# Persist the held-out arrays next to the raw data. Paths are joined from
# data_dir so they resolve correctly on any OS, not just Windows.
np.save(os.path.join(data_dir, "X_test.npy"), dm_test.X)
np.save(os.path.join(data_dir, "Y_test.npy"), dm_test.Y)