In [1]:
# Import your libraries
import glob
import os
import warnings

import cv2

#For data management
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler


#import classifiers
from sklearn.svm import SVC
from sklearn import svm

#metrics provides classification_report and the confusion-matrix utilities
from sklearn import metrics

#helper functions for splitting data and for cross-validation
from sklearn.model_selection import train_test_split, cross_val_score

#for plotting
import matplotlib.pyplot as plt
%matplotlib inline


#export model
import pickle

In [7]:
def getFruitData(folder, k_fold = False):
    """Load fruit images from ``./fruits-360`` and encode their labels as integer IDs.

    Every sub-directory of a split folder is treated as one class and the
    directory name is used as the label. Each ``*.jpg`` inside it is read as a
    3-channel colour image, resized to 45x45 and converted from OpenCV's
    native BGR channel order to RGB.

    Parameters
    ----------
    folder : str
        Name of the split folder under ``./fruits-360`` (the notebook uses
        'Training' and 'Test'). Ignored when ``k_fold`` is True.
    k_fold : bool, default False
        When True, the images of both 'Training' and 'Test' are pooled into
        one dataset (intended for cross-validation).

    Returns
    -------
    fruit_images : np.ndarray
        Loaded images stacked along the first axis; shape
        ``(n_images, 45, 45, 3)`` when at least one image was read.
    labelIDs : np.ndarray
        Integer class ID of every image, aligned with ``fruit_images``.
    id_to_label_dict : dict
        Maps each integer ID back to its label. IDs follow the sorted order
        of the labels found in THIS call, so two separate calls only produce
        matching IDs when they see the same set of classes.
    """
    # k-fold mode pools both splits; otherwise only the requested folder is read.
    folder_names = ['Training', 'Test'] if k_fold else [folder]

    fruit_images = []
    labels = []

    for folder_name in folder_names:
        path = "./fruits-360/" + folder_name + "/*"

        # sorted() makes the row order reproducible; plain glob order depends
        # on the filesystem.
        for fruit_dir_path in sorted(glob.glob(path)):
            # The class label is the directory name. basename/normpath handle
            # both "/" and "\\" separators, unlike splitting on "\\" which
            # only works on Windows.
            fruit_label = os.path.basename(os.path.normpath(fruit_dir_path))

            for image_path in sorted(glob.glob(os.path.join(fruit_dir_path, "*.jpg"))):
                image = cv2.imread(image_path, cv2.IMREAD_COLOR)

                # cv2.imread signals an unreadable/corrupt file by returning
                # None; skip it loudly instead of crashing inside cv2.resize.
                if image is None:
                    warnings.warn("Skipping unreadable image: %s" % image_path)
                    continue

                # Shrink to a fixed 45x45 so every sample has the same shape.
                image = cv2.resize(image, (45, 45))

                # imread yields BGR; swap to RGB. (Same channel swap as the
                # COLOR_RGB2BGR code, just named for what actually happens.)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                fruit_images.append(image)
                labels.append(fruit_label)

    fruit_images = np.array(fruit_images)
    labels = np.array(labels)

    # label -> ID, numbered in the sorted order returned by np.unique
    label_to_id_dict = {v: i for i, v in enumerate(np.unique(labels))}

    # ID -> label, the inverse lookup used when reporting predictions
    id_to_label_dict = {v: k for k, v in label_to_id_dict.items()}

    # replace every label by its numeric ID
    labelIDs = np.array([label_to_id_dict[x] for x in labels])

    return fruit_images, labelIDs, id_to_label_dict


In [9]:
# Load each split on its own (k_fold defaults to False).
# NOTE(review): label IDs are assigned independently per call, so the train
# and test IDs only line up if both folders contain the same class
# directories -- confirm for this dataset.
X_train, y_train, y_train_id_to_label = getFruitData('Training')
X_test, y_test, y_test_id_to_label = getFruitData('Test')

# Pooled Training + Test data; the folder argument is unused in k-fold mode.
X, y, y_id_to_label = getFruitData('', k_fold=True)

In [10]:
# Flatten every image into one feature row and standardize the features.
# The scaler is fitted on the TRAINING data only and then reused for the test
# set: fitting a second scaler on the test data would standardize both sets
# with different statistics and leak test-set information into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.reshape(len(X_train), -1))
X_test_scaled = scaler.transform(X_test.reshape(len(X_test), -1))

# The pooled dataset gets its own scaler.
# NOTE(review): scaling before cross-validation still lets each fold see the
# statistics of its held-out part; wrapping scaler + model in a Pipeline
# inside cross_val_score would avoid that.
X_scaled = StandardScaler().fit_transform(X.reshape(len(X), -1))


In [None]:
# Support-vector classifier with scikit-learn's default settings
model = SVC()

# Train on the standardized training features and their label IDs
model.fit(X_train_scaled, y_train)

In [None]:
# Mean accuracy of the fitted model on the scaled test features:
# model.score predicts X_test_scaled and compares against y_test.
accuracy = model.score(X_test_scaled, y_test)


# Report the accuracy as a plain float (fraction of correct predictions)
print("Accuracy %f" % accuracy)

In [None]:
# Predict a label ID for every test sample; y_pred is reused by the
# classification report cell below.
y_pred = model.predict(X_test_scaled)
# Bare expression so the notebook displays the predicted IDs as cell output
y_pred

In [None]:
# Per-class precision / recall / F1 of the predictions against the true test IDs
print(
    metrics.classification_report(y_test, y_pred)
)

In [None]:
# pred_kfold = cross_val_score(model, X_scaled, y, cv=5) 
# print("Accuracy with SVM and K-FOLD CROSS VALIDATION: %0.2f (+/- %0.2f)" % (pred_kfold.mean(), pred_kfold.std() * 2))