In [1]:
# import the necessary packages
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from imutils import paths
import numpy as np
import imutils
import cv2
import os

import matplotlib.pyplot as plt
import skimage
from skimage import color
from skimage import io
from skimage.transform import resize

In [2]:
# initialize
DATA_PATH = './dataset/'

In [3]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    # extract a 3D color histogram from the HSV color space using
    # the supplied number of `bins` per channel
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,[0, 180, 0, 256, 0, 256])
 
    # handle normalizing the histogram if we are using OpenCV 2.4.X
    if imutils.is_cv2():
        hist = cv2.normalize(hist)
    # otherwise, perform "in place" normalization in OpenCV 3 (I
    # personally hate the way this is done
    else:
        cv2.normalize(hist, hist)
    # return the flattened histogram as the feature vector
    return hist.flatten()

In [4]:
def buildDataAndLabel(imagePaths,updateTime):
    data = []
    labels = []
    for (i, imagePath) in enumerate(imagePaths):
        # load the image and extract the class label (assuming that our
        # path as the format: /path/to/dataset/{class}.{image_num}.jpg/png
        image = cv2.imread(imagePath)
        label = imagePath.split(os.path.sep)[-1].split(".")[0]
        # extract a grayscale matrix of image
        # data matrix and labels list
        hist = extract_color_histogram(image)
        data.append(hist)
        labels.append(label)
        # show an update every 100 images
        if i > 0 and i % updateTime == 0:
            print("[INFO] processed {}/{}".format(i, len(imagePaths)))
    return (data,labels)

## Image Preprocessing

In [5]:
data = []
labels = []
imagePaths = list(paths.list_images(DATA_PATH))
imagePaths[:10]

['./dataset/banana.4.jpg',
 './dataset/banana.5.jpg',
 './dataset/banana.7.jpg',
 './dataset/banana.6.jpg',
 './dataset/banana.2.jpg',
 './dataset/banana.3.jpg',
 './dataset/banana.1.jpg',
 './dataset/apple.40.jpg',
 './dataset/banana.16.jpg',
 './dataset/apple.33.jpg']

In [6]:
(data ,labels) = buildDataAndLabel(imagePaths,10)

[INFO] processed 10/78
[INFO] processed 20/78
[INFO] processed 30/78
[INFO] processed 40/78
[INFO] processed 50/78
[INFO] processed 60/78
[INFO] processed 70/78


In [7]:
# encode the labels, converting them from strings to integers
le = LabelEncoder()
labels = le.fit_transform(labels)

## Split Train Test

In [8]:
# partition the data into training and testing splits, using 75%
# of the data for training and the remaining 25% for testing

(trainData, testData, trainLabels, testLabels) = train_test_split(
    np.array(data), labels, test_size=0.25, random_state=100)
 

## SVM Model Training

In [9]:
print("[INFO] training Linear SVM classifier...")
model = LinearSVC(C=1,random_state=100)
model.fit(trainData, trainLabels)

[INFO] training Linear SVM classifier...


LinearSVC(C=1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=100, tol=0.0001,
     verbose=0)

## Evaluating classifier

In [10]:
# evaluate the classifier
predictions = model.predict(testData)
print(classification_report(testLabels, predictions,target_names=le.classes_))

             precision    recall  f1-score   support

      apple       0.88      0.88      0.88         8
     banana       0.92      0.92      0.92        12

avg / total       0.90      0.90      0.90        20

