In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split

from skimage.io import imread
from skimage.transform import resize

**Load images in structured directory**

In [2]:
def load_image_files(container_path, dimension=(64, 64)):
    """
    Load image files with categories as subfolder names 
    which performs like scikit-learn sample dataset
    
    Parameters
    ----------
    container_path : string or unicode
        Path to the main folder holding one subfolder per category
    dimension : tuple
        size to which image are adjusted to
        
    Returns
    -------
    Bunch
    """
    image_dir = Path(container_path)
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    categories = [fo.name for fo in folders]

    descr = "A image classification dataset"
    images = []
    flat_data = []
    target = []
    for i, direc in enumerate(folders):
        for file in direc.iterdir():
            img = imread(file)
            img_resized = img
            flat_data.append(img_resized.flatten()) 
            images.append(img_resized)
            target.append(i)
    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)

**Flattening images after resizing them to 64 x 64 due to computation constraints**

In [3]:
image_dataset = load_image_files("resized/")

In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3,random_state=109)

In [5]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(23496, 12288) (10070, 12288) (23496,) (10070,)


In [6]:
X_train = X_train/255
X_test = X_test/255

In [None]:
from sklearn import svm
clf = svm.LinearSVC(verbose = 10)
clf.fit(X_train , y_train)

[LibLinear]

**The model still did not complete training even after 40 hours**

In [None]:
from sklearn.metrics import accuracy_score

y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)
print(accuracy_score(y_train , y_pred_train))
accuracy_score(y_test , y_pred_test)