# Cats vs Dogs Classification using SVM
- [x] Read training images
- [x] Resize the images to a standard size (128, 64)
- [x] Extract the features using HOG
- [x] Train the SVM on the extracted features
- [x] Calculate the training accuracy
- [x] Test the trained model using the test data
- [x] Calculate the testing accuracy

## Imports

In [None]:
import cv2
import os
import pandas as pd 
import numpy as np
import random
from skimage.feature import hog
from sklearn import svm

## Feature Extraction 
- Read training images 
- define the category of each image
- read each image
- resize each image to (128, 64)
- extract the features of the image using hog
- construct lists for categories, images names, features

In [None]:
training_path = './training/train'

# the first `n_train` directory entries form the training split
n_train = 3000

# parallel lists: entry k of each list describes the same image
categories = []
imgs_names = []
features = []

# NOTE: os.listdir returns entries in arbitrary order; the split is defined
# purely by position in that listing.
for filename in os.listdir(training_path)[:n_train]:
    # read the image; cv2.imread returns None (it does not raise) when the
    # file is missing, unreadable or not an image
    img = cv2.imread(os.path.join(training_path, filename))
    if img is None:
        # skip before recording the name/label so the three lists stay aligned
        print('Skipping unreadable image:', filename)
        continue

    # resize the image (cv2.resize takes the target size as (width, height))
    resized_img = cv2.resize(img, (128, 64))

    # extract the HOG feature vector; the visualisation image is not needed,
    # so it is not requested
    # NOTE(review): newer scikit-image releases replace `multichannel=True`
    # with `channel_axis=-1` - confirm against the installed version
    fd = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
             cells_per_block=(2, 2), multichannel=True)

    # record the image name, its class (1 = dog, 0 = anything else) and features
    imgs_names.append(filename)
    categories.append(1 if 'dog' in filename else 0)
    features.append(fd)

In [None]:
# number of class labels collected by the feature-extraction loop
len(categories)

In [None]:
# number of HOG feature vectors collected; should equal len(categories)
len(features)

## Train the SVM on the extracted features

In [None]:
# training inputs: the HOG feature vectors and their 0/1 class labels
X = features
Y = categories

# SVM regularization parameter
C = 0.1

# build a polynomial-kernel SVM classifier, then fit it to the training data
svc = svm.SVC(kernel='poly', C=C)
svc.fit(X, Y)

## Predict using the training data

In [None]:
# predict labels for the same feature vectors the model was fitted on
predictions = svc.predict(X)

## Calculate the training set accuracy

In [None]:
# element-wise comparison of predicted and true training labels
train_hits = predictions == Y

# accuracy = fraction of training samples that were classified correctly
accuracy = np.mean(train_hits)
accuracy

## Read the testing set

In [None]:
# the test split is directory entries [test_start, test_stop): the 2000
# entries that directly follow the 3000 used for training
test_start = 3000
test_stop = 5000

# parallel lists: entry k of each list describes the same image
test_names = []
test_features = []
test_categories = []

# NOTE: this relies on os.listdir returning the entries in the same order as
# it did when the training split was taken from the start of the listing.
for filename in os.listdir(training_path)[test_start:test_stop]:
    # read the image; cv2.imread returns None (it does not raise) when the
    # file is missing, unreadable or not an image
    img = cv2.imread(os.path.join(training_path, filename))
    if img is None:
        # skip before recording the name/label so the three lists stay aligned
        print('Skipping unreadable image:', filename)
        continue

    # resize the image (cv2.resize takes the target size as (width, height))
    resized_img = cv2.resize(img, (128, 64))

    # extract the HOG feature vector; the visualisation image is not needed,
    # so it is not requested
    # NOTE(review): newer scikit-image releases replace `multichannel=True`
    # with `channel_axis=-1` - confirm against the installed version
    fd = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
             cells_per_block=(2, 2), multichannel=True)

    # record the image name, its class (1 = dog, 0 = anything else) and features
    test_names.append(filename)
    test_categories.append(1 if 'dog' in filename else 0)
    test_features.append(fd)

## Predict for the testing set

In [None]:
# held-out inputs and their true labels
test_X, test_Y = test_features, test_categories

# classify the held-out feature vectors with the fitted model
test_predictions = svc.predict(test_X)

## Calculate the accuracy of the testing set

In [None]:
# element-wise comparison of predicted and true test labels
test_hits = test_predictions == test_Y

# accuracy = fraction of test samples that were classified correctly
accuracy = np.mean(test_hits)
accuracy