In [1]:
import pathlib
import imageio
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import image as mpimg
from skimage.feature import hog
import cv2, os, glob, math
from skimage import color, io
import scipy.spatial
from collections import Counter 
from os import listdir,makedirs
from os.path import isfile,join
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
def fff(filename):
    data = []
    for jpg in glob.glob("photos/" + filename + "/*.jpg"):
        im_gray = cv2.imread(jpg, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(im_gray, (256,256))
        fd = hog(img)
        data.append(fd)
    return data

In [3]:
kangaroo = fff("kangaroo")[:500]
kangaroo

[array([0.08691685, 0.01170624, 0.02178796, ..., 0.        , 0.        ,
        0.        ]),
 array([0.15178893, 0.01902035, 0.0024182 , ..., 0.14318816, 0.03712584,
        0.07563561]),
 array([0.2028048 , 0.        , 0.03585116, ..., 0.03275377, 0.0490455 ,
        0.03124411]),
 array([0.11384315, 0.05285703, 0.12902455, ..., 0.        , 0.        ,
        0.        ]),
 array([0.25585166, 0.06239849, 0.00297613, ..., 0.0810396 , 0.06353355,
        0.18720762]),
 array([0.25214704, 0.0455037 , 0.0146445 , ..., 0.05421276, 0.0632904 ,
        0.        ]),
 array([0.17518465, 0.07797097, 0.03116372, ..., 0.21186911, 0.11525545,
        0.07432363]),
 array([0.        , 0.        , 0.00121798, ..., 0.06835507, 0.0418076 ,
        0.0682725 ]),
 array([0.11583483, 0.03883586, 0.02095639, ..., 0.12642173, 0.05311594,
        0.04686487]),
 array([0.04957561, 0.06921421, 0.01155237, ..., 0.        , 0.        ,
        0.        ]),
 array([0.        , 0.        , 0.        , ..., 0

In [4]:
rabbit = fff("rabbit")[:500]
len(rabbit)

500

In [5]:
def split(rate, array):
    l = len(array)
    n_train = math.floor((1-rate) * l)
    train = array[:n_train]
    test = array[n_train:]
    return train, test

In [6]:
def labelmaker(array, label1, label2):
    l = len(array)
    new_array = []
    for i in range(l):
        if i < l/2:
            new_array.append(label1)
        else:
            new_array.append(label2)
    
    return new_array

In [7]:
train_kangaroo, test_kangaroo = split(0.1, kangaroo)
train_rabbit, test_rabbit = split(0.1, rabbit)
train_data = train_kangaroo + train_rabbit
test_data = test_kangaroo + test_rabbit
train_labels = labelmaker(train_data, "kangaroo", "rabbit")
test_labels = labelmaker(test_data, "kangaroo", "rabbit")

In [8]:
datas = train_data + test_data 

In [9]:
datas

[array([0.08691685, 0.01170624, 0.02178796, ..., 0.        , 0.        ,
        0.        ]),
 array([0.15178893, 0.01902035, 0.0024182 , ..., 0.14318816, 0.03712584,
        0.07563561]),
 array([0.2028048 , 0.        , 0.03585116, ..., 0.03275377, 0.0490455 ,
        0.03124411]),
 array([0.11384315, 0.05285703, 0.12902455, ..., 0.        , 0.        ,
        0.        ]),
 array([0.25585166, 0.06239849, 0.00297613, ..., 0.0810396 , 0.06353355,
        0.18720762]),
 array([0.25214704, 0.0455037 , 0.0146445 , ..., 0.05421276, 0.0632904 ,
        0.        ]),
 array([0.17518465, 0.07797097, 0.03116372, ..., 0.21186911, 0.11525545,
        0.07432363]),
 array([0.        , 0.        , 0.00121798, ..., 0.06835507, 0.0418076 ,
        0.0682725 ]),
 array([0.11583483, 0.03883586, 0.02095639, ..., 0.12642173, 0.05311594,
        0.04686487]),
 array([0.04957561, 0.06921421, 0.01155237, ..., 0.        , 0.        ,
        0.        ]),
 array([0.        , 0.        , 0.        , ..., 0

In [10]:
targets = train_labels + test_labels

In [11]:
df = pd.DataFrame({'Shape':datas,'Value':targets})

In [12]:
df

Unnamed: 0,Shape,Value
0,"[0.08691685045783587, 0.011706235776195662, 0....",kangaroo
1,"[0.1517889316371094, 0.019020351106708903, 0.0...",kangaroo
2,"[0.2028047978941584, 0.0, 0.03585116134846516,...",kangaroo
3,"[0.11384314991130128, 0.05285703099765758, 0.1...",kangaroo
4,"[0.2558516641069307, 0.06239849151643665, 0.00...",kangaroo
...,...,...
995,"[0.18347670395687354, 0.02402605815660534, 0.0...",rabbit
996,"[0.33083322487367534, 0.10019601740875615, 0.0...",rabbit
997,"[0.08563423651497193, 0.04408164568431299, 0.0...",rabbit
998,"[0.22609224046763146, 0.10624235012260826, 0.0...",rabbit


In [13]:
KR = []
for i in range(len(df)):
    if df['Value'][i] == 'kangaroo':
        KR.append(1)
    elif df['Value'][i] != 'kangaroo':
        KR.append(0)
    else:
        KR.append(np.nan)

In [14]:
KR = pd.DataFrame(KR)
KR.columns = ['Kangaroo_Rabbit']

In [15]:
data = pd.concat([df,KR.set_index(df.index)], axis=1)

In [16]:
data

Unnamed: 0,Shape,Value,Kangaroo_Rabbit
0,"[0.08691685045783587, 0.011706235776195662, 0....",kangaroo,1
1,"[0.1517889316371094, 0.019020351106708903, 0.0...",kangaroo,1
2,"[0.2028047978941584, 0.0, 0.03585116134846516,...",kangaroo,1
3,"[0.11384314991130128, 0.05285703099765758, 0.1...",kangaroo,1
4,"[0.2558516641069307, 0.06239849151643665, 0.00...",kangaroo,1
...,...,...,...
995,"[0.18347670395687354, 0.02402605815660534, 0.0...",rabbit,0
996,"[0.33083322487367534, 0.10019601740875615, 0.0...",rabbit,0
997,"[0.08563423651497193, 0.04408164568431299, 0.0...",rabbit,0
998,"[0.22609224046763146, 0.10624235012260826, 0.0...",rabbit,0


In [17]:
data.dtypes

Shape              object
Value              object
Kangaroo_Rabbit     int64
dtype: object

In [18]:
X = np.array(datas)
y = np.array(data['Kangaroo_Rabbit'])

In [19]:
y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [20]:
df['Shape'][1]

array([0.15178893, 0.01902035, 0.0024182 , ..., 0.14318816, 0.03712584,
       0.07563561])

In [21]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

class LogisticRegression():

    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = sigmoid(linear_pred)

            dw = (1/n_samples) * np.dot(X.T, (predictions - y))
            db = (1/n_samples) * np.sum(predictions-y)

            self.weights = self.weights - self.lr*dw
            self.bias = self.bias - self.lr*db


    def predict(self, X):
        linear_pred = np.dot(X, self.weights) + self.bias
        y_pred = sigmoid(linear_pred)
        class_pred = [0 if y<=0.5 else 1 for y in y_pred]
        return class_pred

In [22]:
#X, y = df['Shape'], df['Value']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1234)

In [23]:
clf = LogisticRegression(lr=0.01)
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
def accuracy(y_pred, y_test):
    return np.sum(y_pred==y_test)/len(y_test)

acc = accuracy(y_pred, y_test)
print(acc)

0.74


In [24]:
class SVM:

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        n_samples, n_features = X.shape

        y_ = np.where(y <= 0, 0, 1)

        # init weights
        self.w = np.zeros(n_features)
        self.b = 0

        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
                    self.b -= self.lr * y_[idx]


    def predict(self, X):
        approx = np.dot(X, self.w) - self.b
        return np.sign(approx)

In [25]:
clf = SVM()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy
print("SVM classification accuracy", accuracy(y_test, predictions))

SVM classification accuracy 0.44
