In [1]:
import os

import cv2
import numpy as np
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Uncomment the lines below when running this notebook in Google Colab.
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
def extract_histogram(image, bins=(8, 8, 8)):
    """Return a flattened, normalized joint histogram of a 3-channel image.

    Parameters
    ----------
    image : array-like
        Image as returned by ``cv2.imread``; channels 0, 1 and 2 are
        histogrammed together, each over the value range [0, 256).
    bins : tuple of int, optional
        Number of bins per channel. Defaults to ``(8, 8, 8)``.

    Returns
    -------
    1-D array
        The normalized histogram, flattened. With the default ``bins`` this
        should hold 8 * 8 * 8 = 512 values.

    Raises
    ------
    ValueError
        If ``image`` is ``None``, which is what ``cv2.imread`` returns when a
        file is missing or cannot be decoded.
    """
    if image is None:
        # Fail early with a readable message instead of an opaque OpenCV error.
        raise ValueError(
            "image is None; cv2.imread returns None when a file cannot be read"
        )
    hist = cv2.calcHist([image], [0, 1, 2], None, bins, [0, 256, 0, 256, 0, 256])
    # Normalize in place (dst is the same array) using cv2.normalize defaults.
    cv2.normalize(hist, hist)
    return hist.flatten()

In [3]:
# Directory holding the training images, relative to the notebook location.
train_imgs_path = '../data/W4T1/train'

# Class names. Not referenced by the other visible cells; the numeric labels
# built later are 1 for file names containing 'cat' and 0 otherwise.
categories = ['dog', 'cat']

# Accumulators for the per-image feature vectors and their labels.
flattened_input_arr = list()  # input features
outcome_arr = list()  # output labels

# Directory listing in sorted order, so the sample order is deterministic.
train_imgs_names = os.listdir(train_imgs_path)
train_imgs_names.sort()

In [4]:
# Start from empty lists so that re-running this cell rebuilds the training
# set instead of appending a duplicate copy to lists created in an earlier cell.
flattened_input_arr = []
outcome_arr = []

for img_name in train_imgs_names:
    img_path = os.path.join(train_imgs_path, img_name)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError("cv2.imread could not read training image: {!r}".format(img_path))
    img_hist = extract_histogram(img)
    flattened_input_arr.append(img_hist)
    # The label comes from the file name: 1 if it contains 'cat', otherwise 0.
    outcome_arr.append(1 if 'cat' in img_name else 0)

xtrain = np.array(flattened_input_arr)  # predictors: one histogram per image
ytrain = np.array(outcome_arr)  # response: 1 = cat, 0 = otherwise

In [5]:
# Linear support-vector classifier, fitted on the histogram features.
lsvc = LinearSVC(
    C=1.44,
    random_state=42,
)
lsvc.fit(xtrain, ytrain)

In [6]:
# Single decision tree, size-limited via min_samples_leaf and max_leaf_nodes.
dtc = DecisionTreeClassifier(
    criterion='entropy',
    min_samples_leaf=10,
    max_leaf_nodes=20,
    random_state=42,
)
dtc.fit(xtrain, ytrain)

In [22]:
# Bagging ensemble of 18 decision trees. The bagged tree is constructed here
# with the same hyper-parameters as `dtc` rather than passing `dtc` itself.
bc = BaggingClassifier(
    DecisionTreeClassifier(
        criterion='entropy',
        min_samples_leaf=10,
        max_leaf_nodes=20,
        random_state=42,
    ),
    n_estimators=18,
    random_state=42,
)
bc.fit(xtrain, ytrain)

In [23]:
# Random forest of 18 trees with the same per-tree limits as the single tree.
rfc = RandomForestClassifier(
    n_estimators=18,
    criterion='entropy',
    min_samples_leaf=10,
    max_leaf_nodes=20,
    random_state=42,
)
rfc.fit(xtrain, ytrain)

In [24]:
# Logistic regression; left unfitted here and passed on as the final estimator
# of the stacking classifier.
lr = LogisticRegression(
    solver='lbfgs',
    random_state=42,
)

In [25]:
# Named base learners whose outputs feed the final estimator.
base_estimators = [
    ('SVM', lsvc),
    ('Bagging DT', bc),
    ('DecisionForest', rfc),
]

# Stacked model: `lr` combines the base learners, with cv=2 passed to the
# stacking procedure.
sclf = StackingClassifier(
    estimators=base_estimators,
    final_estimator=lr,
    cv=2,
)
sclf.fit(xtrain, ytrain)

In [26]:
# Accuracy of the stacked model on the same data it was fitted on. This is a
# training-set score, so it is an optimistic estimate of real performance.
# `accuracy_score` is imported in the notebook's top import cell.
ytrainpred = sclf.predict(xtrain)

accuracy_score(ytrain, ytrainpred)

0.845

In [32]:
# Directory holding the held-out images, relative to the notebook location.
test_imgs_path = '../data/W4T1/test'

# Hand-picked test images to score.
test_imgs_names = ['cat.1040.jpg', 'cat.1015.jpg', 'dog.1022.jpg', 'cat.1022.jpg']

xtest = []
# Expected labels (1 = file name contains 'cat'); collected for reference but
# not used by the prediction call below.
ytest = []

for test_img_name in test_imgs_names:
    test_img_path = os.path.join(test_imgs_path, test_img_name)
    test_img = cv2.imread(test_img_path)
    if test_img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError("cv2.imread could not read test image: {!r}".format(test_img_path))
    test_img_hist = extract_histogram(test_img)
    xtest.append(test_img_hist)
    ytest.append(1 if 'cat' in test_img_name else 0)

# One row per test image; columns follow `sclf.classes_`, which should be
# [0, 1] given the 0/1 labels used for training (column 1 = cat).
sclf.predict_proba(xtest)

array([[0.29917991, 0.70082009],
       [0.37622497, 0.62377503],
       [0.44782898, 0.55217102],
       [0.46135907, 0.53864093]])