In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import imutils
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import glob
import os

In [2]:
class SimplePreprocessor:
    def __init__(self,height,width,inter=cv2.INTER_AREA):
        self.height=height
        self.width=width
        self.inter=inter
        
    def preprocess(self,image):
        return cv2.resize(image,(self.width,self.height),self.inter)
    
        

In [3]:
class DataLoader:
    def __init__(self,preprocessor=None):
        self.preprocessor=preprocessor
        
        if self.preprocessor is None:
            self.preprocessor=[]
    
    def load(self,ImageList,verbose=1):
        
        data=[]
        labels=[]
        
        for (i,imagePath) in enumerate(ImageList):
            image=cv2.imread(imagePath)
            label=imagePath.split(os.sep)[1]
            
            if self.preprocessor is not None:
                for process in self.preprocessor:
                    image=process.preprocess(image)
            
            data.append(image)
            labels.append(label)
            
            if verbose>0 and i>0 and (i+1)%verbose==0:
                print("[INFO] processed {}/{}".format(i+1,len(ImageList)))
                
        return(np.array(data),np.array(labels))

            
            

In [4]:
prep=SimplePreprocessor(height=32,width=32)
loader=DataLoader(preprocessor=[prep])

In [5]:
imageList=glob.glob("Images/dataset/animals/*/*")

In [6]:
data,labels=loader.load(imageList,verbose=500)

[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000


In [7]:
le=LabelEncoder()
labels=le.fit_transform(labels)
data=data.reshape((data.shape[0],data.shape[1]*data.shape[2]*data.shape[3]))

In [8]:
(xTrain,xTest,yTrain,yTest)=train_test_split(data,labels,test_size=0.2,random_state=42,shuffle=True)

In [9]:
model=RandomForestClassifier(n_estimators=100, max_depth=8)
model.fit(xTrain,yTrain)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=8, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [10]:
prediction=model.predict(xTest)
print(classification_report(yTest,prediction,target_names=le.classes_))

              precision    recall  f1-score   support

        cats       0.58      0.45      0.51       217
        dogs       0.47      0.56      0.51       197
       panda       0.74      0.80      0.77       186

    accuracy                           0.59       600
   macro avg       0.60      0.60      0.60       600
weighted avg       0.60      0.59      0.59       600



In [5]:
os.sep

'\\'