In [1]:
import numpy as np
import cv2
import os

class SimpleDatasetLoader:
	"""Load images from disk into memory, resized to a common size.

	The class label of each image is the name of its parent directory,
	i.e. paths are expected to look like
	``/path/to/dataset/{class}/{image}.jpg``.
	"""

	def load(self, imagePaths, verbose=-1, size=(512, 512)):
		"""Read, resize and label every image listed in ``imagePaths``.

		Args:
			imagePaths: sequence of image file paths.
			verbose: print a progress line every ``verbose`` paths;
				values <= 0 disable progress output.
			size: target size handed to ``cv2.resize`` as its ``dsize``
				argument. Defaults to the previously hard-coded
				``(512, 512)``.

		Returns:
			A tuple ``(data, labels)`` of two parallel lists holding the
			resized images and their labels. Images that cannot be read
			or processed are reported and skipped, so both lists may be
			shorter than ``imagePaths``.
		"""
		# initialize the list of features and labels
		data = []
		labels = []
		# loop over the input images
		for (i, imagePath) in enumerate(imagePaths):
			try:
				image = cv2.imread(imagePath)
				# cv2.imread reports an unreadable file by returning None
				# instead of raising, so check for it explicitly
				if image is None:
					print("[WARN] could not read image, skipping: {}".format(
						imagePath))
					continue
				# the class label is the parent directory of the image
				label = imagePath.split(os.path.sep)[-2]
				image = cv2.resize(image, size, interpolation=cv2.INTER_AREA)
			except Exception as exc:
				# best effort: report the offending path and keep going, but
				# (unlike a bare except) let KeyboardInterrupt/SystemExit
				# propagate so a long load can still be aborted
				print("[WARN] skipping {}: {}".format(imagePath, exc))
				continue
			# treat our processed image as a "feature vector"
			# by updating the data list followed by the labels
			data.append(image)
			labels.append(label)
			# show an update every `verbose` images (including the very
			# first one when verbose == 1)
			if verbose > 0 and (i + 1) % verbose == 0:
				print("[INFO] processed {}/{}".format(i + 1,
					len(imagePaths)))
		# return a tuple of the data and labels
		return (data, labels)

In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths

In [3]:
# Root folder of the dataset on the local machine
data_dir = r"D:\dungnd\data\anhgoc_doimau"

print("[INFO] loading images...")
# Collect every image path found under the dataset folder into a list
imagePaths = [path for path in paths.list_images(data_dir)]

# Loader used in the next cell to read the images from disk
sdl = SimpleDatasetLoader()


[INFO] loading images...


In [4]:
# Read all images, printing a progress line every 100 paths
data, labels = sdl.load(imagePaths, verbose=100)

[INFO] processed 100/2078
[INFO] processed 200/2078
[INFO] processed 300/2078
[INFO] processed 400/2078
[INFO] processed 500/2078
[INFO] processed 600/2078
[INFO] processed 700/2078
[INFO] processed 800/2078
[INFO] processed 900/2078
[INFO] processed 1000/2078
[INFO] processed 1100/2078
[INFO] processed 1200/2078
[INFO] processed 1300/2078
[INFO] processed 1400/2078
[INFO] processed 1500/2078
[INFO] processed 1600/2078
[INFO] processed 1700/2078
[INFO] processed 1800/2078
[INFO] processed 1900/2078
[INFO] processed 2000/2078


In [5]:
# Turn the two Python lists returned by the loader into NumPy arrays
data = np.array(data)
labels = np.array(labels)

In [6]:
# Sanity check: label array shape first, then image array shape
print(labels.shape, data.shape, sep="\n")

(2078,)
(2078, 512, 512, 3)


In [7]:
# Flatten every sample into a single row so that `data` becomes 2-D
dataset_size = len(data)
data = np.reshape(data, (dataset_size, -1))
print(data.shape)

(2078, 786432)


In [8]:
# Map the string class names to integer ids
le = LabelEncoder()
le.fit(labels)
labels = le.transform(labels)

# Show the distinct encoded ids
myset = set(labels)
print(myset)

{0, 1}


In [9]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Normalization: divide the pixel values by 255 (presumably 8-bit values as
# loaded by cv2.imread -- confirm), which maps 0..255 onto 0..1.
# Cast to float32 first: dividing an integer array by 255.0 would produce
# float64, doubling the memory needed for these very wide rows, while
# scikit-learn's tree estimators convert X to float32 internally anyway.
# NOTE: this cell overwrites trainX/testX, so running it a second time
# without re-running the split cell rescales the data again.
trainX = trainX.astype(np.float32) / 255.0
testX = testX.astype(np.float32) / 255.0

In [11]:
from sklearn.tree import DecisionTreeClassifier

print("[INFO] evaluating Decision Tree Classifier...")

# Seed the estimator so the fitted tree -- and therefore the reported
# metrics -- are reproducible across runs; without random_state the
# classifier's internal randomness can change the result on every fit.
# 42 matches the seed used for the train/test split.
model = DecisionTreeClassifier(random_state=42)

[INFO] evaluating Decision Tree Classifier...


In [12]:
# Train the classifier on the training split
model.fit(X=trainX, y=trainY)

In [13]:
from sklearn.metrics import accuracy_score

# Predict the held-out split with the fitted model and score it
y_pred = model.predict(testX)
accuracy = accuracy_score(y_true=testY, y_pred=y_pred)
print("Accuracy: " + str(accuracy))

Accuracy: 0.45913461538461536


In [14]:
# Per-class precision / recall / F1, labelled with the original class names
report = classification_report(testY, y_pred, target_names=le.classes_)
print(report)

              precision    recall  f1-score   support

      anhgoc       0.41      0.37      0.39       193
      doimau       0.50      0.54      0.52       223

    accuracy                           0.46       416
   macro avg       0.45      0.45      0.45       416
weighted avg       0.46      0.46      0.46       416

