In [1]:
import numpy as np
import cv2
import os

class SimpleDatasetLoader:
	def load(self, imagePaths, verbose=-1):
		# initialize the list of features and labels
		data = []
		labels = []
		# loop over the input images
		for (i, imagePath) in enumerate(imagePaths):
			try:
				# load the image and extract the class label assuming
				# that our path has the following format:
				# /path/to/dataset/{class}/{image}.jpg
				image = cv2.imread(imagePath)
				label = imagePath.split(os.path.sep)[-2]
				
				image = cv2.resize(image, (256, 256), interpolation=cv2.INTER_AREA)
				# treat our processed image as a "feature vector"
				# by updating the data list followed by the labels
				data.append(image)
				labels.append(label)
				# show an update every `verbose` images
				if verbose > 0 and i > 0 and (i + 1) % verbose == 0:
					print("[INFO] processed {}/{}".format(i + 1,
						len(imagePaths)))
			except:
				print(imagePath)
		# return a tuple of the data and labels
		return (data, labels)

In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths

In [3]:
data_dir = r"D:\dungnd\data\DATA_USB\MyOwnData"

print("[INFO] loading images...")
imagePaths = list(paths.list_images(data_dir))

sdl = SimpleDatasetLoader()


[INFO] loading images...


In [4]:
(data, labels) = sdl.load(imagePaths, verbose=100)

[INFO] processed 100/2109
[INFO] processed 200/2109
[INFO] processed 300/2109
[INFO] processed 400/2109
[INFO] processed 500/2109
[INFO] processed 600/2109
[INFO] processed 700/2109
[INFO] processed 800/2109
[INFO] processed 900/2109
[INFO] processed 1000/2109
[INFO] processed 1100/2109
[INFO] processed 1200/2109
[INFO] processed 1300/2109
[INFO] processed 1400/2109
[INFO] processed 1500/2109
[INFO] processed 1600/2109
[INFO] processed 1700/2109
[INFO] processed 1800/2109
[INFO] processed 1900/2109
[INFO] processed 2000/2109
[INFO] processed 2100/2109


In [5]:
labels = np.array(labels)
data = np.array(data)

In [6]:
print(labels.shape)
print(data.shape)

(2109,)
(2109, 256, 256, 3)


In [7]:
dataset_size = data.shape[0]
data = data.reshape(dataset_size,-1)
print(data.shape)

(2109, 196608)


In [8]:
le = LabelEncoder()
labels = le.fit_transform(labels)

myset = set(labels)
print(myset)

{0, 1}


In [9]:
(trainX, testX, trainY, testY ) = train_test_split(data, labels, test_size= 0.2, random_state=42)

In [10]:
# Normalization
trainX = trainX/255.0
testX = testX/255.0

Logistic Regression

In [11]:
from sklearn.linear_model import LogisticRegression
print("[INFO] evaluating Logistic Regression Classifier...")

logreg =  LogisticRegression()

[INFO] evaluating Logistic Regression Classifier...


In [12]:
logreg.fit(trainX, trainY)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [13]:
from sklearn.metrics import accuracy_score

# After model fitting
y_pred = logreg.predict(testX)
accuracy = accuracy_score(testY, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7843601895734598


Random Forest

In [14]:
from sklearn.ensemble import RandomForestClassifier
print("[INFO] evaluating Random Forest Classifier...")

model=RandomForestClassifier()

[INFO] evaluating Random Forest Classifier...


In [15]:
model.fit(trainX, trainY)

In [16]:
from sklearn.metrics import accuracy_score

# After model fitting
y_pred = model.predict(testX)
accuracy = accuracy_score(testY, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8767772511848341


Decision Tree

In [17]:
from sklearn.tree import DecisionTreeClassifier

print("[INFO] evaluating Decision Tree Classifier...")

model=DecisionTreeClassifier()

[INFO] evaluating Decision Tree Classifier...


In [18]:
model.fit(trainX, trainY)

In [19]:
from sklearn.metrics import accuracy_score

# After model fitting
y_pred = model.predict(testX)
accuracy = accuracy_score(testY, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7511848341232228


Naive Bayes

In [20]:
from sklearn.naive_bayes import GaussianNB

print("[INFO] evaluating Naive Bayes Classifier...")

nb=GaussianNB()

[INFO] evaluating Naive Bayes Classifier...


In [21]:
nb.fit(trainX, trainY)

In [22]:
from sklearn.metrics import accuracy_score

# After model fitting
y_pred = nb.predict(testX)
accuracy = accuracy_score(testY, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6611374407582938


KNN

In [23]:
from sklearn.neighbors import KNeighborsClassifier
print("[INFO] evaluating k-NN classifier...")
model_knn = KNeighborsClassifier(n_neighbors=2, n_jobs=-1)

[INFO] evaluating k-NN classifier...


In [24]:
model_knn.fit(trainX, trainY)

In [25]:
from sklearn.metrics import accuracy_score

# After model fitting
predictions = model.predict(testX)
accuracy = accuracy_score(testY, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.7511848341232228
