In [5]:
import glob

test_images = glob.glob("datasets/intel_images/seg_test/seg_test/*/*.*")
train_images = glob.glob("datasets/intel_images/seg_train/seg_train/*/*.*")

print('Total images to test', len(test_images))
print('Total images to train', len(train_images))

Total images to test 3000
Total images to train 14034


In [6]:
test_categories = [image_path.split('/')[3] for image_path in test_images]
train_categories = [image_path.split('/')[3] for image_path in train_images]
all_categories = test_categories + train_categories
all_images = test_images + train_images
print('total categories', len(all_categories))

total categories 17034


In [7]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
encoder.fit(all_categories)
y = encoder.transform(all_categories)
len(y)

17034

In [8]:
import numpy as np
np.unique(y)

array([0, 1])

In [9]:
import cv2
import matplotlib.pyplot as plt

image_list = []
for idx, image_path in enumerate(all_images):
  image = cv2.imread(image_path)
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  image = cv2.resize(image, (50,50))
  image = image.flatten()
  image_list.append(image)

X = np.array(image_list)
print(X.shape)


(17034, 7500)


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=2)

rf = RandomForestClassifier(n_estimators=100, random_state=2)
rf.fit(X_train, y_train)

y_predicted = rf.predict(X_test)
print(accuracy_score(y_test, y_predicted))


0.8209568535368359


## Applying PCA to reduce features

In [58]:
from sklearn.decomposition import PCA
pca = PCA(n_components=30)
pca.fit(X)
X_pca = pca.transform(X)

X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(X_pca, y, test_size=0.2, stratify=y, random_state=2)
rf = RandomForestClassifier(n_estimators=100, random_state=2)
rf.fit(X_train_pca, y_train_pca)
print(X_train_pca.shape)

(13627, 30)


In [59]:
y_predicted_pca = rf.predict(X_test_pca)
print(accuracy_score(y_test_pca, y_predicted_pca))

0.8235984737305547


## Exporting Models

In [66]:
import pickle

pca_path = "outputs/pca_intel.pkl"
pickle.dump(pca, open(pca_path, 'wb'))

rf_path = "outputs/random_forest_intel.pkl"
pickle.dump(rf, open(rf_path, 'wb'))

rf_encoder = "outputs/enconder_intel.pkl"
pickle.dump(encoder, open(rf_encoder, 'wb'))