In [1]:
import numpy as np 
import cv2 as cv
import os 
from pathlib import Path
from imutils.paths import list_images
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import joblib

import pv_vision.model.classifier.rf_train_val as rf_tool

# Load the data
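The expected folder structure is shown below. The name of each class folder is used as the label of the images stored inside it: \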
. \
|-- rf_train_inference.ipynb \
|-- segmented_cells \
....|-- train \
....|...|-- class 1 \
....|...|-- class 2 \
....|...|-- class ... \
....|...\`-- class n \
....|-- val \
....|...|-- class 1 \
....|...|-- class 2 \
....|...|-- class ... \
....|...\`-- class n \
....\`-- test \
........|-- class 1 \
........|-- class 2 \
........|-- class ... \
........`-- class n 



In [None]:
def _load_split(split_dir):
    """Read every image below ``split_dir`` together with its name and label.

    Parameters
    ----------
    split_dir : str
        Folder holding one sub-folder per class, e.g. 'segmented_cells/train'.

    Returns
    -------
    images : list
        Images as returned by ``cv.imread`` with ``cv.IMREAD_UNCHANGED``.
    names : list of str
        File names without their extension.
    labels : list of str
        Name of the folder that directly contains each image.

    Raises
    ------
    IOError
        If OpenCV cannot decode one of the listed files.
    """
    images, names, labels = [], [], []
    # sorted() keeps the sample order reproducible across runs and machines
    for im_path in sorted(list_images(split_dir)):
        image = cv.imread(im_path, cv.IMREAD_UNCHANGED)
        if image is None:
            # cv.imread reports a failed read by returning None instead of raising
            raise IOError('Cannot read image: {}'.format(im_path))
        images.append(image)
        # Use os.path instead of splitting on '/' so that paths built with
        # the Windows separator still yield the class folder as the label.
        labels.append(os.path.basename(os.path.dirname(im_path)))
        names.append(os.path.splitext(os.path.basename(im_path))[0])
    return images, names, labels


# load training, validation and testing data
images_train, names_train, labels_train = _load_split('segmented_cells/train')
images_val, names_val, labels_val = _load_split('segmented_cells/val')
images_test, names_test, labels_test = _load_split('segmented_cells/test')

images_train = np.array(images_train)
images_val = np.array(images_val)
images_test = np.array(images_test)

# transform labels into integers
# The encoder is fitted on the training labels only, so every class that
# appears in val/test must also be present in the training folder.
le = LabelEncoder()
le.fit(labels_train)

y_train = le.transform(labels_train)
y_val = le.transform(labels_val)
y_test = le.transform(labels_test)

# Model training

In [None]:
# Optional step: when the images are stored as 3-channel grayscale, keep only
# the first channel by uncommenting the lines below.
#images_train_g = np.array([image[:, :, 0] for image in images_train])
#images_val_g = np.array([image[:, :, 0] for image in images_val])
#images_test_g = np.array([image[:, :, 0] for image in images_test])

# Augment the training images and their labels; the validation and testing
# sets are not passed to the augmentation helper.
images_train_aug, y_train_aug = rf_tool.im_aug(
    images_train,
    y_train,
)

In [None]:
# Search space for the tuning: every key maps to its list of candidate values.
rf_para_grid = {
    'n_estimators': [10, 20, 30, 40, 50, 80, 100, 200, 400, 1000],
    'criterion': ['gini', 'entropy'],
    'max_depth': [10, 20, 40, 60, 80, 100, None],
    'bootstrap': [True, False],
}

# Tune for 50 iterations. The helper is expected to hand back the model with
# the best macro F1 score on the val set, its parameters and that score.
search_outcome = rf_tool.random_search_rf(
    images_train_aug, y_train_aug,
    images_val, y_val,
    50, rf_para_grid,
)
rf_best, para_best, score_best = search_outcome

In [None]:
# Persist the tuned model and the parameters that produced it under RF/.
Path('RF').mkdir(parents=True, exist_ok=True)
joblib.dump(value=rf_best, filename='RF/rf_aug_model.pkl')
joblib.dump(value=para_best, filename='RF/rf_best_para.pkl')

# Evaluation & Inference

In [4]:
import pv_vision.model.classifier.result_analysis as analysis
import pickle
from sklearn import preprocessing

In [None]:
# Folder that receives every evaluation artifact
save_path = Path('RF', 'results')
save_path.mkdir(parents=True, exist_ok=True)

# Restore the trained model from disk.
# NOTE: joblib files are pickles; only load files that you trust.
rf_fit = joblib.load('RF/rf_aug_model.pkl')

# Flatten the testing images with the same helper module used for training,
# then predict their classes.
X_test = rf_tool.im_flatten(images_test)
pred_test = rf_fit.predict(X_test)

# Uncomment to get the probability of each class instead
#prob_test = rf_fit.predict_proba(X_test)

In [None]:
# Gather what analysis.predict_failed reports for the testing set (the wrongly
# predicted samples, per the original note) and pickle it next to the other results.
failed = analysis.predict_failed(y_test, pred_test, images_test)
with (save_path / 'rf_failed.pkl').open('wb') as sink:
    pickle.dump(failed, sink)

In [None]:
# Mapping the value of y into label names.
# The names come from the fitted LabelEncoder `le` instead of a hand-written
# dictionary, so the mapping always agrees with the integers stored in
# y_test / pred_test, whatever the class folders are called.
# With the folders crack / intact / intra / oxygen / solder this gives
# {0: 'crack', 1: 'intact', 2: 'intra', 3: 'oxygen', 4: 'solder'}.
defect_name = {index: str(name) for index, name in enumerate(le.classes_)}

# confusion matrix
analysis.draw_cm(defect_name, y_true=y_test, y_pred=pred_test)

In [6]:
# metrics report
# The label names are read from the fitted LabelEncoder `le` so that they
# stay in the same order as the integer classes in y_test / pred_test
# (crack, intact, intra, oxygen, solder for the example dataset).
test_report = analysis.metrics_report(y_test, pred_test,
                                      label_names=[str(name) for name in le.classes_])

# store the report together with the other evaluation results
test_report.to_pickle(save_path/'rf_test_report.pkl')

In [None]:
# Store the predictions of the testing set.
# For unlabelled images there is no ground truth, so the 'defects_true' and
# 'y_true' entries can be left out in that case.

# Encoder rebuilt from the known class names to turn integers back into names
le = preprocessing.LabelEncoder().fit(['crack', 'intact', 'intra', 'oxygen', 'solder'])

prediction_record = {
    'name': np.array(names_test),
    'defects_pred': le.inverse_transform(pred_test),
    'defects_true': le.inverse_transform(y_test),
    'y_pred': np.array(pred_test),
    'y_true': np.array(y_test),
}

with (save_path / 'rf_predicted.pkl').open('wb') as sink:
    pickle.dump(prediction_record, sink)
