In [None]:
# Optional Google Colab setup. It is disabled by being wrapped in a string
# literal; remove the surrounding triple quotes to run it on Colab.
# When enabled it mounts Google Drive at /content/drive, quietly unzips
# dataset.zip into '../../' without overwriting existing files (-q -n), and
# copies base_lib.py to '../../'.
"""
from google.colab import drive
drive.mount('/content/drive')
!unzip -q -n './drive/MyDrive/Colab Notebooks/Project/dataset.zip' -d '../../'
!cp './drive/MyDrive/Colab Notebooks/Project/base_lib.py' '../../'
"""

### Testing with random forests
### Ming Ong

#### Import necessary libraries

In [None]:
import sys
# Make the directory two levels up importable so `base_lib` can be found there.
sys.path.append('../../')
# NOTE(review): the star import is presumably what supplies every name used
# unqualified in later cells (filenames, train_test_split, the extract_* helpers,
# iou_score, the *_prefix paths, cv2, np, plt, joblib, copy, time, random) —
# confirm against base_lib; explicit imports would make this verifiable.
from base_lib import *
from sklearn.ensemble import RandomForestClassifier
# Image size passed explicitly to extract_basic_image_data, extract_hog and
# cv2.resize in the cells below.
# NOTE(review): rebinding IMAGE_SIZE here only changes this notebook's namespace.
# Calls that do not pass it (extract_multiple_images(train_files),
# extract_multiple_hog(train_files)) presumably fall back to base_lib's own
# default — confirm both agree before changing this value.
IMAGE_SIZE = (128, 128) # Possibly redefine image re-sizing parameters

#### Test different hyperparameters to tune performance

In [None]:
# Grid-search RandomForest hyperparameters, scoring each candidate by its mean
# per-image IoU on a validation split, then persist the best fitted model.
RANDOM_STATE = 42

# Hold out the test files first, then carve the validation files out of the
# remaining training files. Both splits are seeded for reproducibility.
train_files, test_files = train_test_split(filenames, random_state=RANDOM_STATE)
train_files, validation_files = train_test_split(train_files, random_state=RANDOM_STATE)
X_train, y_train = extract_multiple_images(train_files)

# The validation features do not depend on the hyperparameters, so read and
# extract them once instead of once per grid point.
validation_data = []
for base_filename in validation_files:
    validation_data.append(extract_basic_image_data(base_filename, IMAGE_SIZE))

# Start below any reachable score so the first *fitted* candidate is always
# accepted; an unfitted placeholder estimator can then never be saved.
best_rf_model = None
best_score = float('-inf')

for n_estimators in (100, 200, 400):
    for max_depth in (None, 10, 20):
        print(f'{time.ctime()} Testing with n_estimators: {n_estimators}, max_depth: {max_depth}')
        rf_model = RandomForestClassifier(random_state=RANDOM_STATE, n_estimators=n_estimators, max_depth=max_depth, n_jobs=-1)
        rf_model.fit(X_train, y_train)

        # Mean IoU over validation images (each image is scored separately).
        score = 0
        for X_1_val, y_1_val in validation_data:
            y_1_pred = rf_model.predict(X_1_val)
            score += iou_score(y_1_val, y_1_pred)
        score /= len(validation_data)

        if score > best_score:
            print(f'Better performance found with: n_estimators: {n_estimators}, max_depth: {max_depth} score: {score}')
            # rf_model is rebound to a fresh estimator on every iteration, so
            # keeping a reference is safe and avoids deep-copying a whole forest.
            best_rf_model = rf_model
            best_score = score

if best_rf_model is None:
    # Only reachable if every candidate's score was not comparable (e.g. NaN).
    raise RuntimeError('No hyperparameter combination produced a valid validation score')

joblib.dump(best_rf_model, 'random_forest_best_42.joblib')
print("Best Params:", best_rf_model.get_params())
print("Best IoU:", best_score)


#### Test the classifier on a test file and visualise outputs

In [None]:
# Visualise predictions of the best model on a few random test images, then
# report the mean per-image IoU over the whole test split.
random.seed(42)
NUM_TEST_SAMPLES = 5
fig, axes = plt.subplots(nrows=NUM_TEST_SAMPLES, ncols=4, figsize=(16, 20))

for i, base_filename in enumerate(random.sample(test_files, NUM_TEST_SAMPLES)):
    # Leakage sanity check: this should print False for a held-out test file.
    print(base_filename in train_files)
    X_1_test, y_1_test = extract_basic_image_data(base_filename, IMAGE_SIZE)
    y_1_pred = best_rf_model.predict(X_1_test)

    rgb_filename = rgb_prefix + base_filename
    nrg_filename = nrg_prefix + base_filename
    mask_filename = mask_prefix + base_filename

    # NOTE(review): `mask` is not used later in this cell (y_1_test is what is
    # plotted and scored); it is kept in case other cells rely on it.
    mask = cv2.resize(cv2.imread(mask_filename, flags=cv2.IMREAD_GRAYSCALE), IMAGE_SIZE)
    mask = mask.reshape(-1)
    mask = (mask > 127).astype(np.uint8)

    # cv2.imread returns channels in BGR order while matplotlib's imshow expects
    # RGB, so convert before displaying; otherwise the first and last channels
    # appear swapped in the plots.
    rgb_image = cv2.cvtColor(cv2.resize(cv2.imread(rgb_filename), IMAGE_SIZE), cv2.COLOR_BGR2RGB)
    nrg_image = cv2.cvtColor(cv2.resize(cv2.imread(nrg_filename), IMAGE_SIZE), cv2.COLOR_BGR2RGB)

    iou = iou_score(y_1_test, y_1_pred)
    print(f'IOU: {iou}')

    # One row per sample: input images, ground-truth labels, predicted labels.
    panels = (
        ('RGB', rgb_image, None),
        ('NRG', nrg_image, None),
        ('y_true', y_1_test.reshape(IMAGE_SIZE), 'gray'),
        ('y_pred', y_1_pred.reshape(IMAGE_SIZE), 'gray'),
    )
    for ax, (title, image, cmap) in zip(axes[i], panels):
        ax.imshow(image, cmap=cmap)
        ax.axis('off')
        ax.set_title(title)

# plt.show() renders the figure under the notebook's inline backend;
# Figure.show() is meant for GUI backends and only warns on non-GUI ones.
plt.show()

# Mean IoU over every test image (each image is scored separately).
score = 0
for base_filename in test_files:
    X_1_test, y_1_test = extract_basic_image_data(base_filename, IMAGE_SIZE)
    y_1_pred = best_rf_model.predict(X_1_test)
    score += iou_score(y_1_test, y_1_pred)
score /= len(test_files)
print(f'IOU score: {score}')

#### Try adding HoG features to see if performance improves

In [None]:
# Repeat the RandomForest grid search with HoG features appended column-wise to
# the basic per-image features, and persist the best fitted model.
RANDOM_STATE = 42

# Same seeded splits as the non-HoG experiment: hold out test files first, then
# carve validation files out of the remaining training files.
train_files, test_files = train_test_split(filenames, random_state=RANDOM_STATE)
train_files, validation_files = train_test_split(train_files, random_state=RANDOM_STATE)
X_train, y_train = extract_multiple_images(train_files)
X_hog = extract_multiple_hog(train_files)
X_train = np.concatenate((X_train, X_hog), axis=1)

# The validation features do not depend on the hyperparameters, so read and
# extract them once instead of once per grid point.
validation_data = []
for base_filename in validation_files:
    X_1_val, y_1_val = extract_basic_image_data(base_filename, IMAGE_SIZE)
    # Pass IMAGE_SIZE explicitly (as the HoG test cell does) so the HoG rows
    # line up with the rows produced by extract_basic_image_data above.
    X_1_hog = extract_hog(base_filename, IMAGE_SIZE)
    validation_data.append((np.concatenate((X_1_val, X_1_hog), axis=1), y_1_val))

# Start below any reachable score so the first *fitted* candidate is always
# accepted; an unfitted placeholder estimator can then never be saved.
best_rf_model = None
best_score = float('-inf')

for n_estimators in (100, 200, 400):
    for max_depth in (None, 10, 20):
        print(f'{time.ctime()} Testing with n_estimators: {n_estimators}, max_depth: {max_depth}')
        rf_model = RandomForestClassifier(random_state=RANDOM_STATE, n_estimators=n_estimators, max_depth=max_depth, n_jobs=-1)
        rf_model.fit(X_train, y_train)

        # Mean IoU over validation images (each image is scored separately).
        score = 0
        for X_1_val, y_1_val in validation_data:
            y_1_pred = rf_model.predict(X_1_val)
            score += iou_score(y_1_val, y_1_pred)
        score /= len(validation_data)

        if score > best_score:
            print(f'Better performance found with: n_estimators: {n_estimators}, max_depth: {max_depth} score: {score}')
            # rf_model is rebound to a fresh estimator on every iteration, so
            # keeping a reference is safe and avoids deep-copying a whole forest.
            best_rf_model = rf_model
            best_score = score

if best_rf_model is None:
    # Only reachable if every candidate's score was not comparable (e.g. NaN).
    raise RuntimeError('No hyperparameter combination produced a valid validation score')

joblib.dump(best_rf_model, 'random_forest_best_with_hog_42.joblib')

print("Best Params:", best_rf_model.get_params())
print("Best IoU:", best_score)

In [None]:
# Visualise predictions of the best HoG-augmented model on a few random test
# images, then report the mean per-image IoU over the whole test split.
random.seed(42)
NUM_TEST_SAMPLES = 5
fig, axes = plt.subplots(nrows=NUM_TEST_SAMPLES, ncols=4, figsize=(16, 20))

for i, base_filename in enumerate(random.sample(test_files, NUM_TEST_SAMPLES)):
    # Leakage sanity check: this should print False for a held-out test file.
    print(base_filename in train_files)
    X_test, y_test = extract_basic_image_data(base_filename, IMAGE_SIZE)
    X_hog = extract_hog(base_filename, IMAGE_SIZE)
    X_test = np.concatenate((X_test, X_hog), axis=1)
    y_pred = best_rf_model.predict(X_test)

    rgb_filename = rgb_prefix + base_filename
    nrg_filename = nrg_prefix + base_filename
    mask_filename = mask_prefix + base_filename

    # NOTE(review): `mask` is not used later in this cell (y_test is what is
    # plotted and scored); it is kept in case other cells rely on it.
    mask = cv2.resize(cv2.imread(mask_filename, flags=cv2.IMREAD_GRAYSCALE), IMAGE_SIZE)
    mask = mask.reshape(-1)
    mask = (mask > 127).astype(np.uint8)

    # cv2.imread returns channels in BGR order while matplotlib's imshow expects
    # RGB, so convert before displaying; otherwise the first and last channels
    # appear swapped in the plots.
    rgb_image = cv2.cvtColor(cv2.resize(cv2.imread(rgb_filename), IMAGE_SIZE), cv2.COLOR_BGR2RGB)
    nrg_image = cv2.cvtColor(cv2.resize(cv2.imread(nrg_filename), IMAGE_SIZE), cv2.COLOR_BGR2RGB)

    iou = iou_score(y_test, y_pred)
    print(f'IOU: {iou}')

    # One row per sample: input images, ground-truth labels, predicted labels.
    panels = (
        ('RGB', rgb_image, None),
        ('NRG', nrg_image, None),
        ('y_true', y_test.reshape(IMAGE_SIZE), 'gray'),
        ('y_pred', y_pred.reshape(IMAGE_SIZE), 'gray'),
    )
    for ax, (title, image, cmap) in zip(axes[i], panels):
        ax.imshow(image, cmap=cmap)
        ax.axis('off')
        ax.set_title(title)

# plt.show() renders the figure under the notebook's inline backend;
# Figure.show() is meant for GUI backends and only warns on non-GUI ones.
plt.show()

# Mean IoU over every test image. The model was fitted on basic + HoG columns,
# so the same columns must be rebuilt here, and the selected best model (not
# the last grid-search candidate left in `rf_model`) must do the predicting.
score = 0
for base_filename in test_files:
    X_test, y_test = extract_basic_image_data(base_filename, IMAGE_SIZE)
    X_hog = extract_hog(base_filename, IMAGE_SIZE)
    X_test = np.concatenate((X_test, X_hog), axis=1)
    y_pred = best_rf_model.predict(X_test)
    score += iou_score(y_test, y_pred)
score /= len(test_files)
print(f'IOU score: {score}')