## Random Forest Machine Learning

In this notebook we use Random Forest machine learning to segment an SEM grain image.  We use pre-existing labels provided by Sreenivas Bhattiprolu to train the random forest classifier.  We use Napari to visualize the labels and (optionally) add additional labels to improve the result.  We train on one image, then apply the model to a second image (that is not used for training).

## Open train and test image and pre-existing labels

In [None]:
from skimage.io import imread, imsave
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from tnia.machinelearning.random_forest_helper import extract_features

# Root folder of the example data. Both the Windows and the Linux location are kept
# and the one matching the current operating system is selected, so that neither
# assignment silently overwrites the other.
windows_parent_path = r'D:\images\tnia-python-images\imagesc\2024_12_19_sem_grain_size_revisit'
posix_parent_path = r'/home/bnorthan/images/tnia-python-images/imagesc/2024_12_19_sem_grain_size_revisit'
parent_path = windows_parent_path if os.name == 'nt' else posix_parent_path

training_data_path = os.path.join(parent_path, r'training_data')

# Training image and its three pre-existing binary masks
im_name = '211122_AM_Al2O3_SE_021.ome.tiff'
background_name = '211122_AM_Al2O3_SE_021_sp_background_binary.tif'
grains_name = '211122_AM_Al2O3_SE_021_sp_grains_binary.tif'
inclusions_name = '211122_AM_Al2O3_SE_021_sp_inclusions_binary.tif'

# Second image, only used to check the trained model (never used for training)
validation_name = r'211122_AM_Al2O3_SE_027_sp.tif'

im = imread(os.path.join(training_data_path, im_name))

# Each mask is integer-divided by 255 and then scaled to its class id
# (background=1, grains=2, inclusions=3).
# NOTE(review): this assumes the masks are stored as 0/255 images - confirm.
background = (imread(os.path.join(training_data_path, background_name))//255)
grains = (imread(os.path.join(training_data_path, grains_name))//255)*2
inclusions = (imread(os.path.join(training_data_path, inclusions_name))//255)*3

# Empty placeholders with the same shape/dtype as the masks; later cells fill them in
predictions = np.zeros_like(background)
labels = np.zeros_like(background)

validation_im = imread(os.path.join(training_data_path, validation_name))

## View in Napari

In [None]:
import napari

viewer = napari.Viewer()
viewer.add_image(im)

# One labels layer per pre-existing mask, plus the (still empty) predictions layer,
# which a later cell fills in through viewer.layers['predictions'].data.
for layer_name, layer_data in (
    ('background', background),
    ('grains', grains),
    ('inclusions', inclusions),
    ('predictions', predictions),
):
    viewer.add_labels(layer_data, name=layer_name)
# The combined 'labels' layer is not added here; it is added once it has been built.

## Extract features

In [None]:
# Compute the feature stack for the training image. Later cells index it with the
# label mask plus a trailing axis (features[labels>0,:]) and flatten it with
# features.shape[-1], so the last axis is expected to hold the per-pixel features.
# NOTE(review): confirm the exact layout against extract_features.
features = extract_features(im)
# Print the shape as a quick sanity check before building the training vectors
print(features.shape)

## Create label and feature vector

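In this step we combine the background, grain and inclusion masks into a single label image, then build the label vector and feature vector from only the pixels that have a non-zero label.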

In [None]:
# Merge the three masks into one label image, starting from a copy of the background.
# Each further mask only fills pixels that are still unlabeled (0), so the priority is
# background > grains > inclusions and an earlier class is never overwritten.
labels = background.copy()
for class_mask in (grains, inclusions):
    unlabeled = labels == 0
    labels[unlabeled] = class_mask[unlabeled]
print(labels.min(), labels.max())

# Only labeled pixels (label > 0) contribute training samples
labeled = labels > 0
label_vector = labels[labeled]
features_vector = features[labeled, :]
print(label_vector.shape, features_vector.shape)

viewer.add_labels(labels, name='labels')


## Here we use a utility to rebalance the data

There are many more grain pixels than pixels of the other classes, so we rebalance the training data. `smote.fit_resample` creates artificial samples for the under-represented classes by interpolating between real points.

In [None]:
from imblearn.over_sampling import SMOTE

# SMOTE generates its synthetic samples at random; a fixed seed makes the resampled
# training set (and everything trained on it) reproducible across runs.
SMOTE_SEED = 42

smote = SMOTE(random_state=SMOTE_SEED)
X_resampled, y_resampled = smote.fit_resample(features_vector, label_vector)

# Sample count after rebalancing
print(X_resampled.shape, y_resampled.shape)

## Train the random forest classifier

In [None]:
# Fixed seed so that the bootstrap samples and feature subsets, and therefore the
# trained forest, are the same on every run.
FOREST_SEED = 42

clf = RandomForestClassifier(n_estimators=100, n_jobs=-1, max_depth=20, random_state=FOREST_SEED)
# Labels are shifted down by one for training; the prediction cells add 1 back.
# To train without the SMOTE rebalancing, fit on features_vector and label_vector-1 instead.
clf.fit(X_resampled, y_resampled-1)

## Predict pixels

In [None]:
import numpy as np
from skimage import future

# Flatten the feature stack to one row per pixel, classify every row, then fold the
# result back into the leading (image) dimensions of the feature stack.
n_features = features.shape[-1]
flat_features = features.reshape(-1, n_features)
predicted_classes = future.predict_segmenter(flat_features, clf)
# +1 undoes the label shift that was applied when the classifier was fitted
predictions = predicted_classes.reshape(features.shape[:-1]) + 1
predictions = np.squeeze(predictions).astype(np.uint32)
print(predictions.min(), predictions.max())

# Update the existing napari layer in place instead of adding a new one
viewer.layers['predictions'].data = predictions

## Create instances

In [None]:
from skimage.measure import label

# Connected components of the pixels that were classified as grains (class 2)
predicted_grains = predictions == 2
instances = label(predicted_grains)
viewer.add_labels(instances, name='instances')

# Everything that was classified as class 1 or class 3, i.e. the non-grain pixels
predicted_background = (predictions == 1) | (predictions == 3)

from skimage.morphology import dilation, closing, disk
# utility to close small gaps in boundary
def close_small_gaps(image, disk_size):
    """Dilate ``image`` and then morphologically close it with one disk footprint.

    Args:
        image: array to process (the notebook passes a boolean non-grain mask).
        disk_size: radius handed to ``skimage.morphology.disk``; the resulting
            footprint is used for both the dilation and the closing.

    Returns:
        The result of ``closing(dilation(image, footprint), footprint)``. The
        initial dilation is not undone afterwards, so foreground structures are
        expected to come back thicker than in ``image``.
    """
    # Import every morphology function used here, so the function does not depend
    # on a module-level import of ``dilation`` having been executed first.
    from skimage.morphology import closing, dilation, disk

    footprint = disk(disk_size)  # built once, shared by both operations
    dilated_image = dilation(image, footprint)
    return closing(dilated_image, footprint)

# Dilate and close the predicted non-grain mask so that small breaks in it are bridged
closed_image = close_small_gaps(image=predicted_background, disk_size=2)

# Whatever is left outside the closed mask is labelled as separate instances
inverted_image = np.invert(closed_image)
instances_after_closing = label(inverted_image)

viewer.add_labels(instances_after_closing, name='instances_after_closing')



## Predict validation image

In [None]:
# Apply the trained classifier to the image that was not used for training
validation_features = extract_features(validation_im)
validation_flat_features = validation_features.reshape(-1, validation_features.shape[-1])
# +1 undoes the label shift that was applied when the classifier was fitted
validation_predictions = future.predict_segmenter(validation_flat_features, clf).reshape(validation_features.shape[:-1]) + 1
# Normalise shape and dtype exactly as is done for the training-image predictions, so
# the 2D morphology and the napari labels layers below receive the same kind of array.
validation_predictions = np.squeeze(validation_predictions).astype(np.uint32)

# Connected components of the pixels classified as grains (class 2)
validation_instances = label(validation_predictions==2)
viewer.add_labels(validation_predictions, name='validation predictions')
viewer.add_labels(validation_instances, name='validation labeled grains')

# Same gap-closing post-processing as for the training image: close the non-grain
# mask (classes 1 and 3), invert it, and label what remains.
validation_background = np.any([validation_predictions==1, validation_predictions==3], axis=0)
validation_closed_image = close_small_gaps(validation_background, 2)
validation_inverted_image = ~validation_closed_image
validation_instances_closed = label(validation_inverted_image)
viewer.add_labels(validation_instances_closed, name='validation instances after closing')
