In [None]:
'''
DATA PATHS
'''
# Root directory; every input/output path in this cell is built from it by string concatenation.
TOP_DIR = '/tf/Notebooks/Iwashita'

# Input directories (all end with a trailing slash).
# IR_PATH / RGB_PATH are read by the image-loading loops, MASKS_PATH by the mask
# loading code and ANNOTATIONS_PATH by the annotation decoding loop.
IR_PATH = TOP_DIR + '/Data/IR/'
RGB_PATH = TOP_DIR + '/Data/RGB/'
MASKS_PATH = TOP_DIR + '/Data/Masks/'
ANNOTATIONS_PATH = TOP_DIR + '/Data/Annotations/'

'''
OUTPUTS PATH
'''
# Output directories. NOTE(review): neither constant is referenced by the cells
# visible in this notebook; presumably used by a training notebook - confirm.
WEIGHTS_PATH = TOP_DIR + '/output/Weights/'
METRICS_PATH = TOP_DIR + '/output/Metrics/'

# IPython shell escape: change into TOP_DIR and list its contents.
!cd $TOP_DIR && ls

In [None]:
from enum import Enum
import numpy as np
from numpy import asarray, save
import os
from PIL import Image
import re
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt 
from matplotlib.pyplot import axis, figure, imshow, show, subplot

In [None]:
'''
SUPPORTING FUNCTIONS
'''

'''
POPULATE DATA SETS
'''
def populate_data_sets(X_list, y_list, rgb_dict, ir_dict, annotations_dict):
    """Collect the RGB, IR and annotation entries for a list of keys.

    Args:
        X_list: Iterable of keys (file names in this notebook). The order of
            the keys defines the order of the returned arrays.
        y_list: Unused. Kept only so that existing call sites, which pass the
            label key list as second argument, continue to work.
        rgb_dict: Mapping key -> RGB image data.
        ir_dict: Mapping key -> IR image data.
        annotations_dict: Mapping key -> annotation data.

    Returns:
        Tuple ``(X_rgb, X_ir, y_data)`` of ``np.ndarray`` objects, each built
        with ``np.array`` from the looked-up entries, aligned with ``X_list``.

    Raises:
        KeyError: If a key in ``X_list`` is missing from any of the mappings.
    """
    # Materialise once so that one-shot iterables (generators) are supported.
    keys = list(X_list)

    X_rgb = [rgb_dict[fn] for fn in keys]
    X_ir = [ir_dict[fn] for fn in keys]
    y_data = [annotations_dict[fn] for fn in keys]

    return np.array(X_rgb), np.array(X_ir), np.array(y_data)

'''
CALCULATE CLASS FREQUENCY
'''
def _calculate_class_frequency(annotation_array, class_axis, class_enum=None):
    """Shared implementation of the two public class-frequency helpers.

    Counts, for every index along ``class_axis``, how many entries equal 1,
    normalises the counts so they sum to 1 and prints one percentage line per
    member of ``class_enum``.
    """
    annotation_array = np.asarray(annotation_array)

    # Raises IndexError when the array has too few dimensions.
    num_classes = annotation_array.shape[class_axis]

    # Sum over every axis except the class axis in a single vectorised pass.
    other_axes = tuple(ax for ax in range(annotation_array.ndim) if ax != class_axis)
    pixel_count = np.sum(annotation_array == 1, axis=other_axes).astype(np.float64)

    # Guard against 0/0 (which would yield NaN plus a RuntimeWarning) when no
    # entry is set at all: report a frequency of 0 for every class instead.
    total = pixel_count.sum()
    if total > 0:
        class_frequency = pixel_count / total
    else:
        class_frequency = np.zeros(num_classes)

    # Fall back to the notebook-level enum only when the caller gave none.
    if class_enum is None:
        class_enum = classes

    # Print out the frequency (in percent) for each class
    for cls in class_enum:
        print(f"{cls.name}: {class_frequency[cls.value]*100:.4f}")

    return class_frequency


def calculate_class_frequency_single(annotation_array, class_enum=None):
    """Compute and print the class frequencies of one one-hot annotation.

    Args:
        annotation_array: Array whose axis 2 indexes the classes; an entry
            equal to 1 marks a pixel of that class.
        class_enum: Optional iterable of enum members (``.name``/``.value``)
            used for the printed report. Defaults to the global ``classes``.

    Returns:
        1-D float array with one frequency per class (all zeros when the
        annotation contains no entry equal to 1).

    Raises:
        IndexError: If the array has fewer than 3 dimensions, or an enum value
            exceeds the number of classes.
    """
    return _calculate_class_frequency(annotation_array, 2, class_enum)


def calculate_class_frequency_set(annotation_array, class_enum=None):
    """Compute and print the class frequencies of a stack of annotations.

    Args:
        annotation_array: Array whose axis 3 indexes the classes; an entry
            equal to 1 marks a pixel of that class.
        class_enum: Optional iterable of enum members (``.name``/``.value``)
            used for the printed report. Defaults to the global ``classes``.

    Returns:
        1-D float array with one frequency per class (all zeros when no entry
        equals 1).

    Raises:
        IndexError: If the array has fewer than 4 dimensions, or an enum value
            exceeds the number of classes.
    """
    return _calculate_class_frequency(annotation_array, 3, class_enum)

'''
ONE-HOT ANNOTATION CHECK
'''
def display_one_hot_annotation(annotations_onehot):
    """Plot a one-hot annotation as a colour-coded class-index image.

    The class index of each pixel is taken as the argmax over the last axis
    and drawn with a 7-colour 'tab10' palette. The colorbar gets one tick per
    class index (``range(num_classes)``, read from the notebook globals), each
    rendered with its terrain name.

    Args:
        annotations_onehot: Array whose last axis holds the per-class scores.
    """
    # Tick value -> text shown on the colorbar (7 hard-coded entries).
    tick_names = {
        0: "unlabeled",
        1: "sand",
        2: "soil",
        3: "ballast",
        4: "rock",
        5: "bedrock",
        6: "rocky terrain"
    }

    def _tick_text(val, loc):
        # FuncFormatter passes (tick value, tick position); only the value is used.
        return tick_names[val]

    palette = plt.get_cmap('tab10', 7)
    class_index_map = np.argmax(annotations_onehot, axis=-1)

    plt.imshow(class_index_map, cmap=palette)
    plt.colorbar(ticks=range(num_classes), format=plt.FuncFormatter(_tick_text))
    plt.show()

In [None]:
'''
IMAGE PROPERTIES - Original image dimensions are 800x600
'''
# Target size that every image and annotation is resized to.
IMG_HEIGHT = 572
IMG_WIDTH = 572

RGB_CHANNELS = 3
IR_CHANNELS = 1

'''
IMAGE LISTS
'''
# os.listdir returns entries in arbitrary order. Sorting makes the list - and
# therefore the seeded train/validation/test splits derived from it -
# reproducible across machines and runs.
img_list = sorted(file for file in os.listdir(RGB_PATH) if file.lower().endswith('0000.png'))

# Filled by the loading loops: file name -> processed image array.
rgb_imgs = {}
ir_imgs = {}

'''
LOAD MASKS
'''
# Masks are divided by 255 so they can be multiplied onto the images.
# The context managers close the mask files once the pixels are copied.
with Image.open(os.path.join(MASKS_PATH, 'rgb_mask.ppm')) as _mask_file:
    rgb_mask = np.array(_mask_file) / 255

# Only the first channel of the IR mask is used.
with Image.open(os.path.join(MASKS_PATH, 'ir_mask.png')) as _mask_file:
    ir_mask = np.array(_mask_file)[:, :, 0] / 255

'''
LOAD AND NORMALIZE
'''
# Each image is divided by 255, multiplied by its mask, resized to
# IMG_HEIGHT x IMG_WIDTH and stored in a dictionary keyed by file name.
# After each loop `rgb_array` / `ir_array` hold the last processed image.
print("Processing RGB images...")

for filename in tqdm(img_list, total=len(img_list)):

    # Open image from file; the context manager closes the file handle as
    # soon as the pixel data has been copied into the numpy array.
    with Image.open(os.path.join(RGB_PATH, filename)) as rgb_img:

        # Normalize RGB image in a 600x800x3 numpy array
        rgb_array = np.array(rgb_img, dtype=np.float32) / 255.0

    # Apply mask
    rgb_array = rgb_array * rgb_mask

    # Resize. skimage expects output_shape as (rows, cols) = (height, width).
    rgb_array = resize(rgb_array, (IMG_HEIGHT, IMG_WIDTH), mode='reflect', anti_aliasing=True)

    # Save to dictionary
    rgb_imgs[filename] = rgb_array

print("Processing IR images...")

for filename in tqdm(img_list, total=len(img_list)):

    # Open image from file (closed again right after the pixel copy)
    with Image.open(os.path.join(IR_PATH, filename)) as ir_img:

        # Normalize IR image in a 600x800 numpy array
        ir_array = np.array(ir_img, dtype=np.float32) / 255.0

    # Apply mask
    ir_array = ir_array * ir_mask

    # Resize. skimage expects output_shape as (rows, cols) = (height, width).
    ir_array = resize(ir_array, (IMG_HEIGHT, IMG_WIDTH), mode='reflect', anti_aliasing=True)

    # Save to dictionary
    ir_imgs[filename] = ir_array

In [None]:
# Left panel: the RGB array left over from the loading loop
subplot(121)
imshow(rgb_array)
axis('off')

# Right panel: the matching IR array
subplot(122)
imshow(ir_array)
axis('off')

# Shape and maximum value of both arrays as a quick sanity check
print(rgb_array.shape)
print(np.max(rgb_array))

print(ir_array.shape)
print(np.max(ir_array))

In [None]:
'''
CLASSES
'''
# Terrain classes, numbered consecutively from 0 in the order listed.
# NOTE(review): Enum does not turn dunder-style names into members, so
# '__UNLABELED__' ends up as a plain class attribute with value 0 and is not
# part of iteration or value lookup. len(classes) is therefore 6, while the
# largest member value is still 6.
classes = Enum(
    'Classes',
    names=[
        '__UNLABELED__',
        'SAND',
        'SOIL',
        'BALLAST',
        'ROCK',
        'BEDROCK',
        'ROCKY_TERRAIN',
    ],
    start=0,
)

# Number of class indices = highest member value + 1 (index 0 included).
num_classes = max(member.value for member in classes) + 1

'''
LOAD/DECODE ANNOTATION FILES
'''
# file name -> 2-D array of decoded integer labels
annotations = {}

print("Loading annotation files...")

for filename in tqdm(img_list, total=len(img_list)):

    # Label images must be resized with nearest-neighbour sampling. Pillow's
    # default filter for resize() (bicubic in current versions) interpolates
    # between neighbouring pixels and would invent label values along class
    # borders. PIL takes the size as (width, height).
    with Image.open(os.path.join(ANNOTATIONS_PATH, filename)) as annotation_img:
        img = annotation_img.resize((IMG_WIDTH, IMG_HEIGHT), resample=Image.NEAREST)

    encoded = np.array(img)

    # The label is packed little-endian into the first three channels:
    # channel 0 = bits 0-7, channel 1 = bits 8-15, channel 2 = bits 16-23.
    label = np.bitwise_or(np.bitwise_or(
        encoded[:, :, 0].astype(np.uint32),
        encoded[:, :, 1].astype(np.uint32) << 8),
        encoded[:, :, 2].astype(np.uint32) << 16)

    annotations[filename] = label

'''
ONE-HOT ENCODE
'''
# file name -> one-hot array of shape (IMG_HEIGHT, IMG_WIDTH, num_classes)
annotations_onehot = {}
# Running count of pixels per class index, accumulated over all files
class_freq = {i: 0 for i in range(num_classes)}

print("One-hot encoding annotation files...")

for n, filename in tqdm(enumerate(img_list, start=0), total=len(img_list)):

    # Starts all-zero; a pixel whose label is not in range(num_classes) keeps
    # an all-zero vector because no channel below is ever set for it.
    onehot_annotation = np.zeros((IMG_HEIGHT, IMG_WIDTH, num_classes), dtype=np.uint8)
    
    for c in range(num_classes):
        # Boolean map of the pixels whose decoded label equals class index c
        mask = (annotations[filename] == c)
        onehot_annotation[..., c] = mask
        class_freq[c] += np.sum(mask)
    
    # `onehot_annotation` keeps the last file's array after the loop; the
    # display cell further down relies on that.
    annotations_onehot[filename] = onehot_annotation

In [None]:
# Total number of pixels counted across all class indices
total_pixels = sum(class_freq.values())

# class index -> share of all counted pixels, in percent
class_percentages = {
    cls: (freq / total_pixels) * 100
    for cls, freq in class_freq.items()
}

# Index 0 is not reported by name; a blank separator is printed for it instead.
for cls, percentage in class_percentages.items():
    if cls == 0:
        print("\n")
    else:
        print(f"{classes(cls).name}: {percentage:.2f}%")

In [None]:
# Ad-hoc sanity checks on the one-hot annotations.
# NOTE(review): `arr` is not used again in the visible cells, and the hard-coded
# file name raises KeyError if that file is not part of the data set.
arr = annotations_onehot['03__2017-11-17-105825-0000.png']
print(len(annotations_onehot))
# Stack all per-file one-hot arrays into one array; axis 3 is the class axis.
test = np.array(list(annotations_onehot.values()))
print(test.shape[3])

# Per-image frequencies for the first two annotations, then for the whole stack
calculate_class_frequency_single(test[0])
print("")
calculate_class_frequency_single(test[1])
print("")
calculate_class_frequency_set(test)

In [None]:
# `onehot_annotation` is the variable left over from the one-hot encoding loop,
# i.e. the annotation of the last file in img_list.
display_one_hot_annotation(onehot_annotation)

In [None]:
'''
FILTER EXPERIMENT 1 DATA
'''
# Keep the 2017-11-17 files whose time stamp starts with 16 followed by 40-59
exp1_pattern = r'^\d{2}__2017-11-17-16(4[0-9]|[4-5]\d)[0-9]{2}-0000.png$'
exp1_img_list = list(filter(lambda name: re.match(exp1_pattern, name), img_list))

'''
SPLIT DATA
'''
# Stage 1: shuffled, seeded 50/50 split into training and held-out file names.
# The same list is passed as X and y, so both halves carry identical names.
X_train, X_temp, y_train, y_temp = train_test_split(
    exp1_img_list, exp1_img_list,
    train_size=0.50, test_size=0.50,
    shuffle=True, random_state=42)

# Stage 2: the held-out half is cut in order (no shuffling) into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    train_size=0.50, test_size=0.50,
    shuffle=False, random_state=42)

# Turn each list of file names into stacked RGB / IR / annotation arrays
print("Populating experiment 1 training data sets...")
exp1_rgb_X_train, exp1_ir_X_train, exp1_y_train = populate_data_sets(X_train, y_train, rgb_imgs, ir_imgs, annotations_onehot)

print("Populating experiment 1 validation data sets...")
exp1_rgb_X_val, exp1_ir_X_val, exp1_y_val = populate_data_sets(X_val, y_val, rgb_imgs, ir_imgs, annotations_onehot)

print("Populating experiment 1 test data sets...")
exp1_rgb_X_test, exp1_ir_X_test, exp1_y_test = populate_data_sets(X_test, y_test, rgb_imgs, ir_imgs, annotations_onehot)

In [None]:
# Shape of one training annotation, followed by the class frequencies of the
# whole experiment 1 training set.
print(exp1_y_train[0].shape)
calculate_class_frequency_set(exp1_y_train)

In [None]:
# Built from TOP_DIR like every other path in this notebook (same resulting value).
EXP1_DIR = TOP_DIR + '/Data/Preprocessed/Experiment1'

# np.save does not create missing directories, so make sure the three output
# folders exist before writing (no-op when they are already there).
for _subdir in ('Train', 'Validate', 'Test'):
    os.makedirs(EXP1_DIR + '/' + _subdir, exist_ok=True)

'''
SAVE TRAINING DATA
'''
save(EXP1_DIR + '/Train/exp1_rgb_X_train.npy', exp1_rgb_X_train)
save(EXP1_DIR + '/Train/exp1_ir_X_train.npy', exp1_ir_X_train)
save(EXP1_DIR + '/Train/exp1_y_train.npy', exp1_y_train)

'''
SAVE VALIDATION DATA
'''
save(EXP1_DIR + '/Validate/exp1_rgb_X_val.npy', exp1_rgb_X_val)
save(EXP1_DIR + '/Validate/exp1_ir_X_val.npy', exp1_ir_X_val)
save(EXP1_DIR + '/Validate/exp1_y_val.npy', exp1_y_val)

'''
SAVE TEST DATA
'''
save(EXP1_DIR + '/Test/exp1_rgb_X_test.npy', exp1_rgb_X_test)
save(EXP1_DIR + '/Test/exp1_ir_X_test.npy', exp1_ir_X_test)
save(EXP1_DIR + '/Test/exp1_y_test.npy', exp1_y_test)

print("Done")

In [None]:
'''
FILTER EXPERIMENT 2 DATA
'''
# Experiment 2 uses every image; this is an alias of img_list, not a copy
exp2_img_list = img_list

'''
SPLIT DATA
'''
# Stage 1: shuffled, seeded 50/50 split into training and held-out file names.
# The same list is passed as X and y, so both halves carry identical names.
X_train, X_temp, y_train, y_temp = train_test_split(
    exp2_img_list, exp2_img_list,
    train_size=0.50, test_size=0.50,
    shuffle=True, random_state=42)

# Stage 2: the held-out half is cut in order (no shuffling) into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    train_size=0.50, test_size=0.50,
    shuffle=False, random_state=42)

# Turn each list of file names into stacked RGB / IR / annotation arrays
print("Populating experiment 2 training data sets...")
exp2_rgb_X_train, exp2_ir_X_train, exp2_y_train = populate_data_sets(X_train, y_train, rgb_imgs, ir_imgs, annotations_onehot)

print("Populating experiment 2 validation data sets...")
exp2_rgb_X_val, exp2_ir_X_val, exp2_y_val = populate_data_sets(X_val, y_val, rgb_imgs, ir_imgs, annotations_onehot)

print("Populating experiment 2 test data sets...")
exp2_rgb_X_test, exp2_ir_X_test, exp2_y_test = populate_data_sets(X_test, y_test, rgb_imgs, ir_imgs, annotations_onehot)

In [None]:
# Built from TOP_DIR like every other path in this notebook (same resulting value).
EXP2_DIR = TOP_DIR + '/Data/Preprocessed/Experiment2'

# np.save does not create missing directories, so make sure the three output
# folders exist before writing (no-op when they are already there).
for _subdir in ('Train', 'Validate', 'Test'):
    os.makedirs(EXP2_DIR + '/' + _subdir, exist_ok=True)

'''
SAVE TRAINING DATA
'''
save(EXP2_DIR + '/Train/exp2_rgb_X_train.npy', exp2_rgb_X_train)
save(EXP2_DIR + '/Train/exp2_ir_X_train.npy', exp2_ir_X_train)
save(EXP2_DIR + '/Train/exp2_y_train.npy', exp2_y_train)

'''
SAVE VALIDATION DATA
'''
save(EXP2_DIR + '/Validate/exp2_rgb_X_val.npy', exp2_rgb_X_val)
save(EXP2_DIR + '/Validate/exp2_ir_X_val.npy', exp2_ir_X_val)
save(EXP2_DIR + '/Validate/exp2_y_val.npy', exp2_y_val)

'''
SAVE TEST DATA
'''
save(EXP2_DIR + '/Test/exp2_rgb_X_test.npy', exp2_rgb_X_test)
save(EXP2_DIR + '/Test/exp2_ir_X_test.npy', exp2_ir_X_test)
save(EXP2_DIR + '/Test/exp2_y_test.npy', exp2_y_test)

print("Done")

In [None]:
'''
FILTER EXPERIMENT 3 DATA
'''
# The patterns below select files by the digits between the date and '-0000.png'.
# The notes assume a 6-digit HHMMSS time stamp, as in the file name
# '03__2017-11-17-105825-0000.png' used elsewhere in this notebook - TODO confirm.

# Train/validation pool: hour 14 from minute 10 on, all of hour 15 and all of hour 16.
exp3_pattern = r'^\d{2}__2017-11-17-(?:14(?:1[0-9]|[2-9][0-9])|15\d{2}|16(?:[0-4][0-9]|5[0-9]))[0-5][0-9]-0000.png$'
exp3_img_list = [file for file in img_list if re.match(exp3_pattern, file)]

# Test set A: hours 10 and 11.
# NOTE(review): the `2(?:...)` alternative is nested under the `11` prefix and
# therefore needs 7 or 8 digits in total; with a 6-digit time stamp it can never
# match. If hour 12 was meant to be included, the grouping is wrong - confirm.
exp3_test_a_pattern = r'^\d{2}__2017-11-17-(?:1(?:[0]\d{2}|1(?:[0-9][0-9]|2(?:[0-9][0-9]|3[0-5][0-9]))))[0-5][0-9]-0000.png$'
exp3_test_a_list = [file for file in img_list if re.match(exp3_test_a_pattern, file)]

# Test set B: hours 14-16 (any minute) and hour 17 up to minute 39.
# NOTE(review): this range contains every time stamp accepted by exp3_pattern,
# so test set B includes the files used for training/validation - confirm that
# this overlap is intended.
exp3_test_b_pattern = r'^\d{2}__2017-11-17-(?:1(?:[4-6]\d{2}|7(?:[0-2][0-9]|3[0-9])))[0-5][0-9]-0000.png$'
exp3_test_b_list = [file for file in img_list if re.match(exp3_test_b_pattern, file)]

'''
SPLIT DATA
'''
# Shuffled, seeded split: 30 % of the pool becomes the training set and the
# remaining 70 % is assigned to the validation variables.
# The same list is passed as X and y, so both outputs carry identical file names.
X_train, X_val, y_train, y_val = train_test_split(
    exp3_img_list, 
    exp3_img_list, 
    test_size=0.70, 
    train_size=0.30, 
    random_state=42, 
    shuffle=True)

print("Populating experiment 3 training data sets...")
exp3_rgb_X_train, exp3_ir_X_train, exp3_y_train = populate_data_sets(X_train, y_train, rgb_imgs, ir_imgs, annotations_onehot)

print("Populating experiment 3 validation data sets...")  
exp3_rgb_X_val, exp3_ir_X_val, exp3_y_val = populate_data_sets(X_val, y_val, rgb_imgs, ir_imgs, annotations_onehot)

# The two test sets are used whole (no split); the list is passed as both X and y.
print("Populating experiment 3 test data sets...")  
exp3_rgb_X_test_a, exp3_ir_X_test_a, exp3_y_test_a = populate_data_sets(exp3_test_a_list, exp3_test_a_list, rgb_imgs, ir_imgs, annotations_onehot)
exp3_rgb_X_test_b, exp3_ir_X_test_b, exp3_y_test_b = populate_data_sets(exp3_test_b_list, exp3_test_b_list, rgb_imgs, ir_imgs, annotations_onehot)

In [None]:
# Built from TOP_DIR like every other path in this notebook (same resulting value).
EXP3_DIR = TOP_DIR + '/Data/Preprocessed/Experiment3'

# np.save does not create missing directories, so make sure the three output
# folders exist before writing (no-op when they are already there).
for _subdir in ('Train', 'Validate', 'Test'):
    os.makedirs(EXP3_DIR + '/' + _subdir, exist_ok=True)

'''
SAVE TRAINING DATA
'''
save(EXP3_DIR + '/Train/exp3_rgb_X_train.npy', exp3_rgb_X_train)
save(EXP3_DIR + '/Train/exp3_ir_X_train.npy', exp3_ir_X_train)
save(EXP3_DIR + '/Train/exp3_y_train.npy', exp3_y_train)

'''
SAVE VALIDATION DATA
'''
save(EXP3_DIR + '/Validate/exp3_rgb_X_val.npy', exp3_rgb_X_val)
save(EXP3_DIR + '/Validate/exp3_ir_X_val.npy', exp3_ir_X_val)
save(EXP3_DIR + '/Validate/exp3_y_val.npy', exp3_y_val)

'''
SAVE TEST DATA
'''
save(EXP3_DIR + '/Test/exp3_rgb_X_test_a.npy', exp3_rgb_X_test_a)
save(EXP3_DIR + '/Test/exp3_ir_X_test_a.npy', exp3_ir_X_test_a)
save(EXP3_DIR + '/Test/exp3_y_test_a.npy', exp3_y_test_a)

save(EXP3_DIR + '/Test/exp3_rgb_X_test_b.npy', exp3_rgb_X_test_b)
save(EXP3_DIR + '/Test/exp3_ir_X_test_b.npy', exp3_ir_X_test_b)
save(EXP3_DIR + '/Test/exp3_y_test_b.npy', exp3_y_test_b)

print("Done")