In [1]:
import rasterio
import glob
import numpy as np
import os
import cv2
from PIL import Image

import torchvision
import torch

def normalize_images(images, normalize_value_dataset=255):
    """
        Min-max normalizes every image to the range [0, normalize_value_dataset]

        The minimum and maximum are taken over the last two dimensions, so each
        image (and each channel of it) is rescaled independently of the others.

        Parameters:
            images: torch.Tensor
                input images to be normalized, with the image plane in the last two dimensions
            normalize_value_dataset: int or float
                maximum value to which the images shall be normalized (default 255).
                The result is cast to uint8, so values above 255 do not fit.

        Returns:
            images: torch.Tensor
                normalized images as uint8, same shape as the input
    """
    # Shift every image so that its minimum becomes zero
    images = images - torch.amin(images, dim=(-2, -1), keepdim=True)

    # After the shift the maximum equals the value range of the image
    value_range = torch.amax(images, dim=(-2, -1), keepdim=True)
    # A constant image has a zero range; dividing by it would produce NaN, and casting NaN
    # to uint8 is undefined. Dividing by one instead leaves such images at zero.
    value_range = torch.where(value_range == 0, torch.ones_like(value_range), value_range)

    images = torch.div(images, value_range)
    return (normalize_value_dataset * images).to(torch.uint8)

In [2]:
def _load_resized_frame(tiff_path, resize):
    """
        Loads one TIFF frame and returns it resized as a uint8 tensor with a leading dimension of 1

        Parameters:
            tiff_path: str
                path of the image file to open
            resize: callable
                transform applied to the frame after a leading dimension has been added

        Returns:
            frame: torch.Tensor
                resized frame, cast to uint8, with a leading dimension of size 1
    """
    # Pillow may keep a TIFF file open after its pixel data has been read; the context
    # manager releases the handle as soon as the pixels are copied into the tensor
    with Image.open(tiff_path) as image:
        frame = torch.Tensor(np.array(image))
    # The transform gets a leading dimension; squeeze drops every size-1 dimension again
    frame = torch.squeeze(resize(torch.unsqueeze(frame, 0)))
    return frame.to(torch.uint8).unsqueeze(0)


def load_source(i, resize, max_rul=4000):
    """
        Loads the image pairs (camera 0 and camera 1) of one run together with their remaining life

        Files are searched with the pattern ../rename_dataset/A{i:03}*.tiff and their names are
        expected to end in _<camera_id>_<remaining_life>_<capture_time>.tiff. For every distinct
        remaining life only the entry with the smallest capture time is used.

        Parameters:
            i: int
                run id, formatted as A{i:03} in the file names
            resize: callable
                transform applied to every frame (e.g. torchvision.transforms.Resize)
            max_rul: int
                only samples with a remaining life strictly below this value are loaded (default 4000)

        Returns:
            x: np.ndarray
                stacked uint8 image pairs, one entry per sample with camera 0 first and camera 1 second;
                an empty array of shape (0,) when nothing is loaded
            y: np.ndarray
                remaining life of every sample, in descending order
    """
    print(f'loading A{i:03}...')
    load_path = f'../rename_dataset/A{i:03}'

    # Collect (remaining_life, capture_time) from the file names of both cameras
    remaining_life_list = []
    capture_time_list = []
    for file in glob.glob(f'{load_path}*.tiff'):
        # file[:-5] strips the ".tiff" extension
        file_name = file[:-5].split("_")
        remaining_life_list.append(int(file_name[-2]))
        capture_time_list.append(int(file_name[-1]))

    data_array = np.concatenate(
        [np.array(remaining_life_list).reshape(-1, 1), np.array(capture_time_list).reshape(-1, 1)],
        axis=1,
    )

    # Sort by remaining life, then capture time, so that np.unique keeps the earliest
    # capture of every remaining life; afterwards reverse to descending remaining life
    data_array = data_array[np.lexsort((data_array[:, 1], data_array[:, 0]))]
    _, first_idx = np.unique(data_array[:, 0], return_index=True)
    data_array = data_array[first_idx][::-1]

    # Exclude the first 8/9 images because then the experiment still has to stabilise;
    # 8 or 9 is chosen such that an even number of samples remains
    if data_array.shape[0] % 2 == 0:
        data_array = data_array[8:]
    else:
        data_array = data_array[9:]

    images = []
    labels = []
    for remaining_life, capture_time in data_array:
        # Only samples with a remaining life below max_rul are kept.
        # NOTE(review): the original comment read "Exclude RUL < 4000 (otherwise large bias)",
        # which is the opposite of what the code does - confirm the intended direction.
        if remaining_life >= max_rul:
            continue

        frames = [
            _load_resized_frame(f'{load_path}_{camera_id}_{remaining_life}_{capture_time}.tiff', resize)
            for camera_id in (0, 1)
        ]
        images.append(np.array(torch.cat(frames, 0)))
        labels.append(int(remaining_life))

    x = np.array(images)
    y = np.array(labels)

    return x, y

In [3]:
import torchvision.transforms as T
import tqdm
def augment_data(i, train_x, train_y, N, cpu_storage=True, sigma_rul=0.01):
    """
        Creates an augmented version of a dataset and saves it to disk

        The images are augmented N times with a random resized crop (to 640 x 320), random
        horizontal and vertical flips and a random rotation of at most 5 degrees, and then
        normalized with normalize_images. The labels are repeated unchanged for every pass.
        The result is written to ../dataset_aug/A{i:03}/x.npy and ../dataset_aug/A{i:03}/y.npy;
        nothing is returned.

        NOTE(review): the transforms are called once per pass on the whole batch; according to the
        torchvision documentation the same random parameters are then used for every image of
        that pass - confirm that this is intended.

        Args:
            i: int
                run id, used to build the output folder name A{i:03}
            train_x: torch.Tensor
                images to augment, with the image plane in the last two dimensions
            train_y: torch.Tensor
                labels belonging to train_x
            N: int
                number of times you want to augment the entire dataset with the same
                (however randomized) augmentations; must be at least 1
            cpu_storage: bool
                currently unused, kept for backward compatibility; the augmented images are
                always moved to the cpu before they are saved
            sigma_rul: float
                currently unused, kept for backward compatibility; no noise is added to the labels

        Raises:
            ValueError: if N is smaller than 1
    """
    if N < 1:
        # torch.cat on an empty list would otherwise fail with a far less helpful message
        raise ValueError(f'N must be at least 1, got {N}')

    image_width_aug = 320
    image_height_aug = 640

    # Antialias parameter is not available in torchvision version smaller than or equal to 13
    crop_kwargs = {'scale': (0.5, 1.0), 'ratio': (7/8, 8/7)}
    torchvision_version, torchvision_subversion = (
        int(part) for part in torchvision.__version__.split('.')[:2]
    )
    if not (torchvision_version == 0 and torchvision_subversion <= 13):
        crop_kwargs['antialias'] = True
    resized_crop = T.RandomResizedCrop((image_height_aug, image_width_aug), **crop_kwargs)

    # Initialize the (type of) augmentations
    augmentations = T.Compose([
        resized_crop,
        T.RandomHorizontalFlip(p=0.5),
        T.RandomVerticalFlip(p=0.5),
        T.RandomRotation([-5, 5]),
    ])

    # Detaching does not depend on the pass, so it is done once up front
    train_x = train_x.detach()
    train_y = train_y.detach()

    # Loop through the dataset for N times to apply augmentations
    train_x_augmented = []
    train_y_augmented = []
    for _ in tqdm.tqdm(range(N), leave=True):
        train_x_augmented.append(normalize_images(augmentations(train_x)).cpu())
        train_y_augmented.append(train_y)

    # Concatenate all the lists to a tensor
    train_x_augmented = torch.cat(train_x_augmented)
    train_y_augmented = torch.cat(train_y_augmented)
    print('train_x_augmented:', train_x_augmented.shape, 'train_y_augmented:', train_y_augmented.shape)

    # Save the data as plain numpy arrays
    os.makedirs(f'../dataset_aug/A{i:03}', exist_ok=True)
    np.save(f'../dataset_aug/A{i:03}/x.npy', train_x_augmented.cpu().numpy())
    np.save(f'../dataset_aug/A{i:03}/y.npy', train_y_augmented.cpu().numpy())


In [None]:
# Ids of the runs to process; every id is formatted as A{i:03} in the folder and file names
source = [3, 9, 11, 12, 13, 14]

# Single-run variant for quick experiments
# source = [3]

# Transform handed to load_source; every frame is resized to 640 x 320 (height x width)
resize = torchvision.transforms.Resize((640, 320), antialias=True)

for i in source:
    # Load the raw image pairs and labels of this run and store them un-augmented
    os.makedirs(f'../dataset/A{i:03}', exist_ok=True)
    x, y = load_source(i, resize)
    print(f'A{i:03} x shape: {x.shape}, y shape: {y.shape}')
    np.save(f'../dataset/A{i:03}/x.npy', x)
    np.save(f'../dataset/A{i:03}/y.npy', y)

    # Write the augmented dataset with 20 augmentation passes.
    # NOTE(review): the recorded cell output shows 10 passes per run, so it was
    # produced with a different value than the one used here - re-run to refresh it.
    augment_data(i, torch.Tensor(x), torch.Tensor(y), 20)

    print(f'A{i:03} done')

# Optional check: total number of samples over all runs
# num = sum([np.load(f'../dataset/A{i:03}/x.npy').shape[0] for i in source])
# print(f'total: {num}')

loading A003...
A003 x shape: (56, 2, 640, 320), y shape: (56,)


100%|██████████| 10/10 [00:00<00:00, 11.94it/s]


train_x_augmented: torch.Size([560, 2, 640, 320]) train_y_augmented: torch.Size([560])
A003 done
loading A009...
A009 x shape: (64, 2, 640, 320), y shape: (64,)


100%|██████████| 10/10 [00:00<00:00, 10.32it/s]


train_x_augmented: torch.Size([640, 2, 640, 320]) train_y_augmented: torch.Size([640])
A009 done
loading A011...
A011 x shape: (92, 2, 640, 320), y shape: (92,)


100%|██████████| 10/10 [00:01<00:00,  6.93it/s]


train_x_augmented: torch.Size([920, 2, 640, 320]) train_y_augmented: torch.Size([920])
A011 done
loading A012...
A012 x shape: (56, 2, 640, 320), y shape: (56,)


100%|██████████| 10/10 [00:00<00:00, 11.63it/s]


train_x_augmented: torch.Size([560, 2, 640, 320]) train_y_augmented: torch.Size([560])
A012 done
loading A013...
A013 x shape: (50, 2, 640, 320), y shape: (50,)


100%|██████████| 10/10 [00:00<00:00, 14.32it/s]


train_x_augmented: torch.Size([500, 2, 640, 320]) train_y_augmented: torch.Size([500])
A013 done
loading A014...
A014 x shape: (84, 2, 640, 320), y shape: (84,)


100%|██████████| 10/10 [00:01<00:00,  7.96it/s]


train_x_augmented: torch.Size([840, 2, 640, 320]) train_y_augmented: torch.Size([840])
A014 done
