In [1]:
from PIL import Image
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import h5py

## Semantic Segmentation Image Data Generation

Some simple manipulation to convert the spectrogram data generated by `OBSToolbox` into segmentation-ready images that can be inspected in an annotation application.

In [2]:
from reverb.analysis.plotting import get_spectrogram_colors

# Render each spectrogram .h5 file as a PNG: the colour array returned by
# `get_spectrogram_colors` is scaled to 8-bit and saved as an RGBA image.
def create_and_save_image(spectrogram_path : Path, output_folder):
    """Render one spectrogram HDF5 file as an RGBA PNG inside `output_folder`.

    The PNG is named after the stem of `spectrogram_path`. If that PNG already
    exists the function returns without opening the HDF5 file, so re-running
    over the same folder only processes spectrograms that are still missing.

    Parameters
    ----------
    spectrogram_path : Path or str
        HDF5 file containing a 'spectrogram_array' dataset.
    output_folder : Path or str
        Directory that receives the PNG; created (with parents) if missing.
    """
    spectrogram_path = Path(spectrogram_path)
    output_dir = Path(output_folder)
    # create output folder if it doesn't already exist
    output_dir.mkdir(exist_ok=True, parents=True)

    # Append '.png' to the stem rather than using with_suffix(): the file names
    # contain dots (dates), which with_suffix() would treat as an extension.
    image_path = output_dir / f'{spectrogram_path.stem}.png'
    if image_path.exists():
        return

    # Read inside a context manager so the HDF5 handle is released even if the
    # read fails; the file is only read, so open it read-only.
    with h5py.File(spectrogram_path, 'r') as file:
        array = np.array(file['spectrogram_array'])

    colors = get_spectrogram_colors(array)
    # Convert the array to an unsigned 8-bit integer array
    image_array = np.uint8(255*colors)

    # NOTE(review): presumably get_spectrogram_colors leaves a matplotlib
    # figure open; discard it so figures do not pile up across files - confirm.
    plt.clf()
    plt.close()

    # Create a PIL image from the array, flipped along both image axes
    image = Image.fromarray(np.flip(image_array, (0,1)), 'RGBA')

    # Save the image as a PNG file
    image.save(image_path)

def create_segmentation_images(folder_name, output_folder):
    """Convert every `.h5` file directly inside `folder_name` into an image.

    Each matching file is handed to `create_and_save_image` together with
    `output_folder`. Sub-directories are not searched.
    """
    source_dir = Path(folder_name)
    # Collect the matches up front, then process them one by one
    h5_files = [*source_dir.glob('*.h5')]
    for h5_file in h5_files:
        create_and_save_image(h5_file, output_folder)

Specify the target data folder here. This step requires the .h5 files to already have been generated. See the .ini file in this folder for an example. 

In [4]:
# Root folder of the annotation dataset. Both folders below are derived from
# it, so retargeting the notebook at another dataset is a one-line change.
annotation_data_root = '/data/UPFLOW/projects/iReverb/reverb/segmentation/analysis/data/UP34_annotations'

# Specify the h5 filepath output of the .ini file
spectrogram_h5_file_folder = f'{annotation_data_root}/raw'

# Convert these h5 files into spectrogram images, complete with RGBA channels, similar to the OBSToolbox spectrogram plots.
output_folder = f'{annotation_data_root}/images'
create_segmentation_images(spectrogram_h5_file_folder, output_folder)

### Partitioning Dataset for Manual Labelling

We now split up the dataset by randomly sampling from the generated images to select a subset of datapoints that should be manually annotated. 

In [8]:
import re

_nsre = re.compile('([0-9]+)')
def natural_sort_key(s):
    """Return a sort key that orders embedded numbers numerically.

    `str(s)` is split into alternating text and ASCII-digit runs; digit runs
    become ints and text runs are lower-cased, so 'x9' sorts before 'x10' and
    comparison is case-insensitive.

    Parameters
    ----------
    s : object
        Anything with a meaningful `str()` (e.g. a `str` or `Path`).

    Returns
    -------
    list
        Tokens alternating `str, int, str, ...`, always starting with a `str`.
    """
    # Splitting on a capturing group alternates [text, digits, text, ...], so
    # odd positions are exactly the runs matched by [0-9]+. Classifying by
    # position (instead of str.isdigit) keeps non-ASCII digits such as '²' as
    # text: int() would reject them or put an int in a text slot.
    tokens = _nsre.split(str(s))
    return [int(token) if index % 2 else token.lower()
            for index, token in enumerate(tokens)]

In [9]:
num_images_for_annotation = 50
image_paths = list( Path(output_folder).glob('*.png'))

# Sort before sampling: glob order depends on the filesystem, so a fixed order
# is needed for the seeded sample to be reproducible.
image_paths.sort(key = natural_sort_key)

print(len(image_paths))
print([path.stem for path in image_paths[:5]])

np.random.seed(0)
# Sample without replacement: np.random.choice defaults to replace=True, which
# can draw the same image several times and leave fewer distinct images than
# requested. Cap the size so a small image folder does not raise.
num_to_sample = min(num_images_for_annotation, len(image_paths))
files_to_be_annotated = np.random.choice(image_paths, num_to_sample, replace=False)


2879
['RR45_BHZ_2013.01.01-000000-2013.01.01-001500', 'RR45_BHZ_2013.01.01-001500-2013.01.01-003000', 'RR45_BHZ_2013.01.01-003000-2013.01.01-004500', 'RR45_BHZ_2013.01.01-004500-2013.01.01-010000', 'RR45_BHZ_2013.01.01-010000-2013.01.01-011500']


In [10]:
import shutil

def copy_files_to_annotation_folder(files_to_be_annotated, output_pathname):
    """Copy each selected file into `output_pathname`, keeping its file name.

    The destination directory (and any missing parents) is created first.
    Entries of `files_to_be_annotated` must expose a `.name` attribute, as
    `Path` objects do.
    """
    destination_dir = Path(output_pathname)
    destination_dir.mkdir(parents=True, exist_ok=True)
    for source in files_to_be_annotated:
        target = destination_dir / source.name
        shutil.copy(source, target)
    

# Destination for the sampled images. The path is relative, so it resolves
# against the working directory the notebook is run from.
annotation_images_path = '../../examples/specs/RR45_test/to_be_annotated'
copy_files_to_annotation_folder(
    files_to_be_annotated=files_to_be_annotated,
    output_pathname=annotation_images_path,
)
