In [2]:
import numpy as np
from matplotlib import pyplot as plt
import h5py
import os
import datetime

## Sentinel-1, 640

In [3]:
train_S1_640 = h5py.File('/work/ka1176/shared_data/2021-ai4food/dev_data/south-africa/sentinel-1/extracted-640/train_data.h5', 'r')

In [7]:
samples = len(train_P_640['label'])
print('Samples before inflation:', samples)

h5_keys = train_S1_640.keys()
print(h5_keys)

Samples before inflation: 4143
<KeysViewHDF5 ['crop_name', 'fid', 'image_stack', 'label', 'mask']>


In [34]:
target_size = 64

In [45]:
raw_ds = {key:[] for key in h5_keys}

for i in range(samples):
    this_crop_name = train_S1_640['crop_name'][i]
    this_fid       = train_S1_640['fid'][i]
    this_image_stack = train_S1_640['image_stack'][i]
    this_label       = train_S1_640['label'][i]
    this_mask        = train_S1_640['mask'][i]
    
    current_size = this_image_stack.shape[-1]
    
    inflation_factor = current_size // target_size
    
    print('Inflation factor', inflation_factor)
    
    for j in range(inflation_factor):
        # always the same
        raw_ds['crop_name'].append(this_crop_name) 
        raw_ds['fid'].append(this_fid)
        raw_ds['label'].append(this_label)
        # divide
        raw_ds['image_stack'].extend(this_image_stack[np.newaxis, :, :, j*target_size:(j+1)*target_size])
        raw_ds['mask'].extend(this_mask[np.newaxis, j*target_size:(j+1)*target_size])
    
    if i%100:
        print('Progress: ', i/samples * 100)

Inflation factor 10
Inflation factor 10
Progress:  0.02413709872073377
Inflation factor 10
Progress:  0.04827419744146754
Inflation factor 10
Progress:  0.0724112961622013
Inflation factor 10
Progress:  0.09654839488293508
Inflation factor 10
Progress:  0.12068549360366883
Inflation factor 10
Progress:  0.1448225923244026
Inflation factor 10
Progress:  0.1689596910451364
Inflation factor 10
Progress:  0.19309678976587016
Inflation factor 10
Progress:  0.2172338884866039
Inflation factor 10
Progress:  0.24137098720733766
Inflation factor 10
Progress:  0.26550808592807146
Inflation factor 10
Progress:  0.2896451846488052
Inflation factor 10
Progress:  0.313782283369539
Inflation factor 10
Progress:  0.3379193820902728
Inflation factor 10
Progress:  0.3620564808110065
Inflation factor 10
Progress:  0.3861935795317403
Inflation factor 10
Progress:  0.41033067825247405
Inflation factor 10
Progress:  0.4344677769732078
Inflation factor 10
Progress:  0.4586048756939416
Inflation factor 10
Pro

In [46]:
np.array(raw_ds['image_stack']).shape, np.array(raw_ds['mask']).shape

((41430, 41, 2, 64), (41430, 64))

In [49]:
n_samples = int(inflation_factor * samples)
image_dims = (41, 2, target_size)
mask_dims = (target_size,)
chunk_size = 100

In [68]:
filepath = '/work/ka1176/shared_data/2021-ai4food/dev_data/south-africa/sentinel-1/extracted-640/train_data_inflated.h5' # note renamed files in directory!
if os.path.exists(filepath):
    os.remove(filepath)
    print('removed')
new_h5_file = h5py.File(filepath, 'w')

removed


In [69]:
new_h5_file.create_dataset("image_stack", shape=(n_samples,) + image_dims, chunks=(chunk_size,) + image_dims, fletcher32=True, dtype='float32', data=np.array(raw_ds['image_stack']))
new_h5_file.create_dataset("mask", shape=(n_samples,) + mask_dims, chunks=(chunk_size,) + mask_dims, fletcher32=True, dtype='float32', data=np.array(raw_ds['mask']))
new_h5_file.create_dataset("fid", shape=(n_samples,), chunks=(chunk_size,), fletcher32=True, dtype='int', data=np.array(raw_ds['fid']))
new_h5_file.create_dataset("label", shape=(n_samples,), chunks=(chunk_size,), fletcher32=True, dtype='int', data=np.array(raw_ds['label']))
new_h5_file.create_dataset("crop_name", shape=(n_samples,), chunks=(chunk_size,), dtype=h5py.string_dtype(), data=np.array(raw_ds['crop_name']))


<HDF5 dataset "crop_name": shape (41430,), type "|O">

In [70]:
new_h5_file.attrs['time_created'] = str(datetime.datetime.now())
new_h5_file.flush()
new_h5_file.close()

In [73]:
tmp = h5py.File(filepath, 'r') # check
print(tmp.keys())
print(tmp['image_stack'][0].shape, tmp['mask'][0].shape, tmp['label'][:10], tmp['fid'][:10], tmp['crop_name'][:10])

<KeysViewHDF5 ['crop_name', 'fid', 'image_stack', 'label', 'mask']>
(41, 2, 64) (64,) [4 4 4 4 4 4 4 4 4 4] [80172 80172 80172 80172 80172 80172 80172 80172 80172 80172] [b'Lucerne/Medics' b'Lucerne/Medics' b'Lucerne/Medics' b'Lucerne/Medics'
 b'Lucerne/Medics' b'Lucerne/Medics' b'Lucerne/Medics' b'Lucerne/Medics'
 b'Lucerne/Medics' b'Lucerne/Medics']


## Planet, 640

In [3]:
train_P_640 = h5py.File('/work/ka1176/shared_data/2021-ai4food/dev_data/south-africa/planet/extracted-640/train_data.h5', 'r')

In [4]:
samples = len(train_P_640['label'])
print('Samples before inflation:', samples)

h5_keys = train_P_640.keys()
print(h5_keys)

Samples before inflation: 4143
<KeysViewHDF5 ['crop_name', 'fid', 'image_stack', 'label', 'mask']>


In [5]:
target_size = 64

In [6]:
raw_ds = {key:[] for key in h5_keys}

for i in range(samples):
    this_crop_name = train_P_640['crop_name'][i]
    this_fid       = train_P_640['fid'][i]
    this_image_stack = train_P_640['image_stack'][i]
    this_label       = train_P_640['label'][i]
    this_mask        = train_P_640['mask'][i]
    
    current_size = this_image_stack.shape[-1]
    
    inflation_factor = current_size // target_size
    
    print('Inflation factor', inflation_factor)
    
    for j in range(inflation_factor):
        # always the same
        raw_ds['crop_name'].append(this_crop_name) 
        raw_ds['fid'].append(this_fid)
        raw_ds['label'].append(this_label)
        # divide
        raw_ds['image_stack'].extend(this_image_stack[np.newaxis, :, :, j*target_size:(j+1)*target_size])
        raw_ds['mask'].extend(this_mask[np.newaxis, j*target_size:(j+1)*target_size])
    
    if i%100:
        print('Progress: ', i/samples * 100)

Inflation factor 10
Inflation factor 10
Progress:  0.02413709872073377
Inflation factor 10
Progress:  0.04827419744146754
Inflation factor 10
Progress:  0.0724112961622013
Inflation factor 10
Progress:  0.09654839488293508
Inflation factor 10
Progress:  0.12068549360366883
Inflation factor 10
Progress:  0.1448225923244026
Inflation factor 10
Progress:  0.1689596910451364
Inflation factor 10
Progress:  0.19309678976587016
Inflation factor 10
Progress:  0.2172338884866039
Inflation factor 10
Progress:  0.24137098720733766
Inflation factor 10
Progress:  0.26550808592807146
Inflation factor 10
Progress:  0.2896451846488052
Inflation factor 10
Progress:  0.313782283369539
Inflation factor 10
Progress:  0.3379193820902728
Inflation factor 10
Progress:  0.3620564808110065
Inflation factor 10
Progress:  0.3861935795317403
Inflation factor 10
Progress:  0.41033067825247405
Inflation factor 10
Progress:  0.4344677769732078
Inflation factor 10
Progress:  0.4586048756939416
Inflation factor 10
Pro

In [7]:
np.array(raw_ds['image_stack']).shape, np.array(raw_ds['mask']).shape

((41430, 244, 4, 64), (41430, 64))

In [11]:
n_samples = int(inflation_factor * samples)
image_dims = (244, 4, target_size)
mask_dims = (target_size,)
chunk_size = 100

In [12]:
filepath = '/work/ka1176/shared_data/2021-ai4food/dev_data/south-africa/planet/extracted-640/train_data_inflated.h5'
if os.path.exists(filepath):
    os.remove(filepath)
    print('removed')
new_h5_file = h5py.File(filepath, 'w')

removed


In [13]:
new_h5_file.create_dataset("image_stack", shape=(n_samples,) + image_dims, chunks=(chunk_size,) + image_dims, fletcher32=True, dtype='float32', data=np.array(raw_ds['image_stack']))
new_h5_file.create_dataset("mask", shape=(n_samples,) + mask_dims, chunks=(chunk_size,) + mask_dims, fletcher32=True, dtype='float32', data=np.array(raw_ds['mask']))
new_h5_file.create_dataset("fid", shape=(n_samples,), chunks=(chunk_size,), fletcher32=True, dtype='int', data=np.array(raw_ds['fid']))
new_h5_file.create_dataset("label", shape=(n_samples,), chunks=(chunk_size,), fletcher32=True, dtype='int', data=np.array(raw_ds['label']))
new_h5_file.create_dataset("crop_name", shape=(n_samples,), chunks=(chunk_size,), dtype=h5py.string_dtype(), data=np.array(raw_ds['crop_name']))


<HDF5 dataset "crop_name": shape (41430,), type "|O">

In [14]:
new_h5_file.attrs['time_created'] = str(datetime.datetime.now())
new_h5_file.flush()
new_h5_file.close()

In [15]:
tmp = h5py.File(filepath, 'r') # check
print(tmp.keys())
print(tmp['image_stack'][0].shape, tmp['mask'][0].shape, tmp['label'][:10], tmp['fid'][:10], tmp['crop_name'][:10])

<KeysViewHDF5 ['crop_name', 'fid', 'image_stack', 'label', 'mask']>
(244, 4, 64) (64,) [4 4 4 4 4 4 4 4 4 4] [80172 80172 80172 80172 80172 80172 80172 80172 80172 80172] [b'Lucerne/Medics' b'Lucerne/Medics' b'Lucerne/Medics' b'Lucerne/Medics'
 b'Lucerne/Medics' b'Lucerne/Medics' b'Lucerne/Medics' b'Lucerne/Medics'
 b'Lucerne/Medics' b'Lucerne/Medics']
