# Notebook to prepare datasets

In [3]:
from pathlib import Path

import h5py
import torch

import preprocessing.dataset_generation as dg

In [4]:
# Pick the compute device: the first CUDA GPU when one is visible, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    print("CUDA Device Count: ", torch.cuda.device_count())
    print("CUDA Device Name: ", torch.cuda.get_device_name(0))
else:
    # Build a torch.device here as well, so `device` has the same type on
    # every machine (it used to be a plain str on CPU-only hosts).
    device = torch.device('cpu')

print(f'Using device: {device}')

CUDA Device Count:  1
CUDA Device Name:  NVIDIA GeForce RTX 3050 Laptop GPU
Using device: cuda:0


In [5]:
# Split identifiers passed as the first argument to the dataset-preparation calls.
train, val, test = 'training', 'validation', 'testing'

In [20]:
# Month of satellite imagery to load. March (3) is the default; change the
# number to use another month (available months: 1-4).
month = 3

# Build the directories from path components instead of backslash-separated
# literals: on POSIX a backslash is not a separator, so the old literals would
# have been read as a single file name there.
dataset_path_prep = Path('data') / 'satellite' / f'dataset_month{month}'
ndvi_path_Path = Path('data') / 'ndvi' / 'output_NDVI'

# String forms, kept under their original names in case other cells use them.
dataset_path = str(dataset_path_prep)
ndvi_path = str(ndvi_path_Path)

dtype = torch.float32

# The three splits are prepared with identical arguments apart from the split
# key. The training call previously received the raw string `ndvi_path` while
# the other two received the Path object; all three now get the Path.
train_set = dg.prepare_3d_dataset_with_ndvi(train, binary_dir=dataset_path_prep,
                                            ndvi_dir=ndvi_path_Path, device=device, dtype=dtype)

print("Train set prepared")

val_set = dg.prepare_3d_dataset_with_ndvi(val, binary_dir=dataset_path_prep,
                                          ndvi_dir=ndvi_path_Path, device=device, dtype=dtype)

print("Validation set prepared")

test_set = dg.prepare_3d_dataset_with_ndvi(test, binary_dir=dataset_path_prep,
                                           ndvi_dir=ndvi_path_Path, device=device, dtype=dtype)

print("Test set prepared")

Processing folder 29/1: JRC_GSW1_4_MonthlyHistory_validation_r1


  torch.tensor(inputs, dtype=torch.float32, device=device)


Finished processing folders.
Finmisehd stacking input tensors.
Finished stacking target tensors.
Validation set prepared
Processing folder 0/1: JRC_GSW1_4_MonthlyHistory_testing_r1
Finished processing folders.
Finmisehd stacking input tensors.
Finished stacking target tensors.
Test set prepared


In [22]:
def save_with_hdf5(tensor_dataset, name, out_dir="data"):
    """
    Save a two-tensor dataset to ``<out_dir>/<name>.h5`` with gzip compression.

    Parameters
    ----------
    tensor_dataset
        Object whose ``tensors`` attribute unpacks into ``(features, labels)``,
        both torch tensors. They may live on any device and may require grad.
    name : str
        File stem of the output; also used in the confirmation message.
    out_dir : str or path-like, optional
        Directory the file is written to; created if it does not exist.
        Defaults to ``"data"``, the location that was previously hard-coded.

    Notes
    -----
    The file is opened in mode ``"w"``, so an existing file of the same name
    is replaced. The tensors are stored under the keys ``"features"`` and
    ``"labels"``.
    """
    features, labels = tensor_dataset.tensors

    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    with h5py.File(str(out_dir / f"{name}.h5"), "w") as f:
        # Tensor.numpy() only works on CPU tensors that do not require grad,
        # so detach and copy to host memory before converting.
        f.create_dataset("features", data=features.detach().cpu().numpy(), compression="gzip")
        f.create_dataset("labels", data=labels.detach().cpu().numpy(), compression="gzip")

    print(f"Saved {name} dataset with HDF5.")



In [None]:
# Write each prepared split to its own HDF5 file, named after the split variable.
save_with_hdf5(tensor_dataset=train_set, name="train_set")
save_with_hdf5(tensor_dataset=val_set, name="val_set")
save_with_hdf5(tensor_dataset=test_set, name="test_set")