# Read and process the landslide4sense training images

This script reads the downloaded landslide4sense training images, computes additional indices such as NDVI, and writes the results to .npy files. The skeleton of this script was borrowed from @iamtekson and modified to fit my needs.

In [2]:
#Import necessary libraries
import glob
import os

import h5py
import numpy as np

In [3]:
# Glob patterns locating the training images and their label masks (.h5 files)
TRAIN_PATH = r"\landslide4sense\img\*.h5"
TRAIN_MASK = r"\landslide4sense\mask\*.h5"

# List both folders in sorted filename order so that the later zip() pairs
# the entries position by position
all_train = glob.glob(TRAIN_PATH)
all_train.sort()
all_mask = glob.glob(TRAIN_MASK)
all_mask.sort()

# Zero-filled 4-D containers: 3799 samples of 128 x 128 pixels, with
# 11 feature channels for the inputs and 1 channel for the labels
TRAIN_XX = np.zeros(shape=(3799, 128, 128, 11), dtype=float)
TRAIN_YY = np.zeros(shape=(3799, 128, 128, 1), dtype=float)

In [None]:
#Read images, calculate bands and create two numpy arrays, X and y
def _normalized_difference(a, b):
    """Return the element-wise normalized difference (a - b) / (a + b).

    Where a + b is 0 the division yields NaN or +/-inf; those values are
    passed through unchanged.
    """
    return np.divide(a - b, np.add(a, b))


def _build_features(data):
    """Derive the 11 input channels from one raw image array.

    Args:
        data: Array indexed as data[row, col, band]. Bands 1, 2, 3, 7, 10,
            12 and 13 are read. NaN entries are overwritten in place.

    Returns:
        A float array of shape data.shape[:2] + (11,) whose channels are, in
        order: RED, GREEN, BLUE, NDVI, SLOPE, ELEVATION, NDMI, GNDVI,
        BAND 10, BRIGHTNESS, BSI.
    """
    # NaNs are replaced with a tiny positive value (1e-6), not exactly 0
    data[np.isnan(data)] = 0.000001

    blue = data[:, :, 1]
    green = data[:, :, 2]
    red = data[:, :, 3]
    nir = data[:, :, 7]
    b10 = data[:, :, 10]
    slope = data[:, :, 12]
    elevation = data[:, :, 13]

    # Half of each maximum: 1 - x / (max / 2) maps values in [0, max]
    # onto [1, -1]. The three colour bands share one scale.
    mid_rgb = data[:, :, 1:4].max() / 2.0
    mid_slope = slope.max() / 2.0
    mid_elevation = elevation.max() / 2.0
    mid_b10 = b10.max() / 2.0

    ndvi = _normalized_difference(nir, red)
    ndmi = _normalized_difference(nir, b10)
    gndvi = _normalized_difference(nir, green)

    # NOTE(review): this divides red^2 by green^2; brightness indices are
    # often defined with a sum instead - confirm the formula is intended.
    brightness = np.sqrt(
        np.divide(np.divide(np.multiply(red, red), np.multiply(green, green)), 2)
    )
    mid_br = brightness.max() / 2.0

    # BSI is the normalized difference of (b10 + red) and (nir + blue),
    # then min-max scaled to [0, 1] within this image
    bsi = _normalized_difference(np.add(b10, red), np.add(nir, blue))
    bsi_norm = (bsi - bsi.min()) / (bsi.max() - bsi.min())

    features = np.zeros(data.shape[:2] + (11,))
    features[:, :, 0] = 1 - red / mid_rgb  #RED
    features[:, :, 1] = 1 - green / mid_rgb  #GREEN
    features[:, :, 2] = 1 - blue / mid_rgb  #BLUE
    features[:, :, 3] = ndvi  #NDVI
    features[:, :, 4] = 1 - slope / mid_slope  #SLOPE
    features[:, :, 5] = 1 - elevation / mid_elevation  #ELEVATION
    features[:, :, 6] = ndmi  #NDMI
    features[:, :, 7] = gndvi  #GNDVI
    features[:, :, 8] = 1 - b10 / mid_b10  #BAND 10
    features[:, :, 9] = 1 - brightness / mid_br  #BRIGHTNESS
    features[:, :, 10] = 1 - bsi_norm  #BSI
    return features


# zip() would silently drop unmatched files and leave zero-filled rows,
# so fail early when the two listings cannot be paired one-to-one
if len(all_train) != len(all_mask):
    raise ValueError(
        f"Found {len(all_train)} images but {len(all_mask)} masks; "
        "every image needs exactly one mask"
    )

for i, (img, mask) in enumerate(zip(all_train, all_mask)):
    # Open read-only: the files are only read here, and an explicit mode
    # avoids depending on h5py's version-specific default
    with h5py.File(img, "r") as hdf:
        # Indexing raises KeyError when the dataset is missing, whereas
        # .get() would return None and fail later with an obscure error
        TRAIN_XX[i] = _build_features(np.array(hdf['img']))

    with h5py.File(mask, "r") as hdf:
        TRAIN_YY[i, :, :, 0] = np.array(hdf['mask'])

In [23]:
#Replace any NaN left in the features with 1e-6, then print the min and max
#of both arrays to confirm the data is normalized within -1.0 - 1.0
np.putmask(TRAIN_XX, np.isnan(TRAIN_XX), 0.000001)
print(np.min(TRAIN_XX), np.max(TRAIN_XX), np.min(TRAIN_YY), np.max(TRAIN_YY))

-1.0 1.0 0.0 1.0


In [25]:
#Save the arrays
# np.save does not create missing folders, so make sure 'output' exists
# instead of failing after the whole dataset has already been processed
os.makedirs('output', exist_ok=True)
np.save('output/TRAIN_XX_11var.npy', TRAIN_XX)
np.save('output/TRAIN_YY_11var.npy', TRAIN_YY)