# Conversion notebook

Repository: https://github.com/ZainK-hub/satbinclass

This notebook uses the versions listed below of the following packages:
* Python: 3.7.7
* Numpy: 1.18.5
* GDAL: 3.0.2

## Introduction

This notebook contains the code for data conversion from blobs of 5 images in .TIF files to blobs of 4 images (displacement, phase, coherence and one RGB Sentinel-2 image, stored as 6 bands) in a NumPy database.

In [15]:
import numpy as np
from osgeo import gdal, osr
import cv2
import glob
import os

## Data preprocessing

The data needs to be resized to an image size of 224 x 224 for use with ResNet and most of the other models; the exception is Inception-ResNetV2, which uses an image size of 299 x 299.

In [16]:
#Function returns image array
def getImgArray(tif_name):
    """Read the first band of a .tif file and resize it to a square image.

    The target edge length is taken from the module-level variable
    ``img_size``, which must be set before this function is called.

    Parameters
    ----------
    tif_name : str
        Path of the .tif file, passed to ``gdal.Open``.

    Returns
    -------
    The result of ``cv2.resize`` applied to band 1 with a target size of
    ``(img_size, img_size)``.

    Raises
    ------
    RuntimeError
        If ``gdal.Open`` returns ``None`` for ``tif_name``.
    """
    data_tif = gdal.Open(tif_name, gdal.GA_ReadOnly)
    if data_tif is None:
        #Fail with the offending path instead of an AttributeError on None further down
        raise RuntimeError('GDAL could not open raster file: ' + str(tif_name))

    cols = data_tif.RasterXSize
    rows = data_tif.RasterYSize

    #Read the full extent of band 1
    img_array = data_tif.GetRasterBand(1).ReadAsArray(0, 0, cols, rows)
    img_array = cv2.resize(img_array, (img_size, img_size))

    #Drop the reference to the dataset now that the pixels have been copied out
    del data_tif
    return img_array

#Function generates RGB images.
#It takes in the name of a merged scaled tif file
def getRGBArray(sentinel2_tif_name):
    """Build a resized 8-bit RGB image from bands 1-3 of a .tif file.

    Band 1 is used as red, band 2 as green and band 3 as blue. Values are
    clipped to the range 0..255 and cast to ``uint8`` before resizing. The
    target edge length is taken from the module-level variable ``img_size``,
    which must be set before this function is called.

    Parameters
    ----------
    sentinel2_tif_name : str
        Path of the merged, scaled .tif file, passed to ``gdal.Open``.

    Returns
    -------
    The result of ``cv2.resize`` applied to the (rows, cols, 3) ``uint8``
    array with a target size of ``(img_size, img_size)``.

    Raises
    ------
    RuntimeError
        If ``gdal.Open`` returns ``None`` for ``sentinel2_tif_name``.
    """
    sentinel2_img = gdal.Open(sentinel2_tif_name, gdal.GA_ReadOnly)
    if sentinel2_img is None:
        #Fail with the offending path instead of an AttributeError on None further down
        raise RuntimeError('GDAL could not open raster file: ' + str(sentinel2_tif_name))

    cols = sentinel2_img.RasterXSize
    rows = sentinel2_img.RasterYSize

    #Channel order of the output is R, G, B, i.e. bands 1, 2, 3 of the file
    channels = [sentinel2_img.GetRasterBand(band_no).ReadAsArray(0, 0, cols, rows)
                for band_no in (1, 2, 3)]

    #Stack as float64 so that clipping behaves the same for every band dtype
    img_rgb = np.dstack(channels).astype(np.float64)

    #Saturate values outside 0..255, then convert to 8-bit
    img_rgb = np.clip(img_rgb, 0, 255).astype('uint8')
    img_rgb = cv2.resize(img_rgb, (img_size, img_size))

    #Drop the reference to the dataset now that the pixels have been copied out
    del sentinel2_img
    return img_rgb

In [17]:
#Function to read in data.
def populateData(folder_path, fill_start, fill_stop, blobs_range):
    """Load the blobs of one class folder into the module-level arrays X and y.

    For each blob the displacement, phase and coherence images are written to
    bands 0-2 of ``X`` and the R, G, B channels of Sentinel-2 image 2 to bands
    3-5. Sentinel-2 image 1 is not stored, so it is not read.

    Parameters
    ----------
    folder_path : str
        Folder (relative to the working directory) containing the .tif files.
        Blobs are labelled 1 when the last path component is ``data_pos`` and
        0 otherwise.
    fill_start : int
        Index along the last axis of ``X`` (first axis of ``y``) of the first
        blob written.
    fill_stop : int
        Index of the last blob to label, inclusive.
    blobs_range : int
        Number of blobs to read from the folder.

    Raises
    ------
    IndexError
        If the folder holds fewer than ``blobs_range`` files of any kind.

    Notes
    -----
    Writes into the module-level arrays ``X`` and ``y``, which must exist
    before the call.
    NOTE(review): the files of one blob are matched across the lists purely
    by their position after sorting on modification time; this assumes every
    file kind was written in the same blob order - confirm for the dataset.
    """
    def _tifs_by_mtime(pattern):
        return sorted(glob.glob('./' + folder_path + '/' + pattern), key=os.path.getmtime)

    disp_list = _tifs_by_mtime('*Disp*.tif')
    phase_list = _tifs_by_mtime('*Phase*.tif')
    coh_list = _tifs_by_mtime('*Coh*.tif')
    sentinel2_list2 = _tifs_by_mtime('*Sentinel2A_Img2*.tif')

    #Report missing data up front instead of a bare "list index out of range" mid-loop
    for kind, paths in (('Disp', disp_list), ('Phase', phase_list),
                        ('Coh', coh_list), ('Sentinel2A_Img2', sentinel2_list2)):
        if len(paths) < blobs_range:
            raise IndexError('Expected at least ' + str(blobs_range) + ' ' + kind
                             + ' .tif files in ' + folder_path + ', found ' + str(len(paths)))

    for blob_no in range(blobs_range):
        fill_index = fill_start + blob_no

        img_disp = getImgArray(disp_list[blob_no])
        img_phase = getImgArray(phase_list[blob_no])
        img_coh = getImgArray(coh_list[blob_no])
        img_sentinel2_rgb2 = getRGBArray(sentinel2_list2[blob_no])

        #Only these layers can carry NaN; the RGB image is uint8
        if any(np.isnan(np.sum(layer)) for layer in (img_disp, img_phase, img_coh)):
            print('Warning: Data contains NaN: ', disp_list[blob_no])

        X[:,:,0, fill_index] = img_disp
        X[:,:,1, fill_index] = img_phase
        X[:,:,2, fill_index] = img_coh
        X[:,:,3, fill_index] = img_sentinel2_rgb2[:,:,0]
        X[:,:,4, fill_index] = img_sentinel2_rgb2[:,:,1]
        X[:,:,5, fill_index] = img_sentinel2_rgb2[:,:,2]

    #Compare the last path component so that both 'data_pos' and 'data/data_pos' are positive
    is_positive = os.path.basename(os.path.normpath(folder_path)) == 'data_pos'
    #fill_stop is the last index to label, hence the + 1 on the slice end
    y[fill_start:fill_stop + 1,:] = 1 if is_positive else 0

Download and unzip the *data.zip* folder from: https://drive.google.com/file/d/1sfxayXEWubLtgp-yZbZIgFVeLYi7aO0w/view?usp=sharing

In [18]:
#Parameters, number of positive and negative blobs
total_blobs = 1440

#Number of bands stored per blob: disp, phase, coherence and the R, G, B bands of one Sentinel-2 image
no_images = 6
#10% of input data is the size of the unseen Test dataset
unseen_testing_size = 0.10

#One pass per target model family:
#224 x 224 images saved without a file name prefix for ResNet,
#299 x 299 images saved with the prefix 'inc_res_' for InceptionResnetV2.
img_sizes = [224, 299]
name_prefixes = ['', 'inc_res_']

#The first half of the blobs is positive, the remainder negative
half_blobs = total_blobs // 2

#img_size is deliberately the loop variable: getImgArray and getRGBArray read it as a global
for img_size, name_data in zip(img_sizes, name_prefixes):
    #Images are squares
    X = np.zeros((img_size, img_size, no_images, total_blobs))
    y = np.zeros((total_blobs, 1))

    #Read in .TIF data in the respective folders for positive and negative blobs.
    #The third argument is the index of the last blob of each half.
    populateData('data/data_pos', 0, half_blobs - 1, half_blobs)
    populateData('data/data_neg', half_blobs, total_blobs - 1, total_blobs - half_blobs)

    #Save arrays X, y for use in the  notebook so they don't have to be read in again
    np.save(name_data + 'X.npy', X)
    np.save(name_data + 'y.npy', y)
    print('Saving X: ', X.shape, ' and y: ' + str(y.shape))

Saving X:  (224, 224, 6, 1440)  and y: (1440, 1)
Saving X:  (299, 299, 6, 1440)  and y: (1440, 1)
