In [None]:
import numpy as np
import random
import os
from glob import glob
import matplotlib.pyplot as plt

## 1. Apply data augmentation

The model reads the data from a directory tree organised as `source/observation` (e.g. `sentinel-5p/obs1`).

In [None]:
def data_aug_rectangle(rand, source, file_name, out_name):
    """Write a flipped/rotated copy of one array next to the file it came from.

    The input is read from ``<path>/<source>/<observation>/<file_name>`` and the
    result is written to ``<path>/<source>/<observation>/<out_name>``.  ``path``
    and ``observation`` are NOT parameters: they are looked up as notebook-level
    globals at call time, so both must be set before calling.

    Args:
        rand: number that selects the transform (callers pass a value in [0, 1)).
        source: name of the data-source directory (also echoed to stdout).
        file_name: name of the ``.npy`` file to load.
        out_name: name of the ``.npy`` file to write.
    """
    print(source)
    obs_dir = os.path.join(path, source, observation)
    arr = np.load(os.path.join(obs_dir, file_name))

    # (exclusive upper bound on `rand`, transform) pairs, checked in order.
    # A 180-degree rotation followed by a flip along one axis is the same as a
    # flip along the other axis, so the third and fourth rows duplicate the
    # second and first respectively.
    banded_transforms = (
        (0.2, lambda a: np.flip(a, 0)),
        (0.4, lambda a: np.flip(a, 1)),
        (0.6, lambda a: np.flip(np.rot90(a, 2), 0)),
        (0.8, lambda a: np.flip(np.rot90(a, 2), 1)),
    )
    for upper_bound, transform in banded_transforms:
        if rand < upper_bound:
            arr = transform(arr)
            break
    else:
        # No band matched: plain 180-degree rotation.
        arr = np.rot90(arr, 2)

    print(np.mean(arr))
    np.save(os.path.join(obs_dir, out_name), arr)
    
def data_aug_rectangle_s2(rand, source, file_name, out_name):
    """Write a flipped/rotated copy of a multi-band array, band by band.

    The array is loaded from ``<path>/<source>/<observation>/<file_name>`` and
    iterated along its first axis; the same transform (chosen by ``rand``) is
    written back into every band in place, then the whole array is saved to
    ``<path>/<source>/<observation>/<out_name>``.  ``path`` and ``observation``
    are read from notebook-level globals, not passed in.

    Args:
        rand: number that selects the transform (callers pass a value in [0, 1)).
        source: name of the data-source directory (also echoed to stdout).
        file_name: name of the ``.npy`` file to load.
        out_name: name of the ``.npy`` file to write.
    """
    print(source)
    obs_dir = os.path.join(path, source, observation)
    stack = np.load(os.path.join(obs_dir, file_name))

    # The choice depends only on `rand`, so pick the transform once up front.
    if rand < 0.2:
        def transform(band):
            return np.flip(band, 0)
    elif rand < 0.4:
        def transform(band):
            return np.flip(band, 1)
    elif rand < 0.6:
        def transform(band):
            return np.flip(np.rot90(band, 2), 0)
    elif rand < 0.8:
        def transform(band):
            return np.flip(np.rot90(band, 2), 1)
    else:
        def transform(band):
            return np.rot90(band, 2)

    # Every transform keeps the band's shape, so it can be stored back in place.
    for band_no in range(len(stack)):
        stack[band_no] = transform(stack[band_no])

    print(np.mean(stack))
    np.save(os.path.join(obs_dir, out_name), stack)

In [None]:
# Draw one random number per observation and reuse it for all three sources so
# that the same flip/rotation is applied to the ghgsat, sentinel-2 and
# sentinel-5p arrays of that observation. Two augmented copies are written per
# observation (index 0 and 1).
# NOTE(review): `path` is not defined anywhere in the visible notebook; it must
# be set to the data root before this cell runs.
observations = os.listdir(os.path.join(path, "ghgsat"))
for idx in range(2):
    # Same output name for every source. The ghgsat call previously used
    # str(idx)+"data_aug.npy" (no underscore), so its files did not match the
    # "<idx>_data_aug.npy" naming of the other two sources.
    out_name = str(idx) + "_data_aug.npy"
    for observation in observations:  # `observation` is read as a global by the helpers
        if observation == '.DS_Store':
            continue
        rand = random.random()
        data_aug_rectangle(rand, "ghgsat", "original.npy", out_name)
        data_aug_rectangle_s2(rand, "sentinel-2", "original.npy", out_name)
        data_aug_rectangle(rand, "sentinel-5p", "original.npy", out_name)
print("Augmented data saved")

## 2. 6 patches and data augmentation

In [None]:
def crop_to_squares(img, source):
    """Cut six overlapping square patches out of one image.

    Args:
        img: object exposing ``crop((left, upper, right, lower))`` whose result
            ``np.array`` can convert (the notebook passes a PIL image).
        source: ``"ghgsat"`` (400x400 patches) or ``"s5p"`` (800x800 patches).

    Returns:
        list of six ``numpy`` arrays, ordered box_1 .. box_6.

    Raises:
        ValueError: if ``source`` is neither ``"ghgsat"`` nor ``"s5p"``.
    """
    # ghgsat is 20m x 20m pixels, so the image is 750 wide by 1000 high.
    # Each box is (left, upper, right, lower) and spans 400 x 400 pixels.
    ghgsat_boxes = {
        "box_1": (30, 30, 430, 430),
        "box_2": (310, 30, 710, 430),
        "box_3": (30, 300, 430, 700),
        "box_4": (310, 300, 710, 700),
        "box_5": (30, 570, 430, 970),
        "box_6": (310, 570, 710, 970)
    }
    # s5p and s2 are 10m x 10m pixels (double the ghgsat resolution), so the
    # image is 1,500 wide by 2,000 high. Each box spans 800 x 800 pixels.
    s5p_boxes = {
        "box_1": (60, 60, 860, 860),
        "box_2": (620, 60, 1420, 860),
        "box_3": (60, 600, 860, 1400),
        "box_4": (620, 600, 1420, 1400),
        "box_5": (60, 1140, 860, 1940),
        "box_6": (620, 1140, 1420, 1940)
    }
    boxes_by_source = {"ghgsat": ghgsat_boxes, "s5p": s5p_boxes}

    # Validate once, before any cropping happens. ValueError is a subclass of
    # Exception, so callers that catch Exception keep working.
    if source not in boxes_by_source:
        raise ValueError("Wrong source input")

    return [np.array(img.crop(box)) for box in boxes_by_source[source].values()]

def crop_s2(file):
    """Cut six overlapping 800x800 patches out of every band of a band stack.

    The previous implementation converted each band with ``Image.fromarray``,
    but ``Image`` is never imported in this notebook, and the PIL round-trip
    can change the dtype. The crop is now done with NumPy slicing, which keeps
    the dtype and needs no extra import.

    Args:
        file: iterable of 2-D bands, each indexable as ``band[row, col]``
            (e.g. an array of shape ``(bands, height, width)``). Every box must
            lie inside the band; the notebook's notes give 1,500 x 2,000 pixels.

    Returns:
        list of six lists (box_1 .. box_6); each inner list holds one
        independent ``numpy`` array per band.
    """
    # Boxes are (left, upper, right, lower) in pixel coordinates.
    s5p_boxes = {
        "box_1": (60, 60, 860, 860),
        "box_2": (620, 60, 1420, 860),
        "box_3": (60, 600, 860, 1400),
        "box_4": (620, 600, 1420, 1400),
        "box_5": (60, 1140, 860, 1940),
        "box_6": (620, 1140, 1420, 1940)
    }
    box_images = []
    for left, upper, right, lower in s5p_boxes.values():
        # Rows run upper..lower and columns left..right. np.array copies the
        # slice so the patches do not alias the input.
        cropped = [np.array(band[upper:lower, left:right]) for band in file]
        box_images.append(cropped)
    return box_images

# Since these patches are square, they allow more rotation options (e.g. 90-degree turns) than the rectangular images.
def data_aug_square(rands, source, path_name):
    """Augment each patch in ``source`` in place and save it as ``box<N>``.

    The checks are cumulative (not exclusive): a draw above 0.75 gets both
    flips and the rotation, a draw above 0.5 gets the axis-1 flip and the
    rotation, a draw above 0.25 gets only the rotation, and anything else is
    left untouched.

    Args:
        rands: one number per patch; ``rands[i]`` controls ``source[i]``.
        source: indexable, mutable collection of patches; entries are replaced
            with their augmented versions.
        path_name: directory that receives ``box1``, ``box2``, ... (``np.save``
            appends ``.npy``).
    """
    for slot, draw in enumerate(rands):
        patch = source[slot]
        if draw > 0.75:
            patch = np.flip(patch, 0)
        if draw > 0.5:
            patch = np.flip(patch, 1)
        if draw > 0.25:
            patch = np.rot90(patch, 3)
        source[slot] = patch
        target = os.path.join(path_name, "box" + str(slot + 1))
        np.save(target, source[slot])

def data_aug_s2(rands, source, path_name):
    """Augment every band of each patch in place and save the patch as ``box<N>``.

    Uses the same cumulative rule as the square single-band helper: above 0.75
    flip axis 0, above 0.5 flip axis 1, above 0.25 rotate with ``np.rot90(.., 3)``.
    All bands of one patch share that patch's draw.

    NOTE(review): a later cell defines another function with this same name
    (it saves ``4_box<N>`` files); whichever cell ran last wins.

    Args:
        rands: one number per patch; ``rands[i]`` controls ``source[i]``.
        source: indexable collection of patches, each an indexable, mutable
            collection of bands; bands are replaced with augmented versions.
        path_name: directory that receives ``box1``, ``box2``, ...
    """
    for patch_no, draw in enumerate(rands):
        bands = source[patch_no]
        for band_no in range(len(bands)):
            band = bands[band_no]
            if draw > 0.75:
                band = np.flip(band, 0)
            if draw > 0.5:
                band = np.flip(band, 1)
            if draw > 0.25:
                band = np.rot90(band, 3)
            bands[band_no] = band
        target = os.path.join(path_name, "box" + str(patch_no + 1))
        np.save(target, source[patch_no])
        
base_path = 'path'

# Input file names inside each <source>/<observation>/ directory.
# NOTE(review): these mirror the names used by the 4-patch section of this
# notebook; the previous literals ('ghgsat', 's5p', 's2') ignored `observation`
# and loaded the same file on every iteration. Confirm the names match the data.
ghgsat_file = "flag_mask_20m_high_res_methane.npy"
s5p_file = "low_res_methane.npy"
s2_file = "all_bands.npy"

# NOTE(review): `Image` (PIL, e.g. `from PIL import Image`) is not imported in
# the notebook's import cell; it must be in scope for this cell to run.
for observation in os.listdir(os.path.join(base_path, "ghgsat")):
    # Skip the macOS folder-metadata entry, as the first augmentation cell does.
    if observation == '.DS_Store':
        continue

    # ghgsat
    ghgsat_path = os.path.join(base_path, "ghgsat", observation, ghgsat_file)
    img = Image.fromarray(np.load(ghgsat_path))
    ghgsat_cropped = crop_to_squares(img, "ghgsat")

    # s5p
    s5p_path = os.path.join(base_path, "sentinel-5p", observation, s5p_file)
    img = Image.fromarray(np.load(s5p_path))
    s5p_cropped = crop_to_squares(img, "s5p")

    # s2
    s2_path = os.path.join(base_path, "sentinel-2", observation, s2_file)
    file = np.load(s2_path)
    s2_cropped = crop_s2(file)

    # Apply data augmentation consistently to each patch across the different
    # sources and save: draw 6 random numbers, one per patch, and reuse them
    # for every source so matching patches get the same transform.
    rands = np.random.uniform(low=0, high=1, size=(6,))
    data_aug_square(rands, ghgsat_cropped, os.path.join(base_path, "ghgsat", observation))
    data_aug_square(rands, s5p_cropped, os.path.join(base_path, "sentinel-5p", observation))
    data_aug_s2(rands, s2_cropped, os.path.join(base_path, "sentinel-2", observation))

## 3. 4 patches and data augmentation

In [None]:
def crop_to_rectangle(img, source):
    """Cut four overlapping rectangular patches out of one image.

    Args:
        img: object exposing ``crop((left, upper, right, lower))`` whose result
            ``np.array`` can convert (the notebook passes a PIL image).
        source: ``"ghgsat"`` (patches 400 wide x 500 high) or ``"s5p"``
            (patches 800 wide x 1000 high).

    Returns:
        list of four ``numpy`` arrays, ordered box_1 .. box_4.

    Raises:
        ValueError: if ``source`` is neither ``"ghgsat"`` nor ``"s5p"``.
    """
    # ghgsat is 20m x 20m pixels, so the image is 750 wide by 1000 high.
    # Each box is (left, upper, right, lower): 400 wide by 500 high.
    ghgsat_boxes = {
        "box_1": (0, 30, 400, 530),
        "box_2": (350, 30, 750, 530),
        "box_3": (0, 470, 400, 970),
        "box_4": (350, 470, 750, 970)
    }
    # s5p and s2 are 10m x 10m pixels (double the ghgsat resolution), so the
    # image is 1,500 wide by 2,000 high. Each box is 800 wide by 1000 high.
    s5p_boxes = {
        "box_1": (0, 60, 800, 1060),
        "box_2": (700, 60, 1500, 1060),
        "box_3": (0, 940, 800, 1940),
        "box_4": (700, 940, 1500, 1940)
    }
    boxes_by_source = {"ghgsat": ghgsat_boxes, "s5p": s5p_boxes}

    # Validate once, before any cropping happens. ValueError is a subclass of
    # Exception, so callers that catch Exception keep working.
    if source not in boxes_by_source:
        raise ValueError("Wrong source input")

    return [np.array(img.crop(box)) for box in boxes_by_source[source].values()]

def crop_s2_to_rectangle(file):
    """Cut four overlapping patches (800 wide x 1000 high) out of every band.

    The previous implementation converted each band with ``Image.fromarray``,
    but ``Image`` is never imported in this notebook, and the PIL round-trip
    can change the dtype. The crop is now done with NumPy slicing, which keeps
    the dtype and needs no extra import.

    Args:
        file: iterable of 2-D bands, each indexable as ``band[row, col]``
            (e.g. an array of shape ``(bands, height, width)``). Every box must
            lie inside the band; the notebook's notes give 1,500 x 2,000 pixels.

    Returns:
        list of four lists (box_1 .. box_4); each inner list holds one
        independent ``numpy`` array per band.
    """
    # Boxes are (left, upper, right, lower) in pixel coordinates.
    s5p_boxes = {
        "box_1": (0, 60, 800, 1060),
        "box_2": (700, 60, 1500, 1060),
        "box_3": (0, 940, 800, 1940),
        "box_4": (700, 940, 1500, 1940)
    }
    box_images = []
    for left, upper, right, lower in s5p_boxes.values():
        # Rows run upper..lower and columns left..right. np.array copies the
        # slice so the patches do not alias the input.
        cropped = [np.array(band[upper:lower, left:right]) for band in file]
        box_images.append(cropped)
    return box_images

def data_aug(rands, source, path_name):
    """Augment each rectangular patch in place and save it as ``4_box<N>``.

    Each draw falls into one of five bands (< 0.2, < 0.4, < 0.6, < 0.8, rest)
    and the band picks the transform. Only shape-preserving operations are
    used: flips and a 180-degree rotation.

    Args:
        rands: one number per patch; ``rands[i]`` controls ``source[i]``.
        source: indexable, mutable collection of patches; entries are replaced
            with their augmented versions.
        path_name: directory that receives ``4_box1``, ``4_box2``, ...
            (``np.save`` appends ``.npy``).
    """
    cutoffs = (0.2, 0.4, 0.6, 0.8)
    # One recipe per band, plus a final one for draws that clear every cutoff.
    # A 180-degree rotation followed by a flip along one axis equals a flip
    # along the other axis, so recipes 2 and 3 repeat recipes 1 and 0.
    recipes = (
        lambda a: np.flip(a, 0),
        lambda a: np.flip(a, 1),
        lambda a: np.flip(np.rot90(a, 2), 0),
        lambda a: np.flip(np.rot90(a, 2), 1),
        lambda a: np.rot90(a, 2),
    )
    for slot, draw in enumerate(rands):
        # Index of the first cutoff the draw is strictly below; falls through
        # to the last recipe when there is none.
        band = next((k for k, cut in enumerate(cutoffs) if draw < cut), len(cutoffs))
        source[slot] = recipes[band](source[slot])
        np.save(os.path.join(path_name, "4_box" + str(slot + 1)), source[slot])
        print("saved")

def data_aug_s2(rands, source, path_name):
    """Augment every band of each rectangular patch and save it as ``4_box<N>``.

    The draw for a patch selects one of five bands (< 0.2, < 0.4, < 0.6,
    < 0.8, rest); the matching flip / 180-degree-rotation combination is then
    applied to every band of that patch.

    NOTE(review): an earlier cell defines another function with this same name
    (it saves ``box<N>`` files); this definition replaces it once the cell runs.

    Args:
        rands: one number per patch; ``rands[i]`` controls ``source[i]``.
        source: indexable collection of patches, each an indexable, mutable
            collection of bands; bands are replaced with augmented versions.
        path_name: directory that receives ``4_box1``, ``4_box2``, ...
    """
    for patch_no, draw in enumerate(rands):
        # The transform depends only on the patch's draw, so choose it once
        # instead of re-testing the draw for every band.
        if draw < 0.2:
            def transform(band):
                return np.flip(band, 0)
        elif draw < 0.4:
            def transform(band):
                return np.flip(band, 1)
        elif draw < 0.6:
            def transform(band):
                return np.flip(np.rot90(band, 2), 0)
        elif draw < 0.8:
            def transform(band):
                return np.flip(np.rot90(band, 2), 1)
        else:
            def transform(band):
                return np.rot90(band, 2)

        bands = source[patch_no]
        for band_no in range(len(bands)):
            bands[band_no] = transform(bands[band_no])
        np.save(os.path.join(path_name, "4_box" + str(patch_no + 1)), source[patch_no])
        print("saved")

base_path = 'path'

# NOTE(review): `Image` (PIL, e.g. `from PIL import Image`) is not imported in
# the notebook's import cell; it must be in scope for this cell to run.
# The loop header previously lacked its closing parenthesis (a SyntaxError) and
# used an undefined `data_path`; every other path here is built from
# `base_path`, so that name is used throughout.
for observation in os.listdir(os.path.join(base_path, "ghgsat")):
    # Skip the macOS folder-metadata entry, as the first augmentation cell does.
    if observation == '.DS_Store':
        continue

    # crop each data source
    # ghgsat
    ghgsat_img = os.path.join(base_path, "ghgsat", observation, "flag_mask_20m_high_res_methane.npy")
    img = Image.fromarray(np.load(ghgsat_img))
    ghgsat_cropped = crop_to_rectangle(img, "ghgsat")

    # s5p
    s5p_path = os.path.join(base_path, "sentinel-5p", observation, "low_res_methane.npy")
    img = Image.fromarray(np.load(s5p_path))
    s5p_cropped = crop_to_rectangle(img, "s5p")

    # s2
    s2_path = os.path.join(base_path, "sentinel-2", observation, "all_bands.npy")
    file = np.load(s2_path)
    s2_cropped = crop_s2_to_rectangle(file)

    # Apply data augmentation consistently to each patch across the different
    # sources and save: draw 4 random numbers, one per patch, and reuse them
    # for every source so matching patches get the same transform.
    rands = np.random.uniform(low=0, high=1, size=(4,))
    data_aug(rands, ghgsat_cropped, os.path.join(base_path, "ghgsat", observation))
    data_aug(rands, s5p_cropped, os.path.join(base_path, "sentinel-5p", observation))
    data_aug_s2(rands, s2_cropped, os.path.join(base_path, "sentinel-2", observation))

In [None]:
# testing: visual spot-check of every saved .npy file, one observation at a time

base_path = 'path'


def _preview_npy_files(folder, first_band_only=False, echo_folder=False):
    """Print the path and shape of each ``.npy`` file in ``folder`` and plot it.

    Args:
        folder: directory searched (non-recursively) for ``*.npy`` files.
        first_band_only: plot ``array[0]`` instead of the whole array
            (used for the multi-band sentinel-2 files).
        echo_folder: also print ``folder`` after each file path.
    """
    for npy_file in glob(os.path.join(folder, '*.npy')):
        print(npy_file)
        if echo_folder:
            print(folder)
        arr = np.load(npy_file)

        print(np.shape(arr))
        plt.imshow(arr[0] if first_band_only else arr)
        plt.show()


for observation in os.listdir(os.path.join(base_path, "sentinel-2")):
    # view s2 images for one observation (first band only)
    _preview_npy_files(
        os.path.join(base_path, "sentinel-2", observation),
        first_band_only=True,
        echo_folder=True,
    )

    # view ghgsat images for the same observation
    _preview_npy_files(os.path.join(base_path, "ghgsat", observation))

    # view sentinel-5p images for the same observation
    _preview_npy_files(os.path.join(base_path, "sentinel-5p", observation))

    # pause until the user presses Enter before moving to the next observation
    input()
