In [2]:
import rasterio
import os
from os.path import join

import geopandas
from turf import area
import numpy as np
import xarray as xr
from PIL import Image
from glob import glob
import shutil

import matplotlib
import matplotlib.pyplot as plt

import rasterio
from rasterio.enums import Resampling
from rasterio.plot import show
from rasterio.windows import Window


## 1. Resample to 20m resolution

In [None]:
def resample(path, out_dir, target_resolution):
    """Up- or downsample a raster to a target pixel size and store it in ``out_dir``.

    The scaling factor is the source pixel width (``dataset.transform[0]``)
    divided by ``target_resolution``; the same factor is applied to rows and
    columns, and pixel values are interpolated bilinearly.
    NOTE(review): this presumes square pixels and a ``target_resolution``
    expressed in the raster's own units -- confirm for new data sources.

    Parameters
    ----------
    path : str
        Raster file to read.
    out_dir : str
        Folder that receives the result.
    target_resolution : float
        Desired pixel size.

    Returns
    -------
    str
        Path of the file holding the raster at the target resolution:
        ``<out_dir>/resample_<factor>_<basename>`` when resampling was needed,
        otherwise the copy of the source inside ``out_dir`` (or ``path``
        itself when the source already lives in ``out_dir``).
    """
    with rasterio.open(path) as dataset:
        resampling_factor = dataset.transform[0] / target_resolution
        if resampling_factor != 1:
            # Output grid size, computed once and reused for the read shape,
            # the transform and the metadata.
            out_height = int(dataset.height * resampling_factor)
            out_width = int(dataset.width * resampling_factor)

            # Resample while reading: rasterio interpolates to `out_shape`.
            data = dataset.read(
                out_shape=(dataset.count, out_height, out_width),
                resampling=Resampling.bilinear,
            )

            # Rescale the affine transform so the new pixels cover the same extent.
            transform = dataset.transform * dataset.transform.scale(
                dataset.width / out_width,
                dataset.height / out_height,
            )

            out_meta = dataset.meta.copy()
            out_meta.update({"driver": "GTiff",
                             "height": out_height,
                             "width": out_width,
                             "transform": transform,
                             "crs": dataset.crs})

            out_path = os.path.join(out_dir, "resample_" + str(resampling_factor) + "_" + os.path.basename(path))
            with rasterio.open(out_path, "w", **out_meta) as dest:
                dest.write(data)
            return out_path

    # Already at the target resolution: a plain copy is enough. The dataset is
    # closed at this point, so the file is not held open while it is copied.
    try:
        return shutil.copy(path, out_dir)
    except shutil.SameFileError:
        # `out_dir` is the folder the source already lives in; nothing to copy.
        return path

In [None]:
base_path = 'path' # *** path needed ***
# Sub-folders of `base_path` to process. The loops in this notebook iterate
# over `locations`, so the list must be bound to that exact name.
locations = ['California', 'Arizona']

# Resample every methane (CH4) raster of every observation to 20 m pixels.
# The result is written next to the source file.
for location in locations:
    for observation in os.listdir(os.path.join(base_path, location)):
        for file in glob(os.path.join(base_path, location, observation, "*_CH4.tif")):
            # Outputs of a previous run ("resample_<factor>_..._CH4.tif") also
            # match the pattern; skip them so the cell can be re-run safely.
            if os.path.basename(file).startswith("resample_"):
                continue
            print(file)
            out_dir = os.path.join(base_path, location, observation)
            resample(file, out_dir, 20)

## 2. Crop to 15 km x 20 km

In [None]:
def create_window(mid_height, mid_width, window_height, window_width):
    """Build a rasterio ``Window`` of the given size centred on a pixel position.

    Parameters
    ----------
    mid_height, mid_width : float
        Row and column of the window centre, in pixels.
    window_height, window_width : float
        Size of the window, in pixels.

    Returns
    -------
    rasterio.windows.Window
        Window spanning half the requested size on either side of the centre;
        the bounds are truncated to integers with ``int()``.
    """
    half_height = window_height / 2
    half_width = window_width / 2

    row_bounds = (int(mid_height - half_height), int(mid_height + half_height))
    col_bounds = (int(mid_width - half_width), int(mid_width + half_width))

    return Window.from_slices(row_bounds, col_bounds)

In [None]:
base_path = 'path' # *** path needed ***

# Crop every resampled CH4 raster to a fixed-size window around its centre and
# write the crop into the observation folder.
for location in locations:
    location_dir = os.path.join(base_path, location)
    for observation in os.listdir(location_dir):
        observation_dir = os.path.join(location_dir, observation)
        for file in glob(os.path.join(observation_dir, "resample_*_CH4.tif")):
            with rasterio.open(file) as src:
                # Preview: report the size of the un-cropped raster and plot band 1.
                print(file)
                print("width:", src.width)
                print("height", src.height)
                full_raster = np.array(src.read())
                plt.imshow(full_raster[0])
                plt.show()

                # Crop size in pixels (would be 1500 x 2000 at 10 m resolution),
                # centred on the middle of the raster.
                window_width = 750
                window_height = 1000
                mid_width = src.width / 2
                mid_height = src.height / 2

                # Read the first band through the window to check the crop visually.
                window = create_window(mid_height, mid_width, window_height, window_width)
                cropped_band = src.read(1, window=window)
                print("height, width:", np.shape(cropped_band))
                plt.imshow(cropped_band)
                plt.show()

                # Metadata of the crop: new size plus the transform of the window.
                meta = src.meta.copy()
                meta.update(
                    driver="GTiff",
                    height=window_height,
                    width=window_width,
                    transform=src.window_transform(window),
                    crs=src.crs,
                )
                print(meta)

                # Write all bands of the windowed raster.
                out_path = os.path.join(observation_dir, 'crop_resample_FLG.tif')
                with rasterio.open(out_path, 'w', **meta) as dst:
                    dst.write(src.read(window=window))

## 3. Remove data that has a flag for data issues

In [None]:
file = "flag_file" # *** path needed ***
with rasterio.open(file) as src:
    data_array = np.array(src.read())
    flag_band = data_array[0]  # view on the first band of the flag raster
    print(flag_band.shape)
    print(np.mean(flag_band))
    print(flag_band)
    plt.imshow(flag_band)
    plt.show()

# Turn the flag band into a mask, in place: flag value 1 becomes 0 (False),
# values of 2 and above become 1 (True); the whole array is then cast to bool.
flag_band[flag_band == 1] = 0
flag_band[flag_band >= 2] = 1
data_array = data_array.astype(bool)
mask = data_array[0]
plt.imshow(mask)
plt.show()
print(data_array)

file = "methane_20m_file" # *** path needed ***
with rasterio.open(file) as src2:
    data_array2 = np.array(src2.read())
    methane_band = data_array2[0]  # view on the first band of the methane raster
    print(methane_band.shape)
    print(np.nanmean(methane_band))
    print(methane_band)

plt.imshow(methane_band)
plt.show()

# Blank out (NaN) the methane pixels that the mask marks as lower data quality.
methane_band[mask] = np.nan
plt.imshow(methane_band)
plt.show()
print(data_array2)

In [None]:
# For every cropped 20 m raster: mask the pixels flagged in the observation's
# flag raster, replace all NaN pixels with random Gaussian values and save the
# result as a NumPy array.
for location in locations:
    for observation in os.listdir(os.path.join(base_path, location)):
        for file in glob(os.path.join(base_path, location, observation, "20m_crop_resample.tif")):
            # open flag file and turn it into a mask array
            # (flag value 1 -> False, values of 2 and above -> True);
            # the [0] raises IndexError when the observation has no flag file
            flag = glob(os.path.join(base_path, location, observation, "FLG.tif"))[0]
            with rasterio.open(flag) as flg:
                flag_array = np.array(flg.read()[0])
            plt.imshow(flag_array)
            plt.show()
            flag_array[flag_array == 1] = 0
            flag_array[flag_array >= 2] = 1
            flag_array = flag_array.astype(bool)
            plt.imshow(flag_array)
            plt.show()

            with rasterio.open(file) as src:
                data_array = np.array(src.read()[0])
            plt.imshow(data_array)
            plt.show()
            print(data_array)
            print(np.shape(data_array))

            # NaN only exists for floating-point data; promote anything else so
            # that the masking below cannot fail on an integer raster.
            if not np.issubdtype(data_array.dtype, np.floating):
                data_array = data_array.astype(float)

            data_array[flag_array] = np.nan
            plt.imshow(data_array)
            plt.show()

            median = np.nanmedian(data_array)
            sd = np.nanstd(data_array)

            # using a gaussian as the data is a gaussian dist: every NaN pixel
            # (flagged or already missing) gets a draw centred on the median of
            # the valid pixels, with their standard deviation.
            # NOTE(review): the global NumPy RNG is used without a seed, so the
            # fill values differ between runs.
            nan_mask = np.isnan(data_array)
            nan_length = int(np.count_nonzero(nan_mask))
            rand = np.random.normal(median, sd, nan_length)
            # One vectorised assignment; values are placed in row-major order,
            # exactly as a pixel-by-pixel loop over the NaN positions would.
            data_array[nan_mask] = rand

            print(data_array)

            plt.imshow(data_array)
            plt.show()

            print(np.shape(data_array))
            data_location = 'out_path/name' # # *** path needed ***
            np.save(data_location, data_array)

## 4. Process the plume data for an additional experiment

In [None]:
# Resample every plume raster (*_PL.tif) of every observation to 20 m pixels,
# writing the result next to the source file.
for location in locations:
    location_dir = os.path.join(base_path, location)
    for observation in os.listdir(location_dir):
        out_dir = os.path.join(location_dir, observation)
        for file in glob(os.path.join(out_dir, "*_PL.tif")):
            print(file)
            resample(file, out_dir, 20)

In [None]:
lis = 'path' # *** path needed ***
dest = 'out_path' # *** path needed ***
# NOTE(review): `lis` is iterated below and each item is opened as a raster, so
# it presumably has to be a collection of plume raster paths; the placeholder
# string would be walked character by character -- confirm before running.
print(len(lis))

if len(lis) == 0:
    # if there are no detections in the observation, create array of zeroes
    data_array = np.zeros((1000, 750))
else:
    # Start from an all-NaN canvas and let each plume raster fill the pixels
    # that are still NaN, so earlier rasters take precedence over later ones.
    data_array = np.full((1, 1000, 750), np.nan)
    for img in lis:
        with rasterio.open(img) as src:
            plume_layer = np.array(src.read())
            show(plume_layer)
            print(plume_layer.shape)
            unfilled = np.isnan(data_array)
            data_array[unfilled] = plume_layer[unfilled]

    # Drop the leading band axis before plotting and saving.
    data_array = data_array.squeeze()
    show(data_array)

np.save(os.path.join(dest, "ghgsat_plumes.npy"), data_array)