In [1]:
import os
import rasterio
import numpy as np
from osgeo import gdal, gdalconst
import pandas as pd
from shutil import copy2

# Step0: Resample B08

In [27]:
# Resample the B08 band (10 m according to its file name) onto the grid of the
# B02 reference raster (20 m according to its file name) and save it as GeoTIFF.
maindir = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step0_ResampleB08'

# Source raster to be resampled.
file = 'T31TFJ_20190912T103019_B08_10m.jp2'
inputfile = gdal.Open(os.path.join(maindir, 'input', file), gdalconst.GA_ReadOnly)
inputTrans = inputfile.GetGeoTransform()
inputProj = inputfile.GetProjection()

# Reference raster: provides the projection, geotransform, pixel dimensions and
# band data type used to create the output.
reference = gdal.Open(
    os.path.join(maindir, 'refImg', 'T31TFJ_20190912T103019_B02_20m.jp2'),
    gdalconst.GA_ReadOnly,
)
x, y = reference.RasterXSize, reference.RasterYSize
bandreference = reference.GetRasterBand(1)
referenceTrans = reference.GetGeoTransform()
referenceProj = reference.GetProjection()

# Output name: drop the last 8 characters of the input name ('_10m.jp2') and
# append the new resolution tag and extension.
outputfile = os.path.join(maindir, 'output', file[:-8] + '_20m.tiff')
driver = gdal.GetDriverByName('GTiff')
output = driver.Create(outputfile, x, y, 1, bandreference.DataType)
output.SetProjection(referenceProj)
output.SetGeoTransform(referenceTrans)

# Warp the input into the output grid with bilinear interpolation
# (other options: nearest neighbour, cubic).
gdal.ReprojectImage(inputfile, output, inputProj, referenceProj, gdalconst.GRA_Bilinear)

# Drop the only reference to the output dataset (presumably so GDAL flushes and
# closes the file -- see the GDAL Python "gotchas" notes).
del output

In [30]:
# Copy resampled B08 files to another directory

# The Step0 output folder is spelled out here instead of being derived from the
# notebook-level `maindir`: a later cell rebinds `maindir` to the Step1 folder,
# so relying on it would copy the wrong directory after an out-of-order run.
step0_output_dir = os.path.join(
    r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step0_ResampleB08',
    'output',
)
# The destination is the same for every file, so it is defined once.
dst = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step1_ApplySCLMask'

for file in os.listdir(step0_output_dir):
    src = os.path.join(step0_output_dir, file)
    if not os.path.isfile(src):
        continue  # copy2 only copies regular files; skip any sub-directory
    copy2(src, dst)

# Step1: Create clouded images from SCL taken at other dates

In [2]:
# Build artificially clouded versions of every 20 m band raster in `maindir`
# by masking it with the cloud/shadow pixels of each SCL raster found in the
# SCL1..SCL5 sub-folders.
maindir = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step1_ApplySCLMask'
SCL = ['SCL1', 'SCL2', 'SCL3', 'SCL4', 'SCL5']
shadow_cloud_values = [3.0, 8.0, 9.0, 10.0]                     # List of shadow and cloud values

# Create binary masks out of SCL files: 0 where the SCL value is a shadow/cloud
# value, 1 everywhere else. The masks do not depend on the band being processed,
# so they are built once instead of once per band file.
# np.isin is used instead of removing the shadow/cloud values from the list of
# unique values, which raised ValueError whenever an SCL raster did not contain
# every one of those values.
cloud_masks = []
for scl in SCL:
    scldir = os.path.join(maindir, scl)
    for file1 in sorted(os.listdir(scldir)):
        # The context manager closes the SCL dataset as soon as it has been read.
        with rasterio.open(os.path.join(scldir, file1)) as scl_img:
            for scl_layer in scl_img.read():
                is_shadow_or_cloud = np.isin(scl_layer, shadow_cloud_values)
                cloud_masks.append(np.where(is_shadow_or_cloud, 0, 1).astype(np.float32))

for file in os.listdir(maindir):
    if 'B' in file and '20m' in file:
        with rasterio.open(os.path.join(maindir, file)) as img:
            target = img.read().astype(np.float32)[0]           # first band of the target raster

            # Masks are numbered from 1 in the order they were loaded. With one
            # single-band raster per SCL folder this gives the indices 1..5.
            for i, cloudMask in enumerate(cloud_masks, start=1):
                my_array = np.multiply(cloudMask, target)
                # Masked pixels become -99. Pixels whose original value is
                # exactly 0 are also turned into -99 by this comparison.
                my_array[my_array == 0] = -99

                # Save file
                outputdir = os.path.join(maindir, 'output', 'masked' + file[23:30] + '_' + str(i) + '.tiff')
                with rasterio.open(outputdir, 'w', driver='Gtiff', width=img.width, height=img.height,
                                   count=1, crs=img.crs, transform=img.transform,
                                   dtype=np.float32, nodata=9.96921e+36) as newImg:
                    newImg.write(my_array, indexes=1)

# Step2: Reproject to the CRS of the turbidity maps, resample to their grid, and clip to their extent

In [3]:
# Open the turbidity raster that serves as the reference grid for Step2. Its
# projection, geotransform, pixel dimensions and band are kept in notebook-level
# variables for the resampling cells.
maindir1 = r'G:\MScThesis\waterQualityMonitoring\Data\CopernicusTurbidityLayers\France'

reference = gdal.Open(
    os.path.join(maindir1, 'c_gls_LWQ100-turbidity-blended-mean_201902110000_CUSTOM_MSI_V1.3.tiff'),
    gdalconst.GA_ReadOnly,
)
# Raster size in pixels (x = columns, y = rows).
x, y = reference.RasterXSize, reference.RasterYSize
# First band of the reference; `reference` itself stays bound so the dataset
# remains open while the band object is in use.
bandreference = reference.GetRasterBand(1)
referenceTrans = reference.GetGeoTransform()
referenceProj = reference.GetProjection()

In [4]:
# Assign CRS and resample original files
#
# Every 20 m band raster in the Step1 folder is warped onto the reference grid
# (referenceProj / referenceTrans / x / y / bandreference come from the cell
# that opens the reference turbidity raster).

maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step1_ApplySCLMask'
maindir3 = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step2_AssignCRS_Resample_Clip'
driver = gdal.GetDriverByName('GTiff')  # same driver for every file, looked up once
for file in os.listdir(maindir2):
    # Both substrings must be present in the file name. The previous condition
    # `'B' in file and '20m'` never tested '20m' against the name, because a
    # non-empty string literal is always truthy.
    if 'B' in file and '20m' in file:
        inputfile = gdal.Open(os.path.join(maindir2, file), gdalconst.GA_ReadOnly)
        inputProj = inputfile.GetProjection()
        inputTrans = inputfile.GetGeoTransform()

        # file[23:26] is the band tag taken from the input file name.
        outputfile = os.path.join(maindir3, 'original_' + file[23:26] + '_100m' + '.tiff')
        output = driver.Create(outputfile, x, y, 1, bandreference.DataType) # clips to the size of the reference image
        output.SetGeoTransform(referenceTrans)
        output.SetProjection(referenceProj)

        gdal.ReprojectImage(inputfile, output, inputProj, referenceProj, gdalconst.GRA_NearestNeighbour) # choose nearest neighbor to avoid averaging with -99

        # Release both datasets before moving on to the next file.
        del output
        inputfile.FlushCache()
        inputfile = None

In [5]:
# Assign CRS and resample artificially clouded images
#
# Each raster in the Step1 'output' folder is warped onto the reference grid
# and written to the Step2 folder.

maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step1_ApplySCLMask'
maindir3 = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step2_AssignCRS_Resample_Clip'
for file in os.listdir(os.path.join(maindir2, 'output')):
    inputfile = gdal.Open(os.path.join(maindir2, 'output', file), gdalconst.GA_ReadOnly)
    inputTrans = inputfile.GetGeoTransform()
    inputProj = inputfile.GetProjection()

    # The first 6 characters of the input name are replaced by 'resampled_'
    # (presumably the 'masked' prefix written by the SCL masking step).
    outputfile = os.path.join(maindir3, 'resampled_' + file[6:])
    driver = gdal.GetDriverByName('GTiff')
    # Output raster has the reference raster's size and band data type.
    output = driver.Create(outputfile, x, y, 1, bandreference.DataType)
    output.SetProjection(referenceProj)
    output.SetGeoTransform(referenceTrans)

    # choose nearest neighbor to avoid averaging with -99
    gdal.ReprojectImage(inputfile, output, inputProj, referenceProj, gdalconst.GRA_NearestNeighbour)

    # Release the output first, then the input dataset.
    del output
    inputfile.FlushCache()
    inputfile = None

# Step3: Apply land mask

In [6]:
# Multiply every Step2 raster by the binary mask, record the share of -99
# (cloud) pixels among the non-zero pixels, and write the masked raster to the
# Step3 folder.
maindir1 = r'G:\MScThesis\waterQualityMonitoring\Data\Bathymetry\France\binaryMask'
# The context manager closes the mask dataset once its first band is in memory.
with rasterio.open(os.path.join(maindir1, 'binaryMask_v3.tiff')) as mask_img:
    mask_arr = mask_img.read(1)

maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step2_AssignCRS_Resample_Clip'
maindir3 = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\Step3_ApplyElevMask'

# Collected in plain lists (np.append copies the whole array on every call)
# and converted to arrays after the loop.
image_names = []
cloud_percentages = []
for file in os.listdir(maindir2):
    with rasterio.open(os.path.join(maindir2, file)) as img:
        arr = img.read(1)
        # Cast to float32 so the array matches the dtype declared for the output
        # file and can hold the 9.96921e+36 nodata value.
        masked = np.multiply(mask_arr, arr).astype(np.float32)

        # calculate cloud%
        allNumPixels = np.sum(masked != 0)      # pixels that are not zero after masking
        cloudPixels = np.sum(masked == -99)     # pixels carrying the -99 cloud marker
        if allNumPixels:
            percentage = np.round(100 * (cloudPixels / allNumPixels), 2)
        else:
            percentage = np.nan                 # no non-zero pixels: avoid a 0/0 division
        image_names.append(file[10:19])
        cloud_percentages.append(percentage)

        # Save file
        masked[masked == 0] = 9.96921e+36 # permanent change for nodata values
        if 'original' in file:
            outputdir = os.path.join(maindir3, file)
        else:
            outputdir = os.path.join(maindir3, 'cloudcovered_' + file[10:])
        with rasterio.open(outputdir, 'w', driver='Gtiff', width=img.width, height=img.height,
                           count=1, crs=img.crs, transform=img.transform,
                           dtype=np.float32, nodata=9.96921e+36) as newImg:
            newImg.write(masked, indexes=1)

names = np.array(image_names, dtype='str')
percent = np.array(cloud_percentages, dtype='float')

# Save percentage file
df = pd.DataFrame({'Image': names, 'Cloud Percentage': percent})
outputdir = r'G:\MScThesis\waterQualityMonitoring\Data\GapFilling\testingPhase_France\Target_Image\summary_cloud_percent.xlsx'
# NOTE(review): only rows 8..12 are exported; which files these are depends on
# the order returned by os.listdir -- confirm this slice is still intended.
df[8:13].to_excel(outputdir, index=False)
