In [1]:
import os
os.environ['PYTHONHASHSEED']=str(4)

import random
import pandas as pd
import numpy as np 
import rasterio as rio
from copy import deepcopy
from joblib import Parallel, delayed
from tqdm import tqdm
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.optimizers import Nadam, Adam
from keras.layers import Dense, Activation
from keras.wrappers.scikit_learn import KerasRegressor as ANN
from sklearn.neighbors import KNeighborsRegressor as KNN
from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold, cross_validate
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.colors as colors
from rasterio.plot import show

In [None]:
def set_random_seed(x):
    tf.random.set_seed(x) # Set the `tensorflow` pseudo-random generator at a fixed value
    np.random.seed(x)     # Set the `numpy` pseudo-random generator at a fixed value
    random.seed(x)        # Set the `python` built-in pseudo-random generator at a fixed value      

In [None]:
############################################################################
# Reproducibility is a Problem when using parallel processing  (n_jobs = 1)#
############################################################################ 
seed = 4
set_random_seed(seed) 

In [None]:
def build_model(learn_rate=0.01, units1=14,units2=12,activ_func1='sigmoid',activ_func2='sigmoid',activ_func3='sigmoid'):    
    model = Sequential()
    model.add(Dense(units1, kernel_initializer='uniform', activation=activ_func1, input_shape=(Nfeatures,))) 
    model.add(Dense(units2, kernel_initializer='uniform', activation=activ_func2))                           
    model.add(Dense(1, kernel_initializer='uniform', activation=activ_func3))
    optimizer = Adam(lr=learn_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name="Adam")
    model.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
    return model

In [20]:
def getPixelValue(array,idx1,idx2,idx3):
    return array[idx1,idx2,idx3]

In [21]:
##########
# Prepare excel files containing all pixel values of best 2 S2 (including missing values = -99)
# Seperate None and valid pixels into 2 excel files
##########
# Without GF
##########

maindir1 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\preparedInputData'
maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\preparedInputData'
gf_folders = ['withoutGF']

for gf_folder in gf_folders:
    subdir1 = os.path.join(maindir1,gf_folder,'France')
    subdir2 = os.path.join(maindir2,gf_folder,'France')
    files_temp = [fileName for fileName in os.listdir(subdir1) if 'tiff' in fileName]
    for n in tqdm(range(len(files_temp))):
        file1 = files_temp[n]        
        img = rio.open(os.path.join(subdir1,file1)) # start by reading all layers
        arr = img.read()
        # Rank S2 scenes based on n° KP 
        indices = [i for i in range(1,36,9)]
        nb_KP = []
        for i in indices:
            temp_copy = deepcopy(arr[i])
            temp_copy[temp_copy==-99]=9.96921e+36
            nb_KP.append(len(np.argwhere(temp_copy<=1e+36).tolist()))        
        df1 = pd.DataFrame({'indices':indices,'nb_KP':nb_KP})
        df1.sort_values('nb_KP', inplace=True)  # order based on nb_KP and make changes to df permanent (order from worst to best)
        df1.reset_index(drop=True, inplace=True) # Drop old index and make changes to df permanent
        
        # Select reflectance layers associated with best 2 images
        name = 'Pixels_From_Best_2_S2_'+file1[7:15]
        l = list(df1[2:]['indices']) # get the best 2 S2
        # Create a new stacked array of layers to be used (from which we will extract coord of KP, UP, None pixels)
        arr_temp = np.expand_dims(arr[0], axis=0)
        for k1 in l:
            for k2 in range(k1,k1+9):
                arr_temp = np.append(arr_temp,np.expand_dims(arr[k2], axis=0),axis=0) # get 19 layers (1st layer is turbidity + 18 layers of best S2 images and associated combinations of bands )
        
        # Get all possible pixel coordinates for valid or none pixel values (=9.96921e+36)
        idX = []
        idY = [] 
        idX_none = []
        idY_none = []
        for idx in range(arr_temp.shape[1]):    # get all pixel coordinates
            for idy in range(arr_temp.shape[2]):
                if arr_temp[1,idx,idy] > 1e+36 or arr_temp[10,idx,idy] > 1e+36: # Exclude if pixel is none in one of the layers
                    idX_none.append(idx)                                            
                    idY_none.append(idy)
                else:
                    if arr_temp[1,idx,idy] ==-99 and arr_temp[10,idx,idy] ==-99: # Exclude if 2 S2 is UP
                        idX_none.append(idx)                                            
                        idY_none.append(idy)
                    else:                       # Save if 1 S2 is KP
                        idX.append(idx)                                      # 1: 1st best S2 image # 10: 2nd best S2 image 
                        idY.append(idy)                   
        
        # Store all pixel values (!=none) in an empty df            
        rows = ['L'+str(index) for index in range(len(arr_temp))]
        columns = [index for index in range(len(idX))]
        results = pd.DataFrame(index=rows, columns=columns)
        data = [] # It is recommended to collect data in a list of lists and then assign it to a df (Than modifying a df each iteration => time costly and prone to error of dtypes)
        for idxLayer in range(len(arr_temp)):
            pixelValues = Parallel(n_jobs=-1)(delayed(getPixelValue)(arr_temp,idxLayer,idX[k],idY[k]) for k in range(len(idX)))
            data.append(pixelValues)
        results = pd.DataFrame(data, index=rows, columns=columns).T
        results.insert(loc=0, column='idx', value=idX)   # Add coordinates to df (while specifying position)
        results.insert(loc=1, column='idy', value=idY)  
        # Store all pixel values (==none) in an empty df            
        results_none = pd.DataFrame({'idx_none':idX_none, 'idy_none':idY_none})
        
        # Export as excel files
        os.makedirs(subdir2, exist_ok=True)
        outputdir = os.path.join(subdir2, name+'.xlsx')
        results.to_excel(outputdir, encoding='utf-8')
        
        outputdir2 = os.path.join(subdir2,'coordsNonePixelValues'+str(n)+'.xlsx') 
        results_none.to_excel(outputdir2, encoding='utf-8', index=False) # The coords of none pixel values are the same # save them 1 time

100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [19:12<00:00, 76.85s/it]


In [22]:
##########
# Prepare excel files containing all pixel values of best 2 S2 (including missing values = -99)
# Seperate None and valid pixels into 2 excel files
##########
# With GF
##########

maindir1 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\preparedInputData'
maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\preparedInputData'
gf_folders = ['withGF']

for gf_folder in gf_folders:
    subdir0 = os.path.join(maindir1,'withoutGF','France')    
    subdir1 = os.path.join(maindir1,gf_folder,'France')
    subdir2 = os.path.join(maindir2,gf_folder,'France')
    files_temp = [fileName for fileName in os.listdir(subdir1) if 'tiff' in fileName]
    for n in tqdm(range(len(files_temp))):
        file1 = files_temp[n]  
        arr0 = rio.open(os.path.join(subdir0,file1)).read()
        img = rio.open(os.path.join(subdir1,file1)) # start by reading all layers
        arr = img.read()
        # Rank S2 scenes based on n° KP 
        indices = [i for i in range(1,36,9)]
        nb_KP = []
        for i in indices:
            temp_copy = deepcopy(arr0[i])
            temp_copy[temp_copy==-99]=9.96921e+36
            nb_KP.append(len(np.argwhere(temp_copy<=1e+36).tolist()))        
        df1 = pd.DataFrame({'indices':indices,'nb_KP':nb_KP})
        df1.sort_values('nb_KP', inplace=True)  # order based on nb_KP and make changes to df permanent (order from worst to best)
        df1.reset_index(drop=True, inplace=True) # Drop old index and make changes to df permanent
        
        # Select reflectance layers associated with best 2 images
        name = 'Pixels_From_Best_2_S2_'+file1[7:15]
        l = list(df1[2:]['indices']) # get the best 2 S2
        # Create a new stacked array of layers to be used (from which we will extract coord of KP, UP, None pixels)
        arr_temp = np.expand_dims(arr[0], axis=0)
        for k1 in l:
            for k2 in range(k1,k1+9):
                arr_temp = np.append(arr_temp,np.expand_dims(arr[k2], axis=0),axis=0) # get 19 layers (1st layer is turbidity + 18 layers of best S2 images and associated combinations of bands )
        
        # Get all possible pixel coordinates for valid or none pixel values (=9.96921e+36)
        idX = []
        idY = [] 
        idX_none = []
        idY_none = []
        for idx in range(arr_temp.shape[1]):    # get all pixel coordinates
            for idy in range(arr_temp.shape[2]):
                if arr_temp[1,idx,idy] > 1e+36 or arr_temp[10,idx,idy] > 1e+36: # Exclude if pixel is none in one of the layers
                    idX_none.append(idx)                                            
                    idY_none.append(idy)
                else:
                    idX.append(idx)                                      # 1: 1st best S2 image # 10: 2nd best S2 image 
                    idY.append(idy)                   
        
        # Store all pixel values (!=none) in an empty df            
        rows = ['L'+str(index) for index in range(len(arr_temp))]
        columns = [index for index in range(len(idX))]
        results = pd.DataFrame(index=rows, columns=columns)
        data = [] # It is recommended to collect data in a list of lists and then assign it to a df (Than modifying a df each iteration => time costly and prone to error of dtypes)
        for idxLayer in range(len(arr_temp)):
            pixelValues = Parallel(n_jobs=-1)(delayed(getPixelValue)(arr_temp,idxLayer,idX[k],idY[k]) for k in range(len(idX)))
            data.append(pixelValues)
        results = pd.DataFrame(data, index=rows, columns=columns).T
        results.insert(loc=0, column='idx', value=idX)   # Add coordinates to df (while specifying position)
        results.insert(loc=1, column='idy', value=idY)  
        # Store all pixel values (==none) in an empty df            
        results_none = pd.DataFrame({'idx_none':idX_none, 'idy_none':idY_none})
        
        # Export as excel files
        os.makedirs(subdir2, exist_ok=True)
        outputdir = os.path.join(subdir2, name+'.xlsx')
        results.to_excel(outputdir, encoding='utf-8')
        
        outputdir2 = os.path.join(subdir2,'coordsNonePixelValues'+str(n)+'.xlsx') 
        results_none.to_excel(outputdir2, encoding='utf-8', index=False) # The coords of none pixel values are the same # save them 1 time

100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [17:18<00:00, 69.21s/it]


In [23]:
###############################
# generate new turbidity maps #
###############################

# predict turbidity using all training dataset
maindir1 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\preparedInputData'
maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\preparedInputData'
maindir3 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\predictedTurbidity'
gf_folders = ['withoutGF', 'withGF']
    
for gf_folder in gf_folders:    
    subdir1 = os.path.join(maindir1,gf_folder,'France')
    subdir2 = os.path.join(maindir2,gf_folder,'France')
    subdir3 = os.path.join(maindir3,gf_folder,'France')
    files_temp = [fileName for fileName in os.listdir(subdir1) if 'tiff' not in fileName]        

    for n in tqdm(range(len(files_temp))):
        file1 = files_temp[n]
        for file2 in os.listdir(os.path.join(subdir1,file1)): 
            if 'Best_2' in file2:
                ############### Read all training dataset (without splitting) ###############
                # first train the model with the previously prepared training set. Then, apply the model to predict turbidity in whole study area #
                excel_file = pd.read_excel(os.path.join(subdir1,file1,file2)) # step0: Read and split data
                y = np.array(excel_file['L0'].values,dtype=np.float).reshape(-1,1)                        # Target data
                excel_file.drop(['Unnamed: 0','idx','idy','L0'], axis=1,inplace=True)
                features = ['L'+str(i) for i in range(1,len(excel_file.columns)-3)]
                X = excel_file.values                
#                 y = MinMaxScaler().fit_transform(y) # Data Normalization is not necessary for random forests
#                 X = MinMaxScaler().fit_transform(X) # This will save us the time of invert normalization afterwards

                Nfeatures = X.shape[1]
                y = y.ravel() # flatten to 1d array # data is in a column format while it expected it in a row.
                ############### Read all pixel values in 2 S2 images to predict corresponding turbidity values ###############
                excel_file2 = pd.read_excel(os.path.join(subdir2,file2)) 
                idx = np.array(excel_file2['idx'].values,dtype=np.float).reshape(-1,1)                        
                idy = np.array(excel_file2['idy'].values,dtype=np.float).reshape(-1,1)
                excel_file2.drop(['Unnamed: 0','idx','idy','L0'], axis=1,inplace=True)
                features2 = ['L'+str(i) for i in range(1,len(excel_file2.columns)-3)]
                S2_values = excel_file2.values                
#                 S2_values = MinMaxScaler().fit_transform(S2_values) # Data Normalization
                ############### Predict turbidity using RF ()############### 
                model = RFR(n_estimators=500, max_features=int(len(features)/3.0), max_depth=25, random_state=seed)                    
                model.fit(X, y)
                y_pred = model.predict(S2_values)
                results = pd.DataFrame({'idx':idx.ravel(), 'idy':idy.ravel(), 'predTur':y_pred.ravel()})

                # step7: Export as excel files
                outputdir2 = os.path.join(subdir3,'Tur_'+file1+'.xlsx')
                results.to_excel(outputdir2, encoding='utf-8', index=False) 


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [10:42<00:00, 42.82s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [10:55<00:00, 43.67s/it]


In [24]:
maindir1 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\preparedInputData'
maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\predictedTurbidity'
gf_folders = ['withoutGF', 'withGF']
    
for gf_folder in gf_folders:
    subdir1 = os.path.join(maindir1,gf_folder,'France')
    subdir2 = os.path.join(maindir2,gf_folder,'France')
    files_temp = [fileName for fileName in os.listdir(subdir1) if 'None' in fileName]
    for n in tqdm(range(len(files_temp))):
        file1 = files_temp[n]
        # Read file: coordsNonePixelValues
        excel_file1 = pd.read_excel(os.path.join(subdir1,file1)) # step0: Read and split data
        idx_temp1 = list(excel_file1['idx_none'])
        idy_temp1 = list(excel_file1['idy_none'])
        noneValues = []
        for i in range(excel_file1.shape[0]):
            noneValues.append(9.96921e+36)
        
        df = pd.DataFrame({'idx':idx_temp1, 'idy':idy_temp1, 'predTur':noneValues})
        # step7: Export as excel files
        outputdir2 = os.path.join(subdir2,file1)
        df.to_excel(outputdir2, encoding='utf-8', index=False)

100%|██████████████████████████████████████████████████████████████████████████████████| 16/16 [03:47<00:00, 14.19s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 16/16 [03:34<00:00, 13.39s/it]


In [25]:
# Read predicted turbidity pixel values and add the none values to it
maindir1 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\predictedTurbidity'
maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\preparedInputData'
gf_folders = ['withoutGF', 'withGF']
    
for gf_folder in gf_folders:
    subdir1 = os.path.join(maindir1, gf_folder, 'France')
    subdir2 = os.path.join(maindir2, gf_folder, 'France')
    files_temp1 = [fileName for fileName in os.listdir(subdir1) if ('Tur' in fileName)and('tiff' not in fileName)]  

    for n in tqdm(range(len(files_temp1))):
        file1 = files_temp1[n]
        excel_file1 = pd.read_excel(os.path.join(subdir1,file1))
        idx_temp1 = list(excel_file1['idx'])
        idy_temp1 = list(excel_file1['idy'])
        predTur_temp1 = list(excel_file1['predTur'])
        
        file2 = 'coordsNonePixelValues'+str(n)+'.xlsx'        
        excel_file2 = pd.read_excel(os.path.join(subdir1,file2))        
        idx_temp2 = list(excel_file2['idx'])
        idy_temp2 = list(excel_file2['idy'])
        predTur_temp2 = list(excel_file2['predTur'])
        
        idx = idx_temp1+idx_temp2
        idy = idy_temp1+idy_temp2
        predTur = predTur_temp1+predTur_temp2

        results = pd.DataFrame({'idx':idx, 'idy':idy, 'predTur':predTur})
        results.sort_values(by=['idx', 'idy'], ascending=True, inplace=True) # Sort Values by idx then by idy

        rowsList = results['idx']
        colList = results['idy']
        turList = results['predTur']

        file3 = file1[4:12]        
        img = rio.open(os.path.join(subdir2,'merged_'+file3+'.tiff')) # start by reading all layers
        arr = img.read()
                
        ######## Update Array ########  
        # Export as images 
        temp_copy1 = deepcopy(arr[0]) # retain layer as actual turbidity
        outputdir1 = os.path.join(subdir1, 'actual_'+file1[:-5]+'.tiff')
        with rio.open(outputdir1,'w',driver='Gtiff', width=img.width, height=img.height, 
                            count=1,crs=img.crs,transform=img.transform, dtype='float32', nodata=9.96921e+36) as newImg:
            newImg.write(temp_copy1,1)
            newImg.close()
        
        temp_copy2 = deepcopy(arr[0]) # to be filled with predicted turbidity
        for item in range(len(rowsList)):
            temp_copy2[int(rowsList[item]),int(colList[item])] = turList[item]
        outputdir2 = os.path.join(subdir1, 'predicted_'+file1[:-5]+'.tiff')
        with rio.open(outputdir2,'w',driver='Gtiff', width=img.width, height=img.height, 
                            count=1,crs=img.crs,transform=img.transform, dtype='float32', nodata=9.96921e+36) as newImg:
            newImg.write(temp_copy2,1)
            newImg.close()

100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [05:17<00:00, 21.17s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [05:16<00:00, 21.13s/it]


In [3]:
## https://gist.github.com/bshishov/5dc237f59f019b26145648e2124ca1c9

EPSILON = 1e-10

def _error(actual: np.ndarray, predicted: np.ndarray):
    """ Simple error """
    return actual - predicted
def _absolute_error(actual: np.ndarray, predicted: np.ndarray):
    """ absolute error """
    return abs(actual - predicted)
def _percentage_error(actual: np.ndarray, predicted: np.ndarray):
    """
    Percentage error
    Note: result is NOT multiplied by 100
    """
    return (actual - predicted) / (actual + EPSILON)
def error(actual: np.ndarray, predicted: np.ndarray):
    """ Simple error """
    return np.mean(_error(actual, predicted))
def percentage_error(actual: np.ndarray, predicted: np.ndarray):
    """
    Percentage error
    Note: result is NOT multiplied by 100
    """
    return np.mean((actual - predicted)/(actual + EPSILON))
    
def mse(actual: np.ndarray, predicted: np.ndarray):
    """ Mean Squared Error """
    return np.mean(np.square(_error(actual, predicted)))
def mdape(actual: np.ndarray, predicted: np.ndarray):
    """
    Median Absolute Percentage Error
    """
    return np.median(np.abs(_percentage_error(actual, predicted)))
def R2_score(actual: np.ndarray, predicted: np.ndarray):
    return r2_score(actual, predicted)

METRICS = {
    'mse': mse,
    'mdape': mdape, # less affected by outliers
    '_error':_error,
    '_percentage_error':_percentage_error,
    'error':error,
    'percentage_error':percentage_error,
    'R2_score':R2_score,
}

def evaluate(actual: np.ndarray, predicted: np.ndarray, metrics=('mse', 'mdape', '_error','_percentage_error', 'error','percentage_error','R2_score')):
    results = {}
    for name in metrics:
        try:
            results[name] = METRICS[name](actual, predicted)
        except Exception as err:
            results[name] = np.nan
            print('Unable to compute metric {0}: {1}'.format(name, err))
    return results

def evaluate_all(actual: np.ndarray, predicted: np.ndarray):
    return evaluate(actual, predicted, metrics=set(METRICS.keys()))

In [27]:
maindir1 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\predictedTurbidity'
maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\slope'
gf_folders = ['withoutGF', 'withGF']

gf = []
d = []
err = []
errP = []
MdAPE = []
MSE = []
R2score = []

for gf_folder in gf_folders:
    subdir1 = os.path.join(maindir1,gf_folder,'France')
    files_temp = [fileName for fileName in os.listdir(subdir1) if ('tiff' in fileName) and ('actual' in fileName) ]
    
    for n in tqdm(range(len(files_temp))):
        img = rio.open(os.path.join(subdir1,'actual_Tur_20190121.tiff'))        
        file1 = files_temp[n]        
        actual = rio.open(os.path.join(subdir1,file1)).read(1)
        predicted = rio.open(os.path.join(subdir1,'predicted_'+file1[7:])).read(1)

        actualValues = []
        predValues = []
        for i in range(actual.shape[0]):  # Exclude none values from this analysis
            for j in range(actual.shape[1]):
                if actual[i,j]<10000 and actual[i,j]>-90 and predicted[i,j]<10000 and predicted[i,j]>-90:
                    actualValues.append(actual[i,j])
                    predValues.append(predicted[i,j])
        # Use error metrics that do not penalize large differences between actual and predicted
        errorMetrics = evaluate(np.array(actualValues,dtype=np.float64), np.array(predValues,dtype=np.float64), metrics=('error', 'percentage_error','mdape','mse','R2_score'))
    
        gf.append(gf_folder)
        d.append(file1[11:19])
        err.append(round(errorMetrics['error'],4))
        errP.append(round(100*errorMetrics['percentage_error'],2))
        MdAPE.append(round(100*errorMetrics['mdape'],2))
        MSE.append(round(errorMetrics['mse'],4))
        R2score.append(round(100*errorMetrics['R2_score'],2))
df = pd.DataFrame({'gf':gf,'d':d,'err':err, 'errP':errP, 'MdAPE':MdAPE,'MSE':MSE,'R2score':R2score})
df

100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [00:17<00:00,  1.18s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [00:17<00:00,  1.19s/it]


Unnamed: 0,gf,d,err,errP,MdAPE,MSE,R2score
0,withoutGF,20190121,0.0012,-3.57,8.76,0.1512,97.84
1,withoutGF,20190211,0.2876,-62.45,45.28,10.9013,14.78
2,withoutGF,20190311,-0.0018,-4.19,9.9,0.9537,71.77
3,withoutGF,20190411,0.0577,-3.37,11.2,0.2112,88.81
4,withoutGF,20190511,-0.0063,-3.24,8.26,0.1005,94.88
5,withoutGF,20190611,0.0079,-19.19,19.14,1.693,33.8
6,withoutGF,20190711,-0.0088,-18.09,19.4,0.4316,31.98
7,withoutGF,20190811,-0.0265,-14.74,14.37,2.053,7.79
8,withoutGF,20190911,-0.0008,-2.83,7.74,0.0258,54.43
9,withoutGF,20191011,-0.1051,-17.55,16.7,0.1093,87.65


In [4]:
# generate over/under estimation maps (for areas that have been gap filled display None)
# in NTU 
# Need to exclude pixels where turbidity is none while S2 is known

maindir1 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\predictedTurbidity'
maindir2 = r'G:\MScThesis\waterQualityMonitoring\Data\MLearning\massProduction\slope'
gf_folders = ['withoutGF', 'withGF']

for gf_folder in gf_folders:
    subdir1 = os.path.join(maindir1,gf_folder,'France')
    files_temp = [fileName for fileName in os.listdir(subdir1) if ('tiff' in fileName) and ('actual' in fileName) ]
    
    for n in tqdm(range(len(files_temp))):
        img = rio.open(os.path.join(subdir1,'actual_Tur_20190121.tiff'))        
        file1 = files_temp[n]        
        actual = rio.open(os.path.join(subdir1,file1)).read(1)
        predicted = rio.open(os.path.join(subdir1,'predicted_'+file1[7:])).read(1)
        
        arrayBias = 100*(actual-predicted)/(actual+ EPSILON)
        for i in range(actual.shape[0]):  # Exclude none values from this analysis
            for j in range(actual.shape[1]):
                if actual[i,j] > 1e+36 or predicted[i,j] > 1e+36:
                    arrayBias[i,j] = actual[i,j]
        
        # Export as image
        outputdir1 = os.path.join(maindir2,gf_folder,'France','biasArray'+file1[7:19]+'.tiff')
        with rio.open(outputdir1,'w',driver='Gtiff', width=img.width, height=img.height, 
                            count=1,crs=img.crs,transform=img.transform, dtype='float32', nodata=9.96921e+36) as newImg:
            newImg.write(arrayBias,1)
            newImg.close()

  arrayBias = 100*(actual-predicted)/(actual+ EPSILON)
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [00:25<00:00,  1.68s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [00:21<00:00,  1.45s/it]
