# Regionalização de Bacias usando Redes Neurais

In [1]:
# Optional Google Colab setup, kept disabled: mount Google Drive, copy the
# project folder into /content, create a CrossValid folder under src/models
# and run nvidia-smi.
# from google.colab import drive
# drive.mount('/content/drive')
# %%bash
# cp -r /content/drive/"My Drive"/DEHA/RegBaciasCE /content
# mkdir -p RegBaciasCE/src/models/NewFFNN3/CrossValid
# nvidia-smi

## Importing Packages

In [2]:
import sys

# Append the parent folder and its src/, scripts/ and database/ sub-folders
# (given as relative paths) to the module search path.
for _extra_path in ("..", "../src", "../scripts", "../database"):
    sys.path.append(_extra_path)

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
#from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from funcoes_auxiliares import *
from tqdm import tqdm
from IPython.display import clear_output
from sklearn.utils import shuffle
import time
import matplotlib.pyplot as plt


In [4]:
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model

from tensorflow.keras.optimizers import Adam, Nadam, RMSprop

In [5]:
import warnings

# Allow up to 999 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 999)

# Suppress every warning from here on.
warnings.filterwarnings("ignore")


In [6]:
# Station codes that the data-loading cell filters out of the input table.
bacias_com_problema = [36_220_000]

In [7]:
# Read the formatted input table, discard the rows of the problem stations
# and renumber the remaining rows from zero.
dados = (
    pd.read_csv("../database/Formated inputs/NewLSTMData.csv")
    .loc[lambda tabela: ~tabela['Estações ANA'].isin(bacias_com_problema)]
    .reset_index(drop=True)
)

# Distinct station codes in ascending order.
bacias = sorted(set(dados['Estações ANA']))

# Show the number of rows, then preview the first ones.
print(len(dados))
dados.head()

9697


Unnamed: 0,Estações ANA,Declividade - D (%),Comprimento do rio principal - CT (km),Área de contribuição da estação fluviométrica - A (km2),Perímetro da área de contribuição da estação fluviométrica - P (km),Comprimento total de drenagem - CTD (km),Densidade de drenagem - DD (km-1),Curve number médio – CN (mm),Parcela da bacia no cristalino - Cr,E_0,P_5,P_4,P_3,P_2,P_1,P_0,Q_0
0,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,130.0,5.1173,7.3561,31.818,133.23,66.066,204.29,7.884968
1,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,118.0,7.3561,31.818,133.23,66.066,204.29,135.78,6.1386
2,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,121.889904,31.818,133.23,66.066,204.29,135.78,142.92,3.870129
3,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,129.330288,133.23,66.066,204.29,135.78,142.92,12.508,2.136953
4,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,151.559616,66.066,204.29,135.78,142.92,12.508,3.0347,1.040503


In [8]:
# Column names marked for dropping. Each name appears once; the original list
# repeated 'Comprimento total de drenagem - CTD (km)'.
# NOTE(review): no other cell shown in this notebook reads `drop_columns`, and
# 'E_2', 'E_1', 'Q_2', 'Q_1' are absent from the table preview printed by the
# loading cell - confirm whether this list is still needed.
drop_columns = ['Estações ANA',
                'Declividade - D (%)',
                'Área de contribuição da estação fluviométrica - A (km2)',
                'Comprimento total de drenagem - CTD (km)',
                'Curve number médio – CN (mm)', 'Parcela da bacia no cristalino - Cr',
                # 'Comprimento do rio principal - CT (km)',
                'Perímetro da área de contribuição da estação fluviométrica - P (km)',
                'Densidade de drenagem - DD (km-1)',
                'E_2', 'E_1', 'Q_2', 'Q_1',
                'Q_0']


# Input features fed to the network: the P_5 ... P_0 columns, in that order.
columns = [
        'P_5', 'P_4', 'P_3',
        'P_2', 'P_1', 'P_0',

]

In [9]:

# Feature order used for the inputs (switch to columns[::-1] to reverse it).
col = columns
X = dados[col]
Y = dados['Q_0'].values

# Min-max scale every input column and rebuild a DataFrame from the result.
# The frame is labelled with `col` - the order actually fed to the scaler -
# so the labels stay correct even if `col` stops being identical to `columns`.
# NOTE(review): the scaler is fitted on all rows, including those the
# evaluation cell later holds out as test data; the stored weights presumably
# expect exactly this scaling, so it is left unchanged - confirm.
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=col)

In [10]:
# Number of rows in the scaled input frame, then a preview of the first ones.
print(X.shape[0])
X.head()

9697


Unnamed: 0,P_5,P_4,P_3,P_2,P_1,P_0
0,0.007794,0.011204,0.048463,0.203116,0.107835,0.333448
1,0.011204,0.048463,0.202927,0.100721,0.333448,0.221624
2,0.048463,0.202927,0.100628,0.311451,0.221624,0.233278
3,0.202927,0.100628,0.311162,0.207004,0.233278,0.020416
4,0.100628,0.311162,0.206811,0.217889,0.020416,0.004953


### Definição do modelo

In [11]:
# Parameters
INIT_LR = 0.001
EPOCHS = 250
BS = 64  # Reduce the batch size if you do not have enough memory

# Candidate seeds; the entry at index 3 is the one in use.
SEEDS = [1500, 4, 30, 6000, 400, 648, 900, 10000, 2000, 5432]
SEED = SEEDS[3]
print("SEED: %s" % SEED)

# Network dimensions: one input per column of X, and two hidden-layer
# widths that share the same value.
_, n_input = X.shape
n_classes = 1

n_h1 = n_h2 = 30



SEED: 6000


In [12]:
# Defining model
# Seed NumPy and TensorFlow BEFORE any layer is created: Keras initialises a
# layer's weights when the layer is first called on its input, so seeding
# after the Dense calls (as the original cell did) cannot influence the
# initial weights.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Input layer: one float32 value per input feature.
main_input = Input(shape=(n_input,), dtype='float32', name='main_input')

# FFNN layers (a.k.a. Dense layers): two ReLU hidden layers.
x = Dense(n_h1, activation='relu', name='hidden_layer1')(main_input)
x = Dense(n_h2, activation='relu', name='hidden_layer2')(x)

# Output layer: a single linear unit.
y = Dense(1, activation='linear', name='streamflow')(x)

model = Model(inputs=[main_input], outputs=[y])

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 main_input (InputLayer)     [(None, 6)]               0         
                                                                 
 hidden_layer1 (Dense)       (None, 30)                210       
                                                                 
 hidden_layer2 (Dense)       (None, 30)                930       
                                                                 
 streamflow (Dense)          (None, 1)                 31        
                                                                 
Total params: 1,171
Trainable params: 1,171
Non-trainable params: 0
_________________________________________________________________


2022-02-22 15:07:02.166964: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [13]:
# Choosing the optimizer: RMSprop with learning rate INIT_LR
opt = RMSprop(learning_rate=INIT_LR)


In [14]:
# Loss function and evaluation metric that are later passed to model.compile().
loss, metrics = RMSE_Keras, NSE_K

In [15]:
def relu(x):
    """Return the larger of ``x`` and ``0``."""
    return max(x, 0)

In [16]:
# Choose the folder, the code fragment and the file-name template of the
# stored weights, depending on whether `col` matches `columns`.
if col != columns:
    # Old
    model_dir, model_code, model_base_name = (
        '../src/models/FFNN_ic/CrossValid_SEED_RANDOM_NSE_?',
        '_IC',
        'weights.FFN%s%d.hdf5',
    )
else:
    # New
    model_dir, model_code, model_base_name = (
        '../src/models/FFNN_ic/CrossValid_SEED_906_NSE_0724',
        '-ic_',
        "weights.NewFFN%s%d.hdf5",
    )


In [17]:
# For every basin: load that basin's stored weights, predict on the last 20 %
# of the basin's rows and record sample count, NSE, correlation and RMSE.
nome, n_obs, nse, cor, rmse = [], [], [], [], []

# Compiling does not depend on the basin, so it is done once here rather than
# on every loop iteration.
model.compile(loss=[loss], optimizer=opt, metrics=[metrics])

for bacia in bacias:
    idx = dados['Estações ANA'] == bacia

    # Rows of this basin in their original order; the first 80 % are skipped
    # and the remainder is used as the test split.
    X_, y_ = X[idx], Y[idx]
    corte = int(0.8 * len(X_))
    X_teste, Y_teste = X_[corte:], y_[corte:]

    # Weights file name is built from the template, the model code and the
    # basin code.
    model.load_weights(os.path.join(model_dir, model_base_name % (model_code, bacia)))

    Y_ = model.predict(X_teste)

    # Clip negative predictions to zero. np.vectorize(relu) was replaced
    # because, without `otypes`, it infers the output dtype from the FIRST
    # element: a negative first prediction makes relu return the int 0 and
    # every later prediction is then truncated to an integer. The float64
    # cast keeps the values identical to what the vectorised version produced
    # whenever the first prediction was positive.
    Y_Prev = np.maximum(Y_.astype(np.float64), 0.0)

    # Observed values as a column vector, matching the prediction's shape.
    Y_test = np.expand_dims(Y_teste, 1)

    nome.append(bacia)
    cor.append(np.corrcoef(Y_Prev.T, Y_test.T)[1][0])
    rmse.append(RMSE(Y_test, Y_Prev))
    nse.append(NASH(Y_test, Y_Prev))
    n_obs.append(len(Y_teste))


pd.DataFrame({"Bacia":nome, "Observacoes":n_obs, "NSE":nse,"Cor": cor,"RMSE": rmse})



Unnamed: 0,Bacia,Observacoes,NSE,Cor,RMSE
0,34730000,95,0.191834,0.748664,2.977733
1,34740000,19,0.198764,0.88308,3.039781
2,34750000,103,0.746883,0.923588,28.508029
3,35050000,23,0.680192,0.94198,2.762621
4,35125000,66,0.895971,0.971255,7.200379
5,35170000,93,0.93917,0.974722,10.069301
6,35210000,116,0.910415,0.958564,6.032973
7,35223000,26,0.069154,0.800922,156.25946
8,35240000,85,0.45452,0.830572,23.229543
9,35260000,101,0.769886,0.928661,14.157797


### Performance dos modelos

In [18]:
# Per-basin score table, one column per collected list.
_result_names = ["Bacia", "Observacoes", "NSE", "Cor", "RMSE"]
_result_values = [nome, n_obs, nse, cor, rmse]
results = pd.DataFrame(dict(zip(_result_names, _result_values)))


In [19]:
# Mean NSE over all basins, with negative scores counted as zero.
results["NSE"].map(relu).mean()


0.6316660867736277

In [20]:
# Mean NSE (negative scores counted as zero) without the problem basins and
# without basin 35668000. The cell previously started with a second,
# equivalent-looking expression whose value was never shown, because only the
# last expression of a cell is displayed; that dead statement was removed.
results.query(f"""Bacia not in {bacias_com_problema+[35668000]}""")["NSE"].apply(relu).mean()

0.6445823734581492

In [21]:
# Basin codes of the `barros_2013` subset.
# NOTE(review): presumably the basins analysed in a 2013 study by Barros - confirm.
bacias_barros_2013 = (
    34750000, 35050000, 35125000, 35170000, 35210000, 35260000,
    35263000, 35880000, 35950000, 36020000, 36125000, 36130000,
    36160000, 36210000, 36250000, 36270000, 36290000, 36520000,
)

# Mean NSE over that subset, with negative scores counted as zero.
results_barros_2013 = (
    results.loc[results["Bacia"].isin(bacias_barros_2013), "NSE"]
    .apply(relu)
    .values
    .mean()
)


results_barros_2013

0.7240447753636627

In [22]:
# Median NSE over the barros_2013 subset, with negative scores counted as zero.
(
    results.loc[results["Bacia"].isin(bacias_barros_2013), "NSE"]
    .map(relu)
    .median()
)

0.7798090453487084

In [24]:
# Write the per-basin score table to CSV without the index column.
# NOTE(review): this path starts with "../Database" while the input file is
# read from "../database" - on a case-sensitive filesystem these are different
# folders; confirm which spelling is correct.
results.to_csv("../Database/output/Results/ResultadosFFNN_ic.csv",index=False)