# Regionalização de Bacias usando Redes Neurais

In [1]:
# from google.colab import drive
# drive.mount('/content/drive')
# %%bash
# cp -r /content/drive/"My Drive"/DEHA/RegBaciasCE /content
# mkdir -p RegBaciasCE/src/models/NewFFNN3/CrossValid
# nvidia-smi

## Importing Packages

In [2]:
import sys

# Make the parent folder and its code/data sub-folders importable from this notebook.
sys.path.extend(["..", "../src", "../scripts", "../database"])

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
#from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from funcoes_auxiliares import *
from tqdm import tqdm
from IPython.display import clear_output
from sklearn.utils import shuffle
import time
import matplotlib.pyplot as plt


In [4]:
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model

from tensorflow.keras.optimizers import Adam, Nadam, RMSprop

In [5]:
import warnings

# Let wide DataFrames show up to 999 columns instead of being truncated.
pd.set_option("display.max_columns", 999)

# Silence every warning for the rest of the session.
warnings.filterwarnings(action='ignore')


In [6]:
# Station codes ("Estações ANA") that are filtered out of the dataset when it is loaded.
bacias_com_problema = [
    36220000,
]

In [7]:
# Read the full table, then drop the rows that belong to the excluded stations.
dados_brutos = pd.read_csv("../database/NewFFNNData.csv")
dados = dados_brutos.query(f"`Estações ANA` not in {bacias_com_problema}").reset_index(drop=True)

# Distinct station codes that remain, in ascending order.
bacias = sorted(set(dados['Estações ANA']))

print(len(dados))
dados.head()

9700


Unnamed: 0,Estações ANA,Declividade - D (%),Comprimento do rio principal - CT (km),Área de contribuição da estação fluviométrica - A (km2),Perímetro da área de contribuição da estação fluviométrica - P (km),Comprimento total de drenagem - CTD (km),Densidade de drenagem - DD (km-1),Curve number médio – CN (mm),Parcela da bacia no cristalino - Cr,E_0,P_2,P_1,P_0,Q_0
0,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,130.0,133.23,66.066,204.29,7.884968
1,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,118.0,66.066,204.29,135.78,6.1386
2,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,121.889904,204.29,135.78,142.92,3.870129
3,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,129.330288,135.78,142.92,12.508,2.136953
4,34730000,0.069435,64319.016213,897.371509,211.282396,528.790311,0.589266,56.773914,0.0,151.559616,142.92,12.508,3.0347,1.040503


In [8]:
# Columns that would be removed by the alternative `dados.drop(...)` feature selection.
# NOTE(review): this list is only referenced by commented-out code in this notebook.
drop_columns = [
    'Estações ANA',
    'Declividade - D (%)',
    'Comprimento total de drenagem - CTD (km)',
    'Perímetro da área de contribuição da estação fluviométrica - P (km)',
    'Densidade de drenagem - DD (km-1)',
    'E_2',
    'E_1',
    'Q_2',
    'Q_1',
    'Q_0',
]

# Feature columns fed to the network, in the order the model receives them.
# 'Comprimento do rio principal - CT (km)' is intentionally left out.
columns = [
    'Área de contribuição da estação fluviométrica - A (km2)',
    'Comprimento total de drenagem - CTD (km)',
    'Curve number médio – CN (mm)',
    'Parcela da bacia no cristalino - Cr',
    'P_2',
    'P_1',
    'P_0',
    'E_0',
]

In [9]:
# Target: the Q_0 column as a plain NumPy array.
Y = dados['Q_0'].values

# Features: min-max scale the selected columns (the scaler is fitted on the whole table)
# and wrap the result back into a DataFrame so the column names are kept.
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(dados[columns]), columns=columns)

In [10]:
# Row count of the scaled feature table, followed by a preview of its first rows.
print(X.shape[0])
X.head()

9700


Unnamed: 0,Área de contribuição da estação fluviométrica - A (km2),Comprimento total de drenagem - CTD (km),Curve number médio – CN (mm),Parcela da bacia no cristalino - Cr,P_2,P_1,P_0,E_0
0,0.019908,0.011969,0.0,0.0,0.203116,0.107835,0.333448,0.437452
1,0.019908,0.011969,0.0,0.0,0.100721,0.333448,0.221624,0.348627
2,0.019908,0.011969,0.0,0.0,0.311451,0.221624,0.233278,0.37742
3,0.019908,0.011969,0.0,0.0,0.207004,0.233278,0.020416,0.432495
4,0.019908,0.011969,0.0,0.0,0.217889,0.020416,0.004953,0.597038


### Definição do modelo

In [11]:
# Parâmetros
INIT_LR = 0.001
EPOCHS = 250
BS = 64  # Diminua o tamanho dos lotes se você não tiver memória suficiente
SEEDS = [1500, 4, 30, 6000, 400, 648, 900,10000, 2000, 5432]
SEED=SEEDS[3]
print("SEED: %s" % SEED)
n_input = X.shape[1]
n_classes = 1

n_h1 = 50
n_h2 = n_h1
n_h3 = 10
n_h4 = 10
n_h5 = 1


SEED: 6000


In [12]:
# Seed NumPy and TensorFlow BEFORE any layer is created. Layer weights are drawn when
# the layer is called on its input, so seeding afterwards (as was done previously)
# cannot influence the initial weights.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Feed-forward network: n_input features -> two ReLU hidden layers -> one linear output.
main_input = Input(shape=(n_input,), dtype='float32', name='main_input')
x = Dense(n_h1, activation='relu', name='hidden_layer1')(main_input)
x = Dense(n_h2, activation='relu', name='hidden_layer2')(x)

# Single linear unit producing the predicted value.
y = Dense(1, activation='linear', name='streamflow')(x)

model = Model(inputs=[main_input], outputs=[y])

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
main_input (InputLayer)      [(None, 8)]               0         
_________________________________________________________________
hidden_layer1 (Dense)        (None, 50)                450       
_________________________________________________________________
hidden_layer2 (Dense)        (None, 50)                2550      
_________________________________________________________________
streamflow (Dense)           (None, 1)                 51        
Total params: 3,051
Trainable params: 3,051
Non-trainable params: 0
_________________________________________________________________


In [13]:
#Adam, Nadam, RMSprop

# Optimizer used when compiling the model: RMSprop at the initial learning rate.
# (Adam, Nadam and 'Adadelta' were the alternatives tried in earlier experiments.)
opt = RMSprop(learning_rate=INIT_LR)

In [14]:
# Custom loss and metric handed to `model.compile`.
# NOTE(review): both names presumably come from `funcoes_auxiliares` (star import) — confirm.
# The built-in Keras MeanSquaredError loss/metric are the stock alternatives.
loss = RMSE_Keras
metrics = NSE_K

In [15]:
def relu(x):
    """Return ``x`` floored at zero.

    The floor is the float ``0.0`` rather than the int ``0``. ``np.vectorize``
    infers its output dtype from the first value returned, so an int zero for a
    negative first element would silently truncate all later results to integers.

    Parameters
    ----------
    x : scalar
        Value to clamp.

    Returns
    -------
    scalar
        ``x`` when it is greater than or equal to zero, otherwise ``0.0``.
    """
    return max(x, 0.0)

In [16]:
# Folder holding the saved weight files (one file per held-out station).
# Alternative location: '../src/models/legacy/saved_models/Ivy'
model_dir = '../src/models/legacy/Donut'

# Model identifier embedded in the weight file names.
model_code = '3'


In [17]:
# Per-station evaluation: for every station, load the weights saved under that
# station's code, predict on that station's rows only, and collect the scores.
nome, n_obs, nse, cor, rmse = [], [], [], [], []

# Compilation does not depend on the station, so it is done once instead of
# once per iteration (prediction itself does not use the loss or optimizer).
model.compile(loss=[loss], optimizer=opt, metrics=[metrics])

for bacia in bacias:
    # Boolean mask selecting the rows of the current station.
    idx = dados['Estações ANA'] == bacia
    X_teste, Y_teste = X[idx], Y[idx]

    model.load_weights(os.path.join(model_dir, 'weights.Model%s_%d.hdf5' % (model_code, bacia)))

    Y_ = model.predict(X_teste)

    # Floor negative predictions at zero. np.maximum keeps the floating dtype,
    # whereas np.vectorize(relu) takes its output dtype from the first element and
    # would truncate every prediction to an integer when that element is negative.
    Y_Prev = np.maximum(Y_, 0)

    # Give the observations the same column shape as the predictions.
    Y_test = np.expand_dims(Y_teste, 1)

    nome.append(bacia)
    cor.append(np.corrcoef(Y_Prev.T, Y_test.T)[1][0])
    # NOTE(review): RMSE and NASH presumably come from `funcoes_auxiliares` — confirm
    # their argument order (observed first, predicted second is what is passed here).
    rmse.append(RMSE(Y_test, Y_Prev))
    nse.append(NASH(Y_test, Y_Prev))
    n_obs.append(len(Y_teste))


pd.DataFrame({"Bacia":nome, "Observacoes":n_obs, "NSE":nse,"Cor": cor,"RMSE": rmse})

Unnamed: 0,Bacia,Observacoes,NSE,Cor,RMSE
0,34730000,475,-4.854282,0.720901,17.753341
1,34740000,92,0.440019,0.861527,4.69399
2,34750000,514,0.880909,0.938893,33.377022
3,35050000,115,0.477216,0.732109,6.215669
4,35125000,328,0.90252,0.964911,5.991441
5,35170000,462,0.86314,0.934791,21.98704
6,35210000,578,0.874079,0.943814,8.836568
7,35223000,130,0.099686,0.733257,71.982945
8,35240000,421,0.508759,0.800892,12.754612
9,35260000,504,0.714187,0.899843,14.502013


### Performance dos modelos

In [18]:
# Per-station score table built from the lists filled by the evaluation loop.
results = pd.DataFrame(
    {
        "Bacia": nome,
        "Observacoes": n_obs,
        "NSE": nse,
        "Cor": cor,
        "RMSE": rmse,
    }
)


In [19]:
# Mean NSE over all stations, counting negative scores as zero.
nse_floored = results["NSE"].apply(relu)
nse_floored.mean()


0.6649716768565879

In [20]:
# Mean floored NSE after excluding station 35668000 in addition to the stations
# listed in `bacias_com_problema`. The earlier standalone expression was removed:
# its value was discarded (only the last expression of a cell is displayed) and
# this query already excludes that same station.
results.query(f"""Bacia not in {bacias_com_problema+[35668000]}""")["NSE"].apply(relu).mean()

0.687322949828226

In [21]:
# Station codes of the Barros (2013) subset used for comparison.
bacias_barros_2013 = (
    34750000, 35050000, 35125000, 35170000, 35210000, 35260000,
    35263000, 35880000, 35950000, 36020000, 36125000, 36130000,
    36160000, 36210000, 36250000, 36270000, 36290000, 36520000,
)

# Keep only the rows of that subset, floor negative NSE at zero and average.
selecao_barros = results.query(f"""Bacia in {bacias_barros_2013}
""")
nse_barros = selecao_barros["NSE"].apply(relu).values
results_barros_2013 = nse_barros.mean()

results_barros_2013

0.7960238639498247

In [22]:
# Save the per-station scores. The folder is spelled "database" (lower case) to match
# the path the input CSV is read from; "../Database" would fail on case-sensitive
# filesystems. The output folder is created if it does not exist yet.
output_path = "../database/output/ResultadosFFNN3.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results.to_csv(output_path, index=False)