# IA para espacialização dos dados

In [91]:
import os
import rasterio
import numpy as np
import pandas as pd
import asciichartpy
import seaborn as sns
import geopandas as gpd
import rioxarray as rxr
import matplotlib.pyplot as plt

from tqdm import tqdm, trange
from pyproj import Transformer
from shapely import Point, distance
from rasterio.transform import rowcol
from IPython.display import clear_output
from utils.consts import SOIL_TYPES, USO_SOLO_CLASS
from utils import Infiltrometro

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau, CyclicLR
from torch.utils.data import TensorDataset, Dataset, DataLoader, random_split

### Device disponível para treinar o modelo MLP

In [92]:
# 1) Dispositivo (GPU se disponível)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cuda


### Dataset com os dados para convolução
---
- O Dataset nesta aplicação é extremamente importante pois irá englobar todas as informações necessárias para espacializar os dado de infiltração

In [93]:
class MeuDataset(Dataset):

    def __init__(self, device:torch.device|None=None, eval=False):

        self.eval = eval
        self.device = device

        # Lendo Tabelas
        print("Lendo Tabelas")
        infil = pd.read_excel(r"D:\Mestrado\Trabalho Final\Dados\Levantamento em Campo\Compiled.xlsx", sheet_name="Infiltracao")
        self.infil = Infiltrometro(infil)
        self.Ks = self.infil.Ks(is_print=False)["Ks"].values.astype(np.float64) # type:ignore
        self.Ks = torch.tensor(self.Ks, device=self.device)

    def __len__(self):
        return len(self.infil.infiltrations)

    def __getitem__(self, i):
        idx = i
        if isinstance(i, (int, float)):
            idx = [i]

        sand = torch.tensor(self.infil.infiltrations["Sand"].values[idx], device=self.device) # type:ignore
        silt = torch.tensor(self.infil.infiltrations["Silt"].values[idx], device=self.device) # type:ignore
        clay = torch.tensor(self.infil.infiltrations["Clay"].values[idx], device=self.device) # type:ignore

        # Demais dados
        Ks = (-torch.log10(self.Ks[idx]))*1000 # type:ignore
        X = torch.stack([sand, silt, clay], dim=1)

        if isinstance(i, (int, float)):
            X = X[0]
            Ks = Ks[0]


        return (X, ), Ks
    
dataset = MeuDataset(device=device)

print("Len:", len(dataset))
dataset[15][0][0].shape, dataset[1:10][0][0].shape

Lendo Tabelas
[ True  True  True  True  True  True  True  True  True False False False
 False False False False False False False False False]


  (c1, c2), covariance = curve_fit(self._equation_infiltration, t, I)


Len: 84


(torch.Size([3]), torch.Size([9, 3]))

### Configurações do treino
---

- Seed para números aleatórios
- % de treino e teste
- Métricas
- Epochs
- Batch Size

In [94]:
# Seed para permitir reprodutibilidade dos valores pseudo-aleatórios
seed = 42

# Porcentagens de Treino e Teste
train_percent = 80
test_percent  = 20

# BatchSize e Epochs
batch_size = 2
epochs     = 1000  # Poucos pontos, verificar overfitting

In [95]:
n = len(dataset)
n_train = int(train_percent*n/100)
n_test = n - n_train
g = torch.Generator().manual_seed(seed)

train_ds, test_ds = random_split(dataset, [n_train, n_test], generator=g)

print("N Total:", n, "N Train:", n_train, "N Teste:", n_test)

train_ds[1:5][0][0].shape, test_ds[1:5][0][0].shape # type: ignore

N Total: 84 N Train: 67 N Teste: 17


(torch.Size([4, 3]), torch.Size([4, 3]))

### MLP e CNN
---

MLP configurada com uma CNN também

In [96]:
# Modelo (MLP e CNN)
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.fc = nn.Sequential(
            nn.Linear(3, 16),
            nn.Sigmoid(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
        ).to(dtype=torch.float64)

    def forward(self, values):
        x = self.fc(values)
        return x

mlp = MLP().to(device=device)

X = dataset[1:4][0]
mlp(*X)

tensor([[-0.3167],
        [-0.3579],
        [-0.3483]], device='cuda:0', dtype=torch.float64,
       grad_fn=<AddmmBackward0>)

### Ajustar o modelo MLP

- Processos para ajustar a MLP pelo método do gradiente descendente

In [97]:
def nse(y_pred, y_true, mean=None):
    if mean is not None:
        y_true_mean = mean
    else:
        y_true_mean = torch.mean(y_true)
        
    numerator = torch.sum((y_pred - y_true) ** 2)
    denominator = torch.sum((y_true - y_true_mean) ** 2)
    return 1 - (numerator / denominator)

In [98]:
os.makedirs("best_model/", exist_ok=True)

# Média dos valores para compara erros
mean = dataset[:][1].mean()

model = MLP().to(device=device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = CyclicLR(
    optimizer,
    base_lr=0.0001,      # menor LR
    max_lr=0.1,          # maior LR
    step_size_up=100,    # número de iterações até atingir o max_lr
    mode='exp_range',   # 'triangular', 'triangular2' ou 'exp_range'
    cycle_momentum=True  # necessário se usar Adam em vez de SGD
)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_ds, batch_size=batch_size)

best_test_loss = -torch.inf
best_state = None

train_losses = []
val_losses = []

for epoch in range(1, epochs+1):
    # --- treino ---
    model.train()
    running_loss = 0.0
    for x, y in train_loader:

        optimizer.zero_grad()
        y_sim = model(*x)

        loss = criterion(y_sim[:, 0], y)
        running_loss += loss.item() * x[0].size(0)

        loss.backward()
        optimizer.step()
    
    train_loss = running_loss / len(train_loader.dataset) # type: ignore
    train_losses.append(train_loss)

    # --- validação ---
    model.eval()
    val_loss = 0.0
    nash = 0.0

    with torch.no_grad():
        for x, y in test_loader:
            # Forward
            y_sim = model(*x)

            loss = criterion(y_sim[:, 0], y)
            val_loss += loss.item() * x[0].size(0)

            ns = nse(y_sim[:, 0], y, mean)
            nash += ns.item() * x[0].size(0)

    # Média do loss
    val_loss /= len(test_loader.dataset) # type: ignore
    nash /= len(test_loader.dataset)     # type: ignore
    val_losses.append(val_loss)

    # early stopping simples
    current_lr = optimizer.param_groups[0]['lr']
    if nash > best_test_loss and epoch > 100:
        best_test_loss = nash
        best_state = {
            "lr":current_lr,
            "epoch": epoch,
            "epochs":epochs,
            "nash":nash,
            "val_loss":val_loss,
            "train_loss":train_loss,
            "batch_size":batch_size,
            "model_state":model.state_dict(),
        }

        torch.save(best_state, f"best_model/{str(nash).replace(".", "_")}.pth")

    # Altero a taxa de aprendizado a cada fim da epoch para melhorar o aprendizado
    scheduler.step()

    #Print das métricas atuais

    indices = np.linspace(0, len(val_losses) - 1, 150, dtype=int)
    subset = [val_losses[i] for i in indices]

    ascii_chart = asciichartpy.plot(subset, {'height': 15})

    # Limpo o terminal
    clear_output(wait=True)
    print(f"Epoch {epoch}/{epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | NASH: {(nash):.6f} | At lr:{current_lr:.4e}\n{ascii_chart}")


Epoch 176/1000 | Train Loss: 457881.3587 | Val Loss: 425244.4266 | NASH: -0.016114 | At lr:2.5075e-02
2403348.12  ┤
2274466.30  ┼──╮
2145584.48  ┤  ╰╮
2016702.67  ┤   │
1887820.85  ┤   │
1758939.03  ┤   │
1630057.22  ┤   │
1501175.40  ┤   │
1372293.58  ┤   ╰╮
1243411.77  ┤    │
1114529.95  ┤    │                                                   ╭╮
985648.14  ┤    │                                                   ││
856766.32  ┤    │                                          ╭╮       ││                ╭╮
727884.50  ┤    │                                 ╭╮       ││       ││   ╭╮           ││               ╭─╮
599002.69  ┤    │           ╭╮     ╭╮  ╭╮╭╮╭╮    ╭╯│ ╭╮    │╰─╮  ╭╮╭╯│  ╭╯│    ╭╮╭╮   │╰╮      ╭╮  ╭╮ ╭╯ │╭──╮    ╭─╮╭╮         ╭─╮    ╭╮          ╭╮
470120.87  ┤    ╰───────────╯╰─────╯╰──╯╰╯╰╯╰────╯ ╰─╯╰──╮╭╯  ╰──╯││ ╰──╯ ╰────╯╰╯╰───╯ ╰──────╯╰──╯╰─╯  ╰╯  ╰────╯ ╰╯╰─────────╯ ╰────╯╰──────────╯╰────────────
341239.05  ┤                                             ╰╯       ╰╯


KeyboardInterrupt: 