sudo apt update
sudo apt install -y python3-venv python3-pip
python3 -m venv venv
source venv/bin/activate
pip install pi-ina219 torch pandas scikit-learn


## VERSÃO PURA

In [None]:
# cnn_regressao_baseline_ina_50runs.py
# ============================================================
# CNN 1D (PyTorch) + MEDIÇÃO INA219 (inferência) — 50 rodadas
# Salva métricas e energia de cada rodada em CSV.
# ============================================================

import time
import threading
from collections import deque
from datetime import datetime

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# --- INA219 ---
try:
    from ina219 import INA219, DeviceRangeError
    INA_AVAILABLE = True
except Exception:
    INA_AVAILABLE = False
    print("[AVISO] pi-ina219 não encontrado. Rode: pip install pi-ina219")

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE = 100
LR_BASELINE = 1e-3
BATCH_SIZE = 64
SEED = 42

DEVICE = torch.device("cpu")  # forçado CPU

# INA219
SHUNT_OHMS = 0.1      # ohms (ajuste conforme seu breakout)
I2C_ADDRESS = 0x40     # endereço padrão do INA219
SAMPLE_INTERVAL = 0.01 # 10 ms

# Bench
N_RUNS = 50
OUT_CSV = "resultados_inferencia_ina219.csv"
SLEEP_BETWEEN_RUNS = 0.20  # segundos, pequeno intervalo p/ estabilizar consumo

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 16*length)
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)    # (N,)

# ----------------------- Medidor de Energia (INA219) -----------------------
class EnergyMeter:
    """
    Amostra V (volts), I (mA), P (W) em intervalos fixos.
    Calcula energia (J) integrando P*dt ao longo do período de medição.
    """
    def __init__(self, shunt_ohms=0.1, address=0x40, sample_interval=0.01):
        self.enabled = INA_AVAILABLE
        self.sample_interval = sample_interval
        self._thread = None
        self._stop = threading.Event()
        self.samples = deque()  # (timestamp, volts, mA, watts)
        self.start_t = None
        self.end_t = None

        if self.enabled:
            try:
                self.ina = INA219(shunt_ohms, address=address)
                self.ina.configure()  # 32V, ganho auto, 12-bit
            except Exception as e:
                print(f"[AVISO] Falha ao inicializar INA219: {e}")
                self.enabled = False

    def _sample_loop(self):
        self.start_t = time.perf_counter()
        while not self._stop.is_set():
            t = time.perf_counter()
            try:
                volts = self.ina.voltage()              # V
                current_mA = self.ina.current()         # mA
                watts = (current_mA / 1000.0) * volts   # W
                self.samples.append((t, volts, current_mA, watts))
            except DeviceRangeError:
                self.samples.append((t, float("nan"), float("nan"), float("nan")))
            except Exception:
                self.samples.append((t, float("nan"), float("nan"), float("nan")))
            time.sleep(self.sample_interval)
        self.end_t = time.perf_counter()

    def start(self):
        if not self.enabled:
            return
        self.samples.clear()
        self._stop.clear()
        self._thread = threading.Thread(target=self._sample_loop, daemon=True)
        self._thread.start()

    def stop(self):
        if not self.enabled:
            return
        self._stop.set()
        if self._thread is not None:
            self._thread.join(timeout=2.0)

    def summarize(self):
        if not self.enabled or len(self.samples) < 2:
            return {
                "duration_s": None,
                "energy_J": None,
                "avg_power_W": None,
                "peak_power_W": None,
                "avg_current_mA": None,
                "peak_current_mA": None,
                "avg_voltage_V": None,
                "n_samples": len(self.samples)
            }

        arr = np.array(self.samples, dtype=float)
        t = arr[:, 0]
        V = arr[:, 1]
        I_mA = arr[:, 2]
        P_W = arr[:, 3]

        mask = np.isfinite(P_W)
        t_valid = t[mask]
        P_valid = P_W[mask]

        energy_J = 0.0
        if len(P_valid) >= 2:
            dt = np.diff(t_valid)
            P_mid = (P_valid[:-1] + P_valid[1:]) / 2.0
            energy_J = float(np.sum(P_mid * dt))

        duration_s = (self.end_t - self.start_t) if (self.end_t and self.start_t) else float(t[-1] - t[0])
        avg_power_W = float(np.nanmean(P_W)) if np.any(np.isfinite(P_W)) else None
        peak_power_W = float(np.nanmax(P_W)) if np.any(np.isfinite(P_W)) else None
        avg_current_mA = float(np.nanmean(I_mA)) if np.any(np.isfinite(I_mA)) else None
        peak_current_mA = float(np.nanmax(I_mA)) if np.any(np.isfinite(I_mA)) else None
        avg_voltage_V = float(np.nanmean(V)) if np.any(np.isfinite(V)) else None

        return {
            "duration_s": float(duration_s),
            "energy_J": energy_J,
            "avg_power_W": avg_power_W,
            "peak_power_W": peak_power_W,
            "avg_current_mA": avg_current_mA,
            "peak_current_mA": peak_current_mA,
            "avg_voltage_V": avg_voltage_V,
            "n_samples": int(len(self.samples))
        }

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:02d}/{epochs}  - Train MSE: {epoch_loss:.6f}")

def evaluate_model_with_energy(model, loader, label="Avaliação (com energia)"):
    model.eval()
    y_true, y_pred = [], []

    meter = EnergyMeter(shunt_ohms=SHUNT_OHMS, address=I2C_ADDRESS, sample_interval=SAMPLE_INTERVAL)

    # Pré-aquecimento rápido (evita primeira chamada fria afetar muito)
    with torch.no_grad():
        for xb, _ in loader:
            _ = model(xb)
            break

    if meter.enabled:
        meter.start()
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()
    if meter.enabled:
        meter.stop()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    energy_stats = meter.summarize() if meter.enabled else None

    result = {
        "mae": _mae,
        "rmse": _rmse,
        "r2": _r2,
        "total_inference_s": total_time,
        "time_per_sample_ms": time_per_sample_ms,
    }
    if energy_stats:
        result.update({
            "duration_s": energy_stats["duration_s"],
            "energy_J": energy_stats["energy_J"],
            "avg_power_W": energy_stats["avg_power_W"],
            "peak_power_W": energy_stats["peak_power_W"],
            "avg_current_mA": energy_stats["avg_current_mA"],
            "peak_current_mA": energy_stats["peak_current_mA"],
            "avg_voltage_V": energy_stats["avg_voltage_V"],
            "n_samples": energy_stats["n_samples"],
        })
    else:
        result.update({
            "duration_s": None,
            "energy_J": None,
            "avg_power_W": None,
            "peak_power_W": None,
            "avg_current_mA": None,
            "peak_current_mA": None,
            "avg_voltage_V": None,
            "n_samples": 0,
        })

    # Opcional: print rápido
    print(f"{label} -> MAE={_mae:.4f} | RMSE={_rmse:.4f} | R2={_r2:.4f} | "
          f"t_med={time_per_sample_ms:.3f} ms | E={result['energy_J']} J")

    return result

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # ---------- PRÉ-PROCESSAMENTO ----------
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # ---------- TREINO (uma única vez) ----------
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando baseline (sem otimizações) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)

    # ---------- 50 RODADAS DE INFERÊNCIA ----------
    resultados = []
    print(f"\n=== Rodando {N_RUNS} inferências com medição de energia ===")
    for run in range(1, N_RUNS + 1):
        ts = datetime.now().isoformat(timespec="seconds")
        r = evaluate_model_with_energy(model, test_loader, label=f"Rodada {run:02d}")
        r["run"] = run
        r["timestamp"] = ts
        resultados.append(r)
        time.sleep(SLEEP_BETWEEN_RUNS)  # pequena pausa p/ estabilizar consumo/temperatura

    # ---------- SALVAR CSV ----------
    df_out = pd.DataFrame(resultados, columns=[
        "run","timestamp",
        "mae","rmse","r2",
        "total_inference_s","time_per_sample_ms",
        "duration_s","energy_J",
        "avg_power_W","peak_power_W",
        "avg_current_mA","peak_current_mA",
        "avg_voltage_V","n_samples"
    ])
    df_out.to_csv(OUT_CSV, index=False, float_format="%.8f")
    print(f"\n✅ Resultados salvos em: {OUT_CSV}")
    print(df_out.describe(include='all'))
    # Dica: você pode depois calcular médias/intervalos de confiança a partir desse CSV.

if __name__ == "__main__":
    main()



=== Treinando baseline (sem otimizações) ===
Epoch 01/100  - Train MSE: 6.248398
Epoch 05/100  - Train MSE: 0.270467
Epoch 10/100  - Train MSE: 0.262171
Epoch 15/100  - Train MSE: 0.258560
Epoch 20/100  - Train MSE: 0.264250
Epoch 25/100  - Train MSE: 0.264546
Epoch 30/100  - Train MSE: 0.257848
Epoch 35/100  - Train MSE: 0.268403
Epoch 40/100  - Train MSE: 0.255421
Epoch 45/100  - Train MSE: 0.264791
Epoch 50/100  - Train MSE: 0.259054
Epoch 55/100  - Train MSE: 0.253974
Epoch 60/100  - Train MSE: 0.256649
Epoch 65/100  - Train MSE: 0.261080
Epoch 70/100  - Train MSE: 0.255296
Epoch 75/100  - Train MSE: 0.252065
Epoch 80/100  - Train MSE: 0.255939
Epoch 85/100  - Train MSE: 0.253428
Epoch 90/100  - Train MSE: 0.254143
Epoch 95/100  - Train MSE: 0.256427
Epoch 100/100  - Train MSE: 0.253061

📊 Baseline (sem otimizações):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.156658 s
Tempo médio por amostra:   0.0261 ms


## OTIMIZAÇÕES

# Otimização 1: PODA

Vamos implementar 3 diferentes tipos de poda, sendo elas:

- Poda L1 não-estruturada (baseline de pruning por magnitude) - com e sem finetune
- Poda aleatória não-estruturada (controle)
- Poda estruturada L2 (ln_structured, n=2) em Conv1d (zera filtros/canais inteiros)

In [2]:
# cnn_regressao_poda_aleatoria_sem_finetune.py
# ============================================================
# Regressão de "Precipitação dia" com CNN 1D (PyTorch) + poda aleatória
# SEM fine-tuning após a poda.
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.utils.prune as prune

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE = 100
LR_BASELINE = 1e-3
BATCH_SIZE = 64
SEED = 42

PRUNE_AMOUNT = 0.30  # porcentagem de pesos zerados

DEVICE = torch.device("cpu")

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

def layer_sparsity(module: nn.Module) -> float:
    w = module.weight.detach().cpu().numpy()
    return float((w == 0).mean())

def report_sparsity(model: nn.Module, header="Sparsity por camada"):
    print(f"\n🔎 {header}:")
    for name, m in model.named_modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            print(f"  {name:<25s} sparsity={layer_sparsity(m):.3f}")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 10 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:03d}/{epochs} - Train MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Poda aleatória -----------------------
def apply_random_unstructured(model: nn.Module, amount: float):
    for m in model.modules():
        if isinstance(m, nn.Conv1d):
            prune.random_unstructured(m, name="weight", amount=amount)
    for m in model.modules():
        if isinstance(m, nn.Linear):
            prune.random_unstructured(m, name="weight", amount=amount)

def remove_pruning_reparam(model: nn.Module):
    for m in model.modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            if hasattr(m, "weight_mask"):
                prune.remove(m, "weight")

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # Pré-processamento
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)
    X_test_cnn  = to_cnn1d_shape(X_test)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # Baseline
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando baseline (sem poda) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)
    evaluate_model(model, test_loader, label="Baseline (antes da poda)")

    # Poda aleatória
    print(f"\n=== Aplicando poda aleatória não-estruturada (amount={PRUNE_AMOUNT:.2f}) ===")
    apply_random_unstructured(model, amount=PRUNE_AMOUNT)
    report_sparsity(model, header="Sparsity após aplicar a poda (reparametrizada)")

    # Avaliação sem fine-tuning
    evaluate_model(model, test_loader, label="Após poda aleatória (sem fine-tuning)")

    # Remover reparam
    print("\n=== Removendo reparametrização da poda (zeros permanentes) ===")
    remove_pruning_reparam(model)
    report_sparsity(model, header="Sparsity final (reparam removida)")
    evaluate_model(model, test_loader, label="Após poda aleatória (reparam removida)")

if __name__ == "__main__":
    main()



=== Treinando baseline (sem poda) ===
Epoch 001/100 - Train MSE: 6.248398
Epoch 010/100 - Train MSE: 0.262171
Epoch 020/100 - Train MSE: 0.264250
Epoch 030/100 - Train MSE: 0.257848
Epoch 040/100 - Train MSE: 0.255421
Epoch 050/100 - Train MSE: 0.259054
Epoch 060/100 - Train MSE: 0.256649
Epoch 070/100 - Train MSE: 0.255296
Epoch 080/100 - Train MSE: 0.255939
Epoch 090/100 - Train MSE: 0.254143
Epoch 100/100 - Train MSE: 0.253061

📊 Baseline (antes da poda):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.174049 s
Tempo médio por amostra:   0.0290 ms

=== Aplicando poda aleatória não-estruturada (amount=0.30) ===

🔎 Sparsity após aplicar a poda (reparametrizada):
  net.0                     sparsity=0.292
  net.2                     sparsity=0.299
  net.5                     sparsity=0.300
  net.7                     sparsity=0.312

📊 Após poda aleatória (sem fine-tuning):
MAE:   1.0969
RMSE:  2.9628
R²:    0.4812
Tempo total de inferência: 0.372575 s
Tempo médi

In [3]:
# cnn_regressao_poda_aleatoria_com_finetune.py
# ============================================================
# Regressão "Precipitação dia" com CNN 1D (PyTorch) + poda aleatória
# COM fine-tuning após a poda.
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.utils.prune as prune

# ----------------------- Config -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE = 100
LR_BASELINE     = 1e-3
BATCH_SIZE      = 64
SEED            = 42

# Poda aleatória
PRUNE_AMOUNT    = 0.30  # % de pesos zerados por camada

# Fine-tuning pós-poda
EPOCHS_FINETUNE = 10
LR_FINETUNE     = 1e-3

DEVICE = torch.device("cpu")

# ----------------------- Utils -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

def layer_sparsity(module: nn.Module) -> float:
    w = module.weight.detach().cpu().numpy()
    return float((w == 0).mean())

def report_sparsity(model: nn.Module, header="Sparsity por camada"):
    print(f"\n🔎 {header}:")
    for name, m in model.named_modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            print(f"  {name:<25s} sparsity={layer_sparsity(m):.3f}")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 10 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:03d}/{epochs} - Train MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Poda aleatória -----------------------
def apply_random_unstructured(model: nn.Module, amount: float):
    for m in model.modules():
        if isinstance(m, nn.Conv1d):
            prune.random_unstructured(m, name="weight", amount=amount)
    for m in model.modules():
        if isinstance(m, nn.Linear):
            prune.random_unstructured(m, name="weight", amount=amount)

def remove_pruning_reparam(model: nn.Module):
    for m in model.modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            if hasattr(m, "weight_mask"):
                prune.remove(m, "weight")

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # Pré-processamento (igual ao seu)
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)
    X_test_cnn  = to_cnn1d_shape(X_test)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # Baseline
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando baseline (sem poda) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)
    evaluate_model(model, test_loader, label="Baseline (antes da poda)")

    # Poda aleatória (impacto bruto)
    print(f"\n=== Aplicando poda aleatória não-estruturada (amount={PRUNE_AMOUNT:.2f}) ===")
    apply_random_unstructured(model, amount=PRUNE_AMOUNT)
    report_sparsity(model, header="Sparsity após aplicar a poda (reparametrizada)")
    evaluate_model(model, test_loader, label="Após poda aleatória (sem fine-tuning)")

    # Fine-tuning pós-poda
    print("\n=== Fine-tuning pós-poda ===")
    train_model(model, train_loader, epochs=EPOCHS_FINETUNE, lr=LR_FINETUNE)
    evaluate_model(model, test_loader, label="Após poda aleatória + fine-tuning (reparam presente)")

    # Remover reparam e reavaliar
    print("\n=== Removendo reparametrização da poda (zeros permanentes) ===")
    remove_pruning_reparam(model)
    report_sparsity(model, header="Sparsity final (reparam removida)")
    evaluate_model(model, test_loader, label="Após poda aleatória + fine-tuning (reparam removida)")

if __name__ == "__main__":
    main()



=== Treinando baseline (sem poda) ===
Epoch 001/100 - Train MSE: 6.248398
Epoch 010/100 - Train MSE: 0.262171
Epoch 020/100 - Train MSE: 0.264250
Epoch 030/100 - Train MSE: 0.257848
Epoch 040/100 - Train MSE: 0.255421
Epoch 050/100 - Train MSE: 0.259054
Epoch 060/100 - Train MSE: 0.256649
Epoch 070/100 - Train MSE: 0.255296
Epoch 080/100 - Train MSE: 0.255939
Epoch 090/100 - Train MSE: 0.254143
Epoch 100/100 - Train MSE: 0.253061

📊 Baseline (antes da poda):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.191164 s
Tempo médio por amostra:   0.0318 ms

=== Aplicando poda aleatória não-estruturada (amount=0.30) ===

🔎 Sparsity após aplicar a poda (reparametrizada):
  net.0                     sparsity=0.292
  net.2                     sparsity=0.299
  net.5                     sparsity=0.300
  net.7                     sparsity=0.312

📊 Após poda aleatória (sem fine-tuning):
MAE:   1.0969
RMSE:  2.9628
R²:    0.4812
Tempo total de inferência: 0.247201 s
Tempo médi

In [1]:
# cnn_regressao_poda_l1_sem_finetune.py
# ============================================================
# Regressão de "Precipitação dia" com CNN 1D (PyTorch) + poda L1
# SEM fine-tuning após a poda.
# Pré-processamento exatamente como especificado.
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.utils.prune as prune

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE = 100        # <- 100 épocas, conforme pedido
LR_BASELINE = 1e-3
BATCH_SIZE = 64
SEED = 42

# Forçar CPU
DEVICE = torch.device("cpu")

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

def layer_sparsity(module: nn.Module) -> float:
    w = module.weight.detach().cpu().numpy()
    return float((w == 0).mean())

def report_sparsity(model: nn.Module, header="Sparsity por camada"):
    print(f"\n🔎 {header}:")
    for name, m in model.named_modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            print(f"  {name:<25s} sparsity={layer_sparsity(m):.3f}")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 16*length)
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)    # (N,)

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)            # CPU direto
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 10 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:03d}/{epochs}  - Train MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Poda L1 (não-estruturada) -----------------------
def apply_l1_unstructured(model: nn.Module, amount: float):
    # Aplica L1 não-estruturada em Conv1d e Linear (apenas nos pesos)
    for m in model.modules():
        if isinstance(m, nn.Conv1d):
            prune.l1_unstructured(m, name="weight", amount=amount)
    for m in model.modules():
        if isinstance(m, nn.Linear):
            prune.l1_unstructured(m, name="weight", amount=amount)

def remove_pruning_reparam(model: nn.Module):
    # Remove reparametrização (weight_orig/weight_mask) e fixa zeros permanentemente
    for m in model.modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            if hasattr(m, "weight_mask"):
                prune.remove(m, "weight")

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # ============================================================
    # PRÉ-PROCESSAMENTO (exatamente como informado)
    # ============================================================
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # ============================================================
    # BASELINE: treino e avaliação ANTES da poda
    # ============================================================
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)

    print("\n=== Treinando baseline (sem poda) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)
    evaluate_model(model, test_loader, label="Baseline (antes da poda)")

    # ============================================================
    # Poda L1 não-estruturada (SEM fine-tuning)
    # ============================================================
    PRUNE_AMOUNT = 0.30  # ajuste aqui se quiser testar outros níveis
    print(f"\n=== Aplicando poda L1 não-estruturada (amount={PRUNE_AMOUNT:.2f}) ===")
    apply_l1_unstructured(model, amount=PRUNE_AMOUNT)
    report_sparsity(model, header="Sparsity após aplicar a poda (reparametrizada)")

    # Avaliação imediatamente após a poda (sem treinar de novo)
    evaluate_model(model, test_loader, label="Após poda L1 (sem fine-tuning)")

    # ============================================================
    # Remover reparametrização e reavaliar
    # ============================================================
    print("\n=== Removendo reparametrização da poda (zeros permanentes) ===")
    remove_pruning_reparam(model)
    report_sparsity(model, header="Sparsity final (reparam removida)")
    evaluate_model(model, test_loader, label="Após poda L1 (reparam removida, sem fine-tuning)")

if __name__ == "__main__":
    main()



=== Treinando baseline (sem poda) ===
Epoch 001/100  - Train MSE: 6.248398
Epoch 010/100  - Train MSE: 0.262171
Epoch 020/100  - Train MSE: 0.264250
Epoch 030/100  - Train MSE: 0.257848
Epoch 040/100  - Train MSE: 0.255421
Epoch 050/100  - Train MSE: 0.259054
Epoch 060/100  - Train MSE: 0.256649
Epoch 070/100  - Train MSE: 0.255296
Epoch 080/100  - Train MSE: 0.255939
Epoch 090/100  - Train MSE: 0.254143
Epoch 100/100  - Train MSE: 0.253061

📊 Baseline (antes da poda):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.258988 s
Tempo médio por amostra:   0.0431 ms

=== Aplicando poda L1 não-estruturada (amount=0.30) ===

🔎 Sparsity após aplicar a poda (reparametrizada):
  net.0                     sparsity=0.292
  net.2                     sparsity=0.299
  net.5                     sparsity=0.300
  net.7                     sparsity=0.312

📊 Após poda L1 (sem fine-tuning):
MAE:   0.1807
RMSE:  1.0713
R²:    0.9322
Tempo total de inferência: 0.222869 s
Tempo médio p

In [1]:
# cnn_regressao_poda_l1_com_finetune.py
# ============================================================
# Regressão de "Precipitação dia" com CNN 1D (PyTorch) + poda L1
# COM fine-tuning após a poda.
# Pré-processamento exatamente como especificado.
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.utils.prune as prune

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE  = 100       # treino inicial (baseline)
LR_BASELINE      = 1e-3
BATCH_SIZE       = 64
SEED             = 42

# Poda L1
PRUNE_AMOUNT     = 0.30      # 30% dos pesos zerados em cada camada Conv/Linear

# Fine-tuning pós-poda
EPOCHS_FINETUNE  = 10        # ajuste aqui se quiser
LR_FINETUNE      = 1e-3

# Forçar CPU
DEVICE = torch.device("cpu")

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

def layer_sparsity(module: nn.Module) -> float:
    w = module.weight.detach().cpu().numpy()
    return float((w == 0).mean())

def report_sparsity(model: nn.Module, header="Sparsity por camada"):
    print(f"\n🔎 {header}:")
    for name, m in model.named_modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            print(f"  {name:<25s} sparsity={layer_sparsity(m):.3f}")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 16*length)
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)    # (N,)

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)            # CPU direto
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 10 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:03d}/{epochs}  - Train MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Poda L1 (não-estruturada) -----------------------
def apply_l1_unstructured(model: nn.Module, amount: float):
    # Aplica L1 não-estruturada em Conv1d e Linear (apenas nos pesos)
    for m in model.modules():
        if isinstance(m, nn.Conv1d):
            prune.l1_unstructured(m, name="weight", amount=amount)
    for m in model.modules():
        if isinstance(m, nn.Linear):
            prune.l1_unstructured(m, name="weight", amount=amount)

def remove_pruning_reparam(model: nn.Module):
    # Remove reparametrização (weight_orig/weight_mask) e fixa zeros permanentemente
    for m in model.modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            if hasattr(m, "weight_mask"):
                prune.remove(m, "weight")

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # ============================================================
    # PRÉ-PROCESSAMENTO (exatamente como informado)
    # ============================================================
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # ============================================================
    # BASELINE: treino e avaliação ANTES da poda
    # ============================================================
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)

    print("\n=== Treinando baseline (sem poda) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)
    evaluate_model(model, test_loader, label="Baseline (antes da poda)")

    # ============================================================
    # Poda L1 não-estruturada
    # ============================================================
    print(f"\n=== Aplicando poda L1 não-estruturada (amount={PRUNE_AMOUNT:.2f}) ===")
    apply_l1_unstructured(model, amount=PRUNE_AMOUNT)
    report_sparsity(model, header="Sparsity após aplicar a poda (reparametrizada)")

    # Avaliar imediatamente após a poda (sem treino) — impacto "bruto"
    evaluate_model(model, test_loader, label="Após poda L1 (sem fine-tuning)")

    # ============================================================
    # Fine-tuning pós-poda
    # ============================================================
    print("\n=== Fine-tuning pós-poda ===")
    train_model(model, train_loader, epochs=EPOCHS_FINETUNE, lr=LR_FINETUNE)

    # Avaliar após fine-tune (ainda com reparam)
    evaluate_model(model, test_loader, label="Após poda L1 + fine-tuning (reparam presente)")

    # ============================================================
    # Remover reparametrização e reavaliar
    # ============================================================
    print("\n=== Removendo reparametrização da poda (zeros permanentes) ===")
    remove_pruning_reparam(model)
    report_sparsity(model, header="Sparsity final (reparam removida)")
    evaluate_model(model, test_loader, label="Após poda L1 + fine-tuning (reparam removida)")

if __name__ == "__main__":
    main()



=== Treinando baseline (sem poda) ===
Epoch 001/100  - Train MSE: 6.248398
Epoch 010/100  - Train MSE: 0.262171
Epoch 020/100  - Train MSE: 0.264250
Epoch 030/100  - Train MSE: 0.257848
Epoch 040/100  - Train MSE: 0.255421
Epoch 050/100  - Train MSE: 0.259054
Epoch 060/100  - Train MSE: 0.256649
Epoch 070/100  - Train MSE: 0.255296
Epoch 080/100  - Train MSE: 0.255939
Epoch 090/100  - Train MSE: 0.254143
Epoch 100/100  - Train MSE: 0.253061

📊 Baseline (antes da poda):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.175219 s
Tempo médio por amostra:   0.0292 ms

=== Aplicando poda L1 não-estruturada (amount=0.30) ===

🔎 Sparsity após aplicar a poda (reparametrizada):
  net.0                     sparsity=0.292
  net.2                     sparsity=0.299
  net.5                     sparsity=0.300
  net.7                     sparsity=0.312

📊 Após poda L1 (sem fine-tuning):
MAE:   0.1807
RMSE:  1.0713
R²:    0.9322
Tempo total de inferência: 0.194088 s
Tempo médio p

In [4]:
# cnn_regressao_poda_l2_estruturada_sem_finetune.py
# ============================================================
# Regressão "Precipitação dia" com CNN 1D (PyTorch)
# Poda estruturada L2 (ln_structured, n=2) SEM fine-tuning
# - Conv1d: zera filtros (dim=0)
# - Linear: zera neurônios de saída (dim=0)
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.utils.prune as prune

# ----------------------- Config -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE = 100
LR_BASELINE     = 1e-3
BATCH_SIZE      = 64
SEED            = 42

# Poda estruturada L2
PRUNE_AMOUNT    = 0.30   # proporção de filtros/neurônios a serem zerados por camada
N_NORM          = 2      # L2
DIM_CONV        = 0      # prune filtros (out_channels)
DIM_LINEAR      = 0      # prune neurônios de saída (out_features)

DEVICE = torch.device("cpu")

# ----------------------- Utils -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

def layer_sparsity(module: nn.Module) -> float:
    w = module.weight.detach().cpu().numpy()
    return float((w == 0).mean())

def report_sparsity(model: nn.Module, header="Sparsity por camada"):
    print(f"\n🔎 {header}:")
    for name, m in model.named_modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            print(f"  {name:<25s} sparsity={layer_sparsity(m):.3f}")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 16*length)
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 10 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:03d}/{epochs} - Train MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Poda estruturada L2 -----------------------
def apply_l2_structured(model: nn.Module, amount: float, n_norm: int = 2):
    # Conv1d: prune filtros inteiros (dim=0)
    for m in model.modules():
        if isinstance(m, nn.Conv1d):
            prune.ln_structured(m, name="weight", amount=amount, n=n_norm, dim=DIM_CONV)
    # Linear: prune neurônios de saída inteiros (dim=0)
    for m in model.modules():
        if isinstance(m, nn.Linear):
            prune.ln_structured(m, name="weight", amount=amount, n=n_norm, dim=DIM_LINEAR)

def remove_pruning_reparam(model: nn.Module):
    for m in model.modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            if hasattr(m, "weight_mask"):
                prune.remove(m, "weight")

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # Pré-processamento
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # Baseline
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando baseline (sem poda) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)
    evaluate_model(model, test_loader, label="Baseline (antes da poda)")

    # Poda L2 estruturada (impacto bruto)
    print(f"\n=== Aplicando poda estruturada L2 (amount={PRUNE_AMOUNT:.2f}, n={N_NORM}, dim=0) ===")
    apply_l2_structured(model, amount=PRUNE_AMOUNT, n_norm=N_NORM)
    report_sparsity(model, header="Sparsity após aplicar a poda (reparametrizada)")
    evaluate_model(model, test_loader, label="Após poda L2 estruturada (sem fine-tuning)")

    # Remover reparam e reavaliar
    print("\n=== Removendo reparametrização da poda (zeros permanentes) ===")
    remove_pruning_reparam(model)
    report_sparsity(model, header="Sparsity final (reparam removida)")
    evaluate_model(model, test_loader, label="Após poda L2 estruturada (reparam removida)")

if __name__ == "__main__":
    main()



=== Treinando baseline (sem poda) ===
Epoch 001/100 - Train MSE: 6.248398
Epoch 010/100 - Train MSE: 0.262171
Epoch 020/100 - Train MSE: 0.264250
Epoch 030/100 - Train MSE: 0.257848
Epoch 040/100 - Train MSE: 0.255421
Epoch 050/100 - Train MSE: 0.259054
Epoch 060/100 - Train MSE: 0.256649
Epoch 070/100 - Train MSE: 0.255296
Epoch 080/100 - Train MSE: 0.255939
Epoch 090/100 - Train MSE: 0.254143
Epoch 100/100 - Train MSE: 0.253061

📊 Baseline (antes da poda):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.450163 s
Tempo médio por amostra:   0.0749 ms

=== Aplicando poda estruturada L2 (amount=0.30, n=2, dim=0) ===

🔎 Sparsity após aplicar a poda (reparametrizada):
  net.0                     sparsity=0.250
  net.2                     sparsity=0.312
  net.5                     sparsity=0.312
  net.7                     sparsity=0.000

📊 Após poda L2 estruturada (sem fine-tuning):
MAE:   0.8318
RMSE:  1.9054
R²:    0.7854
Tempo total de inferência: 0.611392 s
Temp

In [5]:
# cnn_regressao_poda_l2_estruturada_com_finetune.py
# ============================================================
# Regressão "Precipitação dia" com CNN 1D (PyTorch)
# Poda estruturada L2 (ln_structured, n=2) COM fine-tuning
# - Conv1d: zera filtros (dim=0)
# - Linear: zera neurônios de saída (dim=0)
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.utils.prune as prune

# ----------------------- Config -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE  = 100
LR_BASELINE      = 1e-3
BATCH_SIZE       = 64
SEED             = 42

# Poda estruturada L2
PRUNE_AMOUNT     = 0.30   # proporção de filtros/neurônios a zerar por camada
N_NORM           = 2      # L2
DIM_CONV         = 0      # out_channels (filtros)
DIM_LINEAR       = 0      # out_features (neurônios de saída)

# Fine-tuning pós-poda (mesmo esquema para todas as podas)
EPOCHS_FINETUNE  = 10
LR_FINETUNE      = 1e-3

DEVICE = torch.device("cpu")

# ----------------------- Utils -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

def layer_sparsity(module: nn.Module) -> float:
    w = module.weight.detach().cpu().numpy()
    return float((w == 0).mean())

def report_sparsity(model: nn.Module, header="Sparsity por camada"):
    print(f"\n🔎 {header}:")
    for name, m in model.named_modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            print(f"  {name:<25s} sparsity={layer_sparsity(m):.3f}")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 16*length)
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 10 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:03d}/{epochs} - Train MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Poda L2 estruturada -----------------------
def apply_l2_structured(model: nn.Module, amount: float, n_norm: int = 2):
    # Conv1d: zera filtros (dim=0)
    for m in model.modules():
        if isinstance(m, nn.Conv1d):
            prune.ln_structured(m, name="weight", amount=amount, n=n_norm, dim=DIM_CONV)
    # Linear: zera neurônios de saída (dim=0)
    for m in model.modules():
        if isinstance(m, nn.Linear):
            prune.ln_structured(m, name="weight", amount=amount, n=n_norm, dim=DIM_LINEAR)

def remove_pruning_reparam(model: nn.Module):
    for m in model.modules():
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            if hasattr(m, "weight_mask"):
                prune.remove(m, "weight")

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # Pré-processamento (igual aos demais scripts)
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # Baseline
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando baseline (sem poda) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)
    evaluate_model(model, test_loader, label="Baseline (antes da poda)")

    # Poda estruturada L2 (impacto bruto)
    print(f"\n=== Aplicando poda estruturada L2 (amount={PRUNE_AMOUNT:.2f}, n={N_NORM}, dim=0) ===")
    apply_l2_structured(model, amount=PRUNE_AMOUNT, n_norm=N_NORM)
    report_sparsity(model, header="Sparsity após aplicar a poda (reparametrizada)")
    evaluate_model(model, test_loader, label="Após poda L2 estruturada (sem fine-tuning)")

    # Fine-tuning pós-poda (mesma config usada nas demais podas)
    print("\n=== Fine-tuning pós-poda L2 estruturada ===")
    train_model(model, train_loader, epochs=EPOCHS_FINETUNE, lr=LR_FINETUNE)
    evaluate_model(model, test_loader, label="Após poda L2 estruturada + fine-tuning (reparam presente)")

    # Remover reparam e reavaliar
    print("\n=== Removendo reparametrização da poda (zeros permanentes) ===")
    remove_pruning_reparam(model)
    report_sparsity(model, header="Sparsity final (reparam removida)")
    evaluate_model(model, test_loader, label="Após poda L2 estruturada + fine-tuning (reparam removida)")

if __name__ == "__main__":
    main()



=== Treinando baseline (sem poda) ===
Epoch 001/100 - Train MSE: 6.248398
Epoch 010/100 - Train MSE: 0.262171
Epoch 020/100 - Train MSE: 0.264250
Epoch 030/100 - Train MSE: 0.257848
Epoch 040/100 - Train MSE: 0.255421
Epoch 050/100 - Train MSE: 0.259054
Epoch 060/100 - Train MSE: 0.256649
Epoch 070/100 - Train MSE: 0.255296
Epoch 080/100 - Train MSE: 0.255939
Epoch 090/100 - Train MSE: 0.254143
Epoch 100/100 - Train MSE: 0.253061

📊 Baseline (antes da poda):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.466527 s
Tempo médio por amostra:   0.0776 ms

=== Aplicando poda estruturada L2 (amount=0.30, n=2, dim=0) ===

🔎 Sparsity após aplicar a poda (reparametrizada):
  net.0                     sparsity=0.250
  net.2                     sparsity=0.312
  net.5                     sparsity=0.312
  net.7                     sparsity=0.000

📊 Após poda L2 estruturada (sem fine-tuning):
MAE:   0.8318
RMSE:  1.9054
R²:    0.7854
Tempo total de inferência: 0.625842 s
Temp

In [None]:
# Relatório das podas

# Otimização 2: Quantização

- Quantização dinâmica
- Quantização Estática pós-treino
- Quantização Consciente de Treino

In [6]:
# cnn_regressao_baseline.py
# ============================================================
# Regressão de "Precipitação dia" com CNN 1D (PyTorch) - BASELINE
# Com SEED e LR_BASELINE para reprodutibilidade.
# Sem poda / quantização / qualquer otimização.
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# >>>>>>>>>>>>>>>>>>>>>>> ADIÇÃO <<<<<<<<<<<<<<<<<<<<<<<
import torch.ao.quantization as quant  # para quantização dinâmica
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE = 100
LR_BASELINE = 1e-3
BATCH_SIZE = 64
SEED = 42

DEVICE = torch.device("cpu")  # forçado CPU, como você pediu

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 16*length)
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)    # (N,)

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)            # CPU direto
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:02d}/{epochs}  - Train MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # ============================================================
    # PRÉ-PROCESSAMENTO (exatamente como você definiu)
    # ============================================================
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # ============================================================
    # BASELINE: treino e avaliação
    # ============================================================
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)

    print("\n=== Treinando baseline (sem otimizações) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)
    evaluate_model(model, test_loader, label="Baseline (sem otimizações)")

    # ============================================================
    # QUANTIZAÇÃO DINÂMICA (apenas Linear) - mínima mudança
    # ============================================================
    model.eval()
    qmodel = quant.quantize_dynamic(
        model,
        {nn.Linear},          # quantiza apenas camadas Lineares
        dtype=torch.qint8     # pesos INT8; ativações permanecem em float
    )
    evaluate_model(qmodel, test_loader, label="Quantização Dinâmica (INT8)")

if __name__ == "__main__":
    main()



=== Treinando baseline (sem otimizações) ===
Epoch 01/100  - Train MSE: 6.248398
Epoch 05/100  - Train MSE: 0.270467
Epoch 10/100  - Train MSE: 0.262171
Epoch 15/100  - Train MSE: 0.258560
Epoch 20/100  - Train MSE: 0.264250
Epoch 25/100  - Train MSE: 0.264546
Epoch 30/100  - Train MSE: 0.257848
Epoch 35/100  - Train MSE: 0.268403
Epoch 40/100  - Train MSE: 0.255421
Epoch 45/100  - Train MSE: 0.264791
Epoch 50/100  - Train MSE: 0.259054
Epoch 55/100  - Train MSE: 0.253974
Epoch 60/100  - Train MSE: 0.256649
Epoch 65/100  - Train MSE: 0.261080
Epoch 70/100  - Train MSE: 0.255296
Epoch 75/100  - Train MSE: 0.252065
Epoch 80/100  - Train MSE: 0.255939
Epoch 85/100  - Train MSE: 0.253428
Epoch 90/100  - Train MSE: 0.254143
Epoch 95/100  - Train MSE: 0.256427
Epoch 100/100  - Train MSE: 0.253061

📊 Baseline (sem otimizações):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.127923 s
Tempo médio por amostra:   0.0213 ms

📊 Quantização Dinâmica (INT8):
MAE:   0.1428
RMS

In [10]:
# cnn_regressao_baseline.py
# ============================================================
# Regressão de "Precipitação dia" com CNN 1D (PyTorch) - BASELINE
# + PTQ Estática (INT8) com backend auto (fbgemm/qnnpack) e última camada em float
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# >>> PTQ estática (FX) - imports
from torch.ao.quantization import get_default_qconfig
try:
    from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx
except ImportError:
    from torch.ao.quantization.fx import prepare_fx, convert_fx  # type: ignore
from torch.ao.quantization.qconfig_mapping import QConfigMapping

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE = 100
LR_BASELINE = 1e-3
BATCH_SIZE = 64
SEED = 42

DEVICE = torch.device("cpu")  # forçado CPU

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 16*length)
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)             # <- manter em float na PTQ
        )
    def forward(self, x):
        return self.net(x).squeeze(1)    # (N,)

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)            # CPU direto
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:02d}/{epochs}  - Train MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # ============================================================
    # PRÉ-PROCESSAMENTO (igual ao baseline)
    # ============================================================
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # ============================================================
    # BASELINE: treino e avaliação
    # ============================================================
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)

    print("\n=== Treinando baseline (sem otimizações) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)
    evaluate_model(model, test_loader, label="Baseline (sem otimizações)")

    # ============================================================
    # PTQ ESTÁTICA (INT8) — engine auto + QConfigMapping
    # ============================================================
    print("\n=== PTQ Estática (INT8) ===")
    engines = torch.backends.quantized.supported_engines
    engine = "fbgemm" if "fbgemm" in engines else ("qnnpack" if "qnnpack" in engines else None)
    if engine is None:
        raise RuntimeError(f"Nenhum backend de quantização suportado encontrado: {engines}")
    torch.backends.quantized.engine = engine
    print(f"Backend de quantização: {engine}")

    example_input = torch.randn(1, 1, X_train_cnn.shape[2])

    qconfig = get_default_qconfig(engine)
    qconfig_mapping = (
        QConfigMapping()
        .set_global(qconfig)              # aplica a todos
        .set_module_name("net.7", None)   # NÃO quantizar a última Linear
    )

    model.eval()
    prepared = prepare_fx(model, qconfig_mapping, example_inputs=example_input)

    with torch.inference_mode():
        for xb, _ in train_loader:
            prepared(xb)

    quantized_model = convert_fx(prepared).to(DEVICE).eval()

    evaluate_model(quantized_model, test_loader, label="PTQ Estática (INT8, engine auto)")

if __name__ == "__main__":
    main()



=== Treinando baseline (sem otimizações) ===
Epoch 01/100  - Train MSE: 6.248398
Epoch 05/100  - Train MSE: 0.270467
Epoch 10/100  - Train MSE: 0.262171
Epoch 15/100  - Train MSE: 0.258560
Epoch 20/100  - Train MSE: 0.264250
Epoch 25/100  - Train MSE: 0.264546
Epoch 30/100  - Train MSE: 0.257848
Epoch 35/100  - Train MSE: 0.268403
Epoch 40/100  - Train MSE: 0.255421
Epoch 45/100  - Train MSE: 0.264791
Epoch 50/100  - Train MSE: 0.259054
Epoch 55/100  - Train MSE: 0.253974
Epoch 60/100  - Train MSE: 0.256649
Epoch 65/100  - Train MSE: 0.261080
Epoch 70/100  - Train MSE: 0.255296
Epoch 75/100  - Train MSE: 0.252065
Epoch 80/100  - Train MSE: 0.255939
Epoch 85/100  - Train MSE: 0.253428
Epoch 90/100  - Train MSE: 0.254143
Epoch 95/100  - Train MSE: 0.256427
Epoch 100/100  - Train MSE: 0.253061

📊 Baseline (sem otimizações):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.171769 s
Tempo médio por amostra:   0.0286 ms

=== PTQ Estática (INT8) ===
Backend de quantizaç




📊 PTQ Estática (INT8, engine auto):
MAE:   0.1423
RMSE:  1.0706
R²:    0.9322
Tempo total de inferência: 0.047312 s
Tempo médio por amostra:   0.0079 ms


In [12]:
# cnn_regressao_qat.py
# ============================================================
# Regressão "Precipitação dia" com CNN 1D (PyTorch) - QAT ONLY
# - Backend auto (fbgemm/qnnpack)
# - Última camada em float
# - Fine-tuning curto com fake-quant + conversão para INT8
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# >>> QAT (FX) - imports
from torch.ao.quantization import get_default_qat_qconfig
try:
    from torch.ao.quantization.quantize_fx import prepare_qat_fx, convert_fx
except ImportError:
    from torch.ao.quantization.fx import prepare_qat_fx, convert_fx  # type: ignore
from torch.ao.quantization.qconfig_mapping import QConfigMapping

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_BASELINE = 100
LR_BASELINE = 1e-3
BATCH_SIZE = 64
SEED = 42

# QAT: fine-tuning curto
EPOCHS_QAT = 10         # ajuste conforme necessidade (5–20 costuma bastar)
LR_QAT = 3e-4           # LR menor para estabilidade durante QAT

DEVICE = torch.device("cpu")  # CPU

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelo -----------------------
class CNNRegressor(nn.Module):
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 16*length)
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)             # manter em float na QAT/PTQ
        )
    def forward(self, x):
        return self.net(x).squeeze(1)    # (N,)

# ----------------------- Treino / Avaliação -----------------------
def train_model(model, loader, epochs, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
            print(f"Epoch {epoch:02d}/{epochs}  - Train MSE: {epoch_loss:.6f}")

def fine_tune_qat(model_qat, loader, epochs, lr):
    """Fine-tuning com fake-quant (QAT)."""
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model_qat.parameters(), lr=lr)
    model_qat.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model_qat(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        print(f"[QAT] Epoch {epoch:02d}/{epochs} - Train MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # ============================================================
    # PRÉ-PROCESSAMENTO (igual ao baseline)
    # ============================================================
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # ============================================================
    # 1) Treino BASELINE (FP32)
    # ============================================================
    model = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando baseline (FP32) ===")
    train_model(model, train_loader, epochs=EPOCHS_BASELINE, lr=LR_BASELINE)
    evaluate_model(model, test_loader, label="Baseline FP32")

    # ============================================================
    # 2) QAT: preparar modelo com fake-quant, fine-tuning e conversão
    # ============================================================
    print("\n=== QAT (fake-quant) + Conversão INT8 ===")

    # Backend de quantização adequado
    engines = torch.backends.quantized.supported_engines
    engine = "fbgemm" if "fbgemm" in engines else ("qnnpack" if "qnnpack" in engines else None)
    if engine is None:
        raise RuntimeError(f"Nenhum backend de quantização suportado encontrado: {engines}")
    torch.backends.quantized.engine = engine
    print(f"Backend de quantização: {engine}")

    # QConfig para QAT e mapeamento (exclui última camada)
    from torch.ao.quantization import get_default_qat_qconfig, disable_observer
    from torch.nn.utils import clip_grad_norm_

    qconfig = get_default_qat_qconfig(engine)
    qconfig_mapping = (
        QConfigMapping()
        .set_global(qconfig)              # aplica a todos
        .set_module_name("net.7", None)   # NÃO quantizar a última Linear
    )

    example_input = torch.randn(1, 1, X_train_cnn.shape[2])

    # **PREPARE EM MODO TREINO**
    model.train()
    model_qat = prepare_qat_fx(model, qconfig_mapping, example_inputs=example_input)

    # Fine-tuning QAT com congelamento de observadores na metade
    EPOCHS_QAT = 15
    LR_QAT = 1e-4
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model_qat.parameters(), lr=LR_QAT)

    for epoch in range(1, EPOCHS_QAT + 1):
        model_qat.train()
        if epoch == (EPOCHS_QAT // 2) + 1:
            # congela ranges (scales/zero-points) para estabilizar o resto do treino
            disable_observer(model_qat)

        running = 0.0
        for xb, yb in train_loader:
            pred = model_qat(xb)
            loss = criterion(pred, yb)
            optimizer.zero_grad()
            loss.backward()
            # clipping leve para estabilidade
            clip_grad_norm_(model_qat.parameters(), max_norm=1.0)
            optimizer.step()
            running += loss.item() * xb.size(0)
        running /= len(train_loader.dataset)
        print(f"[QAT] Epoch {epoch:02d}/{EPOCHS_QAT} - Train MSE: {running:.6f}")

    # Converter para quantizado real (INT8 onde suportado)
    model_qat.eval()
    quantized_model = convert_fx(model_qat).to(DEVICE).eval()

    # Avaliar modelo quantizado
    evaluate_model(quantized_model, test_loader, label="QAT → INT8 (fix)")

if __name__ == "__main__":
    main()



=== Treinando baseline (FP32) ===
Epoch 01/100  - Train MSE: 6.248398
Epoch 05/100  - Train MSE: 0.270467
Epoch 10/100  - Train MSE: 0.262171
Epoch 15/100  - Train MSE: 0.258560
Epoch 20/100  - Train MSE: 0.264250
Epoch 25/100  - Train MSE: 0.264546
Epoch 30/100  - Train MSE: 0.257848
Epoch 35/100  - Train MSE: 0.268403
Epoch 40/100  - Train MSE: 0.255421
Epoch 45/100  - Train MSE: 0.264791
Epoch 50/100  - Train MSE: 0.259054
Epoch 55/100  - Train MSE: 0.253974
Epoch 60/100  - Train MSE: 0.256649
Epoch 65/100  - Train MSE: 0.261080
Epoch 70/100  - Train MSE: 0.255296
Epoch 75/100  - Train MSE: 0.252065
Epoch 80/100  - Train MSE: 0.255939
Epoch 85/100  - Train MSE: 0.253428
Epoch 90/100  - Train MSE: 0.254143
Epoch 95/100  - Train MSE: 0.256427
Epoch 100/100  - Train MSE: 0.253061

📊 Baseline FP32:
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.159183 s
Tempo médio por amostra:   0.0265 ms

=== QAT (fake-quant) + Conversão INT8 ===
Backend de quantização: fbgemm



[QAT] Epoch 01/15 - Train MSE: 2.920815
[QAT] Epoch 02/15 - Train MSE: 3.455787
[QAT] Epoch 03/15 - Train MSE: 3.481976
[QAT] Epoch 04/15 - Train MSE: 3.542840
[QAT] Epoch 05/15 - Train MSE: 3.610246
[QAT] Epoch 06/15 - Train MSE: 3.696140
[QAT] Epoch 07/15 - Train MSE: 3.468919
[QAT] Epoch 08/15 - Train MSE: 3.490423
[QAT] Epoch 09/15 - Train MSE: 3.440322
[QAT] Epoch 10/15 - Train MSE: 3.646849
[QAT] Epoch 11/15 - Train MSE: 3.684465
[QAT] Epoch 12/15 - Train MSE: 3.456197
[QAT] Epoch 13/15 - Train MSE: 3.544619
[QAT] Epoch 14/15 - Train MSE: 3.661553
[QAT] Epoch 15/15 - Train MSE: 3.564728

📊 QAT → INT8 (fix):
MAE:   0.1268
RMSE:  1.2724
R²:    0.9043
Tempo total de inferência: 0.117162 s
Tempo médio por amostra:   0.0195 ms


# Otimização 3: Knowledge Distillation
1. Response KD com Huber
2. Feature KD (uma camada) com projetor 1x1
3. Relational KD no embedding (pós-flatten)

In [14]:
# cnn_regressao_kd_response.py
# ============================================================
# Regressão "Precipitação dia" - Knowledge Distillation (Response KD + Huber)
# - Teacher = sua CNN baseline (FP32)
# - Student = CNN menor
# - Loss: L = α*Huber(y, y_s) + (1-α)*Huber(y_t, y_s)
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_TEACHER = 100
LR_TEACHER = 1e-3

EPOCHS_STUDENT = 60
LR_STUDENT = 1e-3

BATCH_SIZE = 64
SEED = 42
ALPHA = 0.5        # peso da perda supervisionada vs distilação
HUBER_DELTA = 1.0  # delta da HuberLoss

DEVICE = torch.device("cpu")  # CPU

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelos -----------------------
class CNNRegressor(nn.Module):
    """Teacher (baseline)"""
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 16*length)
            nn.Linear(16 * length, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

class CNNRegressorStudent(nn.Module):
    """Student menor"""
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=4, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=4, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),                 # (N, 8*length)
            nn.Linear(8 * length, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

# ----------------------- Treino / Avaliação -----------------------
def train_supervised(model, loader, epochs, lr, label="Train"):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
            print(f"[{label}] Epoch {epoch:02d}/{epochs} - MSE: {epoch_loss:.6f}")

def train_student_kd_response(student, teacher, loader, epochs, lr, alpha=0.5, delta=1.0):
    """
    Response KD para regressão com Huber:
    L = α*Huber(y, y_s) + (1-α)*Huber(y_t, y_s)
    """
    huber = nn.HuberLoss(delta=delta)
    optimizer = optim.Adam(student.parameters(), lr=lr)

    teacher.eval()
    for p in teacher.parameters():
        p.requires_grad = False

    for epoch in range(1, epochs + 1):
        student.train()
        run_loss = 0.0
        for xb, yb in loader:
            with torch.no_grad():
                y_teacher = teacher(xb)  # alvo suave

            y_student = student(xb)
            loss_sup  = huber(y_student, yb)
            loss_dist = huber(y_student, y_teacher)
            loss = alpha * loss_sup + (1.0 - alpha) * loss_dist

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            run_loss += loss.item() * xb.size(0)

        run_loss /= len(loader.dataset)
        if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
            print(f"[KD-Response] Epoch {epoch:02d}/{epochs} - Loss: {run_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # ============================================================
    # PRÉ-PROCESSAMENTO (mesmo do baseline)
    # ============================================================
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # ============================================================
    # 1) Teacher: treino e avaliação
    # ============================================================
    teacher = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando TEACHER (baseline) ===")
    train_supervised(teacher, train_loader, epochs=EPOCHS_TEACHER, lr=LR_TEACHER, label="Teacher")
    evaluate_model(teacher, test_loader, label="Teacher (FP32)")

    # ============================================================
    # 2) Student: KD (Response + Huber)
    # ============================================================
    student = CNNRegressorStudent(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando STUDENT com Response KD (Huber) ===")
    train_student_kd_response(student, teacher, train_loader,
                              epochs=EPOCHS_STUDENT, lr=LR_STUDENT,
                              alpha=ALPHA, delta=HUBER_DELTA)
    evaluate_model(student, test_loader, label=f"Student (KD-Response, α={ALPHA}, δ={HUBER_DELTA})")

if __name__ == "__main__":
    main()



=== Treinando TEACHER (baseline) ===
[Teacher] Epoch 01/100 - MSE: 6.248398
[Teacher] Epoch 05/100 - MSE: 0.270467
[Teacher] Epoch 10/100 - MSE: 0.262171
[Teacher] Epoch 15/100 - MSE: 0.258560
[Teacher] Epoch 20/100 - MSE: 0.264250
[Teacher] Epoch 25/100 - MSE: 0.264546
[Teacher] Epoch 30/100 - MSE: 0.257848
[Teacher] Epoch 35/100 - MSE: 0.268403
[Teacher] Epoch 40/100 - MSE: 0.255421
[Teacher] Epoch 45/100 - MSE: 0.264791
[Teacher] Epoch 50/100 - MSE: 0.259054
[Teacher] Epoch 55/100 - MSE: 0.253974
[Teacher] Epoch 60/100 - MSE: 0.256649
[Teacher] Epoch 65/100 - MSE: 0.261080
[Teacher] Epoch 70/100 - MSE: 0.255296
[Teacher] Epoch 75/100 - MSE: 0.252065
[Teacher] Epoch 80/100 - MSE: 0.255939
[Teacher] Epoch 85/100 - MSE: 0.253428
[Teacher] Epoch 90/100 - MSE: 0.254143
[Teacher] Epoch 95/100 - MSE: 0.256427
[Teacher] Epoch 100/100 - MSE: 0.253061

📊 Teacher (FP32):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.575093 s
Tempo médio por amostra:   0.0957 ms

=== T

In [15]:
# cnn_regressao_kd_feature.py
# ============================================================
# Regressão "Precipitação dia" - Knowledge Distillation (Feature KD + Response KD)
# - Teacher = sua CNN baseline (FP32)
# - Student = CNN menor
# - Loss total:
#   L = α*Huber(y, y_s) + (1-α)*Huber(y_t, y_s) + β*MSE( f_T, P(f_S) )
#     onde P é um projetor 1x1 que mapeia canais do student para os do teacher
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_TEACHER = 100
LR_TEACHER = 1e-3

EPOCHS_STUDENT = 60
LR_STUDENT = 1e-3

BATCH_SIZE = 64
SEED = 42

# Pesos das perdas
ALPHA = 0.5       # supervisionada vs resposta do teacher
BETA = 0.1        # termo de feature KD
HUBER_DELTA = 1.0

DEVICE = torch.device("cpu")  # CPU

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelos -----------------------
class CNNRegressor(nn.Module):
    """Teacher (baseline)"""
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),  # 0
            nn.ReLU(),                                                           # 1
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1), # 2
            nn.ReLU(),                                                           # 3 <- capturar aqui (pós 2ª Conv)
            nn.Flatten(),                                                        # 4
            nn.Linear(16 * length, 32),                                          # 5
            nn.ReLU(),                                                           # 6
            nn.Linear(32, 1)                                                     # 7
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

class CNNRegressorStudent(nn.Module):
    """Student menor (menos canais e hidden reduzido)"""
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=4, kernel_size=3, padding=1),  # 0
            nn.ReLU(),                                                           # 1
            nn.Conv1d(in_channels=4, out_channels=8, kernel_size=3, padding=1),  # 2
            nn.ReLU(),                                                           # 3 <- capturar aqui (pós 2ª Conv)
            nn.Flatten(),                                                        # 4
            nn.Linear(8 * length, 16),                                           # 5
            nn.ReLU(),                                                           # 6
            nn.Linear(16, 1)                                                     # 7
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

# ----------------------- Treino / Avaliação -----------------------
def train_supervised(model, loader, epochs, lr, label="Train"):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
            print(f"[{label}] Epoch {epoch:02d}/{epochs} - MSE: {epoch_loss:.6f}")

def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- KD (Response + Feature 1×1) -----------------------
def train_student_kd_feature(student, teacher, loader, epochs, lr, alpha=0.5, beta=0.1, delta=1.0):
    """
    Treina student com:
      - Response KD (Huber) entre y_teacher e y_student
      - Feature KD (MSE) entre f_T e P(f_S), capturadas pós 2ª Conv (índice 3 nas sequentials)
    """
    huber = nn.HuberLoss(delta=delta)
    feat_mse = nn.MSELoss()
    optimizer = optim.Adam(student.parameters(), lr=lr)

    # Congela teacher
    teacher.eval()
    for p in teacher.parameters():
        p.requires_grad = False

    # Projetor 1x1 para casar canais: student C=8 -> teacher C=16
    projector = nn.Conv1d(in_channels=8, out_channels=16, kernel_size=1, bias=False).to(DEVICE)

    # Hooks para capturar features (pós 2ª Conv: índice 3 = ReLU)
    t_feat = {"val": None}
    s_feat = {"val": None}

    def t_hook(_m, _inp, out): t_feat.update(val=out.detach())
    def s_hook(_m, _inp, out): s_feat.update(val=out)

    t_handle = teacher.net[3].register_forward_hook(t_hook)
    s_handle = student.net[3].register_forward_hook(s_hook)

    try:
        for epoch in range(1, epochs + 1):
            student.train()
            projector.train()
            run_loss = 0.0

            for xb, yb in loader:
                # Forward teacher (sem grad)
                with torch.no_grad():
                    y_teacher = teacher(xb)

                # Forward student (hooks capturam s_feat['val'])
                y_student = student(xb)

                # Response KD (saídas)
                loss_sup  = huber(y_student, yb)
                loss_dist = huber(y_student, y_teacher)

                # Feature KD: projeta feature do student para os canais do teacher e compara
                if (t_feat["val"] is None) or (s_feat["val"] is None):
                    # Segurança: se por algum motivo o hook não populou, faz um forward “seco”
                    with torch.no_grad():
                        teacher(xb)
                    _ = student(xb)

                proj_s = projector(s_feat["val"])
                loss_feat = feat_mse(proj_s, t_feat["val"])

                loss = alpha * loss_sup + (1.0 - alpha) * loss_dist + beta * loss_feat

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                run_loss += loss.item() * xb.size(0)

            run_loss /= len(loader.dataset)
            if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
                print(f"[KD-Feature] Epoch {epoch:02d}/{epochs} - Loss: {run_loss:.6f}")

    finally:
        # Limpa hooks
        t_handle.remove()
        s_handle.remove()

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # ============================================================
    # PRÉ-PROCESSAMENTO (igual ao baseline)
    # ============================================================
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # ============================================================
    # 1) Teacher: treino e avaliação (FP32)
    # ============================================================
    teacher = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando TEACHER (baseline) ===")
    train_supervised(teacher, train_loader, epochs=EPOCHS_TEACHER, lr=LR_TEACHER, label="Teacher")
    evaluate_model(teacher, test_loader, label="Teacher (FP32)")

    # ============================================================
    # 2) Student: KD (Response + Feature 1×1)
    # ============================================================
    student = CNNRegressorStudent(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando STUDENT com Feature KD (1×1) + Response KD ===")
    train_student_kd_feature(student, teacher, train_loader,
                             epochs=EPOCHS_STUDENT, lr=LR_STUDENT,
                             alpha=ALPHA, beta=BETA, delta=HUBER_DELTA)

    evaluate_model(student, test_loader,
                   label=f"Student (Feature KD + Response KD, α={ALPHA}, β={BETA}, δ={HUBER_DELTA})")

if __name__ == "__main__":
    main()



=== Treinando TEACHER (baseline) ===
[Teacher] Epoch 01/100 - MSE: 6.248398
[Teacher] Epoch 05/100 - MSE: 0.270467
[Teacher] Epoch 10/100 - MSE: 0.262171
[Teacher] Epoch 15/100 - MSE: 0.258560
[Teacher] Epoch 20/100 - MSE: 0.264250
[Teacher] Epoch 25/100 - MSE: 0.264546
[Teacher] Epoch 30/100 - MSE: 0.257848
[Teacher] Epoch 35/100 - MSE: 0.268403
[Teacher] Epoch 40/100 - MSE: 0.255421
[Teacher] Epoch 45/100 - MSE: 0.264791
[Teacher] Epoch 50/100 - MSE: 0.259054
[Teacher] Epoch 55/100 - MSE: 0.253974
[Teacher] Epoch 60/100 - MSE: 0.256649
[Teacher] Epoch 65/100 - MSE: 0.261080
[Teacher] Epoch 70/100 - MSE: 0.255296
[Teacher] Epoch 75/100 - MSE: 0.252065
[Teacher] Epoch 80/100 - MSE: 0.255939
[Teacher] Epoch 85/100 - MSE: 0.253428
[Teacher] Epoch 90/100 - MSE: 0.254143
[Teacher] Epoch 95/100 - MSE: 0.256427
[Teacher] Epoch 100/100 - MSE: 0.253061

📊 Teacher (FP32):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.448194 s
Tempo médio por amostra:   0.0746 ms

=== T

In [16]:
# cnn_regressao_kd_rkd.py
# ============================================================
# Regressão "Precipitação dia" - Knowledge Distillation (RKD)
# - Teacher = sua CNN baseline (FP32)
# - Student = CNN menor
# - Loss total:
#   L = α*Huber(y, y_s) + (1-α)*Huber(y_t, y_s) + β*L_RKD
#   onde L_RKD = L_dist(pairwise) + γ*L_angle(sampled triplets)
# ============================================================

import time
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# ----------------------- Configuração -----------------------
CSV_PATH = "bd_EstacaoVargemFria_e_Pesca.csv"

EPOCHS_TEACHER = 100
LR_TEACHER = 1e-3

EPOCHS_STUDENT = 60
LR_STUDENT = 1e-3

BATCH_SIZE = 64
SEED = 42

# Pesos das perdas
ALPHA = 0.5        # supervisionada vs resposta do teacher
BETA = 0.1         # peso do termo RKD
GAMMA = 1.0        # peso relativo do termo de ângulo dentro do RKD
HUBER_DELTA = 1.0

# Triplets por minibatch para o termo de ângulo (amostragem)
MAX_TRIPLETS = 2048

DEVICE = torch.device("cpu")  # CPU

# ----------------------- Utilidades -----------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)

def to_cnn1d_shape(x: np.ndarray) -> np.ndarray:
    x = x.astype(np.float32)
    return x.reshape(x.shape[0], 1, x.shape[1])  # (N, 1, F)

def mae(y_true, y_pred):
    return float(np.mean(np.abs(y_pred - y_true)))

def rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))

def r2_score_np(y_true, y_pred):
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    return float(1.0 - ss_res / ss_tot) if ss_tot > 0 else float("nan")

# ----------------------- Dataset -----------------------
class WeatherDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)                         # (N, 1, F)
        self.y = torch.from_numpy(y.astype(np.float32).reshape(-1))  # (N,)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------- Modelos -----------------------
class CNNRegressor(nn.Module):
    """Teacher (baseline)"""
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, padding=1),  # 0
            nn.ReLU(),                                                           # 1
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1), # 2
            nn.ReLU(),                                                           # 3
            nn.Flatten(),                                                        # 4  <- EMBEDDING AQUI
            nn.Linear(16 * length, 32),                                          # 5
            nn.ReLU(),                                                           # 6
            nn.Linear(32, 1)                                                     # 7
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

class CNNRegressorStudent(nn.Module):
    """Student menor (menos canais e hidden reduzido)"""
    def __init__(self, in_channels=1, length=5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=4, kernel_size=3, padding=1),  # 0
            nn.ReLU(),                                                           # 1
            nn.Conv1d(in_channels=4, out_channels=8, kernel_size=3, padding=1),  # 2
            nn.ReLU(),                                                           # 3
            nn.Flatten(),                                                        # 4  <- EMBEDDING AQUI
            nn.Linear(8 * length, 16),                                           # 5
            nn.ReLU(),                                                           # 6
            nn.Linear(16, 1)                                                     # 7
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

# ----------------------- Métricas -----------------------
def evaluate_model(model, loader, label="Avaliação"):
    model.eval()
    y_true, y_pred = [], []
    t0 = time.time()
    with torch.no_grad():
        for xb, yb in loader:
            preds = model(xb).cpu().numpy()
            y_pred.append(preds)
            y_true.append(yb.cpu().numpy())
    t1 = time.time()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    _mae = mae(y_true, y_pred)
    _rmse = rmse(y_true, y_pred)
    _r2 = r2_score_np(y_true, y_pred)
    total_time = t1 - t0
    time_per_sample_ms = (total_time / len(loader.dataset)) * 1000.0

    print(f"\n📊 {label}:")
    print(f"MAE:   {_mae:.4f}")
    print(f"RMSE:  {_rmse:.4f}")
    print(f"R²:    {_r2:.4f}")
    print(f"Tempo total de inferência: {total_time:.6f} s")
    print(f"Tempo médio por amostra:   {time_per_sample_ms:.4f} ms")

    return _mae, _rmse, _r2, time_per_sample_ms

# ----------------------- RKD helpers -----------------------
def pairwise_distance_matrix(x: torch.Tensor, eps: float = 1e-12):
    # x: (B, D) -> D_ij = ||x_i - x_j||
    d = torch.cdist(x, x)  # (B, B)
    mean = d[d > 0].mean() if (d > 0).any() else torch.tensor(1.0, device=x.device)
    return d / (mean + eps)

def rkd_distance_loss(e_s: torch.Tensor, e_t: torch.Tensor):
    ds = pairwise_distance_matrix(e_s)
    dt = pairwise_distance_matrix(e_t)
    # compara apenas parte superior (i<j) para não duplicar
    idx = torch.triu_indices(ds.size(0), ds.size(1), offset=1)
    return nn.functional.mse_loss(ds[idx[0], idx[1]], dt[idx[0], idx[1]])

def sample_triplets(B: int, max_triplets: int, device):
    # amostra tríades (i, j, k) distintas
    import math, random
    n_all = B * (B - 1) * (B - 2) // 6  # aprox (i<j<k)
    t = min(max_triplets, n_all) if n_all > 0 else 0
    if t == 0:
        return None
    triplets = set()
    while len(triplets) < t:
        i, j, k = np.random.choice(B, 3, replace=False)
        triplets.add(tuple(sorted((int(i), int(j), int(k)))))
    triplets = np.array(list(triplets), dtype=np.int64)
    return torch.from_numpy(triplets).to(device)

def rkd_angle_loss(e_s: torch.Tensor, e_t: torch.Tensor, max_triplets=2048):
    """
    Define ângulos usando tríades (i, j, k): ∠(x_j - x_i, x_k - x_i)
    Compara cosenos dos ângulos (teacher vs student).
    """
    B = e_s.size(0)
    tri = sample_triplets(B, max_triplets, e_s.device)
    if tri is None:
        return torch.tensor(0.0, device=e_s.device)

    i = tri[:, 0]; j = tri[:, 1]; k = tri[:, 2]

    def angle_cos(e):
        v1 = e[j] - e[i]  # (t, D)
        v2 = e[k] - e[i]
        v1 = nn.functional.normalize(v1, dim=1)
        v2 = nn.functional.normalize(v2, dim=1)
        return (v1 * v2).sum(dim=1)  # cosenos (t,)
    cs = angle_cos(e_s)
    ct = angle_cos(e_t)
    return nn.functional.mse_loss(cs, ct)

# ----------------------- Treinos -----------------------
def train_supervised(model, loader, epochs, lr, label="Train"):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        for xb, yb in loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * xb.size(0)
        epoch_loss /= len(loader.dataset)
        if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
            print(f"[{label}] Epoch {epoch:02d}/{epochs} - MSE: {epoch_loss:.6f}")

def train_student_kd_rkd(student, teacher, loader, epochs, lr,
                         alpha=0.5, beta=0.1, gamma=1.0, delta=1.0,
                         max_triplets=2048):
    """
    L = α*Huber(y, y_s) + (1-α)*Huber(y_t, y_s) + β*( L_dist + γ*L_angle )
    - Embeddings: saída do Flatten (índice 4 nos sequentials)
    """
    huber = nn.HuberLoss(delta=delta)
    optimizer = optim.Adam(student.parameters(), lr=lr)

    # Hooks para capturar embeddings (pós Flatten)
    t_emb = {"val": None}
    s_emb = {"val": None}
    def t_hook(_m, _i, o): t_emb.update(val=o.detach())
    def s_hook(_m, _i, o): s_emb.update(val=o)

    t_handle = teacher.net[4].register_forward_hook(t_hook)
    s_handle = student.net[4].register_forward_hook(s_hook)

    # Teacher congelado
    teacher.eval()
    for p in teacher.parameters():
        p.requires_grad = False

    try:
        for epoch in range(1, epochs + 1):
            student.train()
            run_loss = 0.0

            for xb, yb in loader:
                with torch.no_grad():
                    y_teacher = teacher(xb)

                y_student = student(xb)

                # KD de resposta (Huber)
                loss_sup  = huber(y_student, yb)
                loss_distill = huber(y_student, y_teacher)

                # RKD: usa embeddings capturados pelos hooks
                if (t_emb["val"] is None) or (s_emb["val"] is None):
                    with torch.no_grad():
                        teacher(xb)
                    _ = student(xb)

                e_t = t_emb["val"]  # (B, D_t)
                e_s = s_emb["val"]  # (B, D_s)

                # Distâncias (pairwise)
                Ld = rkd_distance_loss(e_s, e_t)

                # Ângulos (tríades amostradas)
                La = rkd_angle_loss(e_s, e_t, max_triplets=max_triplets)

                L_rkd = Ld + gamma * La

                loss = alpha * loss_sup + (1.0 - alpha) * loss_distill + beta * L_rkd

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                run_loss += loss.item() * xb.size(0)

            run_loss /= len(loader.dataset)
            if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
                print(f"[KD-RKD] Epoch {epoch:02d}/{epochs} - Loss: {run_loss:.6f}")

    finally:
        t_handle.remove()
        s_handle.remove()

# ----------------------- Main -----------------------
def main():
    set_seed(SEED)

    # ============================
    # PRÉ-PROCESSAMENTO (original)
    # ============================
    df = pd.read_csv(CSV_PATH)
    df = df[df['Nome'].str.strip() == 'Estação Pesca - UFRPE']
    df['Data estação'] = pd.to_datetime(df['Data estação'], errors='coerce')
    df = df.sort_values('Data estação')
    df = df.interpolate(method='linear', limit_direction='forward')
    df['Precipitação anterior'] = df['Precipitação dia'].shift(1)
    df = df.dropna()

    colunas_features = ['Temperatura', 'Umidade', 'Velocidade Vento', 'Rajada Vento', 'Precipitação anterior']
    coluna_saida = 'Precipitação dia'
    df = df[colunas_features + [coluna_saida]]

    X = df[colunas_features].values
    y = df[coluna_saida].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.3, random_state=SEED
    )

    X_train_cnn = to_cnn1d_shape(X_train)   # (N, 1, 5)
    X_test_cnn  = to_cnn1d_shape(X_test)    # (N, 1, 5)

    train_ds = WeatherDataset(X_train_cnn, y_train)
    test_ds  = WeatherDataset(X_test_cnn,  y_test)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)

    # ============================
    # 1) Teacher: treino + avaliação
    # ============================
    teacher = CNNRegressor(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando TEACHER (baseline) ===")
    train_supervised(teacher, train_loader, epochs=EPOCHS_TEACHER, lr=LR_TEACHER, label="Teacher")
    evaluate_model(teacher, test_loader, label="Teacher (FP32)")

    # ============================
    # 2) Student: RKD (Response + Distância + Ângulo)
    # ============================
    student = CNNRegressorStudent(in_channels=1, length=X_train_cnn.shape[2]).to(DEVICE)
    print("\n=== Treinando STUDENT com RKD (Response + Distance + Angle) ===")
    train_student_kd_rkd(
        student, teacher, train_loader,
        epochs=EPOCHS_STUDENT, lr=LR_STUDENT,
        alpha=ALPHA, beta=BETA, gamma=GAMMA, delta=HUBER_DELTA,
        max_triplets=MAX_TRIPLETS
    )
    evaluate_model(student, test_loader,
                   label=f"Student (RKD: α={ALPHA}, β={BETA}, γ={GAMMA}, δ={HUBER_DELTA})")

if __name__ == "__main__":
    main()



=== Treinando TEACHER (baseline) ===
[Teacher] Epoch 01/100 - MSE: 6.248398
[Teacher] Epoch 05/100 - MSE: 0.270467
[Teacher] Epoch 10/100 - MSE: 0.262171
[Teacher] Epoch 15/100 - MSE: 0.258560
[Teacher] Epoch 20/100 - MSE: 0.264250
[Teacher] Epoch 25/100 - MSE: 0.264546
[Teacher] Epoch 30/100 - MSE: 0.257848
[Teacher] Epoch 35/100 - MSE: 0.268403
[Teacher] Epoch 40/100 - MSE: 0.255421
[Teacher] Epoch 45/100 - MSE: 0.264791
[Teacher] Epoch 50/100 - MSE: 0.259054
[Teacher] Epoch 55/100 - MSE: 0.253974
[Teacher] Epoch 60/100 - MSE: 0.256649
[Teacher] Epoch 65/100 - MSE: 0.261080
[Teacher] Epoch 70/100 - MSE: 0.255296
[Teacher] Epoch 75/100 - MSE: 0.252065
[Teacher] Epoch 80/100 - MSE: 0.255939
[Teacher] Epoch 85/100 - MSE: 0.253428
[Teacher] Epoch 90/100 - MSE: 0.254143
[Teacher] Epoch 95/100 - MSE: 0.256427
[Teacher] Epoch 100/100 - MSE: 0.253061

📊 Teacher (FP32):
MAE:   0.1425
RMSE:  1.0632
R²:    0.9332
Tempo total de inferência: 0.185584 s
Tempo médio por amostra:   0.0309 ms

=== T