<a href="https://colab.research.google.com/github/dastias/Projeto-doutorado/blob/main/vibration_analysis_aero.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install PyWavelets



In [None]:
# vibration_analysis_aero.py - Código completo corrigido

import os, math
import numpy as np
import pywt
from scipy import signal

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import KFold

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


class SignalPreprocessor:
    """Wavelet denoising and spectrogram generation for 1-D signals.

    The constructor arguments are stored unchanged and read by the methods:
    ``window_size`` is used as the spectrogram segment length (``nperseg``),
    ``overlap`` is the fraction of a segment shared with the next one, and
    ``sampling_rate`` is passed to SciPy as ``fs``.
    """

    def __init__(self, window_size=1024, overlap=0.5, sampling_rate=12800):
        self.window_size = window_size
        self.overlap = overlap
        self.sampling_rate = sampling_rate

    def remove_noise(self, data: np.ndarray) -> np.ndarray:
        """Return *data* after soft-thresholding its db4 detail coefficients.

        The signal is decomposed with a 4-level ``db4`` wavelet transform.
        The threshold is ``median(|finest detail coefficients|) / 0.6745`` and
        is applied to every detail band; the approximation band is untouched.

        The result always has ``len(data)`` samples: ``pywt.waverec`` may
        return one extra trailing sample for odd-length input, which is
        dropped here so raw and denoised signals stay aligned.
        """
        coeffs = pywt.wavedec(data, 'db4', level=4)
        thr = np.median(np.abs(coeffs[-1])) / 0.6745
        # Index 0 is the approximation band; only detail bands are shrunk.
        for i in range(1, len(coeffs)):
            coeffs[i] = pywt.threshold(coeffs[i], thr, mode='soft')
        reconstructed = pywt.waverec(coeffs, 'db4')
        return reconstructed[:len(data)]

    def generate_spectrogram(self, data: np.ndarray) -> np.ndarray:
        """Return the dB-scaled spectrogram of *data*, min-max scaled to [0, 1].

        Uses a Hann window of ``window_size`` samples. The overlap is
        ``int(window_size * overlap)`` samples, clamped to ``window_size - 1``
        because SciPy requires ``noverlap < nperseg``.
        """
        nperseg = self.window_size
        noverlap = int(nperseg * self.overlap)
        if noverlap >= nperseg:
            noverlap = nperseg - 1
        _, _, Sxx = signal.spectrogram(
            data,
            fs=self.sampling_rate,
            window='hann',
            nperseg=nperseg,
            noverlap=noverlap
        )
        # Small epsilons keep log10 and the division finite for silent input.
        Sxx = 10 * np.log10(Sxx + 1e-10)
        return (Sxx - Sxx.min()) / (Sxx.max() - Sxx.min() + 1e-10)

class UnsupervisedAeroDataset(Dataset):
    """In-memory dataset of spectrogram tensors built from CSV rows.

    Each CSV row is treated as one signal; its first two columns are skipped
    and the remainder is denoised and converted to a spectrogram tensor with
    a leading channel dimension. Items are
    ``(spectrogram, raw_signal, denoised_signal)`` triples.

    Args:
        csv_paths: iterable of comma-separated files readable by
            ``np.loadtxt``.
        transform: optional callable applied to each spectrogram tensor.
    """

    def __init__(self, csv_paths, transform=None):
        self.transform = transform
        self.pre = SignalPreprocessor()
        self.samples = []
        self.raw_signals = []
        # Denoised signals are cached so __getitem__ does not re-run the
        # wavelet transform on every access.
        self.denoised_signals = []

        for path in csv_paths:
            # ndmin=2 keeps a single-row file as one row instead of letting
            # the loop below iterate over scalars.
            data = np.loadtxt(path, delimiter=',', ndmin=2)
            for row in data:
                x = row[2:]
                den = self.pre.remove_noise(x)
                spec = self.pre.generate_spectrogram(den)
                tensor = torch.tensor(spec).float().unsqueeze(0)
                if self.transform:
                    tensor = self.transform(tensor)
                self.samples.append(tensor)
                self.raw_signals.append(x)
                self.denoised_signals.append(den)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx], self.raw_signals[idx], self.denoised_signals[idx]

class VibrationAnalysisCNN(nn.Module):
    """Three-stage convolutional classifier for single-channel inputs.

    ``features`` ends in a global average pool, so each sample is reduced to
    a 128-value vector before the two-layer ``classifier`` head produces
    ``num_classes`` logits.
    """

    def __init__(self, num_classes: int):
        super().__init__()
        conv_stack = [
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
        ]
        head = [
            nn.Linear(128, 256), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        ]
        self.features = nn.Sequential(*conv_stack)
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Cast *x* to float32 and return the class logits."""
        batch = x.size(0)
        pooled = self.features(x.float())
        flat = pooled.view(batch, -1)
        return self.classifier(flat)

class LoRAAdapter(nn.Module):
    """Wrap a frozen ``nn.Linear`` with a trainable low-rank correction.

    The output is ``layer(x) + (x @ A @ B) * scale``. ``B`` starts at zero,
    so a freshly built adapter reproduces the wrapped layer exactly.
    """

    def __init__(self, layer: nn.Linear, rank=4, scale=0.01):
        super().__init__()
        # Freeze the wrapped layer; only A and B receive gradients.
        for frozen in (layer.weight, layer.bias):
            if frozen is not None:
                frozen.requires_grad = False
        self.layer = layer
        self.scale = scale

        down = torch.zeros(layer.in_features, rank)
        up = torch.zeros(rank, layer.out_features)
        self.A = nn.Parameter(down)
        self.B = nn.Parameter(up)
        nn.init.kaiming_uniform_(self.A, a=math.sqrt(5))
        nn.init.zeros_(self.B)

    def forward(self, x):
        """Return the frozen layer output plus the scaled low-rank update."""
        base = self.layer(x)
        delta = x @ self.A @ self.B
        return base + delta * self.scale

class SimCLRModelLoRA(nn.Module):
    """Shared convolutional encoder with a projection head and a LoRA classifier.

    ``forward`` returns the projection-head output; ``forward_classifier``
    returns class logits from the LoRA-wrapped classifier. Both run the same
    encoder first.
    """

    def __init__(self, projection_dim=128, num_classes=2, lora_rank=4, lora_scale=0.01):
        super().__init__()
        encoder_layers = [
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        self.projection = nn.Sequential(
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, projection_dim),
        )

        hidden = LoRAAdapter(nn.Linear(128, 256), rank=lora_rank, scale=lora_scale)
        logits = LoRAAdapter(nn.Linear(256, num_classes), rank=lora_rank, scale=lora_scale)
        self.classifier = nn.Sequential(hidden, nn.ReLU(), nn.Dropout(0.5), logits)

    def _encode(self, x):
        """Cast *x* to float32, run the encoder, and flatten per sample."""
        x = x.float()
        return self.encoder(x).view(x.size(0), -1)

    def forward(self, x):
        """Return the projection-head embedding of *x*."""
        return self.projection(self._encode(x))

    def forward_classifier(self, x):
        """Return the class logits for *x*."""
        return self.classifier(self._encode(x))

class ModelAgent:
    """Registry of models plus a smoothed performance score per model."""

    # Score assigned to a model before its first performance update.
    _INITIAL_SCORE = 1.0

    def __init__(self):
        self.models = {}
        self.scores = {}

    def update_models(self, models: dict):
        """Replace the registered models and reset every score to 1.0."""
        self.models = models
        self.scores = {k: self._INITIAL_SCORE for k in models}

    def update_performance(self, name, score):
        """Blend *score* into the stored score: ``0.9 * old + 0.1 * score``.

        A name that was never registered starts from the same initial score
        as registered models instead of raising ``KeyError``.
        """
        previous = self.scores.get(name, self._INITIAL_SCORE)
        self.scores[name] = 0.9 * previous + 0.1 * score

    def select_best_model(self):
        """Return the name with the highest score, or ``None`` if none exist."""
        if not self.scores:
            return None
        return max(self.scores, key=self.scores.get)

class VibrationAnalyzer:
    """Cluster spectrogram embeddings into pseudo-labels and train classifiers on them.

    ``config`` is read as a mapping with the keys ``learning_rate``,
    ``simclr_epochs``, ``batch_size``, ``epochs`` and ``output_dir``.
    ``agent`` holds the models being trained and their running scores.
    """

    def __init__(self, config, agent):
        self.config = config
        self.agent = agent
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.pre = SignalPreprocessor()

    def pretrain_simclr(self, loader):
        """Train the ``simclr_lora`` model for ``simclr_epochs`` epochs.

        The objective is the mean L2 norm of the projection output.
        NOTE(review): there are no positive/negative pairs here, so this is
        not a contrastive loss; it is kept exactly as in the original code.
        """
        model = self.agent.models['simclr_lora'].to(self.device)
        opt = torch.optim.Adam(model.parameters(), lr=self.config['learning_rate'])
        model.train()
        for _ in range(self.config['simclr_epochs']):
            for spec, _, _ in loader:
                spec = spec.to(self.device).float()
                opt.zero_grad()
                loss = torch.norm(model(spec), dim=1).mean()
                loss.backward()
                opt.step()

    def extract_embeddings(self, loader):
        """Return the stacked projection outputs of ``simclr_lora`` for *loader*."""
        model = self.agent.models['simclr_lora'].to(self.device).eval()
        embs = []
        with torch.no_grad():
            for spec, _, _ in loader:
                embs.append(model(spec.to(self.device).float()).cpu().numpy())
        return np.vstack(embs)

    @staticmethod
    def _logits(name, model, spec):
        """Return class logits; only the 'cnn' model classifies via ``forward``."""
        return model(spec) if name == 'cnn' else model.forward_classifier(spec)

    @staticmethod
    def _fit_best_kmeans(embs, max_k=10):
        """Fit KMeans for k = 2..max_k and return the fit with the best silhouette.

        k is capped at ``len(embs) - 1`` because the silhouette score needs
        fewer clusters than samples.

        Raises:
            ValueError: if there are fewer than three embeddings.
        """
        upper = min(max_k, len(embs) - 1)
        if upper < 2:
            raise ValueError(
                f"Need at least 3 samples to cluster, got {len(embs)}."
            )
        best_km, best_s = None, -1.0
        for k in range(2, upper + 1):
            km = KMeans(n_clusters=k, random_state=42).fit(embs)
            # Silhouette is undefined when every point falls in one cluster.
            if len(np.unique(km.labels_)) < 2:
                continue
            s = silhouette_score(embs, km.labels_)
            if best_km is None or s > best_s:
                best_km, best_s = km, s
        if best_km is None:
            # Degenerate embeddings: fall back to the original default k=2.
            best_km = KMeans(n_clusters=2, random_state=42).fit(embs)
        return best_km

    def _run_validation(self, name, model, loader, loss_fn):
        """Evaluate *model* on *loader* in eval mode.

        Returns:
            (sample-weighted summed loss, list of predictions, list of labels)
        """
        model.eval()
        total_loss, preds, labels = 0.0, [], []
        with torch.no_grad():
            for spec, label, _, _ in loader:
                spec = spec.to(self.device).float()
                label = label.to(self.device).long()
                out = self._logits(name, model, spec)
                total_loss += loss_fn(out, label).item() * label.size(0)
                preds.extend(out.argmax(dim=1).cpu().numpy())
                labels.extend(label.cpu().numpy())
        return total_loss, preds, labels

    def _save_fold_artifacts(self, name, fold_idx, metrics, history, all_labels, all_preds, model):
        """Write metrics/history CSVs, loss/accuracy/confusion plots and weights."""
        out_dir = self.config['output_dir']
        pd.DataFrame([metrics]).to_csv(os.path.join(out_dir, f'fold{fold_idx}_{name}_metrics.csv'), index=False)
        dfh = pd.DataFrame(history)
        dfh['epoch'] = np.arange(1, self.config['epochs'] + 1)
        dfh.to_csv(os.path.join(out_dir, f'fold{fold_idx}_{name}_history.csv'), index=False)

        plt.figure()
        plt.plot(dfh['epoch'], dfh['train_loss'], label='Train Loss')
        plt.plot(dfh['epoch'], dfh['val_loss'], label='Val Loss')
        plt.legend(); plt.title(f'Loss Curve – {name}')
        plt.savefig(os.path.join(out_dir, f'fold{fold_idx}_{name}_loss.png'))
        plt.close()

        plt.figure()
        plt.plot(dfh['epoch'], dfh['val_acc'], label='Val Accuracy')
        plt.legend(); plt.title(f'Accuracy Curve – {name}')
        plt.savefig(os.path.join(out_dir, f'fold{fold_idx}_{name}_acc.png'))
        plt.close()

        cm = confusion_matrix(all_labels, all_preds)
        plt.figure(figsize=(6,5))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title(f'Matriz de Confusão – {name}')
        plt.xlabel('Predito')
        plt.ylabel('Real')
        plt.savefig(os.path.join(out_dir, f'fold{fold_idx}_{name}_cm.png'))
        plt.close()

        torch.save(model.state_dict(), os.path.join(out_dir, f'fold{fold_idx}_{name}_model.pt'))

    def train_and_eval_fold(self, train_ds, val_ds, fold_idx):
        """Pseudo-label one fold with KMeans, then train and score every model.

        Validation pseudo-labels are obtained by assigning the validation
        embeddings to the clusters fitted on the training embeddings, so each
        validation sample is compared against its own cluster id.
        """
        batch_size = self.config['batch_size']

        # Temporary default model, used only to embed the data for clustering.
        # NOTE(review): this encoder is randomly initialised; pretraining
        # below happens after the pseudo-labels are fixed, as in the original.
        self.agent.update_models({
            'simclr_lora': SimCLRModelLoRA(projection_dim=128, num_classes=2)
        })
        train_embs = self.extract_embeddings(
            DataLoader(train_ds, batch_size=batch_size, shuffle=False))
        val_embs = self.extract_embeddings(
            DataLoader(val_ds, batch_size=batch_size, shuffle=False))

        km = self._fit_best_kmeans(train_embs)
        best_k = km.n_clusters
        train_pseudo = km.labels_
        val_pseudo = km.predict(val_embs)

        self.agent.update_models({
            'cnn': VibrationAnalysisCNN(num_classes=best_k),
            'simclr_lora': SimCLRModelLoRA(projection_dim=128, num_classes=best_k)
        })

        self.pretrain_simclr(DataLoader(train_ds, batch_size=batch_size, shuffle=True))

        class PLDS(Dataset):
            """Pairs each item of a base dataset with its pseudo-label."""
            def __init__(self, base_ds, labels):
                self.base, self.labels = base_ds, labels
            def __len__(self): return len(self.base)
            def __getitem__(self, i):
                spec, raw, den = self.base[i]
                return spec, self.labels[i], raw, den

        sup_tr = PLDS(train_ds, train_pseudo)
        sup_va = PLDS(val_ds, val_pseudo)

        for name, model in self.agent.models.items():
            model = model.to(self.device)
            opt = torch.optim.Adam(model.parameters(), lr=self.config['learning_rate'])
            loss_fn = nn.CrossEntropyLoss()
            train_loader = DataLoader(sup_tr, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(sup_va, batch_size=batch_size)

            history = {'train_loss': [], 'val_loss': [], 'val_acc': []}
            for _ in range(self.config['epochs']):
                model.train()
                tl = 0.0
                for spec, label, _, _ in train_loader:
                    spec = spec.to(self.device).float()
                    label = label.to(self.device).long()
                    opt.zero_grad()
                    out = self._logits(name, model, spec)
                    loss = loss_fn(out, label)
                    loss.backward()
                    opt.step()
                    # loss is a batch mean; weight it so the epoch value is
                    # a true per-sample average.
                    tl += loss.item() * label.size(0)
                history['train_loss'].append(tl / len(sup_tr))

                vl, preds, labs = self._run_validation(name, model, val_loader, loss_fn)
                corr = int(np.sum(np.asarray(preds) == np.asarray(labs)))
                history['val_loss'].append(vl / len(sup_va))
                history['val_acc'].append(corr / len(sup_va))

            # Final metrics (forces eval mode even when 'epochs' is 0).
            _, all_preds, all_labels = self._run_validation(name, model, val_loader, loss_fn)

            metrics = {
                'accuracy': accuracy_score(all_labels, all_preds),
                'precision': precision_score(all_labels, all_preds, average='weighted', zero_division=0),
                'recall': recall_score(all_labels, all_preds, average='weighted', zero_division=0),
                'f1': f1_score(all_labels, all_preds, average='weighted', zero_division=0)
            }
            print(f"{name} – acc {metrics['accuracy']:.3f}, prec {metrics['precision']:.3f}, rec {metrics['recall']:.3f}, f1 {metrics['f1']:.3f}")
            self.agent.update_performance(name, metrics['f1'])
            self._save_fold_artifacts(name, fold_idx, metrics, history, all_labels, all_preds, model)

    def cross_validate(self, dataset):
        """Run ``train_and_eval_fold`` on each split of a shuffled 5-fold KFold."""
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        for i, (tr, va) in enumerate(kf.split(dataset)):
            self.train_and_eval_fold(Subset(dataset, tr), Subset(dataset, va), i)

if __name__ == '__main__':
    from torchvision import transforms

    # Hyper-parameters and output location for the whole run.
    config = dict(
        learning_rate=1e-3,
        simclr_epochs=5,
        batch_size=32,
        epochs=10,
        output_dir='/content/drive/My Drive/aero_results/',
    )
    os.makedirs(config['output_dir'], exist_ok=True)

    # Input recordings and the normalisation applied to each spectrogram.
    csvs = [
        '/content/drive/My Drive/DATASET_AERO_SWEDEN/DATASET06.csv',
        '/content/drive/My Drive/DATASET_AERO_SWEDEN/DATASET07.csv',
    ]
    tfm = transforms.Normalize([0.5], [0.5])
    ds = UnsupervisedAeroDataset(csvs, transform=tfm)

    # Five-fold pseudo-label training and evaluation.
    agent = ModelAgent()
    analyzer = VibrationAnalyzer(config, agent)
    analyzer.cross_validate(ds)


cnn – acc 0.562, prec 0.612, rec 0.562, f1 0.582
simclr_lora – acc 0.578, prec 0.603, rec 0.578, f1 0.589
cnn – acc 0.501, prec 0.545, rec 0.501, f1 0.514
simclr_lora – acc 0.517, prec 0.549, rec 0.517, f1 0.528
cnn – acc 0.547, prec 0.603, rec 0.547, f1 0.567
simclr_lora – acc 0.557, prec 0.595, rec 0.557, f1 0.572
cnn – acc 0.588, prec 0.639, rec 0.588, f1 0.608
simclr_lora – acc 0.589, prec 0.641, rec 0.589, f1 0.609
cnn – acc 0.542, prec 0.617, rec 0.542, f1 0.570
simclr_lora – acc 0.559, prec 0.617, rec 0.559, f1 0.582


In [None]:
# vibration_analysis_aero.py - Código completo corrigido e com sugestões implementadas

import os
import math
import numpy as np
import pywt
from scipy import signal

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import KFold

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from torchvision import transforms # Movido para o topo para melhor organização

# Helper para garantir que os diretórios de output existem
def ensure_dir(directory):
    """Create *directory* and any missing parents; do nothing if it exists.

    Uses ``exist_ok=True`` instead of a separate existence check, so a
    directory created concurrently between check and creation cannot cause a
    failure.

    Raises:
        FileExistsError: if *directory* exists but is not a directory.
    """
    os.makedirs(directory, exist_ok=True)

class SignalPreprocessor:
    """Wavelet denoising and normalised spectrograms for 1-D signals."""

    def __init__(self, window_size=1024, overlap=0.5, sampling_rate=12800):
        self.window_size = window_size
        self.overlap = overlap
        self.sampling_rate = sampling_rate

    def remove_noise(self, data: np.ndarray) -> np.ndarray:
        """Soft-threshold the db4 detail bands of *data* and reconstruct it.

        The threshold is ``sigma * sqrt(2 * log(n))`` with ``sigma`` taken
        from the median absolute value of the finest detail band; it is 0
        when the input has at most one sample. The result is cut back to the
        input length when the reconstruction length differs.
        """
        n_samples = len(data)
        bands = pywt.wavedec(data, 'db4', level=4)
        sigma = np.median(np.abs(bands[-1])) / 0.6745
        # Donoho threshold
        thr = sigma * np.sqrt(2 * np.log(n_samples)) if n_samples > 1 else 0
        # Band 0 is the approximation and is left untouched.
        bands[1:] = [pywt.threshold(band, thr, mode='soft') for band in bands[1:]]
        rebuilt = pywt.waverec(bands, 'db4')
        if len(rebuilt) == n_samples:
            return rebuilt
        return rebuilt[:n_samples]

    def generate_spectrogram(self, data: np.ndarray) -> np.ndarray:
        """Return the dB spectrogram of *data*, min-max scaled to [0, 1].

        Inputs shorter than one window are zero-padded at the end. A
        spectrogram whose dB range is below 1e-10 yields an all-zero array.
        """
        seg_len = self.window_size
        # The overlap must stay strictly below the segment length.
        seg_overlap = min(int(seg_len * self.overlap), seg_len - 1)

        shortfall = seg_len - len(data)
        if shortfall > 0:
            data = np.pad(data, (0, shortfall), 'constant', constant_values=(0,0))

        _, _, power = signal.spectrogram(
            data,
            fs=self.sampling_rate,
            window='hann',
            nperseg=seg_len,
            noverlap=seg_overlap
        )
        power_db = 10 * np.log10(power + 1e-10)
        lo, hi = power_db.min(), power_db.max()
        span = hi - lo
        if span < 1e-10:
            return np.zeros_like(power_db)
        return (power_db - lo) / (span + 1e-10)

class UnsupervisedAeroDataset(Dataset):
    """In-memory dataset of spectrogram tensors built from header-less CSVs.

    Every CSV row is one candidate signal: the first two columns are skipped,
    non-numeric cells are dropped, and rows shorter than the preprocessor's
    window size are ignored. Rows or files that raise are reported and
    skipped.

    Raises:
        ValueError: if no sample could be loaded from any file.
    """

    def __init__(self, csv_paths, signal_preprocessor: SignalPreprocessor, transform=None):
        self.transform = transform
        self.pre = signal_preprocessor
        self.samples = []

        print(f"Carregando e pré-processando dados de: {csv_paths}")
        for path in csv_paths:
            try:
                frame = pd.read_csv(path, header=None, low_memory=False)
                for row_idx, row_series in frame.iterrows():
                    try:
                        sample = self._row_to_sample(row_series)
                    except Exception as e:
                        print(f"Erro processando linha {row_idx+1} do arquivo {path}: {e}. Pulando linha.")
                        continue
                    if sample is not None:
                        self.samples.append(sample)
            except Exception as e:
                print(f"Erro lendo ou processando arquivo CSV {path}: {e}. Pulando arquivo.")
                continue
        print(f"Total de {len(self.samples)} amostras carregadas.")
        if not self.samples:
            raise ValueError("Nenhuma amostra foi carregada. Verifique os arquivos CSV e o pré-processamento.")

    def _row_to_sample(self, row_series):
        """Turn one CSV row into a spectrogram tensor, or ``None`` if too short."""
        values = pd.to_numeric(row_series.iloc[2:], errors='coerce').to_numpy()
        values = values[~np.isnan(values)]
        if len(values) < self.pre.window_size:
            return None

        cleaned = self.pre.remove_noise(values)
        if len(cleaned) < self.pre.window_size:
            return None

        spec = self.pre.generate_spectrogram(cleaned)
        # Leading dimension of size 1 is added as the channel axis.
        tensor = torch.tensor(spec, dtype=torch.float32).unsqueeze(0)
        if self.transform:
            tensor = self.transform(tensor)
        return tensor

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]


class PLDS(Dataset):
    """Pseudo-label dataset: pairs each item of a subset with a cluster label.

    ``labels[i]`` is the label of ``base_subset[i]``; both must have the same
    length. Items are ``(spectrogram_tensor, label)`` with the label as a
    ``torch.long`` tensor.
    """

    def __init__(self, base_subset: Subset, labels: np.ndarray):
        self.base_subset = base_subset
        self.labels = labels
        n_items, n_labels = len(self.base_subset), len(self.labels)
        assert n_items == n_labels, \
            f"Dataset (len: {n_items}) e labels (len: {n_labels}) devem ter o mesmo tamanho."

    def __len__(self):
        return len(self.base_subset)

    def __getitem__(self, idx):
        # Only the spectrogram and its label are returned.
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return self.base_subset[idx], target


class VibrationAnalysisCNN(nn.Module):
    def __init__(self, num_classes: int):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d((1, 1))
        )
        self.classifier = nn.Sequential(
            nn.Linear(128, 256), nn.ReLU(inplace=True), nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)


class LoRAAdapter(nn.Module):
    """Wrap a frozen ``nn.Linear`` with a trainable low-rank correction.

    The output is ``layer(x) + (x @ A @ B) * scale``. ``B`` is initialised to
    zero, so a new adapter reproduces the wrapped layer exactly.
    """

    def __init__(self, layer: nn.Linear, rank=4, scale=0.01):
        super().__init__()
        # Freeze the wrapped layer; only A and B receive gradients.
        for frozen in (layer.weight, layer.bias):
            if frozen is not None:
                frozen.requires_grad = False
        self.layer = layer
        self.scale = scale

        # Storage is allocated uninitialised and filled by the init calls.
        self.A = nn.Parameter(torch.empty(layer.in_features, rank))
        self.B = nn.Parameter(torch.empty(rank, layer.out_features))
        nn.init.kaiming_uniform_(self.A, a=math.sqrt(5))
        nn.init.zeros_(self.B)

    def forward(self, x):
        """Return the frozen layer output plus the scaled low-rank update."""
        base = self.layer(x)
        delta = x @ self.A @ self.B
        return base + delta * self.scale


class SimCLRModelLoRA(nn.Module):
    """Shared convolutional encoder with a projection head and a LoRA classifier.

    ``forward_encoder_projection`` returns the projection-head output and
    ``forward_classifier`` returns class logits; both run the same encoder
    first.
    """

    def __init__(self, projection_dim=128, num_classes=2, lora_rank=4, lora_scale=0.01):
        super().__init__()
        encoder_layers = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        self.projection = nn.Sequential(
            nn.Linear(128, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, projection_dim),
        )

        hidden = LoRAAdapter(nn.Linear(128, 256), rank=lora_rank, scale=lora_scale)
        logits = LoRAAdapter(nn.Linear(256, num_classes), rank=lora_rank, scale=lora_scale)
        self.classifier = nn.Sequential(hidden, nn.ReLU(inplace=True), nn.Dropout(0.5), logits)

    def _encode(self, x):
        """Run the encoder and flatten its output to one vector per sample."""
        feats = self.encoder(x)
        return feats.view(feats.size(0), -1)

    def forward_encoder_projection(self, x):
        """Return the projection-head embedding of *x*."""
        return self.projection(self._encode(x))

    def forward_classifier(self, x):
        """Return the class logits for *x*."""
        return self.classifier(self._encode(x))


class ModelAgent:
    """Registry of models and the latest score recorded for each one."""

    def __init__(self):
        self.models = {}
        self.scores = {}

    def update_models(self, models: dict):
        """Replace the registered models and reset every score to 0.0."""
        self.models = models
        self.scores = dict.fromkeys(models, 0.0)

    def update_performance(self, model_name, score):
        """Overwrite the stored score of *model_name* with *score*."""
        self.scores[model_name] = score

    def get_best_model_name(self):
        """Return the name with the highest score, or ``None`` if none exist."""
        return max(self.scores, key=self.scores.get, default=None)


class VibrationAnalyzer:
    def __init__(self, config, agent: ModelAgent, signal_preprocessor: SignalPreprocessor):
        """Store the collaborators, pick the torch device, and create the output directory."""
        self.config, self.agent = config, agent
        self.signal_preprocessor = signal_preprocessor
        # Prefer the GPU when one is available.
        backend = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(backend)
        print(f"Usando dispositivo: {self.device}")
        ensure_dir(self.config['output_dir'])

    def _train_encoder_module(self, model_to_train: SimCLRModelLoRA, loader: DataLoader, epochs: int, learning_rate: float):
        """Optimise the encoder and projection head of *model_to_train*.

        Each step minimises the mean L2 norm of the projections for one
        batch; only encoder and projection parameters are given to Adam. The
        per-epoch average loss is printed.
        """
        print(f"Iniciando pré-treinamento do encoder por {epochs} épocas...")
        model_to_train.to(self.device)
        trainable = [
            *model_to_train.encoder.parameters(),
            *model_to_train.projection.parameters(),
        ]
        optimizer = torch.optim.Adam(trainable, lr=learning_rate)

        model_to_train.train()
        for epoch in range(epochs):
            running = 0
            for spectrograms in loader:
                spectrograms = spectrograms.to(self.device)

                optimizer.zero_grad()
                projections = model_to_train.forward_encoder_projection(spectrograms)
                loss = torch.norm(projections, p=2, dim=1).mean()
                loss.backward()
                optimizer.step()
                running += loss.item()
            n_batches = len(loader)
            # An empty loader reports a loss of 0 instead of dividing by zero.
            avg_loss = running / n_batches if n_batches > 0 else 0
            print(f"Encoder Pretrain Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")
        print("Pré-treinamento do encoder concluído.")

    def _extract_embeddings_from_module(self, model_extractor: SimCLRModelLoRA, loader: DataLoader) -> np.ndarray:
        """Return the projection outputs for every batch of *loader*, stacked row-wise.

        The model is moved to the analyzer's device and put in eval mode.
        An empty loader yields an empty array.
        """
        model_extractor.to(self.device)
        model_extractor.eval()
        with torch.no_grad():
            chunks = [
                model_extractor.forward_encoder_projection(batch.to(self.device)).cpu().numpy()
                for batch in loader
            ]
        # np.vstack rejects an empty sequence, so that case is handled here.
        return np.vstack(chunks) if chunks else np.array([])

    def train_and_eval_fold(self, train_subset: Subset, val_subset: Subset, fold_idx: int):
        print(f"\n--- Fold {fold_idx + 1} ---")

        # ETAPA 1: Pré-treinamento do Encoder para Extração de Embeddings
        print(f"Fold {fold_idx + 1}: (Etapa 1) Pré-treinando encoder para K-Means...")
        encoder_trainer_model = SimCLRModelLoRA(
            projection_dim=self.config['projection_dim_simclr'],
            num_classes=2
        ).to(self.device)

        pretrain_loader = DataLoader(train_subset, batch_size=self.config['batch_size'], shuffle=True,
                                     num_workers=self.config.get('num_workers', 0), pin_memory=True) # num_workers=0 para debug no Colab se necessário

        if len(pretrain_loader) > 0:
            self._train_encoder_module(encoder_trainer_model, pretrain_loader,
                                    self.config['simclr_epochs_for_kmeans'], self.config['learning_rate_simclr_kmeans'])
        else:
            print(f"Fold {fold_idx + 1}: Pretrain_loader vazio. Pulando pré-treinamento do encoder.")


        # ETAPA 2: Extração de Embeddings com o Encoder Pré-treinado
        print(f"Fold {fold_idx + 1}: (Etapa 2) Extraindo embeddings...")
        emb_loader_train = DataLoader(train_subset, batch_size=self.config['batch_size'], shuffle=False,
                                      num_workers=self.config.get('num_workers', 0), pin_memory=True)
        emb_loader_val = DataLoader(val_subset, batch_size=self.config['batch_size'], shuffle=False,
                                    num_workers=self.config.get('num_workers', 0), pin_memory=True)

        train_embeddings = self._extract_embeddings_from_module(encoder_trainer_model, emb_loader_train)
        val_embeddings = self._extract_embeddings_from_module(encoder_trainer_model, emb_loader_val)

        if len(train_embeddings) == 0:
            print(f"Fold {fold_idx + 1}: Nenhum embedding de treino extraído. Pulando fold.")
            return

        # ETAPA 3: K-Means para Número de Classes e Pseudo-Rótulos
        print(f"Fold {fold_idx + 1}: (Etapa 3) Executando K-Means...")
        best_k, best_silhouette_score = 2, -1.0

        max_possible_k = min(self.config['max_k_means_clusters'], len(train_embeddings))

        if max_possible_k < 2:
            print(f"Fold {fold_idx + 1}: Número insuficiente de amostras de treino ({len(train_embeddings)}) para K-Means com k >= 2. Usando k=2 por padrão.")
            best_k = 2
            if len(train_embeddings) < 2 :
                 print(f"Fold {fold_idx + 1}: Menos de 2 amostras de treino. Impossível prosseguir com classificação. Pulando fold.")
                 return
        else:
            for k_try in range(2, max_possible_k + 1):
                try:
                    kmeans = KMeans(n_clusters=k_try, random_state=self.config['random_seed'], n_init='auto').fit(train_embeddings)
                    if len(np.unique(kmeans.labels_)) > 1:
                        score = silhouette_score(train_embeddings, kmeans.labels_)
                        if score > best_silhouette_score:
                            best_silhouette_score = score
                            best_k = k_try
                except Exception as e:
                    print(f"Erro durante K-Means ou Silhouette para k={k_try}: {e}")
                    continue
        print(f"Fold {fold_idx + 1}: Número de classes estimado (best_k) = {best_k} (Silhouette: {best_silhouette_score:.3f})")

        final_kmeans = KMeans(n_clusters=best_k, random_state=self.config['random_seed'], n_init='auto').fit(train_embeddings)
        train_pseudo_labels = final_kmeans.labels_

        val_pseudo_labels = np.array([])
        if len(val_embeddings) > 0:
            try:
                val_pseudo_labels = final_kmeans.predict(val_embeddings)
            except Exception as e: # Kmeans pode não conseguir prever se val_embeddings for muito diferente
                print(f"Erro ao prever pseudo-rótulos de validação: {e}. Validação será limitada.")
        else:
            print(f"Fold {fold_idx + 1}: Nenhum embedding de validação. Validação será limitada.")


        # ETAPA 4: Instanciar e Treinar Modelos Finais com Pseudo-Rótulos
        print(f"Fold {fold_idx + 1}: (Etapa 4) Preparando modelos finais...")
        cnn_classifier = VibrationAnalysisCNN(num_classes=best_k).to(self.device)

        simclr_lora_classifier = SimCLRModelLoRA(
            projection_dim=self.config['projection_dim_simclr'],
            num_classes=best_k,
            lora_rank=self.config['lora_rank'],
            lora_scale=self.config['lora_scale']
        ).to(self.device)

        if hasattr(encoder_trainer_model, 'encoder'): # Checar se o modelo de treino do encoder existe
            simclr_lora_classifier.encoder.load_state_dict(encoder_trainer_model.encoder.state_dict())
            print(f"Fold {fold_idx + 1}: Pesos do encoder pré-treinado carregados no SimCLRModelLoRA final.")

            if self.config['freeze_encoder_after_load']:
                for param in simclr_lora_classifier.encoder.parameters():
                    param.requires_grad = False
                print(f"Fold {fold_idx + 1}: Encoder do SimCLRModelLoRA final congelado.")

        self.agent.update_models({
            'VibrationCNN': cnn_classifier,
            'SimCLR_LoRA_Classifier': simclr_lora_classifier
        })

        sup_train_dataset = PLDS(train_subset, train_pseudo_labels)

        sup_val_dataset = None
        if len(val_pseudo_labels) > 0 and len(val_subset) == len(val_pseudo_labels):
            sup_val_dataset = PLDS(val_subset, val_pseudo_labels)
        else:
            print(f"Fold {fold_idx + 1}: Conjunto de validação ou pseudo-rótulos de validação incompatíveis/vazios. Validação será limitada.")


        # Loop de Treinamento Supervisionado
        for model_name, model_instance in self.agent.models.items():
            print(f"\nFold {fold_idx + 1}: Treinando modelo '{model_name}'...")
            model_instance.to(self.device)

            trainable_params = filter(lambda p: p.requires_grad, model_instance.parameters())
            optimizer = torch.optim.Adam(trainable_params, lr=self.config['learning_rate_classifier'])
            criterion = nn.CrossEntropyLoss()

            history = {'epoch': [], 'train_loss': [], 'val_loss': [], 'val_acc': []}

            train_loader_sup = DataLoader(sup_train_dataset, batch_size=self.config['batch_size'], shuffle=True,
                                          num_workers=self.config.get('num_workers', 0), pin_memory=True)

            val_loader_sup = None
            if sup_val_dataset and len(sup_val_dataset) > 0:
                 val_loader_sup = DataLoader(sup_val_dataset, batch_size=self.config['batch_size'], shuffle=False,
                                        num_workers=self.config.get('num_workers', 0), pin_memory=True)

            for epoch in range(self.config['epochs_classifier']):
                model_instance.train()
                epoch_train_loss = 0
                if len(train_loader_sup) == 0:
                    print(f"Epoch {epoch+1}/{self.config['epochs_classifier']} - {model_name} - Train loader vazio. Pulando treino.")
                    history['train_loss'].append(0)
                    history['epoch'].append(epoch + 1)
                    history['val_loss'].append(0)
                    history['val_acc'].append(0)
                    continue # Pula para a próxima época

                for batch_idx, (specs, labels) in enumerate(train_loader_sup): # CORRIGIDO AQUI
                    specs, labels = specs.to(self.device), labels.to(self.device)

                    optimizer.zero_grad()
                    if model_name == 'VibrationCNN':
                        outputs = model_instance(specs)
                    else:
                        outputs = model_instance.forward_classifier(specs)

                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    epoch_train_loss += loss.item()

                avg_epoch_train_loss = epoch_train_loss / len(train_loader_sup) if len(train_loader_sup) > 0 else 0
                history['train_loss'].append(avg_epoch_train_loss)
                history['epoch'].append(epoch + 1)

                epoch_val_loss = 0
                correct_val = 0
                total_val = 0
                all_preds_val, all_labels_val = [], []

                if val_loader_sup and len(val_loader_sup) > 0:
                    model_instance.eval()
                    with torch.no_grad():
                        for specs, labels in val_loader_sup: # CORRIGIDO AQUI
                            specs, labels = specs.to(self.device), labels.to(self.device)
                            if model_name == 'VibrationCNN':
                                outputs = model_instance(specs)
                            else:
                                outputs = model_instance.forward_classifier(specs)

                            loss_val = criterion(outputs, labels) # Renomear variável de loss
                            epoch_val_loss += loss_val.item()

                            _, predicted = torch.max(outputs.data, 1)
                            total_val += labels.size(0)
                            correct_val += (predicted == labels).sum().item()
                            all_preds_val.extend(predicted.cpu().numpy())
                            all_labels_val.extend(labels.cpu().numpy())

                    avg_epoch_val_loss = epoch_val_loss / len(val_loader_sup) if len(val_loader_sup) > 0 else 0
                    val_accuracy = correct_val / total_val if total_val > 0 else 0
                    history['val_loss'].append(avg_epoch_val_loss)
                    history['val_acc'].append(val_accuracy)
                    print(f"Epoch {epoch+1}/{self.config['epochs_classifier']} - {model_name} - Train Loss: {avg_epoch_train_loss:.4f}, Val Loss: {avg_epoch_val_loss:.4f}, Val Acc: {val_accuracy:.4f}")
                else:
                    history['val_loss'].append(0)
                    history['val_acc'].append(0)
                    print(f"Epoch {epoch+1}/{self.config['epochs_classifier']} - {model_name} - Train Loss: {avg_epoch_train_loss:.4f} (Validação pulada ou loader vazio)")

            if val_loader_sup and total_val > 0 :
                metrics = {
                    'accuracy': accuracy_score(all_labels_val, all_preds_val),
                    'precision': precision_score(all_labels_val, all_preds_val, average='weighted', zero_division=0),
                    'recall': recall_score(all_labels_val, all_preds_val, average='weighted', zero_division=0),
                    'f1': f1_score(all_labels_val, all_preds_val, average='weighted', zero_division=0)
                }
                print(f"Fold {fold_idx + 1} - Modelo '{model_name}' - Métricas Finais (Val): "
                      f"Acc: {metrics['accuracy']:.3f}, Prec: {metrics['precision']:.3f}, "
                      f"Rec: {metrics['recall']:.3f}, F1: {metrics['f1']:.3f}")
                self.agent.update_performance(model_name, metrics['f1'])

                metrics_df = pd.DataFrame([metrics])
                metrics_df.to_csv(os.path.join(self.config['output_dir'], f'fold{fold_idx+1}_{model_name}_metrics.csv'), index=False)

                cm = confusion_matrix(all_labels_val, all_preds_val, labels=list(range(best_k))) # Adicionar labels para garantir o tamanho da matriz
                plt.figure(figsize=(max(6, best_k), max(5, best_k-1)))
                sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=range(best_k), yticklabels=range(best_k))
                plt.title(f'Matriz de Confusão - Fold {fold_idx+1} - {model_name}')
                plt.xlabel('Predito'); plt.ylabel('Real')
                plt.savefig(os.path.join(self.config['output_dir'], f'fold{fold_idx+1}_{model_name}_cm.png'))
                plt.close()
            else:
                print(f"Fold {fold_idx + 1} - Modelo '{model_name}' - Validação pulada ou sem dados válidos para métricas.")
                self.agent.update_performance(model_name, 0.0)

            history_df = pd.DataFrame(history)
            history_df.to_csv(os.path.join(self.config['output_dir'], f'fold{fold_idx+1}_{model_name}_history.csv'), index=False)

            plt.figure()
            plt.plot(history_df['epoch'], history_df['train_loss'], label='Train Loss')
            if val_loader_sup and len(val_loader_sup) > 0 : plt.plot(history_df['epoch'], history_df['val_loss'], label='Val Loss')
            plt.legend(); plt.title(f'Curva de Loss - Fold {fold_idx+1} - {model_name}')
            plt.xlabel('Epoch'); plt.ylabel('Loss')
            plt.savefig(os.path.join(self.config['output_dir'], f'fold{fold_idx+1}_{model_name}_loss_curve.png'))
            plt.close()

            if val_loader_sup and len(val_loader_sup) > 0:
                plt.figure()
                plt.plot(history_df['epoch'], history_df['val_acc'], label='Val Accuracy')
                plt.legend(); plt.title(f'Curva de Acurácia (Val) - Fold {fold_idx+1} - {model_name}')
                plt.xlabel('Epoch'); plt.ylabel('Accuracy')
                plt.savefig(os.path.join(self.config['output_dir'], f'fold{fold_idx+1}_{model_name}_acc_curve.png'))
                plt.close()

            torch.save(model_instance.state_dict(), os.path.join(self.config['output_dir'], f'fold{fold_idx+1}_{model_name}_model.pt'))
            print(f"Modelo '{model_name}' salvo para o Fold {fold_idx+1}.")


    def cross_validate(self, full_dataset: UnsupervisedAeroDataset):
        """Run shuffled K-fold cross-validation over ``full_dataset``.

        Sample indices are split with ``KFold`` (seeded by
        ``config['random_seed']``). Each split is wrapped in two ``Subset``
        views and passed, with its zero-based fold index, to
        ``train_and_eval_fold``.

        Args:
            full_dataset: Dataset to split. Only its length is used here; the
                samples are consumed by ``train_and_eval_fold``.

        Returns:
            None. If ``n_folds`` is below 2 or exceeds the number of samples,
            an error is printed and no fold is trained.
        """
        n_samples = len(full_dataset)
        # Read the fold count once so the guards and KFold agree on one value.
        n_folds = self.config.get('n_folds', 1)

        # KFold rejects n_splits < 2; report that here, in the same
        # print-and-return style as the sample-count check below.
        if n_folds < 2:
            print(f"Erro: n_folds ({n_folds}) deve ser pelo menos 2 para a validação cruzada.")
            return

        if n_samples < n_folds:
            print(f"Erro: Número de amostras ({n_samples}) é menor que o número mínimo de folds requerido.")
            print("Reduza n_folds ou forneça mais dados.")
            return

        kf = KFold(n_splits=n_folds, shuffle=True, random_state=self.config['random_seed'])

        # KFold only needs an object of the right length to produce index arrays.
        for fold_index, (train_indices, val_indices) in enumerate(kf.split(range(n_samples))):
            if len(train_indices) == 0 or len(val_indices) == 0:
                print(f"Fold {fold_index + 1} tem 0 amostras de treino ou validação. Pulando este fold.")
                continue
            train_subset = Subset(full_dataset, train_indices)
            val_subset = Subset(full_dataset, val_indices)
            self.train_and_eval_fold(train_subset, val_subset, fold_index)

        print("\n--- Validação Cruzada Concluída ---")


if __name__ == '__main__':
    # Script entry point: build the configuration, load the dataset, run the
    # cross-validation and print the final per-model scores.
    config = {
        # Input CSV files and signal/spectrogram parameters.
        'csv_paths': [
            '/content/drive/My Drive/DATASET_AERO_SWEDEN/DATASET06.csv',
            '/content/drive/My Drive/DATASET_AERO_SWEDEN/DATASET07.csv'
        ],
        'window_size': 1024, 'overlap': 0.5, 'sampling_rate': 12800,
        'output_dir': '/content/drive/My Drive/aero_results/',
        'random_seed': 42,
        'num_workers': 0,  # On Colab use 0 or 2; start with 0 if workers cause problems.

        # NOTE(review): the keys below are consumed outside this block
        # (presumably by VibrationAnalyzer) — confirm their exact meaning there.
        'projection_dim_simclr': 128,
        'simclr_epochs_for_kmeans': 5,
        'learning_rate_simclr_kmeans': 1e-4,
        'max_k_means_clusters': 10,

        'learning_rate_classifier': 1e-4,
        'epochs_classifier': 30,
        'batch_size': 32,

        'lora_rank': 8, 'lora_scale': 0.01,
        'freeze_encoder_after_load': False,

        'n_folds': 5
    }

    # Seed NumPy and PyTorch (CPU and, when present, every CUDA device).
    np.random.seed(config['random_seed'])
    torch.manual_seed(config['random_seed'])
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config['random_seed'])
        # Request deterministic cuDNN kernels and turn off its auto-tuner.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Fallback for an empty path list: write two random CSVs and use those.
    if not config['csv_paths']:
        print("Nenhum caminho CSV fornecido. Criando dummy CSVs para teste.")
        ensure_dir('dummy_data_final')
        for i in range(2):
            dummy_signal_length = config['window_size'] * 2
            # 100 rows of uniform noise; presumably 2 leading non-signal
            # columns followed by the signal — confirm against the dataset loader.
            data = np.random.rand(100, 2 + dummy_signal_length)
            dummy_path = f'dummy_data_final/dummy_dataset_{i+1}.csv'
            pd.DataFrame(data).to_csv(dummy_path, header=False, index=False)
            config['csv_paths'].append(dummy_path)

    signal_preprocessor = SignalPreprocessor(
        window_size=config['window_size'],
        overlap=config['overlap'],
        sampling_rate=config['sampling_rate']
    )

    # Single-channel normalisation with mean 0.5 and std 0.5.
    data_transform = transforms.Compose([
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    try:
        full_aero_dataset = UnsupervisedAeroDataset(
            csv_paths=config['csv_paths'],
            signal_preprocessor=signal_preprocessor,
            transform=data_transform
        )
    except ValueError as e:
        print(f"Erro ao inicializar o dataset: {e}")
        print("Encerrando o script.")
        # Raise SystemExit directly: the bare exit() helper is injected by
        # `site` for interactive use and would also report success (status 0).
        raise SystemExit(1)

    if len(full_aero_dataset) == 0:
        print("Dataset está vazio após o processamento. Verifique os dados de entrada e os logs.")
        print("Encerrando o script.")
        raise SystemExit(1)

    print(f"Dataset carregado com {len(full_aero_dataset)} amostras.")

    model_agent = ModelAgent()
    vibration_analyzer = VibrationAnalyzer(config, model_agent, signal_preprocessor)

    vibration_analyzer.cross_validate(full_aero_dataset)

    print("\nAnálise concluída.")
    if model_agent.models:
        # Print the scores currently stored on the agent, then the best one.
        print("Scores F1 finais (do último fold processado com sucesso):")
        for name, score in model_agent.scores.items():
            print(f"  {name}: {score:.4f}")
        best_model_overall = model_agent.get_best_model_name()
        if best_model_overall:
            print(f"Melhor modelo no último fold (baseado em F1): {best_model_overall} "
                  f"(F1: {model_agent.scores[best_model_overall]:.4f})")

Carregando e pré-processando dados de: ['/content/drive/My Drive/DATASET_AERO_SWEDEN/DATASET06.csv', '/content/drive/My Drive/DATASET_AERO_SWEDEN/DATASET07.csv']
Total de 5929 amostras carregadas.
Dataset carregado com 5929 amostras.
Usando dispositivo: cuda

--- Fold 1 ---
Fold 1: (Etapa 1) Pré-treinando encoder para K-Means...
Iniciando pré-treinamento do encoder por 5 épocas...
Encoder Pretrain Epoch 1/5, Loss: 0.0798
Encoder Pretrain Epoch 2/5, Loss: 0.0040
Encoder Pretrain Epoch 3/5, Loss: 0.0029
Encoder Pretrain Epoch 4/5, Loss: 0.0028
Encoder Pretrain Epoch 5/5, Loss: 0.0025
Pré-treinamento do encoder concluído.
Fold 1: (Etapa 2) Extraindo embeddings...
Fold 1: (Etapa 3) Executando K-Means...
Fold 1: Número de classes estimado (best_k) = 2 (Silhouette: 0.232)
Fold 1: (Etapa 4) Preparando modelos finais...
Fold 1: Pesos do encoder pré-treinado carregados no SimCLRModelLoRA final.

Fold 1: Treinando modelo 'VibrationCNN'...
Epoch 1/30 - VibrationCNN - Train Loss: 0.6913, Val Loss: