In [1]:
from vibdata.deep.DeepDataset import DeepDataset, convertDataset
import vibdata.raw as raw_datasets
from vibdata.deep.signal.transforms import FilterByValue, Sequential

# Name of the raw dataset; the loader is looked up on `vibdata.raw` as "<name>_raw".
dataset_name = "CWRU"

# Root directory handed to the raw dataset loader.
raw_root_dir = "../data/raw_data/cwru"

# Resolve the loader class by name, then instantiate it with download enabled.
raw_dataset_cls = getattr(raw_datasets, f"{dataset_name}_raw")
raw_dataset = raw_dataset_cls(raw_root_dir, download=True)

  from .autonotebook import tqdm as notebook_tqdm
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default


In [2]:
import os

import numpy as np
import numpy.typing as npt
from tqdm import tqdm
from vibdata.raw.base import RawVibrationDataset
from vibdata.deep.signal.core import SignalSample


class GroupDataset:
    def __init__(
        self, dataset: RawVibrationDataset, custom_name: str = None, shuffle: bool = False
    ) -> None:
        self.dataset = dataset
        #self.config
        self.shuffle_before_iter = shuffle
        self.groups_dir = "../data/test_group"
        file_name = "groups_" + (custom_name if custom_name else "cwru")
        self.groups_file = os.path.join(self.groups_dir, file_name + ".npy")

    def groups(self) -> npt.NDArray[np.int_]:
        """
        Get the groups from all samples of the dataset. It tries to load from memory at `groups_dir` but if it
        doesnt exists it will compute the groups and save it in `groups_file`.

        Returns:
            npt.NDArray[np.int_]: groups of all dataset
        """
        if os.path.exists(self.groups_file):
            print(f"Loading group dataset from: {self.groups_file}")
            return np.load(self.groups_file,allow_pickle=True)
        else:
            groups = self._random_grouping() if self.shuffle_before_iter else self._sequential_grouping()
            os.makedirs(self.groups_dir, exist_ok=True)  # Ensure that the directory exists
            np.save(self.groups_file, groups)
            return groups

    def _sequential_grouping(self) -> npt.NDArray[np.int_]:
        """Generate the groups iterating sequentially over the dataset

        Returns:
            npt.NDArray[np.int_]: groups of each sample in dataset
        """
        mapped_samples = map(
            self._assigne_group,
            tqdm(self.dataset, total=len(self.dataset), unit="sample", desc="Grouping dataset: "),
        )
        groups = np.array(list(mapped_samples))
        return groups

    def _random_grouping(self) -> npt.NDArray[np.int_]:
        """Generate the groups randomly iterating over the dataset, is equivalent to make a shuffle
        in the dataset. Despite the shuffle, the groups are ordered back to the original order.

        This kind of grouping is needed for datasets where grouping are not predefined

        Returns:
            npt.NDArray[np.int_]: groups of each sample in dataset, in the original order
        """
        # Create the indexes shuffled
        rng = np.random.default_rng(self.config["seed"])  # Ensure thats the seed is correct
        indexs_shuffled = np.arange(len(self.dataset))
        rng.shuffle(indexs_shuffled)
        # Map the dataset ramdomly
        mapped_samples = list(
            map(
                lambda i: self._assigne_group(self.dataset[i]),
                tqdm(indexs_shuffled, total=len(self.dataset), unit="sample", desc="Grouping dataset: "),
            )
        )
        # Sort the output back to the dataset original order
        groups = np.array([value for _, value in sorted(zip(indexs_shuffled, mapped_samples))])
        return groups

    @staticmethod
    def _assigne_group(sample: SignalSample) -> int:
        """
        Get a signal sample and based on the dataset criterion, assigne a group
        to the given sample

        Args:
            sample (SignalSample): sample to be assigned

        Returns:
            int: group id
        """
        pass


class GroupCWRULoad(GroupDataset):
    """Grouping strategy that uses the sample's `load` metadata as the group."""

    @staticmethod
    def _assigne_group(sample: SignalSample) -> int:
        """Return the value stored under ``sample["metainfo"]["load"]``.

        Args:
            sample (SignalSample): sample to be assigned

        Returns:
            int: group id
        """
        metainfo = sample["metainfo"]
        return metainfo["load"]
    
class GroupCWRUSeverity(GroupDataset):
    """Grouping strategy keyed on the sample's `fault_size` metadata."""

    @staticmethod
    def _assigne_group(sample: SignalSample) -> int:
        """Map the sample's fault size to a group id.

        A fault size of 0.0 is grouped by the sample's `load` value; the fault sizes
        0.007, 0.014, 0.021 and 0.028 map to groups 0, 1, 2 and 3 respectively.

        Args:
            sample (SignalSample): sample to be assigned

        Returns:
            int: group id, or None when the fault size matches none of the known values
        """
        metainfo = sample["metainfo"]
        fault_size = metainfo["fault_size"]

        # Samples with fault size 0.0 fall back to the load value.
        if fault_size == 0.0:
            return metainfo["load"]

        # The position of a fault size in this tuple is its group id.
        for group_id, known_size in enumerate((0.007, 0.014, 0.021, 0.028)):
            if fault_size == known_size:
                return group_id

        # No known fault size matched.
        return None

In [3]:
# Group the raw dataset by load; the groups are cached under the "load_cwru" name.
load_folds = GroupCWRULoad(dataset=raw_dataset, custom_name="load_cwru")

In [4]:
# groups() loads the cached groups file when it exists; otherwise it computes the
# groups and saves them for later runs.
folds = load_folds.groups()
# Bare expression so the notebook displays the resulting array.
folds

Loading group dataset from: ../data/test_group/groups_load_cwru.npy


array([[0.],
       [0.],
       [1.],
       [1.],
       [2.],
       [2.],
       [3.],
       [3.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [2.],
       [2.],
       [2.],
       [3.],
       [3.],
       [3.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [2.],
       [2.],
       [2.],
       [3.],
       [3.],
       [3.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [2.],
       [2.],
       [2.],
       [3.],
       [3.],
       [3.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [2.],
       [2.],
       [2.],
       [3.],
       [3.],
       [3.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [2.],
       [2.],
       [2.],
       [3.],
       [3.],
       [3.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [2.],
       [2.],
       [2.],

In [5]:
import sys

# Add the parent directory to the import path so the `src` package used by the
# following cells can be imported. Guarded so that re-running this cell does not
# append duplicate entries.
if '../' not in sys.path:
    sys.path.append('../')

In [None]:
from sklearn.ensemble import RandomForestClassifier
from src.experiments.features_1d import Features1DExperiment
import numpy as np

def rms(signal):
    """Return the root mean square of `signal`.

    Args:
        signal: array-like of numbers (NumPy array, list, ...).

    Returns:
        The square root of the mean of the squared values.
    """
    values = np.asarray(signal)
    # Integer (and other non-float) inputs are promoted to float64 first: squaring
    # narrow integer dtypes such as int16 would silently overflow.
    if not np.issubdtype(values.dtype, np.inexact):
        values = values.astype(np.float64)
    return np.sqrt(np.mean(np.square(values)))

def peak_to_peak(signal):
    """Return the difference between the maximum and the minimum of `signal`.

    Args:
        signal: array-like of numbers (NumPy array, list, ...).

    Returns:
        ``max(signal) - min(signal)``.
    """
    values = np.asarray(signal)
    # Integer (and other non-float) inputs are promoted to float64 first: the
    # subtraction can wrap around for narrow signed dtypes (e.g. int8 127 - (-128)).
    if not np.issubdtype(values.dtype, np.inexact):
        values = values.astype(np.float64)
    return np.max(values) - np.min(values)

# 1. Create and run the experiment
# Feature functions and classifier handed to the experiment below.
time_domain_features = [rms, peak_to_peak, np.mean, np.std]
classifier = RandomForestClassifier(n_estimators=200, random_state=42)

experiment = Features1DExperiment(
    name="Vibration_Analysis_RF",
    description="Análise de vibração com RandomForest e features no domínio do tempo",
    feature_functions=time_domain_features,
    dataset=raw_dataset,
    data_fold_idxs=folds,
    n_inner_folds=3,
    feature_selector=None,
    model=classifier,
)

results = experiment.run()

Carregando dados processados de processed_data/CWRU_Vibration_Analysis_RF_features.csv

=== Fold Externo 1/4 ===
FOLD RESULT> FoldResults(fold_index=0, y_true=array([0, 0, 1, 1, 1, 3, 3, 3, 2, 2, 2, 1, 1, 1, 3, 3, 3, 2, 2, 2, 3, 3,
       3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 3, 3, 3,
       3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 2, 2, 3, 3, 2, 2,
       1, 1, 3, 3, 2, 2]), y_pred=array([0, 3, 1, 1, 1, 3, 3, 3, 3, 2, 1, 2, 3, 1, 3, 3, 3, 1, 2, 3, 2, 1,
       3, 1, 1, 1, 1, 1, 2, 2, 3, 2, 2, 2, 2, 2, 2, 3, 1, 1, 1, 3, 3, 1,
       2, 3, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 3, 2, 2, 1, 2, 2, 1, 1,
       2, 2, 2, 1, 2, 2]), y_proba=array([[0.74 , 0.07 , 0.075, 0.115],
       [0.305, 0.065, 0.04 , 0.59 ],
       [0.   , 0.83 , 0.075, 0.095],
       [0.   , 0.6  , 0.39 , 0.01 ],
       [0.005, 0.48 , 0.28 , 0.235],
       [0.005, 0.07 , 0.305, 0.62 ],
       [0.   , 0.185, 0.38 , 0.435],
       [0.   , 0.045, 0.255, 0.7  ],
       [0.   , 0.365, 0.145, 0.49 ],
 

In [7]:
from src.experiments.features_1d import ExperimentResults
import pandas as pd

# JSON file holding the stored experiment results.
results_json_path = "vibration_analysis_results_Vibration_Analysis_RF_features_['feature_rms', 'feature_peak_to_peak', 'feature_mean', 'feature_std'].json"
loaded_results = ExperimentResults.load_json(results_json_path)

# 4. Access specific data
print("\nMatriz de Confusão Global:")
overall_confusion = np.array(loaded_results.overall_metrics['confusion_matrix'])
print(overall_confusion)

print("\nPredições do Primeiro Fold:")
first_fold = loaded_results.folds[0]
print(f"Valores Reais: {first_fold.y_true}")
print(f"Predições: {first_fold.y_pred}")

# 5. Export to a DataFrame
def results_to_dataframe(results: "ExperimentResults") -> pd.DataFrame:
    """Convert all fold results into a single DataFrame, one row per prediction.

    Args:
        results (ExperimentResults): object exposing `folds`; each fold provides
            `fold_index`, `y_true`, `y_pred` and `y_proba` (which may be None).

    Returns:
        pd.DataFrame: columns ``fold``, ``y_true``, ``y_pred`` and ``y_proba``. The
        columns are present even when there are no rows.
    """
    columns = ["fold", "y_true", "y_pred", "y_proba"]
    rows = []
    for fold in results.folds:
        # Folds without probabilities get one None placeholder per sample.
        probabilities = fold.y_proba if fold.y_proba is not None else [None] * len(fold.y_true)
        rows.extend(
            {"fold": fold.fold_index, "y_true": true, "y_pred": pred, "y_proba": proba}
            for true, pred, proba in zip(fold.y_true, fold.y_pred, probabilities)
        )
    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(rows, columns=columns)

df_results = results_to_dataframe(loaded_results)
# Header and the first rows, each on its own line of output.
print("\nDataFrame com todos os resultados:", df_results.head(), sep="\n")


Matriz de Confusão Global:
[[ 6  0  0  2]
 [ 0 70 19  5]
 [ 0 21 54 21]
 [ 2 10 21 63]]

Predições do Primeiro Fold:
Valores Reais: [0 0 1 1 1 3 3 3 2 2 2 1 1 1 3 3 3 2 2 2 3 3 3 1 1 1 1 1 1 3 3 3 1 1 1 2 2
 2 1 1 1 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 1 1 3 3 2 2 3 3 2 2 1 1 3 3 2 2]
Predições: [0 3 1 1 1 3 3 3 3 2 1 2 3 1 3 3 3 1 2 3 2 1 3 1 1 1 1 1 2 2 3 2 2 2 2 2 2
 3 1 1 1 3 3 1 2 3 2 2 2 2 1 2 2 1 2 2 2 1 3 2 2 1 2 2 1 1 2 2 2 1 2 2]

DataFrame com todos os resultados:
   fold  y_true  y_pred                     y_proba
0     0       0       0  [0.74, 0.07, 0.075, 0.115]
1     0       0       3  [0.305, 0.065, 0.04, 0.59]
2     0       1       1   [0.0, 0.83, 0.075, 0.095]
3     0       1       1      [0.0, 0.6, 0.39, 0.01]
4     0       1       1  [0.005, 0.48, 0.28, 0.235]
