In [1]:
from vibdata.deep.DeepDataset import convertDataset
import vibdata.raw as raw_datasets
from vibdata.deep.signal.transforms import Sequential,SplitSampleRate,FeatureExtractor,FilterByValue

dataset_name = "CWRU"

# Get raw root_dir
raw_root_dir = "../data/raw_data/cwru"
raw_dataset = getattr(raw_datasets, dataset_name + "_raw")(raw_root_dir, download=True)

  from .autonotebook import tqdm as notebook_tqdm
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default


In [2]:
from vibdata.deep.signal.transforms import Kurtosis,RootMeanSquare,StandardDeviation,Mean,LogAttackTime,TemporalDecrease,TemporalCentroid,EffectiveDuration,ZeroCrossingRate
import numpy as np
from scipy.stats import skew
from vibdata.deep.signal.transforms import Transform

class PeakValue(Transform):
    def __init__(self):
        super().__init__()

    def transform(self, data):
        return np.max(np.abs(data["signal"]))
    
class CrestFactor(Transform):
    def __init__(self):
        super().__init__()

    def transform(self, data):
        signal = data["signal"]
        rms = np.sqrt(sum(np.square(signal)) / len(signal))
        if rms == 0:
            return 0
        return np.max(np.abs(signal)) / rms
    
class Skewness(Transform):
    def __init__(self):
        super().__init__()

    def transform(self, data):
        signal = data["signal"]
        return skew(signal)

class ClearanceFactor(Transform):
    def __init__(self):
        super().__init__()

    def transform(self, data):
        signal = data["signal"]
        peak_value = np.max(np.abs(signal))
        base = sum(np.sqrt(np.square(signal))) / len(signal)
        return peak_value / base
    
class ImpulseFactor(Transform):
    def __init__(self):
        super().__init__()

    def transform(self, data):
        signal = data["signal"]
        peak_value = np.max(np.abs(signal))
        mean_value = np.mean(np.abs(signal))
        if mean_value == 0:
            return 0
        return peak_value / mean_value

class ShapeFactor(Transform):
    def __init__(self):
        super().__init__()

    def transform(self, data):
        signal = data["signal"]
        rms = np.sqrt(sum(np.square(signal)) / len(signal))
        mean_value = np.mean(np.abs(signal))
        if mean_value == 0:
            return 0
        return rms / mean_value

class UpperBoundValueHistogram(Transform):
    def __init__(self, bins=10):
        super().__init__()
        self.bins = bins

    def transform(self, data):
        signal = data["signal"]
        max = np.max(signal)
        min = np.min(signal)
        return max + ( (0.5 * (max - min) ) / (len(signal) - 1) )
    
class LowerBoundValueHistogram(Transform):
    def __init__(self, bins=10):
        super().__init__()
        self.bins = bins

    def transform(self, data):
        signal = data["signal"]
        max = np.max(signal)
        min = np.min(signal)
        return min + ( (0.5 * (max - min) ) / (len(signal) - 1) )

class Variance(Transform):
    def __init__(self):
        super().__init__()

    def transform(self, data):
        signal = data["signal"]
        return np.var(signal)
    
class PeakToPeak(Transform):
    def __init__(self):
        super().__init__()

    def transform(self, data):
        signal = data["signal"]
        return np.ptp(signal)  # Peak to peak value

features_funcs = [Kurtosis(),
            RootMeanSquare(),
            StandardDeviation(),
            Mean(),
            LogAttackTime(),
            TemporalDecrease(),
            TemporalCentroid(),
            EffectiveDuration(),
            ZeroCrossingRate(),
            PeakValue(),
            CrestFactor(),
            Skewness(),
            ClearanceFactor(),
            ImpulseFactor(), 
            ShapeFactor(),
            UpperBoundValueHistogram(),
            LowerBoundValueHistogram(),
            Variance(),
            PeakToPeak()
            ]

transforms = Sequential(
    [   
        #FilterByValue(on_field="sample_rate", values=12000,remove=False),
        SplitSampleRate(),
        FeatureExtractor(features=features_funcs),
    ]
)
print(transforms)


Sequential(transforms=[SplitSampleRate(),
                       FeatureExtractor(features=[Kurtosis(), RootMeanSquare(),
                                                  StandardDeviation(), Mean(),
                                                  LogAttackTime(),
                                                  TemporalDecrease(),
                                                  TemporalCentroid(),
                                                  EffectiveDuration(),
                                                  ZeroCrossingRate(),
                                                  PeakValue(), CrestFactor(),
                                                  Skewness(), ClearanceFactor(),
                                                  ImpulseFactor(),
                                                  ShapeFactor(),
                                                  UpperBoundValueHistogram(),
                                                  LowerBoundValueHistogram(),
    

In [3]:
deep_root_dir = "../data/deep_data/features_igor"
deep_dataset = convertDataset(raw_dataset, transforms=transforms, dir_path=deep_root_dir, batch_size=32)

In [4]:
import sys
sys.path.append('../')  

In [5]:
import os

import numpy as np
import numpy.typing as npt
from tqdm import tqdm
from vibdata.deep.DeepDataset import DeepDataset
from vibdata.deep.signal.core import SignalSample
from src.utils.group_dataset import GroupDataset
from src.utils.fold_idx_generator import FoldIdxGenerator

class GroupMultiRoundCWRULoad(GroupDataset):
    @staticmethod
    def _assigne_group(sample: SignalSample) -> int:
        sample_metainfo = sample["metainfo"]
        return sample_metainfo["label"].astype(str) + " " + sample_metainfo["load"].astype(int).astype(str)

class GroupCWRULoad(GroupDataset):
    @staticmethod
    def _assigne_group(sample: SignalSample) -> int:
        return sample["metainfo"]["load"]
    
class GroupCWRUSeverity(GroupDataset):
    @staticmethod
    def _assigne_group(sample: SignalSample) -> int:
        severity = sample["metainfo"]["fault_size"]
        
        match severity:
            case 0.0:
                return sample["metainfo"]["load"]
            case 0.007:
                return 0
            case 0.014:
                return 1
            case 0.021:
                return 2
            case 0.028:
                return 3
        
        return None

In [6]:
folds_singleround = FoldIdxGenerator(deep_dataset, GroupCWRULoad , dataset_name="CWRU").generate_folds()
folds_singleround

Loading group dataset from: ../data/grouping/groups_CustomGroupCWRU.npy


array([0., 0., 0., ..., 3., 3., 3.])

In [7]:
CLASS_DEF = {0: "N", 1: "O", 2: "I", 3: "R"}
CONDITION_DEF = {"0": "0", "1": "1", "2": "2", "3": "3"}
folds_multiround = FoldIdxGenerator(deep_dataset,
                                    GroupMultiRoundCWRULoad ,
                                    dataset_name="CWRU",
                                    multiround=True,
                                    class_def=CLASS_DEF,
                                    condition_def=CONDITION_DEF).generate_folds()
folds_multiround

Loading group dataset from: ../data/grouping/groups_CustomGroupCWRU_multiround.npy
Per round splits:  4
Number of repeats:  8
Total combinations of folds: 256
Total combinations between folds 174792640
Time to generate combinations: 10.46 seconds


  0%|          | 126975/172869516 [00:02<53:04, 54249.37it/s]


Total combs:  8
round:  0
fold:  0 -> N 0, O 0, I 0, R 0,  => 0
fold:  1 -> N 1, O 1, I 1, R 1,  => 1
fold:  2 -> N 2, O 2, I 2, R 2,  => 2
fold:  3 -> N 3, O 3, I 3, R 3,  => 3

round:  1
fold:  0 -> N 0, O 3, I 2, R 2,  => 4
fold:  1 -> N 1, O 2, I 1, R 3,  => 5
fold:  2 -> N 2, O 1, I 0, R 1,  => 6
fold:  3 -> N 3, O 0, I 3, R 0,  => 7

round:  2
fold:  0 -> N 0, O 2, I 2, R 3,  => 8
fold:  1 -> N 1, O 1, I 3, R 2,  => 9
fold:  2 -> N 2, O 3, I 0, R 0,  => 10
fold:  3 -> N 3, O 0, I 1, R 1,  => 11

round:  3
fold:  0 -> N 0, O 2, I 3, R 2,  => 12
fold:  1 -> N 1, O 3, I 0, R 1,  => 13
fold:  2 -> N 2, O 0, I 1, R 0,  => 14
fold:  3 -> N 3, O 1, I 2, R 3,  => 15

round:  4
fold:  0 -> N 0, O 1, I 3, R 3,  => 16
fold:  1 -> N 1, O 0, I 0, R 2,  => 17
fold:  2 -> N 2, O 2, I 2, R 0,  => 18
fold:  3 -> N 3, O 3, I 1, R 1,  => 19

round:  5
fold:  0 -> N 0, O 3, I 1, R 1,  => 20
fold:  1 -> N 1, O 0, I 2, R 3,  => 21
fold:  2 -> N 2, O 2, I 0, R 0,  => 22
fold:  3 -> N 3, O 1, I 3, R 2, 

[array([0, 0, 0, ..., 3, 3, 3]),
 array([0, 0, 0, ..., 3, 3, 3]),
 array([0, 0, 0, ..., 1, 1, 1]),
 array([0, 0, 0, ..., 0, 0, 0]),
 array([0, 0, 0, ..., 0, 0, 0]),
 array([0, 0, 0, ..., 3, 3, 3]),
 array([0, 0, 0, ..., 1, 1, 1]),
 array([0, 0, 0, ..., 2, 2, 2])]

In [8]:
from sklearn.ensemble import RandomForestClassifier
from src.experiments.features_1d import Features1DExperiment
import numpy as np
import scipy
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.neighbors import KNeighborsClassifier


knn = KNeighborsClassifier(n_neighbors=9)
sfs = SequentialFeatureSelector(knn, n_features_to_select=5)
kbest =  SelectKBest(f_classif, k=9)

# 1. Criar e executar experimento
experiment = Features1DExperiment(
    name="Vibration_Analysis_RF",
    description="Análise de vibração com RandomForest e features no domínio do tempo",
    feature_names= features_funcs,
    dataset=deep_dataset,
    data_fold_idxs=folds_multiround,
    n_inner_folds=4,
    #feature_selector=kbest,
    model=RandomForestClassifier(random_state=42),
    model_parameters_search_space = {
        "model__n_estimators": [50,100,200],
        "model__criterion": ["gini", "entropy","log_loss"],
        "model__max_depth": [10, 25, 50],
        "model__min_samples_split": [2, 5, 10]
    }
)

results = experiment.run()


### Rodada 1/8 ###

=== Fold Externo 1/4 ===
Fitting 4 folds for each of 81 candidates, totalling 324 fits
  Teste - Acurácia: 0.7114, F1: 0.7111

=== Fold Externo 2/4 ===
Fitting 4 folds for each of 81 candidates, totalling 324 fits
  Teste - Acurácia: 0.8287, F1: 0.8272

=== Fold Externo 3/4 ===
Fitting 4 folds for each of 81 candidates, totalling 324 fits
  Teste - Acurácia: 0.8525, F1: 0.8486

=== Fold Externo 4/4 ===
Fitting 4 folds for each of 81 candidates, totalling 324 fits
  Teste - Acurácia: 0.7925, F1: 0.7923

=== Resultados Finais ===
Acurácia Média: 0.7963 ± 0.0535
F1-Score Médio: 0.7948 ± 0.0524
Changed working directory to: results_1d_features/vibration_analysis_results_Vibration_Analysis_RF_20250909_140053

### Rodada 2/8 ###

=== Fold Externo 1/4 ===
Fitting 4 folds for each of 81 candidates, totalling 324 fits
  Teste - Acurácia: 0.7711, F1: 0.7725

=== Fold Externo 2/4 ===
Fitting 4 folds for each of 81 candidates, totalling 324 fits
  Teste - Acurácia: 0.8762, F1:

In [12]:
from src.experiments.features_1d import ExperimentResults
import pandas as pd

# read the json from multiround experiment

results_list = []
for i in range(1,9):
    loaded_results = ExperimentResults.load_json(f"/home/vbbonella/mestrado/tese-mestrado/experiments_notebooks/results_1d_features/vibration_analysis_results_Vibration_Analysis_RF_20250909_140053/round{i}.json")

    # 4. Acessar dados específicos
    print("\nMatriz de Confusão Global:")
    print(loaded_results.overall_metrics['mean_f1'])
    results_list.append(loaded_results.overall_metrics['mean_f1'])

print(np.mean(results_list))
'''
print("\nPredições do Primeiro Fold:")
print(f"Valores Reais: {loaded_results.folds[0].y_true}")
print(f"Predições: {loaded_results.folds[0].y_pred}")

# 5. Exportar para DataFrame
def results_to_dataframe(results: ExperimentResults) -> pd.DataFrame:
    """Converte todos os resultados para um DataFrame."""
    rows = []
    for fold in results.folds:
        for true, pred, proba in zip(fold.y_true, fold.y_pred, 
                                   fold.y_proba if fold.y_proba is not None else [None]*len(fold.y_true)):
            row = {
                'fold': fold.fold_index,
                'y_true': true,
                'y_pred': pred,
                'y_proba': proba
            }
            rows.append(row)
    return pd.DataFrame(rows)

df_results = results_to_dataframe(loaded_results)
print("\nDataFrame com todos os resultados:")
print(df_results.head())
'''


Matriz de Confusão Global:
0.7948052236416578

Matriz de Confusão Global:
0.7847714220293017

Matriz de Confusão Global:
0.8020205731788945

Matriz de Confusão Global:
0.8075867614390981

Matriz de Confusão Global:
0.7945059588074382

Matriz de Confusão Global:
0.800105853580073

Matriz de Confusão Global:
0.789527374501976

Matriz de Confusão Global:
0.7924218667312547
0.7957181292387118


'\nprint("\nPredições do Primeiro Fold:")\nprint(f"Valores Reais: {loaded_results.folds[0].y_true}")\nprint(f"Predições: {loaded_results.folds[0].y_pred}")\n\n# 5. Exportar para DataFrame\ndef results_to_dataframe(results: ExperimentResults) -> pd.DataFrame:\n    """Converte todos os resultados para um DataFrame."""\n    rows = []\n    for fold in results.folds:\n        for true, pred, proba in zip(fold.y_true, fold.y_pred, \n                                   fold.y_proba if fold.y_proba is not None else [None]*len(fold.y_true)):\n            row = {\n                \'fold\': fold.fold_index,\n                \'y_true\': true,\n                \'y_pred\': pred,\n                \'y_proba\': proba\n            }\n            rows.append(row)\n    return pd.DataFrame(rows)\n\ndf_results = results_to_dataframe(loaded_results)\nprint("\nDataFrame com todos os resultados:")\nprint(df_results.head())\n'