# Main

In [None]:
import soundfile as sf
import numpy as np
from tg_utils import ExtractFeatures
import pandas as pd

In [None]:
## Configuration constants
BUFFER_LEN = 2048        # Buffer size used for the FFT computation
INPUT_AUDIO_CHANNEL = 0  # Channel taken from the microphone audio files (0 or 1)

# Integer class labels written to the 'FaultID' column, one per machine condition:
# 1 = off (noise only), 2 = healthy, 3 = bearing fault, 4 = fan fault, 5 = gear fault.
(
    FAULT_ID_OFF,
    FAULT_ID_HEALTH,
    FAULT_ID_BEARING,
    FAULT_ID_FAN,
    FAULT_ID_GEAR,
) = range(1, 6)


In [None]:
## Load one recording per machine condition.
# Each sf.read call is unpacked into (samples, sampling rate).

# Off condition with noise (Fault_ID1)
Off_Cond_Data, Off_Cond_FS = sf.read(
    '../data/audio_files/original/M1_OFF_S1.flac'
)
# Healthy condition (Fault_ID2)
Health_Cond_Data, Health_Cond_FS = sf.read(
    '../data/audio_files/original/M1_H_S1.flac'
)
# Bearing fault (Fault_ID3)
Bearing_Cond_Data, Bearing_Cond_FS = sf.read(
    '../data/audio_files/original/M1_F1_S1.flac'
)
# Fan fault (Fault_ID4)
Fan_Cond_Data, Fan_Cond_FS = sf.read(
    '../data/audio_files/original/M1_F2_S1.flac'
)
# Gear fault (Fault_ID5)
Gear_Cond_Data, Gear_Cond_FS = sf.read(
    '../data/audio_files/original/M1_F3_S1.flac'
)

In [None]:
## Keep a single channel of each recording and drop the leading samples that carry no data.
# Python slices are 0-based, so `[N:]` discards exactly the first N samples; the former
# `[N+1:]` start index discarded one sample more than the intended 20*BUFFER_LEN.
# Assumes every recording was loaded as a 2-D (samples, channels) array - TODO confirm
# that none of the input files is mono.
# NOTE: each array is overwritten with its 1-D slice, so this cell must run exactly once
# after loading; re-running it without reloading fails on the two-index lookup.
LEADING_SAMPLES_TO_SKIP = 20 * BUFFER_LEN

Off_Cond_Data      = Off_Cond_Data[LEADING_SAMPLES_TO_SKIP:, INPUT_AUDIO_CHANNEL]
Health_Cond_Data   = Health_Cond_Data[LEADING_SAMPLES_TO_SKIP:, INPUT_AUDIO_CHANNEL]
Bearing_Cond_Data  = Bearing_Cond_Data[LEADING_SAMPLES_TO_SKIP:, INPUT_AUDIO_CHANNEL]
Fan_Cond_Data      = Fan_Cond_Data[LEADING_SAMPLES_TO_SKIP:, INPUT_AUDIO_CHANNEL]
Gear_Cond_Data     = Gear_Cond_Data[LEADING_SAMPLES_TO_SKIP:, INPUT_AUDIO_CHANNEL]

In [None]:
# Record how many samples each recording currently holds
(
    SampleSize_Off_Cond_Data,
    SampleSize_Health_Cond_Data,
    SampleSize_Bearing_Cond_Data,
    SampleSize_Fan_Cond_Data,
    SampleSize_Gear_Cond_Data,
) = (
    signal.size
    for signal in (
        Off_Cond_Data,
        Health_Cond_Data,
        Bearing_Cond_Data,
        Fan_Cond_Data,
        Gear_Cond_Data,
    )
)

In [None]:
# Report the sample count of every recording, one per line
print(
    f'SampleSize_Off_Cond_Data: {SampleSize_Off_Cond_Data}',
    f'SampleSize_Health_Cond_Data: {SampleSize_Health_Cond_Data}',
    f'SampleSize_Bearing_Cond_Data: {SampleSize_Bearing_Cond_Data}',
    f'SampleSize_Fan_Cond_Data: {SampleSize_Fan_Cond_Data}',
    f'SampleSize_Gear_Cond_Data: {SampleSize_Gear_Cond_Data}',
    sep='\n',
)

In [None]:
# Trim every signal so its length is a whole number of BUFFER_LEN-sample buffers.
# divmod returns (complete buffers, leftover samples) in one step. Using integer
# arithmetic keeps the NumOfRaw_* counts as ints rather than the floats that `/`
# produced, so they can be used directly as shapes and loop bounds.
# NOTE: the SampleSize_* variables are not refreshed here and keep the pre-trim lengths.

## Off condition with noise (Fault_ID1)
NumOfRaw_Off_Cond_Data, re_off = divmod(SampleSize_Off_Cond_Data, BUFFER_LEN)
Off_Cond_Data = Off_Cond_Data[:NumOfRaw_Off_Cond_Data * BUFFER_LEN]

## Healthy condition (Fault_ID2)
NumOfRaw_Health_Cond_Data, re_health = divmod(SampleSize_Health_Cond_Data, BUFFER_LEN)
Health_Cond_Data = Health_Cond_Data[:NumOfRaw_Health_Cond_Data * BUFFER_LEN]

## Bearing fault (Fault_ID3)
NumOfRaw_Bearing_Cond_Data, re_bearing = divmod(SampleSize_Bearing_Cond_Data, BUFFER_LEN)
Bearing_Cond_Data = Bearing_Cond_Data[:NumOfRaw_Bearing_Cond_Data * BUFFER_LEN]

## Fan fault (Fault_ID4)
NumOfRaw_Fan_Cond_Data, re_fan = divmod(SampleSize_Fan_Cond_Data, BUFFER_LEN)
Fan_Cond_Data = Fan_Cond_Data[:NumOfRaw_Fan_Cond_Data * BUFFER_LEN]

## Gear fault (Fault_ID5)
NumOfRaw_Gear_Cond_Data, re_gear = divmod(SampleSize_Gear_Cond_Data, BUFFER_LEN)
Gear_Cond_Data = Gear_Cond_Data[:NumOfRaw_Gear_Cond_Data * BUFFER_LEN]

In [None]:
# DEBUG: array shapes BEFORE the reshape step
print(
    f'Off_Cond_Data shape:        {Off_Cond_Data.shape}',
    f'Health_Cond_Data shape:     {Health_Cond_Data.shape}',
    f'Bearing_Cond_Data shape:    {Bearing_Cond_Data.shape}',
    f'Fan_Cond_Data shape:        {Fan_Cond_Data.shape}',
    f'Gear_Cond_Data shape:       {Gear_Cond_Data.shape}',
    sep='\n',
)

## Pre-processamento

In [None]:
## Pre-processing: fold each signal into a (BUFFER_LEN, number of buffers) matrix.
# order='F' fills the result column by column, so every column holds one
# consecutive run of BUFFER_LEN samples.

Off_Cond_Data = np.reshape(
    Off_Cond_Data, (BUFFER_LEN, int(NumOfRaw_Off_Cond_Data)), order='F'
)
Health_Cond_Data = np.reshape(
    Health_Cond_Data, (BUFFER_LEN, int(NumOfRaw_Health_Cond_Data)), order='F'
)
Bearing_Cond_Data = np.reshape(
    Bearing_Cond_Data, (BUFFER_LEN, int(NumOfRaw_Bearing_Cond_Data)), order='F'
)
Fan_Cond_Data = np.reshape(
    Fan_Cond_Data, (BUFFER_LEN, int(NumOfRaw_Fan_Cond_Data)), order='F'
)
Gear_Cond_Data = np.reshape(
    Gear_Cond_Data, (BUFFER_LEN, int(NumOfRaw_Gear_Cond_Data)), order='F'
)

In [None]:
# DEBUG: array shapes AFTER the reshape step
print(
    f'Off_Cond_Data shape:        {Off_Cond_Data.shape}',
    f'Health_Cond_Data shape:     {Health_Cond_Data.shape}',
    f'Bearing_Cond_Data shape:    {Bearing_Cond_Data.shape}',
    f'Fan_Cond_Data shape:        {Fan_Cond_Data.shape}',
    f'Gear_Cond_Data shape:       {Gear_Cond_Data.shape}',
    sep='\n',
)

In [None]:
# Total number of buffers across the five recordings
total_buff = (
    NumOfRaw_Off_Cond_Data
    + NumOfRaw_Health_Cond_Data
    + NumOfRaw_Bearing_Cond_Data
    + NumOfRaw_Fan_Cond_Data
    + NumOfRaw_Gear_Cond_Data
)

# Column names of the feature table; the last entry is the class label
columns = [
    'RMS', 'Mean', 'Median', 'Variance', 'Skewness', 'Kurtosis',
    'CrestFactor', 'ShapeFactor', 'ImpulseFactor', 'MarginFactor',
    'Peak1', 'Peak2', 'Peak3',
    'PeakLocs1', 'PeakLocs2', 'PeakLocs3',
    'FaultID',
]

In [None]:
# Feature rows for the off condition with noise (Fault_ID1): one dict per buffer column.
# ExtractFeatures is presumably returning the 16 values named in `columns` before
# 'FaultID' - TODO confirm against tg_utils; zip would silently drop any extras.
off_data_list = [
    dict(zip(columns, [
        *ExtractFeatures(
            DataBuff=Off_Cond_Data[:BUFFER_LEN, buffer_idx],  # one BUFFER_LEN-sample column
            BufferLen=BUFFER_LEN,
            Fs=Off_Cond_FS,
        ),
        FAULT_ID_OFF,
    ]))
    for buffer_idx in range(int(NumOfRaw_Off_Cond_Data))
]

In [None]:
# Feature rows for the healthy condition (Fault_ID2): one dict per buffer column.
# ExtractFeatures is presumably returning the 16 values named in `columns` before
# 'FaultID' - TODO confirm against tg_utils; zip would silently drop any extras.
health_data_list = [
    dict(zip(columns, [
        *ExtractFeatures(
            DataBuff=Health_Cond_Data[:BUFFER_LEN, buffer_idx],  # one BUFFER_LEN-sample column
            BufferLen=BUFFER_LEN,
            Fs=Health_Cond_FS,
        ),
        FAULT_ID_HEALTH,
    ]))
    for buffer_idx in range(int(NumOfRaw_Health_Cond_Data))
]

In [None]:
# Feature rows for the bearing fault (Fault_ID3): one dict per buffer column.
# ExtractFeatures is presumably returning the 16 values named in `columns` before
# 'FaultID' - TODO confirm against tg_utils; zip would silently drop any extras.
bearing_data_list = [
    dict(zip(columns, [
        *ExtractFeatures(
            DataBuff=Bearing_Cond_Data[:BUFFER_LEN, buffer_idx],  # one BUFFER_LEN-sample column
            BufferLen=BUFFER_LEN,
            Fs=Bearing_Cond_FS,
        ),
        FAULT_ID_BEARING,
    ]))
    for buffer_idx in range(int(NumOfRaw_Bearing_Cond_Data))
]

In [None]:
# Feature rows for the fan fault (Fault_ID4): one dict per buffer column.
# ExtractFeatures is presumably returning the 16 values named in `columns` before
# 'FaultID' - TODO confirm against tg_utils; zip would silently drop any extras.
fan_data_list = [
    dict(zip(columns, [
        *ExtractFeatures(
            DataBuff=Fan_Cond_Data[:BUFFER_LEN, buffer_idx],  # one BUFFER_LEN-sample column
            BufferLen=BUFFER_LEN,
            Fs=Fan_Cond_FS,
        ),
        FAULT_ID_FAN,
    ]))
    for buffer_idx in range(int(NumOfRaw_Fan_Cond_Data))
]

In [None]:
# Feature rows for the gear fault (Fault_ID5): one dict per buffer column.
# ExtractFeatures is presumably returning the 16 values named in `columns` before
# 'FaultID' - TODO confirm against tg_utils; zip would silently drop any extras.
gear_data_list = [
    dict(zip(columns, [
        *ExtractFeatures(
            DataBuff=Gear_Cond_Data[:BUFFER_LEN, buffer_idx],  # one BUFFER_LEN-sample column
            BufferLen=BUFFER_LEN,
            Fs=Gear_Cond_FS,
        ),
        FAULT_ID_GEAR,
    ]))
    for buffer_idx in range(int(NumOfRaw_Gear_Cond_Data))
]

In [None]:
# DEBUG - check that the number of processed rows matches the amount of raw data
# (each list length should equal the second dimension of the matching array)
print(
    f'off list size: {len(off_data_list)}, shape: {Off_Cond_Data.shape}',
    f'health list size: {len(health_data_list)}, shape: {Health_Cond_Data.shape}',
    f'bearing list size: {len(bearing_data_list)}, shape: {Bearing_Cond_Data.shape}',
    f'fan list size: {len(fan_data_list)}, shape: {Fan_Cond_Data.shape}',
    f'gear list size: {len(gear_data_list)}, shape: {Gear_Cond_Data.shape}',
    sep='\n',
)

In [None]:
# Merge the per-condition row lists, in fault-ID order, into one list
processed_data_list = [
    *off_data_list,
    *health_data_list,
    *bearing_data_list,
    *fan_data_list,
    *gear_data_list,
]

# Build the feature table with the columns in the order given by `columns`
fault_data_df = pd.DataFrame(processed_data_list, columns=columns)

In [None]:
# Write the feature table to CSV without the DataFrame index column
fault_data_df.to_csv(
    path_or_buf='../data/extracted_features/features_like_artigo.csv',
    index=False,
)