In [2]:
def PrimaryFeatureExtractor(X):
    from scipy.stats import kurtosis,skew
    import numpy as np
    Y=[]
    rms=np.sqrt(np.mean(np.square(X)))
    #Crest Factor: Crest factor is a parameter of a waveform,
    #such as alternating current or sound,
    #showing the ratio of peak values to the effective value.
    CrestFactor=abs(X).max()/rms
    #shape factor is defined as the ratio of the signal's RMS value to its absolute mean
    ShapeFactor=rms/(abs(X).mean())
    Y.append([abs(X).mean(),X.min(),X.max(),X.std(),rms,skew(X),kurtosis(X),CrestFactor,ShapeFactor])

    return Y

In [5]:
def FFT_BasedFeatures(X):
    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.fftpack import fft
    NBins=100
    N=len(X)
    T=1/N
    FreqList=np.linspace(0.0, 1.0/(2.0*T), N//2)
    X_fft=fft(X)
    X_fft[0]=0
    X_fft_magnitude=2.0/N*np.abs(X_fft[0:N//2])
    
    BinCounts=(N//2)//NBins
    
    SpecEnergy=[]
    #generating data columns label
    labels=[]
    for i in range(0,NBins):
        SpecEnergy.append(X_fft_magnitude[i*BinCounts:(i+1)*BinCounts].sum())
        labels.append('FFT'+str(i+1))
    
    
    return SpecEnergy,labels

In [17]:
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

MaxExpNo=225
counter=-1
labels=['Bearing','Flywheel','Healthy','LIV','LOV','NRV','Piston','Riderbelt']

data_columns_PrimaryStatFeatures=['Mean','Min','Max','StdDv','RMS','Skewness','Kurtosis','CrestFactor','ShapeFactor']
data_columns_Target=['Fault']
Faults={labels[0]:int(0),labels[1]:int(1),labels[2]:int(2),labels[3]:int(3),labels[4]:int(4),labels[5]:int(5),labels[6]:int(6),labels[7]:int(7)}
import os
import glob
data = pd.DataFrame()  # ✅ Define empty DataFrame to hold all data
base_path = os.path.expanduser('~/Downloads/AirCompressor_Data')
for label in labels:
    path = os.path.join(base_path, label, 'preprocess_Reading*.dat')
    files = sorted(glob.glob(path))
    if not files:
        print(f"No files found for label: {label} in path: {path}")

    for file in files:
        try:
            counter += 1
            X = np.loadtxt(file, delimiter=',')
            if counter % 10 == 0:
                print(f'Loading files: {round(counter / (len(labels) * MaxExpNo) * 100, 2)}% completed')

            StatFeatures = PrimaryFeatureExtractor(X)
            FFT_Features, data_columns_FFT_Features = FFT_BasedFeatures(X)
            data_columns = data_columns_PrimaryStatFeatures + data_columns_FFT_Features + data_columns_Target

            if counter == 0:
                data = pd.DataFrame(columns=data_columns)

            StatFeatures[0].extend(FFT_Features)
            StatFeatures[0].extend([Faults[label]])

            df_temp = pd.DataFrame(StatFeatures, index=[counter], columns=data_columns)
            data = pd.concat([data, df_temp], ignore_index=True)

        except Exception as e:
            print(f"Error processing file {file}: {e}")

print("Columns in data:", data.columns.tolist())

input_data=data.drop(columns=['Fault'])

Loading files: 0.0% completed


  data = pd.concat([data, df_temp], ignore_index=True)


Loading files: 0.56% completed
Loading files: 1.11% completed
Loading files: 1.67% completed
Loading files: 2.22% completed
Loading files: 2.78% completed
Loading files: 3.33% completed
Loading files: 3.89% completed
Loading files: 4.44% completed
Loading files: 5.0% completed
Loading files: 5.56% completed
Loading files: 6.11% completed
Loading files: 6.67% completed
Loading files: 7.22% completed
Loading files: 7.78% completed
Loading files: 8.33% completed
Loading files: 8.89% completed
Loading files: 9.44% completed
Loading files: 10.0% completed
Loading files: 10.56% completed
Loading files: 11.11% completed
Loading files: 11.67% completed
Loading files: 12.22% completed
Loading files: 12.78% completed
Loading files: 13.33% completed
Loading files: 13.89% completed
Loading files: 14.44% completed
Loading files: 15.0% completed
Loading files: 15.56% completed
Loading files: 16.11% completed
Loading files: 16.67% completed
Loading files: 17.22% completed
Loading files: 17.78% comple

In [18]:
input_data

Unnamed: 0,Mean,Min,Max,StdDv,RMS,Skewness,Kurtosis,CrestFactor,ShapeFactor,FFT1,...,FFT91,FFT92,FFT93,FFT94,FFT95,FFT96,FFT97,FFT98,FFT99,FFT100
0,0.140067,-1.5920,1.3448,0.186773,0.192985,-0.186858,4.518489,8.249357,1.377799,0.032111,...,0.000141,0.000132,0.000123,0.000114,0.000106,0.000099,0.000093,0.000087,0.000084,0.000082
1,0.187374,-1.4158,1.3472,0.202243,0.239738,-0.297777,3.182940,5.905620,1.279463,0.035926,...,0.001102,0.001102,0.001103,0.001103,0.001103,0.001103,0.001104,0.001104,0.001103,0.001104
2,0.201925,-1.3502,1.1123,0.230012,0.259409,-0.205917,1.754806,5.204907,1.284679,0.038505,...,0.001468,0.001472,0.001475,0.001477,0.001480,0.001482,0.001483,0.001485,0.001485,0.001486
3,0.177199,-1.3254,1.1532,0.223281,0.235517,-0.205812,2.146448,5.627623,1.329111,0.063200,...,0.000268,0.000253,0.000239,0.000224,0.000211,0.000199,0.000189,0.000179,0.000174,0.000171
4,0.182402,-1.2071,1.2910,0.237785,0.239437,-0.075352,1.286465,5.391815,1.312691,0.155544,...,0.003041,0.003039,0.003037,0.003035,0.003035,0.003033,0.003033,0.003033,0.003032,0.003031
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795,0.143902,-1.3802,1.5637,0.194700,0.194728,0.025085,2.808758,8.030176,1.353194,0.029575,...,0.000227,0.000229,0.000230,0.000231,0.000231,0.000232,0.000233,0.000233,0.000233,0.000234
1796,0.137908,-1.4787,1.4960,0.180747,0.187902,-0.053696,4.104418,7.961604,1.362513,0.025515,...,0.000054,0.000049,0.000045,0.000040,0.000035,0.000031,0.000027,0.000024,0.000021,0.000020
1797,0.131200,-1.2317,1.5648,0.178472,0.181081,0.075692,4.157256,8.641426,1.380187,0.021799,...,0.000821,0.000821,0.000821,0.000821,0.000821,0.000821,0.000821,0.000821,0.000821,0.000821
1798,0.131577,-1.3973,1.3526,0.178887,0.180757,0.011308,3.914086,7.730254,1.373775,0.039722,...,0.001398,0.001396,0.001395,0.001393,0.001391,0.001390,0.001389,0.001388,0.001388,0.001388


In [19]:
from sklearn import preprocessing
normalization_status='RobustScaler'   
''' Choices:
                                        1. Normalization
                                        2. StandardScaler
                                        3. MinMaxScaler
                                        4. RobustScaler
                                        5. Normalizer
                                        6. WithoutNormalization   '''
input_data_columns=data_columns_PrimaryStatFeatures+data_columns_FFT_Features

if (normalization_status=='Normalization'):
    data_array=preprocessing.normalize(input_data,norm='l2',axis=0)
    input_data=pd.DataFrame(data_array,columns=input_data_columns)
elif (normalization_status=='StandardScaler'):
    scaler = preprocessing.StandardScaler()
    scaled_df = scaler.fit_transform(input_data)
    input_data = pd.DataFrame(scaled_df, columns=input_data_columns)
elif (normalization_status=='MinMaxScaler'):
    scaler = preprocessing.MinMaxScaler()
    scaled_df = scaler.fit_transform(input_data)
    input_data = pd.DataFrame(scaled_df, columns=input_data_columns)
elif (normalization_status=='RobustScaler'):
    scaler = preprocessing.RobustScaler()
    scaled_df = scaler.fit_transform(input_data)
    input_data = pd.DataFrame(scaled_df, columns=input_data_columns)
elif (normalization_status=='Normalizer'):
    scaler = preprocessing.Normalizer()
    scaled_df = scaler.fit_transform(input_data)
    input_data = pd.DataFrame(scaled_df, columns=input_data_columns)
elif (normalization_status=='WithoutNormalization'):
    print ('No normalization is required')

target_data=pd.DataFrame(data['Fault'],columns=['Fault'],dtype=int)