# SB-PdM: a tool for predictive maintenance of rolling bearings based on limited labeled data

In [1]:
import warnings
# Suppress every warning for the rest of the session so that library
# warnings do not clutter the cell outputs.
# NOTE(review): this also hides numerical RuntimeWarnings (e.g. divide-by-zero
# inside distance computations) — confirm that silencing everything is intended.
warnings.filterwarnings("ignore")

In [2]:
from numpy import asarray
import numpy as np
import pandas as pd
import numpy.matlib
from scipy import signal
import scipy
from scipy.fftpack import fft
from sklearn.metrics import confusion_matrix,accuracy_score
from scipy.spatial import distance

In [3]:
def apply_fft(x, fs, num_samples):
    """Return the one-sided FFT magnitude spectrum of a signal.

    Parameters
    ----------
    x : array_like
        Input signal (1-D).
    fs : float
        Sampling frequency of ``x``.
    num_samples : int
        Length of the input signal.

    Returns
    -------
    f : numpy.ndarray
        Centre frequency of each of the first ``num_samples // 2`` FFT bins,
        i.e. ``k * fs / num_samples`` for ``k = 0 .. num_samples // 2 - 1``.
    freq_values : numpy.ndarray
        Magnitude of those bins, scaled by ``2 / num_samples``.
    """
    half = num_samples // 2
    # Bin k of an N-point FFT sits at k * fs / N, so the last bin kept here is
    # just below fs / 2. Building the axis with a linspace that *includes*
    # fs / 2 would stretch every frequency by a factor of half / (half - 1).
    f = np.arange(half) * (fs / num_samples)
    spectrum = fft(x)
    # Keep the non-negative-frequency half and scale it by 2 / N.
    freq_values = 2.0 / num_samples * np.abs(spectrum[:half])
    return f, freq_values

In [4]:
def similarity_based_classification(baselines,test_data, baseline_labels):
    """Classify test segments by their most similar reference feature set.

    Every test sample is compared with every reference (baseline) sample using
    two measures, and receives the label of the closest reference under each:

    * cosine distance (``scipy.spatial.distance.cosine``) on the flattened
      features;
    * Euclidean distance taken along the first feature axis and averaged over
      any remaining axes (for 1-D features this is the plain Euclidean
      distance).

    Parameters
    ----------
    baselines : sequence of numpy.ndarray
        Features of the labeled reference vibration segments.
    test_data : sequence of numpy.ndarray
        Features of the test vibration segments (same per-sample shape as the
        references).
    baseline_labels : array_like
        Label of each reference sample, aligned with ``baselines``.

    Returns
    -------
    cos_s : numpy.ndarray
        Predicted label of each test sample using the cosine measure.
    euc_s : numpy.ndarray
        Predicted label of each test sample using the Euclidean measure.
    y_cos : numpy.ndarray
        Cosine distance from each test sample to its closest reference.
    y_euc : numpy.ndarray
        Euclidean distance from each test sample to its closest reference.

    The first two arrays keep the dtype of ``baseline_labels``; the last two
    are float arrays.

    Raises
    ------
    ValueError
        If there are test samples but no references, or if the number of
        labels differs from the number of references.
    """
    labels = np.asarray(baseline_labels)
    n_ref = len(baselines)
    n_test = len(test_data)

    if n_test and n_ref == 0:
        raise ValueError("baselines must contain at least one reference sample")
    if len(labels) != n_ref:
        raise ValueError("baseline_labels must have one label per reference sample")

    cos_s = np.empty(n_test, dtype=labels.dtype)  # predicted labels, cosine
    euc_s = np.empty(n_test, dtype=labels.dtype)  # predicted labels, Euclidean
    y_cos = np.empty(n_test)  # best (smallest) cosine distance per test sample
    y_euc = np.empty(n_test)  # best (smallest) Euclidean distance per test sample

    for i in range(n_test):
        sample = np.asarray(test_data[i])
        flat_sample = sample.reshape(-1)
        cos_dist = np.empty(n_ref)
        euc_dist = np.empty(n_ref)

        for k in range(n_ref):
            ref = np.asarray(baselines[k])
            cos_dist[k] = distance.cosine(ref.reshape(-1), flat_sample)
            # Root of the summed squared differences along axis 0, then the
            # mean over whatever axes are left (a no-op for 1-D features).
            euc_dist[k] = np.mean(np.sqrt(np.sum(np.abs(ref - sample) ** 2, axis=0)))

        best_cos = int(np.argmin(cos_dist))
        best_euc = int(np.argmin(euc_dist))
        cos_s[i] = labels[best_cos]
        euc_s[i] = labels[best_euc]
        # The score is the distance to the winning reference of *this* sample.
        y_cos[i] = cos_dist[best_cos]
        y_euc[i] = euc_dist[best_euc]

    return cos_s, euc_s, y_cos, y_euc

==========================================================================================================================
# Loading Data:

## Note:

### The vibration samples are extracted from the CWRU dataset. Link to access the dataset: https://engineering.case.edu/bearingdatacenter

=========================================================================================================================== 

In [8]:
## Test samples: the 'labels' column holds the class label, all remaining columns hold the signal.
df = pd.read_csv("./Test_Samples.csv")
test_data = df.drop(columns=['labels']).to_numpy()
y_test = df['labels'].to_numpy()


## Reference samples: same CSV layout as the test samples.
df = pd.read_csv("./Reference_Samples.csv")
baselines = df.drop(columns=['labels']).to_numpy()
baselines_labels = df['labels'].to_numpy()


# Stack the test rows first and the reference rows last, so that both sets
# go through feature extraction together.
X = np.vstack((test_data, baselines))
print("Size of test & reference vibration samples:", X.shape)

Size of test & reference vibration samples: (3019, 2000)


In [9]:
# Shape of the test matrix on its own (X above additionally contains the reference rows).
test_data.shape

(2979, 2000)

In [10]:
fs = 12000 # Sampling rate of the vibration data, passed to the FFT and STFT below (presumably in Hz — TODO confirm).
num_samples = X.shape[1] # Number of data points in each vibration sample (number of columns of X).

==========================================================================================================================
# Feature Extraction:
=========================================================================================================================== 

In [11]:
# Note: a noise-free scenario is considered here.

# FFT features: one magnitude spectrum per row of X.
fft_z = []
for segment in X:
    f, z = apply_fft(segment, fs, num_samples)
    fft_z.append(np.squeeze(np.abs(z)))
fft_features = np.asarray(fft_z, dtype="float")
print('FFT features shape:', fft_features.shape)

# STFT features: one magnitude spectrogram per row of X.
# The window settings are the same for every row, so they are built once.
# NOTE(review): nfft=1028 (rather than 1024) is kept as found — confirm it is intentional.
stft_params = dict(
    window='hamming',
    nperseg=(num_samples/2),
    noverlap=int(0.95*(num_samples/2)),
    nfft=1028,
)
stft_z = []
for segment in X:
    f, t, z = signal.stft(segment, fs, **stft_params)
    stft_z.append(np.squeeze(np.abs(z)))
stft_features = np.asarray(stft_z, dtype="float")
print('STFT features shape:', stft_features.shape)

FFT features shape: (3019, 1000)
STFT features shape: (3019, 515, 41)


In [12]:
# Select which feature set the similarity-based classification below works on:
# fft_features (one spectrum per sample) or stft_features (one spectrogram per sample).
features =  fft_features # ====>  fft_features / stft_features 

==========================================================================================================================
# Similarity-Based Classification:
=========================================================================================================================== 

In [13]:
# Split the extracted features back into test and reference parts.
# The test rows were stacked first when X was built, so they come first here too.

test_samples = features[:len(test_data)]
print("Test samples size;", test_samples.shape)
baseline_samples = features[len(test_data):]
print("Reference samples size;", baseline_samples.shape)

# Flatten every reference sample to one row so the per-class mean can be taken with a groupby.
baselines_df = pd.DataFrame(data= baseline_samples.reshape(len(baseline_samples),-1) )
baselines_df['labels']= baselines_labels

# Average the reference samples of each class;
# each class involves 4 different motor speeds. Refer to the paper for more details.
class_means = baselines_df.groupby('labels').mean()
baselines_features = class_means.to_numpy()
# Take the class labels from the groupby index so that they are guaranteed to
# be in the same order as the averaged rows.
y_ref_true = class_means.index.to_numpy()

# Multi-dimensional features (e.g. STFT spectrograms) were flattened for the
# groupby; restore their per-sample shape. The decision is made on the number
# of dimensions rather than by comparing np.sum(features) with
# np.sum(stft_features): a float-sum equality breaks when any value is NaN,
# can coincide by accident, and needs a full pass over both arrays.
if baseline_samples.ndim > 2:
    baselines_features = baselines_features.reshape((len(y_ref_true),) + baseline_samples.shape[1:])

print("Averaged Reference samples shape;", baselines_features.shape)

Test samples size; (2979, 1000)
Reference samples size; (40, 1000)
Averaged Reference samples shape; (10, 1000)


In [14]:
# Classify every test sample against the class-averaged reference features.
# The first two returned arrays are the ones scored as predictions in the
# metric cells below; the last two are kept under the *_score names.
(
    y_pred_cos,
    y_pred_Ecl,
    cos_score,
    Ecl_score,
) = similarity_based_classification(baselines_features, test_samples, y_ref_true)

==========================================================================================================================
# Performance Metrics:
=========================================================================================================================== 

In [15]:
# Cosine similarity: compare the predictions with the true test labels.
y_predicted = y_pred_cos
conf_mat = confusion_matrix(y_test, y_predicted)
accuracy = accuracy_score(y_test, y_predicted)
print("Accuracy:", accuracy*100, "%")
print("Confusion Matrix:", conf_mat, sep="\n")

Accuracy: 99.59718026183283 %
Confusion Matrix:
[[842   0   0   0   0   0   0   0   0   0]
 [  0 238   0   0   0   0   0   0   0   0]
 [  0   0 228   0   0   0   0   0   0   8]
 [  0   0   0 237   0   0   0   0   0   0]
 [  0   0   0   0 237   0   0   0   0   0]
 [  0   0   0   0   0 238   0   0   0   0]
 [  0   0   0   0   4   0 234   0   0   0]
 [  0   0   0   0   0   0   0 238   0   0]
 [  0   0   0   0   0   0   0   0 237   0]
 [  0   0   0   0   0   0   0   0   0 238]]


In [16]:
# Euclidean distance: compare the predictions with the true test labels.
y_predicted = y_pred_Ecl
conf_mat = confusion_matrix(y_test, y_predicted)
accuracy = accuracy_score(y_test, y_predicted)
print("Accuracy:", accuracy*100, "%")
print("Confusion Matrix:", conf_mat, sep="\n")

Accuracy: 99.36220208123532 %
Confusion Matrix:
[[842   0   0   0   0   0   0   0   0   0]
 [  0 238   0   0   0   0   0   0   0   0]
 [  0   0 236   0   0   0   0   0   0   0]
 [  0   0   0 237   0   0   0   0   0   0]
 [  0   0   0   0 236   0   1   0   0   0]
 [  0   0   0   0   0 229   1   0   8   0]
 [  0   0   0   0   7   2 229   0   0   0]
 [  0   0   0   0   0   0   0 238   0   0]
 [  0   0   0   0   0   0   0   0 237   0]
 [  0   0   0   0   0   0   0   0   0 238]]
