In [1]:
# Import Libraries
import os
import pandas as pd
import numpy as np
import librosa

from matplotlib import pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")

from IPython.display import Image

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score


In [2]:
# Read the sampled audio data frame from its pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# when it comes from a trusted source.
df = pd.read_pickle('sampled_audio_data.pkl')
# Preview the first rows.
df.head()

Unnamed: 0,Sound Source,SNR,Model,.wav File,Sample Rate,Raw Data,Classification
145,fan,-6dB,id_00,00000174.wav,16000,"[-0.005207062, -0.0051956177, -0.00573349, -0....",abnormal
170,fan,-6dB,id_00,00000188.wav,16000,"[0.010253906, 0.013095856, 0.013023376, 0.0088...",abnormal
2,fan,-6dB,id_00,00000065.wav,16000,"[-0.0024223328, -0.004142761, -0.003967285, -0...",abnormal
329,fan,-6dB,id_00,00000251.wav,16000,"[-0.0028533936, -0.005077362, -0.0044822693, -...",abnormal
362,fan,-6dB,id_00,00000308.wav,16000,"[-0.005332947, -0.0024414062, 0.00037002563, 0...",abnormal


## Processing labels and Train/Test/Val Split for Multiclass classification

In [3]:
# Build the multiclass inputs: one raw audio vector per row of `df` (X) and
# the integer-encoded machine type of that row (y_machine).

# Encode 'Sound Source' labels as integers.
# NOTE: the string column is overwritten in place and later cells filter on the
# integer codes. Re-running this cell without reloading `df` re-fits the encoder
# on those integers, so the mapping below would no longer hold the names.
machine_label_encoder = LabelEncoder()
df['Sound Source'] = machine_label_encoder.fit_transform(df['Sound Source'])
machine_label_mapping = dict(enumerate(machine_label_encoder.classes_))

# Encode 'Classification' labels as integers (same in-place caveat as above).
state_label_encoder = LabelEncoder()
df['Classification'] = state_label_encoder.fit_transform(df['Classification'])
state_label_mapping = dict(enumerate(state_label_encoder.classes_))

# Pull the vectors and labels straight from the columns instead of building
# them row by row with iterrows(), which creates a Series for every row.
features = df['Raw Data'].tolist()
machine_labels = df['Sound Source'].tolist()

X = np.array(features)
y_machine = np.array(machine_labels)

In [4]:
# See label encoding for machine type: integer code -> class name held by the
# fitted machine_label_encoder.
machine_label_mapping

{0: 'fan', 1: 'pump', 2: 'slider', 3: 'valve'}

In [5]:
# See label encoding for machine status: integer code -> class name held by the
# fitted state_label_encoder.
state_label_mapping

{0: 'abnormal', 1: 'normal'}

In [6]:
# 0.7/0.15/0.15 train/test/val split done in two stages: hold out 30% of the
# samples first, then cut that hold-out in half. Both stages use the same seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y_machine, random_state=42, test_size=0.3
)
X_test, X_val, y_test, y_val = train_test_split(
    X_temp, y_temp, random_state=42, test_size=0.5
)

In [7]:
# Print the shape of the train, test and validation matrices, in that order
for split_matrix in (X_train, X_test, X_val):
    print(split_matrix.shape)

(7660, 160000)
(1642, 160000)
(1642, 160000)


In [8]:
# Count how many samples of each encoded machine type landed in the
# train, test and validation sets (printed in that order).
ylabels = [y_train, y_test, y_val]

print('Distribution of labels in the different sets\n')
for split_labels in ylabels:
    classes, class_counts = np.unique(split_labels, return_counts=True)
    print(dict(zip(classes, class_counts)))

Distribution of labels in the different sets

{0: 1907, 1: 1921, 2: 1900, 3: 1932}
{0: 414, 1: 400, 2: 430, 3: 398}
{0: 415, 1: 415, 2: 406, 3: 406}


## Processing labels and Train/Test/Val Split for Binary classification

In [32]:
# Separate data for each machine type for binary classification.
# The integer codes follow machine_label_mapping (0 fan, 1 pump, 2 slider, 3 valve).
fan_df = df[df['Sound Source'] == 0]
pump_df = df[df['Sound Source'] == 1]
slider_df = df[df['Sound Source'] == 2]
valve_df = df[df['Sound Source'] == 3]


def _waveforms_and_states(machine_df):
    """Return (X, y) for one machine type.

    X is the array built from the 'Raw Data' entries of `machine_df` and y is
    the array of its encoded 'Classification' labels, both in row order.
    Replaces the per-machine iterrows() loops that were copy-pasted four times.
    """
    waveforms = np.array(machine_df['Raw Data'].tolist())
    states = np.array(machine_df['Classification'].tolist())
    return waveforms, states


X_fan, fan_labels = _waveforms_and_states(fan_df)
X_pump, pump_labels = _waveforms_and_states(pump_df)
X_slider, slider_labels = _waveforms_and_states(slider_df)
X_valve, valve_labels = _waveforms_and_states(valve_df)

In [33]:
def _split_train_test_val(samples, labels, seed=42):
    """Split one machine's data into train/test/val sets (0.7/0.15/0.15).

    30% of the samples are held out first and that hold-out is then halved into
    the test and validation sets; both stages use `seed` as random_state.

    Returns (X_train, X_test, X_val, y_train, y_test, y_val).
    """
    X_tr, X_holdout, y_tr, y_holdout = train_test_split(
        samples, labels, test_size=0.3, random_state=seed
    )
    X_te, X_va, y_te, y_va = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=seed
    )
    return X_tr, X_te, X_va, y_tr, y_te, y_va


# FANS
(X_train_fan, X_test_fan, X_val_fan,
 y_train_fan, y_test_fan, y_val_fan) = _split_train_test_val(X_fan, fan_labels)

# PUMPS
(X_train_pump, X_test_pump, X_val_pump,
 y_train_pump, y_test_pump, y_val_pump) = _split_train_test_val(X_pump, pump_labels)

# SLIDERS
(X_train_slider, X_test_slider, X_val_slider,
 y_train_slider, y_test_slider, y_val_slider) = _split_train_test_val(X_slider, slider_labels)

# VALVES
(X_train_valve, X_test_valve, X_val_valve,
 y_train_valve, y_test_valve, y_val_valve) = _split_train_test_val(X_valve, valve_labels)

In [11]:
# Check the distribution of labels per machine type.
# Each entry lists the train, test and validation labels, in that order.
label_sets_by_machine = {
    'fans': [y_train_fan, y_test_fan, y_val_fan],
    'pumps': [y_train_pump, y_test_pump, y_val_pump],
    'sliders': [y_train_slider, y_test_slider, y_val_slider],
    'valves': [y_train_valve, y_test_valve, y_val_valve],
}

for position, (machine, ylabels) in enumerate(label_sets_by_machine.items()):
    # Every heading except the first is preceded by a blank line.
    lead = '' if position == 0 else '\n'
    print(f'{lead}Distribution of labels in the different sets for {machine}\n')
    for split_labels in ylabels:
        unique_values, counts = np.unique(split_labels, return_counts=True)
        print(dict(zip(unique_values, counts)))

Distribution of labels in the different sets for fans

{0: 950, 1: 965}
{0: 227, 1: 183}
{0: 191, 1: 220}

Distribution of labels in the different sets for pumps

{0: 950, 1: 965}
{0: 227, 1: 183}
{0: 191, 1: 220}

Distribution of labels in the different sets for sliders

{0: 950, 1: 965}
{0: 227, 1: 183}
{0: 191, 1: 220}

Distribution of labels in the different sets for valves

{0: 950, 1: 965}
{0: 227, 1: 183}
{0: 191, 1: 220}


## Model training (Multi-class) - baseline with raw audio vectors

In [34]:
# Multinomial logistic regression fitted directly on the raw audio vectors.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# confirm the installed version before upgrading.
clf = LogisticRegression(max_iter=10000, multi_class='multinomial')
clf.fit(X=X_train, y=y_train)

In [35]:
# Validation accuracy of the raw-vector multiclass model
y_val_pred = clf.predict(X=X_val)
print(f"Accuracy: {accuracy_score(y_val, y_val_pred)}")

Accuracy: 0.6218026796589525


## Model training (Multi-class) - baseline with features selected

In [19]:
def extract_mfccs(audio_data, sample_rate=16000, n_mfcc=13):
    """Compute one averaged MFCC vector per audio sample.

    Args:
        audio_data: iterable of raw audio arrays, one per sample.
        sample_rate: sampling rate passed to librosa as `sr`.
        n_mfcc: number of MFCC coefficients requested from librosa.

    Returns:
        np.ndarray with one entry per sample; each entry is the librosa MFCC
        matrix averaged over its second axis, i.e. `n_mfcc` values.
    """
    def _mean_mfcc(waveform):
        coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)
        # Transpose, then average over axis 0: one value per coefficient.
        return np.mean(coefficients.T, axis=0)

    return np.array([_mean_mfcc(waveform) for waveform in audio_data])

def extract_temporal_features(audio_data):
    """Compute two time-domain summary features per audio sample.

    Args:
        audio_data: iterable of raw audio arrays, one per sample.

    Returns:
        np.ndarray with one row per sample: [mean of the first row returned by
        librosa's zero_crossing_rate, mean of librosa.autocorrelate's output].
    """
    return np.array([
        [
            np.mean(librosa.feature.zero_crossing_rate(waveform)[0]),
            np.mean(librosa.autocorrelate(waveform)),
        ]
        for waveform in audio_data
    ])

def extract_spectral_features(audio_data, sample_rate=16000):
    """Compute three spectral summary features per audio sample.

    Args:
        audio_data: iterable of raw audio arrays, one per sample.
        sample_rate: sampling rate passed to librosa as `sr`.

    Returns:
        np.ndarray with one row per sample:
        [mean spectral centroid, mean spectral rolloff, mean spectral contrast],
        each taken over the first row ([0]) of the matrix librosa returns.

    NOTE(review): only row 0 of spectral_contrast is used; if librosa returns
    several band rows there, the others are discarded — confirm this is intended.
    """
    rows = []
    for waveform in audio_data:
        centroid = librosa.feature.spectral_centroid(y=waveform, sr=sample_rate)[0]
        rolloff = librosa.feature.spectral_rolloff(y=waveform, sr=sample_rate)[0]
        contrast = librosa.feature.spectral_contrast(y=waveform, sr=sample_rate)[0]
        rows.append([np.mean(series) for series in (centroid, rolloff, contrast)])
    return np.array(rows)

def extract_features(audio_data, sample_rate=16000):
    """Build the full feature matrix for a set of audio samples.

    Args:
        audio_data: iterable of raw audio arrays, one per sample.
        sample_rate: sampling rate forwarded to the MFCC and spectral extractors.

    Returns:
        np.ndarray produced by horizontally stacking, in this order, the MFCC
        features, the spectral features and the temporal features.
    """
    feature_groups = (
        extract_mfccs(audio_data, sample_rate),
        extract_spectral_features(audio_data, sample_rate),
        extract_temporal_features(audio_data),
    )
    # Column order: MFCCs, then spectral, then temporal.
    return np.hstack(feature_groups)

In [20]:
# Feature matrix for the multiclass training set
X_train_vec = extract_features(audio_data=X_train, sample_rate=16000)

In [23]:
# Feature matrix for the multiclass validation set
X_val_vec = extract_features(audio_data=X_val, sample_rate=16000)

In [30]:
# Multinomial logistic regression on the extracted features.
# NOTE(review): the features are passed to the model without any scaling step
# in this cell — confirm that is intended for this baseline.
clf = LogisticRegression(max_iter=10000, multi_class='multinomial')
clf.fit(X=X_train_vec, y=y_train)

In [31]:
# Validation accuracy of the feature-based multiclass model
y_val_pred = clf.predict(X=X_val_vec)
print(f"Accuracy: {accuracy_score(y_val, y_val_pred)}")

Accuracy: 0.7539585870889159


## Model training (Binary) - baseline with raw audio vectors

In [14]:
# Fans: binary baseline fitted on the raw audio vectors
clf = LogisticRegression(max_iter=1000).fit(X_train_fan, y_train_fan)

# Accuracy on the fan validation set
y_val_pred = clf.predict(X_val_fan)
print(f"Accuracy: {accuracy_score(y_val_fan, y_val_pred)}")

Accuracy: 0.8418491484184915


In [15]:
# Pumps: binary baseline fitted on the raw audio vectors
clf = LogisticRegression(max_iter=1000).fit(X_train_pump, y_train_pump)

# Accuracy on the pump validation set
y_val_pred = clf.predict(X_val_pump)
print(f"Accuracy: {accuracy_score(y_val_pump, y_val_pred)}")

Accuracy: 0.8637469586374696


In [16]:
# Sliders: binary baseline fitted on the raw audio vectors
clf = LogisticRegression(max_iter=1000).fit(X_train_slider, y_train_slider)

# Accuracy on the slider validation set
y_val_pred = clf.predict(X_val_slider)
print(f"Accuracy: {accuracy_score(y_val_slider, y_val_pred)}")

Accuracy: 0.8004866180048662


In [17]:
# Valves: binary baseline fitted on the raw audio vectors
clf = LogisticRegression(max_iter=1000).fit(X_train_valve, y_train_valve)

# Accuracy on the valve validation set
y_val_pred = clf.predict(X_val_valve)
print(f"Accuracy: {accuracy_score(y_val_valve, y_val_pred)}")

Accuracy: 0.7055961070559611


## Model training (Binary) - baseline with features selected

In [36]:
# Fans: binary baseline on the extracted features.
# Extract features for the train and validation sets, in that order.
X_train_fan_vec, X_val_fan_vec = (
    extract_features(clips, sample_rate=16000)
    for clips in (X_train_fan, X_val_fan)
)

clf = LogisticRegression(max_iter=10000).fit(X_train_fan_vec, y_train_fan)

# Accuracy on the fan validation set
y_val_pred = clf.predict(X_val_fan_vec)
print(f"Accuracy: {accuracy_score(y_val_fan, y_val_pred)}")

Accuracy: 0.7128953771289538


In [37]:
# Pumps: binary baseline on the extracted features.
# Extract features for the train and validation sets, in that order.
X_train_pump_vec, X_val_pump_vec = (
    extract_features(clips, sample_rate=16000)
    for clips in (X_train_pump, X_val_pump)
)

clf = LogisticRegression(max_iter=10000).fit(X_train_pump_vec, y_train_pump)

# Accuracy on the pump validation set
y_val_pred = clf.predict(X_val_pump_vec)
print(f"Accuracy: {accuracy_score(y_val_pump, y_val_pred)}")

Accuracy: 0.7591240875912408


In [38]:
# Sliders: binary baseline on the extracted features.
# Extract features for the train and validation sets, in that order.
X_train_slider_vec, X_val_slider_vec = (
    extract_features(clips, sample_rate=16000)
    for clips in (X_train_slider, X_val_slider)
)

clf = LogisticRegression(max_iter=10000).fit(X_train_slider_vec, y_train_slider)

# Accuracy on the slider validation set
y_val_pred = clf.predict(X_val_slider_vec)
print(f"Accuracy: {accuracy_score(y_val_slider, y_val_pred)}")

Accuracy: 0.8880778588807786


In [39]:
# Valves: binary baseline on the extracted features.
# Extract features for the train and validation sets, in that order.
X_train_valve_vec, X_val_valve_vec = (
    extract_features(clips, sample_rate=16000)
    for clips in (X_train_valve, X_val_valve)
)

clf = LogisticRegression(max_iter=10000).fit(X_train_valve_vec, y_train_valve)

# Accuracy on the valve validation set
y_val_pred = clf.predict(X_val_valve_vec)
print(f"Accuracy: {accuracy_score(y_val_valve, y_val_pred)}")

Accuracy: 0.6058394160583942
