In [1]:
# Import Libraries
import os
import pandas as pd
import numpy as np
import librosa

import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")

from IPython.display import Image

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score
from glob import glob


In [6]:
# Load the pre-sampled audio DataFrame from its pickle file and preview the first rows.
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
SAMPLED_AUDIO_PICKLE = 'sampled_audio_data.pkl'
df = pd.read_pickle(SAMPLED_AUDIO_PICKLE)
df.head()

Unnamed: 0,Sound Source,SNR,Model,.wav File,Sample Rate,Raw Data,Classification
145,fan,-6dB,id_00,00000174.wav,16000,"[-0.005207062, -0.0051956177, -0.00573349, -0....",abnormal
170,fan,-6dB,id_00,00000188.wav,16000,"[0.010253906, 0.013095856, 0.013023376, 0.0088...",abnormal
2,fan,-6dB,id_00,00000065.wav,16000,"[-0.0024223328, -0.004142761, -0.003967285, -0...",abnormal
329,fan,-6dB,id_00,00000251.wav,16000,"[-0.0028533936, -0.005077362, -0.0044822693, -...",abnormal
362,fan,-6dB,id_00,00000308.wav,16000,"[-0.005332947, -0.0024414062, 0.00037002563, 0...",abnormal


In [7]:
# Encode the categorical label columns as integers, then collect the raw audio
# vectors and the encoded machine labels as numpy arrays.
#
# NOTE: the label columns of df are overwritten below, so re-running this cell
# without reloading df would fit the encoders on already-encoded integers.

# Encode 'Sound Source' labels and keep an {integer code: original label} lookup
machine_label_encoder = LabelEncoder()
df['Sound Source'] = machine_label_encoder.fit_transform(df['Sound Source'])
machine_label_mapping = dict(enumerate(machine_label_encoder.classes_))

# Encode 'Classification' labels and keep an {integer code: original label} lookup
state_label_encoder = LabelEncoder()
df['Classification'] = state_label_encoder.fit_transform(df['Classification'])
state_label_mapping = dict(enumerate(state_label_encoder.classes_))

# One audio vector and one encoded machine label per row, in row order
features = df['Raw Data'].tolist()
machine_labels = df['Sound Source'].tolist()

X = np.array(features)
y_machine = np.array(machine_labels)

In [8]:
# Display the {integer code: machine type} lookup built from machine_label_encoder.classes_
machine_label_mapping

{0: 'fan', 1: 'pump', 2: 'slider', 3: 'valve'}

In [9]:
# Display the {integer code: machine state} lookup built from state_label_encoder.classes_
state_label_mapping

{0: 'abnormal', 1: 'normal'}

In [10]:
# Separate data for each machine type for binary classification
def extract_features_and_labels(machine_df):
    """Return the audio vectors and state labels of a DataFrame as numpy arrays.

    Parameters
    ----------
    machine_df : pandas.DataFrame
        Rows to extract; must contain the 'Raw Data' and 'Classification' columns.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``np.array`` of the 'Raw Data' entries and ``np.array`` of the
        'Classification' entries, both in row order.
    """
    audio_vectors = np.array(machine_df['Raw Data'].tolist())
    state_labels = np.array(machine_df['Classification'].tolist())
    return audio_vectors, state_labels


# 'Sound Source' holds the integer codes listed in machine_label_mapping
# (0 = fan, 1 = pump, 2 = slider, 3 = valve).
fan_df = df[df['Sound Source'] == 0]
pump_df = df[df['Sound Source'] == 1]
slider_df = df[df['Sound Source'] == 2]
valve_df = df[df['Sound Source'] == 3]

# Fans
X_fan, fan_labels = extract_features_and_labels(fan_df)

# Pumps
X_pump, pump_labels = extract_features_and_labels(pump_df)

# Sliders
X_slider, slider_labels = extract_features_and_labels(slider_df)

# Valves
X_valve, valve_labels = extract_features_and_labels(valve_df)

In [11]:
def split_train_test_val(X, y, random_state=42):
    """Split features and labels into train/test/val sets with a 0.7/0.15/0.15 distribution.

    The data is first split 70/30, then the 30% hold-out is split in half.
    Both splits use the same ``random_state`` so the result is reproducible.

    Parameters
    ----------
    X : array-like
        Feature vectors.
    y : array-like
        Labels aligned with ``X``.
    random_state : int, default 42
        Seed forwarded to both ``train_test_split`` calls.

    Returns
    -------
    tuple
        ``(X_train, X_test, X_val, y_train, y_test, y_val)``.
    """
    # NOTE(review): the splits are not stratified; consider stratify=y if the
    # normal/abnormal classes are imbalanced.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X, y, test_size=0.3, random_state=random_state
    )
    X_test, X_val, y_test, y_val = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=random_state
    )
    return X_train, X_test, X_val, y_train, y_test, y_val


# FANS
(X_train_fan, X_test_fan, X_val_fan,
 y_train_fan, y_test_fan, y_val_fan) = split_train_test_val(X_fan, fan_labels)

# PUMPS
(X_train_pump, X_test_pump, X_val_pump,
 y_train_pump, y_test_pump, y_val_pump) = split_train_test_val(X_pump, pump_labels)

# SLIDERS
(X_train_slider, X_test_slider, X_val_slider,
 y_train_slider, y_test_slider, y_val_slider) = split_train_test_val(X_slider, slider_labels)

# VALVES
(X_train_valve, X_test_valve, X_val_valve,
 y_train_valve, y_test_valve, y_val_valve) = split_train_test_val(X_valve, valve_labels)

In [14]:
def create_directories(sound_source, categories, base_dir='dataset'):
    """Create the train/val/test directory tree for one or more sound sources.

    The resulting layout is ``<base_dir>/<source>_data/<split>/<category>``
    for every split in ('train', 'val', 'test'). Existing directories are
    left untouched.

    Parameters
    ----------
    sound_source : str or iterable of str
        A single sound source name or a collection of them.
    categories : iterable of str
        Category sub-directories to create under every split.
    base_dir : str, default 'dataset'
        Root directory of the tree.
    """
    # A bare string would otherwise be iterated character by character.
    sources = [sound_source] if isinstance(sound_source, str) else sound_source
    for source in sources:
        for category in categories:
            for split in ('train', 'val', 'test'):
                os.makedirs(os.path.join(base_dir, source+'_data', split, category), exist_ok=True)
            


sound_sources = ['fan', 'pump', 'slider', 'valve']
classifications = ['normal', 'abnormal']

# create_directories already iterates over every sound source and category,
# so a single call builds the whole tree.
create_directories(sound_sources, classifications)


In [15]:
def save_mel_spectrogram(audio_vector, sr, save_path):
    """Render the Mel spectrogram of an audio signal and save it as an image file.

    Parameters
    ----------
    audio_vector : numpy.ndarray
        Audio samples, passed to ``librosa.stft``.
    sr : int
        Sample rate, used for the Mel filter bank and for plotting.
    save_path : str
        Destination path of the image; its directory must already exist.
    """
    # librosa.display is a submodule that is not guaranteed to be loaded by a
    # plain `import librosa`, so import it explicitly before use.
    import librosa.display

    # Figure size and DPI targeting a 224x224 pixel canvas (2.24 in * 100 DPI)
    dpi = 100  # Dots per inch
    fig_size = 224 / dpi  # inches

    # Compute the spectrogram before opening a figure so a failure here
    # cannot leave an open figure behind.
    sgram = librosa.stft(audio_vector)  # short time fourier transform
    sgram_mag, _ = librosa.magphase(sgram)
    mel_scale_sgram = librosa.feature.melspectrogram(S=sgram_mag, sr=sr)
    mel_sgram = librosa.amplitude_to_db(mel_scale_sgram, ref=np.min)  # decibel scale

    fig, ax = plt.subplots(figsize=(fig_size, fig_size), dpi=dpi)
    try:
        ax.set_axis_off()
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)  # Remove padding

        librosa.display.specshow(mel_sgram, sr=sr, ax=ax)

        # NOTE(review): bbox_inches='tight' re-crops the canvas; verify the
        # saved image is exactly 224x224 if downstream code relies on that.
        fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure; this function is called once per sample.
        plt.close(fig)

# Function to save Mel spectrogram images for a dataset
def save_spectrogram_images(X, y, sound_source, data_type, base_dir='dataset', sr=16000):
    """Save one Mel spectrogram image per sample, grouped by classification.

    Images are written to
    ``<base_dir>/<sound_source>_data/<data_type>/<classification>/<idx>.png``
    where ``idx`` is the position of the sample in ``X``.

    Parameters
    ----------
    X : sequence of array-like
        Audio vectors, one per sample.
    y : sequence of int
        Encoded state labels aligned with ``X``; 1 is treated as 'normal',
        any other value as 'abnormal'.
    sound_source : str
        Machine type name used in the directory name (e.g. 'fan').
    data_type : str
        Split name used in the directory name (e.g. 'train', 'val', 'test').
    base_dir : str, default 'dataset'
        Root directory of the image tree.
    sr : int, default 16000
        Sample rate passed to ``save_mel_spectrogram``.
    """
    # Iterate through each sample
    for idx, audio_vector in enumerate(X):
        # Determine classification
        classification = 'normal' if y[idx] == 1 else 'abnormal'

        # Make sure the target directory exists, so a custom base_dir works
        # without the tree having been created beforehand.
        target_dir = os.path.join(base_dir, sound_source+'_data', data_type, classification)
        os.makedirs(target_dir, exist_ok=True)

        # Define file path
        file_path = os.path.join(target_dir, f'{idx}.png')

        # Save Mel spectrogram image
        save_mel_spectrogram(audio_vector, sr=sr, save_path=file_path)

# Export the spectrogram images of every machine type and split.
# Each entry is (sound source, split name, audio vectors, labels); the order
# matches one train/val/test pass per machine type.
spectrogram_jobs = [
    # Fans
    ('fan', 'train', X_train_fan, y_train_fan),
    ('fan', 'val', X_val_fan, y_val_fan),
    ('fan', 'test', X_test_fan, y_test_fan),
    # Pumps
    ('pump', 'train', X_train_pump, y_train_pump),
    ('pump', 'val', X_val_pump, y_val_pump),
    ('pump', 'test', X_test_pump, y_test_pump),
    # Sliders
    ('slider', 'train', X_train_slider, y_train_slider),
    ('slider', 'val', X_val_slider, y_val_slider),
    ('slider', 'test', X_test_slider, y_test_slider),
    # Valves
    ('valve', 'train', X_train_valve, y_train_valve),
    ('valve', 'val', X_val_valve, y_val_valve),
    ('valve', 'test', X_test_valve, y_test_valve),
]

for job_source, job_split, job_X, job_y in spectrogram_jobs:
    save_spectrogram_images(job_X, job_y, job_source, job_split)