In [2]:
import os
import numpy as np
import scipy
from scipy.io import wavfile
import scipy.fftpack as fft
from scipy.signal import get_window
import IPython.display as ipd
import librosa
import soundfile as sf

In [3]:
audio_file = "beeAudioFile.mp3"
audio_data, sampling_rate = librosa.load(audio_file, sr=None)  # sr=None to get the original sampling rate

# Play the audio using IPython's Audio
# ipd.Audio(audio_data, rate=sampling_rate)

print("Sample rate: {0}Hz".format(sampling_rate))
print("Audio duration: {0}s".format(len(audio_data) / sampling_rate))

Sample rate: 48000Hz
Audio duration: 155.352s


In [3]:

queen_audio_file = "QueenBee_Testing_15mins.wav"
queen_audio_data, queen_sampling_rate = librosa.load(queen_audio_file, sr=None)  # sr=None to get the original sampling rate

# resample for no queen part
no_queen_audio_file = "No_QueenBee_Testing_15mins.wav"
no_queen_audio_data, no_queen_sampling_rate = librosa.load(no_queen_audio_file, sr=None)  # sr=None to get the original sampling rate

#sample_rate, audio = librosa.load("beeAudioFile.mp3")
print("queen_sampling_rate: {0}Hz".format(queen_sampling_rate))
print("no_queen_sampling_rate: {0}Hz".format(no_queen_sampling_rate))


queen_sampling_rate: 44100Hz
no_queen_sampling_rate: 44100Hz


In [7]:
# Define segment length and hop length in seconds
segment_length = 30  # seconds
hop_length  = 10     # seconds (hop_length of 10 seconds)

# Calculate frame length and hop length in samples
frame_length = int(segment_length * queen_sampling_rate)
hop_length_samples = int(hop_length * queen_sampling_rate)

print(frame_length)
print(hop_length_samples)


1440000
480000


In [8]:
# generate queen segment

# Iterate over the audio and create overlapping segments
queen_segments = []
start_sample = 0

while start_sample + frame_length <= len(queen_audio_data):
    segment = queen_audio_data[start_sample:start_sample + frame_length]
    queen_segments.append(segment)
    start_sample += hop_length_samples

print("Queen Segments saved to Array.")

# 2000,  MFCC for each, 2000 MFCC features,  divded features, 1800 for training  200 for testing feed  LSTM and SVM

# generate no queen segment

# Iterate over the audio and create overlapping segments
no_queen_segments = []
start_sample = 0

while start_sample + frame_length <= len(no_queen_audio_data):
    no_queen_segment = no_queen_audio_data[start_sample:start_sample + frame_length]
    no_queen_segments.append(no_queen_segment)
    start_sample += hop_length_samples

print("No Queen Segments saved to Array.")

Queen Segments saved to Array.
No Queen Segments saved to Array.


In [9]:
print("queen segments:", len(queen_segments))
print("no queen segments:", len(no_queen_segments))

queen segments: 34
no queen segments: 403


In [10]:
# augmentation

import numpy as np
from audiomentations import Compose, TimeStretch, PitchShift, AddBackgroundNoise

# Define the augmentation transformations you want to apply
augmentations = Compose([
    TimeStretch(min_rate=0.8, max_rate=1.2, p=0.5),  # Adjust time duration
    PitchShift(min_semitones=-2, max_semitones=2, p=0.5),  # Adjust pitch
    AddBackgroundNoise(sounds_path="nature_noise.mp3", p=0.5),  # Add background noise
])

# Augment queen segments
augmented_queen_segments = []
for segment in queen_segments:
    augmented_segment = augmentations(samples=segment, sample_rate=queen_sampling_rate)
    augmented_queen_segments.append(augmented_segment)

# Augment no queen segments
augmented_no_queen_segments = []
for segment in no_queen_segments:
    augmented_segment = augmentations(samples=segment, sample_rate=no_queen_sampling_rate)
    augmented_no_queen_segments.append(augmented_segment)

# Now you have augmented segments in augmented_queen_segments and augmented_no_queen_segments




In [11]:
print("queen segments:", len(augmented_queen_segments))
print("no queen segments:", len(augmented_no_queen_segments))

queen segments: 34
no queen segments: 403


Generage mel spectrogram into Folders:

In [12]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import os

def save_spectrogram(audio_segments, folder_name):
    """
    Generates and saves Mel spectrograms for a list of audio segments.
    Skips saving if the image already exists.
    :param audio_segments: List of audio segments (numpy arrays)
    :param folder_name: Folder to save the spectrogram images
    """
    # Ensure the folder exists
    os.makedirs(folder_name, exist_ok=True)

    # Loop through each segment and generate a spectrogram
    for i, segment in enumerate(audio_segments):
        file_path = f'{folder_name}/spectrogram_{i}.png'
        
        # Check if the image already exists
        if not os.path.exists(file_path):
            # Generate Mel spectrogram
            S = librosa.feature.melspectrogram(y=segment, sr=22050, n_mels=128, fmax=8000)
            S_dB = librosa.power_to_db(S, ref=np.max)

            # Plot
            plt.figure(figsize=(10, 4))
            librosa.display.specshow(S_dB, sr=22050, x_axis='time', y_axis='mel')
            plt.colorbar(format='%+2.0f dB')
            plt.title(f'Mel-frequency spectrogram {i}')
            plt.tight_layout()
            
            # Save the figure
            plt.savefig(file_path)
            plt.close()

# Example usage
# Replace these with your actual lists of audio segments
# save_spectrogram(augmented_queen_segments, 'queen_img')
# save_spectrogram(augmented_no_queen_segments, 'queenless_img')

# Example usage
# Assuming `augmented_queen_segments` and `augmented_no_queen_segments` are your audio segment lists
save_spectrogram(augmented_queen_segments, 'queen_img')
save_spectrogram(augmented_no_queen_segments, 'queenless_img')


extract MFCC features and label

In [13]:
def extract_mfcc_and_label(audio_segment, label):
    # Extract MFCC features from the audio segment
    mfccs = librosa.feature.mfcc(y=audio_segment, sr=queen_sampling_rate, n_mfcc=13, hop_length=hop_length_samples)
    
    return mfccs, label

queen_mfccs = []  # To store MFCC features
queen_labels = []  # To store labels (1 for "queen")

for segment in augmented_queen_segments:
    mfcc, label = extract_mfcc_and_label(segment, 1)  # Label as 1 for "queen"
    queen_mfccs.append(mfcc)
    queen_labels.append(label)


no_queen_mfccs = []  # To store MFCC features
no_queen_labels = []  # To store labels (1 for "queen")

for no_queen_segment in augmented_no_queen_segments:
    mfcc, label = extract_mfcc_and_label(no_queen_segment, 0)  # Label as 1 for "queen"
    no_queen_mfccs.append(mfcc)
    no_queen_labels.append(label)

print("queen segments:", len(queen_mfccs))
print("no queen segments:", len(no_queen_mfccs))




queen segments: 34
no queen segments: 403


In [14]:
print("queen_mfccs:")
print(queen_mfccs[0])
print("no_queen_mfccs:")
print(no_queen_mfccs[0])

queen_mfccs:
[[-9.0834021e+02 -4.0651260e+02 -3.7499939e+02 -5.4887689e+02]
 [ 0.0000000e+00  1.7578656e+02  1.5964374e+02  1.2885406e+02]
 [ 0.0000000e+00 -2.5732170e+01 -4.4139931e+01  1.8598820e+01]
 [ 0.0000000e+00  6.0004074e+01  7.1312477e+01  4.3563576e+01]
 [ 0.0000000e+00 -3.9786682e+01 -4.7004913e+01  5.7934742e+00]
 [ 0.0000000e+00  3.0369919e+01  2.5160576e+01 -6.6570635e+00]
 [ 0.0000000e+00 -1.9473936e+01 -1.4755440e+01  8.8173813e-01]
 [ 0.0000000e+00  3.2558167e+01  3.3551048e+01  1.5242510e-01]
 [ 0.0000000e+00 -3.9381802e+00 -2.1995125e+01  3.5655100e+00]
 [ 0.0000000e+00  4.4144449e+00  7.1385679e+00  1.2887930e+01]
 [ 0.0000000e+00 -1.7217085e-01  1.7178435e+00  4.8987284e+00]
 [ 0.0000000e+00  7.0732756e+00 -2.9608059e+00  6.6356955e+00]
 [ 0.0000000e+00 -9.4648170e-01 -2.2162137e+00  1.3756098e+01]]
no_queen_mfccs:
[[-7.0478857e+02 -2.8825006e+02 -4.1513882e+02 -2.5544597e+02]
 [ 0.0000000e+00  1.7309164e+02  1.5318442e+02  1.4112778e+02]
 [ 0.0000000e+00  1.82071

queen_mfccs:
[[-9.0834021e+02 -4.0651260e+02 -3.7499939e+02 -5.4887689e+02]
 [ 0.0000000e+00  1.7578656e+02  1.5964374e+02  1.2885406e+02]
 [ 0.0000000e+00 -2.5732170e+01 -4.4139931e+01  1.8598820e+01]
 [ 0.0000000e+00  6.0004074e+01  7.1312477e+01  4.3563576e+01]
 [ 0.0000000e+00 -3.9786682e+01 -4.7004913e+01  5.7934742e+00]
 [ 0.0000000e+00  3.0369919e+01  2.5160576e+01 -6.6570635e+00]
 [ 0.0000000e+00 -1.9473936e+01 -1.4755440e+01  8.8173813e-01]
 [ 0.0000000e+00  3.2558167e+01  3.3551048e+01  1.5242510e-01]
 [ 0.0000000e+00 -3.9381802e+00 -2.1995125e+01  3.5655100e+00]
 [ 0.0000000e+00  4.4144449e+00  7.1385679e+00  1.2887930e+01]
 [ 0.0000000e+00 -1.7217085e-01  1.7178435e+00  4.8987284e+00]
 [ 0.0000000e+00  7.0732756e+00 -2.9608059e+00  6.6356955e+00]
 [ 0.0000000e+00 -9.4648170e-01 -2.2162137e+00  1.3756098e+01]]
no_queen_mfccs:
[[-620.7817     -246.7279     -376.80215    -177.33191   ]
 [   0.          172.13995     145.6271      130.23778   ]
 [   0.           19.31228       4.8429527    20.572308  ]
 [   0.           58.24466      19.944237     20.364254  ]
 [   0.          -26.18211     -11.411879      2.146392  ]
 [   0.           28.36429       6.484435      2.1418092 ]
 [   0.          -13.176889     -0.63723433   -2.3569534 ]
 [   0.            3.0853925    -4.836183     -1.0625077 ]
 [   0.           -8.8347435    -5.7978144    -1.1277297 ]
 [   0.           -1.2606636    -3.86494      -4.05499   ]
 [   0.            1.0070633    -8.360489     -4.835867  ]
 [   0.           -6.2654414    -8.794887     -3.7851849 ]
 [   0.           -4.145375     -5.0080633    -5.352196  ]]

In [15]:
# Combine the data from "queen" and "no queen" segments and labels:

X = np.vstack((queen_mfccs, no_queen_mfccs))
y = np.hstack((queen_labels, no_queen_labels))


print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

# The audio is divided into segments with a hop length of 10 seconds, 
# which results in 3 time steps per frame. 
# When you add the 0th coefficient, you have a total of 4 coefficients per frame



Shape of X: (437, 13, 4)
Shape of y: (437,)


In [16]:
# Reshape X to have two dimensions
X = X.reshape(X.shape[0], -1)  # Flatten the last two dimensions

# Now, X will have a shape of (437, 13 * 4)
print("Shape of X:", X.shape)   

Shape of X: (437, 52)


In [17]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier (you can choose different kernels and parameters)
clf = svm.SVC(kernel='linear')

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate accuracy and print the classification report
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of MFCC Features: {accuracy * 100:.2f}%")

report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)


Accuracy of MFCC Features: 98.86%

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99        78
           1       0.91      1.00      0.95        10

    accuracy                           0.99        88
   macro avg       0.95      0.99      0.97        88
weighted avg       0.99      0.99      0.99        88



In [18]:
# LPC order (you can adjust this)
lpc_order = 12

# Extract LPC features for augmented queen segments
lpc_queen_segments = []
for segment in augmented_queen_segments:
    # Apply LPC analysis using librosa.lpc
    lpc_coefficients = librosa.lpc(segment, order=lpc_order)
    lpc_queen_segments.append(lpc_coefficients)

# Extract LPC features for augmented no queen segments
lpc_no_queen_segments = []
for segment in augmented_no_queen_segments:
    # Apply LPC analysis using librosa.lpc
    lpc_coefficients = librosa.lpc(segment, order=lpc_order)
    lpc_no_queen_segments.append(lpc_coefficients)

# Now you have LPC features for both queen and no queen segments.
# lpc_queen_segments and lpc_no_queen_segments are lists of LPC coefficients for each segment.


In [19]:
import numpy as np

# Assuming '1' represents "queen" and '0' represents "no queen"
queen_labels = [1] * len(lpc_queen_segments)
no_queen_labels = [0] * len(lpc_no_queen_segments)

# Combine queen and no queen data and labels
X = np.vstack((lpc_queen_segments, lpc_no_queen_segments))
y = np.hstack((queen_labels, no_queen_labels))

# Find rows with NaN or infinity values in X
nan_inf_indices = np.isnan(X).any(axis=1) | np.isinf(X).any(axis=1)

# Remove rows with NaN or infinity values from X and y
X = X[~nan_inf_indices]
y = y[~nan_inf_indices]

# Now, X_clean and y_clean contain the data with NaN and infinity rows removed

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X = scaler.fit_transform(X)



In [20]:

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Create an SVM classifier (you can choose different kernels and parameters)
clf = svm.SVC(kernel='linear')

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate accuracy and print the classification report
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of LPC Features: {accuracy * 100:.2f}%")

report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)


Accuracy of LPC Features: 88.51%

Classification Report:
               precision    recall  f1-score   support

           0       0.89      1.00      0.94        77
           1       0.00      0.00      0.00        10

    accuracy                           0.89        87
   macro avg       0.44      0.50      0.47        87
weighted avg       0.78      0.89      0.83        87



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
