<a href="https://colab.research.google.com/github/Vamshi-Nallaguntla/BeeHive-Audio-Generalization/blob/main/Beehive_Project_NuHive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so the notebook can read files stored there
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root of the organized NuHive dataset on the mounted Drive; each class lives
# in its own sub-folder directly underneath it.
base_folder = '/content/drive/My Drive/BeeHive/Datasets/NuHive_Organized'
queen_folder = "/".join((base_folder, "Queen"))
no_queen_folder = "/".join((base_folder, "No_Queen"))

In [None]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [None]:
# Parameters
sampling_rate = 16000  # Sample rate (Hz) passed to librosa.load for every clip
duration = 2  # 2 seconds
n_mfcc = 13  # Number of MFCC features

def preprocess_audio(folder, label):
    """Convert every WAV file in ``folder`` into one time-averaged MFCC vector.

    Each clip is loaded at ``sampling_rate``, forced to exactly
    ``duration * sampling_rate`` samples with ``librosa.util.fix_length``,
    turned into ``n_mfcc`` MFCCs, and averaged over axis 1 so that each file
    contributes a single vector of length ``n_mfcc``.

    Args:
        folder: Directory to scan (not recursive).
        label: Class label attached to every file successfully processed.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays with one row / entry per
        successfully processed file. When nothing could be processed the
        features array has shape ``(0, n_mfcc)`` and the labels array has shape
        ``(0,)`` so both can still be concatenated with another class's arrays.

    Files that fail to load or process are reported with ``print`` and skipped.
    """
    features, labels = [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split done afterwards) reproducible.
    for file_name in sorted(os.listdir(folder)):
        # Case-insensitive match so recordings named *.WAV are not skipped.
        if not file_name.lower().endswith('.wav'):
            continue
        file_path = os.path.join(folder, file_name)
        try:
            # Load audio and force it to a fixed number of samples
            y, sr = librosa.load(file_path, sr=sampling_rate)
            y = librosa.util.fix_length(y, size=int(duration * sampling_rate))

            # Extract MFCCs and collapse axis 1 into a single mean vector
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            mfcc_mean = np.mean(mfcc, axis=1)

            features.append(mfcc_mean)
            labels.append(label)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole folder.
            print(f"Error processing {file_path}: {e}")

    if not features:
        # np.array([]) would be 1-D and could not be concatenated with a 2-D
        # feature matrix; return correctly shaped empties instead. float32 is
        # used so the empties can never upcast the other class's features.
        return np.empty((0, n_mfcc), dtype=np.float32), np.empty((0,), dtype=int)
    return np.array(features), np.array(labels)

# Preprocess Queen and No_Queen folders (label 1 for Queen, label 0 for No_Queen)
queen_features, queen_labels = preprocess_audio(queen_folder, label=1)
no_queen_features, no_queen_labels = preprocess_audio(no_queen_folder, label=0)

# Combine data: stack both classes into one feature matrix and one label vector
X = np.concatenate([queen_features, no_queen_features], axis=0)
y = np.concatenate([queen_labels, no_queen_labels], axis=0)

# Split data into training and testing sets.
# stratify=y keeps the Queen / No_Queen ratio identical in both partitions, so
# the held-out metrics are not skewed by an unlucky random draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# weight initialisation, dropout masks and batch shuffling repeat across runs.
tf.keras.utils.set_random_seed(42)

# Reshape data for CNN input: (samples, features) -> (samples, features, 1, 1),
# i.e. each feature vector becomes a single-channel "image" of width 1.
X_train_cnn = X_train.reshape(-1, X_train.shape[1], 1, 1)
X_test_cnn = X_test.reshape(-1, X_test.shape[1], 1, 1)

# Build CNN model
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], 1, 1)),  # Explicit input shape
    Conv2D(32, (3, 1), activation='relu'),  # Kernel size (3, 1): convolve along the feature axis only
    MaxPooling2D((2, 1)),  # Pool along the feature axis only (width is 1)
    Conv2D(64, (3, 1), activation='relu'),
    MaxPooling2D((2, 1)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification
])



In [None]:
# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model. The last 20% of the training data is held out for validation.
# The returned History is kept so the per-epoch loss/accuracy curves stay
# available (history.history) instead of being echoed as a bare object repr
# and then lost.
history = model.fit(X_train_cnn, y_train, epochs=100, batch_size=32, validation_split=0.2)

Epoch 1/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 219ms/step - accuracy: 0.4826 - loss: 6.5523 - val_accuracy: 0.4783 - val_loss: 3.7894
Epoch 2/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5644 - loss: 4.0079 - val_accuracy: 0.5217 - val_loss: 1.6553
Epoch 3/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5080 - loss: 2.5170 - val_accuracy: 0.5761 - val_loss: 0.6003
Epoch 4/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5515 - loss: 1.3803 - val_accuracy: 0.7391 - val_loss: 0.4780
Epoch 5/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6347 - loss: 0.8242 - val_accuracy: 0.7826 - val_loss: 0.4056
Epoch 6/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7609 - loss: 0.4769 - val_accuracy: 0.8696 - val_loss: 0.3508
Epoch 7/100
[1m12/12[0m [32m━

<keras.src.callbacks.history.History at 0x793dc0189630>

In [None]:
# Score the held-out set: threshold the sigmoid outputs at 0.5 to get hard 0/1 labels
probabilities = model.predict(X_test_cnn)
y_pred = (probabilities > 0.5).astype("int32").flatten()

# Compute the four summary metrics in the same order they are reported below
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Evaluation Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

# Per-class breakdown of precision / recall / F1 and support
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step
Evaluation Metrics:
Accuracy: 0.87
Precision: 0.83
Recall: 0.95
F1-Score: 0.89

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.77      0.85        53
           1       0.83      0.95      0.89        63

    accuracy                           0.87       116
   macro avg       0.88      0.86      0.87       116
weighted avg       0.88      0.87      0.87       116

