In [None]:
import os
import librosa
import numpy as np

# Function to extract features from an audio file
def extract_features(audio_path, n_mfcc=20):
    """Summarise an audio file as a fixed-length vector of time-averaged MFCCs.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to load with ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute, which is also the length of
        the returned vector. Defaults to 20.

    Returns
    -------
    numpy.ndarray or None
        1-D array of length ``n_mfcc`` holding the mean of each coefficient
        over all frames, or ``None`` when the file could not be processed.
        Failures are printed rather than raised so that one unreadable file
        does not abort a loop over many files.
    """
    try:
        # sr=None keeps the file's native sampling rate instead of resampling
        audio_data, sample_rate = librosa.load(audio_path, sr=None)
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc)
        # Add other features as needed
        # Averaging over axis 1 (frames) gives one value per coefficient,
        # so clips of different durations yield vectors of equal length
        return np.mean(mfccs, axis=1)
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None

# Specify the root directory of your dataset
dataset_root = '/content/drive/MyDrive/Animal_Dataset'

# Define the classes (animal names); a class's position in this list is its integer label
classes = ['Birds', 'Elephant', 'Leopard', 'Otter', 'Tiger']

X = []  # List to store features
y = []  # List to store labels

# Iterate through each class folder
for class_index, class_name in enumerate(classes):
    class_folder = os.path.join(dataset_root, class_name)

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of X / y stable between runs so a seeded split stays reproducible
    for audio_file in sorted(os.listdir(class_folder)):
        audio_path = os.path.join(class_folder, audio_file)

        # Skip sub-directories (e.g. checkpoint folders) instead of handing them to the loader
        if not os.path.isfile(audio_path):
            continue

        # Extract features from the audio file
        features = extract_features(audio_path)

        # Files that failed to load return None and are left out of the dataset
        if features is not None:
            X.append(features)
            y.append(class_index)

# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)

# Now, X contains your feature matrix, and y contains the corresponding labels


In [None]:
# Show the feature matrix followed by the label vector, one per line
print(X, y, sep="\n")

[[-597.7563     -37.171284   -41.327328  ...    1.2677945   -5.098191
     2.12892  ]
 [-560.7332      -8.476901   -27.771772  ...    1.220307     2.7622428
    -1.9266127]
 [-526.71216    -78.22412    -76.27306   ...  -10.779298     7.3135214
     8.441823 ]
 ...
 [-271.472      173.8646     -36.965656  ...  -12.830008   -10.297043
    -4.864692 ]
 [-261.2718     163.21481    -36.95614   ...   -9.914326    -7.5772476
    -7.423172 ]
 [-276.2054     159.80583    -33.212875  ...  -11.813957    -9.561474
    -6.111332 ]]
[0 0 0 ... 4 4 4]


In [None]:
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Assuming you have a dataset (X, y)
# X is your feature matrix, and y is your label vector

# Convert labels to categorical if it's a classification problem
y_categorical = to_categorical(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

# Define model
input_length = X_train.shape[1]  # Number of values per sample (steps seen by the convolution)
num_classes = y_categorical.shape[1]  # One output unit per one-hot label column

# Conv1D consumes (samples, steps, channels). Add the single channel axis
# explicitly instead of relying on Keras to adapt the 2-D matrix implicitly.
# X_train / X_test themselves are left 2-D.
X_train_cnn = X_train.reshape(-1, input_length, 1)
X_test_cnn = X_test.reshape(-1, input_length, 1)

model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(input_length, 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(num_classes, activation='softmax'))

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Define training parameters
epochs = 10  # Adjust based on your preference
batch_size = 32  # Adjust based on your preference and hardware constraints

# Train the model (the held-out split is used for per-epoch validation metrics)
model.fit(X_train_cnn, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test_cnn, y_test))

# Save the trained model; create the target folder first so a missing
# directory does not throw away a finished training run
model_output_path = '/content/drive/MyDrive/Features_Output/your_trained_model.h5'
os.makedirs(os.path.dirname(model_output_path), exist_ok=True)
model.save(model_output_path)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
from keras.models import load_model
import os
import librosa
import numpy as np

# Restore the classifier that the training step wrote to disk
model_path = '/content/drive/MyDrive/Features_Output/your_trained_model.h5'
loaded_model = load_model(filepath=model_path)

def extract_and_preprocess_features(audio_path, expected_shape):
    """Build the model input vector for one audio file.

    The classifier in this notebook is trained on the per-coefficient mean of
    MFCCs, so inference has to produce that same representation. Mixing in
    other descriptors (zero-crossing rate, mel or chroma means) and then
    padding or trimming to the right length would give the model a vector of
    the correct size whose positions mean something different from what it
    was trained on.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to load with ``librosa.load``.
    expected_shape : tuple
        Per-sample input shape of the model, e.g. ``(20, 1)``. Its first
        entry is used as the number of MFCC coefficients.

    Returns
    -------
    numpy.ndarray or None
        1-D array of length ``expected_shape[0]`` containing the mean of each
        MFCC coefficient over all frames, or ``None`` if the file could not
        be processed (the error is printed, not raised).
    """
    try:
        # The vector length the model expects is the number of MFCCs to compute
        n_coefficients = int(expected_shape[0])

        # Load the new audio file at its native sampling rate
        audio_data, sample_rate = librosa.load(audio_path, sr=None)

        # Same feature recipe as training: MFCCs averaged over the frame axis
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_coefficients)
        return np.mean(mfccs, axis=1)
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None


# Point 'new_audio_path' at the recording you want to classify
new_audio_path = '/content/drive/MyDrive/Animal_Dataset/Elephant/elephant100.wav'
expected_input_shape = (20, 1)  # Per-sample input shape of the loaded model
new_audio_features = extract_and_preprocess_features(new_audio_path, expected_input_shape)

# Feature extraction returns None on failure; only predict when it succeeded
if new_audio_features is not None:
    print("Shape before reshaping:", new_audio_features.shape)
    # Prepend a batch axis of size one so the array has shape (1, *expected_input_shape)
    new_audio_features = new_audio_features.reshape((1,) + expected_input_shape)
    print("Shape after reshaping:", new_audio_features.shape)
    predictions = loaded_model.predict(new_audio_features)

    # Index of the highest-scoring class for each row of the batch
    predicted_class_labels = predictions.argmax(axis=1)

    # Class code -> class label lookup (codes follow the list order)
    class_label_mapping = dict(enumerate(['Birds', 'Elephant', 'Leopard', 'Otter', 'Tiger']))

    # Only one sample was submitted, so the first entry is the answer
    predicted_class_name = class_label_mapping[predicted_class_labels[0]]

    # Report the predicted class label for the new audio file
    print(f"Predicted Class Label for {new_audio_path}: {predicted_class_name}")

Shape before reshaping: (20,)
Shape after reshaping: (1, 20, 1)
Predicted Class Label for /content/drive/MyDrive/Animal_Dataset/Elephant/elephant100.wav: Elephant
