Cicada Species Classification based on their audio (61% acc
), dataset:https://www.kaggle.com/datasets/michaelgoh/cicada-species-detection-based-on-acoustic-signals



In [None]:
!pip install opendatasets tensorflow_io matplotlib

In [2]:
import opendatasets as od
od.download('https://www.kaggle.com/datasets/michaelgoh/cicada-species-detection-based-on-acoustic-signals')

Downloading cicada-species-detection-based-on-acoustic-signals.zip to ./cicada-species-detection-based-on-acoustic-signals


100%|██████████| 129M/129M [00:07<00:00, 17.2MB/s]





In [73]:
import os
import numpy as np
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Set the path to your dataset folder
dataset_path = '/content/cicada-species-detection-based-on-acoustic-signals/Cicada Species Detection Based on Acoustic Signals/audio (original)/'

# Define the cicada species names
species_names = ['cassini', 'septendecim', 'septendecula']

# Define the desired sample rate and audio length for preprocessing
desired_sample_rate = 16000
desired_audio_length = 5  # in seconds

# Initialize empty lists to store the preprocessed data and labels
data = []
labels = []

# Iterate over each species
for species in species_names:
    # Set the path to the species folder
    species_folder = os.path.join(dataset_path, species)

    # Iterate over each audio file in the species folder
    for file_name in os.listdir(species_folder):
        # Read the audio file
        file_path = os.path.join(species_folder, file_name)
        sample_rate, audio_data = wavfile.read(file_path)



        # Resample the audio data to the desired sample rate
        if sample_rate != desired_sample_rate:
            audio_data = np.interp(
                np.linspace(
                    0, len(audio_data), int(len(audio_data) * (desired_sample_rate / sample_rate))
                ), np.arange(len(audio_data)), audio_data
            )

        # Pad or truncate the audio signal to the desired length
        desired_samples = int(desired_sample_rate * desired_audio_length)
        if len(audio_data) < desired_samples:
            audio_data = np.pad(audio_data, (0, desired_samples - len(audio_data)), 'constant')
        elif len(audio_data) > desired_samples:
            audio_data = audio_data[:desired_samples]

        # Append the preprocessed data and label to the lists
        data.append(audio_data)
        labels.append(species)

# Convert the data and labels to numpy arrays
data = np.array(data)
labels = np.array(labels)

# Encode the labels as integers
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=42)

# Normalize the audio data to values between 0 and 1
train_data = train_data / np.max(np.abs(train_data))
test_data = test_data / np.max(np.abs(test_data))

# Convert labels to categorical format
num_classes = len(species_names)
train_labels = to_categorical(train_labels, num_classes=num_classes)
test_labels = to_categorical(test_labels, num_classes=num_classes)


  sample_rate, audio_data = wavfile.read(file_path)


In [74]:
# Build the neural network model
model = Sequential()
model.add(Dense(256, input_shape=(desired_samples,), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [75]:
# Train the model
model.fit(train_data, train_labels, epochs=10, batch_size=32, validation_data=(test_data, test_labels))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7a2a447746a0>

In [76]:
loss, accuracy = model.evaluate(test_data, test_labels)
print("Test loss:", loss)
print("Test accuracy:", accuracy)

Test loss: 2.2093069553375244
Test accuracy: 0.3333333432674408


In [77]:
predictions = model.predict(test_data)
predicted_labels = np.argmax(predictions, axis=1)
actual_labels = np.argmax(test_labels, axis=1)
predicted_labels = label_encoder.inverse_transform(predicted_labels)
actual_labels = label_encoder.inverse_transform(actual_labels)
for i in range(len(test_data)):
    print("Predicted: {}, Actual: {}".format(predicted_labels[i], actual_labels[i]))
correct_predictions = np.sum(predicted_labels == actual_labels)
total_predictions = len(predicted_labels)
accuracy = correct_predictions / total_predictions
print("Accuracy: {:.2%}".format(accuracy))


Predicted: septendecim, Actual: septendecula
Predicted: cassini, Actual: septendecim
Predicted: septendecim, Actual: septendecim
Predicted: septendecim, Actual: septendecula
Predicted: cassini, Actual: septendecula
Predicted: cassini, Actual: septendecula
Predicted: cassini, Actual: cassini
Predicted: septendecim, Actual: cassini
Predicted: cassini, Actual: cassini
Accuracy: 33.33%


In [115]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

dataset_path = '/content/cicada-species-detection-based-on-acoustic-signals/Cicada Species Detection Based on Acoustic Signals/audio (original)/'

# Define the cicada species names
species_names = ['cassini', 'septendecim', 'septendecula']

# Define the desired sample rate and audio length for preprocessing
desired_sample_rate = 16000
desired_audio_length = 2  # in seconds

# Define the number of augmented samples per original sample
num_augmented_samples = 3

# Initialize empty lists to store the preprocessed data and labels
data = []
labels = []

# Iterate over each species
for species in species_names:
    # Set the path to the species folder
    species_folder = os.path.join(dataset_path, species)

    # Iterate over each audio file in the species folder
    for file_name in os.listdir(species_folder):
        # Read the audio file
        file_path = os.path.join(species_folder, file_name)
        audio_data, _ = librosa.load(file_path, sr=desired_sample_rate, duration=desired_audio_length)

        # Augment the audio data
        augmented_data = []
        for _ in range(num_augmented_samples):
            # Apply time stretching and pitch shifting
            time_stretched = librosa.effects.time_stretch(audio_data, rate=np.random.uniform(0.8, 1.2))
            pitch_shifted = librosa.effects.pitch_shift(time_stretched, sr=desired_sample_rate, n_steps=np.random.randint(-3, 3))

            # Pad or truncate the augmented audio to the desired length
            desired_samples = int(desired_sample_rate * desired_audio_length)
            if len(pitch_shifted) < desired_samples:
                pitch_shifted = np.pad(pitch_shifted, (0, desired_samples - len(pitch_shifted)), 'constant')
            elif len(pitch_shifted) > desired_samples:
                pitch_shifted = pitch_shifted[:desired_samples]

            augmented_data.append(pitch_shifted)

        # Append the preprocessed data and label to the lists
        data.extend(augmented_data)
        labels.extend([species] * len(augmented_data))

# Convert the data and labels to numpy arrays
data = np.array(data)
labels = np.array(labels)

# Encode the labels as integers
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=42)

# Normalize the audio data to values between 0 and 1
train_data = train_data / np.max(np.abs(train_data))
test_data = test_data / np.max(np.abs(test_data))

# Convert labels to categorical format
num_classes = len(species_names)
train_labels = to_categorical(train_labels, num_classes=num_classes)
test_labels = to_categorical(test_labels, num_classes=num_classes)


In [116]:
model = Sequential()
model.add(Dense(256, input_shape=(desired_sample_rate * desired_audio_length,), activation='relu'))
model.add(BatchNormalization())
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [None]:
model.fit(train_data, train_labels, epochs=15, batch_size=32, validation_data=(test_data, test_labels))


In [94]:
predictions = model.predict(test_data)
predicted_labels = np.argmax(predictions, axis=1)
actual_labels = np.argmax(test_labels, axis=1)
predicted_labels = label_encoder.inverse_transform(predicted_labels)
actual_labels = label_encoder.inverse_transform(actual_labels)
for i in range(len(test_data)):
    print("Predicted: {}, Actual: {}".format(predicted_labels[i], actual_labels[i]))
correct_predictions = np.sum(predicted_labels == actual_labels)
total_predictions = len(predicted_labels)
accuracy = correct_predictions / total_predictions
print("Accuracy: {:.2%}".format(accuracy))


Predicted: septendecula, Actual: septendecula
Predicted: cassini, Actual: cassini
Predicted: septendecula, Actual: septendecula
Predicted: cassini, Actual: cassini
Predicted: cassini, Actual: cassini
Predicted: septendecula, Actual: septendecim
Predicted: septendecula, Actual: cassini
Predicted: septendecula, Actual: cassini
Predicted: septendecula, Actual: cassini
Predicted: septendecula, Actual: septendecula
Predicted: septendecula, Actual: septendecula
Predicted: septendecula, Actual: septendecula
Predicted: cassini, Actual: septendecim
Predicted: septendecula, Actual: septendecula
Predicted: septendecula, Actual: septendecula
Predicted: cassini, Actual: septendecula
Predicted: cassini, Actual: septendecim
Predicted: septendecula, Actual: septendecula
Accuracy: 61.11%
