In [1]:
# Finally !
# !pip install pandas

In [2]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display

In [3]:
# Dataset location. Defaults to the original mount point; set the
# INSECT_SOUND_DIR environment variable to run the notebook on another machine
# without editing this cell.
folder_path = os.environ.get('INSECT_SOUND_DIR', r'/mnt/external_disk/InsectSound1000')
# Substring a .wav file name must contain to be included in the analysis.
insect_name = 'Aphidoletes_aphidimyza'

In [4]:
# Maps each matching file name to {'audio': samples, 'sample_rate': rate}.
audio_data_dict = {}

# List and filter files based on the insect name. os.listdir() returns entries
# in arbitrary, filesystem-dependent order, so sort them: this keeps the
# per-file output order - and the `audio_data` / `sample_rate` values left over
# from the final iteration - the same on every run.
wav_files = sorted(
    f for f in os.listdir(folder_path)
    if f.endswith('.wav') and insect_name in f
)

# Process files and load audio data
for wav_file in wav_files:
    file_path = os.path.join(folder_path, wav_file)
    try:
        # sr=None asks librosa to keep the file's native sample rate.
        audio_data, sample_rate = librosa.load(file_path, sr=None)
    except Exception as e:
        # Best effort: report the unreadable file and carry on with the rest.
        print(f"Error loading {wav_file}: {e}")
    else:
        audio_data_dict[wav_file] = {'audio': audio_data, 'sample_rate': sample_rate}



In [5]:
def analyze_fft(audio_data, sample_rate, threshold_ratio=0.5):
    """Find the strongest spectral component of a real-valued signal.

    Args:
        audio_data: Sequence of real samples (expected to be 1-D) with at
            least two entries.
        sample_rate: Sampling rate in Hz, used to convert bin indices to Hz.
        threshold_ratio: Fraction of the peak magnitude a bin must reach to be
            included in ``dominant_freqs``.

    Returns:
        tuple: ``(dominant_frequency, dominant_freqs)``.
        ``dominant_frequency`` is the frequency in Hz of the largest-magnitude
        bin; ``dominant_freqs`` is an array with the frequency of every bin
        whose magnitude is at least ``threshold_ratio`` times that peak.

    Raises:
        ValueError: If ``audio_data`` has fewer than two samples, in which
            case there is no frequency bin to inspect.

    Note:
        Bin 0 (DC) takes part in the search, so a signal with a large constant
        offset reports 0 Hz as its dominant frequency.
    """
    n = len(audio_data)
    if n < 2:
        raise ValueError(
            f"analyze_fft needs at least 2 samples to form a spectrum, got {n}"
        )

    # The input is real, so rfft computes only the non-negative half of the
    # spectrum instead of a full FFT whose second half would be thrown away.
    # Slicing to n // 2 keeps exactly the bins the full-FFT version inspected
    # (the final Nyquist / highest bin is excluded).
    half = n // 2
    magnitude = np.abs(np.fft.rfft(audio_data))[:half]
    positive_freqs = np.fft.rfftfreq(n, d=1 / sample_rate)[:half]

    peak_index = np.argmax(magnitude)
    dominant_frequency = positive_freqs[peak_index]
    threshold = threshold_ratio * magnitude[peak_index]
    dominant_freqs = positive_freqs[magnitude >= threshold]

    print(f"Dominant frequency: {dominant_frequency:.2f} Hz")
    return dominant_frequency, dominant_freqs

In [6]:
def plot_spectrogram(audio_data, sample_rate, max_freq=200):
    """Show a dB-scaled STFT spectrogram of a signal, zoomed to low frequencies.

    Args:
        audio_data: Array of audio samples handed to ``librosa.stft``.
        sample_rate: Sampling rate in Hz, used to scale the time and
            frequency axes.
        max_freq: Upper limit of the displayed frequency axis in Hz. Defaults
            to 200, the limit that used to be hard-coded.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(10, 5))

    # Magnitudes are converted to dB relative to the largest value
    # (ref=np.max), so the colour scale tops out at 0 dB.
    stft_magnitude = np.abs(librosa.stft(audio_data))
    spectrogram_db = librosa.amplitude_to_db(stft_magnitude, ref=np.max)

    mesh = librosa.display.specshow(
        spectrogram_db,
        sr=sample_rate,
        x_axis='time',
        y_axis='linear',
        cmap='viridis',
        ax=ax,
    )
    fig.colorbar(mesh, ax=ax, format="%+5.0f dB")
    ax.set_title("Spectrogram")
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Frequency (Hz)")
    ax.set_ylim(0, max_freq)
    plt.show()

In [None]:
# Per-file feature values, appended in audio_data_dict iteration order.
dominant_freqs_all = []
zcr_all = []
mean_amplitude_all = []
max_amplitude_all = []
min_amplitude_all = []

# Fail early with a clear message: the min/max reductions further down cannot
# run on empty lists.
if not audio_data_dict:
    raise ValueError("audio_data_dict is empty - no recordings were loaded to analyze")

for file_name, data in audio_data_dict.items():
    audio = data['audio']
    sample_rate = data['sample_rate']

    print(f"Analyzing file: {file_name}")

    # Analyze FFT and dominant frequencies
    dominant_frequency, dominant_freqs = analyze_fft(audio, sample_rate)
    dominant_freqs_all.append(dominant_frequency)

    # Calculate Zero-Crossing Rate (ZCR): number of flagged crossings divided
    # by the number of samples. np.count_nonzero counts the boolean flags in
    # compiled code; the builtin sum() would iterate sample by sample in Python.
    zcr = librosa.zero_crossings(audio, pad=False)
    zcr_rate = np.count_nonzero(zcr) / len(audio)
    zcr_all.append(zcr_rate)

    # Basic amplitude stats. The mean is the signed mean of the samples, not
    # the mean absolute amplitude.
    mean_amplitude = np.mean(audio)
    max_amplitude = np.max(audio)
    min_amplitude = np.min(audio)
    mean_amplitude_all.append(mean_amplitude)
    max_amplitude_all.append(max_amplitude)
    min_amplitude_all.append(min_amplitude)

    print("-" * 50)  # Separator between file outputs

# Calculate min/max ranges for each feature
dominant_freq_min = np.min(dominant_freqs_all)
dominant_freq_max = np.max(dominant_freqs_all)

zcr_min = np.min(zcr_all)
zcr_max = np.max(zcr_all)

mean_amplitude_min = np.min(mean_amplitude_all)
mean_amplitude_max = np.max(mean_amplitude_all)

max_amplitude_min = np.min(max_amplitude_all)
max_amplitude_max = np.max(max_amplitude_all)

min_amplitude_min = np.min(min_amplitude_all)
min_amplitude_max = np.max(min_amplitude_all)

# Print ranges
print(f"Dominant Frequency Range: {dominant_freq_min:.2f} Hz - {dominant_freq_max:.2f} Hz")
print(f"Zero-Crossing Rate Range: {zcr_min:.5f} - {zcr_max:.5f}")
print(f"Mean Amplitude Range: {mean_amplitude_min:.3f} - {mean_amplitude_max:.3f}")
print(f"Max Amplitude Range: {max_amplitude_min:.3f} - {max_amplitude_max:.3f}")
print(f"Min Amplitude Range: {min_amplitude_min:.3f} - {min_amplitude_max:.3f}")

Analyzing file: 202253-16-31_Aphidoletes_aphidimyza_000029_s3_ch2.wav
Dominant frequency: 3.60 Hz
--------------------------------------------------
Analyzing file: 2022324-13-48_Aphidoletes_aphidimyza_000071_s6_ch0.wav
Dominant frequency: 19.20 Hz
--------------------------------------------------
Analyzing file: 2022323-13-1_Aphidoletes_aphidimyza_000058_s16_ch0.wav
Dominant frequency: 16.40 Hz
--------------------------------------------------
Analyzing file: 2022323-13-1_Aphidoletes_aphidimyza_000076_s36_ch0.wav
Dominant frequency: 18.80 Hz
--------------------------------------------------
Analyzing file: 2022324-13-48_Aphidoletes_aphidimyza_000060_s39_ch0.wav
Dominant frequency: 28.80 Hz
--------------------------------------------------
Analyzing file: 202253-16-31_Aphidoletes_aphidimyza_000003_s4_ch0.wav
Dominant frequency: 28.80 Hz
--------------------------------------------------
Analyzing file: 202254-16-17_Aphidoletes_aphidimyza_000021_s41_ch0.wav
Dominant frequency: 3.60 

In [1]:
# Number of recordings held in audio_data_dict (one entry per loaded .wav file name).
# NOTE(review): the saved output for this cell is a NameError and the execution
# counter restarts at In [1] here, so it was last run on a fresh kernel before
# the loading cell. Re-run the notebook top to bottom.
len(audio_data_dict)

NameError: name 'audio_data_dict' is not defined

In [2]:
from scipy import stats

dominant_freqs_array = np.array(dominant_freqs_all)

# Mean
mean_freq = np.mean(dominant_freqs_array)

# Median
median_freq = np.median(dominant_freqs_array)

# Mode
# Depending on the SciPy version, stats.mode returns either a length-1 array or
# a scalar for 1-D input (the keepdims default changed between releases).
# np.atleast_1d(...)[0] yields a plain scalar in both cases, so the ':.2f'
# format below never receives an array.
mode_freq = float(np.atleast_1d(stats.mode(dominant_freqs_array).mode)[0])

# Print results
print(f"Mean Dominant Frequency: {mean_freq:.2f} Hz")
print(f"Median Dominant Frequency: {median_freq:.2f} Hz")
print(f"Mode Dominant Frequency: {mode_freq:.2f} Hz")


NameError: name 'np' is not defined

In [None]:
# White Noise Generation with Bandpass Filtering
def generate_filtered_white_noise(duration, sample_rate, low_freq, high_freq, order=3):
    """Generate Gaussian white noise band-limited to ``[low_freq, high_freq]``.

    Args:
        duration: Length of the signal in seconds.
        sample_rate: Sampling rate in Hz.
        low_freq: Lower edge of the pass band in Hz.
        high_freq: Upper edge of the pass band in Hz.
        order: Order passed to the Butterworth design.

    Returns:
        tuple: ``(filtered_noise, t)`` where ``filtered_noise`` is the
        band-pass filtered noise and ``t`` holds the matching sample times in
        seconds (``int(sample_rate * duration)`` points, end point excluded).

    Raises:
        ValueError: Propagated from SciPy when the band edges are not valid
            for the given sample rate (they must lie strictly between 0 and
            ``sample_rate / 2``) or the signal is too short to pad for
            filtering.
    """
    # Local import so the function only needs numpy to be available at module level.
    from scipy.signal import butter, sosfiltfilt

    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
    white_noise = np.random.normal(0, 0.1, len(t))  # Zero-mean noise, std 0.1

    # Band edges are expressed as a fraction of the Nyquist frequency.
    nyquist = 0.5 * sample_rate
    low = low_freq / nyquist
    high = high_freq / nyquist

    # Design the filter as second-order sections rather than (b, a)
    # polynomials: with a band edge that is a tiny fraction of Nyquist
    # (e.g. 0.4 Hz at 16 kHz) the polynomial form is numerically
    # ill-conditioned, which is why SciPy recommends SOS for filtering.
    sos = butter(order, [low, high], btype='band', output='sos')
    filtered_noise = sosfiltfilt(sos, white_noise)  # Zero-phase (forward-backward) filtering
    return filtered_noise, t


In [3]:
def generate_random_pulses(duration, sample_rate, low_freq, high_freq, pulse_duration=0.25):
    """Build a signal of sine bursts placed at random times.

    ``int(duration // pulse_duration)`` distinct onset times are drawn from
    the sample time grid. Each onset starts a sine burst lasting
    ``pulse_duration`` seconds whose frequency is drawn uniformly from
    ``[low_freq, high_freq)``. Bursts are written in ascending onset order, so
    where two bursts overlap the later one overwrites the earlier; a burst
    that would run past the end of the signal is cut off there.

    Args:
        duration: Total signal length in seconds.
        sample_rate: Sampling rate in Hz.
        low_freq: Lower bound of the random burst frequency in Hz.
        high_freq: Upper bound of the random burst frequency in Hz.
        pulse_duration: Length of each burst in seconds.

    Returns:
        Array of ``int(sample_rate * duration)`` samples, zero outside the bursts.
    """
    n_samples = int(sample_rate * duration)
    t = np.linspace(0, duration, n_samples, endpoint=False)
    pulses = np.zeros_like(t)

    # Draw the distinct onset times, then order them chronologically.
    pulse_count = int(duration // pulse_duration)
    starts = np.random.choice(t, pulse_count, replace=False)
    starts.sort()

    for start in starts:
        burst_freq = np.random.uniform(low_freq, high_freq)
        # Samples covered by this burst: [start, start + pulse_duration).
        window = (t >= start) & (t < start + pulse_duration)
        # Phase is measured from the onset, so every burst begins at sin(0).
        pulses[window] = np.sin(2 * np.pi * burst_freq * (t[window] - start))

    return pulses


In [4]:
# Scale the combined signal
def scale_signal(signal, min_value, max_value):
    """Linearly rescale ``signal`` onto the range ``[min_value, max_value]``.

    The smallest sample maps to ``min_value`` and the largest to
    ``max_value``; everything in between is interpolated linearly.

    Args:
        signal: Array-like of samples; must not be empty.
        min_value: Value the signal's minimum is mapped to.
        max_value: Value the signal's maximum is mapped to.

    Returns:
        Array of the same shape as ``signal``. A constant signal has no range
        to stretch and is mapped entirely to ``min_value``.

    Raises:
        ValueError: If ``signal`` is empty (raised by the min/max reduction).
    """
    signal = np.asarray(signal)
    min_signal = np.min(signal)
    span = np.max(signal) - min_signal
    if span == 0:
        # Dividing by a zero span would fill the result with NaN; return the
        # lower bound instead, which is what the formula's zero numerator
        # would give with any finite scale factor.
        return np.full(signal.shape, min_value, dtype=float)
    scale_factor = (max_value - min_value) / span
    return (signal - min_signal) * scale_factor + min_value



In [5]:
# Parameters of the synthetic disruptive signal.
# NOTE: this rebinds the global `sample_rate` that the loading/analysis cells
# also assign; after this cell it describes the synthetic signal only.
duration = 2.5        # seconds
sample_rate = 16000   # Hz
low_freq = 0.4        # Hz
high_freq = 50        # Hz

filtered_noise, t = generate_filtered_white_noise(duration, sample_rate, low_freq, high_freq)
pulse_signal = generate_random_pulses(duration, sample_rate, low_freq, high_freq)
# Combine signals
combined_signal = filtered_noise + pulse_signal

# Scale combined signal to target range
# NOTE(review): -9.787 / 10.209 are presumably the extreme amplitudes observed
# in the recordings - confirm where these values come from.
scaled_combined_signal = scale_signal(combined_signal, -9.787, 10.209)

# Plot against the time vector so the x-axis really is in seconds, as labelled
# (plotting the array alone would put the sample index on the x-axis).
plt.figure(figsize=(12, 4))
plt.plot(t, scaled_combined_signal)
plt.title("Final Disruptive Signal (White Noise + Pulses, Scaled)")
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.show()


NameError: name 'generate_filtered_white_noise' is not defined

In [6]:
# Spectrogram for natural signal
# Use the most recently loaded recording together with its own sample rate.
# The global `sample_rate` is rebound to the synthetic signal's rate in the
# signal-generation cell, so it no longer describes the recording, and the
# bare `audio_data` name only exists as a leftover of the loading loop.
natural_recording = list(audio_data_dict.values())[-1]
plot_spectrogram(natural_recording['audio'], natural_recording['sample_rate'])

NameError: name 'plot_spectrogram' is not defined

In [7]:
# Spectrogram of the synthetic disruptive signal (filtered noise + pulses,
# before amplitude scaling). Assumes `sample_rate` still holds the rate set in
# the signal-generation cell.
plot_spectrogram(combined_signal, sample_rate)

NameError: name 'plot_spectrogram' is not defined

In [8]:
import numpy as np

def spectral_overlap(pest_signal, disrupt_signal, sample_rate, freq_range=(20, 100)):
    """
    Computes the spectral overlap between pest and disruptive signals.

    For every frequency bin inside ``freq_range`` the smaller of the two power
    values is taken; the sum of these minima is divided by the pest signal's
    total power in the same bins. The powers are not normalised, so the result
    depends on the relative amplitude of the two signals.

    Args:
        pest_signal (array): Pest sound signal.
        disrupt_signal (array): Disruptive signal.
        sample_rate (int): Sample rate of the signals.
        freq_range (tuple): Target frequency range (min_freq, max_freq).

    Returns:
        overlap_ratio (float): The ratio of overlap between the pest and disruptive signals,
        between 0 and 1. Returns 0.0 when the pest signal has no power in
        ``freq_range`` (including when no bin falls inside the range).
    """
    # Transform both signals at a common length so their bins line up. The
    # shorter signal is zero-padded; without this, signals of different length
    # give spectra of different size and the frequency mask cannot index both.
    n = max(len(pest_signal), len(disrupt_signal))
    pest_fft = np.fft.fft(pest_signal, n=n)
    disrupt_fft = np.fft.fft(disrupt_signal, n=n)
    freqs = np.fft.fftfreq(n, d=1/sample_rate)

    # Select frequencies within the target range
    freq_mask = (freqs >= freq_range[0]) & (freqs <= freq_range[1])
    pest_power = np.abs(pest_fft[freq_mask]) ** 2
    disrupt_power = np.abs(disrupt_fft[freq_mask]) ** 2

    # Compute overlap ratio. With no pest power in the band there is nothing
    # to overlap with; return 0.0 rather than dividing by zero (NaN).
    total_pest_power = np.sum(pest_power)
    if total_pest_power == 0:
        return 0.0
    overlap = np.sum(np.minimum(pest_power, disrupt_power)) / total_pest_power
    return float(overlap)
