In [1]:
# import librosa
# import numpy as np
# import os
# import csv

# # Function to extract features from a wav file
# def extract_features(file_path):
#     try:
#         # Load the audio file
#         y, sr = librosa.load(file_path)
#     except Exception as e:
#         print(f"Error loading {file_path}: {e}")
#         return None

#     # Spectral Features
#     spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
#     spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
#     spectral_flatness = np.mean(librosa.feature.spectral_flatness(y=y))
#     spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr))

#     # Temporal Features
#     rms_energy = np.mean(librosa.feature.rms(y=y))
#     zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y))
#     temporal_variance = np.var(y)

#     # Chroma Features
#     chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr))

#     # MFCCs (Mel-frequency Cepstral Coefficients)
#     mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1)

#     # Amplitude Envelope (Mean of absolute signal)
#     amplitude = np.mean(np.abs(y))

#     # Frequency-related Features (using FFT)
#     stft = np.abs(librosa.stft(y))
#     frequency = np.mean(librosa.fft_frequencies(sr=sr))  # Mean frequency over time
#     dominant_freq = librosa.core.fft_frequencies(sr=sr)[np.argmax(np.mean(stft, axis=1))]

#     return {
#         "frequency": frequency,
#         "spectral_centroid": spectral_centroid,
#         "spectral_bandwidth": spectral_bandwidth,
#         "spectral_flatness": spectral_flatness,
#         "spectral_contrast": spectral_contrast,
#         "rms_energy": rms_energy,
#         "zero_crossing_rate": zero_crossing_rate,
#         "temporal_variance": temporal_variance,
#         "chroma": chroma,
#         "mfcc1": mfccs[0], "mfcc2": mfccs[1], "mfcc3": mfccs[2], "mfcc4": mfccs[3], "mfcc5": mfccs[4], "mfcc6": mfccs[5], "mfcc7": mfccs[6], "mfcc8": mfccs[7], "mfcc9": mfccs[8], "mfcc10": mfccs[9], "mfcc11": mfccs[10], "mfcc12": mfccs[11],
#         "amplitude": amplitude,
#         "dominant_freq": dominant_freq
#     }

# # Directory where your wav files are stored
# data_directory = '/mnt/external_disk/InsectSound1000'

# # List of insect names (provide this list)
# insect_names = [
#     'Bombus_terrestris',
#     'Episyrphus_balteatus',
#     'Aphidoletes_aphidimyza',
#     'Bradysia_difformis',
#     'Rhaphigaster_nebulos',
#     'Palomena_prasina',
#     'Halyomorpha_halys',
#     'Nezara_viridula',
#     'Tuta_absoluta',
#     'Coccinella_septempunctata',
#     'Trialeurodes_vaporariorum',
#     'Myzus_persicae'
# ]

# features_by_insect = {insect: [] for insect in insect_names}

# if not os.path.isdir(data_directory):
#     print(f"Directory {data_directory} not found.")
# else:
#     for file_name in os.listdir(data_directory):
#         if file_name.endswith('.wav'):
#             # Match the insect name in the filename (assuming the insect name is in the format ...)
#             insect_name = None
#             for insect in insect_names:
#                 if insect in file_name:
#                     insect_name = insect
#                     break
            
#             if insect_name:
#                 file_path = os.path.join(data_directory, file_name)
#                 file_features = extract_features(file_path)
                
#                 if file_features:
#                     # Append the features to the corresponding insect list
#                     features_by_insect[insect_name].append(file_features)
#                 else:
#                     print(f"Error processing {file_name}")
#             else:
#                 print(f"Insect name not found in {file_name}")

#     # Create CSV files for each insect
#     for insect_name, features in features_by_insect.items():
#         if features:
#             csv_filename = f"{insect_name}_features.csv"
#             try:
#                 with open(csv_filename, mode='w', newline='') as file:
#                     fieldnames = ["frequency", "spectral_centroid", "spectral_bandwidth", "spectral_flatness", "spectral_contrast",
#                                   "rms_energy", "zero_crossing_rate", "temporal_variance", "chroma",
#                                   "mfcc1", "mfcc2", "mfcc3", "mfcc4", "mfcc5", "mfcc6", "mfcc7", "mfcc8", "mfcc9", "mfcc10", "mfcc11", "mfcc12", 
#                                   "amplitude", "dominant_freq"]
#                     writer = csv.DictWriter(file, fieldnames=fieldnames)
#                     writer.writeheader()
#                     writer.writerows(features)
#                 print(f"CSV file for {insect_name} created: {csv_filename}")
#             except Exception as e:
#                 print(f"Error writing {csv_filename}: {e}")
#         else:
#             print(f"No wav files found for {insect_name}.")


In [1]:
import librosa
import numpy as np
import os
import csv
import logging

# Set up logging: records at INFO level and above are written to
# "process_log.txt" (relative to the current working directory), each line
# prefixed with a timestamp.
logging.basicConfig(filename="process_log.txt", level=logging.INFO, format="%(asctime)s - %(message)s")

# Directory containing the audio files. process_insect() lists this directory
# (non-recursively) and picks up the ".wav" files in it.
data_directory = "/mnt/external_disk/InsectSound1000"

# List of insect names to process. Each name is matched as a substring of the
# wav file names and is also used as the prefix of the output CSV file name.
# Commented-out entries are disabled for this run
# (NOTE(review): presumably already processed earlier -- confirm before re-enabling).
insect_names = [
    # "Bombus_terrestris",
    # "Episyrphus_balteatus",
    # "Aphidoletes_aphidimyza",
    # "Bradysia_difformis",
    # "Rhaphigaster_nebulos",
    # "Palomena_prasina",
    # "Halyomorpha_halys",
    # "Nezara_viridula",
    # "Tuta_absoluta",
    # "Coccinella_septempunctata",
    "Trialeurodes_vaporariorum",
    "Myzus_persicae",
]

# Function to extract features from a wav file
def extract_features(file_path, insect_name):
    """Summarise one audio file as a flat dict of scalar features.

    The file is loaded with ``librosa.load`` using its default arguments, and
    every frame-wise librosa feature is reduced to a single number with
    ``np.mean`` (MFCCs are averaged over time per coefficient instead).

    NOTE(review): with default arguments ``librosa.load`` resamples to its
    default sample rate rather than keeping the file's native rate. That is
    left unchanged here so new rows stay comparable with CSVs that were already
    produced; pass ``sr=None`` to ``librosa.load`` if native rate is wanted.

    Args:
        file_path: Path of the audio file to load.
        insect_name: Label stored verbatim in the returned row.

    Returns:
        dict | None: A row with the keys ``file_path``, ``insect_name``,
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_flatness``,
        ``spectral_contrast``, ``rms_energy``, ``zero_crossing_rate``,
        ``temporal_variance``, ``chroma``, ``mfcc1`` .. ``mfccN`` (one per MFCC
        coefficient returned by librosa), ``amplitude`` and ``dominant_freq``.
        ``None`` if loading or feature extraction raised; the error is logged
        and not re-raised so a batch run can continue with the next file.
    """
    try:
        # Load the audio file
        y, sr = librosa.load(file_path)
    except Exception as e:
        logging.error(f"Error loading {file_path}: {e}")
        return None

    try:
        # Spectral features, each averaged over all frames (and bands).
        spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
        spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
        spectral_flatness = np.mean(librosa.feature.spectral_flatness(y=y))
        spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr))

        # Temporal features.
        rms_energy = np.mean(librosa.feature.rms(y=y))
        zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y))
        temporal_variance = np.var(y)

        # Chroma, collapsed to one number across pitch classes and frames.
        chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr))

        # MFCCs: mean over time (axis=1), keeping one value per coefficient.
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1)

        # Amplitude envelope: mean of the absolute signal.
        amplitude = np.mean(np.abs(y))

        # Dominant frequency: centre frequency of the FFT bin whose magnitude,
        # averaged over time, is largest. The former `frequency` local (mean of
        # the FFT bin centres) was computed but never returned, so it is gone.
        magnitude = np.abs(librosa.stft(y))
        strongest_bin = np.argmax(np.mean(magnitude, axis=1))
        dominant_freq = librosa.fft_frequencies(sr=sr)[strongest_bin]
    except Exception as e:
        logging.error(f"Error extracting features for {file_path}: {e}")
        return None

    return {
        "file_path": file_path,
        "insect_name": insect_name,
        "spectral_centroid": spectral_centroid,
        "spectral_bandwidth": spectral_bandwidth,
        "spectral_flatness": spectral_flatness,
        "spectral_contrast": spectral_contrast,
        "rms_energy": rms_energy,
        "zero_crossing_rate": zero_crossing_rate,
        "temporal_variance": temporal_variance,
        "chroma": chroma,
        **{f"mfcc{i+1}": mfcc for i, mfcc in enumerate(mfccs)},
        "amplitude": amplitude,
        "dominant_freq": dominant_freq,
    }

# Save extracted features to a CSV file
def save_to_csv(features, csv_filename):
    """Append feature rows to ``csv_filename``.

    The column order is taken from the keys of the first row. A header line is
    written only when the target file is empty (which includes the case where
    it has just been created by this call), so repeated calls append rows
    without duplicating the header.

    Args:
        features: List of dicts, one per row, all sharing the keys of the
            first dict. An empty list is a no-op: nothing is written and the
            file is not created.
        csv_filename: Path of the CSV file to create or append to.

    Raises:
        ValueError: From ``csv.DictWriter`` if a later row contains a key that
            the first row does not have.
        OSError: If the file cannot be opened or written.
    """
    if not features:
        # No rows: there is no first row to take the header from, and creating
        # an empty file would make it look like the data was already written.
        return

    fieldnames = list(features[0].keys())
    with open(csv_filename, mode="a", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        # The file exists at this point (append mode creates it), so a size of
        # zero means nothing has been written to it yet.
        if os.stat(csv_filename).st_size == 0:
            writer.writeheader()
        writer.writerows(features)

# Main processing function for a single insect
def process_insect(insect_name):
    """Extract features for every wav file of one insect and save them as CSV.

    Files are taken from ``data_directory`` (top level only). A file belongs to
    the insect when its name ends with ".wav" and contains ``insect_name`` as a
    substring. Rows are collected in sorted file-name order so the resulting
    CSV is reproducible between runs, then written in one go to
    ``"<insect_name>_features.csv"`` in the current working directory.

    The function is a no-op (apart from logging) when:
      * the output CSV already exists -- the insect is treated as done;
      * ``data_directory`` is not an existing directory;
      * no file yielded features.

    Args:
        insect_name: Name to match in the file names; also the CSV prefix.

    Returns:
        None
    """
    logging.info(f"Starting processing for {insect_name}")
    csv_filename = f"{insect_name}_features.csv"

    # Skip if already processed
    if os.path.exists(csv_filename):
        logging.info(f"Skipping {insect_name}, CSV file already exists.")
        return

    # A missing or unmounted data directory would otherwise make os.listdir
    # raise and abort the remaining insects in the batch.
    if not os.path.isdir(data_directory):
        logging.error(f"Directory {data_directory} not found.")
        return

    # Gather all files for the insect; sorted because os.listdir returns
    # entries in arbitrary order.
    features = []
    for file_name in sorted(os.listdir(data_directory)):
        if not file_name.endswith(".wav") or insect_name not in file_name:
            continue
        file_path = os.path.join(data_directory, file_name)
        feature = extract_features(file_path, insect_name)
        # extract_features returns None for files it could not process.
        if feature is not None:
            features.append(feature)

    if features:
        save_to_csv(features, csv_filename)
        logging.info(f"Completed processing for {insect_name}. Data saved to {csv_filename}")
    else:
        logging.warning(f"No files processed for {insect_name}")




In [2]:
if __name__ == "__main__":
    # Run the extraction once per entry in insect_names. Each call handles a
    # single insect and writes its own "<insect>_features.csv"; insects whose
    # CSV file already exists are skipped inside process_insect.
    for insect in insect_names:
        process_insect(insect)

  return pitch_tuning(
