# Convert all CSV files to WAV files and save their corresponding labels as CSV files in the output directory

In [7]:
import os
import pandas as pd
import numpy as np
import csv
from scipy.io.wavfile import write

def normalize_and_convert_to_wav(input_directory, output_directory, sample_rate=2000):
    """
    Peak-normalizes the first column of every CSV file and writes it as a 16-bit WAV file,
    while saving the remaining columns to a separate label CSV file.

    For each ``<name>.csv`` found in ``input_directory`` two files are written to
    ``output_directory``: ``<name>.wav`` (int16 samples) and ``<name>_labels.csv``
    (every column except the first).

    Args:
        input_directory (str): Path to the directory containing the input CSV files.
        output_directory (str): Path to the directory where the output WAV and label CSV files will be saved.
        sample_rate (int, optional): The sample rate for the WAV files. Default is 2000 Hz.

    Returns:
        None
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # Get a list of all CSV files in the input directory (sorted for a reproducible order)
    csv_files = sorted(f for f in os.listdir(input_directory) if f.endswith('.csv'))

    # Normalize and convert to WAV for each file individually
    for csv_file in csv_files:
        base_name = os.path.splitext(csv_file)[0]
        data = pd.read_csv(os.path.join(input_directory, csv_file))
        signal = data.iloc[:, 0].to_numpy(dtype=np.float64)

        # Scale by the largest *magnitude* in the file, not the largest value:
        # dividing by max() alone overflows int16 whenever |min| > max.
        valid = signal[~np.isnan(signal)]
        peak = np.abs(valid).max() if valid.size else 0.0

        if peak > 0:
            normalized_data = ((signal / peak) * 32767).astype(np.int16)
        else:
            # An empty or all-zero signal has nothing to scale; avoid 0/0 and emit silence
            normalized_data = np.zeros(signal.shape, dtype=np.int16)

        # Write the values to an audio file with the specified sample rate
        output_file = os.path.join(output_directory, base_name + '.wav')
        write(output_file, sample_rate, normalized_data)

        # Save the labels (all columns but the first) to a new CSV file
        label_output_file = os.path.join(output_directory, base_name + '_labels.csv')
        labels = data.drop(data.columns[0], axis=1)
        labels.to_csv(label_output_file, index=False)

    # Report the directory that was actually written to
    print(f"Audio files and labels have been successfully created and stored in the {output_directory} directory.")

# Directory that holds the input CSV files
input_dir ="./Orig_CSV_Files"
# Directory that receives the generated WAV files and their *_labels.csv companions
output_dir = "./Orig_WAV-Label_Files"
# Run the conversion; sample_rate is not passed, so the function's default is used
normalize_and_convert_to_wav(input_dir, output_dir)

Audio files and labels have been successfully created and stored in the 'FixingAudioSpectrograms' directory.


In [9]:
# Verify directory contents
import os
import glob


# Directories to inspect: where the WAV files were written and where the CSV inputs live
wav_path = "./Orig_WAV-Label_Files"
csv_path = "./Orig_CSV_Files"

# Collect and show every path directly inside wav_path that matches *.wav
wav_files = glob.glob(os.path.join(wav_path, '*.wav'))
print("WAV Files:", wav_files)

# Collect and show every path directly inside csv_path that matches *.csv
csv_files = glob.glob(os.path.join(csv_path, '*.csv'))
print("CSV Files:", csv_files)


WAV Files: ['./Orig_WAV-Label_Files/07_swallow_banana_N2.wav', './Orig_WAV-Label_Files/23_swallow_water_N2.wav', './Orig_WAV-Label_Files/06_swallow_water.wav', './Orig_WAV-Label_Files/20_swallow_dry.wav', './Orig_WAV-Label_Files/23_swallow_banana.wav', './Orig_WAV-Label_Files/04_swallow_dry.wav', './Orig_WAV-Label_Files/03_swallow_water.wav', './Orig_WAV-Label_Files/19_swallow_banana_N2.wav', './Orig_WAV-Label_Files/13_swallow_banana.wav', './Orig_WAV-Label_Files/14_swallow_water_N2.wav', './Orig_WAV-Label_Files/03_swallow_banana.wav', './Orig_WAV-Label_Files/12_swallow_dry.wav', './Orig_WAV-Label_Files/15_swallow_dry.wav', './Orig_WAV-Label_Files/07_swallow_water.wav', './Orig_WAV-Label_Files/05_swallow_banana_N2.wav', './Orig_WAV-Label_Files/10_swallow_banana.wav', './Orig_WAV-Label_Files/02_swallow_water.wav', './Orig_WAV-Label_Files/20_swallow_banana.wav', './Orig_WAV-Label_Files/03_swallow_dry.wav', './Orig_WAV-Label_Files/27_swallow_dry.wav', './Orig_WAV-Label_Files/04_swallow_wa

In [20]:
import os
import pandas as pd
import torch
import librosa
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt


def create_vmap(spectrogram):
    """
    Creates the vmap (velocity map) of a given spectrogram. The vmap is the first derivative of the
    spectrogram along its first axis (axis 0), which this function treats as the time axis.

    Interior rows use a central difference, ``(s[t + 1] - s[t - 1]) / 2``; the first and last rows
    use a one-sided difference to their single neighbour.

    Args:
        spectrogram (numpy.ndarray): Input spectrogram with shape (T, f), where T is the number of time frames
                                     and f is the number of frequency bins. Must be 2-D.

    Returns:
        numpy.ndarray: The vmap of the spectrogram with the same shape as the input spectrogram.
                       Floating-point inputs keep their dtype; integer inputs are promoted to
                       float64 so the halved differences are not truncated. An input with fewer
                       than two frames has no neighbour to difference against and yields zeros.

    Raises:
        ValueError: If the input is not two-dimensional.
    """
    spectrogram = np.asarray(spectrogram)
    if not np.issubdtype(spectrogram.dtype, np.inexact):
        # Integer data would silently floor the "/ 2" of the central difference
        spectrogram = spectrogram.astype(np.float64)

    # Unpacking enforces the 2-D (T, f) contract
    num_frames, _ = spectrogram.shape

    if num_frames < 2:
        return np.zeros_like(spectrogram)

    # np.gradient with unit spacing and the default edge_order=1 computes exactly the
    # central / one-sided differences described above, without a Python-level loop.
    return np.gradient(spectrogram, axis=0)

def create_amap(vmap):
    """
    Creates the amap (acceleration map) of a given vmap. The amap is the first derivative of the vmap
    along its first axis (axis 0), i.e. the second derivative of the spectrogram the vmap came from.

    Interior rows use a central difference, ``(v[t + 1] - v[t - 1]) / 2``; the first and last rows
    use a one-sided difference to their single neighbour.

    Args:
        vmap (numpy.ndarray): Input vmap with shape (T, f), where T is the number of time frames
                              and f is the number of frequency bins. Must be 2-D.

    Returns:
        numpy.ndarray: The amap of the vmap with the same shape as the input vmap.
                       Floating-point inputs keep their dtype; integer inputs are promoted to
                       float64 so the halved differences are not truncated. An input with fewer
                       than two frames has no neighbour to difference against and yields zeros.

    Raises:
        ValueError: If the input is not two-dimensional.
    """
    vmap = np.asarray(vmap)
    if not np.issubdtype(vmap.dtype, np.inexact):
        # Integer data would silently floor the "/ 2" of the central difference
        vmap = vmap.astype(np.float64)

    # Unpacking enforces the 2-D (T, f) contract
    num_frames, _ = vmap.shape

    if num_frames < 2:
        return np.zeros_like(vmap)

    # np.gradient with unit spacing and the default edge_order=1 computes exactly the
    # central / one-sided differences described above, without a Python-level loop.
    return np.gradient(vmap, axis=0)


def process_audio_file(audio_file_path, csv_label_path, output_path, segment_length=2, step_size_in_sec=0.1, label_threshold=0.5):
    """
    Processes an audio file by creating segments of mel spectrograms, velocity maps (vmap), and acceleration maps (amap),
    and saves them as tensors along with their labels.

    Each saved segment produces two files in the output directory:
    ``<audio name>_segment_<i>_combined.pt`` (float32 tensor stacking spectrogram, vmap and amap)
    and ``<audio name>_segment_<i>_label.pt`` (int64 scalar tensor, 0 or 1).

    Args:
        audio_file_path (str): Path to the input audio file.
        csv_label_path (str): Path to the CSV file containing labels; only its first column is used.
        output_path (str): Base path of the output directory. The threshold and step size are appended
                           to it (``<output_path>_thresh_<t>_step_<s>``) and that directory is created.
        segment_length (int, optional): Nominal length of each segment in seconds, used to compute how many
                                        sliding windows fit in the file. Defaults to 2. Note that the
                                        spectrogram window itself is fixed at 128 frames.
        step_size_in_sec (float, optional): Step size in seconds for creating segments. Defaults to 0.1.
        label_threshold (float, optional): A segment is labelled 1 when the fraction of its labels equal
                                           to 1 is strictly greater than this value. Defaults to 0.5.

    Returns:
        tuple: ``(num_segments, num_zero_labels, num_one_labels)``. ``num_segments`` is the number of
               windows that were planned; windows skipped for being incomplete are still counted in it,
               so it can exceed ``num_zero_labels + num_one_labels``. A file too short to hold a single
               window returns ``(0, 0, 0)``.
    """
    # Width of every saved window in spectrogram frames (the saved tensor is 3 x n_mels x 128)
    segment_frames = 128

    # Load the audio file at its native sample rate
    y, sr = librosa.load(audio_file_path, sr=None)

    # Append threshold and step size to output directory
    output_path = f"{output_path}_thresh_{label_threshold}_step_{step_size_in_sec}"
    os.makedirs(output_path, exist_ok=True)

    # Number of sliding windows of `segment_length` seconds that fit when advancing by one step
    num_segments = int(len(y) / (step_size_in_sec * sr) - (sr * segment_length) / (step_size_in_sec * sr))

    # A file shorter than one segment plus one step has nothing to emit; bail out before the
    # step-size division below would divide by zero (or go negative).
    if num_segments <= 0:
        return 0, 0, 0

    # Compute the mel spectrogram with the specified settings
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=31,
                                       fmin=1, fmax=1000, window='hann',
                                       n_mels=128, power=2.0, center=False)

    # Convert to decibels
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Load the CSV labels
    labels = pd.read_csv(csv_label_path).iloc[:, 0].values

    # Spread the planned windows evenly over the available spectrogram frames
    num_frames = S_dB.shape[1]
    step_size_spectrograms = num_frames // num_segments

    # Conversion factor from a spectrogram frame index to a label index
    labels_per_frame = len(labels) / num_frames

    # Initialize counters
    num_zero_labels = 0
    num_one_labels = 0

    # Loop through the audio file and create segments
    for i in range(num_segments):
        # Window boundaries in spectrogram frames and the matching label range
        start_frame_spectrograms = i * step_size_spectrograms
        end_frame_spectrograms = start_frame_spectrograms + segment_frames
        start_frame_labels = int(start_frame_spectrograms * labels_per_frame)
        # The slice end is exclusive, so cap at len(labels) to keep the final label
        end_frame_labels = min(len(labels), int(end_frame_spectrograms * labels_per_frame))

        segment = S_dB[:, start_frame_spectrograms:end_frame_spectrograms]
        segment_labels = labels[start_frame_labels:end_frame_labels]

        # Ensure the segment has the correct size and skip if not
        if segment.shape[1] != segment_frames or len(segment_labels) == 0:
            continue

        # Create vmap and amap. The helpers differentiate along axis 0 of a (T, f) array,
        # while `segment` is (n_mels, time); transpose in and out so the derivative is
        # taken over time rather than across mel bins.
        vmap = create_vmap(segment.T).T
        amap = create_amap(vmap.T).T

        # Determine the label based on the majority rule
        if np.sum(segment_labels == 1) / len(segment_labels) > label_threshold:
            most_common_label = 1
            num_one_labels += 1
        else:
            most_common_label = 0
            num_zero_labels += 1

        # Combine the spectrogram, vmap, and amap into a 3x128x128 tensor
        combined_tensor = torch.tensor(np.stack([segment, vmap, amap], axis=0), dtype=torch.float32)

        # Store corresponding label in the label tensor
        label_tensor = torch.tensor(most_common_label, dtype=torch.int64)

        # Create segment name (the index counts skipped windows too)
        segment_name = f"{os.path.splitext(os.path.basename(audio_file_path))[0]}_segment_{i:04d}"

        # Save Spectrogram Tensor
        combined_tensor_path = os.path.join(output_path, f"{segment_name}_combined.pt")
        torch.save(combined_tensor, combined_tensor_path)

        # Save Label Tensor
        label_tensor_path = os.path.join(output_path, f"{segment_name}_label.pt")
        torch.save(label_tensor, label_tensor_path)

    return num_segments, num_zero_labels, num_one_labels

# Example usage: segment every WAV/label pair found in the input directory
input_path = './Orig_WAV-Label_Files'
output_path = "./FINISHED_PRODUCT"

# Segmentation settings applied to every file in this run
label_threshold = .5
step_size_in_sec = 0.1

# Running totals accumulated over all processed files
tot_segments = tot_zeros = tot_ones = tot_files = 0

for filename in os.listdir(input_path):
    # Only WAV files drive the loop; each label CSV is located from the WAV name
    if not filename.endswith('.wav'):
        continue

    audio_file_path = os.path.join(input_path, filename)
    csv_label_path = os.path.join(input_path, filename.replace('.wav', '_labels.csv'))
    num_segments, num_zero_labels, num_one_labels = process_audio_file(
        audio_file_path,
        csv_label_path,
        output_path,
        step_size_in_sec=step_size_in_sec,
        label_threshold=label_threshold,
    )

    tot_files += 1
    tot_segments += num_segments
    tot_zeros += num_zero_labels
    tot_ones += num_one_labels

# Summary of the whole run
print("Total number of segments:", tot_segments)
print("Total number of zero labels:", tot_zeros)
print("Total number of one labels:", tot_ones)
print("Total number of files:", tot_files)


Total number of segments: 9503
Total number of zero labels: 7243
Total number of one labels: 1216
Total number of files: 110


In [28]:
# Parameter sweep: regenerate the segmented dataset for every
# (label_threshold, step_size_in_sec) combination listed below.
label_thresholds = [0.2, 0.4, 0.6, 0.65, 0.7]
step_sizes_in_sec = [0.05, 0.1, 0.2]

for label_threshold in label_thresholds:
    for step_size_in_sec in step_sizes_in_sec:
        print(f"Processing with label_threshold={label_threshold}, step_size_in_sec={step_size_in_sec}")

        # Totals are reset for each parameter combination
        tot_segments = tot_zeros = tot_ones = tot_files = 0

        for filename in os.listdir(input_path):
            # Only WAV files drive the loop; each label CSV is located from the WAV name
            if not filename.endswith('.wav'):
                continue

            audio_file_path = os.path.join(input_path, filename)
            csv_label_path = os.path.join(input_path, filename.replace('.wav', '_labels.csv'))
            num_segments, num_zero_labels, num_one_labels = process_audio_file(
                audio_file_path,
                csv_label_path,
                output_path,
                step_size_in_sec=step_size_in_sec,
                label_threshold=label_threshold,
            )

            tot_files += 1
            tot_segments += num_segments
            tot_zeros += num_zero_labels
            tot_ones += num_one_labels

        # Per-combination summary
        print(f"Total number of segments for label_threshold={label_threshold}, step_size_in_sec={step_size_in_sec}: {tot_segments}")
        print(f"Total number of zero labels: {tot_zeros}")
        print(f"Total number of one labels: {tot_ones}")
        print(f"Total number of files processed: {tot_files}")


Processing with label_threshold=0.2, step_size_in_sec=0.05
Total number of segments for label_threshold=0.2, step_size_in_sec=0.05: 19062
Total number of zero labels: 11600
Total number of one labels: 6266
Total number of files processed: 110
Processing with label_threshold=0.2, step_size_in_sec=0.1
Total number of segments for label_threshold=0.2, step_size_in_sec=0.1: 9503
Total number of zero labels: 5524
Total number of one labels: 2935
Total number of files processed: 110
Processing with label_threshold=0.2, step_size_in_sec=0.2
Total number of segments for label_threshold=0.2, step_size_in_sec=0.2: 4724
Total number of zero labels: 2630
Total number of one labels: 1396
Total number of files processed: 110
Processing with label_threshold=0.4, step_size_in_sec=0.05
Total number of segments for label_threshold=0.4, step_size_in_sec=0.05: 19062
Total number of zero labels: 14021
Total number of one labels: 3845
Total number of files processed: 110
Processing with label_threshold=0.4,

All tensor files correctly match their corresponding label files.


Sample tensor shape: torch.Size([3, 128, 128])
Sample label: 0


NameError: name 'CustomTensorDataset' is not defined