## This code takes a path to a folder containing many .wav files, concatenates them, and then splits the result into 1-minute .wav files.

In [2]:
import os
import numpy as np
from scipy.io import wavfile
import json

def read_audio_files(folder_path):
    """Load every ``.wav`` file directly inside *folder_path* as mono audio.

    Files are visited in sorted file-name order so that the result (and
    anything built by concatenating it) is reproducible; ``os.listdir`` alone
    returns entries in arbitrary order. Files that cannot be read are reported
    on stdout and skipped.

    Args:
        folder_path: Directory to scan (not recursive). Only names ending in
            ``.wav`` are considered.

    Returns:
        A tuple ``(audio_data, samplerate, file_details)`` where
        ``audio_data`` is a list of 1-D sample arrays (multi-channel files
        are averaged across channels), ``samplerate`` is the sample rate
        shared by all files, and ``file_details`` is a list of
        ``{'file_name': str, 'samples': int}`` dicts in the same order as
        ``audio_data``.

    Raises:
        ValueError: If no ``.wav`` file could be read, or if the files do not
            all share the same sample rate.
    """
    audio_data = []
    samplerates = []
    file_details = []  # One {'file_name', 'samples'} entry per loaded file

    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.wav'):
            continue

        file_path = os.path.join(folder_path, file_name)
        try:
            samplerate, data = wavfile.read(file_path)
        except Exception as e:
            # Best effort: one unreadable file should not abort the whole run.
            print(f"Error processing {file_name}: {e}")
            continue

        if data.ndim > 1:  # Convert to mono by averaging the channels
            data = data.mean(axis=1)

        audio_data.append(data)
        samplerates.append(samplerate)
        file_details.append({'file_name': file_name, 'samples': len(data)})

    if not samplerates:
        raise ValueError(f"No readable .wav files found in {folder_path}")
    if any(rate != samplerates[0] for rate in samplerates):
        raise ValueError("Sample rates differ among files")

    return audio_data, samplerates[0], file_details

def concatenate_to_minute_segments(audio_data, samplerate, file_details, target_duration_minutes=1):
    """Join the recordings end to end and cut the result into equal segments.

    Trailing samples that do not fill a whole segment are discarded.

    Args:
        audio_data: List of 1-D sample arrays, in concatenation order.
        samplerate: Samples per second shared by all arrays.
        file_details: List of ``{'file_name': str, 'samples': int}`` dicts,
            one per entry of ``audio_data`` and in the same order.
        target_duration_minutes: Length of each output segment in minutes.
            May be fractional; the sample count is rounded to the nearest
            integer.

    Returns:
        A tuple ``(segments, segment_file_lists)``. ``segments`` is a list of
        arrays, each exactly one segment long. ``segment_file_lists[i]`` lists
        the source files contributing to ``segments[i]`` as
        ``{'file_name', 'start_sample', 'end_sample'}`` dicts, where the
        sample range is half-open and relative to the start of the segment.
        Both lists are empty when ``audio_data`` is empty.

    Raises:
        ValueError: If the requested segment length is not at least one
            sample.
    """
    # Slicing and range() need an integer; a fractional duration would
    # otherwise yield a float sample count.
    target_duration_samples = int(round(target_duration_minutes * 60 * samplerate))
    if target_duration_samples <= 0:
        raise ValueError("Segment duration must be at least one sample")

    if not audio_data:
        return [], []

    concatenated_data = np.concatenate(audio_data)
    num_segments = concatenated_data.size // target_duration_samples

    # Absolute [start, end) position of every file in the concatenated
    # signal, computed once instead of once per segment.
    file_spans = []
    file_start = 0
    for file_info in file_details:
        file_end = file_start + file_info['samples']
        file_spans.append((file_info['file_name'], file_start, file_end))
        file_start = file_end

    segments = []
    segment_file_lists = []
    first_file = 0  # Index of the first file that can still overlap a segment

    for i in range(num_segments):
        seg_start = i * target_duration_samples
        seg_end = seg_start + target_duration_samples
        segments.append(concatenated_data[seg_start:seg_end])

        # Segments advance monotonically, so files that end at or before this
        # segment's start can never overlap it or any later segment.
        while first_file < len(file_spans) and file_spans[first_file][2] <= seg_start:
            first_file += 1

        files_in_segment = []
        for idx in range(first_file, len(file_spans)):
            file_name, span_start, span_end = file_spans[idx]
            if span_start >= seg_end:
                break  # This file and all later ones lie after the segment

            overlap_start = max(span_start, seg_start)
            overlap_end = min(span_end, seg_end)
            if overlap_end > overlap_start:  # Skips zero-length files
                files_in_segment.append({
                    'file_name': file_name,
                    'start_sample': int(overlap_start - seg_start),
                    'end_sample': int(overlap_end - seg_start),
                })

        segment_file_lists.append(files_in_segment)

    return segments, segment_file_lists

# === Run the processing ===
# Reads every .wav in folder_path, cuts the concatenated audio into one-minute
# segments, and writes the segments plus a JSON file-to-segment mapping into a
# 'one_minute_segments' subfolder of folder_path.
folder_path = '/Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5483_sample_songs/216_sample'  # Replace with your folder path
audio_data, samplerate, file_details = read_audio_files(folder_path)

# Create the new folder for segmented files; exist_ok makes re-runs safe
# without a separate check-then-create step.
output_folder = os.path.join(folder_path, 'one_minute_segments')
os.makedirs(output_folder, exist_ok=True)

# Generate segments and detailed mappings
segments, segment_file_lists = concatenate_to_minute_segments(audio_data, samplerate, file_details)

# Save segmented .wav files and detailed file-to-segment mappings
json_data = {}
for i, (segment, file_list) in enumerate(zip(segments, segment_file_lists)):
    segment_name = f"one_minute_segment_{i+1}.wav"  # Segment files are numbered from 1
    output_path = os.path.join(output_folder, segment_name)
    # NOTE(review): the cast assumes the samples fit the int16 range; sources
    # that are not 16-bit PCM (e.g. int32 or float WAVs) would be wrapped or
    # truncated here — confirm the input format.
    wavfile.write(output_path, samplerate, segment.astype(np.int16))
    print(f"Saved: {output_path}")
    json_data[segment_name] = file_list

# Save file segment mapping to JSON
json_output_path = os.path.join(output_folder, 'file_lists.json')
with open(json_output_path, 'w') as f:
    json.dump(json_data, f, indent=4)
# Report only once the file has been closed and flushed.
print(f"File list with sample ranges saved to {json_output_path}")

Saved: /Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5483_sample_songs/216_sample/one_minute_segments/one_minute_segment_1.wav
Saved: /Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5483_sample_songs/216_sample/one_minute_segments/one_minute_segment_2.wav
Saved: /Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5483_sample_songs/216_sample/one_minute_segments/one_minute_segment_3.wav
Saved: /Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5483_sample_songs/216_sample/one_minute_segments/one_minute_segment_4.wav
Saved: /Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5483_sample_songs/216_sample/one_minute_segments/one_minute_segment_5.wav
File list with sample ranges saved to /Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5483_sample_songs/216_sample/one_minute_segments/file_lists.json
