# Audio Segmentation: 30s to 3s Clips

## Overview

In this notebook, we split each 30-second audio file from the original dataset into ten 3-second clips and save them to a new folder.

## Importing libraries

In [1]:
import librosa
import soundfile as sf
import os
from glob import glob

## Dividing the audios

In [None]:
# Source folder (searched recursively for .wav files) and destination folder for the clips.
input_directory = "dataset/Audio_30s"
output_directory = "dataset/Audio_3s"

#### If the output directory does not exist, it is created

In [11]:
def makedir(output_directory):
    """Create ``output_directory`` (and any missing parents) if it is absent.

    Calling this on a directory that already exists is a no-op.

    Args:
        output_directory: Path of the directory to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok=True keeps the call idempotent and removes the race between a
    # separate existence check and the creation itself.
    os.makedirs(output_directory, exist_ok=True)

#### Here's an overview of the steps the audio segmentation takes:
- ***File Retrieval***: It collects all .wav files present in the input directory and its subdirectories.
- ***Segmentation***: For each audio file, it extracts the configured number of segments, each of the defined duration.
- ***Segment Writing*** : Each segment is saved as a separate .wav file in the specified output directory, labeled with the original file name and a segment index.


In [12]:
def audio_segmentation(input_directory, output_directory, segment_duration=3, num_segments=10):
    """Split every .wav file under ``input_directory`` into fixed-length clips.

    Each input file is loaded at its native sample rate and cut into up to
    ``num_segments`` consecutive clips of ``segment_duration`` seconds. Clips
    are written to ``output_directory`` as ``<original name>.<index>.wav``.

    Output files are written flat into ``output_directory``, so input files
    that share a base name in different subdirectories overwrite each other's
    clips.

    Args:
        input_directory: Folder searched recursively for ``.wav`` files.
        output_directory: Folder that receives the clips; created if missing.
        segment_duration: Length of each clip in seconds (must be > 0).
        num_segments: Maximum number of clips taken from each file (must be >= 1).

    Raises:
        ValueError: If ``segment_duration`` or ``num_segments`` is not positive.
    """
    if segment_duration <= 0 or num_segments < 1:
        raise ValueError("segment_duration and num_segments must be positive")

    # Make the function usable on its own, without a prior directory setup step.
    os.makedirs(output_directory, exist_ok=True)

    audio_files = sorted(glob(os.path.join(input_directory, '**/*.wav'), recursive=True))
    if not audio_files:
        # Nothing was processed, so do not report success.
        print(f"No .wav files found in {input_directory}")
        return

    failed_files = []

    for file_path in audio_files:

        try:
            # sr=None keeps the file's own sample rate instead of resampling.
            signal, sr = librosa.load(file_path, sr=None)

            # Sample offsets of the clip boundaries: 0, d, 2d, ..., num_segments * d.
            boundaries = librosa.time_to_samples(
                [segment_duration * i for i in range(num_segments + 1)], sr=sr
            )

            file_name_without_extension = os.path.splitext(os.path.basename(file_path))[0]

            for i in range(num_segments):
                segment = signal[boundaries[i]:boundaries[i + 1]]

                # Slicing past the end of a short recording yields an empty
                # array; every later clip would be empty too, so stop here
                # rather than writing zero-length files.
                if segment.size == 0:
                    print(f"{file_path}: audio ends after {i} of {num_segments} segments")
                    break

                output_filename = os.path.join(output_directory, f"{file_name_without_extension}.{i}.wav")
                sf.write(output_filename, segment, sr)

        except (librosa.util.exceptions.ParameterError, FileNotFoundError) as e:
            print(f"Error processing {file_path}: {e}")
            failed_files.append(file_path)

    # Only claim success when every file was actually processed.
    if failed_files:
        print(f"Segmentation finished, but {len(failed_files)} file(s) could not be processed")
    else:
        print("All the files have been successfully segmented")


In [15]:
# Ensure the destination folder exists, then split every source file into clips.
makedir(output_directory)
audio_segmentation(input_directory=input_directory, output_directory=output_directory)

All the files have been successfully segmented
