In [None]:
!pip install librosa soundfile webrtcvad pydub

In [None]:
import os
import numpy as np
import librosa
import soundfile as sf
from pydub import AudioSegment
from google.colab import drive
import webrtcvad
import wave
import contextlib
from datetime import datetime

In [None]:
class AudioProcessor:
    """Cut long speech runs out of an audio file and save them as WAV files.

    The pipeline is: load (and optionally downsample) the file, classify
    fixed-size frames as speech / non-speech with WebRTC VAD, merge
    consecutive speech frames into runs, keep the runs that are long enough,
    and write each kept run to a sequentially numbered ``.wav`` file.
    """

    # Sample rates (Hz) documented as accepted by WebRTC VAD.
    _VAD_SAMPLE_RATES = (8000, 16000, 32000, 48000)

    def __init__(self, target_sr=16000, min_segment_length=20, max_segment_length=30):
        """
        Initialize the audio processor.

        Args:
            target_sr (int): Target sample rate in Hz
            min_segment_length (int): Minimum segment length in seconds
            max_segment_length (int): Maximum segment length in seconds
        """
        self.target_sr = target_sr
        self.min_segment_length = min_segment_length
        self.max_segment_length = max_segment_length
        self.vad = webrtcvad.Vad(3)  # Aggressiveness mode 3 (highest)

    def mount_drive(self):
        """Mount Google Drive at ``/content/drive``."""
        drive.mount('/content/drive')

    def load_and_resample(self, file_path):
        """
        Load audio file and downsample it if necessary.

        The file is loaded at its native sample rate. It is resampled to
        ``target_sr`` only when the native rate is higher; audio recorded
        at a lower rate is returned unchanged (it is never upsampled).

        Args:
            file_path (str): Path to the audio file

        Returns:
            tuple: Audio data and sample rate
        """
        # sr=None asks librosa to keep the file's own sample rate.
        audio, sr = librosa.load(file_path, sr=None)

        if sr > self.target_sr:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=self.target_sr)
            sr = self.target_sr

        return audio, sr

    @staticmethod
    def _to_pcm16(audio):
        """Convert float samples (nominally in [-1, 1]) to 16-bit PCM.

        Values are scaled by 32768 and clipped to the int16 range so that a
        full-scale sample of +1.0 saturates at 32767 instead of overflowing.
        """
        scaled = np.asarray(audio, dtype=np.float64) * 32768
        return np.clip(scaled, -32768, 32767).astype(np.int16)

    def _frames_to_segments(self, is_speech, frame_duration):
        """Merge per-frame speech flags into (start, end) segments in seconds.

        Args:
            is_speech (iterable of bool): One flag per consecutive frame
            frame_duration (int): Frame length in milliseconds

        Returns:
            list: (start, end) tuples in seconds. Runs shorter than
            ``min_segment_length`` are dropped; runs longer than
            ``max_segment_length`` are truncated to that length (the
            remainder of the run is not emitted).
        """
        segments = []
        run_start = None
        # The trailing False sentinel closes a run that is still open when
        # the audio ends, so speech at the very end is not lost.
        for index, speech in enumerate(list(is_speech) + [False]):
            if speech:
                if run_start is None:
                    run_start = index
                continue
            if run_start is None:
                continue

            duration = (index - run_start) * frame_duration / 1000
            if duration >= self.min_segment_length:
                start_s = run_start * frame_duration / 1000
                end_s = min(index * frame_duration / 1000,
                            start_s + self.max_segment_length)
                segments.append((start_s, end_s))
            run_start = None

        return segments

    def detect_voice_activity(self, audio, sr):
        """
        Detect segments with voice activity.

        Args:
            audio (numpy.ndarray): Audio data
            sr (int): Sample rate

        Returns:
            list: List of (start, end) tuples in seconds
        """
        if sr not in self._VAD_SAMPLE_RATES:
            # Surface the likely cause of an empty result instead of
            # failing silently frame by frame below.
            import warnings
            warnings.warn(
                f"Sample rate {sr} Hz is not one of the rates WebRTC VAD "
                f"accepts {self._VAD_SAMPLE_RATES}; frames are likely to be "
                "rejected and no segments produced.",
                RuntimeWarning,
                stacklevel=2,
            )

        audio_pcm = self._to_pcm16(audio)

        # Parameters for VAD
        frame_duration = 30  # ms
        samples_per_frame = sr * frame_duration // 1000

        # Split audio into whole frames; a short trailing remainder is dropped.
        frames = [
            audio_pcm[offset:offset + samples_per_frame].tobytes()
            for offset in range(0, len(audio_pcm) - samples_per_frame + 1,
                                samples_per_frame)
        ] if samples_per_frame > 0 else []

        # Classify each frame; a frame the VAD rejects counts as non-speech.
        is_speech = []
        for frame in frames:
            try:
                is_speech.append(self.vad.is_speech(frame, sr))
            except Exception:
                is_speech.append(False)

        return self._frames_to_segments(is_speech, frame_duration)

    def get_next_file_number(self, output_dir, filename_prefix):
        """
        Find the next available file number in the sequence.

        Only files named exactly ``<filename_prefix>-<integer>.wav`` are
        considered.

        Args:
            output_dir (str): Output directory
            filename_prefix (str): Prefix for output filenames

        Returns:
            int: Next available file number (0 when the directory is
            missing or holds no matching files)
        """
        if not os.path.isdir(output_dir):
            return 0

        marker = filename_prefix + '-'
        suffix = '.wav'
        existing_numbers = []

        for filename in os.listdir(output_dir):
            if not (filename.startswith(marker) and filename.endswith(suffix)):
                continue
            # Strip exactly one leading marker and the extension
            # (e.g. "training-001.wav" -> "001").
            num_str = filename[len(marker):-len(suffix)]
            try:
                existing_numbers.append(int(num_str))
            except ValueError:
                continue

        # If no existing files found, start from 0
        if not existing_numbers:
            return 0

        # Return the next number in sequence
        return max(existing_numbers) + 1

    def save_segments(self, audio, sr, segments, output_dir, filename_prefix):
        """
        Save audio segments to files with sequential naming.

        Numbering continues after the highest number already present in
        ``output_dir`` for this prefix.

        Args:
            audio (numpy.ndarray): Audio data
            sr (int): Sample rate
            segments (list): List of (start, end) tuples
            output_dir (str): Output directory
            filename_prefix (str): Prefix for output filenames (e.g., "training")

        Returns:
            list: Names (not full paths) of the files written, in order
        """
        os.makedirs(output_dir, exist_ok=True)

        # Get the starting file number
        first_number = self.get_next_file_number(output_dir, filename_prefix)

        saved_files = []
        for number, (start, end) in enumerate(segments, start=first_number):
            # Convert time to samples and extract the segment
            segment = audio[int(start * sr):int(end * sr)]

            # Generate filename with sequential numbering
            filename = f"{filename_prefix}-{number:03d}.wav"
            sf.write(os.path.join(output_dir, filename), segment, sr)
            saved_files.append(filename)

        return saved_files

    def process_audio_file(self, input_file, output_dir, filename_prefix):
        """
        Process a single audio file.

        Args:
            input_file (str): Path to input audio file
            output_dir (str): Output directory
            filename_prefix (str): Prefix for output filenames (e.g., "training")

        Returns:
            tuple: Number of segments created and list of saved filenames
        """
        # Load and resample audio
        audio, sr = self.load_and_resample(input_file)

        # Detect voice activity segments
        segments = self.detect_voice_activity(audio, sr)

        # Save segments and get list of saved files
        saved_files = self.save_segments(audio, sr, segments, output_dir, filename_prefix)

        return len(segments), saved_files

# Example usage
def main():
    """Run the example pipeline on one file stored in Google Drive.

    Mounts Drive, splits the configured input file into speech segments,
    writes them to the configured output directory, and prints a summary
    of what was saved. The two paths below are placeholders to be edited.
    """
    # Locations and naming (edit these before running)
    source_path = '/content/drive/MyDrive/path/to/your/audio.mp3'  # Update this
    destination = '/content/drive/MyDrive/path/to/output'  # Update this
    prefix = 'training'  # Produces training-000.wav, training-001.wav, ...

    # Build the processor and make Drive available
    splitter = AudioProcessor(
        target_sr=16000,
        min_segment_length=20,
        max_segment_length=30,
    )
    splitter.mount_drive()

    # Split the recording and collect the names of the written files
    segment_count, written = splitter.process_audio_file(
        source_path, destination, prefix
    )

    # Report
    print(f"Processing complete. Created {segment_count} segments.")
    print("\nSaved files:")
    for name in written:
        print(f"- {name}")

# Run the example only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

