In [None]:
# Download the LibriSpeech training archives (OpenSLR resource 12) with wget.
# NOTE(review): no cell visible in this notebook extracts these two archives --
# the only active extraction opens 'dev-clean.tar.gz'. Confirm they are needed
# before paying for the download.
!wget https://www.openslr.org/resources/12/train-clean-100.tar.gz
!wget https://www.openslr.org/resources/12/train-other-500.tar.gz

In [None]:
# Download the LibriSpeech dev-clean archive; the extraction cell opens it as
# 'dev-clean.tar.gz'.
!wget https://www.openslr.org/resources/12/dev-clean.tar.gz
# The dev-other download below is left disabled.
#!wget https://www.openslr.org/resources/12/dev-other.tar.gz

In [None]:
import tarfile

# Extract 'dev-clean.tar.gz'.
# NOTE: the recorded paths in this notebook show the files landing under
# 'LibriSpeech/dev-clean/LibriSpeech/dev-clean/...', i.e. the archive members
# already carry their own 'LibriSpeech/dev-clean/' prefix. The destination is
# kept unchanged because the later cells were run against that nested layout.
with tarfile.open('dev-clean.tar.gz', 'r:gz') as tar:
    tar.extractall('LibriSpeech/dev-clean')

# Optional (disabled): extract 'train-other-500.tar.gz'.
# Kept as real comments instead of a bare string literal, which the notebook
# would echo as the cell's output.
# with tarfile.open('train-other-500.tar.gz', 'r:gz') as tar:
#     tar.extractall('LibriSpeech/train-other-500')

In [None]:
import os

def gather_librispeech_files(root_dir):
    """Collect ``(audio_path, transcript_path)`` pairs for every .flac under ``root_dir``.

    LibriSpeech does not ship one ``.txt`` per utterance. Each chapter directory
    holds a single ``<speaker>-<chapter>.trans.txt`` file covering all of its
    utterances, so ``transcript_path`` points at that chapter-level file (the
    utterance id is the audio file name without its extension).

    Parameters
    ----------
    root_dir : str
        Directory to walk recursively.

    Returns
    -------
    list of tuple(str, str)
        One pair per .flac file, in ``os.walk`` order. Empty when ``root_dir``
        does not exist or contains no .flac files.
    """
    suffix = ".flac"
    pairs = []
    for root, _dirs, files in os.walk(root_dir):
        for file in files:
            if not file.endswith(suffix):
                continue
            utterance_id = file[:-len(suffix)]
            # "652-130726-0024" -> "652-130726" (speaker-chapter)
            chapter_id = utterance_id.rsplit("-", 1)[0]
            audio_path = os.path.join(root, file)
            transcript_path = os.path.join(root, chapter_id + ".trans.txt")
            pairs.append((audio_path, transcript_path))
    return pairs


# Gather audio file paths and transcriptions
dataset = gather_librispeech_files("LibriSpeech")

# Guard the preview so an empty/missing dataset directory gives a clear message
# instead of an IndexError.
if dataset:
    print(dataset[0][0])
else:
    print("No .flac files found under 'LibriSpeech'")

LibriSpeech/dev-clean/LibriSpeech/dev-clean/652/130726/652-130726-0024.flac


In [4]:
from pydub import AudioSegment
import os
import math

In [53]:
from pydub import AudioSegment
import numpy as np
import os

def add_noise_to_audio(audio, noise_level=0.02, rng=None):
    """Return ``audio`` with additive zero-mean Gaussian noise.

    Parameters
    ----------
    audio : pydub.AudioSegment
        Segment to corrupt; it is not modified.
    noise_level : float, default 0.02
        Standard deviation of the noise, expressed as a fraction of the
        segment's peak absolute sample value.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used (seed it with ``np.random.seed`` for reproducible output).

    Returns
    -------
    pydub.AudioSegment
        A new segment with the same frame rate, sample width and channel
        count. If ``audio`` contains no samples it is returned unchanged.
    """
    # Convert audio to a numpy array of raw integer samples
    samples = np.array(audio.get_array_of_samples())
    if samples.size == 0:
        # Nothing to corrupt, and np.max() would fail on an empty array
        return audio

    # Do the arithmetic in float64: adding in the sample dtype (e.g. int16)
    # silently wraps around on overflow, *before* any clipping can happen.
    clean = samples.astype(np.float64)

    # Scale by peak magnitude, not the signed maximum, so a signal whose
    # largest excursion is negative still gets a valid (non-negative) scale.
    peak = np.max(np.abs(clean))
    draw_normal = np.random.normal if rng is None else rng.normal
    noise = draw_normal(0, noise_level * peak, samples.shape)

    # Clip to the representable range of the actual sample dtype, then cast
    # back so the byte layout matches audio.sample_width.
    limits = np.iinfo(samples.dtype)
    noisy_samples = np.clip(np.rint(clean + noise), limits.min, limits.max)
    noisy_samples = noisy_samples.astype(samples.dtype)

    # Convert numpy array back to AudioSegment
    noisy_audio = AudioSegment(
        noisy_samples.tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=audio.sample_width,
        channels=audio.channels
    )

    return noisy_audio

def split_or_pad_audio(file_path, output_dir, segment_duration=5000, noise_level=0.02):
    """Split a FLAC file into fixed-length segments and write clean and noisy copies.

    Every segment is exactly ``segment_duration`` long; a short final segment
    is padded with trailing silence. For each segment a noisy version (via
    ``add_noise_to_audio``) is written as well.

    The noisy file path is the clean path with every "clean" substring replaced
    by "noisy" (so ``.../segments/clean/x_seg_0.flac`` pairs with
    ``.../segments/noisy/x_seg_0.flac``). If the clean path contains no
    "clean" substring, the noisy file is written next to the clean one with a
    ``_noisy`` suffix instead, so the two exports never target the same file.

    Parameters
    ----------
    file_path : str
        Path of the source FLAC file.
    output_dir : str
        Directory for the clean segments; created if missing.
    segment_duration : int, default 5000
        Segment length in milliseconds. Must be positive.
    noise_level : float, default 0.02
        Forwarded to ``add_noise_to_audio``.

    Returns
    -------
    list of str
        Paths of the clean segment files, in order.

    Raises
    ------
    ValueError
        If ``segment_duration`` is not positive.
    """
    if segment_duration <= 0:
        raise ValueError("segment_duration must be a positive number of milliseconds")

    # Load the audio file
    audio = AudioSegment.from_file(file_path, format="flac")
    audio_length = len(audio)

    # Ceiling division; avoids depending on `math` having been imported by
    # some other cell.
    num_segments = int(-(-audio_length // segment_duration))

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    segments = []
    for i in range(num_segments):
        start = i * segment_duration
        segment = audio[start:start + segment_duration]

        # Pad a short (final) segment with silence up to the full duration
        shortfall = segment_duration - len(segment)
        if shortfall > 0:
            segment = segment + AudioSegment.silent(duration=shortfall)

        segment_path = os.path.join(output_dir, f"{base_name}_seg_{i}.flac")

        noisy_path = segment_path.replace("clean", "noisy")
        if noisy_path == segment_path:
            # Nothing to swap: without this fallback the clean export below
            # would overwrite the noisy file.
            stem, ext = os.path.splitext(segment_path)
            noisy_path = f"{stem}_noisy{ext}"
        noisy_dir = os.path.dirname(noisy_path)
        if noisy_dir:
            os.makedirs(noisy_dir, exist_ok=True)

        # export() returns the open output file; close it explicitly
        noisy_seg = add_noise_to_audio(segment, noise_level)
        noisy_seg.export(noisy_path, format="flac").close()

        segment.export(segment_path, format="flac").close()
        segments.append(segment_path)

    return segments

In [None]:
# Load the first recording and add noise to the decoded audio.
# add_noise_to_audio works on an AudioSegment (it calls get_array_of_samples()),
# so the file has to be loaded first rather than passing its path.
audio = AudioSegment.from_file(dataset[0][0], format="flac")
nossy_aud = add_noise_to_audio(audio, 0.05)
print(len(audio))

# Define the output file paths
output_dir = "LibriSpeech_test"
os.makedirs(output_dir, exist_ok=True)
base_name = os.path.splitext(os.path.basename(dataset[0][0]))[0]

filename = f"{base_name}.wav"
print(filename)
path = os.path.join(output_dir, filename)

filename_noisy = f"{base_name}_noisy.wav"
path_noisy = os.path.join(output_dir, filename_noisy)
print(path_noisy)

# Export both versions; export() returns the open output file, so close it
# (this also keeps the handle's repr from being echoed as the cell result).
audio.export(path, format="wav").close()
nossy_aud.export(path_noisy, format="wav").close()

In [14]:
# Quick check: cut the first collected recording into 5-second segments
output_dir = "LibriSpeech_test_chop"
os.makedirs(output_dir, exist_ok=True)

print(dataset[0][0])
segment_list = split_or_pad_audio(
    file_path=dataset[0][0],
    output_dir=output_dir,
    segment_duration=5000,
)
print(segment_list[0])

LibriSpeech/dev-clean/LibriSpeech/dev-clean/652/130726/652-130726-0024.flac
LibriSpeech_test_chop/652-130726-0024_seg_0.wav


In [96]:
# Split the dev-clean recordings into 5-second clean/noisy segment pairs


import os

# (noisy_segment_path, clean_segment_path) pairs for the processed recordings
dataset = []
count = 0
max_files = 10  # stop after this many source recordings

for root, dirs1, files in os.walk("LibriSpeech/dev-clean"):
    for file in files:
        if not file.endswith(".flac"):
            continue

        audio_path = os.path.join(root, file)

        # Mirror the source directory layout under segments/clean
        output_dir = os.path.dirname(audio_path.replace("dev-clean", "segments/clean"))
        os.makedirs(output_dir, exist_ok=True)

        segment_list = split_or_pad_audio(audio_path, output_dir, 5000)
        for segment in segment_list:
            clean_path = segment
            # Same substitution split_or_pad_audio uses for the noisy copy
            noisy_path = clean_path.replace("clean", "noisy")
            print(clean_path, noisy_path, sep="\n")
            dataset.append((noisy_path, segment))

        count += 1
        if count >= max_files:
            break
    else:
        # This directory ran out before the cap was hit: keep walking
        continue
    # Inner loop hit the cap: stop walking altogether
    break



LibriSpeech/segments/clean/LibriSpeech/segments/clean/652/130726/652-130726-0024_seg_0.flac
LibriSpeech/segments/noisy/LibriSpeech/segments/noisy/652/130726/652-130726-0024_seg_0.flac
LibriSpeech/segments/clean/LibriSpeech/segments/clean/652/130726/652-130726-0024_seg_1.flac
LibriSpeech/segments/noisy/LibriSpeech/segments/noisy/652/130726/652-130726-0024_seg_1.flac
LibriSpeech/segments/clean/LibriSpeech/segments/clean/652/130726/652-130726-0024_seg_2.flac
LibriSpeech/segments/noisy/LibriSpeech/segments/noisy/652/130726/652-130726-0024_seg_2.flac
LibriSpeech/segments/clean/LibriSpeech/segments/clean/652/130726/652-130726-0028_seg_0.flac
LibriSpeech/segments/noisy/LibriSpeech/segments/noisy/652/130726/652-130726-0028_seg_0.flac
LibriSpeech/segments/clean/LibriSpeech/segments/clean/652/130726/652-130726-0026_seg_0.flac
LibriSpeech/segments/noisy/LibriSpeech/segments/noisy/652/130726/652-130726-0026_seg_0.flac
LibriSpeech/segments/clean/LibriSpeech/segments/clean/652/130726/652-130726-0026

In [92]:
# Inspect segment_list, which earlier cells assign from split_or_pad_audio
# (the list of clean segment paths for one source file).
print(segment_list)

['LibriSpeech/segments/clean/LibriSpeech/segments/clean/652/130726/652-130726-0028_seg_0.flac']


In [97]:
# Sanity check: convert the first (noisy, clean) segment pair to WAV for listening
print(dataset[0][0])
print(dataset[0][1])
naudio = AudioSegment.from_file(dataset[0][0], format="flac")
caudio = AudioSegment.from_file(dataset[0][1], format="flac")

# export() returns the open output file. Close it so the handle is not left
# open and its repr is not echoed as the cell's result.
naudio.export("naudio.wav", format="wav").close()
caudio.export("caudio.wav", format="wav").close()


LibriSpeech/segments/noisy/LibriSpeech/segments/noisy/652/130726/652-130726-0024_seg_0.flac
LibriSpeech/segments/clean/LibriSpeech/segments/clean/652/130726/652-130726-0024_seg_0.flac


<_io.BufferedRandom name='caudio.wav'>