In [37]:
import librosa
import numpy as np
from pydub.utils import mediainfo
import os

In [35]:

def load_and_trim_audio(file_path, target_sr=16000):
    """Load an audio file and strip the silence at both ends.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        target_sr: Sampling rate passed to ``librosa.load`` as ``sr``.

    Returns:
        A ``(trimmed_audio, sr)`` tuple on success. If loading or trimming
        raises any exception, the error is printed and ``(None, None)`` is
        returned instead, so callers must check for ``None``.
    """
    try:
        samples, sample_rate = librosa.load(file_path, sr=target_sr)
        # trim() returns a pair; only the trimmed signal (first item) is kept.
        trimmed, _interval = librosa.effects.trim(samples)
    except Exception as err:
        # Best-effort loader: report the failure and signal it with Nones.
        print(f"Error loading {file_path}: {err}")
        return None, None
    return trimmed, sample_rate

In [4]:
def normalize_audio(audio):
    """Return ``audio`` normalized by ``librosa.util.normalize``.

    The call uses librosa's default arguments; no norm or axis is overridden.
    """
    normalized = librosa.util.normalize(audio)
    return normalized


In [5]:
def segment_audio(audio, segment_length=3, sr=16000):
    """Split ``audio`` into consecutive, non-overlapping segments.

    Args:
        audio: Sliceable sequence of samples (e.g. a 1-D array or a list).
        segment_length: Segment duration in seconds. May be fractional
            (e.g. ``0.5``); the sample count is rounded to the nearest integer.
        sr: Sampling rate in samples per second.

    Returns:
        A list of slices of ``audio``. Every segment holds
        ``round(segment_length * sr)`` samples except possibly the last one,
        which holds whatever remains. An empty ``audio`` yields ``[]``.

    Raises:
        ValueError: If ``segment_length * sr`` rounds to less than one sample.
    """
    # range() needs an integer step; a float segment_length would otherwise
    # produce a float here and fail with a TypeError.
    segment_samples = int(round(segment_length * sr))
    if segment_samples < 1:
        raise ValueError(
            f"segment_length * sr must be at least one sample, got {segment_length} * {sr}"
        )
    return [
        audio[start:start + segment_samples]
        for start in range(0, len(audio), segment_samples)
    ]


In [60]:
def extract_mel_spectrogram(audio, sr, n_mels=128, hop_length=256):
    """Compute a mel spectrogram of ``audio`` and convert it to decibels.

    Args:
        audio: Audio signal, passed to librosa as ``y``.
        sr: Sampling rate of ``audio``.
        n_mels: Number of mel bands requested from librosa.
        hop_length: Hop length requested from librosa.

    Returns:
        The result of ``librosa.power_to_db`` applied to the mel spectrogram,
        with ``np.max`` as the reference.
    """
    power_spec = librosa.feature.melspectrogram(
        y=audio,
        sr=sr,
        n_mels=n_mels,
        hop_length=hop_length,
    )
    # ref=np.max makes the dB values relative to the spectrogram's own peak.
    return librosa.power_to_db(power_spec, ref=np.max)

In [61]:
def pitch_shift(audio, sr, n_steps):
    """Pitch-shift ``audio`` by ``n_steps`` steps using librosa.

    Args:
        audio: Audio signal to shift.
        sr: Sampling rate of ``audio``.
        n_steps: Number of steps to shift by (semitones under librosa's
            default ``bins_per_octave`` — confirm against the installed version).

    Returns:
        The pitch-shifted signal returned by ``librosa.effects.pitch_shift``.
    """
    # sr must go by keyword: it is keyword-only in librosa >= 0.10, and the
    # keyword form is also accepted by older releases.
    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)

def time_stretch(audio, rate):
    """Time-stretch ``audio`` by the factor ``rate`` using librosa.

    Args:
        audio: Audio signal to stretch.
        rate: Stretch factor forwarded to ``librosa.effects.time_stretch``.

    Returns:
        The stretched signal returned by ``librosa.effects.time_stretch``.
    """
    # rate must go by keyword: it is keyword-only in librosa >= 0.10, and the
    # keyword form is also accepted by older releases.
    return librosa.effects.time_stretch(audio, rate=rate)


In [62]:
def is_audio_file(file_path):
    """Tell whether ``file_path`` carries a known audio file extension.

    Only the name is inspected (case-insensitively); the path is never opened
    or checked for existence, so a folder or a missing file with a matching
    extension also returns ``True``.
    """
    _, suffix = os.path.splitext(file_path)
    return suffix.lower() in ('.wav', '.mp3', '.flac', '.ogg', '.aac', '.m4a')

In [63]:
def preprocess_dataset(input_folder, output_folder, target_sr=16000, segment_length=3):
    """Convert every audio file in a folder into per-segment mel spectrograms.

    Each entry of ``input_folder`` with an audio extension is loaded and
    trimmed, normalized, cut into segments, and every segment's dB mel
    spectrogram is saved as ``<stem>_segment_<i>.npy`` in ``output_folder``.
    Only the top level of ``input_folder`` is listed; subfolders are not
    descended into.

    Args:
        input_folder: Folder whose entries are scanned.
        output_folder: Folder receiving the ``.npy`` files; created (with any
            missing parents) if it does not exist.
        target_sr: Sampling rate the audio is loaded at.
        segment_length: Segment duration in seconds (default 3, the value
            that was previously hard-coded).

    Returns:
        None. Results are written to disk.

    Raises:
        OSError: If ``input_folder`` cannot be listed or ``output_folder``
            cannot be created (e.g. the path exists but is not a folder).
            Errors on individual files are printed and skipped, not raised.

    Note:
        Output names use only the file stem, so two inputs that differ only
        in extension (``a.wav`` / ``a.flac``) write to the same output files.
    """
    print(f"Input folder: {input_folder}")  # Debugging line
    # exist_ok replaces the check-then-create pair, which could race with
    # another process creating the folder in between.
    os.makedirs(output_folder, exist_ok=True)

    for file_name in os.listdir(input_folder):
        print(file_name)
        file_path = os.path.join(input_folder, file_name)
        print(f"file path is :{file_path}")

        if not is_audio_file(file_path):  # Skip anything without an audio extension
            continue

        try:
            audio, sr = load_and_trim_audio(file_path, target_sr)
            if audio is None:
                # The loader already reported the failure.
                continue
            audio = normalize_audio(audio)
            segments = segment_audio(audio, segment_length=segment_length, sr=sr)

            # The stem is the same for every segment of this file.
            stem = os.path.splitext(file_name)[0]
            for i, segment in enumerate(segments):
                mel_spectrogram = extract_mel_spectrogram(segment, sr)
                output_file_path = os.path.join(output_folder, f"{stem}_segment_{i}.npy")
                np.save(output_file_path, mel_spectrogram)
        except Exception as e:
            # Keep going: one bad file must not abort the whole batch.
            print(f"Error processing {file_path}: {e}")


In [64]:
# Raw string: Windows backslashes are taken literally instead of being parsed
# as (invalid) escape sequences such as \D, \O, \A and \d.
os.listdir(r"D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006")

['2902-9006-0000.flac',
 '2902-9006-0001.flac',
 '2902-9006-0002.flac',
 '2902-9006-0003.flac',
 '2902-9006-0004.flac',
 '2902-9006-0005.flac',
 '2902-9006-0006.flac',
 '2902-9006-0007.flac',
 '2902-9006-0008.flac',
 '2902-9006-0009.flac',
 '2902-9006-0010.flac',
 '2902-9006-0011.flac',
 '2902-9006-0012.flac',
 '2902-9006-0013.flac',
 '2902-9006-0014.flac',
 '2902-9006-0015.flac',
 '2902-9006-0016.flac',
 '2902-9006-0017.flac',
 '2902-9006-0018.flac',
 '2902-9006-0019.flac',
 '2902-9006-0020.flac']

In [65]:

# Raw string: Windows backslashes are taken literally instead of being parsed
# as (invalid) escape sequences such as \D, \O, \A and \d.
input_folder = r"D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006"
output_folder = 'preprocessed_data'
preprocess_dataset(input_folder, output_folder)

Input folder: D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006
2902-9006-0000.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006\2902-9006-0000.flac
2902-9006-0001.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006\2902-9006-0001.flac
2902-9006-0002.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006\2902-9006-0002.flac
2902-9006-0003.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006\2902-9006-0003.flac
2902-9006-0004.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006\2902-9006-0004.flac
2902-9006-0005.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006\2902-9006-0005.flac
2902-9006-0006.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voic



2902-9006-0018.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006\2902-9006-0018.flac
2902-9006-0019.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006\2902-9006-0019.flac
2902-9006-0020.flac
file path is :D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006\2902-9006-0020.flac


In [49]:
# Raw string is required here: in a normal literal "\v" is a vertical-tab
# character, so "...\data\voice\9006" would not name the intended path.
os.path.isfile(r"D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006")

False

In [50]:
# Raw string is required here: in a normal literal "\v" is a vertical-tab
# character, so "...\data\voice\9006" would not name the intended path.
is_audio_file(r"D:\Data_analytics_new\Omdena\Autoshield\Deep-Fake-Voice-Generator\data\voice\9006")



False