## ITW Exploration - Normalization

In [None]:
import os
import subprocess
import re
import pandas as pd

from configs.config import ITW_DATASET_PATH, DATASET_PATH, ELEVEN_LABS_DATASET_PATH

SOX = "sox"
FFMPEG = "ffmpeg"

In [None]:
AUDIO_DIR = os.path.join(ITW_DATASET_PATH, "fake")
rows = []
lufs_re = re.compile(r"Input Integrated:\s*(-?\d+(\.\d+)?)")


for f in os.listdir(AUDIO_DIR):
    if not f.endswith(".wav"):
        continue

    path = os.path.join(AUDIO_DIR, f)

    # soxi (metadata)
    soxi_out = subprocess.check_output([SOX, "--info", path], text=True)

    sr = int(re.search(r"Sample Rate\s*:\s*(\d+)", soxi_out).group(1))
    ch = int(re.search(r"Channels\s*:\s*(\d+)", soxi_out).group(1))

    # ffmpeg loudness analysis
    ff_out = subprocess.check_output(
        [
            FFMPEG, "-i", path,
            "-filter:a", "loudnorm=print_format=summary",
            "-f", "null", "-"
        ],
        stderr=subprocess.STDOUT,
        text=True
    )
    lufs = float(lufs_re.search(ff_out).group(1))

    rows.append((f, sr, ch, lufs))

df = pd.DataFrame(rows, columns=["file", "sample_rate", "channels", "lufs"])

print("DONE")



DONE
          file  sample_rate  channels  lufs
0  file001.wav        16000         1 -35.9
1  file002.wav        16000         1 -35.1
2  file003.wav        16000         1 -34.6
3  file004.wav        16000         1 -35.1
4  file005.wav        16000         1 -32.1


In [None]:
AUDIO_DIR = os.path.join(ITW_DATASET_PATH, "real")
rows_real = []
lufs_re = re.compile(r"Input Integrated:\s*(-?\d+(\.\d+)?)")


for f in os.listdir(AUDIO_DIR):
    if not f.endswith(".wav"):
        continue

    path = os.path.join(AUDIO_DIR, f)

    # soxi (metadata)
    soxi_out = subprocess.check_output([SOX, "--info", path], text=True)

    sr = int(re.search(r"Sample Rate\s*:\s*(\d+)", soxi_out).group(1))
    ch = int(re.search(r"Channels\s*:\s*(\d+)", soxi_out).group(1))

    # ffmpeg loudness analysis
    ff_out = subprocess.check_output(
        [
            FFMPEG, "-i", path,
            "-filter:a", "loudnorm=print_format=summary",
            "-f", "null", "-"
        ],
        stderr=subprocess.STDOUT,
        text=True
    )
    match = lufs_re.search(ff_out)
    if match:
        lufs = float(match.group(1))
    else:
        print(f"Warning: LUFS not found for {path}")
        lufs = None  # or skip the file entirely

    rows_real.append((f, sr, ch, lufs))

df_real = pd.DataFrame(rows_real, columns=["file", "sample_rate", "channels", "lufs"])

print("DONE")



DONE


In [None]:
df_fake = pd.DataFrame(rows, columns=["file", "sample_rate", "channels", "lufs"])
df_fake.to_csv(r"c:\Users\konst\Desktop\itw_fake_sr_chan_lufs")
df_real = pd.DataFrame(rows_real, columns=["file", "sample_rate", "channels", "lufs"])
df_real.to_csv(r"c:\Users\konst\Desktop\itw_real_sr_chan_lufs")
print(df_real.head())

        file  sample_rate  channels  lufs
0    100.wav        16000         1 -22.1
1   1000.wav        16000         1 -29.6
2  10001.wav        16000         1 -23.6
3  10003.wav        16000         1 -29.1
4  10004.wav        16000         1 -26.2


In [None]:
AUDIO_DIR = os.path.join(DATASET_PATH, "training", "real")
rows_real_train = []
lufs_re = re.compile(r"Input Integrated:\s*(-?\d+(\.\d+)?)")


for f in os.listdir(AUDIO_DIR):
    if not f.endswith(".wav"):
        continue

    path = os.path.join(AUDIO_DIR, f)

    # soxi (metadata)
    soxi_out = subprocess.check_output([SOX, "--info", path], text=True)

    sr = int(re.search(r"Sample Rate\s*:\s*(\d+)", soxi_out).group(1))
    ch = int(re.search(r"Channels\s*:\s*(\d+)", soxi_out).group(1))

    # ffmpeg loudness analysis
    ff_out = subprocess.check_output(
        [
            FFMPEG, "-i", path,
            "-filter:a", "loudnorm=print_format=summary",
            "-f", "null", "-"
        ],
        stderr=subprocess.STDOUT,
        text=True
    )
    match = lufs_re.search(ff_out)
    if match:
        lufs = float(match.group(1))
    else:
        print(f"Warning: LUFS not found for {path}")
        lufs = None  # or skip the file entirely

    rows_real_train.append((f, sr, ch, lufs))

df_real_train = pd.DataFrame(rows_real_train, columns=["file", "sample_rate", "channels", "lufs"])

print("DONE")

df_real_train = pd.DataFrame(rows_real_train, columns=["file", "sample_rate", "channels", "lufs"])
df_real_train.to_csv(r"c:\Users\konst\Desktop\FoR_real_train_sr_chan_lufs")




DONE


In [None]:
AUDIO_DIR = os.path.join(DATASET_PATH, "training", "fake")
rows_fake_train = []
lufs_re = re.compile(r"Input Integrated:\s*(-?\d+(\.\d+)?)")


for f in os.listdir(AUDIO_DIR):
    if not f.endswith(".wav"):
        continue

    path = os.path.join(AUDIO_DIR, f)

    # soxi (metadata)
    soxi_out = subprocess.check_output([SOX, "--info", path], text=True)

    sr = int(re.search(r"Sample Rate\s*:\s*(\d+)", soxi_out).group(1))
    ch = int(re.search(r"Channels\s*:\s*(\d+)", soxi_out).group(1))

    # ffmpeg loudness analysis
    ff_out = subprocess.check_output(
        [
            FFMPEG, "-i", path,
            "-filter:a", "loudnorm=print_format=summary",
            "-f", "null", "-"
        ],
        stderr=subprocess.STDOUT,
        text=True
    )
    match = lufs_re.search(ff_out)
    if match:
        lufs = float(match.group(1))
    else:
        print(f"Warning: LUFS not found for {path}")
        lufs = None  # or skip the file entirely

    rows_fake_train.append((f, sr, ch, lufs))

df_fake_train = pd.DataFrame(rows_fake_train, columns=["file", "sample_rate", "channels", "lufs"])

print("DONE")

df_fake_train = pd.DataFrame(rows_fake_train, columns=["file", "sample_rate", "channels", "lufs"])
df_fake_train.to_csv(r"c:\Users\konst\Desktop\FoR_fake_train_sr_chan_lufs")




DONE


In [24]:
# Mean of LUFS
mean_lufs = df_fake['lufs'].mean()

# Standard deviation of LUFS
std_lufs = df_fake['lufs'].std()
print("For fake files of ITW")
print(f"Mean LUFS: {mean_lufs:.2f}")
print(f"LUFS Standard Deviation : {std_lufs:.2f}")


For fake files of ITW
Mean LUFS: -28.58
LUFS Standard Deviation : 4.18


In [25]:
# Mean of LUFS
mean_lufs = df_real['lufs'].mean()

# Standard deviation of LUFS
std_lufs = df_real['lufs'].std()
print("For real files of ITW")
print(f"Mean LUFS: {mean_lufs:.2f}")
print(f"LUFS Standard Deviation: {std_lufs:.2f}")


For real files of ITW
Mean LUFS: -25.73
LUFS Standard Deviation: 5.84


In [26]:
# Mean of LUFS
mean_lufs = df_fake_train['lufs'].mean()

# Standard deviation of LUFS
std_lufs = df_fake_train['lufs'].std()
print("For fake files of FoR")
print(f"Mean LUFS: {mean_lufs:.2f}")
print(f"LUFS Standard Deviation: {std_lufs:.2f}")


For fake files of FoR
Mean LUFS: -15.55
LUFS Standard Deviation: 2.04


In [27]:
# Mean of LUFS
mean_lufs = df_real_train['lufs'].mean()

# Standard deviation of LUFS
std_lufs = df_real_train['lufs'].std()
print("For real files of FoR")

print(f"Mean LUFS: {mean_lufs:.2f}")
print(f"LUFS Standard Deviation: {std_lufs:.2f}")


For real files of FoR
Mean LUFS: -17.89
LUFS Standard Deviation: 2.50


In [7]:
# Get all unique values in 'channels'
unique_channels = df['channels'].unique()
print(unique_channels)


[1]


In [None]:
AUDIO_DIR = os.path.join(ITW_DATASET_PATH, "fake_norm")
rows_fake_after_loud_norm = []
lufs_re = re.compile(r"Input Integrated:\s*(-?\d+(\.\d+)?)")


for f in os.listdir(AUDIO_DIR):
    if not f.endswith(".wav"):
        continue

    path = os.path.join(AUDIO_DIR, f)

    # soxi (metadata)
    soxi_out = subprocess.check_output([SOX, "--info", path], text=True)

    sr = int(re.search(r"Sample Rate\s*:\s*(\d+)", soxi_out).group(1))
    ch = int(re.search(r"Channels\s*:\s*(\d+)", soxi_out).group(1))

    # ffmpeg loudness analysis
    ff_out = subprocess.check_output(
        [
            FFMPEG, "-i", path,
            "-filter:a", "loudnorm=print_format=summary",
            "-f", "null", "-"
        ],
        stderr=subprocess.STDOUT,
        text=True
    )
    match = lufs_re.search(ff_out)
    if match:
        lufs = float(match.group(1))
    else:
        print(f"Warning: LUFS not found for {path}")
        lufs = None  # or skip the file entirely

    rows_fake_after_loud_norm.append((f, sr, ch, lufs))

df_fake_after_loud_norm = pd.DataFrame(rows_fake_after_loud_norm, columns=["file", "sample_rate", "channels", "lufs"])

print("DONE")

df_fake_after_loud_norm = pd.DataFrame(rows_fake_after_loud_norm, columns=["file", "sample_rate", "channels", "lufs"])
df_fake_after_loud_norm.to_csv(r"c:\Users\konst\Desktop\FoR_fake_after_loud_norm_sr_chan_lufs")

# Mean of LUFS
mean_lufs = df_fake_after_loud_norm['lufs'].mean()

# Standard deviation of LUFS
std_lufs = df_fake_after_loud_norm['lufs'].std()
print("For real files of FoR")

print(f"Mean LUFS: {mean_lufs:.2f}")
print(f"LUFS Standard Deviation: {std_lufs:.2f}")




DONE
For real files of FoR
Mean LUFS: -18.19
LUFS Standard Deviation: 1.33


### Eleven labs audio files preprocessing

In [None]:
AUDIO_DIR = os.path.join(ELEVEN_LABS_DATASET_PATH, "fake")
rows = []
lufs_re = re.compile(r"Input Integrated:\s*(-?\d+(\.\d+)?)")


for f in os.listdir(AUDIO_DIR):
    if not f.endswith(".wav"):
        continue

    path = os.path.join(AUDIO_DIR, f)

    # soxi (metadata)
    soxi_out = subprocess.check_output([SOX, "--info", path], text=True)

    sr = int(re.search(r"Sample Rate\s*:\s*(\d+)", soxi_out).group(1))
    ch = int(re.search(r"Channels\s*:\s*(\d+)", soxi_out).group(1))

    # ffmpeg loudness analysis
    ff_out = subprocess.check_output(
        [
            FFMPEG, "-i", path,
            "-filter:a", "loudnorm=print_format=summary",
            "-f", "null", "-"
        ],
        stderr=subprocess.STDOUT,
        text=True
    )
    lufs = float(lufs_re.search(ff_out).group(1))

    rows.append((f, sr, ch, lufs))

df_fake = pd.DataFrame(rows, columns=["file", "sample_rate", "channels", "lufs"])

print("DONE")
print(df.head())



DONE
          file  sample_rate  channels  lufs
0  file001.wav        16000         1 -35.9
1  file002.wav        16000         1 -35.1
2  file003.wav        16000         1 -34.6
3  file004.wav        16000         1 -35.1
4  file005.wav        16000         1 -32.1


In [None]:
INPUT_DIR = os.path.join(ELEVEN_LABS_DATASET_PATH, "fake")
OUTPUT_DIR = os.path.join(ELEVEN_LABS_DATASET_PATH, "fake_normalized")
TARGET_LUFS = -15.0  # Common broadcast standard

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

for f in os.listdir(INPUT_DIR):
    if not f.endswith(".wav"):
        continue

    input_path = os.path.join(INPUT_DIR, f)
    output_path = os.path.join(OUTPUT_DIR, f)

    print(f"Normalizing: {f} to {TARGET_LUFS} LUFS...")

    # Using the 'loudnorm' filter in a single pass (simple mode)
    # This automatically adjusts integrated loudness, range, and true peak.
    cmd = [
        FFMPEG, "-y", "-i", input_path,
        "-filter:a", f"loudnorm=I={TARGET_LUFS}:LRA=11:TP=-1.5",
        output_path
    ]

    try:
        subprocess.run(cmd, check=True, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        print(f"Error processing {f}: {e}")

print(f"\nDONE! Files saved to: {OUTPUT_DIR}")

Normalizing: file001.wav to -15.0 LUFS...
Normalizing: file002.wav to -15.0 LUFS...
Normalizing: file003.wav to -15.0 LUFS...
Normalizing: file004.wav to -15.0 LUFS...
Normalizing: file005.wav to -15.0 LUFS...
Normalizing: file006.wav to -15.0 LUFS...
Normalizing: file007.wav to -15.0 LUFS...
Normalizing: file008.wav to -15.0 LUFS...
Normalizing: file009.wav to -15.0 LUFS...
Normalizing: file010.wav to -15.0 LUFS...
Normalizing: file011.wav to -15.0 LUFS...
Normalizing: file012.wav to -15.0 LUFS...
Normalizing: file013.wav to -15.0 LUFS...
Normalizing: file014.wav to -15.0 LUFS...
Normalizing: file015.wav to -15.0 LUFS...
Normalizing: file016.wav to -15.0 LUFS...
Normalizing: file017.wav to -15.0 LUFS...
Normalizing: file018.wav to -15.0 LUFS...
Normalizing: file019.wav to -15.0 LUFS...
Normalizing: file020.wav to -15.0 LUFS...
Normalizing: file021.wav to -15.0 LUFS...
Normalizing: file022.wav to -15.0 LUFS...
Normalizing: file023.wav to -15.0 LUFS...
Normalizing: file024.wav to -15.0 

In [None]:
AUDIO_DIR = os.path.join(ELEVEN_LABS_DATASET_PATH, "fake_normalized")
rows = []
lufs_re = re.compile(r"Input Integrated:\s*(-?\d+(\.\d+)?)")


for f in os.listdir(AUDIO_DIR):
    if not f.endswith(".wav"):
        continue

    path = os.path.join(AUDIO_DIR, f)

    # soxi (metadata)
    soxi_out = subprocess.check_output([SOX, "--info", path], text=True)

    sr = int(re.search(r"Sample Rate\s*:\s*(\d+)", soxi_out).group(1))
    ch = int(re.search(r"Channels\s*:\s*(\d+)", soxi_out).group(1))

    # ffmpeg loudness analysis
    ff_out = subprocess.check_output(
        [
            FFMPEG, "-i", path,
            "-filter:a", "loudnorm=print_format=summary",
            "-f", "null", "-"
        ],
        stderr=subprocess.STDOUT,
        text=True
    )
    lufs = float(lufs_re.search(ff_out).group(1))

    rows.append((f, sr, ch, lufs))

df = pd.DataFrame(rows, columns=["file", "sample_rate", "channels", "lufs"])

print("DONE")
print(df.head())



DONE
          file  sample_rate  channels  lufs
0  file001.wav       192000         1 -16.3
1  file002.wav       192000         1 -15.8
2  file003.wav       192000         1 -15.4
3  file004.wav       192000         1 -16.4
4  file005.wav       192000         1 -15.3


In [None]:
# --- CONFIGURATION ---
INPUT_DIR = os.path.join(ELEVEN_LABS_DATASET_PATH, "real")
OUTPUT_DIR = os.path.join(ELEVEN_LABS_DATASET_PATH, "real_normalized")
TARGET_LUFS = -18.0  # Common broadcast standard

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

for f in os.listdir(INPUT_DIR):
    if not f.endswith(".wav"):
        continue

    input_path = os.path.join(INPUT_DIR, f)
    output_path = os.path.join(OUTPUT_DIR, f)

    print(f"Normalizing: {f} to {TARGET_LUFS} LUFS...")

    # Using the 'loudnorm' filter in a single pass (simple mode)
    # This automatically adjusts integrated loudness, range, and true peak.
    cmd = [
        FFMPEG, "-y", "-i", input_path,
        "-filter:a", f"loudnorm=I={TARGET_LUFS}:LRA=11:TP=-1.5",
        output_path
    ]

    try:
        subprocess.run(cmd, check=True, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        print(f"Error processing {f}: {e}")

print(f"\nDONE! Files saved to: {OUTPUT_DIR}")

Normalizing: george_downsampled_single_channel_chunk_0000.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0001.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0002.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0003.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0004.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0005.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0006.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0007.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0008.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0009.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0010.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk_0011.wav to -18.0 LUFS...
Normalizing: george_downsampled_single_channel_chunk

In [None]:
AUDIO_DIR = os.path.join(ELEVEN_LABS_DATASET_PATH, "real_normalized")
rows = []
lufs_re = re.compile(r"Input Integrated:\s*(-?\d+(\.\d+)?)")


for f in os.listdir(AUDIO_DIR):
    if not f.endswith(".wav"):
        continue

    path = os.path.join(AUDIO_DIR, f)

    # soxi (metadata)
    soxi_out = subprocess.check_output([SOX, "--info", path], text=True)

    sr = int(re.search(r"Sample Rate\s*:\s*(\d+)", soxi_out).group(1))
    ch = int(re.search(r"Channels\s*:\s*(\d+)", soxi_out).group(1))

    # ffmpeg loudness analysis
    ff_out = subprocess.check_output(
        [
            FFMPEG, "-i", path,
            "-filter:a", "loudnorm=print_format=summary",
            "-f", "null", "-"
        ],
        stderr=subprocess.STDOUT,
        text=True
    )
    lufs = float(lufs_re.search(ff_out).group(1))

    rows.append((f, sr, ch, lufs))

df = pd.DataFrame(rows, columns=["file", "sample_rate", "channels", "lufs"])

print("DONE")
print(df.head())



DONE
                                               file  sample_rate  channels  \
0  george_downsampled_single_channel_chunk_0000.wav       192000         1   
1  george_downsampled_single_channel_chunk_0001.wav       192000         1   
2  george_downsampled_single_channel_chunk_0002.wav       192000         1   
3  george_downsampled_single_channel_chunk_0003.wav       192000         1   
4  george_downsampled_single_channel_chunk_0004.wav       192000         1   

   lufs  
0 -17.9  
1 -17.7  
2 -18.1  
3 -18.6  
4 -19.0  
