In [None]:
import os
import sys
import shutil
import numpy as np
import soundfile as sf

# Append the absolute path of the parent directory to sys.path (once),
# presumably so the imports that follow can find modules located there.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from frechet_audio_distance import CLAPScore
from utils import gen_sine_wave

In [None]:
# Audio settings used when synthesising test signals:
# every generated clip is LENGTH_IN_SECONDS long and written at SAMPLE_RATE Hz.
LENGTH_IN_SECONDS = 2
SAMPLE_RATE = 48000

# Scorer configuration.
# Submodel "630k-audioset" (for general audio less than 10-sec).
clap_options = {
    "ckpt_dir": "../checkpoints/clap",
    "submodel_name": "630k-audioset",
    "verbose": True,
    "audio_load_worker": 8,
    "enable_fusion": False,
}

# init
clap = CLAPScore(**clap_options)

In [None]:
# TEST USING SINE WAVES
#
# Generates two small synthetic datasets (audio files plus a caption CSV),
# scores every text/audio pairing with `clap.score`, and always removes the
# generated folders afterwards -- even when generation or scoring fails.


def _write_sine_dataset(text_folder, audio_folder, freq_start, freq_stop, count, noise_param=None):
    """Create `count` sine-wave files and a matching caption CSV.

    Frequencies are evenly spaced between `freq_start` and `freq_stop`
    (both inclusive). For each frequency a file ``sin_<freq>.wav`` is written
    into `audio_folder` as 24-bit PCM at SAMPLE_RATE, and a row
    ``sin_<freq>.wav,<freq> Hz sine wave`` is recorded. The rows are saved to
    ``<text_folder>/text.csv`` under the header ``filename,caption``.

    Args:
        text_folder: Directory that receives ``text.csv`` (created if missing).
        audio_folder: Directory that receives the wav files (created if missing).
        freq_start: Lowest frequency in Hz.
        freq_stop: Highest frequency in Hz.
        count: Number of frequencies / files to generate.
        noise_param: Forwarded to ``gen_sine_wave`` as ``param``; the second
            dataset below uses a non-None value to obtain "noisy" sine waves
            (exact semantics are defined by ``gen_sine_wave`` -- TODO confirm).

    Returns:
        The list of CSV rows (without header) that were written.
    """
    os.makedirs(text_folder, exist_ok=True)
    os.makedirs(audio_folder, exist_ok=True)

    rows = []
    for freq in np.linspace(freq_start, freq_stop, count).tolist():
        wav_name = f"sin_{freq:.0f}.wav"
        # text
        rows.append(f"{wav_name},{freq:.0f} Hz sine wave")
        # audio
        samples = gen_sine_wave(freq, LENGTH_IN_SECONDS, SAMPLE_RATE, param=noise_param)
        filename = os.path.join(audio_folder, wav_name)
        print("Creating: %s with %i samples." % (filename, samples.shape[0]))
        sf.write(filename, samples, SAMPLE_RATE, "PCM_24")

    # save text to csv
    with open(os.path.join(text_folder, "text.csv"), "w") as f:
        f.write("filename,caption\n")
        f.writelines(row + "\n" for row in rows)
    return rows


text_folder1, audio_folder1 = "text1", "audio1"
text_folder2, audio_folder2 = "text2", "audio2"
count = 10

try:
    # text and audio 1 (sine waves between 100-1000 Hz)
    text_audio1 = _write_sine_dataset(
        text_folder1, audio_folder1, 100, 1000, count, noise_param=None
    )
    # text and audio 2 (noisy sine waves between 1000-10000 Hz)
    text_audio2 = _write_sine_dataset(
        text_folder2, audio_folder2, 1000, 10000, count, noise_param=0.5
    )

    # Matching pairs first, then the two crossed pairs (same order as the
    # printed report has always used).
    pairings = (
        (1, text_folder1, 1, audio_folder1),
        (2, text_folder2, 2, audio_folder2),
        (1, text_folder1, 2, audio_folder2),
        (2, text_folder2, 1, audio_folder1),
    )
    for text_id, text_folder, audio_id, audio_folder in pairings:
        clap_score = clap.score(
            text_path=os.path.join(text_folder, "text.csv"),
            audio_dir=audio_folder,
            text_column="caption",
            text_embds_path=None,
            audio_embds_path=None,
        )
        print(f"CLAP score text {text_id} vs audio {audio_id} [mu. std]: {clap_score}")
finally:
    # Always clean up; ignore_errors keeps a missing folder (e.g. after an
    # early failure) from masking the original exception.
    for folder in (text_folder1, audio_folder1, text_folder2, audio_folder2):
        shutil.rmtree(folder, ignore_errors=True)

In [None]:
# Score the same audio directory against two caption files (paths are relative
# to the working directory): one CSV named "matching", one "not matching".
# Everything except the caption file is identical between the two runs.
shared_score_args = dict(
    audio_dir="./clap_score_audio",
    text_column="caption",
    text_embds_path=None,
    audio_embds_path=None,
)

# text and audio matching
clap_score = clap.score(
    text_path="./clap_score_text/text_matching.csv", **shared_score_args
)
print(f"CLAP score text and audio matching [mu. std]: {clap_score}")

# text and audio not matching
clap_score = clap.score(
    text_path="./clap_score_text/text_not_matching.csv", **shared_score_args
)
print(f"CLAP score text and audio not matching [mu. std]: {clap_score}")