In [2]:
import os
import re
import numpy as np
import pandas as pd
from scipy.io.wavfile import write
from IPython.display import Audio

import rich
import matplotlib.pyplot as plt
import seaborn as sns
from ast import literal_eval
from audiomentations import Gain,Normalize,LoudnessNormalization,AddGaussianSNR,Compose,Limiter,ClippingDistortion

In [1]:
# Default sampling rate in Hz; used as `samplerate` when generating the test tones.
DEFAULT_FS = 44100

In [None]:
class TestToneEvalClass:
    """Generate a sine test tone and apply controlled degradations to audio.

    The instance stores the tone parameters and the output folder; the
    methods either synthesise the tone, degrade a sample array (noise, hum,
    limiter, clipping, dropped samples), plot it, or write it to disk.

    Attributes:
        samplerate: Sampling rate in Hz.
        frequency: Frequency of the generated sine tone in Hz.
        duration: Tone duration in seconds.
        filefold: Output folder prefix. It is concatenated directly with the
            file name, so it must end with a path separator.
    """

    def __init__(self, duration, samplerate, frequency, filefold):
        self.samplerate = samplerate
        self.frequency = frequency
        self.duration = duration
        self.filefold = filefold

    def SinWaveGenerator(self):
        """Return a unit-amplitude sine of `self.frequency` Hz lasting `self.duration` s."""
        t = np.linspace(0, self.duration, int(self.duration * self.samplerate), endpoint=False)
        sine = np.sin(2 * np.pi * self.frequency * t)
        return sine

    def Adding_WNSNR(self, snr_db, data):
        """Add Gaussian noise at a fixed SNR using audiomentations' AddGaussianSNR.

        Args:
            snr_db: Target SNR in dB; 0 means "leave the signal untouched".
            data: Sample array to degrade.

        Returns:
            The transformed samples, or `data` unchanged when `snr_db` is 0.
        """
        if snr_db != 0:
            # min == max pins the otherwise random SNR to a single value.
            Audio_Transform = AddGaussianSNR(min_snr_db=snr_db, max_snr_db=snr_db, p=1.0)
            data = Audio_Transform(data, sample_rate=self.samplerate)
        return data

    def Add_HummingSNR(self, snr_db, audio_signal, frequencies):
        """Add one sine "hum" per entry of `frequencies` to a copy of the signal.

        Args:
            snr_db: Requested signal-to-hum ratio in dB; values <= 0 disable
                the effect.
            audio_signal: Sample array to degrade (not modified).
            frequencies: Iterable of hum frequencies in Hz.

        Returns:
            A new array with the hum added, or `audio_signal` itself when
            `snr_db` is not positive.
        """
        if snr_db > 0:
            originalRMS = self.Calculate_rms(audio_signal)
            print(f"The original level of signal is {originalRMS}")
            noise_RMS = self.Calculate_desired_noise_rms(originalRMS, snr_db)
            # NOTE(review): noise_RMS is a linear RMS value, not dB, although
            # the message below labels it "dB".
            print(f"The noise level of signal is {noise_RMS}dB")
            # NOTE(review): this feeds a linear RMS into a dB->ratio
            # conversion; the result is only printed, never applied.
            noise_amplitude = self.Convert_decibels_to_amplitude_ratio(noise_RMS)
            print(f"comparing to adding noise with {noise_amplitude}")

            # Time axis matching the length of the audio signal.
            t = np.arange(len(audio_signal)) / self.samplerate

            # Work on a copy so the caller's array is left untouched.
            new_audio_signal = np.copy(audio_signal)

            # NOTE(review): noise_RMS is used as the *peak* amplitude of every
            # sine, so the achieved hum RMS differs from noise_RMS (a single
            # sine has RMS = peak / sqrt(2)) -- confirm this is intended.
            for freq in frequencies:
                sine_wave = noise_RMS * np.sin(2 * np.pi * freq * t)
                new_audio_signal += sine_wave
            return new_audio_signal
        else:
            return audio_signal

    def Adding_Limiter(self, data, thres_db, attac_time=0.0001, reles_time=0.0001):
        """Apply audiomentations' Limiter with a fixed threshold, attack and release.

        Args:
            data: Sample array to process.
            thres_db: Threshold magnitude in dB; it is negated and passed with
                `threshold_mode="relative_to_signal_peak"`. 0 disables the
                limiter.
            attac_time: Value used for both `min_attack` and `max_attack`.
            reles_time: Value used for both `min_release` and `max_release`.

        Returns:
            The limited samples, or `data` unchanged when `thres_db` is 0.
        """
        if thres_db != 0:
            # Alternative setting previously tried: attack=0.0005, release=0.05.
            Audiomentations_Transform = Limiter(
                min_threshold_db=-thres_db,
                max_threshold_db=-thres_db,
                min_attack=attac_time,
                max_attack=attac_time,
                min_release=reles_time,
                max_release=reles_time,
                threshold_mode="relative_to_signal_peak",
                p=1.0,
            )
            data = Audiomentations_Transform(data, sample_rate=self.samplerate)
        return data

    def Adding_Clipping(self, samples, sample_rate, clipping_rate):
        """Hard-clip the most extreme `clipping_rate` percent of the samples.

        Half of the percentage is taken from each tail of the sample
        distribution, and the percentile values become the clip limits.

        Args:
            samples: Sample array to clip.
            sample_rate: Unused; kept for interface compatibility.
            clipping_rate: Percentage (0-100) of samples to clip; 0 disables.

        Returns:
            The clipped samples, or `samples` unchanged when the rate is 0.
        """
        if clipping_rate != 0:
            print("starting clipping")
            clipping_rate = round(clipping_rate, 1)
            lower_percentile_threshold = clipping_rate / 2
            lower_threshold, upper_threshold = np.percentile(
                samples, [lower_percentile_threshold, 100 - lower_percentile_threshold]
            )
            samples = np.clip(samples, lower_threshold, upper_threshold)
        return samples

    def DropingSamplesByPercentage(self, percentage, data):
        """Zero out a random fraction of the samples, in place.

        Args:
            percentage: Fraction of samples to drop, strictly between 0 and 1.
                Any other value leaves `data` untouched.
            data: Sample array; modified in place when samples are dropped.

        Returns:
            `data` (the same array object).
        """
        # The original returned a name bound only inside the `if`, so an
        # out-of-range percentage raised UnboundLocalError; return `data`
        # unchanged in that case instead.
        if 0 < percentage < 1:
            num_samples = len(data)
            num_samples_to_drop = int(percentage * num_samples)
            drop_indices = np.random.choice(num_samples, num_samples_to_drop, replace=False)
            data[drop_indices] = 0
        return data

    def DropingSamplesByNum(self, drop_samplenum, data):
        """Zero out `drop_samplenum` randomly chosen samples, in place.

        Args:
            drop_samplenum: Number of samples to drop; values <= 0 disable.
            data: Sample array; modified in place when samples are dropped.

        Returns:
            `data` (the same array object).
        """
        if drop_samplenum > 0:
            num_samples = len(data)
            drop_indices = np.random.choice(num_samples, drop_samplenum, replace=False)
            data[drop_indices] = 0
        return data

    def PlotTimeSignal(self, data):
        """Plot `data` against a time axis spanning 0..`self.duration` seconds."""
        # Use the instance's duration; the original read a notebook-level
        # global `duration`, which is hidden state.
        time = np.linspace(0., self.duration, len(data))  # Time value for each sample
        plt.figure(figsize=(10, 4))
        plt.plot(time, data, label="Waveform")
        plt.xlabel('Time (s)')
        plt.ylabel('Amplitude')
        plt.ylim(-1.1, 1.1)
        plt.title('Waveform of the Audio')
        plt.grid(True)
        plt.legend(loc="upper right")
        plt.show()

    def PlotFFT(self, data):
        """Plot the Hann-windowed magnitude spectrum of `data` in dB on a log frequency axis."""
        # Peak-normalise, skipping the division for an all-zero signal to
        # avoid a divide-by-zero that would fill the spectrum with NaN.
        peak = np.max(np.abs(data))
        if peak > 0:
            data = data / peak
        window = np.hanning(len(data))
        y_windowed = data * window

        fft_result = np.fft.fft(y_windowed, n=len(data))
        frequencies = np.fft.fftfreq(len(data), d=1 / self.samplerate)

        # Keep only the positive half of the spectrum.
        half_N = len(data) // 2
        fft_magnitude = np.abs(fft_result[:half_N]) / len(data)
        # The small offset keeps log10 finite for empty bins.
        fft_magnitude_db = 20 * np.log10(fft_magnitude + 1e-12)
        frequencies = frequencies[:half_N]

        # Plot FFT with log scale.
        plt.figure(figsize=(10, 4))
        plt.plot(frequencies, fft_magnitude_db, color='b')
        plt.xscale("log")  # Logarithmic frequency axis
        plt.xlabel("Frequency (Hz)", fontsize=14)
        plt.ylabel("Magnitude (dB)", fontsize=14)
        plt.title("Enhanced FFT Spectrum (Log Scale)", fontsize=16)
        plt.grid()
        plt.ylim(-120, 0)
        plt.show()

    def SignalFileGenerator(self, audiodata, filename):
        """Normalise `audiodata` and write it as a WAV file under `self.filefold`.

        Args:
            audiodata: Sample array to write.
            filename: File name appended directly to `self.filefold`.

        Returns:
            The path of the written file (`self.filefold + filename`).
        """
        # All data is normalised (audiomentations Normalize) before writing.
        Normalize_Transform = Normalize(p=1.0)
        audiodata = Normalize_Transform(audiodata, self.samplerate)
        output_path = self.filefold + filename
        write(output_path, self.samplerate, audiodata)
        return output_path

    def Mp3MixingFileGenerator(self, audiodata, filename, bitrate=64):
        """Loudness-normalise, write to WAV, and run the LAME compression script.

        Args:
            audiodata: Sample array to process.
            filename: File name of the intermediate WAV file.
            bitrate: Bitrate passed to the script's `-b` option.

        Returns:
            The output path parsed from the script's stdout, or the string
            "File path not found in the output." when no path is reported.
        """
        import subprocess

        # The single file is set to -14 LUFS.
        Lufs_Transform = LoudnessNormalization(min_lufs=-14.0, max_lufs=-14.0, p=1.0)
        mixing_data = Lufs_Transform(audiodata, self.samplerate)
        tmp_file = self.SignalFileGenerator(mixing_data, filename)
        # Pass an argument list instead of an interpolated shell string so
        # file names with spaces or shell metacharacters are handled safely.
        completed = subprocess.run(
            [
                "sh",
                "/home/codecrack/Jnotebook/CODECbreakCode/Audio_LameCompress.sh",
                "-a", str(tmp_file),
                "-b", str(bitrate),
            ],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        command_out = completed.stdout
        match = re.search(r"outputMp3toWavfilepath=\s*(.+?)\s+by FFMPEG", command_out)
        if match:
            file_path = match.group(1)  # Capture the file path
            return file_path
        else:
            print("File path not found in the output.")
            return "File path not found in the output."

    def Calculate_desired_noise_rms(self, clean_rms, snr):
        """Return the noise RMS that gives `snr` dB relative to `clean_rms`."""
        a = float(snr) / 20
        noise_rms = clean_rms / (10 ** a)
        return noise_rms

    def Convert_decibels_to_amplitude_ratio(self, decibels):
        """Convert a level in dB to a linear amplitude ratio (10 ** (dB / 20))."""
        return 10 ** (decibels / 20)

    def Calculate_rms(self, samples):
        """Return the root-mean-square of `samples`."""
        return np.sqrt(np.mean(np.square(samples)))


### testTone Track

In [None]:
## duration in seconds
duration = 8
samplerate = DEFAULT_FS
output_fold = '/home/codecrack/Jnotebook/44k1/Sine/'


def _write_test_tone(tone_frequency, stem_filename):
    """Create a sine tone generator, synthesise its wave and write it to disk.

    Uses the cell-level `duration`, `samplerate` and `output_fold` settings.

    Args:
        tone_frequency: Sine frequency in Hz.
        stem_filename: Name of the WAV file written inside `output_fold`.

    Returns:
        A `(generator, wave)` tuple: the `TestToneEvalClass` instance and the
        sine wave it produced (as returned by `SinWaveGenerator`).
    """
    generator = TestToneEvalClass(duration, samplerate, tone_frequency, output_fold)
    wave = generator.SinWaveGenerator()
    generator.SignalFileGenerator(wave, stem_filename)
    return generator, wave


# One test tone per stem; every stem uses a distinct frequency.
frequency_Vocal = 240
sineTone_Vocal, sinewave_Vocal = _write_test_tone(frequency_Vocal, "vocals.wav")

frequency_Drum = 100
sineTone_Drum, sinewave_Drum = _write_test_tone(frequency_Drum, "drums.wav")

frequency_Guitar = 1200
sineTone_Guitar, sinewave_Guitar = _write_test_tone(frequency_Guitar, "other.wav")

frequency_Bass = 330
sineTone_Bass, sinewave_Bass = _write_test_tone(frequency_Bass, "bass.wav")

## Track Test Import

In [3]:
# Make the project package importable from this notebook.
# NOTE(review): this is a hardcoded absolute path and a mid-notebook import;
# consider moving it to the top import/config cell.
import sys
sys.path.append('/home/codecrack/Jnotebook/')
from CODECbreakCode.AudioMixer import FullTrackAudioMixer
import CODECbreakCode.Evaluator as Evaluator
from CODECbreakCode.Evaluator import MeasureHAAQIOutput

## Gospel Test

In [4]:
# Build the undegraded Gospel reference mix and its 64 kbps MP3 round-trip.
Mixing_Path = '/home/codecrack/Jnotebook/44k1/Gospel'
Noise_Generator_MP3 = FullTrackAudioMixer(Mixing_Path, StartingTime=8)

# Optional loudness adjustment, only for checking a specific level situation:
# Noise_Generator_MP3.ManipulateInitGAIN([-9, -10, 10, -10])

# An all-zero parameter vector is used to produce the reference mix.
# Alternative reference file name used previously: "Reference_IN_Orig.wav".
Referece_File = Noise_Generator_MP3.TestNoisedFullTrack(
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    "Reference_IN_test.wav",
    isNormalised=False,
    isCompensated=True,
)
print(f"Referece_File:{Referece_File}")

Referece_MP3File = Evaluator.Mp3LameLossyCompress(Referece_File, 64)
print(f"Referece_MP3File:{Referece_MP3File}")

Vocal duration orginal is 75.73730158730159 seconds, now is the 8.0, the audio changing to the MONO
Drum duration orginal is 75.73730158730159 seconds, now is the 8.0, the audio changing to the MONO
Bass duration orginal is 75.73730158730159 seconds, now is the 8.0, the audio changing to the MONO
Other duration orginal is 75.73730158730159 seconds, now is the 8.0,  the audio changing to the MONO
Mixing File Load Sucessful
The mixing ouput in the RMS, Vocal: -16.12dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Drum: -18.87dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Bass: -20.83dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Other: -25.72dB, Clipping Ratio&Cliped Num: (0.0, 0)
The pre-mixing ouput(no Normalize, no -14 LUFS) in the RMS, Total: -13.2dB, Clipping Ratio&Cliped Num: (0.0, 0)
It is Unormailzed on each track when mixing
AfterCompensation, The mixing ouput in the RMS, Vocal: -16.12dB, Clipping Ratio&Cliped Num: 



Referece_MP3File:/home/codecrack/Jnotebook/44k1/Gospel/Mixing_Result_Mp3_Wav/Reference_IN_test_64kbps.wav


In [5]:
## Initialise the HAAQI measurement
# The MP3-compressed reference is passed to the constructor as the permanent reference.
MeasureHAAQI = MeasureHAAQIOutput(Referece_MP3File)
# Sanity check: score the reference against itself; the result is shown as the cell output.
MeasureHAAQI.MeasureHAQQIOutput(Referece_MP3File)

0.993324166236273

### Observation Log

##### Gospel track 
When only the vocal track is perturbed and all other tracks are left untouched, the limiter clearly degrades the quality score:
- `[50, 3.0, 0]` scores 0.818
- `[50, 3.0, 3.0]` scores 0.526

Adding clipping and white noise to the other tracks does not change the limiter trend:
- `[50, 3.0, 0.0, 50, 3.0, 0.0, 50, 3.0, 0.0, 50, 3.0, 0.0]` scores 0.72
- `[50, 3.0, 3.0, 50, 3.0, 0.0, 50, 3.0, 0.0, 50, 3.0, 0.0]` scores 0.491

##### In the end it becomes a loudness war, which is the real reason the system goes wrong
The strangest case is this one:
- `[0, 0.0, 3.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0]` scores 0.56.
- `[0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 0.0, 0, 0.0, 3.0]` scores 0.75: adding the limiter on guitar and drum actually boosts the score.
- `[0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 3.0]` scores 0.77: adding the bass limiter is no big deal, only a 0.02 change.
- `[0, 0.0, 6.0, 0, 0.0, 3.0, 0, 0.0, 0.0, 0, 0.0, 3.0]` scores 0.73: even when the vocal limiter is increased by a further 3 dB, the metric treats it as only a small degradation.
- `[0, 0.0, 15.0, 0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 3.0]` scores 0.58, i.e. with the vocal limiter set to -15 dB.

### Test

In [6]:
# Alternative solution tried earlier:
# solution = [0, 0.0, 6.0, 0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 3.0]
solution = [0, 0.0, 0.0, 0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 3.0]

# Three values per stem, in the order vocal, drum, bass, other.
(
    v_int_noise, v_float_clippingper, v_float_IIdynamic,
    d_int_noise, d_float_clippingper, d_float_IIdynamic,
    b_int_noise, b_float_clippingper, b_float_IIdynamic,
    o_int_noise, o_float_clippingper, o_float_IIdynamic,
) = solution

# Encode every stem's settings into the output file name.
_stem_settings = (
    ("V", v_int_noise, v_float_clippingper, v_float_IIdynamic),
    ("D", d_int_noise, d_float_clippingper, d_float_IIdynamic),
    ("B", b_int_noise, b_float_clippingper, b_float_IIdynamic),
    ("O", o_int_noise, o_float_clippingper, o_float_IIdynamic),
)
_stem_tags = "_".join(
    f"{tag}_SNR_{noise}.0_CP_{clipping}_IITH_{limiter}"
    for tag, noise, clipping, limiter in _stem_settings
)
filename = f"audio_mixing_FT_HAQI_{_stem_tags}.wav"

# Mix with the chosen degradations, round-trip through 64 kbps MP3, then score
# against the reference. The score is reported without rounding.
Gener_Audio = Noise_Generator_MP3.TestNoisedFullTrack(
    solution,
    filename,
    isNormalised=False,
    isCompensated=True,
)
Gener_Audio_mp3 = Evaluator.Mp3LameLossyCompress(Gener_Audio, 64)
score = MeasureHAAQI.MeasureHAQQIOutput(Gener_Audio_mp3)
print(f"HAAQI Result is {score}")

The mixing ouput in the RMS, Vocal: -16.12dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Drum: -18.94dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Bass: -20.85dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Other: -25.74dB, Clipping Ratio&Cliped Num: (0.0, 0)
The pre-mixing ouput(no Normalize, no -14 LUFS) in the RMS, Total: -13.21dB, Clipping Ratio&Cliped Num: (0.0, 0)
It is Unormailzed on each track when mixing
AfterCompensation, The mixing ouput in the RMS, Vocal: -16.12dB, Clipping Ratio&Cliped Num: (0.0, 0)
AfterCompensation, The mixing ouput in the RMS, Drum: -18.87dB, Clipping Ratio&Cliped Num: (0.0, 0)
AfterCompensation, The mixing ouput in the RMS, Bass: -20.83dB, Clipping Ratio&Cliped Num: (0.0, 0)
AfterCompensation, The mixing ouput in the RMS, Other: -25.72dB, Clipping Ratio&Cliped Num: (0.0, 0)
After LUFS&Peak Normlizaiton, the mixing ouput in the RMS, Total: -11.35dB, Clipping Ratio&Cliped Num: (0.0, 6)




HAAQI Result is 0.5432479775377717


## Reggae Test

In [3]:
# Build the undegraded Reggae reference mix and its 64 kbps MP3 round-trip.
Mixing_Path = '/home/codecrack/Jnotebook/44k1/Reggea'
Noise_Generator_MP3 = FullTrackAudioMixer(Mixing_Path)

# Optional loudness adjustment, only for checking a specific level situation:
# Noise_Generator_MP3.ManipulateInitGAIN([-10, -10, 10, -10])
# ...and to reset it afterwards:
# Noise_Generator_MP3.ManipulateInitGAIN([0, 0, 0, 0])

# An all-zero parameter vector is used to produce the reference mix.
# Alternative reference file name used previously: "Reference_IN_Orig.wav".
Referece_File = Noise_Generator_MP3.TestNoisedFullTrack(
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    "Reference_IN_Test.wav",
    isNormalised=False,
    isCompensated=True,
)
print(f"Referece_File:{Referece_File}")

Referece_MP3File = Evaluator.Mp3LameLossyCompress(Referece_File, 64)
print(f"Referece_MP3File:{Referece_MP3File}")

Vocal duration orginal is 17.46267573696145 seconds, now is the 8.0, the audio changing to the MONO
Drum duration orginal is 17.46267573696145 seconds, now is the 8.0, the audio changing to the MONO
Bass duration orginal is 17.46267573696145 seconds, now is the 8.0, the audio changing to the MONO
Other duration orginal is 17.46267573696145 seconds, now is the 8.0,  the audio changing to the MONO
Mixing File Load Sucessful
The mixing ouput in the RMS, Vocal: -17.98dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Drum: -14.85dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Bass: -19.59dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Other: -32.89dB, Clipping Ratio&Cliped Num: (0.0, 0)
The pre-mixing ouput(no Normalize, no -14 LUFS) in the RMS, Total: -12.21dB, Clipping Ratio&Cliped Num: (0.0, 0)
It is Unormailzed on each track when mixing
AfterCompensation, The mixing ouput in the RMS, Vocal: -17.98dB, Clipping Ratio&Cliped Num:



In [4]:
## Initialise the HAAQI measurement
# The MP3-compressed reference is passed to the constructor as the permanent reference.
MeasureHAAQI = MeasureHAAQIOutput(Referece_MP3File)
# Sanity check: score the reference against itself; the result is shown as the cell output.
MeasureHAAQI.MeasureHAQQIOutput(Referece_MP3File)

0.9972462823059598

In [21]:
solution = [0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 3.0]

# Three values per stem, in the order vocal, drum, bass, other.
(
    v_int_noise, v_float_clippingper, v_float_IIdynamic,
    d_int_noise, d_float_clippingper, d_float_IIdynamic,
    b_int_noise, b_float_clippingper, b_float_IIdynamic,
    o_int_noise, o_float_clippingper, o_float_IIdynamic,
) = solution

# Encode every stem's settings into the output file name.
_stem_settings = (
    ("V", v_int_noise, v_float_clippingper, v_float_IIdynamic),
    ("D", d_int_noise, d_float_clippingper, d_float_IIdynamic),
    ("B", b_int_noise, b_float_clippingper, b_float_IIdynamic),
    ("O", o_int_noise, o_float_clippingper, o_float_IIdynamic),
)
_stem_tags = "_".join(
    f"{tag}_SNR_{noise}.0_CP_{clipping}_IITH_{limiter}"
    for tag, noise, clipping, limiter in _stem_settings
)
filename = f"audio_mixing_FT_HAQI_{_stem_tags}.wav"

# Mix with the chosen degradations, round-trip through 64 kbps MP3, then score
# against the reference. The score is reported without rounding.
Gener_Audio = Noise_Generator_MP3.TestNoisedFullTrack(
    solution,
    filename,
    isNormalised=False,
    isCompensated=True,
)
Gener_Audio_mp3 = Evaluator.Mp3LameLossyCompress(Gener_Audio, 64)
score = MeasureHAAQI.MeasureHAQQIOutput(Gener_Audio_mp3)
print(f"HAAQI Result is {score}")

The mixing ouput in the RMS, Vocal: -18.09dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Drum: -14.97dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Bass: -19.62dB, Clipping Ratio&Cliped Num: (0.0, 0)
The mixing ouput in the RMS, Other: -32.92dB, Clipping Ratio&Cliped Num: (0.0, 0)
The pre-mixing ouput(no Normalize, no -14 LUFS) in the RMS, Total: -12.31dB, Clipping Ratio&Cliped Num: (0.0, 0)
It is Unormailzed on each track when mixing
AfterCompensation, The mixing ouput in the RMS, Vocal: -17.98dB, Clipping Ratio&Cliped Num: (0.0, 0)
AfterCompensation, The mixing ouput in the RMS, Drum: -14.85dB, Clipping Ratio&Cliped Num: (0.0, 0)
AfterCompensation, The mixing ouput in the RMS, Bass: -19.59dB, Clipping Ratio&Cliped Num: (0.0, 0)
AfterCompensation, The mixing ouput in the RMS, Other: -32.89dB, Clipping Ratio&Cliped Num: (0.0, 0)
After LUFS&Peak Normlizaiton, the mixing ouput in the RMS, Total: -9.13dB, Clipping Ratio&Cliped Num: (0.0013180272



HAAQI Result is 0.7957607628681589


##### Observation Log

The same pattern appears in the Reggae track:
- `[50, 3.0, 0.0, 50, 3.0, 0.0, 50, 3.0, 0.0, 50, 3.0, 0.0]`: 0.689, no limiter
- `[50, 3.0, 3.0, 50, 3.0, 3.0, 50, 3.0, 3.0, 50, 3.0, 3.0]`: 0.549, limiter on all tracks
- `[50, 3.0, 0.0, 50, 3.0, 3.0, 50, 3.0, 3.0, 50, 3.0, 3.0]`: 0.491, limiter on all tracks except vocal
- `[50, 3.0, 3.0, 50, 3.0, 0.0, 50, 3.0, 0.0, 50, 3.0, 0.0]`: 0.562, limiter on vocal only

However, "limiter balance" does not seem to be the only explanation here: in the last two cases the vocal is still unbalanced relative to the other tracks, yet the scores differ.

In limiter-only mode (no noise, no clipping):
- `[0, 0.0, 3.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0]`: 0.721, vocal only
- `[0, 0.0, 0.0, 0, 0.0, 3.0, 0, 0.0, 0.0, 0, 0.0, 0.0]`: 0.646, drum only
- `[0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 0.0, 0, 0.0, 0.0]`: 0.646, drum and vocal only
- `[0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 3.0, 0, 0.0, 3.0]`: 0.795, all tracks