# Clean Audio Metric 

Goal: Use this notebook to measure the methods to clean the dancer's video's audio to compare to the song track they are dancing to.

In [1]:
# OS Library
import os

# MoviePy Library
from moviepy.editor import *

# MatPlotLib
import matplotlib.pyplot as plt

# Data Reading/Displaying LIbraries
import pandas as pd
import IPython.display as ipd

# Libraries to process the audio
import librosa
import numpy as np
import scipy.signal as signal
import scipy.io
from scipy.signal import lfilter
from scipy.signal import stft, istft
from scipy.ndimage import median_filter
from sklearn.metrics import mean_squared_error
import mir_eval
import soundfile as sf

In [2]:
def find(ext, path):
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(ext):
                f = os.path.join(root, file)

    return f

In [3]:
# Convert each video audio to mp3
rbdys_dir = os.path.join('..', 'Data', 'Red Bull Dance Your Style 2022')
sub_dirs = [sub_dir[0] for sub_dir in os.walk(rbdys_dir)]
sub_dirs.remove(rbdys_dir)

for dir in sub_dirs:
    clip = find('.mp4', dir)
    video_clip = VideoFileClip(clip)
    audio = video_clip.audio

    base, ext = os.path.splitext(clip)
    new_file = base + '.mp3'
    audio_file = os.path.join(new_file)
    audio.write_audiofile(audio_file)

    video_clip.close()
    audio.close()

MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\Angyil\Angyil_clip.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\Flexx\Flexx_clip.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\King Davinci\King Davinci_clip.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\Luwan\Luwan_clip.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\Mickey\Mickey_clip.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\Pop.Korn\Pop.Korn_clip.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\Sara\Sara_clip.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\The Crown\The Crown_clip.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\The D Soraki\The D Soraki_clip.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ..\Data\Red Bull Dance Your Style 2022\Yoe\Yoe_clip.mp3


                                                                     

MoviePy - Done.




In [11]:
# Save the Spectrogram data for analysis (original video audio and song audio)
for dir in sub_dirs:
    for root, dirs, files in os.walk(dir):
        for file in files:
            if file.endswith('.mp3'):
                audio_file = os.path.join(root, file)
                x, Fs = librosa.load(audio_file)

                if len(x.shape) == 2:
                    x = librosa.to_mono(x)

                N = 4096
                H = 1024
                L = x.shape[0]
                X = librosa.stft(x, n_fft=N, hop_length=H, win_length=N,
                     window='hann', pad_mode='constant', center=True)
                
                base, ext = os.path.splitext(audio_file)
                data_file = base + '.mat'
                scipy.io.savemat(data_file, {'time': x,
                                             'sample freq': Fs,
                                             'spectrogram': X,
                                             'N': N,
                                             'H': H,
                                             'L': L})

In [6]:
def dtw(main_audio, compared_audio, sr, hop_length):
    # Extract Mel-Frequency cepstral coefficients (MFCC)
    mfcc_orig = librosa.feature.mfcc(y=main_audio, sr=sr, n_mfcc=13)
    mfcc_comp = librosa.feature.mfcc(y=compared_audio, sr=sr, n_mfcc=13)

    # Compute the DTW
    D, wp = librosa.sequence.dtw(X=mfcc_orig, Y=mfcc_comp, metric='cosine')

    # Extract the matching point
    matching_points = np.array(wp)

    # Find the best matching point
    min_distance_index = np.argmin(D[-1])
    start_index_in_video = matching_points[min_distance_index, 1]
    start_time_in_video = start_index_in_video * hop_length / sr
    
    return start_time_in_video

In [15]:
# Load .mat files
for dir in sub_dirs:
    for root, dirs, files in os.walk(dir):
        for file in files:
            if file.endswith('.mat'):
                if '_clip' in file:
                    video_data_file = os.path.join(root, file)
                    video_data = scipy.io.loadmat(video_data_file)
                else:
                    song_data_file = os.path.join(root, file)
                    song_data = scipy.io.loadmat(video_data_file)

        # Compare video audio to song audio
        video_data_time = video_data['time']
        if video_data_time.ndim == 2 and video_data_time.shape[0] == 1:
            video_data_time = video_data_time.flatten()
        video_data_Fs = video_data['sample freq']
        video_data_spec = video_data['spectrogram']
        video_data_N = video_data['N']
        video_data_L = video_data['L']
        video_data_H = video_data['H']

        song_data_time = song_data['time']
        print(song_data_time.shape)
        if song_data_time.ndim == 2 and song_data_time.shape[0] == 1:
            song_data_time = song_data_time.flatten()
        song_data_Fs = song_data['sample freq']
        song_data_spec = song_data['spectrogram']
        song_data_N = song_data['N']
        song_data_L = song_data['L']
        song_data_H = song_data['H']

        # Use DTW to get exact clip and duration to use for the song audio
        song_timestamp = dtw(song_data_time, video_data_time, song_data_Fs, song_data_H)
        print(dir)
        print(f"The song starts at approximately {song_timestamp[0][0]:.2f} seconds in the video.")


        # # Ensure both signals are the same length
        # min_length = min(len(song_data_time), len(video_data_time))
        # song_data_time = song_data_time[:min_length]
        # video_data_time = video_data_time[:min_length]

        # # Compute SNR
        # signal_power = np.mean(song_data_time**2)
        # noise_power = np.mean((song_data_time - video_data_time)**2)

        # snr = 10*np.log10(signal_power/noise_power)

        # # Compute SDR, SIR, and SAR
        # sdr, sir, sar, _ = mir_eval.separation.bss_eval_sources(
        #     np.array([song_data_time]), np.array([video_data_time])
        # )

        # # Calculate RMSE
        # rmse = np.sqrt(mean_squared_error(song_data_time, video_data_time))

        # print(dir)
        # print(f"SNR: {snr:.2f} dB")
        # print(f"SDR: {sdr[0]:.2f} dB")
        # print(f"SIR: {sir[0]:.2f} dB")
        # print(f"SAR: {sar[0]:.2f} dB")
        # print(f"RMSE: {rmse:.2f}")
        # print('\t\t')

(1, 1280003)
..\Data\Red Bull Dance Your Style 2022\Angyil
The song starts at approximately 0.00 seconds in the video.
(1, 1280003)
..\Data\Red Bull Dance Your Style 2022\Flexx
The song starts at approximately 0.00 seconds in the video.
(1, 1280003)
..\Data\Red Bull Dance Your Style 2022\King Davinci
The song starts at approximately 126.41 seconds in the video.
(1, 1676682)
..\Data\Red Bull Dance Your Style 2022\Luwan
The song starts at approximately 99.75 seconds in the video.
(1, 1103603)
..\Data\Red Bull Dance Your Style 2022\Mickey
The song starts at approximately 99.10 seconds in the video.
(1, 1147482)
..\Data\Red Bull Dance Your Style 2022\Pop.Korn
The song starts at approximately 75.88 seconds in the video.
(1, 1213632)
..\Data\Red Bull Dance Your Style 2022\Sara
The song starts at approximately 125.20 seconds in the video.
(1, 1522332)
..\Data\Red Bull Dance Your Style 2022\The Crown
The song starts at approximately 91.90 seconds in the video.
(1, 1125432)
..\Data\Red Bull Dan