In [3]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from pydub import AudioSegment

In [4]:
def eucl_dist(vec1, vec2):
    """Distance between two feature matrices over their shared leading columns.

    The inputs may have different numbers of columns (axis 1). Both are cut
    down to the smaller column count, keeping the leading columns, so that
    they can be subtracted element-wise.

    Args:
        vec1: array exposing ``shape[1]`` and supporting ``[:, :k]`` slicing.
        vec2: array exposing ``shape[1]`` and supporting ``[:, :k]`` slicing.

    Returns:
        ``np.linalg.norm`` of the element-wise difference of the trimmed
        arrays (the Frobenius norm when the inputs are 2-D).
    """
    shared_cols = min(vec1.shape[1], vec2.shape[1])
    # Slicing the operand that is already `shared_cols` wide changes nothing,
    # so both can be trimmed without checking which one is longer.
    left = vec1[:, :shared_cols]
    right = vec2[:, :shared_cols]
    return np.linalg.norm(left - right)

In [16]:
# PROPERTY OF DEEPMIND1234
def extract_features(file_path, sr=22050, n_mfcc=13):
    """Load an audio file and stack MFCC, chroma and spectral-contrast features.

    Args:
        file_path: path of the audio file, passed straight to ``librosa.load``.
        sr: sampling rate requested from ``librosa.load``. Defaults to 22050,
            the value this function previously hard-coded.
        n_mfcc: number of MFCC coefficients to compute. Defaults to 13, the
            value this function previously hard-coded.

    Returns:
        The MFCC, chroma-CQT and spectral-contrast matrices concatenated
        along axis 0, in that order.
    """
    # Load the audio file; keep the rate librosa reports so every feature
    # extractor below is given the rate the samples actually have.
    y, sr = librosa.load(file_path, sr=sr)

    # Extract the MFCC features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # Extract the chroma features
    chroma = librosa.feature.chroma_cqt(y=y, sr=sr)

    # Extract the spectral contrast features
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # Stack the three matrices row-wise. np.concatenate along axis 0 requires
    # them to agree on every other dimension.
    # NOTE(review): the three feature families are concatenated unscaled;
    # confirm downstream distance measures are not dominated by one of them.
    features = np.concatenate([mfcc, chroma, contrast], axis=0)

    return features

In [24]:
# Extract one feature matrix per recording; paths are processed in the order
# listed and bound to the matching names on the left.
features_d, features_a, features_s, features_ta = map(
    extract_features,
    (
        'media/deepak_test_1.wav',
        'media/aadarsh_test_1.wav',
        'media/sean_test_1.wav',
        'media/test_vector.wav',
    ),
)

In [26]:
# Print the distance for each pair of speaker recordings, one per line.
speaker_pairs = (
    (features_d, features_a),
    (features_d, features_s),
    (features_s, features_a),
)
for first, second in speaker_pairs:
    print(eucl_dist(first, second))

1798.4835678049874
1884.2936605450861
1556.6665932492106


In [25]:
# Print the distance from each speaker recording to the test vector,
# one per line, in the order d, a, s.
for speaker_features in (features_d, features_a, features_s):
    print(eucl_dist(speaker_features, features_ta))

2442.5959027852955
2189.013501486318
2005.0505213918545
