In [70]:
import librosa
import numpy as np
import math
import librosa.display as display
import matplotlib.pyplot as plt
from collections import Counter
import soundfile as sf
import IPython.display as ipd

<h1>Generating Initial Features</h1>

In [137]:
def give_music_array_and_sample_rate(audio):
    y, sr = librosa.load(audio)
    return y,sr

In [138]:
def give_tempo_and_beat_frames(y,sr):
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    return tempo,beat_frames

In [139]:
def give_chroma(y,sr):
    chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
    return chroma

In [140]:
def give_pitch_magnitude(y,sr):
    pitch, magnitude = librosa.piptrack(y=y, sr=sr)
    return pitch, magnitude

In [141]:
def give_mfcc(y,sr):
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    return mfcc

In [142]:
def give_spectral_contrast(y,sr):
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    return spectral_contrast

In [143]:
def give_rms(y):
    rms = librosa.feature.rms(y=y)
    return rms

In [144]:
def give_zero_crossing_rate(y):
    zcr = librosa.feature.zero_crossing_rate(y)
    return zcr

In [145]:
def give_onset_envelope_and_onsets(y,sr):
    onset_envelope = librosa.onset.onset_strength(y=y, sr=sr)
    onsets = librosa.onset.onset_detect(y=y, onset_envelope=onset_envelope, sr=sr)
    return onset_envelope,onsets

<h1>Load Initial Features</h1>

In [146]:
def load_initial_features(audio):
    y,sr = give_music_array_and_sample_rate(audio)
    tempo,beat_frames = give_tempo_and_beat_frames(y,sr)
    chroma = give_chroma(y,sr)
    pitch, magnitude = give_pitch_magnitude(y,sr)
    mfcc = give_mfcc(y,sr)
    spectral_contrast = give_spectral_contrast(y,sr)
    rms = give_rms(y)
    zcr = give_zero_crossing_rate(y)
    onset_envelope,onsets = give_onset_envelope_and_onsets(y,sr)
    
    return y, sr, tempo, beat_frames, chroma, pitch, magnitude, mfcc, spectral_contrast, rms, zcr, onset_envelope, onsets

<h1>Utility Functions</h1>

In [187]:
def util_hz_to_cents(arr):
    res = []
    for i in range(arr.size):
        if(arr[i]>0):
            mul = arr[i]/440
            ans = 1200* (math.log2(mul))
            res.append(ans)
    return res

In [147]:
def util_most_frequent(list1):
    count = Counter(list1)
    return max(count, key=count.get) , max(count.values())

In [148]:
def util_return_key_name(key):
    if key == 0:
        return "C"
    elif key == 1:
        return "C#/Db"
    elif key == 2:
        return "D"
    elif key == 3:
        return "D#/Eb"
    elif key == 4:
        return "E"
    elif key == 5:
        return "F"
    elif key == 6:
        return "F#/Gb"
    elif key == 7:
        return "G"
    elif key == 8:
        return "G#/Ab"
    elif key == 9:
        return "A"
    elif key == 10:
        return "A#/Bb"
    else :
        return "B"

In [149]:
def util_return_mode_type_value(chroma):
    ionian = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1]
    dorian = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0]
    phrygian = [1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0]
    lydian = [1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1]
    mixolydian = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0]
    aeolian = [1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0]
    locrian = [1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0]
    
    chroma_mean = np.mean(chroma, axis=1)
    
    # Compute the correlation between the chroma features and each reference pattern
    corr_ionian = np.corrcoef(chroma_mean, ionian)[0, 1]
    corr_dorian = np.corrcoef(chroma_mean, dorian)[0, 1]
    corr_phrygian = np.corrcoef(chroma_mean, phrygian)[0, 1]
    corr_lydian = np.corrcoef(chroma_mean, lydian)[0, 1]
    corr_mixolydian = np.corrcoef(chroma_mean, mixolydian)[0, 1]
    corr_aeolian = np.corrcoef(chroma_mean, aeolian)[0, 1]
    corr_locrian = np.corrcoef(chroma_mean, locrian)[0, 1]

    # Determine the mode with the highest correlation
    mode_type = np.argmax([corr_ionian, corr_dorian, corr_phrygian, corr_lydian, corr_mixolydian, corr_aeolian, corr_locrian]) 
    return mode_type

In [150]:
def util_return_mode_type_name(mode_type_value):
    if mode_type_value == 0:
        return "ionian"
    elif mode_type_value == 1:
        return "dorian"
    elif mode_type_value == 2:
        return "phrygian"
    elif mode_type_value == 3:
        return "lydian"
    elif mode_type_value == 4:
        return "mixolydian"
    elif mode_type_value == 5:
        return "aeolian"
    else :
        return "locrian"

<h1>Feature Extraction Starts</h1>

In [168]:
def get_number_of_beats(beat_frames):
    n_beats = len(beat_frames)
    return n_beats

In [169]:
def get_hemitonicity(spectral_contrast):
    hemitonicity = np.mean(spectral_contrast)
    return hemitonicity

In [188]:
def get_melodic_range(pitch):
    pitch_sequence = np.argmax(pitch, axis=0)
    cents = util_hz_to_cents(pitch_sequence)
    min_cents     = np.min(cents)
    max_cents     = np.max(cents)
    melodic_range = max_cents - min_cents
    return melodic_range

In [171]:
def get_melisma(mfcc,spectral_contrast,rms,zcr):
    melisma = np.mean(mfcc) * np.mean(spectral_contrast) * np.mean(rms) * np.mean(zcr)
    return melisma

In [172]:
def get_phrase_min_max_mean_overlap(onsets,sr):
    phrase_length = np.diff(onsets)
    phrase_length_sec = phrase_length / sr
    
    max_phrase_len = np.max(phrase_length_sec)
    min_phrase_len = np.min(phrase_length_sec)
    mean_phrase_len = max_phrase_len - min_phrase_len
    
    phrase_symmetry = max_phrase_len/min_phrase_len
    
    overlap = []
    for i in range(len(phrase_length) - 1):
        overlap.append(sum(phrase_length[i:i+2]) / max(phrase_length[i:i+2]))

    num,times = util_most_frequent(overlap)
    phrase_overlap = (times/len(overlap))*100
    
    return max_phrase_len, min_phrase_len, mean_phrase_len, phrase_symmetry, phrase_overlap

In [173]:
def get_key(chroma):
    key = np.argmax(np.mean(chroma, axis=1))
    key_name = util_return_key_name(key)
    return key,key_name

In [174]:
def get_mode(chroma):
    mode = 1 if np.mean(chroma[5:7]) > np.mean(chroma[-5:-3]) else 0
    return mode

In [175]:
def get_mode_type(chroma):
    mode_type_value = util_return_mode_type_value(chroma)
    mode_type_name  = util_return_mode_type_name(mode_type_value)
    return mode_type_value, mode_type_name

In [176]:
def get_tuning(y,sr):
    tuning = librosa.estimate_tuning(y=y,sr=sr)
    return tuning

In [177]:
def get_timbre(y,sr):
    timbre = librosa.feature.spectral_centroid(y=y, sr=sr)
    timbre_mean = np.mean(timbre)
    return timbre, timbre_mean

<h2>Confusing but Important Features</h2>

In [179]:
def get_texture(y,sr):
    ## feature is incomplete
    ## need an R&D
    texture_matrix = librosa.feature.melspectrogram(y=y, sr=sr)
    #texture_matrix = librosa.feature.spectral_contrast(y=y, sr=sr)
    texture = (np.mean(texture_matrix, axis=1))
    return texture

In [180]:
def get_dynamics(y):
    ## feature is incomplete
    ## need an R&D
    dynamics = give_rms(y)
    return dynamics

In [181]:
def get_motif(y,sr):
    ## feature is incomplete
    ## need an R&D
    motif_matrix = librosa.feature.tempogram(y=y, sr=sr)
    motif = (np.mean(motif_matrix, axis=1))
    return motif

In [193]:
def get_features():
    print("bal")
    #audio_path = 'data/Vibe - Odhora (with lyrics).wav'
    audio = 'data/Vibe - Odhora (with lyrics).wav'
    y, sr, tempo, beat_frames, chroma, pitch, magnitude, mfcc, spectral_contrast, rms, zcr, onset_envelope, onsets = load_initial_features(audio)
    ## Featues
    n_beats = number_of_beats(beat_frames)
    hemitonicity = get_hemitonicity(spectral_contrast)
    melodic_range = get_melodic_range(pitch)
    melisma = get_melisma(mfcc,spectral_contrast,rms,zcr)
    max_phrase_len, min_phrase_len, mean_phrase_len, phrase_symmetry, phrase_overlap = get_phrase_min_max_mean_overlap(onsets,sr)
    key,key_name = get_key(chroma)
    mode = get_mode(chroma)
    mode_type_value,mode_type_name = get_mode_type(chroma)
    tuning = get_tuning(y,sr)
    timbre, timbre_mean = get_timbre(y,sr)
    print(n_beats)
    print(tempo)
    print(hemitonicity)
    print(melodic_range)
    print(melisma)
    print(max_phrase_len, min_phrase_len, mean_phrase_len, phrase_symmetry, phrase_overlap)
    print(key,key_name)
    print(mode)
    print(mode_type_value,mode_type_name)
    print(tuning)
    print(timbre, timbre_mean)
    

In [None]:
get_features()

bal
