In [1]:
import librosa
import numpy as np
import math
import librosa.display as display
import matplotlib.pyplot as plt
from collections import Counter
import soundfile as sf
import IPython.display as ipd
import csv
import os
import glob

<h1>Generating Initial Features</h1>

In [2]:
def give_music_array_and_sample_rate(audio):
    y, sr = librosa.load(audio)
    return y,sr

In [3]:
def give_tempo_and_beat_frames(y,sr):
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    return tempo,beat_frames

In [4]:
def give_chroma(y,sr):
    chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
    return chroma

In [5]:
def give_pitch_magnitude(y,sr):
    pitch, magnitude = librosa.piptrack(y=y, sr=sr)
    return pitch, magnitude

In [6]:
def give_mfcc(y,sr):
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    return mfcc

In [7]:
def give_spectral_contrast(y,sr):
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    return spectral_contrast

In [8]:
def give_rms(y):
    rms = librosa.feature.rms(y=y)
    return rms

In [9]:
def give_zero_crossing_rate(y):
    zcr = librosa.feature.zero_crossing_rate(y)
    return zcr

In [10]:
def give_onset_envelope_and_onsets(y,sr):
    onset_envelope = librosa.onset.onset_strength(y=y, sr=sr)
    onsets = librosa.onset.onset_detect(y=y, onset_envelope=onset_envelope, sr=sr)
    return onset_envelope,onsets

<h1>Load Initial Features</h1>

In [11]:
def load_initial_features(audio):
    y,sr = give_music_array_and_sample_rate(audio)
    tempo,beat_frames = give_tempo_and_beat_frames(y,sr)
    chroma = give_chroma(y,sr)
    pitch, magnitude = give_pitch_magnitude(y,sr)
    mfcc = give_mfcc(y,sr)
    spectral_contrast = give_spectral_contrast(y,sr)
    rms = give_rms(y)
    zcr = give_zero_crossing_rate(y)
    onset_envelope,onsets = give_onset_envelope_and_onsets(y,sr)
    
    return y, sr, tempo, beat_frames, chroma, pitch, magnitude, mfcc, spectral_contrast, rms, zcr, onset_envelope, onsets

<h1>Utility Functions</h1>

In [12]:
def util_hz_to_cents(arr):
    res = []
    for i in range(arr.size):
        if(arr[i]>0):
            mul = arr[i]/440
            ans = 1200* (math.log2(mul))
            res.append(ans)
    return res

In [13]:
def util_most_frequent(list1):
    count = Counter(list1)
    return max(count, key=count.get) , max(count.values())

In [14]:
def util_return_key_name(key):
    if key == 0:
        return "C"
    elif key == 1:
        return "C#/Db"
    elif key == 2:
        return "D"
    elif key == 3:
        return "D#/Eb"
    elif key == 4:
        return "E"
    elif key == 5:
        return "F"
    elif key == 6:
        return "F#/Gb"
    elif key == 7:
        return "G"
    elif key == 8:
        return "G#/Ab"
    elif key == 9:
        return "A"
    elif key == 10:
        return "A#/Bb"
    else :
        return "B"

In [15]:
def util_return_mode_type_value(chroma):
    ionian = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1]
    dorian = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0]
    phrygian = [1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0]
    lydian = [1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1]
    mixolydian = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0]
    aeolian = [1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0]
    locrian = [1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0]
    
    chroma_mean = np.mean(chroma, axis=1)
    
    # Compute the correlation between the chroma features and each reference pattern
    corr_ionian = np.corrcoef(chroma_mean, ionian)[0, 1]
    corr_dorian = np.corrcoef(chroma_mean, dorian)[0, 1]
    corr_phrygian = np.corrcoef(chroma_mean, phrygian)[0, 1]
    corr_lydian = np.corrcoef(chroma_mean, lydian)[0, 1]
    corr_mixolydian = np.corrcoef(chroma_mean, mixolydian)[0, 1]
    corr_aeolian = np.corrcoef(chroma_mean, aeolian)[0, 1]
    corr_locrian = np.corrcoef(chroma_mean, locrian)[0, 1]

    # Determine the mode with the highest correlation
    mode_type = np.argmax([corr_ionian, corr_dorian, corr_phrygian, corr_lydian, corr_mixolydian, corr_aeolian, corr_locrian]) 
    return mode_type

In [16]:
def util_return_mode_type_name(mode_type_value):
    if mode_type_value == 0:
        return "ionian"
    elif mode_type_value == 1:
        return "dorian"
    elif mode_type_value == 2:
        return "phrygian"
    elif mode_type_value == 3:
        return "lydian"
    elif mode_type_value == 4:
        return "mixolydian"
    elif mode_type_value == 5:
        return "aeolian"
    else :
        return "locrian"

<h1>Feature Extraction Starts</h1>

In [17]:
def get_number_of_beats(beat_frames):
    n_beats = len(beat_frames)
    return n_beats

In [18]:
def get_hemitonicity(spectral_contrast):
    hemitonicity = np.mean(spectral_contrast)
    return hemitonicity

In [19]:
def get_melodic_range(pitch):
    pitch_sequence = np.argmax(pitch, axis=0)
    cents = util_hz_to_cents(pitch_sequence)
    min_cents     = np.min(cents)
    max_cents     = np.max(cents)
    melodic_range = max_cents - min_cents
    return melodic_range

In [20]:
def get_melisma(mfcc,spectral_contrast,rms,zcr):
    melisma = np.mean(mfcc) * np.mean(spectral_contrast) * np.mean(rms) * np.mean(zcr)
    return melisma

In [21]:
def get_phrase_min_max_mean_overlap(onsets,sr):
    phrase_length = np.diff(onsets)
    phrase_length_sec = phrase_length / sr
    
    max_phrase_len = np.max(phrase_length_sec)
    min_phrase_len = np.min(phrase_length_sec)
    mean_phrase_len = max_phrase_len - min_phrase_len
    
    phrase_symmetry = max_phrase_len/min_phrase_len
    
    overlap = []
    for i in range(len(phrase_length) - 1):
        overlap.append(sum(phrase_length[i:i+2]) / max(phrase_length[i:i+2]))

    num,times = util_most_frequent(overlap)
    phrase_overlap = (times/len(overlap))*100
    
    return max_phrase_len, min_phrase_len, mean_phrase_len, phrase_symmetry, phrase_overlap

In [22]:
def get_key(chroma):
    key = np.argmax(np.mean(chroma, axis=1))
    key_name = util_return_key_name(key)
    return key,key_name

In [23]:
def get_mode(chroma):
    mode = 1 if np.mean(chroma[5:7]) > np.mean(chroma[-5:-3]) else 0
    return mode

In [24]:
def get_mode_type(chroma):
    mode_type_value = util_return_mode_type_value(chroma)
    mode_type_name  = util_return_mode_type_name(mode_type_value)
    return mode_type_value, mode_type_name

In [25]:
def get_tuning(y,sr):
    tuning = librosa.estimate_tuning(y=y,sr=sr)
    return tuning

In [26]:
def get_timbre(y,sr):
    timbre = librosa.feature.spectral_centroid(y=y, sr=sr)
    timbre_mean = np.mean(timbre)
    return timbre, timbre_mean

<h2>Confusing but Important Features</h2>

In [27]:
def get_texture(y,sr):
    ## feature is incomplete
    ## need an R&D
    texture_matrix = librosa.feature.melspectrogram(y=y, sr=sr)
    #texture_matrix = librosa.feature.spectral_contrast(y=y, sr=sr)
    texture = (np.mean(texture_matrix, axis=1))
    return texture

In [28]:
def get_dynamics(y):
    ## feature is incomplete
    ## need an R&D
    dynamics = give_rms(y)
    return dynamics

In [29]:
def get_motif(y,sr):
    ## feature is incomplete
    ## need an R&D
    motif_matrix = librosa.feature.tempogram(y=y, sr=sr)
    motif = (np.mean(motif_matrix, axis=1))
    return motif

<h1>Write CSV</h1>

In [30]:
fieldnames = ["Country","Song Name","Number Of Beats","Tempo","Hemitonicity","Melodic Range","Melisma","Max Phrase Length",
              "Min Phrase Length","Avg Phrase Length","Phrase Symmetry","Phrase Overlap","Key",
              "Mode","Mode Type","Tuning","Timbre","Timbre Mean"]

In [31]:
def generate_dictionary_for_csv(fieldnames,feature_list):
    dict = []
    obj = {}
    for i in range(len(fieldnames)):
        obj[fieldnames[i]] = feature_list[i]
    dict.append(obj)
    return dict  

In [32]:
def has_header(csv_path):
    try:
        with open(csv_path, 'r') as file:
            reader = csv.reader(file)
            header = next(reader)
            return header is not None
    except StopIteration:
        return False

In [33]:
def create_csv(csv_path,feature_list):
    with open(csv_path,mode='a') as file:
        writer = csv.DictWriter(file,fieldnames=fieldnames)
        if not has_header(csv_path):
            writer.writeheader()
        dict = generate_dictionary_for_csv(fieldnames,feature_list)
        writer.writerows(dict)
        file.close()    

In [34]:
def get_features(audio,song_name):
    print(audio)
    ## Loading the initial features
    y, sr, tempo, beat_frames, chroma, pitch, magnitude, mfcc, spectral_contrast, rms, zcr, onset_envelope, onsets = load_initial_features(audio)
    
    ## Getting values of the featues
    n_beats = get_number_of_beats(beat_frames)
    hemitonicity = get_hemitonicity(spectral_contrast)
    melodic_range = get_melodic_range(pitch)
    melisma = get_melisma(mfcc,spectral_contrast,rms,zcr)
    max_phrase_len, min_phrase_len, avg_phrase_len, phrase_symmetry, phrase_overlap = get_phrase_min_max_mean_overlap(onsets,sr)
    key,key_name = get_key(chroma)
    mode = get_mode(chroma)
    mode_type_value,mode_type_name = get_mode_type(chroma)
    tuning = get_tuning(y,sr)
    timbre, timbre_mean = get_timbre(y,sr)
    
    ## Creating feature_list 
    feature_list = []
    feature_list.append(1)
    feature_list.append(song_name)
    feature_list.append(n_beats)
    feature_list.append(tempo)
    feature_list.append(hemitonicity)
    feature_list.append(melodic_range)
    feature_list.append(melisma)
    feature_list.append(max_phrase_len)
    feature_list.append(min_phrase_len)
    feature_list.append(avg_phrase_len)
    feature_list.append(phrase_symmetry)
    feature_list.append(phrase_overlap)
    feature_list.append(key)
    feature_list.append(mode)
    feature_list.append(mode_type_value)
    feature_list.append(tuning)
    feature_list.append(timbre)
    feature_list.append(timbre_mean)
    
    ## Printing the features
#     print("Number Of Beats:", n_beats)
#     print("Tempo:",tempo)
#     print("Hemitonicity:", hemitonicity)
#     print("Melodic Range:",melodic_range)
#     print("Melisma:",melisma)
#     print("Max Phrase Length:",max_phrase_len,"\nMax Phrase Length::",min_phrase_len,"\nAvg Phrase Length::",mean_phrase_len)
#     print("Phrase Symmetry:",phrase_symmetry)
#     print("Phrase Overlap:",phrase_overlap)
#     print("Key:",key_name,"-",key)
#     print("Mode:","In Major Key -" if mode==0 else "In Minor Key -",mode)
#     print("Mode Type:",mode_type_name,"-",mode_type_value)
#     print("Tuning:",tuning)
#     print("Timbre:",timbre,"\nTimbre Mean:",timbre_mean)
    
    return feature_list

In [35]:
audio_path = 'data\\testing\\'
csv_path = 'csvfile.csv'

for audio in glob.glob(audio_path + '*'):
    song_name = audio.split('\\')[-1]
    feature_list = get_features(audio,song_name)
    create_csv(csv_path,feature_list)
    print("\n")

data\testing\Arnob-She je boshe ache.wav


data\testing\Spiritbox - The Mara Effect live at Silverside Sound.wav


data\testing\Take  No Prisoners - Megadeth (original version).wav


data\testing\Vibe - Odhora (with lyrics).wav




In [37]:
import pandas as pd
data = pd.read_csv("csvfile.csv")
data

Unnamed: 0,Country,Song Name,Number Of Beats,Tempo,Hemitonicity,Melodic Range,Melisma,Max Phrase Length,Min Phrase Length,Avg Phrase Length,Phrase Symmetry,Phrase Overlap,Key,Mode,Mode Type,Tuning,Timbre,Timbre Mean
0,1,Arnob-She je boshe ache.wav,437,129.199219,26.446217,5673.504545,-0.641634,0.003628,9.1e-05,0.003537,40.0,19.252078,11,0,3,0.09,[[0. 0. 0. ... 0. 0. 0.]],2605.370422
1,1,Spiritbox - The Mara Effect live at Silverside...,1762,107.666016,24.030729,5673.504545,1.74141,0.008662,9.1e-05,0.008571,95.5,7.413088,6,0,3,0.05,[[ 0. 0. 0. .....,2402.74173
2,1,Take No Prisoners - Megadeth (original versio...,540,161.499023,21.134573,5442.330452,0.952498,0.004263,9.1e-05,0.004172,47.0,15.184049,6,1,6,0.05,[[0. 0. 0. ... 0. 0. 0.]],2411.465939
3,1,Vibe - Odhora (with lyrics).wav,594,129.199219,28.259078,5673.504545,1.488525,0.002766,9.1e-05,0.002676,30.5,13.048246,2,0,3,0.01,[[ 0. 0. 0. .....,2511.507527
