In [40]:
import librosa
import numpy as np
import math
import librosa.display as display
import matplotlib.pyplot as plt
from collections import Counter
import soundfile as sf
import IPython.display as ipd
import csv
import os
import glob

<h1>Generating Initial Features</h1>

In [41]:
def give_music_array_and_sample_rate(audio):
    y, sr = librosa.load(audio)
    return y,sr

In [42]:
def give_tempo_and_beat_frames(y,sr):
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    return tempo,beat_frames

In [43]:
def give_chroma(y,sr):
    chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
    return chroma

In [44]:
def give_pitch_magnitude(y,sr):
    pitch, magnitude = librosa.piptrack(y=y, sr=sr)
    return pitch, magnitude

In [45]:
def give_mfcc(y,sr):
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    return mfcc

In [46]:
def give_spectral_contrast(y,sr):
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    return spectral_contrast

In [47]:
def give_rms(y):
    rms = librosa.feature.rms(y=y)
    return rms

In [48]:
def give_zero_crossing_rate(y):
    zcr = librosa.feature.zero_crossing_rate(y)
    return zcr

In [49]:
def give_onset_envelope_and_onsets(y,sr):
    onset_envelope = librosa.onset.onset_strength(y=y, sr=sr)
    onsets = librosa.onset.onset_detect(y=y, onset_envelope=onset_envelope, sr=sr)
    return onset_envelope,onsets

<h1>Load Initial Features</h1>

In [50]:
def load_initial_features(audio):
    y,sr = give_music_array_and_sample_rate(audio)
    tempo,beat_frames = give_tempo_and_beat_frames(y,sr)
    chroma = give_chroma(y,sr)
    pitch, magnitude = give_pitch_magnitude(y,sr)
    mfcc = give_mfcc(y,sr)
    spectral_contrast = give_spectral_contrast(y,sr)
    rms = give_rms(y)
    zcr = give_zero_crossing_rate(y)
    onset_envelope,onsets = give_onset_envelope_and_onsets(y,sr)
    
    return y, sr, tempo, beat_frames, chroma, pitch, magnitude, mfcc, spectral_contrast, rms, zcr, onset_envelope, onsets

<h1>Utility Functions</h1>

In [51]:
def util_hz_to_cents(arr):
    res = []
    for i in range(arr.size):
        if(arr[i]>0):
            mul = arr[i]/440
            ans = 1200* (math.log2(mul))
            res.append(ans)
    return res

In [52]:
def util_most_frequent(list1):
    count = Counter(list1)
    return max(count, key=count.get) , max(count.values())

In [53]:
def util_return_key_name(key):
    if key == 0:
        return "C"
    elif key == 1:
        return "C#/Db"
    elif key == 2:
        return "D"
    elif key == 3:
        return "D#/Eb"
    elif key == 4:
        return "E"
    elif key == 5:
        return "F"
    elif key == 6:
        return "F#/Gb"
    elif key == 7:
        return "G"
    elif key == 8:
        return "G#/Ab"
    elif key == 9:
        return "A"
    elif key == 10:
        return "A#/Bb"
    else :
        return "B"

In [54]:
def util_return_mode_type_value(chroma):
    ionian = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1]
    dorian = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0]
    phrygian = [1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0]
    lydian = [1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1]
    mixolydian = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0]
    aeolian = [1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0]
    locrian = [1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0]
    
    chroma_mean = np.mean(chroma, axis=1)
    
    # Compute the correlation between the chroma features and each reference pattern
    corr_ionian = np.corrcoef(chroma_mean, ionian)[0, 1]
    corr_dorian = np.corrcoef(chroma_mean, dorian)[0, 1]
    corr_phrygian = np.corrcoef(chroma_mean, phrygian)[0, 1]
    corr_lydian = np.corrcoef(chroma_mean, lydian)[0, 1]
    corr_mixolydian = np.corrcoef(chroma_mean, mixolydian)[0, 1]
    corr_aeolian = np.corrcoef(chroma_mean, aeolian)[0, 1]
    corr_locrian = np.corrcoef(chroma_mean, locrian)[0, 1]

    # Determine the mode with the highest correlation
    mode_type = np.argmax([corr_ionian, corr_dorian, corr_phrygian, corr_lydian, corr_mixolydian, corr_aeolian, corr_locrian]) 
    return mode_type

In [55]:
def util_return_mode_type_name(mode_type_value):
    if mode_type_value == 0:
        return "ionian"
    elif mode_type_value == 1:
        return "dorian"
    elif mode_type_value == 2:
        return "phrygian"
    elif mode_type_value == 3:
        return "lydian"
    elif mode_type_value == 4:
        return "mixolydian"
    elif mode_type_value == 5:
        return "aeolian"
    else :
        return "locrian"

<h1>Feature Extraction Starts</h1>

In [56]:
def get_number_of_beats(beat_frames):
    n_beats = len(beat_frames)
    return n_beats

In [57]:
def get_hemitonicity(spectral_contrast):
    hemitonicity = np.mean(spectral_contrast)
    return hemitonicity

In [58]:
def get_melodic_range(pitch):
    pitch_sequence = np.argmax(pitch, axis=0)
    cents = util_hz_to_cents(pitch_sequence)
    min_cents     = np.min(cents)
    max_cents     = np.max(cents)
    melodic_range = max_cents - min_cents
    return melodic_range

In [59]:
def get_melisma(mfcc,spectral_contrast,rms,zcr):
    melisma = np.mean(mfcc) * np.mean(spectral_contrast) * np.mean(rms) * np.mean(zcr)
    return melisma

In [60]:
def get_phrase_min_max_mean_overlap(onsets,sr):
    phrase_length = np.diff(onsets)
    phrase_length_sec = phrase_length / sr
    
    max_phrase_len = np.max(phrase_length_sec)
    min_phrase_len = np.min(phrase_length_sec)
    mean_phrase_len = max_phrase_len - min_phrase_len
    
    phrase_symmetry = max_phrase_len/min_phrase_len
    
    overlap = []
    for i in range(len(phrase_length) - 1):
        overlap.append(sum(phrase_length[i:i+2]) / max(phrase_length[i:i+2]))

    num,times = util_most_frequent(overlap)
    phrase_overlap = (times/len(overlap))*100
    
    return max_phrase_len, min_phrase_len, mean_phrase_len, phrase_symmetry, phrase_overlap

In [61]:
def get_key(chroma):
    key = np.argmax(np.mean(chroma, axis=1))
    key_name = util_return_key_name(key)
    return key,key_name

In [62]:
def get_mode(chroma):
    mode = 1 if np.mean(chroma[5:7]) > np.mean(chroma[-5:-3]) else 0
    return mode

In [63]:
def get_mode_type(chroma):
    mode_type_value = util_return_mode_type_value(chroma)
    mode_type_name  = util_return_mode_type_name(mode_type_value)
    return mode_type_value, mode_type_name

In [64]:
def get_tuning(y,sr):
    tuning = librosa.estimate_tuning(y=y,sr=sr)
    return tuning

In [65]:
def get_timbre(y,sr):
    timbre = librosa.feature.spectral_centroid(y=y, sr=sr)
    timbre_mean = np.mean(timbre)
    return timbre, timbre_mean

<h2>Confusing but Important Features</h2>

In [66]:
def get_texture(y,sr):
    ## feature is incomplete
    ## need an R&D
    texture_matrix = librosa.feature.melspectrogram(y=y, sr=sr)
    #texture_matrix = librosa.feature.spectral_contrast(y=y, sr=sr)
    texture = (np.mean(texture_matrix, axis=1))
    return texture..

In [67]:
def get_dynamics(y):
    ## feature is incomplete
    ## need an R&D
    dynamics = give_rms(y)
    return dynamics

In [68]:
def get_motif(y,sr):
    ## feature is incomplete
    ## need an R&D
    motif_matrix = librosa.feature.tempogram(y=y, sr=sr)
    motif = (np.mean(motif_matrix, axis=1))
    return motif

<h1>Write CSV</h1>

In [None]:
fieldnames = ["Country","Song Name","Number Of Beats","Tempo","Hemitonicity","Melodic Range","Melisma","Max Phrase Length",
              "Min Phrase Length","Avg Phrase Length","Phrase Symmetry","Phrase Overlap","Key",
              "Mode","Mode Type","Tuning","Timbre","Timbre Mean"]

In [70]:
def generate_dictionary_for_csv(fieldnames,feature_list):
    dict = []
    obj = {}
    for i in range(len(fieldnames)):
        obj[fieldnames[i]] = feature_list[i]
    dict.append(obj)
    return dict  

In [71]:
def has_header(csv_path):
    try:
        with open(csv_path, 'r', encoding='utf-8') as file:
            reader = csv.reader(file)
            header = next(reader)
            return header is not None
    except StopIteration:
        return False

In [72]:
def create_csv(csv_path,feature_list):
    with open(csv_path, mode='a', encoding="utf-8") as file:
        writer = csv.DictWriter(file,fieldnames=fieldnames)
        if not has_header(csv_path):
            writer.writeheader()
        dict = generate_dictionary_for_csv(fieldnames,feature_list)
        writer.writerows(dict)
        file.close()    

In [99]:
def get_features(audio,song_name,country_type,iteration_no):
    print(iteration_no,song_name)
    ## Loading the initial features
    y, sr, tempo, beat_frames, chroma, pitch, magnitude, mfcc, spectral_contrast, rms, zcr, onset_envelope, onsets = load_initial_features(audio)
    
    ## Getting values of the featues
    n_beats = get_number_of_beats(beat_frames)
    hemitonicity = get_hemitonicity(spectral_contrast)
    melodic_range = get_melodic_range(pitch)
    melisma = get_melisma(mfcc,spectral_contrast,rms,zcr)
    max_phrase_len, min_phrase_len, avg_phrase_len, phrase_symmetry, phrase_overlap = get_phrase_min_max_mean_overlap(onsets,sr)
    key,key_name = get_key(chroma)
    mode = get_mode(chroma)
    mode_type_value,mode_type_name = get_mode_type(chroma)
    tuning = get_tuning(y,sr)
    timbre, timbre_mean = get_timbre(y,sr)
    
    ## Creating feature_list 
    feature_list = []
    feature_list.append(country_type)
    feature_list.append(song_name)
    feature_list.append(n_beats)
    feature_list.append(tempo)
    feature_list.append(hemitonicity)
    feature_list.append(melodic_range)
    feature_list.append(melisma)
    feature_list.append(max_phrase_len)
    feature_list.append(min_phrase_len)
    feature_list.append(avg_phrase_len)
    feature_list.append(phrase_symmetry)
    feature_list.append(phrase_overlap)
    feature_list.append(key)
    feature_list.append(mode)
    feature_list.append(mode_type_value)
    feature_list.append(tuning)
    feature_list.append(timbre)
    feature_list.append(timbre_mean)
    
    ## Printing the features
#     print("Number Of Beats:", n_beats)
#     print("Tempo:",tempo)
#     print("Hemitonicity:", hemitonicity)
#     print("Melodic Range:",melodic_range)
#     print("Melisma:",melisma)
#     print("Max Phrase Length:",max_phrase_len,"\nMax Phrase Length::",min_phrase_len,"\nAvg Phrase Length::",mean_phrase_len)
#     print("Phrase Symmetry:",phrase_symmetry)
#     print("Phrase Overlap:",phrase_overlap)
#     print("Key:",key_name,"-",key)
#     print("Mode:","In Major Key -" if mode==0 else "In Minor Key -",mode)
#     print("Mode Type:",mode_type_name,"-",mode_type_value)
#     print("Tuning:",tuning)
#     print("Timbre:",timbre,"\nTimbre Mean:",timbre_mean)
    
    return feature_list

In [100]:
bangladesh_song_path = 'Music Downloader & Converter\\bangladesh_song_folder\\'
kolkata_song_path = 'Music Downloader & Converter\\kolkata_song_folder\\'
bangladesh_song_csv = 'bangladesh_csvfile.csv'
kolkata_song_csv = 'kolkata_csvfile.csv'
bangladesh = 1
kolkata = 0
country_type = kolkata
audio_path = kolkata_song_path
csv_path = kolkata_song_csv
iteration_no = 0

In [101]:
for audio in glob.glob(audio_path + '*.wav'):
    song_name = audio.split('\\')[-1]
    iteration_no += 1
    feature_list = get_features(audio,song_name,country_type,iteration_no)
    create_csv(csv_path,feature_list)
    print("\n")

1 Amar Bhindeshi Tara - Chondrobindu (Lyrics).wav


2 Amar Priya Cafe by Moheener Ghoraguli.wav


3 Ami ek garib premik neela  আমি এক গরীব প্রেমিক নীলা  Argha dev  Lyrical Video.wav


4 Amra Bhison EkaCactus আমরা ভীষণ একা ক্যাকটাস LYRICS.wav


5 Anjan Dutta - Bhogoban Jaane  অঞ্জন দত্ত - ভগবান জানে  With Lyric  LyricsDekho.wav


6 Aro Ekber  Fossils  Audio Song  Rupam Islam.wav


7 Baari Esho.wav


8 Bela Bose by Anjan Dutta 1994.wav


9 Bhalo Lage - Moheener Ghoraguli II ভালোবাসি জ্যোৎস্নায় কাশবনে ছুটতে (Lyrics).wav


10 Bhalo Theko - Recreated  ভালো থেকো  Cactuss  New Band Song.wav


11 Bishakto Chumbon OFFICIAL MUSIC VIDEO  EESHAAN  ALBUM  SANKRAMAN.wav


12 Bnaador.wav


13 Bondhu Tomay - Chandrabindu.wav


14 Brishti Uh Ma with lyrics  Cactus Bengali Band Songs  Cactus.wav


15 Calcium With Lyrics  Anjan Dutta  Purono Guitar Modern Songs Anjan Dutta.wav


16 Chandrabindu   Mon   Lyrics.wav


17 Chandrabindu  - ghum ghum classroom.wav


18 Chandrabindu - Juju.wav


19 Chi Chi Chi 

In [102]:
import pandas as pd
data = pd.read_csv(csv_path)
data

Unnamed: 0,Country,Song Name,Number Of Beats,Tempo,Hemitonicity,Melodic Range,Melisma,Max Phrase Length,Min Phrase Length,Avg Phrase Length,Phrase Symmetry,Phrase Overlap,Key,Mode,Mode Type,Tuning,Timbre,Timbre Mean
0,0,Amar Bhindeshi Tara - Chondrobindu (Lyrics).wav,496,99.384014,27.764351,5664.146501,-0.101531,0.005533,0.000091,0.005442,61.0,8.316832,4,1,3,-0.07,[[0. 0. 0. ... 0. 0. 0.]],901.820891
1,0,Amar Priya Cafe by Moheener Ghoraguli.wav,545,95.703125,27.670139,5673.504545,-1.204248,0.009751,0.000091,0.009660,107.5,6.336088,9,0,0,0.21,[[ 0. 0. 0. .....,1382.374804
2,0,Ami ek garib premik neela আমি এক গরীব প্রেমিক...,528,129.199219,28.686027,5554.061737,-1.632554,0.002540,0.000091,0.002449,28.0,10.983982,11,0,3,0.02,[[0. 0. 0. ... 0. 0. 0.]],1322.298105
3,0,Amra Bhison EkaCactus আমরা ভীষণ একা ক্যাকটাস L...,395,86.132812,27.164264,5673.504545,-1.814555,0.009841,0.000091,0.009751,108.5,16.791604,3,0,6,-0.05,[[0. 0. 0. ... 0. 0. 0.]],2428.018683
4,0,Anjan Dutta - Bhogoban Jaane অঞ্জন দত্ত - ভগব...,760,129.199219,26.189592,4891.012510,0.479927,0.003991,0.000091,0.003900,44.0,7.054337,7,0,3,0.00,[[0. 0. 0. ... 0. 0. 0.]],1062.452775
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,0,Telephone by Moheener Ghoraguli.wav,693,107.666016,27.193383,5673.504545,-0.198035,0.002177,0.000091,0.002086,24.0,11.400153,4,0,0,0.03,[[ 0. 0. 0. .....,1896.084445
87,0,Tomay Dilam by Moheener Ghoraguli.wav,354,86.132812,27.017408,5442.330452,0.036489,0.004399,0.000091,0.004308,48.5,6.451613,10,1,6,0.00,[[0. 0. 0. ... 0. 0. 0.]],1470.205104
88,0,Tumi Na Thakle with lyrics Anjan Dutta & Usha...,593,129.199219,26.578651,5673.504545,1.475949,0.012381,0.000091,0.012290,136.5,7.479508,4,1,3,0.05,[[ 0. 0. 0. .....,2202.549519
89,0,একটা ছেলে। Akta chele by sahana-(lyrics).wav,493,143.554688,29.132384,5442.330452,-0.903691,0.006395,0.000091,0.006304,70.5,11.514196,2,0,3,0.02,[[ 0. 0. 0. .....,1622.834309
