In [1]:
# Usual Libraries
import pandas as pd
import numpy as np


# Librosa (the mother of audio files)
import librosa
import warnings
warnings.filterwarnings('ignore')

from xgboost import XGBClassifier

In [59]:
# Genre labels in alphabetical order; a label's position in this list is the
# integer class id looked up in `my_dict` when decoding predictions.
# NOTE(review): presumably this matches the label encoding used at training
# time -- confirm against the training notebook.
genres = [
    "blues", "classical", "country", "disco", "hiphop",
    "jazz", "metal", "pop", "reggae", "rock",
]
# class id -> genre name
my_dict = dict(enumerate(genres))

# Audio file to classify, given relative to the Data/ directory.
filename = 'genres_original/jazz/jazz.00027.wav'
filedir = 'Data/' + filename

In [60]:
# Extract features and save them as a DataFrame

def _segment_features(y, sr):
    """Summarise one audio window as a dict of scalar features.

    The insertion order of the returned dict is the column order of the
    resulting feature table: mean/variance pairs for chroma_stft, rms,
    spectral_centroid, spectral_bandwidth, rolloff, zero_crossing_rate,
    harmony and perceptr, then tempo, then mean/variance per MFCC row.

    Parameters
    ----------
    y : audio samples of the window, as returned by ``librosa.load``.
    sr : sampling rate of ``y``, as returned by ``librosa.load``.

    Returns
    -------
    dict mapping feature name to a scalar value.
    """
    # The constant-Q transform and its bin frequencies must use the same fmin.
    fmin = librosa.note_to_hz('A1')
    cqt_magnitude = np.abs(librosa.cqt(y, sr=sr, fmin=fmin))
    cqt_freqs = librosa.cqt_frequencies(cqt_magnitude.shape[0], fmin=fmin)

    # (column prefix, values): each entry contributes "<prefix>_mean" and
    # "<prefix>_var", in exactly this order.
    summarised = (
        ("chroma_stft", librosa.feature.chroma_stft(y=y, sr=sr)),
        ("rms", librosa.feature.rms(y=y)),
        ("spectral_centroid", librosa.feature.spectral_centroid(y=y, sr=sr)),
        ("spectral_bandwidth", librosa.feature.spectral_bandwidth(y=y, sr=sr)),
        ("rolloff", librosa.feature.spectral_rolloff(y=y, sr=sr)),
        ("zero_crossing_rate", librosa.feature.zero_crossing_rate(y)),
        ("harmony", librosa.effects.harmonic(y)),
        ("perceptr", librosa.perceptual_weighting(cqt_magnitude**2, cqt_freqs, ref=np.max)),
    )

    features = {}
    for prefix, values in summarised:
        features[prefix + "_mean"] = np.mean(values)
        features[prefix + "_var"] = np.var(values)

    # NOTE(review): librosa.beat.tempo is deprecated in recent librosa releases
    # in favour of librosa.feature.tempo; kept so older installs keep working.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    features["tempo"] = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)[0]

    # One mean/variance pair per MFCC row, numbered from 1.
    for index, coefficient in enumerate(librosa.feature.mfcc(y=y, sr=sr), start=1):
        features["mfcc" + str(index) + "_mean"] = np.mean(coefficient)
        features["mfcc" + str(index) + "_var"] = np.var(coefficient)

    return features


def extrackt(filedir, filename, genre, duration=3):
    """Cut an audio file into consecutive windows and extract features from each.

    Only complete windows are used: a trailing remainder shorter than
    ``duration`` seconds is ignored. A window that ends exactly at the end of
    the file IS included (the previous ``>=`` comparison dropped it).

    Parameters
    ----------
    filedir : path of the audio file to read.
    filename : label stored in the "name" column and used as the prefix of
        "name_v" ("<filename>.<window index>").
    genre : value stored in the "genre" column.
    duration : window length in seconds (default 3).

    Returns
    -------
    list of dict, one per window. Keys start with "name", "name_v", "filedir",
    "genre", "length" (number of samples in the window) followed by the
    feature columns. The list is empty when the file is shorter than one
    window.

    Raises
    ------
    ValueError if ``duration`` is not positive (the loop would never advance).
    """
    if duration <= 0:
        raise ValueError("duration must be positive")

    # NOTE(review): the `filename=` keyword of get_duration is deprecated in
    # recent librosa releases in favour of `path=`; kept for compatibility.
    length_audio = librosa.get_duration(filename=filedir)

    rows = []
    offset = 0
    length = 0  # sample count of the first window; 0 until one has been read
    # Checking the bounds before loading avoids decoding a window that would be
    # thrown away anyway.
    while offset + duration <= length_audio:
        y, sr = librosa.load(filedir, offset=offset, duration=duration)
        # Safety net: stop if the decoder returns a window of a different size
        # than the first one (e.g. the reported duration was slightly off).
        if length and len(y) != length:
            break
        length = len(y)

        row = {
            "name": filename,
            "name_v": filename + "." + str(len(rows)),
            "filedir": filedir,
            "genre": genre,
            "length": length,
        }
        row.update(_segment_features(y, sr))
        rows.append(row)

        offset += duration
    return rows
    
# One row per audio window of the input file. "own_song" and "unknown" are
# placeholder name/genre values; they land in metadata columns that are dropped
# below.
df = pd.DataFrame(extrackt(filedir, "own_song", "unknown"))
if df.empty:
    # extrackt returns no rows when the clip is shorter than one window; fail
    # here with a clear message instead of deep inside the classifier.
    raise ValueError("No complete audio window could be extracted from " + filedir)
# Drop the four leading metadata columns (name, name_v, filedir, genre) so
# only "length" and the numeric features are passed to the model.
data = df.iloc[:, 4:]

In [61]:
# Load Final model
from sklearn import preprocessing

# Restore the trained classifier and predict a class id for every row
# (audio window) in `data`.
xgb = XGBClassifier()
xgb.load_model("xgb_model.txt")
preds = xgb.predict(data)
print(preds)

# Majority vote: the class id predicted for the most windows decides the genre.
prediction = my_dict[np.argmax(np.bincount(preds))]

print("The predicted genre is:    " + prediction)

[5 5 5 5 5 5 5 5 5 5]
The predicted genre is:    jazz
