In [4]:
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display as display
import matplotlib.pyplot as plt
import soundfile as sf
import matplotlib.pyplot as plt
import json
import math
import pickle
from pathlib import Path

In [2]:
Model_Name = 'MEL_Linear_1'

param_file = Model_Name + '_Param.txt'
modelstore = Path.cwd() / 'models'

with open(modelstore / param_file, 'r') as file:
    Feature_Params = json.load(file)
    
Feature_Params    

FileNotFoundError: [Errno 2] No such file or directory: '/home/mirko/git/instrumentdetection/models/MEL_Linear_1_Param.txt'

In [3]:
# Define Parameter Dictionary for Feature Extraction for a new Model

New_Model_Name = 'MEL_Linear_1'

param_file = New_Model_Name + '_Param.txt'


if Path(modelstore / param_file).is_file():
    print ("Modelname is used already!")
    
else:
    print ("Creating new Model File")


    Feature_Params = {}

    # Input DIM for fft = fft_window/2 +1, 3 * sampling_rate / hop_length



    Feature_Params['Description'] = 'Only FFT transformation with 2 channels for real and imaginary part, linear absolute values between 0 and 1'
    Feature_Params['Data'] = '/home/mirko/Downloads/IRMAS_Raw.pkl'
    Feature_Params['Transformation'] = 'FFT_Complex_1'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 2048
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = 27
    Feature_Params['loudness_normalization'] = 'linear'
    Feature_Params['fmin'] = 0.0
    Feature_Params['fmax'] = 11025.0
    Feature_Params['length_s'] = 3

    Frequencies = math.ceil(Feature_Params['fft_window'] / 2 +1)
    # Frequencies = Feature_Params['no_mel_bin']
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])

    Feature_Params['Input_Dim'] = [Frequencies, Frames]

    with open(modelstore / param_file, 'w') as file:
        json.dump(Feature_Params, file)

Creating new Model File


FileNotFoundError: [Errno 2] No such file or directory: '/home/mirko/git/instrumentdetection/models/MEL_Linear_1_Param.txt'

In [47]:
# Read the raw Data

path = Feature_Params['Data']
data = pd.read_pickle(path)
path

'/home/mirko/Downloads/IRMAS_Raw.pkl'

In [48]:
def create_features(data, parameter=Feature_Params):

    desc = parameter['Description']
    Transformation = parameter['Transformation']
    srs = parameter['sampling_rate']
    hl = parameter['hop_length']
    fft = parameter['fft_window']
    n_mel = parameter['no_mel_bin']
    norm = parameter['loudness_normalization']
    shape = parameter['Input_Dim']
    fmin = parameter['fmin']
    fmax = parameter['fmax']
    
    if Transformation == 'MEL_linear':
        print(desc)
        print('Hop_length: ', hl)
        print('Sampling Rate:', srs)
        print('Fast Fourier Window:', fft)
        print('Number of MEL Bins:', n_mel)
        print('Shape of Feature: ', shape)
        print('Minimum Frequency: ', fmin )
        print('Maximum Frequency: ', fmax )
        
        features = MEL_linear(data, srs, hl, fft, n_mel, fmin, fmax)
        features = Scale_0_1(features)
        features = np.expand_dims(features, axis=3) #This adds a channel dimension of 1
        labels = np.array(data['labels'])
    
    if Transformation == 'FFT_Complex_1':
        print(desc)
        print('Hop_length: ', hl)
        print('Sampling Rate:', srs)
        print('Fast Fourier Window:', fft)
        print('Shape of Feature: ', shape)
       
        features = fft_complex(data, srs, hl, fft)
        labels = np.array(data['labels'])
        
    return features, labels
            
        

In [49]:
#Calculate the MEL Spectogramm with given Parameters, returns numpy array

def MEL_linear(data, srs, hl, fft, n_mel, fmin, fmax):
    
    sample_vector = np.array(data['raw_sounds'])
    result = []
    
    for sample in sample_vector:
        ft = librosa.stft(sample, hop_length=hl, n_fft = fft, window='hann') #Calculate Fast Fourier Transform
        D = np.abs(ft)**2 #Calculaing the Power
        mels = librosa.feature.melspectrogram(S=D, sr=srs, n_mels=n_mel, fmin=fmin, fmax=fmax) # Calculate MEL Spectogramm
        result.append(mels)
        
    return np.array(result)

In [50]:
#Calculate complex fft Transformation in 2 channels

def fft_complex(data, srs, hl, fft):
    sample_vector = np.array(data['raw_sounds'])
    result = []
    
    for sample in sample_vector:
        ft = librosa.stft(sample, hop_length=hl, n_fft = fft, window='hann') #Calculate Fast Fourier Transform
        result.append(ft)
        
    return np.array(result)    

In [51]:
# Too simple MinMax Scaler

def Scale_0_1(features):
    maximum = np.max(features)
    minimum = np.min(features)
    features = (features - minimum) / (maximum - minimum)
    
    return features

In [52]:
features, labels = create_features(data, parameter=Feature_Params)

A simple Feature shaped as Matix, frequencies biased to MEL, linear absolute values between 0 and 1
Hop_length:  2048
Sampling Rate: 44100
Fast Fourier Window: 1024
Number of MEL Bins: 13
Shape of Feature:  [13, 65]
Minimum Frequency:  0.0
Maximum Frequency:  11025.0


In [53]:
f_name = Model_Name + '_features.npy' 
l_name = Model_Name + '_labels.npy' 
np.save(modelstore / f_name, features)
np.save(modelstore / l_name, labels)

In [54]:
features.shape

(6705, 13, 65, 1)