In [74]:
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display as display
import matplotlib.pyplot as plt
import soundfile as sf
import matplotlib.pyplot as plt
import json
import math
import pickle
from pathlib import Path
from DataLoad import load_to_dataframe

In [75]:
# Runtime environment variables

local_Dump = False

modelstore = Path.cwd() / 'models'
if not os.path.exists(modelstore):
    os.makedirs(modelstore)  


In [76]:
# Run this cell if you want to read a model only that exist already


Model_Name = 'MEL_Linear_1'

param_file = Model_Name + '_Param.txt'

Feature_Params = {}

if Path(modelstore / param_file).is_file():

    with open(modelstore / param_file, 'r') as file:
        Feature_Params = json.load(file)
    
else:
    print('No such Model Definition:', Model_Name )

Feature_Params
    

No such Model Definition: MEL_Linear_1


{}

In [77]:
# Define Parameter Dictionary for Feature Extraction for a new Model, run only for a new model

New_Model_Name = 'MEL_Linear_1'

param_file = New_Model_Name + '_Param.txt'


if Path(modelstore / param_file).is_file():
    print ("Modelname is used already!")
    
else:
    print ("Creating new Model File")

 

    Feature_Params = {}

    # Input DIM for fft = fft_window/2 +1, 3 * sampling_rate / hop_length



    Feature_Params['Description'] = 'Only FFT transformation with 2 channels for real and imaginary part, linear absolute values between 0 and 1'
    Feature_Params['Data'] = '/home/mirko/Downloads/IRMAS_Raw.pkl'
    Feature_Params['Transformation'] = 'MEL_linear'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = 13
    Feature_Params['loudness_normalization'] = 'linear'
    Feature_Params['fmin'] = 0.0
    Feature_Params['fmax'] = 11025.0
    Feature_Params['length_s'] = 3

    # Frequencies = math.ceil(Feature_Params['fft_window'] / 2 +1)
    Frequencies = Feature_Params['no_mel_bin']
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    with open(modelstore / param_file, 'w') as file:
        json.dump(Feature_Params, file)

Creating new Model File


In [78]:
# Read the raw Data

if local_Dump:
    
    path = Feature_Params['Data']
    
    if not Path(path).is_file():
        print ('No such Raw Data')
        
    data = pd.read_pickle(path)
    path
    
else:
    df, data, class_dict = load_to_dataframe()

In [79]:
def create_features(data, parameter=Feature_Params):

    desc = parameter['Description']
    Transformation = parameter['Transformation']
    srs = parameter['sampling_rate']
    hl = parameter['hop_length']
    fft = parameter['fft_window']
    n_mel = parameter['no_mel_bin']
    norm = parameter['loudness_normalization']
    shape = parameter['Input_Dim']
    fmin = parameter['fmin']
    fmax = parameter['fmax']
    
    if Transformation == 'MEL_linear':
        print(desc)
        print('Hop_length: ', hl)
        print('Sampling Rate:', srs)
        print('Fast Fourier Window:', fft)
        print('Number of MEL Bins:', n_mel)
        print('Shape of Feature: ', shape)
        print('Minimum Frequency: ', fmin )
        print('Maximum Frequency: ', fmax )
        
        features = MEL_linear(data, srs, hl, fft, n_mel, fmin, fmax)
        features = Scale_0_1(features)
        features = np.expand_dims(features, axis=3) #This adds a channel dimension of 1
        labels = np.array(data['labels'])
    
    if Transformation == 'FFT_Complex_1':
        print(desc)
        print('Hop_length: ', hl)
        print('Sampling Rate:', srs)
        print('Fast Fourier Window:', fft)
        print('Shape of Feature: ', shape)
       
        features = fft_complex(data, srs, hl, fft)
        labels = np.array(data['labels'])
        
    return features, labels
            
        

In [80]:
#Calculate the MEL Spectogramm with given Parameters, returns numpy array

def MEL_linear(data, srs, hl, fft, n_mel, fmin, fmax):
    
    sample_vector = np.array(data['raw_sounds'])
    result = []
    
    for sample in sample_vector:
        ft = librosa.stft(sample, hop_length=hl, n_fft = fft, window='hann') #Calculate Fast Fourier Transform
        D = np.abs(ft)**2 #Calculaing the Power
        mels = librosa.feature.melspectrogram(S=D, sr=srs, n_mels=n_mel, fmin=fmin, fmax=fmax) # Calculate MEL Spectogramm
        result.append(mels)
        
    return np.array(result)

In [81]:
#Calculate complex fft Transformation in 2 channels

def fft_complex(data, srs, hl, fft):
    sample_vector = np.array(data['raw_sounds'])
    result = []
    
    for sample in sample_vector:
        ft = librosa.stft(sample, hop_length=hl, n_fft = fft, window='hann') #Calculate Fast Fourier Transform
        result.append(ft)
        
    return np.array(result)    

In [82]:
# Too simple MinMax Scaler

def Scale_0_1(features):
    maximum = np.max(features)
    minimum = np.min(features)
    features = (features - minimum) / (maximum - minimum)
    
    return features

In [83]:
features, labels = create_features(data, parameter=Feature_Params)

Only FFT transformation with 2 channels for real and imaginary part, linear absolute values between 0 and 1
Hop_length:  1024
Sampling Rate: 44100
Fast Fourier Window: 1024
Number of MEL Bins: 13
Shape of Feature:  [13, 130, 1]
Minimum Frequency:  0.0
Maximum Frequency:  11025.0


ValueError: zero-size array to reduction operation maximum which has no identity

In [84]:
if local_Dump:

    f_name = Model_Name + '_features.npy' 
    l_name = Model_Name + '_labels.npy' 
    np.save(modelstore / f_name, features)
    np.save(modelstore / l_name, labels)

In [85]:
features.shape

(6705, 13, 130, 1)