In [2]:
import numpy as np
import pandas as pd
import os
import librosa
import soundfile as sf
import json
import math
import pickle
from pathlib import Path
from feature_generator import create_features

In [3]:
# Notebook-wide runtime switches and storage locations

local_Dump = False   # when True, the last cell stores features/labels as .npy files
overwrite = True     # when True, check_writing() allows replacing an existing parameter file

data_path = Path('/home/mirko/Downloads/IRMAS_Raw.pkl')

# Parameter files are kept in ./models below the current working directory;
# the folder is created the first time the notebook runs.
modelstore = Path.cwd() / 'models'
if not modelstore.exists():
    modelstore.mkdir(parents=True)

def check_writing(modelname):
    """Report whether the parameter file `modelname` may be written.

    Prints a status message and returns True when no file of that name exists
    in `modelstore`, or when one exists and the global `overwrite` flag is
    truthy. Returns False only for an existing file with `overwrite` disabled.
    """
    target = modelstore / modelname

    # Unused name: nothing to protect, writing is always allowed
    if not target.is_file():
        print ("Creating new Model File")
        return True

    print ("Modelname is used already!")
    if overwrite:
        print('Overwriting File')
        return True
    return False
        

# This cell only reads an existing Parameter Set
---

In [37]:
# Run this cell only if you want to load a parameter set that already exists

Model_Name = 'Time_22050_1.txt'

# Start from a defined value so that a missing file neither raises NameError on a
# fresh kernel nor leaves parameters from a previously executed cell on display.
Feature_Params = None

# The existence check has to test exactly the path that is opened below.
if (modelstore / Model_Name).is_file():

    with open(modelstore / Model_Name, 'r') as file:
        Feature_Params = json.load(file)

else:
    print('No such Model Definition:', Model_Name )

Feature_Params

{'Description': 'Returning only the unmodified time samples, according sampling rate',
 'Data': '/home/mirko/Downloads/IRMAS_Raw.pkl',
 'Transformation': 'Time',
 'sampling_rate': 22050,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 13,
 'loudness_normalization': 'linear',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [66149, 1]}

# MEL Spectrograms
___

In [22]:
# Definition of the MEL_linear parameter set

# NOTE(review): 'MEL_Lineddr' looks like a typo for 'MEL_Linear' — confirm before
# renaming, since the file name is what other code has to load.
New_Model_Name = 'MEL_Lineddr_1.txt'

if check_writing(New_Model_Name):

    # Entries are listed in the order in which they are serialised to JSON
    Feature_Params = {
        'Description': 'Create MEL Bins with linear loudness values',
        'Transformation': 'MEL_linear',
        'sampling_rate': 44100,
        'hop_length': 1024,
        'fft_window': 1024,
        'no_mel_bin': 13,
        'loudness_normalization': 'linear',
        'fmin': 0.0,
        'fmax': 11025.0,
        'length_s': 3,
    }

    # Input_Dim = [no_mel_bin, ceil(length_s * sampling_rate / hop_length), 1]
    Frequencies = Feature_Params['no_mel_bin']
    Frames = math.ceil(
        Feature_Params['length_s'] * Feature_Params['sampling_rate']
        / Feature_Params['hop_length']
    )
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    # Persist the parameter set as JSON inside the model store
    with open(modelstore / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)

Feature_Params

Creating new Model File


{'Description': 'Create MEL Bins with linear loudness values',
 'Data': '/home/mirko/Downloads/IRMAS_Raw.pkl',
 'Transformation': 'MEL_linear',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 13,
 'loudness_normalization': 'linear',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [13, 130, 1]}

# Complex Fast Fourier Transformations
___

In [30]:
# Definition of the FFT_Complex parameter set

New_Model_Name = 'FFT_Complex_1.txt'

if check_writing(New_Model_Name):

    # Entries are listed in the order in which they are serialised to JSON;
    # the MEL-related entries are stored as empty strings for this transformation.
    Feature_Params = {
        'Description': 'Real and imaginary part of FFT in 2 channels, use Leaky_Relu, values between -1 and 1!',
        'Transformation': 'FFT_Complex',
        'sampling_rate': 44100,
        'hop_length': 1024,
        'fft_window': 1024,
        'no_mel_bin': '',
        'loudness_normalization': 'linear',
        'fmin': '',
        'fmax': '',
        'length_s': 3,
    }

    # Input_Dim = [ceil(fft_window / 2 + 1), ceil(length_s * sampling_rate / hop_length), 2]
    Frequencies = math.ceil(Feature_Params['fft_window'] / 2 + 1)
    Frames = math.ceil(
        Feature_Params['length_s'] * Feature_Params['sampling_rate']
        / Feature_Params['hop_length']
    )
    channels = 2
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    # Persist the parameter set as JSON inside the model store
    with open(modelstore / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)

Feature_Params

Modelname is used already!
Overwriting File


{'Description': 'Real and imaginary part of FFT in 2 channels, use Leaky_Relu, values between -1 and 1!',
 'Transformation': 'FFT_Complex',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': '',
 'loudness_normalization': 'linear',
 'fmin': '',
 'fmax': '',
 'length_s': 3,
 'Input_Dim': [513, 130, 2]}

# Absolute Fast Fourier Transformations
___

In [35]:
# Definition of the FFT absolute-value parameter set

New_Model_Name = 'FFT_Absolute_1.txt'

if check_writing(New_Model_Name):

    # Entries are listed in the order in which they are serialised to JSON;
    # the MEL-related entries are stored as empty strings for this transformation.
    Feature_Params = {
        'Description': 'Calculating the absolute Value only of the FFT, normalized between 0 and 1',
        'Transformation': 'FFT_Absolut',
        'sampling_rate': 44100,
        'hop_length': 1024,
        'fft_window': 1024,
        'no_mel_bin': '',
        'loudness_normalization': 'linear',
        'fmin': '',
        'fmax': '',
        'length_s': 3,
    }

    # Input_Dim = [ceil(fft_window / 2 + 1), ceil(length_s * sampling_rate / hop_length), 1]
    Frequencies = math.ceil(Feature_Params['fft_window'] / 2 + 1)
    Frames = math.ceil(
        Feature_Params['length_s'] * Feature_Params['sampling_rate']
        / Feature_Params['hop_length']
    )
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    # Persist the parameter set as JSON inside the model store
    with open(modelstore / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)

Feature_Params

Modelname is used already!
Overwriting File


# Pure resampled or not resampled time domain series
---

In [42]:
# Definition of the resampled time-series parameter set

New_Model_Name = 'Time_22050_1.txt'

if check_writing(New_Model_Name):

    # Entries are listed in the order in which they are serialised to JSON;
    # the MEL-related entries are stored as empty strings for this transformation.
    Feature_Params = {
        'Description': 'Returning only the unmodified time samples, according sampling rate',
        'Transformation': 'Time',
        'sampling_rate': 22050,
        'hop_length': 1024,
        'fft_window': 1024,
        'no_mel_bin': '',
        'loudness_normalization': 'linear',
        'fmin': '',
        'fmax': '',
        'length_s': 3,
    }

    # Input_Dim = [length_s * sampling_rate - 1, 1]
    Frames = Feature_Params['length_s'] * Feature_Params['sampling_rate'] - 1
    channels = 1
    Feature_Params['Input_Dim'] = [Frames, channels]

    # Persist the parameter set as JSON inside the model store
    with open(modelstore / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)

Feature_Params

Modelname is used already!
Overwriting File


{'Description': 'Returning only the unmodified time samples, according sampling rate',
 'Transformation': 'Time',
 'sampling_rate': 22050,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': '',
 'loudness_normalization': 'linear',
 'fmin': '',
 'fmax': '',
 'length_s': 3,
 'Input_Dim': [66149, 1]}

# Mel Frequency Cepstral Coefficients
---

In [6]:
# Definition of the MFCC parameter set

New_Model_Name = 'MFCC_1.txt'

if check_writing(New_Model_Name):

    # Entries are listed in the order in which they are serialised to JSON
    Feature_Params = {
        'Description': 'Feature containing the Mel Frequency Cepstral Coefficients',
        'Transformation': 'MFCC',
        'sampling_rate': 44100,
        'hop_length': 1024,
        'fft_window': 1024,
        'no_mel_bin': 128,
        'no_mfcc': 40,
        'dct_type': 2,
        'loudness_normalization': '',
        'fmin': 0.0,
        'fmax': 11025.0,
        'length_s': 3,
    }

    # Input_Dim = [no_mfcc, ceil(length_s * sampling_rate / hop_length), 1]
    Frequencies = Feature_Params['no_mfcc']
    Frames = math.ceil(
        Feature_Params['length_s'] * Feature_Params['sampling_rate']
        / Feature_Params['hop_length']
    )
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    # Persist the parameter set as JSON inside the model store
    with open(modelstore / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)

Feature_Params

Modelname is used already!
Overwriting File


{'Description': 'Feature containing the Mel Frequency Cepstral Coefficients',
 'Transformation': 'MFCC',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 128,
 'no_mfcc': 40,
 'dct_type': 2,
 'loudness_normalization': '',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [40, 130, 1]}

In [7]:
# Load the raw dataset from the pickle file configured in `data_path`.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
data = pd.read_pickle(data_path)

In [8]:
# Build the feature and label arrays from the raw data for the 'MFCC_1.txt' parameter set.
# NOTE(review): presumably create_features resolves `param_file` against the model store — confirm.
feature, label = create_features(data, param_file = 'MFCC_1.txt')

Feature containing the Mel Frequency Cepstral Coefficients
Hop_length:  1024
Sampling Rate: 44100
Fast Fourier Window: 1024
Number of MEL Bins: 128
Number of Cepstral Coefficients:  40
Typer of discrete cosinus transform:  2
Shape of Feature:  [40, 130, 1]
Minimum Frequency:  0.0
Maximum Frequency:  11025.0


In [9]:
# Show the shapes of the feature and label arrays, one per line
print(feature.shape, label.shape, sep='\n')

(6705, 40, 130, 1)
(6705,)


In [12]:
# Inspect the feature values stored at index 1 of the feature array
feature[1]

array([[[0.52570164],
        [0.5559505 ],
        [0.55496395],
        ...,
        [0.45238218],
        [0.4229701 ],
        [0.47480527]],

       [[0.8203895 ],
        [0.8393347 ],
        [0.8361132 ],
        ...,
        [0.8452826 ],
        [0.8727619 ],
        [0.8785456 ]],

       [[0.6976082 ],
        [0.7394217 ],
        [0.74330336],
        ...,
        [0.7251199 ],
        [0.7282015 ],
        [0.7068402 ]],

       ...,

       [[0.7298024 ],
        [0.73496306],
        [0.7399664 ],
        ...,
        [0.70992345],
        [0.7326841 ],
        [0.72941643]],

       [[0.7215182 ],
        [0.7248754 ],
        [0.72925353],
        ...,
        [0.7125723 ],
        [0.7174738 ],
        [0.7271651 ]],

       [[0.729835  ],
        [0.7326011 ],
        [0.7264479 ],
        ...,
        [0.7280678 ],
        [0.72397894],
        [0.7402925 ]]], dtype=float32)

In [None]:
# Optionally store the generated arrays as .npy files inside the model store
if local_Dump:

    # NOTE(review): Model_Name is assigned in the "read an existing parameter set" cell and
    # may not match the parameter file passed to create_features — confirm the intended prefix.
    f_name = Model_Name + '_features.npy'
    l_name = Model_Name + '_labels.npy'

    # The arrays returned by create_features are named `feature` and `label`
    # (singular); the plural names are never defined and would raise NameError.
    np.save(modelstore / f_name, feature)
    np.save(modelstore / l_name, label)