In [1]:
import sys
sys.path.append('../')

import numpy as np
import pandas as pd
import os
import librosa
import soundfile as sf
import json
import math
import pickle
from pathlib import Path
from feature_generator import create_features

In [2]:
# Runtime environment variables

local_Dump = False
overwrite = True

data_path =Path('/home/mirko/Downloads/IRMAS_Raw.pkl')


feature_store = Path.cwd() / 'features'
if not os.path.exists(feature_store):
    os.makedirs(feature_store)     

def check_writing(modelname):    
    if Path(feature_store / modelname).is_file():
        print ("Modelname is used already!")
        write_out = False
        if overwrite:
            print('Overwriting File')
            write_out = True
    else:
        print ("Creating new Model File")
        write_out = True

    return write_out
        

# This cell is only for reading of a Parameter Set
---

In [37]:
# Run this cell if you want to read a model only that exist already


Model_Name = 'Time_22050_1.txt'

if Path(feature_store / param_file).is_file():

    with open(feature_store / Model_Name, 'r') as file:
        Feature_Params = json.load(file)
    
else:
    print('No such Model Definition:', Model_Name )

Feature_Params

{'Description': 'Returning only the unmodified time samples, according sampling rate',
 'Data': '/home/mirko/Downloads/IRMAS_Raw.pkl',
 'Transformation': 'Time',
 'sampling_rate': 22050,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 13,
 'loudness_normalization': 'linear',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [66149, 1]}

# MEL Power-Spectograms
___

In [3]:
# Defintion of MEL_Linear Parameter Sets

New_Model_Name = 'MEL_Lin_Lores.txt'

if check_writing(New_Model_Name): 

    Feature_Params = {}

    Feature_Params['Description'] = 'Create MEL Bins with linear loudness values'
    Feature_Params['Transformation'] = 'MEL_linear'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = 64
    Feature_Params['loudness_normalization'] = 'linear'
    Feature_Params['fmin'] = 0.0
    Feature_Params['fmax'] = 11025.0
    Feature_Params['length_s'] = 3

    Frequencies = Feature_Params['no_mel_bin']
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params       

Creating new Model File


{'Description': 'Create MEL Bins with linear loudness values',
 'Transformation': 'MEL_linear',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 64,
 'loudness_normalization': 'linear',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [64, 130, 1]}

# MEL dB-Spectograms
___

In [10]:
# Defintion of MEL_dB calculated Parameter Sets

New_Model_Name = 'MEL_dB_Percussion.txt'

if check_writing(New_Model_Name): 

    Feature_Params = {}

    Feature_Params['Description'] = 'Create MEL Bins with dB loudness values'
    Feature_Params['Transformation'] = 'MEL_dB'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = 128
    Feature_Params['loudness_normalization'] = 'dB'
    Feature_Params['fmin'] = 0.0
    Feature_Params['fmax'] = 11025.0
    Feature_Params['length_s'] = 3
    Feature_Params['content'] = 'percuss'

    Frequencies = Feature_Params['no_mel_bin']
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params       

Creating new Model File


{'Description': 'Create MEL Bins with dB loudness values',
 'Transformation': 'MEL_dB',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 128,
 'loudness_normalization': 'dB',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'content': 'percuss',
 'Input_Dim': [128, 130, 1]}

# MEL dB-Spectograms Decomposed
___

In [7]:
# Defintion of MEL_dB calculated Parameter Sets

New_Model_Name = 'MEL_dB_decompose_HP.txt'

if check_writing(New_Model_Name): 

    Feature_Params = {}

    Feature_Params['Description'] = 'MEL Spectograms for Harmonic and Percussive'
    Feature_Params['Transformation'] = 'MEL_dB_decompose'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = 128
    Feature_Params['loudness_normalization'] = 'dB'
    Feature_Params['fmin'] = 0.0
    Feature_Params['fmax'] = 11025.0
    Feature_Params['length_s'] = 3
    Feature_Params['content'] = 'decomposed'
    Feature_Params['margin'] = 3

    Frequencies = Feature_Params['no_mel_bin']
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 2
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params       

Creating new Model File


{'Description': 'MEL Spectograms for Harmonic and Percussive',
 'Transformation': 'MEL_dB_decompose',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 128,
 'loudness_normalization': 'dB',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'content': 'decomposed',
 'margin': 3,
 'Input_Dim': [128, 130, 2]}

# MEL Complex dB-Spectograms
___

In [7]:
# Defintion of MEL_dB calculated Parameter Sets

New_Model_Name = 'MEL_Complex dB.txt'

if check_writing(New_Model_Name): 

    Feature_Params = {}

    Feature_Params['Description'] = 'Create MEL Bins with dB loudness values separate for real and imaginary part'
    Feature_Params['Transformation'] = 'MEL_dB_complex'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = 128
    Feature_Params['loudness_normalization'] = 'dB'
    Feature_Params['fmin'] = 0.0
    Feature_Params['fmax'] = 11025.0
    Feature_Params['length_s'] = 3

    Frequencies = Feature_Params['no_mel_bin']
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 2
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params       

Modelname is used already!
Overwriting File


{'Description': 'Create MEL Bins with dB loudness values separate for real and imaginary part',
 'Transformation': 'MEL_dB_complex',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 128,
 'loudness_normalization': 'dB',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [128, 130, 1]}

# Complex Fast Fourier Transformations
___

In [12]:
# Defintion of FFT_Complex Parameter Sets

New_Model_Name = 'FFT_Complex.txt'

if check_writing(New_Model_Name):

    Feature_Params = {}
    
    Feature_Params['Description'] = 'Real and imaginary part of FFT in 2 channels, use Leaky_Relu, values between -1 and 1!'
    Feature_Params['Transformation'] = 'FFT_Complex'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = ''
    Feature_Params['loudness_normalization'] = 'linear'
    Feature_Params['fmin'] = ''
    Feature_Params['fmax'] = ''
    Feature_Params['length_s'] = 3

    Frequencies = math.ceil(Feature_Params['fft_window'] / 2 +1)
    
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 2
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params       

Creating new Model File


{'Description': 'Real and imaginary part of FFT in 2 channels, use Leaky_Relu, values between -1 and 1!',
 'Transformation': 'FFT_Complex',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': '',
 'loudness_normalization': 'linear',
 'fmin': '',
 'fmax': '',
 'length_s': 3,
 'Input_Dim': [513, 130, 2]}

# Absolute Fast Fourier Transformations Linear
___

In [9]:
# Defintion of FFT_Absolute Value Parameter sets

New_Model_Name = 'FFT_Absolute_LIN.txt'

if check_writing(New_Model_Name):

    Feature_Params = {}
    
    Feature_Params['Description'] = 'Calculating the absolute Value only of the FFT, normalized between 0 and 1'
    Feature_Params['Transformation'] = 'FFT_Absolut'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = ''
    Feature_Params['loudness_normalization'] = 'linear'
    Feature_Params['fmin'] = ''
    Feature_Params['fmax'] = ''
    Feature_Params['length_s'] = 3

    Frequencies = math.ceil(Feature_Params['fft_window'] / 2 +1)
    
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params           

Creating new Model File


{'Description': 'Calculating the absolute Value only of the FFT, normalized between 0 and 1',
 'Transformation': 'FFT_Absolut',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': '',
 'loudness_normalization': 'linear',
 'fmin': '',
 'fmax': '',
 'length_s': 3,
 'Input_Dim': [513, 130, 1]}

# Absolute Fast Fourier Transformations dB
___

In [11]:
# Defintion of FFT_Absolute Value Parameter sets

New_Model_Name = 'FFT_Absolute_dB_lores.txt'

if check_writing(New_Model_Name):

    Feature_Params = {}
    
    Feature_Params['Description'] = 'Calculating the absolute Value only of the FFT, normalized between 0 and 1'
    Feature_Params['Transformation'] = 'FFT_Absolut'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 512
    Feature_Params['fft_window'] = 512
    Feature_Params['no_mel_bin'] = ''
    Feature_Params['loudness_normalization'] = 'dB'
    Feature_Params['fmin'] = ''
    Feature_Params['fmax'] = ''
    Feature_Params['length_s'] = 3

    Frequencies = math.ceil(Feature_Params['fft_window'] / 2 +1)
    
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params           

Creating new Model File


{'Description': 'Calculating the absolute Value only of the FFT, normalized between 0 and 1',
 'Transformation': 'FFT_Absolut',
 'sampling_rate': 44100,
 'hop_length': 512,
 'fft_window': 512,
 'no_mel_bin': '',
 'loudness_normalization': 'dB',
 'fmin': '',
 'fmax': '',
 'length_s': 3,
 'Input_Dim': [257, 259, 1]}

In [5]:
X_train, X_test, = train_test_split(data, test_size=0.3, stratify=data['labels'])
X_train.to_pickle(p_home / (Dataset + '_train'))
X_test.to_pickle(p_home / (Dataset + '_test'))
count, dict_ = wave_manipulator.analyze(X_train)

# Pure resampled or not resampled time domain series
---

In [42]:
# Defintion of resampled Time Series

New_Model_Name = 'Time_22050_1.txt'

if check_writing(New_Model_Name):

    Feature_Params = {}
    
    Feature_Params['Description'] = 'Returning only the unmodified time samples, according sampling rate'
    Feature_Params['Transformation'] = 'Time'
    Feature_Params['sampling_rate'] = 22050
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = ''
    Feature_Params['loudness_normalization'] = 'linear'
    Feature_Params['fmin'] = ''
    Feature_Params['fmax'] = ''
    Feature_Params['length_s'] = 3

    Frames = Feature_Params['length_s'] * Feature_Params['sampling_rate'] - 1
    channels = 1
    Feature_Params['Input_Dim'] = [Frames, channels]

    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params           

Modelname is used already!
Overwriting File


{'Description': 'Returning only the unmodified time samples, according sampling rate',
 'Transformation': 'Time',
 'sampling_rate': 22050,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': '',
 'loudness_normalization': 'linear',
 'fmin': '',
 'fmax': '',
 'length_s': 3,
 'Input_Dim': [66149, 1]}

# Mel Frequency Cepstral Coefficients (MELCC)
---

In [13]:
# Defintion of resampled Time Series

New_Model_Name = 'MFCC_1.txt'

if check_writing(New_Model_Name):

    Feature_Params = {}
    
    Feature_Params['Description'] = 'Feature containing the Mel Frequency Cepstral Coefficients'
    Feature_Params['Transformation'] = 'MFCC'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = 128
    Feature_Params['no_mfcc'] = 40
    Feature_Params['dct_type'] = 2
    Feature_Params['loudness_normalization'] = ''
    Feature_Params['fmin'] = 0.0
    Feature_Params['fmax'] = 11025.0
    Feature_Params['length_s'] = 3

    Frequencies = Feature_Params['no_mfcc']
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]
    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params           

Creating new Model File


{'Description': 'Feature containing the Mel Frequency Cepstral Coefficients',
 'Transformation': 'MFCC',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 128,
 'no_mfcc': 40,
 'dct_type': 2,
 'loudness_normalization': '',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [40, 130, 1]}

# Chromagram
---

In [5]:
# Defintion of resampled Time Series

New_Model_Name = 'Chroma_1D.txt'

if check_writing(New_Model_Name):

    Feature_Params = {}
    
    Feature_Params['Description'] = 'Feature containing the Mel Frequency Cepstral Coefficients'
    Feature_Params['Transformation'] = 'chroma'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 1024
    Feature_Params['fft_window'] = 1024
    Feature_Params['no_mel_bin'] = 128
    Feature_Params['no_mfcc'] = 40
    Feature_Params['no_chroma'] = 12
    Feature_Params['chroma_tune'] = None
    Feature_Params['dct_type'] = 2
    Feature_Params['loudness_normalization'] = ''
    Feature_Params['fmin'] = 0.0
    Feature_Params['fmax'] = 11025.0
    Feature_Params['length_s'] = 3
    Feature_Params['flatten'] = True

    Frequencies = Feature_Params['no_mfcc']
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]
    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)
        
Feature_Params           

Creating new Model File


{'Description': 'Feature containing the Mel Frequency Cepstral Coefficients',
 'Transformation': 'chroma',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 128,
 'no_mfcc': 40,
 'no_chroma': 12,
 'chroma_tune': None,
 'dct_type': 2,
 'loudness_normalization': '',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'flatten': True,
 'Input_Dim': [40, 130, 1]}

In [4]:
data = pd.read_pickle(data_path)

In [8]:
feature, label = create_features(data, param_file = 'MEL_dB_decompose_HP.txt')

MEL Spectograms for Harmonic and Percussive
Hop_length:  1024
Sampling Rate: 44100
Fast Fourier Window: 1024
Number of MEL Bins: 128
Shape of Feature:  [128, 130, 2]
Minimum Frequency:  0.0
Maximum Frequency:  11025.0
Content:  decomposed
Margin:  3


In [9]:
feature.shape

(6705, 128, 130, 2)

In [5]:
feature, label = create_features(data, param_file = 'FFT_Complex_224x224.txt', suffix='_test')

Real and imaginary part of FFT in 2 channels, use Leaky_Relu, values between -1 and 1!
Hop_length:  592
Sampling Rate: 44100
Fast Fourier Window: 446
Shape of Feature:  [224, 224, 2]


In [21]:
print(feature.shape)
print(label.shape)

(6705, 513, 130, 2)
(6705,)
