In [6]:
import numpy as np
import pandas as pd
import os
import librosa
import soundfile as sf
import json
import math
import pickle
from pathlib import Path
from feature_generator import create_features

In [7]:
# Runtime environment variables

local_Dump = False
# When True, check_writing() approves overwriting an existing parameter file.
overwrite = True

# NOTE(review): machine-specific absolute path; adjust it for your environment.
data_path = Path('/home/mirko/Downloads/IRMAS_Raw.pkl')

# Directory (below the current working directory) that holds the JSON
# parameter files written by the cells of this notebook.
feature_store = Path.cwd() / 'features'
# Create the directory, including missing parents, in one race-free call
# instead of a separate exists-check followed by makedirs.
feature_store.mkdir(parents=True, exist_ok=True)

def check_writing(modelname):
    """Tell the caller whether the parameter file ``modelname`` may be written.

    The file is looked up inside ``feature_store``. A status message is
    printed for every outcome.

    Args:
        modelname: File name of the parameter set, relative to ``feature_store``.

    Returns:
        True when no such file exists yet, or when it exists and the
        notebook-level ``overwrite`` flag is truthy; False otherwise.
    """
    target = Path(feature_store / modelname)

    # Nothing on disk yet: always safe to write.
    if not target.is_file():
        print ("Creating new Model File")
        return True

    # The name is taken; only the overwrite flag permits writing.
    print ("Modelname is used already!")
    if overwrite:
        print('Overwriting File')
        return True
    return False
        

# Reading an existing parameter set
---

In [37]:
# Run this cell only to load a parameter set that already exists on disk.

Model_Name = 'Time_22050_1.txt'

# Reset first, so that a missing file never displays a stale parameter set
# left over from a previously executed cell (and never raises a NameError
# on a fresh kernel).
Feature_Params = None

# The existence check must test the very file that is opened below,
# i.e. the one named by Model_Name.
if Path(feature_store / Model_Name).is_file():

    with open(feature_store / Model_Name, 'r') as file:
        Feature_Params = json.load(file)

else:
    print('No such Model Definition:', Model_Name )

# Display the loaded parameters (None if the file was not found).
Feature_Params

{'Description': 'Returning only the unmodified time samples, according sampling rate',
 'Data': '/home/mirko/Downloads/IRMAS_Raw.pkl',
 'Transformation': 'Time',
 'sampling_rate': 22050,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 13,
 'loudness_normalization': 'linear',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [66149, 1]}

# MEL Power-Spectrograms
___

In [8]:
# Definition of the MEL_linear parameter set

New_Model_Name = 'MEL_Lin_Lores.txt'

if check_writing(New_Model_Name):

    # Key order is kept stable because it determines the layout of the JSON file.
    Feature_Params = {
        'Description': 'Create MEL Bins with linear loudness values',
        'Transformation': 'MEL_linear',
        'sampling_rate': 44100,
        'hop_length': 1024,
        'fft_window': 1024,
        'no_mel_bin': 64,
        'loudness_normalization': 'linear',
        'fmin': 0.0,
        'fmax': 11025.0,
        'length_s': 3,
    }

    # Input_Dim is [frequency bins, frames, channels]; the frame count is the
    # clip length in samples divided by the hop length, rounded up.
    Frequencies = Feature_Params['no_mel_bin']
    Frames = math.ceil(
        Feature_Params['length_s'] * Feature_Params['sampling_rate']
        / Feature_Params['hop_length']
    )
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    # Persist the parameter set as JSON in the feature store.
    with open(feature_store / New_Model_Name, 'w') as file:
        file.write(json.dumps(Feature_Params))

# Shows the parameter set (the previous one if nothing was written).
Feature_Params

Creating new Model File


{'Description': 'Create MEL Bins with linear loudness values',
 'Transformation': 'MEL_linear',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 64,
 'loudness_normalization': 'linear',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [64, 130, 1]}

# MEL dB-Spectrograms
___

In [9]:
# Definition of the MEL_dB parameter set

New_Model_Name = 'MEL_dB_Lores.txt'

if check_writing(New_Model_Name):

    # Key order is kept stable because it determines the layout of the JSON file.
    Feature_Params = {
        'Description': 'Create MEL Bins with dB loudness values',
        'Transformation': 'MEL_dB',
        'sampling_rate': 44100,
        'hop_length': 1024,
        'fft_window': 1024,
        'no_mel_bin': 64,
        'loudness_normalization': 'dB',
        'fmin': 0.0,
        'fmax': 11025.0,
        'length_s': 3,
    }

    # Input_Dim is [frequency bins, frames, channels]; the frame count is the
    # clip length in samples divided by the hop length, rounded up.
    Frequencies = Feature_Params['no_mel_bin']
    Frames = math.ceil(
        Feature_Params['length_s'] * Feature_Params['sampling_rate']
        / Feature_Params['hop_length']
    )
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    # Persist the parameter set as JSON in the feature store.
    with open(feature_store / New_Model_Name, 'w') as file:
        file.write(json.dumps(Feature_Params))

# Shows the parameter set (the previous one if nothing was written).
Feature_Params

Creating new Model File


{'Description': 'Create MEL Bins with dB loudness values',
 'Transformation': 'MEL_dB',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 64,
 'loudness_normalization': 'dB',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [64, 130, 1]}

# Complex Fast Fourier Transformations
___

In [18]:
# Definition of the FFT_Complex parameter set

New_Model_Name = 'FFT_Complex_224x224.txt'

if check_writing(New_Model_Name):

    Feature_Params = {}

    Feature_Params['Description'] = 'Real and imaginary part of FFT in 2 channels, use Leaky_Relu, values between -1 and 1!'
    Feature_Params['Transformation'] = 'FFT_Complex'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 592
    Feature_Params['fft_window'] = 446
    # Mel-related settings are left empty for this transformation.
    Feature_Params['no_mel_bin'] = ''
    Feature_Params['loudness_normalization'] = 'linear'
    Feature_Params['fmin'] = ''
    Feature_Params['fmax'] = ''
    Feature_Params['length_s'] = 3

    # A real-input FFT of length N yields N // 2 + 1 frequency bins. Integer
    # division keeps this correct for odd window sizes as well, where
    # ceil(N / 2 + 1) would report one bin too many.
    Frequencies = Feature_Params['fft_window'] // 2 + 1

    # Frame count: clip length in samples divided by the hop length, rounded up.
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    # Two channels, per the description: real and imaginary part.
    channels = 2
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    # Persist the parameter set as JSON in the feature store.
    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)

# Shows the parameter set (the previous one if nothing was written).
Feature_Params

Creating new Model File


{'Description': 'Real and imaginary part of FFT in 2 channels, use Leaky_Relu, values between -1 and 1!',
 'Transformation': 'FFT_Complex',
 'sampling_rate': 44100,
 'hop_length': 592,
 'fft_window': 446,
 'no_mel_bin': '',
 'loudness_normalization': 'linear',
 'fmin': '',
 'fmax': '',
 'length_s': 3,
 'Input_Dim': [224, 224, 2]}

# Absolute Fast Fourier Transformations
___

In [14]:
# Definition of the FFT absolute-value parameter set

New_Model_Name = 'FFT_Absolute_224x224.txt'

if check_writing(New_Model_Name):

    Feature_Params = {}

    Feature_Params['Description'] = 'Calculating the absolute Value only of the FFT, normalized between 0 and 1'
    # NOTE(review): the spelling 'FFT_Absolut' is kept as is; presumably the
    # feature generator matches on this exact string — confirm before renaming.
    Feature_Params['Transformation'] = 'FFT_Absolut'
    Feature_Params['sampling_rate'] = 44100
    Feature_Params['hop_length'] = 592
    Feature_Params['fft_window'] = 446
    # Mel-related settings are left empty for this transformation.
    Feature_Params['no_mel_bin'] = ''
    Feature_Params['loudness_normalization'] = 'linear'
    Feature_Params['fmin'] = ''
    Feature_Params['fmax'] = ''
    Feature_Params['length_s'] = 3

    # A real-input FFT of length N yields N // 2 + 1 frequency bins. Integer
    # division keeps this correct for odd window sizes as well, where
    # ceil(N / 2 + 1) would report one bin too many.
    Frequencies = Feature_Params['fft_window'] // 2 + 1

    # Frame count: clip length in samples divided by the hop length, rounded up.
    Frames = math.ceil(Feature_Params['length_s'] * Feature_Params['sampling_rate'] / Feature_Params['hop_length'])
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    # Persist the parameter set as JSON in the feature store.
    with open(feature_store / New_Model_Name, 'w') as file:
        json.dump(Feature_Params, file)

# Shows the parameter set (the previous one if nothing was written).
Feature_Params

Modelname is used already!
Overwriting File


{'Description': 'Calculating the absolute Value only of the FFT, normalized between 0 and 1',
 'Transformation': 'FFT_Absolut',
 'sampling_rate': 44100,
 'hop_length': 592,
 'fft_window': 446,
 'no_mel_bin': '',
 'loudness_normalization': 'linear',
 'fmin': '',
 'fmax': '',
 'length_s': 3,
 'Input_Dim': [224, 224, 1]}

# Time-domain series (resampled or at the original sampling rate)
---

In [42]:
# Definition of the resampled time-series parameter set

New_Model_Name = 'Time_22050_1.txt'

if check_writing(New_Model_Name):

    # Key order is kept stable because it determines the layout of the JSON file.
    Feature_Params = {
        'Description': 'Returning only the unmodified time samples, according sampling rate',
        'Transformation': 'Time',
        'sampling_rate': 22050,
        'hop_length': 1024,
        'fft_window': 1024,
        'no_mel_bin': '',
        'loudness_normalization': 'linear',
        'fmin': '',
        'fmax': '',
        'length_s': 3,
    }

    # Input_Dim is [samples, channels]; the sample count is one less than
    # length_s * sampling_rate.
    # NOTE(review): presumably the "- 1" mirrors the clip length actually
    # delivered by the feature generator — confirm against create_features.
    Frames = Feature_Params['length_s'] * Feature_Params['sampling_rate'] - 1
    channels = 1
    Feature_Params['Input_Dim'] = [Frames, channels]

    # Persist the parameter set as JSON in the feature store.
    with open(feature_store / New_Model_Name, 'w') as file:
        file.write(json.dumps(Feature_Params))

# Shows the parameter set (the previous one if nothing was written).
Feature_Params

Modelname is used already!
Overwriting File


{'Description': 'Returning only the unmodified time samples, according sampling rate',
 'Transformation': 'Time',
 'sampling_rate': 22050,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': '',
 'loudness_normalization': 'linear',
 'fmin': '',
 'fmax': '',
 'length_s': 3,
 'Input_Dim': [66149, 1]}

# Mel Frequency Cepstral Coefficients
---

In [6]:
# Definition of the MFCC parameter set

New_Model_Name = 'MFCC_1.txt'

if check_writing(New_Model_Name):

    # Key order is kept stable because it determines the layout of the JSON file.
    Feature_Params = {
        'Description': 'Feature containing the Mel Frequency Cepstral Coefficients',
        'Transformation': 'MFCC',
        'sampling_rate': 44100,
        'hop_length': 1024,
        'fft_window': 1024,
        'no_mel_bin': 128,
        'no_mfcc': 40,
        'dct_type': 2,
        'loudness_normalization': '',
        'fmin': 0.0,
        'fmax': 11025.0,
        'length_s': 3,
    }

    # Input_Dim is [coefficients, frames, channels]; the first axis is the
    # number of MFCCs, and the frame count is the clip length in samples
    # divided by the hop length, rounded up.
    Frequencies = Feature_Params['no_mfcc']
    Frames = math.ceil(
        Feature_Params['length_s'] * Feature_Params['sampling_rate']
        / Feature_Params['hop_length']
    )
    channels = 1
    Feature_Params['Input_Dim'] = [Frequencies, Frames, channels]

    # Persist the parameter set as JSON in the feature store.
    with open(feature_store / New_Model_Name, 'w') as file:
        file.write(json.dumps(Feature_Params))

# Shows the parameter set (the previous one if nothing was written).
Feature_Params

Modelname is used already!
Overwriting File


{'Description': 'Feature containing the Mel Frequency Cepstral Coefficients',
 'Transformation': 'MFCC',
 'sampling_rate': 44100,
 'hop_length': 1024,
 'fft_window': 1024,
 'no_mel_bin': 128,
 'no_mfcc': 40,
 'dct_type': 2,
 'loudness_normalization': '',
 'fmin': 0.0,
 'fmax': 11025.0,
 'length_s': 3,
 'Input_Dim': [40, 130, 1]}

In [3]:
# Load the raw data set from the pickle file at `data_path`
# (presumably a DataFrame — the recorded output below is tabular).
# NOTE(review): unpickling can execute arbitrary code; only load pickle
# files that come from a trusted source.
data = pd.read_pickle(data_path)
# Display the loaded object as the cell output.
data

Unnamed: 0,tags,wav_path,raw_sounds,sample_rate,labels,names,no_samples
0,gel,data/IRMAS-TrainingData/gel/072__[gel][dru][po...,"[0.022094727, 0.022338867, 0.01928711, 0.02258...",44100,4,electric guitar,132299
1,flu,data/IRMAS-TrainingData/flu/[flu][pop_roc]0475...,"[-0.0010528564, -0.020324707, -0.03727722, -0....",44100,2,flute,132299
2,org,data/IRMAS-TrainingData/org/[org][jaz_blu]1162...,"[-0.102630615, -0.07052612, -0.03741455, -0.03...",44100,5,organ,132299
3,cla,data/IRMAS-TrainingData/cla/144__[cla][nod][cl...,"[-0.010559082, -0.020370483, -0.02897644, -0.0...",44100,1,clarinet,132299
4,voi,data/IRMAS-TrainingData/voi/[voi][pop_roc]2363...,"[0.015213013, 0.0115356445, 0.009475708, 0.010...",44100,10,human singing voice,132299
...,...,...,...,...,...,...,...
6700,tru,data/IRMAS-TrainingData/tru/[tru][jaz_blu]2064...,"[0.057662964, 0.05204773, 0.046905518, 0.04219...",44100,8,trumpet,132299
6701,vio,data/IRMAS-TrainingData/vio/[vio][cla]2270__1.wav,"[-0.030593872, -0.03842163, -0.045272827, -0.0...",44100,9,violin,132299
6702,gac,data/IRMAS-TrainingData/gac/[gac][cla]0631__1.wav,"[0.032608032, 0.03439331, 0.036132812, 0.03784...",44100,3,acoustic guitar,132299
6703,cla,data/IRMAS-TrainingData/cla/[cla][pop_roc]0198...,"[0.09324646, 0.099090576, 0.09463501, 0.083724...",44100,1,clarinet,132299


In [4]:
# Build features and labels for the 'FFT_Complex_224x224.txt' parameter set.
# NOTE(review): per the recorded output, features were already calculated and
# were read from disc in this run, ignoring the parameter file.
feature, label = create_features(data, param_file = 'FFT_Complex_224x224.txt')

FFT_Complex_224x224.txt
Features already calculated, read from disc, ignore parameter file


In [5]:
# Same parameter set, this time with suffix='_test'.
# NOTE(review): the recorded output shows the parameter summary instead of the
# read-from-disc message; presumably the suffix selects a different cache
# file so the features are computed anew — confirm in feature_generator.
feature, label = create_features(data, param_file = 'FFT_Complex_224x224.txt', suffix='_test')

Real and imaginary part of FFT in 2 channels, use Leaky_Relu, values between -1 and 1!
Hop_length:  592
Sampling Rate: 44100
Fast Fourier Window: 446
Shape of Feature:  [224, 224, 2]


In [21]:
# Print the shapes of the feature array and the label vector.
# NOTE(review): the recorded output (6705, 513, 130, 2) does not match the
# Input_Dim [224, 224, 2] of the parameter set used in the preceding cells,
# and this cell's execution count (21) is out of order — re-run the notebook
# on a fresh kernel to confirm which features are actually held here.
print(feature.shape)
print(label.shape)

(6705, 513, 130, 2)
(6705,)
