In [1]:
# file related
import os
from os.path import join

# machine learning
from keras.models import Sequential
from keras.layers import Dense, Input
import tensorflow as tf
from sklearn.model_selection import train_test_split

# utils
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import librosa
import import_ipynb
from IPython.display import Audio as play_audio
import scipy as sp
import matplotlib.patches as patches
from ctypes import *
import taunet_utils
%run transient_shaper_lib.ipynb

print(f"TF version {tf.__version__}")

TF version 2.12.0


# From RTNEURAL
Currently supported layers:  
- Dense
- GRU
- LSTM
- Conv1D
- Conv2D
- BatchNorm1D
- BatchNorm2D

Currently supported activations:  
- tanh
- ReLU
- Sigmoid
- SoftMax
- ELu
- PReLU

In [45]:
def af_dsp_init(path_to_dll):
    lib = CDLL(path_to_dll)

    lib.resetBuffer.argtypes = []
    lib.resetBuffer.restype = None

    lib.initAf.argtypes = []
    lib.initAf.restype = None

    lib.AFInCAppend.argtypes = [c_double]
    lib.AFInCAppend.restype = None

    lib.AFInCProcess.argtypes = []
    lib.AFInCProcess.restype = None

    lib.afGetT1A.argtypes = []
    lib.afGetT1A.restype = c_double

    lib.afGetT2A.argtypes = []
    lib.afGetT2A.restype = c_double

    lib.afGetTempo.argtypes = []
    lib.afGetTempo.restype = c_double

    lib.afGetSpectralCentroid.argtypes = []
    lib.afGetSpectralCentroid.restype = c_double

    lib.afGetSpectralFlatness.argtypes = []
    lib.afGetSpectralFlatness.restype = c_double

    lib.afGetPBandL.argtypes = []
    lib.afGetPBandL.restype = c_double

    lib.afGetPBandML.argtypes = []
    lib.afGetPBandML.restype = c_double

    lib.afGetPBandMH.argtypes = []
    lib.afGetPBandMH.restype = c_double

    lib.afGetPBandH.argtypes = []
    lib.afGetPBandH.restype = c_double

    return lib

lib = af_dsp_init("../src/af/AFInC.dll")


def create_dataset(audio_dir, human_input_csv):
    audio_data = taunet_utils.read_audio_files(audio_dir)
    human_data = pd.read_csv(human_input_csv)

    input_data = dict()
    output_data = dict()
    
    for audio, label, fs in audio_data:
        lib.initAf()
        lib.resetBuffer()
        label = label.split()[0]
        for sample in audio:
            lib.AFInCAppend(sample)
        lib.AFInCProcess()
        
        human_input = human_data[human_data['MEASUREMENT_ID'] == int(label)].iloc[0].to_dict()
        human_input.pop("MEASUREMENT_ID")
        human_input.pop("SONG_ID")
        human_output = dict()
        human_output["ATTACK_T1"] = human_input.pop("ATTACK_T1")
        human_output["SUSTAIN_T1"] = human_input.pop("SUSTAIN_T1")
        output_data[label] = tuple(human_output.values())
        
        input_data[label] = tuple(round(val, 4) for val in (
            lib.afGetTempo(),
            lib.afGetT1A() / fs,
            lib.afGetT2A() / fs,
            int(lib.afGetSpectralCentroid()),
            lib.afGetSpectralFlatness(),
            int(lib.afGetPBandL()),
            int(lib.afGetPBandML()),
            int(lib.afGetPBandMH()),
            int(lib.afGetPBandH()),
            lib.afGetCrestFactor()
        ))
        input_data[label] += tuple(human_input.values())
    return input_data, output_data


def create_model(input_shape):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    # model.add(layers.LSTM(64, return_sequences=True))   # short term memory, useful if input data is related accross vectors
    model.add(Dense(2)) # no activation (linear): continuous mapping of outputs (this is not a classification task!)
    return model

input_data, output_data = create_dataset(os.path.join("dataset", "audio"), os.path.join("dataset", "human_input", "AITD_Dataset_Kristof_beta_1.csv"))
for d in input_data:
    print(f"{d}: {input_data[d]} -> {output_data[d]}")

input_shape = (10,)
model = create_model(input_shape)
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()


1: (123.9669, 0.0121, 0.2251, 15648, 0.7536, 10, 1, 1, 1, 0, 0.6738, 0.8758, 550.0, 350.0) -> (360.5123, 1144.7427)
10: (159.0106, 0.0132, 0.2123, 2440, 0.8535, 10, 1, 1, 1, 0, 0.3157, 0.8187, 550.0, 350.0) -> (330.558, 1814.8958)
11: (117.4935, 0.0409, 0.31, 7351, 0.7748, 10, 1, 1, 1, 546576167, 0.7365, 0.7567, 550.0, 350.0) -> (302.5441, 430.5534)
12: (172.4138, 0.3647, 0.4617, 15840, 1.569, 10, 1, 1, 1, 102581079, 0.7287, 0.8574, 550.0, 350.0) -> (70.1614, 1192.9986)
13: (156.7944, 0.0098, 0.1758, 3783, 0.859, 10, 1, 1, 1, 1866621861, 0.109, 0.9997, 550.0, 350.0) -> (283.9686, 1571.1926)
14: (152.5424, 0.0422, 0.245, 2145, 1.1746, 10, 1, 1, 1, 0, 0.6313, 0.2773, 550.0, 350.0) -> (281.9786, 461.7476)
15: (143.77, 1.4763, 0.1339, 2145, 2.2429, 10, 1, 1, 1, -1248480519, 0.5662, 0.8151, 550.0, 350.0) -> (206.762, 1754.8867)
16: (127.8409, 0.0327, 0.2358, 2134, 0.8128, 10, 1, 1, 1, 1320852491, 0.6062, 0.2058, 550.0, 350.0) -> (499.6292, 320.9579)
17: (140.1869, 0.0419, 0.18, 9530, 1.0342