In [1]:
# file related
import os
from os.path import join

# machine learning
from keras.models import Sequential
from keras.layers import Dense, Input
import tensorflow as tf
from sklearn.model_selection import train_test_split

# utils
import numpy as np
import matplotlib.pyplot as plt
import random
import librosa
import import_ipynb
from IPython.display import Audio as play_audio
import scipy as sp
import matplotlib.patches as patches
from ctypes import *
import taunet_utils
%run transient_shaper_lib.ipynb

print(f"TF version {tf.__version__}")

TF version 2.12.0


# From RTNEURAL
Currently supported layers:  
- Dense
- GRU
- LSTM
- Conv1D
- Conv2D
- BatchNorm1D
- BatchNorm2D

Currently supported activations:  
- tanh
- ReLU
- Sigmoid
- SoftMax
- ELu
- PReLU

In [2]:
def af_dsp_init(path_to_dll):
    lib = CDLL(path_to_dll)

    lib.resetBuffer.argtypes = []
    lib.resetBuffer.restype = None

    lib.initAf.argtypes = []
    lib.initAf.restype = None

    lib.AFInCAppend.argtypes = [c_double]
    lib.AFInCAppend.restype = None

    lib.AFInCProcess.argtypes = []
    lib.AFInCProcess.restype = None

    lib.afGetT1A.argtypes = []
    lib.afGetT1A.restype = c_double

    lib.afGetT2A.argtypes = []
    lib.afGetT2A.restype = c_double

    lib.afGetTempo.argtypes = []
    lib.afGetTempo.restype = c_double

    lib.afGetSpectralCentroid.argtypes = []
    lib.afGetSpectralCentroid.restype = c_double

    lib.afGetSpectralFlatness.argtypes = []
    lib.afGetSpectralFlatness.restype = c_double

    lib.afGetPBandL.argtypes = []
    lib.afGetPBandL.restype = c_double

    lib.afGetPBandML.argtypes = []
    lib.afGetPBandML.restype = c_double

    lib.afGetPBandMH.argtypes = []
    lib.afGetPBandMH.restype = c_double

    lib.afGetPBandH.argtypes = []
    lib.afGetPBandH.restype = c_double

    return lib

lib = af_dsp_init("../src/af/AFInC.dll")

def audio_to_af(dataset_path):
    data = taunet_utils.read_audio_files(dataset_path)
    input_data = dict()
    for audio, label, fs in data:
        lib.initAf()
        lib.resetBuffer()
        for sample in audio:
            lib.AFInCAppend(sample)
        lib.AFInCProcess()
        input_data[label] = (
            lib.afGetTempo(),
            lib.afGetT1A() / fs,
            lib.afGetT2A() / fs,
            int(lib.afGetSpectralCentroid()),
            lib.afGetSpectralFlatness(),
            int(lib.afGetPBandL()),
            int(lib.afGetPBandML()),
            int(lib.afGetPBandMH()),
            int(lib.afGetPBandH()),
            lib.afGetCrestFactor()
        )
    return input_data


def create_model(input_shape):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    # model.add(layers.LSTM(64, return_sequences=True))   # short term memory, useful if input data is related accross vectors
    model.add(Dense(2)) # no activation (linear): continuous mapping of outputs (this is not a classification task!)
    return model

input_data = audio_to_af(os.getcwd())
print(input_data)

input_shape = (10,)
model = create_model(input_shape)
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()


  sample_rate, audio_data = sp.io.wavfile.read(path)


{'db Candy monster, lose weight snippet': (100.67114093959732, 0.0105, 0.5513020833333333, 3753, 0.6444302496316632, 10, 1, 1, 1, 1516921546), 'track_1-1': (175.78125, 0.022607709750566893, 0.19444444444444445, 29285, 0.7027047803848404, 10, 1, 1, 1, 0)}
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                704       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 2)                 66        
                                                                 
Total params: 2,850
Trainable params: 2,850
Non-trainable params: 0
_________________________________________________________________
