# The Factory
A Model Optimization Method (or MOM)

This is a "factory" that uses Optuna and labeled data to search for the parameters that yield the most accurate model.

In [4]:
# Import Basic Python Functionality
import numpy as np
import pandas as pd
import subprocess
from pytube import YouTube
from pydub import AudioSegment
import copy
import random
import datetime
import time
import sys
import re
import os
import io
from os import walk
import scipy.io.wavfile
from scipy import signal
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
from matplotlib.ticker import ScalarFormatter
from matplotlib.ticker import FormatStrFormatter
import matplotlib.cm as cm
import seaborn as sns
import matplotlib.dates as mdates
import string
from pathlib import Path


# Import Python Libraries for Machine Learning
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import LabelEncoder
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score, calinski_harabasz_score, v_measure_score
from sklearn.preprocessing import MinMaxScaler, normalize, LabelBinarizer, minmax_scale
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.pairwise import pairwise_distances, pairwise_distances_argmin_min
from sklearn.neighbors import KNeighborsClassifier


# Import Keras/Tensorflow Libraries for Deep Learning
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping, Callback
from tensorflow.keras import Input
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Conv1D, SpatialDropout1D, Flatten, Activation, Lambda, Convolution1D, Dense, add, Lambda
from tensorflow.keras.applications import xception
import tensorflow_addons as tfa  


# GUI Components
from pydub.playback import play
import ipywidgets as widgets
from ipywidgets import interact
from tkinter import * 
from tkinter import ttk
from tkinter.ttk import *
from IPython.display import display
from IPython.display import Image

# Factory Components
import optuna


#%matplotlib inline


def vdir(directory): #verify a directory exists, if not make it
    """Ensure `directory` exists and return the same path.

    Parameters
    ----------
    directory : str
        Path of the folder to verify.

    Returns
    -------
    str
        `directory`, unchanged, so the call can be used inline in an assignment.

    Missing parent folders are created as well, so the helper also works for
    nested paths such as 'data/downloads' on a fresh checkout.
    """
    if not os.path.exists(directory):
        # exist_ok guards against another process creating the folder
        # between the check above and this call.
        os.makedirs(directory, exist_ok=True)
    return directory

# Project folder layout. Every vdir() call creates the folder if it is missing
# and returns the path, so these assignments have file-system side effects.
data_folder=vdir('data')
more_folder=vdir('more') # cached intermediate arrays and factory results are stored under here
downloads_folder=vdir(data_folder+'/downloads')
npys_folder=vdir(data_folder+'/npys')
time_series_npys_folder=vdir(npys_folder+'/time_series_audio_npys')    
audio_set_npys_folder=vdir(npys_folder+'/audio_set_npys') # converted AudioSet clips saved as .npy

time_series_downloads_folder=vdir(downloads_folder+'/time_series_audio_mp4s') #long time-series mp4 files go in here
audio_set_downloads_folder=vdir(downloads_folder+'/audio_set_mp4s') #short mp4 files from google's AudioSet data go here

# Optional GPU memory-growth setting, currently disabled:
#physical_devices = tf.config.list_physical_devices('GPU')
#tf.config.experimental.set_memory_growth(physical_devices[0], True)
Train_from_scratch = True # NOTE(review): not referenced anywhere in the visible part of the notebook

print("tf version:",tf.__version__)

# Last expression of the cell: displays the list of GPUs TensorFlow can see
tf.config.list_physical_devices('GPU')


tf version: 2.9.0


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
###
#   DATA SETUP
###

#Instantiation
# Class names. The position of a name in this list is the integer class index
# used for the labels further down (via classes.index(...)).
classes = ["Motorboat_speedboat",
"Racecar_autoracing",
"Carpassingby",
"Tiresqueal",
"Caralarm",
"Vehiclehorn_carhorn_honking",
"Airbrake",
"Airhorn_truckhorn",
"Bus",
"Motorcycle",
"Trafficnoise_roadwaynoise",
"Railroadcar_trainwagon",
"Trainwheelssquealing",
"Helicopter",
"Fixed-wingaircraft_airplane",
"Lightengine(highfrequency)",
"Mediumengine(midfrequency)",
"Heavyengine(lowfrequency)"]
# Parameter dictionary for the data-setup cell; only the sample rate is needed here.
pdict = {}
pdict['r_smp'] = 44100 # sample rate (samples per second) used to convert seconds to samples and in the .npy file names

# Build (or load from cache) the four parallel structures used by the rest of the notebook:
#   files_raw    - paths of the per-clip .npy audio files
#   labels_c     - per file, the list of class indices present in it
#   times_raw    - per file, a list of [lower, upper] event limits in samples
#   class_groups - per class, the list of indices into files_raw that contain that class
# The result is cached in more/fltc.npy; when that file exists everything below is skipped.
if not os.path.exists(more_folder+'/fltc.npy'):

    #Load the raw data files and convert them if needed
    video_files=np.sort(os.listdir(audio_set_downloads_folder))
    for i,video_file in enumerate(video_files):
        print(int(100*i/len(video_files)),"% converting      ",end='\r')
        # Drops the last dot-separated part (the extension). Because the remaining parts are
        # joined with '', any other dots inside the file name are removed as well.
        video_name=''.join(video_file.split('.')[:-1])
        audio_file = os.path.join(audio_set_npys_folder, video_name+f'_{pdict["r_smp"]}.npy')
        if not os.path.exists(audio_file):
            try:
                seg = AudioSegment.from_file(audio_set_downloads_folder+'/'+video_file, format='mp4')
                # NOTE(review): pydub_to_np is not defined in the visible part of this notebook.
                # If it is missing at run time the NameError is swallowed by the bare except below
                # and every file is reported as an error - confirm it is defined elsewhere.
                samples = pydub_to_np(seg, pdict['r_smp'])
                np.save(audio_file, samples)
            except:
                # Best effort: report the failing file and continue with the next one.
                # NOTE(review): the bare except also catches KeyboardInterrupt.
                print("\nError with "+video_name+"\n")  # to handle exception   
    print("100 % converted      ")
    # Every .npy file that actually exists in the converted-audio folder
    test_numpy_files = [os.path.join(audio_set_npys_folder, f) for f in os.listdir(audio_set_npys_folder) if Path(f).suffix == '.npy']

    #Import and format labels and times:
    files_labels_times_raw = pd.read_csv('clean_download.csv')[['segment_id','name','time_interval']].to_numpy()
    # Expected file path for every CSV row: <audio_set_npys_folder>/<segment_id>_<sample rate>.npy
    files_raw=[audio_set_npys_folder+'/'+files_labels_times_raw[:,0][i]+'_'+str(pdict['r_smp'])+'.npy' for i in range(len(files_labels_times_raw))]
    # The 'name' column is parsed as a bracketed, comma-separated, quoted list of class names
    labels_raw=[files_labels_times_raw[:,1][i][1:-1].replace(' ','').replace("'","").split(',') for i in range(len(files_labels_times_raw))]
    initial_length=len(files_raw) #8690
    # The 'time_interval' column is flattened to a list of number strings (brackets/parentheses stripped)
    times_raw_str=[files_labels_times_raw[:,2][i].replace(' ','').replace('(','').replace(')','').strip('][').split(',') for i in range(len(files_labels_times_raw))]
    # NOTE(review): pairs are built from tlist[i], tlist[i+1] for i in range(len//2), i.e. consecutive
    # overlapping elements rather than (0,1),(2,3),... - this only matches the intent when a row holds a
    # single interval; confirm against the CSV format.
    times_raw=[[[int(float(tlist[i])*pdict['r_smp']),int(float(tlist[i+1])*pdict['r_smp'])] for i in range(len(tlist)//2)] for tlist in times_raw_str]
    #times_raw is now preformatted into time in samples

    #Pop nonexistent files and their corresponding labels:
    if os.path.exists(more_folder+'/pop_arr.npy'):
        # Replay the cached list of indices to drop, in the exact order they were originally popped
        pop_arr=np.load(more_folder+'/pop_arr.npy')
        lengths_tmp=np.load(more_folder+'/file_lengths.npy') # file lengths in samples (see the save below)
        [[files_raw.pop(i), labels_raw.pop(i), times_raw.pop(i)] for i in pop_arr] # comprehension used only for its side effects
    else:
        pop_arr=[]
        # Two-pointer walk: i over the sorted existing files, j over the CSV-derived paths.
        # Any CSV entry that does not line up with the next existing file is marked for removal.
        # NOTE(review): np.sort(test_numpy_files) is recomputed on every iteration, and index i can run
        # past the end of the list (IndexError) if CSV entries remain after the last existing file.
        i,j,count=0,0,0
        while j<len(files_raw):
            if np.sort(test_numpy_files)[i]==files_raw[j]:
                i+=1
                j+=1
            else:
                count+=1
                pop_arr.append(j)
                j+=1
        pop_arr.reverse() # pop from the back so earlier indices stay valid
        for popi in pop_arr:
            files_raw.pop(popi)
            labels_raw.pop(popi)
            times_raw.pop(popi)
        assert len(files_raw)==initial_length-count #if this raises an error then something is not in the correct order (check the sorting of files_raw and test_numpy_files)

        #also pop files under specified video length (in s)
        tmp_data=[np.load(f) for f in files_raw] # loads every remaining clip into memory at once
        lengths_tmp=[len(tmp_data[i])/pdict['r_smp'] for i in range(len(tmp_data))] # lengths in seconds
        lengths_under_9s=[i for i,length in enumerate(lengths_tmp) if length<9]; lengths_under_9s.reverse()
        [[files_raw.pop(i), labels_raw.pop(i), times_raw.pop(i),lengths_tmp.pop(i)] for i in lengths_under_9s]
        pop_arr.extend(lengths_under_9s) #the way the indices were popped originally must remain the same order so long story short don't sort this list
        lengths_tmp=[len(np.load(f)) for f in files_raw] # recomputed in SAMPLES (not seconds) for the clamping step below
        np.save(more_folder+'/pop_arr.npy',pop_arr)
        np.save(more_folder+'/file_lengths',lengths_tmp) # np.save appends '.npy', giving the file_lengths.npy loaded above
        del tmp_data

    #Reformat the times
    for i in range(len(times_raw)):
        for j in range(len(times_raw[i])):
            [tmp_ll,tmp_ul]=times_raw[i][j] #upper and lower limits are the event time tags (in samples)
            if tmp_ll>tmp_ul: #if our lower limit is greater than the upper limit
                tmp_ll,tmp_ul=tmp_ul,tmp_ll #flip them
            if tmp_ul>lengths_tmp[i]: #if the true upper limit goes beyond the length of our file
                tmp_ul=lengths_tmp[i] #set it equal to the length of the file
            times_raw[i][j]=[tmp_ll,tmp_ul]

    #Reformat the labels
    labels_c=[[classes.index(labels_raw[j][i]) for i in range(len(labels_raw[j]))] for j in range(len(labels_raw))] #write them as class indices instead of strings every time. This is important for our generator later

    # Multi-hot label matrix: labels[i, c] == 1 when file i contains class c
    labels=np.zeros((len(labels_raw),len(classes))) #I did this with labels_raw so it would be easier to show the below demonstration but you could do the same exact thing with labels_c instead
    for i,label_list in enumerate(labels_raw):
        for j in range(len(label_list)):
            labels[i,classes.index(label_list[j])]=1

    #Create class_groups
    #This is grouping each "file" into 18 different lists (classes). In this method one "file" may also be in multiple groups (I say "file" because these are really indices of the files_raw array)
    class_labels=np.identity(len(classes)) #an array of all the classes in our same format
    class_groups=[] #class_groups is a list of lists of each classes common file indices for data_array[index]. for example index 2 and 4 both contain 'Vehiclehorn_carhorn_honking' so we group them together in the same class_group
    for i in range(len(class_labels)):
        tmp_group=[]
        for j in range(len(labels)):
            # one-hot row i plus multi-hot row j contains a 2 exactly when file j has class i
            if np.isin(2,class_labels[i]+labels[j]):
                tmp_group.append(j)
        class_groups.append(tmp_group)

    length_class_groups=[len(class_groups[i]) for i in range(len(class_groups))] # number of files per class; not used again in the visible code

    # Cache the four structures together as one object array (pickled by numpy)
    np.save(more_folder+'/fltc.npy',np.array([files_raw,labels_c,times_raw,class_groups],dtype='object'))
else:
    # allow_pickle is needed because the cache is an object array. Only load a cache file this
    # notebook wrote itself: unpickling an untrusted file can execute arbitrary code.
    [files_raw,labels_c,times_raw,class_groups]=np.load(more_folder+'/fltc.npy',allow_pickle=True)

def fltdc(lb,ub):
    """Slice the global data set to the half-open index range [lb, ub).

    Reads the module-level files_raw, labels_c, times_raw and class_groups.

    Returns
    -------
    tuple
        (files, labels, times, data, class groups) for the selected range.
        `data` holds the arrays loaded from the selected files, and the class
        groups are re-based so their indices point into the sliced lists
        (original index minus lb), keeping only members inside the range.
    """
    window=slice(lb,ub)
    subset_files=files_raw[window]
    subset_labels=labels_c[window]
    subset_times=times_raw[window]

    subset_data=[]
    for path in subset_files:
        subset_data.append(np.load(path))

    # With lb == 0 this is simply "keep every index below ub"
    subset_groups=[]
    for group in class_groups:
        subset_groups.append([idx-lb for idx in group if lb<=idx<ub])

    return subset_files,subset_labels,subset_times,subset_data,subset_groups

#Training data set
lb1,ub1=0,2400 #highest ub is len(files_raw)

# Loads the audio of every training file into memory (fltdc calls np.load per file)
smallFilenames,smallLabels,smallTimes,smallData,smallClassgroups=fltdc(lb1,ub1)
lengths_tmp=[len(smallData[i])/pdict['r_smp'] for i in range(len(smallData))] # clip lengths in seconds
# NOTE(review): the data-setup cell drops clips with length<9, so a clip of exactly 9.0 s is kept
# there but would fail this strict > comparison.
assert np.min(lengths_tmp)>9.0 #if you changed the threshold earlier ignore this

#Test and Validation data sets
l=400 #size of each set

if ub1+l+l<=len(files_raw):
    # Enough files left: take the two blocks of l files directly after the training range
    lb2,ub2=ub1,ub1+l
    lb3,ub3=ub2,ub2+l
else:
    # Not enough files after the training range: pick two random start indices instead.
    # NOTE(review): these ranges are drawn from the whole file list, so they can overlap each other
    # and the training range [lb1, ub1).
    rng=np.random.randint(len(files_raw)-l,size=2)
    lb2,ub2=rng[0],rng[0]+l
    lb3,ub3=rng[1],rng[1]+l
# Set 2 is passed to the validation generator and set 3 to the test generator in train_test_model
smallFilenames2,smallLabels2,smallTimes2,smallData2,smallClassgroups2=fltdc(lb2,ub2)
smallFilenames3,smallLabels3,smallTimes3,smallData3,smallClassgroups3=fltdc(lb3,ub3)

# Generator C

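Uses labelled data: every batch contains nP classes with nK random windows per class. The classes within a batch are distinct when the data set has at least nP non-empty classes; otherwise classes may repeat (so this depends on how big nP is and how small the data set is).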

In [6]:
def generatorC(data_array,pdict,sm_labels,sm_times,sm_class_groups,return_startpoints=False): 
    """Endlessly yield batches of labelled spectrogram images.

    Each batch holds nP classes with nK windows per class (batch size nP*nK).
    For every window a file of the class is picked, a window is placed around
    one of that class's labelled event intervals, and the window is turned
    into a min-max normalised log-spectrogram coloured with the viridis map.

    Parameters
    ----------
    data_array : sequence of 2-D arrays
        Audio per file, indexed as [sample, channel].
    pdict : dict
        Reads 'numP', 'numK', 'w_sec' (window length in seconds), 'r_smp'
        (samples per second), 'vres' (spectrogram nperseg) and 'LRB'
        (channel selector, see L_R).
    sm_labels : sequence of sequences of int
        Class indices per file.
    sm_times : sequence
        Per file, [lower, upper] event limits in samples, parallel to sm_labels.
    sm_class_groups : list of lists of int
        Per class, the indices into data_array of the files containing it.
    return_startpoints : bool
        When True a third item is yielded: a flat list
        [file_index, start_sample, file_index, start_sample, ...].

    Yields
    ------
    (spectrograms, labels[, startpoints])
        spectrograms has shape (nP*nK, f_len, t_len, 3); labels is an int
        array of length nP*nK.

    Raises
    ------
    ValueError
        If every class group is empty (the class-picking loop could otherwise
        never terminate).
    """
    nP=pdict['numP']
    nK=pdict['numK']
    window_size_secs=pdict['w_sec']
    samps_per_second=pdict['r_smp']
    v_res=pdict['vres']
    lrb=pdict['LRB']
    noverlap=v_res/8
    lr=L_R(lrb)
    
    window_samps=int(window_size_secs*samps_per_second)
    
    t_len=int((window_samps-noverlap)/(v_res-noverlap))   #These are the time and frequency dimensions of my plot. They come from the scipy spectrogram generation formulas.
    f_len=int(.75 * v_res/2)+1
    
    bs=nP*nK #batch size
    
    n_nonempty=sum(1 for group in sm_class_groups if len(group)>0)
    if n_nonempty==0:
        raise ValueError('generatorC needs at least one non-empty class group')
    #classes inside a batch are forced to be distinct only when there are enough non-empty classes to fill nP slots; otherwise repeated labels are allowed (without this nP would be hard-limited to the number of classes)
    force_unique=nP<=n_nonempty

    while 1:
        
        startpoints=[]
        labels_gen=[]
        spectrograms=np.zeros((bs,f_len,t_len))
        
        for p in range(nP):
            sm_i,tmp_cgroup=0,[] #the empty group guarantees the loop below runs at least once
            while len(tmp_cgroup)==0 or (force_unique and sm_i in labels_gen):
                sm_i=np.random.randint(len(sm_class_groups))
                tmp_cgroup=sm_class_groups[sm_i] #choose a random non-empty non-repeated segment (the larger rng is the less time this takes)
            #shuffle a copy so the caller's class groups (shared between several generators) are not reordered in place
            tmp_cgroup=list(tmp_cgroup)
            np.random.shuffle(tmp_cgroup)
            
            #now sm_i is our current class and tmp_cgroup is our current classgroup 
            for k in range(nK):
                batch_idx=p*nK+k
                dind=tmp_cgroup[k] if k<len(tmp_cgroup) else np.random.choice(tmp_cgroup) #we want a random choice of the current classgroup (a random file from that class) but we don't want repeated files if we can avoid it so this is an ordered selection of the random shuffled tmp_cgroup until it runs out of options, then its just a random choice
                carr=data_array[dind]
                
                #Here we choose a window based around our time segment data
                #The way I approached this was to imagine two cases, one where the window is larger than the event time frame and one where the window is shorter than that event time frame
                t_ind=np.random.choice([i for i,smsmlbs in enumerate(sm_labels[dind]) if smsmlbs==sm_i])#randomly chooses a valid labels index where the current class exists in that file
                [ll,ul]=sm_times[dind][t_ind] #lower and upper limits of the event time frame
                
                if ll+window_samps>len(carr): #if the event time frame is smaller than the window and near the end of the file we need to make sure our window doesn't just go right off the end of the file
                    [tl,tu]=np.sort([len(carr)-window_samps,ul-window_samps])
                elif ul-window_samps<0: #if the event time frame is smaller than the window and close to the beginning of the file we need to make sure our window doesn't precede 0 
                    [tl,tu]=np.sort([ll,0])
                else:
                    [tl,tu]=np.sort([ll,ul-window_samps])#let's take care of 2 possibilities at once, one the window is smaller than the event time frame (if window_samps<ul-ll then we want samp=np.random.randint(ll,ul-window_samps)), and two the window is larger than the event time frame(if window_samps>ul-ll then we want samp=np.random.randint(ul-window_samps,ll))
                samp=np.random.randint(tl,tu) if tl!=tu else tl #if tl==tu then np.random.randint will give an error so we just give it the exception
                window=carr[samp:samp+window_samps,xl(lr)]
                
                n_window=window/(np.max(np.abs(window))+1e-8) #peak-normalise; the epsilon avoids dividing by zero on silent windows

                labels_gen.append(sm_i)
                startpoints.extend([dind,samp])
                
                frequencyx, timesx, spectrogramx = signal.spectrogram(n_window, samps_per_second, nperseg=v_res)
                #NOTE(review): log10 is written in place and skipped where the power is not > 0, so those bins keep the value 0, which after the log is usually the LARGEST value in the image - confirm this is intended for silent bins
                log_spectrogram=np.log10(spectrogramx[:f_len,:t_len],out=spectrogramx[:f_len,:t_len],where=spectrogramx[:f_len,:t_len] > 0)
                diff=np.max(log_spectrogram)-np.min(log_spectrogram)
                #min-max scale to [0,1]; a constant image is only shifted to 0 to avoid dividing by zero
                spectrograms[batch_idx]=(log_spectrogram-np.min(log_spectrogram))/diff if diff!=0 else log_spectrogram-np.min(log_spectrogram)

        spectrograms_out=cm.viridis(spectrograms)[:,:,:,0:3] #map to RGBA with viridis and drop the alpha channel

        
        if return_startpoints:
            yield spectrograms_out, np.array(labels_gen).astype(int), startpoints
        else:
            yield spectrograms_out, np.array(labels_gen).astype(int)

#Additional Functions:
def xl(lr):
    """Return the channel index to read for one window.

    `lr` is the code produced by L_R: 0 and 1 are returned unchanged, while 2
    draws a random channel (0 or 1) on every call. Kept separate from L_R so
    no string comparison happens inside the generator's innermost loop.
    """
    if lr==2:
        return np.random.randint(2)
    return lr
def L_R(lrb):
    """Translate the 'LRB' setting into a numeric code: 'L' -> 0, 'R' -> 1, anything else -> 2."""
    if lrb=='L':
        return 0
    if lrb=='R':
        return 1
    return 2

# NN Architecture

In [7]:
def encoder_network(pdict, padding='same', drop=0.05):
    """Build the Xception-based embedding model.

    An ImageNet-initialised Xception backbone (no top, global average pooling)
    is followed by the ReLU dense layers listed in pdict['d'] and a linear
    'output_dense' layer of size pdict['od']. When pdict['n'] is truthy the
    output is L2-normalised along axis 1.

    The input shape is derived from pdict['w_sec'], pdict['r_smp'] and
    pdict['vres'] with pdict['ch'] channels. `padding` and `drop` are accepted
    but not used by this function.

    Returns the (uncompiled) Keras model named 'encoder_model'.
    """
    # Spectrogram geometry: frequency bins first, then time frames
    window_samples = int(pdict['w_sec']*pdict['r_smp'])
    n_time = int((window_samples-(pdict['vres']/8))/(pdict['vres']*7/8))
    n_freq = int(.75 * pdict['vres']/2)+1
    image_shape = (n_freq, n_time, pdict['ch'])

    input_layer = Input(shape=image_shape)
    backbone = xception.Xception(weights='imagenet',
                                 include_top=False,
                                 input_shape=image_shape,
                                 pooling='avg')

    features = backbone(input_layer)
    for units in pdict['d']:
        features = Dense(units, activation='relu')(features)

    output_layer = Dense(pdict['od'], activation='linear', name='output_dense')(features)

    # normalize output for cosine similarity
    if pdict['n']:
        output_layer = Lambda(lambda xx: K.l2_normalize(xx, axis=1))(output_layer)

    print(f'model.x = {input_layer.shape}')
    print(f'model.y = {output_layer.shape}')

    return Model(input_layer, output_layer, name='encoder_model')

def vggish_network(pdict, padding='same'):
    """Build the VGG-style convolutional embedding model.

    Four convolution stages (64, 128, 2x256, 2x512 filters, 3x3 kernels, ReLU),
    each followed by 2x2 max pooling, are flattened and passed through the
    dense layers listed in pdict['d'] (no activation) and a linear
    'output_dense' layer of size pdict['od'].

    When pdict['n'] is truthy the final embedding is L2-normalised. The
    normalisation is applied AFTER 'output_dense' so that the vectors the model
    emits are unit length, matching encoder_network; previously it was applied
    before the output layer, which left the emitted embedding unnormalised.

    The input shape is derived from pdict['w_sec'], pdict['r_smp'] and
    pdict['vres'] with pdict['ch'] channels.

    Returns the (uncompiled) Keras model named 'vggish_model'.
    """
    nb_channels = pdict['ch']
    dense_layers  = pdict['d']
    out_dim = pdict['od']
    norm = pdict['n']
    img_height = int((int(pdict['w_sec']*pdict['r_smp'])-(pdict['vres']/8))/(pdict['vres']*7/8))
    img_width = int(.75 * pdict['vres']/2)+1
    
    input_layer = tf.keras.Input(shape=(img_width, img_height, nb_channels), name="img")
    x = input_layer
    # (filters, number of conv layers) per stage; every stage ends with a max pool
    for filters, n_convs in ((64, 1), (128, 1), (256, 2), (512, 2)):
        for _ in range(n_convs):
            x = tf.keras.layers.Conv2D(filters, 3, data_format='channels_last', padding=padding, activation="relu")(x)
        x = tf.keras.layers.MaxPooling2D(2)(x)
    x = tf.keras.layers.Flatten()(x)
    for d in dense_layers:
        x = Dense(d)(x) #, activation='relu'

    x = tf.keras.layers.Dense(out_dim, activation='linear', name='output_dense')(x)

    # normalize output for cosine similarity
    if norm:
        x = Lambda(lambda xx: K.l2_normalize(xx, axis=1))(x)

    output_layer = x
    model = Model(input_layer, output_layer, name="vggish_model")
    
    return model

def load_custom_model(pdict, model_folder='models/'):
    """Load a previously saved model or build a new one, then compile it.

    Parameters
    ----------
    pdict : dict
        Model/generator parameters. If it contains 'iniW', the saved model
        whose file name contains '|time:<iniW>' is loaded from `model_folder`
        and the parameters encoded in that file name replace `pdict` for the
        metric and optimizer setup. Otherwise a new model is built
        (vggish_network when pdict['vggish'] is truthy, else encoder_network).
    model_folder : str
        Folder searched for saved '.h5' models.

    Returns
    -------
    (model, model_name)
        The compiled Keras model and its name (without the '.h5' extension).

    Raises
    ------
    IndexError
        If 'iniW' is given but no file in `model_folder` matches it.
    """
    if 'iniW' in pdict.keys():
        print(f'|time:{pdict["iniW"]}')
        # first file whose name carries the requested time stamp, minus the '.h5' suffix
        model_name = [n for n in os.listdir(model_folder) if f'|time:{pdict["iniW"]}' in n][0][:-3]
        model_file = os.path.join(model_folder, model_name + '.h5')

        print('loading previous model:\n', model_file)
        # Parse the extension-free name: parsing the file name including '.h5' turned the
        # last parameter into a string such as '1654115633.h5' instead of a number.
        pdict = name2param(model_name)
        my_acc, my_k = get_acc(pdict)
        my_loss = tfa.losses.TripletSemiHardLoss()
        model = load_model(model_file, custom_objects={'triplet_loss': my_loss, 'tr_acc': my_acc, 's_knn':my_k})
    else:
        model_name = param2name(pdict)
        model_file = os.path.join(model_folder, model_name)

        print('building new model:\n', model_file)
        my_acc, my_k = get_acc(pdict)
        my_loss = tfa.losses.TripletSemiHardLoss()
        model = vggish_network(pdict) if pdict['vggish'] else encoder_network(pdict)

    # NOTE(review): the loss is created with its default margin; pdict['m'] is only passed to
    # the tr_acc metric (via get_acc) - confirm that is intended.
    o = Adam(learning_rate=pdict['lr'], clipnorm=1.)
                      
    model.compile(loss=my_loss, optimizer=o, metrics=[my_acc,my_k])

    return model, model_name


def get_callbacks(model_name, model_folder, log_folder):
    """Create the training callbacks for one model.

    Returns a list with, in order: a checkpoint that keeps the full model with
    the lowest val_loss at <model_folder>/<model_name>.h5, early stopping on
    val_loss with the patience encoded in the model name ('pat'), and a
    TensorBoard logger writing to <log_folder>/<model_name>.

    Side effect: a summary file writer for '<log dir>/metrics' is created and
    installed as the default writer.
    """
    params = name2param(os.path.basename(model_name))
    log_dir = os.path.join(log_folder, model_name)
    checkpoint_path = os.path.join(model_folder, model_name + '.h5')

    checkpoint = ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,
        mode='min',
    )
    early_stop = EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=params['pat'],
        verbose=0,
        mode='min',
        baseline=None,
    )
    tensorboard = TensorBoard(log_dir=log_dir)

    # Make the metrics writer the default target for tf.summary calls
    tf.summary.create_file_writer(log_dir + "/metrics").set_as_default()

    return [checkpoint, early_stop, tensorboard]


def param2name(pdict):
    """Serialise a parameter dict into a model name.

    Every entry becomes 'key:value' and entries are joined with '|' in the
    dict's iteration order. Values that are exactly of type list are written
    as their items joined with 'x' (e.g. [2048, 2048] -> '2048x2048').
    """
    def _render(value):
        # Only genuine lists get the 'x' encoding; everything else is formatted as-is
        if type(value) is list:
            return "x".join(map(str, value))
        return f'{value}'

    return '|'.join(f'{key}:{_render(value)}' for key, value in pdict.items())


def name2param(name):
    """Parse a model name produced by param2name back into a parameter dict.

    The name is split on '|' into 'key:value' tokens. Values are converted as
    follows:
      * 'True' / 'False'            -> bool (a plain string 'False' would be truthy)
      * plain decimal numbers       -> int when possible, otherwise float
      * 'AxBxC' with integer parts  -> list of int
      * anything float() accepts    -> float (e.g. '1e-05')
      * everything else             -> left as the original string

    A trailing '.h5' is ignored so a model file name can be passed directly.

    NOTE: a one-element list written by param2name (e.g. 'd:256') cannot be
    told apart from a scalar and comes back as an int.

    Raises
    ------
    ValueError
        If a token does not contain ':'.
    """
    if name.endswith('.h5'):
        name = name[:-3]

    regnumber = re.compile(r'^\d+(\.\d+)?$')
    pdict = {}
    for token in name.split('|'):
        # split on the first ':' only, so a value may itself contain ':'
        key, value = token.split(':', 1)

        if value in ('True', 'False'):
            pdict[key] = (value == 'True')
            continue

        if regnumber.match(value):
            try:
                pdict[key] = int(value)
            except ValueError:
                pdict[key] = float(value)
            continue

        parsed = value
        if 'x' in value[:-1]:
            # looks like an 'x'-joined list; fall through if the parts are not integers
            try:
                parsed = list(map(int, value.split('x')))
            except ValueError:
                parsed = value
        if isinstance(parsed, str):
            try:
                parsed = float(value)
            except ValueError:
                pass
        pdict[key] = parsed
    return pdict

# Acc Function
def get_pairwise_dists(x, num_p, num_k):
    """Compute batch-hard distances for a batch laid out as num_p blocks of num_k rows.

    Returns
    -------
    (min_max, dists)
        dists is the full matrix of Euclidean distances between the rows of x.
        min_max has two columns per row: column 0 is the largest distance to a
        row of the same block, column 1 the smallest distance to a row of any
        other block.
    """
    # pairwise distances for whole batch via ||a||^2 - 2 a.b + ||b||^2
    # (redundant computation but probably still faster than alternative)
    sq_norms = tf.reshape(tf.reduce_sum(x * x, 1), [-1, 1])
    gram = tf.matmul(x, x, transpose_b=True)
    dists = sq_norms - 2 * gram + tf.transpose(sq_norms)
    dists = K.sqrt(K.relu(dists))  # relu clips small negative values before the square root

    hardest_pos = []
    hardest_neg = []
    for block in range(0, num_p):
        row0 = block * num_k
        row1 = (block + 1) * num_k

        # max intra-class distance for each sample of this block
        same_block = tf.slice(dists, [row0, row0], [num_k, num_k])
        hardest_pos.append(tf.reduce_max(same_block, axis=1))

        # min inter-class distance: every column left and right of the block
        before = tf.slice(dists, [row0, 0], [num_k, row0])
        after = tf.slice(dists, [row0, row1], [num_k, (num_p - block - 1) * num_k])
        other_blocks = K.concatenate([before, after], axis=1)
        hardest_neg.append(tf.reduce_min(other_blocks, axis=1))

    max_pos = K.concatenate(hardest_pos, axis=0)
    min_neg = K.concatenate(hardest_neg, axis=0)

    min_max = K.concatenate(
        [K.expand_dims(max_pos, axis=-1), K.expand_dims(min_neg, axis=-1)],
        axis=1,
    )
    return min_max, dists

def get_triplet_dists(x, margin=0.5):
    """Soft triplet term per row of a two-column (max_pos, min_neg) tensor.

    Returns softplus(margin + max_pos - min_neg) with a trailing axis of size 1.
    """
    columns = K.transpose(x)
    hardest_pos = tf.gather(columns, 0)
    hardest_neg = tf.gather(columns, 1)
    # Use relu or softplus
    violation = margin + hardest_pos - hardest_neg
    return K.expand_dims(K.softplus(violation), 1)

#direct (difference based) alternative to get_pairwise_dists
def get_pythagorean_pairwise_dists(x,bs): #same operation as pairwise, I was just seeing some strange results from get_pairwise_dists so I wrote this instead
    """Euclidean distance between every pair of the first `bs` rows of x.

    Entry [j, i] of the (bs, bs) result is ||x[j] - x[i]||, so the diagonal is
    exactly zero. The differences are formed with one broadcast subtraction
    rather than bs*bs separate Python-level slice operations, which keeps the
    traced graph small.
    """
    x=tf.ensure_shape(x[:bs],[bs,None]) #use exactly bs rows (fails at run time if the batch is smaller) and keep the static size known
    diffs=tf.expand_dims(x,1)-tf.expand_dims(x,0) #just the geometric differences between vectors i.e. [x1-x2, y1-y2, z1-z2, etc.]
    return tf.sqrt(tf.reduce_sum(diffs**2,axis=2))#differences squared (x1-x2)^2 then summed then the total square rooted by the pythagorean theorem in multiple dimensions

def get_knn_sum(dists, k, bs):
    """Sum, over all rows, of the distances to each row's k nearest neighbours.

    `dists` is a pairwise distance matrix; `bs` is accepted but not used.
    """
    # There is no "bottom k", so take the top k+1 of the negated distances and negate back.
    # The extra entry is the row's distance to itself (0), which always sorts first.
    neg_nearest, _ = tf.math.top_k(-dists, k=k+1)
    nearest = -neg_nearest
    # Drop that first (identity) column, then total per row and over the whole batch
    per_row_total = tf.reduce_sum(nearest[:, 1:], axis=1)
    return tf.reduce_sum(per_row_total)

def get_acc(pdict):
    """Build the two metric functions passed to model.compile.

    Reads 'numP', 'numK' and 'm' (margin) from pdict; the batch is assumed to
    contain numP*numK embeddings grouped as numP blocks of numK rows.

    Returns
    -------
    (tr_acc, s_knn)
        tr_acc: fraction of samples whose soft triplet term is below 0.5.
        s_knn:  summed distance of every sample to its k = numK-1 nearest
                neighbours in the batch.
    """
    nP = pdict['numP']
    nK = pdict['numK']
    margin = pdict['m']
    k = nK-1 #I would recommend nK-1 or less unless maybe you want an idea of how close the clustering groups themselves are to one another
    bs=nP*nK
    
    def s_knn(y_true, y_pred):
        """Sum of k-nearest-neighbour distances within the batch (y_true is not used)."""
        dists = get_pythagorean_pairwise_dists(y_pred,bs) #designed for get_pythagorean_pairwise_dists but works with no noticeable difference on get_pairwise_dists, just watch out for those non-zero diagonals
        # NOTE(review): this compares a shape tensor with an empty list; how it evaluates depends on
        # eager vs. graph execution - confirm the else branch is ever reached.
        if tf.shape(dists)!=[]: #workaround for errors on initial empty arrays
            knn_sum = get_knn_sum(dists,k,bs)
            return knn_sum #*tf.cast(inacc+1,tf.float32)
        else:
            return tf.constant([0.0])
    
    def tr_acc(y_true, y_pred):
        """Share of samples with softplus(margin + max_pos - min_neg) < 0.5 (y_true is not used)."""
        min_max, _ = get_pairwise_dists(y_pred, nP, nK)
        loss = get_triplet_dists(min_max, margin)
        pos = K.less(loss, .5) # the 0.5 threshold is hard-coded and independent of `margin`
        return K.mean(pos)

    return tr_acc,s_knn


# Train the Siamese Shift encoder

In [8]:
# Folders (created on first use by vdir)
model_folder = vdir('models') # best-model checkpoints (.h5) are written here
log_folder = vdir('logs') # TensorBoard logs are written here

def train_test_model(params_arr,acc_jm=10,acc_im=10):
    """Train one model with the given parameter overrides and score it with a k-NN test.

    Parameters
    ----------
    params_arr : list
        Entries of the form [name, value, ranges, dtype]; each `value`
        overrides the default of pdict[name] set below.
    acc_jm : int
        Number of outer evaluation rounds; each round fits a fresh k-NN
        classifier on embeddings of one training batch.
    acc_im : int
        Number of test batches classified per round.

    Returns
    -------
    (pdict, accuracy)
        A deep copy of the parameters used for training (including the 'time'
        stamp that identifies the model) and the mean k-NN accuracy over all
        rounds.

    Uses the module-level training, validation and test sets
    (smallData*, smallLabels*, smallTimes*, smallClassgroups*).
    """
    
    # Model Parameters
    pdict = {}
    pdict['vggish'] = True
    pdict['pat'] = 10 #default 10
    pdict['d'] = [2048, 2048] #default [256,128]
    pdict['od'] = 128 #default 128, better results with higher od
    pdict['n'] = 1
    pdict['ch'] = 3
    
    pdict['lr'] = 0.0001 #default 0.0001, best around 0.0001 with everything else at default
    pdict['m'] = 0.2 #default 0.2
    #pdict['a'] = 0.0000001

    # Generator Parameters
    pdict['r_smp'] = 44100
    pdict['w_sec'] = 2 #default 2
    pdict['vres'] = 2**9 #default 2**9
    pdict['LRB']='L' #default 'L'
    pdict['numP'] = 5 #default 5
    pdict['numK'] = 5 #default 5
    
    for param_arr in params_arr:
        [param,val,_,_]=param_arr
        pdict[param] = val #this overrides the given parameter's default value

    # build the generators
    gen_trn = generatorC(smallData, pdict, smallLabels, smallTimes, smallClassgroups) #gen1s smallClassgroups
    gen_val = generatorC(smallData2, pdict, smallLabels2, smallTimes2, smallClassgroups2)

    # load the model
    pdict['time'] = int(time.time())
    #pdict['iniW'] = 1651599114
    model, model_name = load_custom_model(pdict)
    model.summary()

    # define the steps per epoch
    t_len = 10000 #(len(dataTrain)*3)*dataTrain[0].shape[0]/pdict['r_smp']/pdict['w_sec'] #one file divided by window size  (len(dataTrain)/2)*
    v_len = t_len//10 #integer division so validation_steps below is an int, not a float
    t_step = t_len//(pdict['numP']*pdict['numK'])+1  #t_len over batch size plus one so we never have a t_step of 0
    v_step = v_len//(pdict['numP']*pdict['numK'])+1


    ## Train the Model
    my_hist=model.fit(gen_trn, steps_per_epoch=t_step, epochs=100, #my_hist = 
                  validation_data=gen_val, validation_steps=v_step, verbose=2,
                  callbacks=get_callbacks(model_name, model_folder, log_folder))

    return_pdict=copy.deepcopy(pdict) #snapshot before the evaluation below overwrites numP/numK


    ## Test and saving of model accuracy
    #the evaluation uses the model as it is when fit() returns, not the best checkpoint saved on disk
    pdict['numP'] = 18 #must use 18 first go around, because the next generated y_pred2 will have all different classes and if all the classes are not in y_pred then this will make it miss values just because it doesn't know them
    pdict['numK'] = 10
    train_gen=generatorC(smallData,pdict,smallLabels,smallTimes,smallClassgroups)
    o_acc=[]
    for j in range(acc_jm):
        spectrograms,ls=next(train_gen)
        shuffled=np.random.permutation(len(ls)) #shuffle everything just in case
        spectrograms=spectrograms[shuffled]
        ls=ls[shuffled]
        y_pred = model.predict(spectrograms)

        #fit a k-NN classifier on the embeddings of one training batch
        k=9
        kclass=KNeighborsClassifier(n_neighbors=k)
        kclass.fit(y_pred,ls)

        acc=[]
        pdict['numP'] = 10 #these can be anything but 10x10 is sufficient
        pdict['numK'] = 10
        
        #train_gen read numP/numK when it was first advanced, so it keeps yielding 18x10 batches
        test_gen=generatorC(smallData3,pdict,smallLabels3,smallTimes3,smallClassgroups3)
        for i in range(acc_im):
            spectrograms2,ls2=next(test_gen)
            shuffled2=np.random.permutation(len(ls2))
            spectrograms2=spectrograms2[shuffled2]
            ls2=ls2[shuffled2]
            y_pred2 = model.predict(spectrograms2)
            ls2_pred= kclass.predict(y_pred2)
            acc.append((ls2_pred==ls2).mean())

        #print('Prediction Average Accuracy:',np.mean(acc))#,' std:',np.std(acc))
        o_acc.append(np.mean(acc))
    return_accuracy=np.mean(o_acc)
    print('Overall Accuracy of Current Model:',return_accuracy) #The goal here is anything above 1/18~=5.6%, 1/18 is the probability of guessing the right class randomly from 18 classes
    return return_pdict,return_accuracy

def factory(factory_subdir,params_arr,n_tr=10):
    """Run an Optuna study that maximises the accuracy returned by train_test_model.

    Parameters
    ----------
    factory_subdir : str
        Existing folder; after every trial the accuracies and parameter dicts
        of all trials so far are saved there as test_accuracies.npy and
        test_pdicts.npy (overwritten each time).
    params_arr : list
        One entry per tuned parameter:
        ['param', init_val, ranges, dtype] where dtype is 'int', 'float' or
        'categorical'. For 'int'/'float', ranges is a list of [min, max, step]
        triples (several triples produce a list-valued parameter); for
        'categorical' it is the list of choices.
        NOTE: the value slot (index 1) of every entry is overwritten in place
        on each trial.
    n_tr : int
        Number of trials.

    Returns
    -------
    dict
        The best parameter values found by the study (study.best_params).
    """
    test_accuracies=[] #initialize these
    test_pdicts=[]
    def objective(trial):
        for param_arr in params_arr: #go one by one through the parameters and suggest new values (depending on data type)
            tmp=[]
            [name,_,ranges,dtype]=param_arr
            if dtype=='categorical': tmp.append(trial.suggest_categorical(name, ranges))
            else:
                for i,rn in enumerate(ranges):
                    ind_str=str(i) if len(ranges)>1 else '' #this is because we need different "parameter" names for different values in optuna
                    if dtype=='int':
                        tmp.append(trial.suggest_int(name+ind_str, rn[0], rn[1], step=rn[2]))
                    else:
                        tmp.append(trial.suggest_float(name+ind_str, rn[0], rn[1], step=rn[2]))
            if len(tmp)==1: #formats everything but lists as just their value
                tmp=tmp[0]
            param_arr[1]=tmp
        tmp_pdict,tmp_acc=train_test_model(params_arr) #now train a model based on those values and pull out the accuracy and pdict
        test_pdicts.append(tmp_pdict) #record every model and save it over each time
        test_accuracies.append(tmp_acc)
        np.save(factory_subdir+'/test_accuracies.npy',test_accuracies)
        np.save(factory_subdir+'/test_pdicts.npy',test_pdicts) #list of dicts -> pickled object array
        return tmp_acc #this is our parameter we want to maximize
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_tr)
    #previously a bare expression whose value was discarded inside the function
    return study.best_params

    

# The Factory
# Results of a run are written to <more_folder>/factory/<assembly line name>/
factory_folder = vdir(more_folder+'/factory')
factory_subdir = vdir(factory_folder+'/vggish_varied_wsec2') #change this every time you start a new assembly line
# Tune only the window length 'w_sec' between 0.75 and 3.75 in steps of 0.25; the 0 is a placeholder value that each trial overwrites
params_arr=[['w_sec',0,[[0.75,3.75,0.25]],'float']]
#input each parameter in the form of ['param', init_val, [[min1,max1,step1],[min2,max2,step2],...] or [category1,category2,...],'dtype']  #dtype can be 'int','float',or'categorical'

# Runs 30 trials; every trial trains and evaluates a complete model, so this cell is long-running
factory(factory_subdir,params_arr,n_tr=30)

[32m[I 2022-06-01 16:33:53,184][0m A new study created in memory with name: no-name-83709d9a-1f78-406e-92cb-3c9f8053a7c5[0m


building new model:
 models/model_type_vggish:True|pat:10|d:2048x2048|od:128|n:1|ch:3|lr:0.0001|m:0.2|r_smp:44100|w_sec:2.25|vres:512|LRB:L|numP:5|numK:5|time:1654115633
Model: "vggish_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 img (InputLayer)            [(None, 193, 221, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 193, 221, 64)      1792      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 96, 110, 64)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 96, 110, 128)      73856     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 48, 55, 128)      0         
 2D)            

2022-06-01 16:33:53.190845: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-01 16:33:53.485118: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9833 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080 Ti, pci bus id: 0000:01:00.0, compute capability: 8.6
2022-06-01 16:33:58.421804: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2022-06-01 16:33:58.837228: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-06-01 16:33:58.837680: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such fi

401/401 - 32s - loss: 0.7452 - tr_acc: 0.0000e+00 - s_knn: 74.6968 - val_loss: 0.5906 - val_tr_acc: 0.0000e+00 - val_s_knn: 89.7689 - 32s/epoch - 80ms/step
Epoch 2/100


KeyboardInterrupt: 

In [8]:
# Reload the per-trial results that the factory saved after every trial
factory_folder = more_folder+'/factory'
factory_subdir = factory_folder+'/vggish_varied_wsec2'

test_accuracies=np.load(factory_subdir+'/test_accuracies.npy')
# The parameter dicts are stored as a pickled object array, hence allow_pickle.
# Only load files written by this notebook: unpickling untrusted data can execute code.
test_pdicts=np.load(factory_subdir+'/test_pdicts.npy',allow_pickle=True)
print(test_accuracies)

[0.0795 0.0858 0.0925 0.0967 0.0813 0.0591 0.063  0.0895 0.0807 0.0858
 0.0886 0.075  0.078 ]


In [11]:
# Table of trials: the selected parameters of every trained model next to its accuracy
params=['time','w_sec']
records=[[dicti[param] for param in params]+[test_accuracies[i]] for i,dicti in enumerate(test_pdicts)]
# Pass the header through `columns=` instead of as a first data row, so the columns keep their
# numeric dtypes. The 1-based index is kept so row labels stay the trial number.
df=pd.DataFrame(records,columns=params+['acc'],index=range(1,len(records)+1))
df=df.sort_values(by=['acc'])
df

Unnamed: 0,time,w_sec,acc
6,1654121075,2.5,0.0591
7,1654122115,2.25,0.063
12,1654125762,2.0,0.075
13,1654126732,3.25,0.078
1,1654115735,3.75,0.0795
9,1654123371,2.75,0.0807
5,1654120301,2.25,0.0813
2,1654117450,3.75,0.0858
10,1654123876,3.25,0.0858
11,1654124630,4.0,0.0886


In [None]:
# Scatter of window length against accuracy with a first-order (straight line) least-squares fit.
# NOTE(review): the fit models w_sec as a function of acc; swap x and y if accuracy is meant to be
# the dependent variable.
x=list(df['acc'])
y=np.array(list(df['w_sec']))#[:,0]
z=np.polyfit(x,y,1)
p=np.poly1d(z)
plt.scatter(x,y)
plt.plot(x,p(x)) #'d' [500, 400], 'w_sec' 3.5, 'od' 500
plt.xlabel('acc')
plt.ylabel('w_sec')
plt.title('w_sec vs. accuracy per trial (linear fit)');