In [None]:
# select a GPU
import os
# Order CUDA devices by PCI bus ID and expose only device "6" to this process.
# Set here, in the first cell, ahead of the keras/tensorflow imports below.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "6",
})

In [None]:
#imports 
import numpy as np
import scipy.io
from sklearn.metrics import confusion_matrix
import pandas as pd
from DCASE_plots import plot_confusion_matrix

import librosa
import soundfile as sound
import keras
import tensorflow
# Report the versions of the audio and deep-learning libraries in use,
# one line per library, in the same order and with the same labels as before.
for _label, _module in (
    ("Librosa version = ", librosa),
    ("Pysoundfile version = ", sound),
    ("keras version = ", keras),
    ("tensorflow version = ", tensorflow),
):
    print(_label, _module.__version__)

In [None]:
# ---- Experiment configuration -------------------------------------------
# Task variant to evaluate: '1a' (10 scene classes) or '1b' (3 classes).
WhichTask = '1a'
# Directory of the trained model. The trailing '/' is relied upon both here
# and when the output file names are built from model_dir[:-1].
model_dir = 'XXXXXXXXX_1a_DEV_310/'
model_path = model_dir + 'model.h5'

# Flags describing how the saved model must be loaded:
#   focal_loss -> the model needs the custom focal-loss object (gamma, alpha)
#   domain_aux -> the model has an auxiliary output that is cut off at predict time
focal_loss = True
gamma=1.0
alpha=0.3
domain_aux = False

###############################

# The two loading modes are mutually exclusive.
assert not (domain_aux and focal_loss), "domain_aux and focal_loss cannot both be enabled"

if WhichTask =='1a':

    ThisPath = '../../commonData/dcase2020/TAU-urban-acoustic-scenes-2020-mobile-development/'
    sr = 44100
    num_audio_channels = 1 

    # "<scene name> <class index>" per line; parsed when the output CSV is written.
    scene_map_str = """
    airport 0 
    bus 1
    metro 2
    metro_station 3
    park 4
    public_square 5
    shopping_mall 6
    street_pedestrian 7
    street_traffic 8
    tram 9
    """

    
elif WhichTask =='1b':
   
    ThisPath = '../../commonData/dcase2020/TAU-urban-acoustic-scenes-2020-3class-development/'
    num_audio_channels = 2    
    sr = 48000

    # "<scene name> <class index>" per line; parsed when the output CSV is written.
    scene_map_str = """
    indoor 0 
    outdoor 1
    transportation 2
    """    

else:
    # Fail here with a clear message instead of a NameError on sr/ThisPath later.
    raise ValueError("Unknown WhichTask %r; expected '1a' or '1b'" % (WhichTask,))

# Evaluation file list; the relative location is the same for both tasks.
File = ThisPath + 'evaluation_setup/fold1_evaluate.csv'

# ---- Feature extraction parameters --------------------------------------
SampleDuration = 10      # seconds of audio read per clip
NumFreqBins = 128        # number of mel bands
NumFFTPoints = 2048      # FFT size
HopLength = NumFFTPoints // 2
# Number of spectrogram frames allocated per clip.
NumTimeBins = int(np.ceil(SampleDuration*sr/float(HopLength)))

In [None]:
#load filenames and labels
# Tab-separated evaluation list; the 'filename' and 'scene_label' columns are used.
dev_test_df = pd.read_csv(File,sep='\t', encoding='ASCII')
wavpaths_val = dev_test_df['filename'].tolist()


# Unique scene names (np.unique returns them sorted).
ClassNames = np.unique(dev_test_df['scene_label'])
# print() call form works on both Python 2 and Python 3 for a single argument.
print(ClassNames)
# Integer label per clip taken from the pandas category codes.
# NOTE(review): this relies on the category order matching both ClassNames and
# the indices in scene_map_str (alphabetical) -- confirm if classes change.
y_val_labels =  dev_test_df['scene_label'].astype('category').cat.codes.values


In [None]:
#load wav files and get log-mel spectrograms, deltas, and delta-deltas
def deltas(X_in):
    """Return first-order delta features along axis 2 of a 4-D array.

    For each position t the result is
        (X[t+1] - X[t-1]) / 10 + (X[t+2] - X[t-2]) / 5,
    i.e. a two-lag weighted difference. Because two neighbours are needed on
    each side, the output is 4 entries shorter than the input along axis 2;
    all other axes keep their size.
    """
    # Differences between samples one step either side of the centre ...
    lag_one = X_in[:, :, 2:, :] - X_in[:, :, :-2, :]
    # ... and two steps either side of the centre.
    lag_two = X_in[:, :, 4:, :] - X_in[:, :, :-4, :]
    # Trim the lag-one term so both terms are centred on the same positions.
    return lag_one[:, :, 1:-1, :] / 10.0 + lag_two / 5.0

# Mel spectrogram of every validation clip, indexed (clip, mel bin, frame, channel).
LM_val = np.zeros((len(wavpaths_val),NumFreqBins,NumTimeBins,num_audio_channels),'float32')
for i, wavpath in enumerate(wavpaths_val):
    # Read at most SampleDuration seconds. The file's own sample rate (fs) is
    # returned but not checked against the configured sr.
    sig,fs = sound.read(ThisPath + wavpath,stop=SampleDuration*sr)
    # A 1-D signal gets an explicit channel axis; done once per file rather
    # than once per channel since the result does not depend on the channel.
    if sig.ndim == 1:
        sig = np.expand_dims(sig,-1)
    for channel in range(num_audio_channels):
        # The audio is passed as y=...: newer librosa releases accept it only
        # by keyword, and older releases accept the keyword as well.
        LM_val[i,:,:,channel]= librosa.feature.melspectrogram(y=sig[:,channel],
                                       sr=sr,
                                       n_fft=NumFFTPoints,
                                       hop_length=HopLength,
                                       n_mels=NumFreqBins,
                                       fmin=0.0,
                                       fmax=sr/2,
                                       htk=True,
                                       norm=None)
    # Progress report every 700 clips.
    if i%700 == 699:
        print("%i/%i val samples done" % (i+1, len(wavpaths_val)))
print("Done")

# Log compression; the small offset avoids log(0).
LM_val=np.log(LM_val+1e-8)
LM_deltas_val = deltas(LM_val)
LM_deltas_deltas_val = deltas(LM_deltas_val)
# Each deltas() pass drops 2 frames per side, so the static features are
# trimmed by 4 and the deltas by 2 per side to line up with the delta-deltas
# before all three are stacked along the last (channel) axis.
LM_val = np.concatenate((LM_val[:,:,4:-4,:],LM_deltas_val[:,:,2:-2,:],LM_deltas_deltas_val),axis=-1)

print('data dimension: ', LM_val.shape)

In [None]:
#load and run the model
# Ensure the output directory exists; an OSError is re-raised unless it was
# caused by the directory already being there.
try:
    os.makedirs('plots/')
except OSError:
    if not os.path.isdir('plots/'):
        raise

# Shared stem for both result files; model_dir[:-1] drops the last character
# of model_dir (its trailing '/').
_result_stem = 'plots/official_fold_task' + WhichTask + '_' + model_dir[:-1]
png_name = _result_stem + '.png'
savename = _result_stem + '.output.csv'
        

# Load the trained model in the way matching how it was trained, then compute
# the class scores for every validation clip.
if focal_loss:

    # A model saved with the focal loss needs that loss supplied by name at load time.
    from DCASE_training_functions import categorical_focal_loss
    _focal_custom_objects = {
        'categorical_focal_loss_fixed': categorical_focal_loss(gamma=gamma, alpha=alpha),
    }
    best_model = keras.models.load_model(model_path,
                                         custom_objects=_focal_custom_objects)
    softmax = best_model.predict(LM_val)

elif domain_aux:

    best_model = keras.models.load_model(model_path)
    from keras.models import Model

    # Build a sub-model that stops at the layer named 'activation_35', and
    # predict from that output instead of the full model's outputs.
    exclude_da = Model(inputs = best_model.input, outputs = best_model.get_layer('activation_35').output)
    softmax = exclude_da.predict(LM_val)

else:

    best_model = keras.models.load_model(model_path)
    softmax = best_model.predict(LM_val)

print(type(softmax))
# Predicted class index = position of the largest score in each row.
y_pred_val = np.argmax(softmax,axis=1)


#get metrics
# Fraction of validation clips whose predicted class equals the reference label.
Overall_accuracy = np.sum(y_pred_val==y_val_labels)/float(LM_val.shape[0])
print("overall accuracy: ", Overall_accuracy)

plot_confusion_matrix(y_val_labels, y_pred_val, ClassNames,normalize=True,title=None, png_name=png_name)

# confusion_matrix(y_true, y_pred): rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_val_labels,y_pred_val)
# Recall: divide every row by its row sum (number of true samples of that class).
conf_mat_norm_recall = conf_matrix.astype('float32')/conf_matrix.sum(axis=1)[:,np.newaxis]
# Precision: divide every column by its column sum (number of predictions of
# that class). The column sums must broadcast as a row vector; broadcasting
# them as a column vector only happens to be right on the diagonal.
# A class that is never predicted has a zero column sum and yields nan here.
conf_mat_norm_precision = conf_matrix.astype('float32')/conf_matrix.sum(axis=0)[np.newaxis,:]
recall_by_class = np.diagonal(conf_mat_norm_recall)
precision_by_class = np.diagonal(conf_mat_norm_precision)
mean_recall = np.mean(recall_by_class)
mean_precision = np.mean(precision_by_class)

print("per-class accuracy (recall): ",recall_by_class)
print("per-class precision: ",precision_by_class)
print("mean per-class recall: ",mean_recall)
print("mean per-class precision: ",mean_precision)
    
    
# create output.csv
# Parse the "<scene name> <index>" lines of scene_map_str into {index: name}.
scene_index_map={}
for line in scene_map_str.strip().split('\n'):
    ch, index = line.split()
    scene_index_map[int(index)] = ch

# Predicted scene name for every clip.
labels = [str(scene_index_map[c]) for c in y_pred_val]
# Drop the first 6 characters of each path.
# NOTE(review): presumably the 'audio/' folder prefix -- confirm against the CSV.
filename = [a[6:] for a in wavpaths_val]

left = {'filename': filename,
        'scene_label': labels
}

left_df = pd.DataFrame(left)
# One score column per class, named and ordered by class index. Deriving the
# names from scene_index_map keeps them in step with scene_map_str instead of
# repeating the class list for each task.
score_columns = [scene_index_map[k] for k in sorted(scene_index_map)]
right_df = pd.DataFrame(softmax, columns = score_columns)

merge = pd.concat([left_df, right_df], axis=1, sort=False)
merge.to_csv(savename, sep = '\t', index=False) 

print('Done')