In [10]:
import tensorflow as tf
from tensorflow.keras import Model,Input
from tensorflow.keras.models import load_model
import os
import pandas as pd
import numpy as np
import random as rd
import numpy as np
from sklearn.metrics import confusion_matrix,accuracy_score
import joblib
import matplotlib.pyplot as plt
import librosa
import warnings
warnings.filterwarnings("ignore")

# Folders of the local audio database, one sub-folder per recording class.
# The three paths share the same root and all end with a path separator.
# NOTE(review): absolute, machine-specific paths - adjust before running elsewhere.
path_to_db_voice, path_to_db_plane, path_to_db_both = (
    "D:\\Utilisateurs\\ENAC\\projet_AIRBUS\\db\\{}\\".format(kind)
    for kind in ("voice", "plane", "both")
)

In [7]:
# Pre-trained models, read from the notebook's working directory.
RF_MODEL_FILE = "random_forest2.joblib"
CONV_MODEL_FILE = 'Conv2D.h5'

# NOTE(review): joblib.load unpickles the file, so only load model files you trust.
rf = joblib.load(RF_MODEL_FILE)
conv = load_model(CONV_MODEL_FILE)

In [77]:
def gen_valid_test(db_folder_path,n_records):
    """Build validation features for both models from randomly chosen recordings.

    ``n_records`` entries of ``db_folder_path`` are drawn without replacement,
    loaded with ``librosa.load`` and transformed with
    ``librosa.stft(center=False)``. A recording is kept only when its first 100
    STFT frames form a (1025, 100) array, so the returned collections may hold
    fewer than ``n_records`` items.

    Parameters
    ----------
    db_folder_path : str
        Folder containing the recordings; a trailing separator is optional.
    n_records : int
        Number of entries to draw. ``random.sample`` raises ``ValueError`` when
        the folder holds fewer entries than requested.

    Returns
    -------
    sp_valid_rf : list of ndarray
        One (n_frames, 1025) array of absolute spectrum values per kept
        recording (one row per STFT frame), for the random forest.
    hops_valid : list of int
        Per kept recording, ``len(signal) // n_frames``: the number of audio
        samples per STFT frame.
    sp_valid_conv : ndarray
        The first 100 frames of each kept recording passed through
        ``librosa.amplitude_to_db``, stacked as (n_kept, 1025, 100).
    """
    # A single sample() call already returns the files in random order.
    sample = rd.sample(os.listdir(db_folder_path),n_records)

    sp_valid_rf = []
    sp_valid_conv = []
    hops_valid = []
    for position,file_name in enumerate(sample,start=1):
        print('Building validation set {}/{}'.format(position,n_records),end='\r')
        # os.path.join works whether or not the folder path ends with a separator.
        signal,sample_rate = librosa.load(os.path.join(db_folder_path,file_name))
        spectrum = librosa.stft(y=signal,center=False)
        # NOTE(review): casting a complex array to float keeps only the real part.
        # Left unchanged because the pre-trained models may have been fitted on
        # features built the same way - confirm whether np.abs() was intended.
        spectrum = spectrum.astype(float)

        # Convolutional network input: the first 100 frames only.
        conv_spectrum = spectrum[:,:100]
        if conv_spectrum.shape != (1025,100):
            # Recording too short for the network: skipped for both models.
            continue
        sp_valid_conv.append(librosa.amplitude_to_db(conv_spectrum))

        # Random forest input: one row per frame.
        rf_spectrum = np.abs(np.transpose(spectrum))
        sp_valid_rf.append(rf_spectrum)
        hops_valid.append(len(signal)//len(rf_spectrum))

    return sp_valid_rf,hops_valid,np.array(sp_valid_conv)

In [78]:
def has_voice2(x,hop,duration_sec_threshold,error_threshold,sample_rate=22050):
    """Tell whether a sequence of per-frame labels contains a sustained voice segment.

    Every label other than ``'Plane'`` counts as voice. A rolling mean of that
    0/1 sequence is computed over a window of
    ``round(duration_sec_threshold * sample_rate / hop)`` frames, and the record
    is flagged as soon as one window has a voice proportion strictly above
    ``error_threshold``.

    Parameters
    ----------
    x : sequence of str
        Per-frame labels (list or array).
    hop : int
        Number of audio samples per frame, used to convert seconds to frames.
    duration_sec_threshold : float
        Length of the rolling window, in seconds.
    error_threshold : float
        Minimum proportion of voice frames (exclusive) inside one window.
    sample_rate : int, optional
        Audio sample rate in Hz; defaults to 22050, the value previously
        hard-coded here.

    Returns
    -------
    bool
        True if at least one full window exceeds ``error_threshold``. Inputs
        shorter than the window (including empty input) yield False.
    """
    window = round(duration_sec_threshold * sample_rate / hop)
    # A float Series (rather than a DataFrame) keeps empty input well-defined.
    is_voice = pd.Series([0 if elt=='Plane' else 1 for elt in x],dtype=float)
    # Incomplete leading windows give NaN, which never compares above the threshold.
    voice_ratio = is_voice.rolling(window).mean()
    return bool((voice_ratio > error_threshold).any())

In [79]:
# Draw the same number of recordings from the "plane" and the "both" folders.
N_VALID_RECORDS = 50

(rf_plane_valid,
 hops_plane_valid,
 conv_plane_valid) = gen_valid_test(path_to_db_plane,N_VALID_RECORDS)

(rf_both_valid,
 hops_both_valid,
 conv_both_valid) = gen_valid_test(path_to_db_both,N_VALID_RECORDS)

Building validation set 49/50

PARTIE RANDOM FOREST

In [80]:
# Ground-truth labels for the random forest evaluation:
# 0 for every record of the "plane" set, 1 for every record of the "both" set.
rf_plane_y = np.array([0] * len(rf_plane_valid))
rf_both_y = np.array([1] * len(rf_both_valid))

In [81]:
# Record-level random forest predictions: the forest labels each frame, then
# has_voice2 reduces the frame labels to one decision per record
# (1-second window, voice proportion above 0.8).
prediction_for_records_plane = [
    has_voice2(rf.predict(frames),hop,1,0.8)
    for frames,hop in zip(rf_plane_valid,hops_plane_valid)
]

# Iterate over the "both" set itself: the loop was previously bounded by
# len(rf_plane_valid), which breaks as soon as the two sets differ in size
# (gen_valid_test may drop records).
prediction_for_records_both = [
    has_voice2(rf.predict(frames),hop,1,0.8)
    for frames,hop in zip(rf_both_valid,hops_both_valid)
]

# Accuracy on the "plane" set, then on the "both" set.
accuracy_score(rf_plane_y,prediction_for_records_plane),accuracy_score(rf_both_y,prediction_for_records_both)

(0.6530612244897959, 0.9795918367346939)

PARTIE RÉSEAU CONVOLUTIF

In [82]:
# Ground-truth labels for the convolutional network evaluation:
# 0 for every record of the "plane" set, 1 for every record of the "both" set.
conv_plane_y = np.array([0] * len(conv_plane_valid))
conv_both_y = np.array([1] * len(conv_both_valid))

In [89]:
# Raw network outputs for each validation set.
conv_output_plane = conv(conv_plane_valid).numpy()
conv_output_both = conv(conv_both_valid).numpy()

# NOTE(review): astype(int) truncates toward zero, so an output of 0.99 becomes 0.
# Confirm this matches the model's output layer (rounding may have been intended).
prediction_for_records_plane = conv_output_plane.astype(int)
prediction_for_records_both = conv_output_both.astype(int)

# Accuracy on the "plane" set, then on the "both" set.
(
    accuracy_score(conv_plane_y,prediction_for_records_plane),
    accuracy_score(conv_both_y,prediction_for_records_both),
)

(0.8163265306122449, 0.8979591836734694)

In [140]:
def mix_of_models_valid(rf,conv,n_records=60):
    """Evaluate the random forest and the convolutional network on a fresh validation draw.

    ``n_records`` recordings are requested from each of ``path_to_db_plane``
    (label 0) and ``path_to_db_both`` (label 1) through ``gen_valid_test``.
    Both models predict every kept record and the two predictions are merged
    into a final label.

    Parameters
    ----------
    rf : object
        Frame classifier exposing ``predict``; its frame labels are reduced to
        one decision per record by ``has_voice2`` (1-second window, 0.8).
    conv : callable
        Model called on the stacked spectrogram batch; its result must expose
        ``.numpy()``.
    n_records : int, optional
        Number of recordings requested per class (default 60).

    Returns
    -------
    final_pred : list of int
        Combined prediction per record.
    y_valid : ndarray
        Ground-truth labels ("plane" records first, then "both" records).
    rf_prediction : list of bool
        Record-level random forest decisions.
    conv_prediction : ndarray
        Network outputs cast to int.
    """
    rf_plane_valid,hops_plane_valid,conv_plane_valid = gen_valid_test(path_to_db_plane,n_records)
    rf_both_valid,hops_both_valid,conv_both_valid = gen_valid_test(path_to_db_both,n_records)

    y_valid = np.array([0]*len(rf_plane_valid) + [1]*len(rf_both_valid))

    # The per-record arrays have different numbers of frames, so they are kept
    # in plain lists: building one NumPy array from such ragged input raises
    # ValueError on recent NumPy versions.
    rf_valid = list(rf_plane_valid) + list(rf_both_valid)
    hops_valid = list(hops_plane_valid) + list(hops_both_valid)
    conv_valid = np.concatenate([conv_plane_valid,conv_both_valid])

    n = len(rf_valid)
    print(len(rf_valid),len(conv_both_valid)+len(conv_plane_valid))

    rf_prediction = [
        has_voice2(rf.predict(frames),hop,1,0.8)
        for frames,hop in zip(rf_valid,hops_valid)
    ]

    # NOTE(review): astype(int) truncates toward zero - confirm it matches the
    # model's output layer.
    conv_prediction = conv(conv_valid).numpy().astype(int)

    # NOTE(review): when both predictions are 0/1, every branch below returns
    # the convolutional network's label, so the random forest never changes the
    # outcome - confirm the intended voting rule.
    final_pred = []
    for i in range(n):
        if rf_prediction[i]==1 and conv_prediction[i]==0:
            final_pred.append(0)
        elif rf_prediction[i]==1 and conv_prediction[i]==1:
            final_pred.append(1)
        elif rf_prediction[i]==0 and conv_prediction[i]==0:
            final_pred.append(0)
        else :
            final_pred.append(1) # original author's note: this fallback is poor

    return final_pred,y_valid,rf_prediction,conv_prediction

In [141]:
# Evaluate both models and their combination on a fresh validation draw.
mix_results = mix_of_models_valid(rf,conv)
final_pred,y_valid,rf_prediction,conv_prediction = mix_results

119 119g validation set 59/60


In [142]:
# Accuracy of the combined vote, then of each model alone, on the same records.
for label,predictions in (
    ("Les deux modèles combinés:",final_pred),
    ("Random Forest seule:",rf_prediction),
    ("Conv2D seul:",conv_prediction),
):
    print(label,accuracy_score(y_valid,predictions))

Les deux modèles combinés: 0.8739495798319328
Random Forest seule: 0.7899159663865546
Conv2D seul: 0.8823529411764706
