In [14]:
import os
import shutil
import pandas as pd
import numpy as np
import librosa 
import librosa.display
import soundfile as sf
import matplotlib.pyplot as plt
import IPython.display as ipd
from tqdm.notebook import tqdm
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score

In [19]:
# Pre-emphasis
def pre_emphasis(audio, coef):
    """Apply a pre-emphasis filter to an audio signal.

    Thin wrapper that forwards to ``librosa.effects.preemphasis``.

    Parameters
    ----------
    audio
        Signal handed to librosa as its ``y`` argument.
    coef
        Pre-emphasis coefficient, forwarded unchanged.

    Returns
    -------
    The filtered signal exactly as returned by librosa.
    """
    return librosa.effects.preemphasis(y=audio, coef=coef)

In [20]:
# MFCC
def extract_mfcc(audio, sr, n_mfcc):
    """Compute MFCC features for an audio signal.

    Thin wrapper that forwards to ``librosa.feature.mfcc``.

    Parameters
    ----------
    audio
        Signal handed to librosa as its ``y`` argument.
    sr
        Sampling rate of ``audio``.
    n_mfcc
        Number of MFCC coefficients to compute.

    Returns
    -------
    The MFCC matrix returned by librosa; rows are the feature dimension and
    columns are the time dimension.
    """
    return librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

In [21]:
def feature_with_delta(feature):
    """Append first- and second-order delta features to a feature matrix.

    Both derivatives are computed from the original ``feature`` with
    ``librosa.feature.delta`` using a window width of 7, then stacked below
    the input along axis 0 in the order: feature, delta, delta-delta.

    Parameters
    ----------
    feature
        Feature array; its axis 0 is the one that grows threefold.

    Returns
    -------
    A new array made of ``feature``, its order-1 delta and its order-2 delta
    concatenated along axis 0.
    """
    derivatives = [
        librosa.feature.delta(data=feature, width=7, order=derivative_order)
        for derivative_order in (1, 2)
    ]
    return np.concatenate([feature, *derivatives], axis=0)

In [22]:
# Load the trained model
model = load_model("SEResNet.h5")

# Load the sample
# The fixed clip length below is expressed in samples at 16 kHz, so the audio
# is loaded at that rate. For a file that is already 16 kHz this is identical
# to loading at the native rate; any other file is resampled by librosa.
SAMPLE_RATE = 16000
CLIP_SECONDS = 6

sample_audio_path = '真音/LA_T_1341447.flac'  # demo file
audio, sr = librosa.load(sample_audio_path, mono=True, sr=SAMPLE_RATE)
if len(audio) == 0:
    # An empty signal can never be repeated up to the target length.
    raise ValueError(f"No audio samples could be read from {sample_audio_path!r}")
audio = pre_emphasis(audio=audio, coef=0.97)

# Normalize the clip to exactly 6 seconds (16000 * 6 samples): repeat the
# signal end-to-end until it is longer than the target, then truncate.
max_len = SAMPLE_RATE * CLIP_SECONDS
# `max_len // len(audio) + 1` is the smallest repeat count whose total length
# exceeds max_len.
fixed_audio = np.tile(audio, max_len // len(audio) + 1)
audio = fixed_audio[:max_len]

# 20 MFCCs plus their delta and delta-delta rows.
feature = extract_mfcc(audio=audio, sr=sr, n_mfcc=20)
feature = feature_with_delta(feature=feature)

# Transpose and add a leading axis of size 1 (presumably the batch dimension
# the model expects -- confirm against the model's input spec).
X = feature.T.reshape((1, feature.shape[1], feature.shape[0]))
pred = model.predict(X)
pred_classes = np.argmax(pred, axis=1)

# Exactly one sample was predicted; class index 0 is reported as genuine
# speech and any other index as spoofed speech.
print('此樣本為真音' if pred_classes[0] == 0 else '此樣本為假音')

此樣本為真音
