In [1]:
import os
from sklearn.model_selection import train_test_split
import numpy as np
import scipy
import soundfile as sf
from spafe.utils import vis
from spafe.features.mfcc import mfcc, imfcc
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
import pickle

In [102]:
# 规定一些参数
NOR_LEN = 32000
frame_size = 0.025   # 帧长  25ms
frame_stride = 0.01  # 帧移  10ms
NFFT = 512  # N点 FFT 短时傅里叶变换
nfilt = 40    # 梅尔滤波器的个数
num_ceps = 15  # 倒谱系数
n_components = 8
# coes = [13,12,11,10,9,8]

In [103]:
# 数据路径 
train_data_path1 = './audio/train/flac/'
train_label_path1 = './audio/train.txt'
# 文件名与标签的映射
signs1 = np.loadtxt(train_label_path1,delimiter=' ',dtype=str)


In [104]:
# 形成2个文件路径集合，用于生成2个GMM，一个是spoof伪，一个是bonafide真
files_spoof = []
files_bonafide = []
for s in signs1:
    if s[1]=='spoof':
        files_spoof.append(train_data_path1+s[0]+'.flac')
    else:
        files_bonafide.append(train_data_path1+s[0]+'.flac')

print('spoof训练集大小',len(files_spoof))
print('bonafide训练集大小',len(files_bonafide))

spoof训练集大小 1779
bonafide训练集大小 221


In [107]:
def normalizeVoiceLen(y,normalizedLen): # y：data  normalizedLen:32000
    '''
    音频文件归一化长度
    '''
    nframes=len(y)
    y = np.reshape(y,[nframes,1]).T  
    #归一化音频长度为2s,32000数据点
    if(nframes<normalizedLen):  # 用0补
        res=normalizedLen-nframes
        res_data=np.zeros([1,res],dtype=np.float32)
        y=np.c_[y,res_data]
    else:
        y=y[:,0:normalizedLen]
    return y[0]  # 向量


def get_mfcc_features(files):
    '''
    mfcc提取
    
    files flac文件路径
    '''
    mfcc_features = np.asarray(())
    i = 0
    for f in files:
        data,sample_rate = sf.read(f)
        data = normalizeVoiceLen(data,NOR_LEN)  
        features = mfcc(data,fs=sample_rate,nfft=NFFT,num_ceps=num_ceps)
        features = preprocessing.scale(features)
        if mfcc_features.size == 0:
            mfcc_features = features
        else:
            mfcc_features=np.append(mfcc_features,features,axis=0)
    return mfcc_features

def get_Mfcc(sr,audio):
    audio=normalizeVoiceLen(audio,NOR_LEN)
    features=mfcc(audio,fs=sr,nfft=NFFT,num_ceps=num_ceps)
    features=preprocessing.scale(features)
    return features

In [108]:
x_train_spoof,x_train_bonafide = get_mfcc_features(files_spoof),get_mfcc_features(files_bonafide)

In [109]:
print('spoof:',x_train_spoof.shape,'  bonafide:',x_train_bonafide.shape)

spoof: (352242, 15)   bonafide: (43758, 15)


In [68]:
# 模型训练
from sklearn.mixture import GaussianMixture as GMM
def train_gmm(n_components,covariance_type='diag'):
    gmm1 = GMM(n_components=n_components,covariance_type='diag',n_init=3)
    gmm_model_spoof = gmm1.fit(x_train_spoof)

    gmm2 = GMM(n_components=n_components,covariance_type='diag',n_init=3)
    gmm_model_bonafide = gmm2.fit(x_train_bonafide)
    return gmm_model_spoof,gmm_model_bonafide

gmm_model_spoof,gmm_model_bonafide = train_gmm(n_components)

In [None]:
# 写模型
picklefile_spoof = 'spoof.gmm'
picklefile_bonafide = 'bonafide.gmm'

pickle.dump(gmm_model_spoof,open('./'+picklefile_spoof,'wb'))
pickle.dump(gmm_model_bonafide,open('./'+picklefile_bonafide,'wb'))

In [111]:
# 在验证集预测
# 数据处理
test_data_path = './audio/dev/flac/'  # path to testing data
test_label_path = './audio/dev.txt'
signs_test = np.loadtxt(test_label_path,delimiter=' ',dtype=str)
labels_test = signs_test[:,1]
files_test = [os.path.join(test_data_path,s[0])+'.flac' for s in signs_test]

print('验证集长度：',len(files_test))

验证集长度： 2500


In [112]:
# # 打开模型
modelpath = './'
gmm_files=[os.path.join(modelpath,fname) for fname in os.listdir(modelpath) if fname.endswith('.gmm')]
models=[pickle.load(open(fname,'rb')) for fname in gmm_files]
results = [fname[len(modelpath):-4] for fname in gmm_files]
for fname in gmm_files:
    print("fname:",fname)
print(results)


fname: ./bonafide.gmm
fname: ./spoof.gmm
['bonafide', 'spoof']




In [113]:
### tp_s,fp_s,tn_s,fn_s=0,0,0,0
tp_b,fp_b,tn_b,fn_b=0,0,0,0
j=0
spoof_test_true=0
bonafide_test_true=0
spoof_test_total=0
bonafide_test_total=0

for i,file in enumerate(files_test):
    audio,sr=sf.read(file) 
    feature=get_Mfcc(sr,audio)
    scores = None
    # 2个模型，所以长度是2
    log_likelihood=np.zeros(len(models))
    # 循环加载这两个模型
    for k in range(len(models)):
        gmm = models[k]
        ##求概率
        scores=np.array(gmm.score(feature).reshape(1,-1))
        log_likelihood[k]=scores.sum()
    
    winner=np.argmax(log_likelihood)  # 对应下标

    
    if labels_test[i] == 'spoof':
        spoof_test_total+=1
    else:
        bonafide_test_total+=1
        
    if results[winner] == labels_test[i]:  # 预测正确
        j = j + 1
        if results[winner] == 'spoof':
            tp_s += 1
            spoof_test_true=spoof_test_true+1
        else:
            tp_b += 1
            bonafide_test_true=bonafide_test_true+1
    else:  # 预测错误
        if labels_test[i] == 'spoof': 
            fn_s += 1
            fp_b += 1
        else:
            fn_b += 1
            fp_s += 1
            
        

# precision=tp/tp+fp        
print('precision_spoof:',tp_s/(tp_s+fp_s))
print('precision_bonafide:',tp_b/(tp_b+fp_b))

print("j:",j,"len:",len(files_test))
print("accuracy：",j/len(files_test))
# recall tp/p

print("recall_spoof：",spoof_test_true,'/',spoof_test_total,'=',spoof_test_true/spoof_test_total)
print("recall_bonafide：",bonafide_test_true,'/',bonafide_test_total,'=',bonafide_test_true/bonafide_test_total)

precision_spoof: 0.9598732840549102
precision_bonafide: 0.39464882943143814
j: 2116 len: 2500
accuracy： 0.8464
recall_spoof： 1880 / 2242 = 0.8385370205173952
recall_bonafide： 236 / 258 = 0.9147286821705426


In [94]:
# def record_predict(gmm_model_spoof,gmm_model_bonafide,files_test):
#     tp_s,fp_s,tn_s,fn_s=0,0,0,0
#     tp_b,fp_b,tn_b,fn_b=0,0,0,0
#     j=0

#     spoof_test_true=0
#     bonafide_test_true=0
#     spoof_test_total=0
#     bonafide_test_total=0
#     results = ['spoof','bonafide'] # 0 假 1 真
    
#     for i,file in enumerate(files_test):
#         audio,sr = sf.read(file)
#         feature = get_Mfcc(sr,audio)
#         scores = None
#         log_likelihood = np.zeros(2)
#         scores = np.array(gmm_model_bonafide.score(feature).reshape(1,-1))
#         log_likelihood[1] = scores.sum()
#         scores = np.array(gmm_model_spoof.score(feature).reshape(1,-1))
#         log_likelihood[0] = scores.sum()

#         winner=np.argmax(log_likelihood)  # 对应下标

#         if labels_test[i] == 'spoof':
#             spoof_test_total+=1
#         else:
#             bonafide_test_total+=1

#         if results[winner] == labels_test[i]:  # 预测正确
#             j = j + 1
#             if results[winner] == 'spoof':
#                 tp_s += 1
#                 spoof_test_true+=1
#             else:
#                 tp_b += 1
#                 bonafide_test_true+=1
#         else:  # 预测错误
#             if labels_test[i] == 'spoof': 
#                 fn_s += 1
#                 fp_b += 1
#             else:
#                 fn_b += 1
#                 fp_s += 1
     
#     # precision=tp/tp+fp        
#     print('precision_spoof:',tp_s/(tp_s+fp_s))
#  #   print('precision_bonafide:',tp_b/(tp_b+fp_b))

#     print("j:",j,"len:",len(files_test))
#     print("accuracy：",j/len(files_test))
#     # recall tp/p

#     print("recall_spoof：",spoof_test_true,'/',spoof_test_total,'=',spoof_test_true/spoof_test_total)
#     print("recall_bonafide：",bonafide_test_true,'/',bonafide_test_total,'=',bonafide_test_true/bonafide_test_total)

In [100]:
# for c in coes:
#     gmm_model_spoof,gmm_model_bonafide = train_gmm(c)
#     print('n_components=',c)
#     record_predict(gmm_model_spoof,gmm_model_bonafide,files_test)