In [1]:
import os
from os import listdir
from os.path import isfile, join
import pandas as pd

# first, we need to import our essentia module. It is aptly named 'essentia'!
import essentia

# essentia has two operating modes (standard and streaming) that provide the same
# algorithms; each mode lives in its own submodule:
import essentia.standard
import essentia.streaming

# pylab contains the plot() function, as well as figure, etc... (same names as Matlab)
from pylab import plot, show, figure, imshow
%matplotlib inline
import matplotlib.pyplot as plt

from essentia.standard import *

import numpy as np

import theano
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix

from keras.layers import LSTM, Dense, TimeDistributed
from keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
# All dataset locations are resolved against the directory the notebook runs in.
path = os.getcwd()


def _from_cwd(relative):
    """Return `relative` appended verbatim to the notebook's working directory."""
    return path + relative


# Folders holding the .flac recordings. Note that the "test" set used in this
# notebook is the ASVspoof2019 PA *dev* partition.
pa_train_path = _from_cwd("/PA/ASVspoof2019_PA_train/flac/")
pa_test_path = _from_cwd("/PA/ASVspoof2019_PA_dev/flac/")

# Protocol (label) files matching the two partitions above.
pa_train_prot_path = _from_cwd("/PA/ASVspoof2019_PA_cm_protocols/ASVspoof2019.PA.cm.train.trn.txt")
pa_test_prot_path = _from_cwd("/PA/ASVspoof2019_PA_cm_protocols/ASVspoof2019.PA.cm.dev.trl.txt")

In [3]:
# The protocol files are space-separated and have no header row, so the column
# names are supplied here. Five fields are named, but the one labelled 'bad'
# is left out of the loaded tables.
protocol_columns = ['speaker', 'audio_name', "method", 'bad', 'output']
kept_columns = ['speaker', 'audio_name', "method", 'output']

train = pd.read_csv(
    pa_train_prot_path,
    sep=" ",
    header=None,
    names=protocol_columns,
    usecols=kept_columns,
)
test = pd.read_csv(
    pa_test_prot_path,
    sep=" ",
    header=None,
    names=protocol_columns,
    usecols=kept_columns,
)

In [4]:
# Peek at the first five rows of the training protocol table.
train.head(n=5)

Unnamed: 0,speaker,audio_name,method,output
0,PA_0079,PA_T_0000001,aaa,bonafide
1,PA_0079,PA_T_0000002,aaa,bonafide
2,PA_0079,PA_T_0000003,aaa,bonafide
3,PA_0079,PA_T_0000004,aaa,bonafide
4,PA_0079,PA_T_0000005,aaa,bonafide


In [5]:
# Peek at the first five rows of the test protocol table.
test.head(n=5)

Unnamed: 0,speaker,audio_name,method,output
0,PA_0069,PA_D_0000001,aaa,bonafide
1,PA_0069,PA_D_0000002,aaa,bonafide
2,PA_0069,PA_D_0000003,aaa,bonafide
3,PA_0069,PA_D_0000004,aaa,bonafide
4,PA_0069,PA_D_0000005,aaa,bonafide


In [6]:
# Per-recording accumulators filled by the training feature-extraction cell:
# MFCC coefficients, mel-band energies, log mel-band energies, and the path of
# each processed file. Four separate (unshared) empty lists.
mfccs_files, melbands_files, melbands_log_files, audio_file_names = [], [], [], []

In [7]:
# Extract frame-wise features for every training recording and attach them to
# `train` as new columns.
#
# For each row of `train`, the matching .flac file is loaded, cut into
# 1024-sample frames with a hop of 512, and each frame is passed through
# Hann window -> magnitude spectrum -> MFCC. Three things are kept per file:
# the MFCC coefficients, the mel-band energies, and the log of the mel-band
# energies.
#
# Results are appended to the module-level lists (mfccs_files, melbands_files,
# melbands_log_files, audio_file_names). A path that is already present in
# audio_file_names is skipped, so re-running this cell does not reprocess
# files that were already handled.

print(len(train))

# Analysis chain, instantiated once and reused for every file.
w = Windowing(type = 'hann')
spectrum = Spectrum()  # FFT() would return the complex FFT, here we just want the magnitude spectrum
mfcc = MFCC()
logNorm = UnaryOperator(type='log')
loader = essentia.standard.MonoLoader()

# Set mirror of audio_file_names: membership tests on the list itself are
# linear, which makes the loop quadratic over the whole table. The list is
# still the source of truth for ordering.
processed_paths = set(audio_file_names)

count = 0
for audio_name in train['audio_name']:
    # progress indicator, one line per 1000 rows
    if count % 1000 == 0:
        print(count)
    count = count + 1

    this_path = pa_train_path + audio_name + ".flac"
    if this_path in processed_paths:
        continue

    # point the loader at this file, then actually perform the loading
    loader.configure(filename = this_path)
    audio = loader()

    mfccs = []
    melbands = []
    melbands_log = []

    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512, startFromZero=True):
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
        mfccs.append(mfcc_coeffs)
        melbands.append(mfcc_bands)
        melbands_log.append(logNorm(mfcc_bands))

    # Convert each list of per-frame vectors to an essentia.array
    # (== numpy.array of floats) and transpose it, so that frames run along
    # the second axis.
    mfccs = essentia.array(mfccs).T
    melbands = essentia.array(melbands).T
    melbands_log = essentia.array(melbands_log).T

    mfccs_files.append(mfccs)
    melbands_files.append(melbands)
    melbands_log_files.append(melbands_log)
    audio_file_names.append(this_path)
    processed_paths.add(this_path)

# One entry per row is required here; pandas rejects a list whose length does
# not match the table.
train["audio_file_name"] = audio_file_names
train["mfccs_files"] = mfccs_files
train["melbands_file"] = melbands_files
train["melbands_log_file"] =  melbands_log_files

54000
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000


In [8]:
# Write the training table (protocol columns plus extracted features) as CSV.
# NOTE(review): the feature columns hold arrays, which CSV can only store as
# text -- confirm this file is meant for inspection only and that the pickle
# written in the following cell is the copy to reload from.
train_csv_path = "jake_data/train_all.csv"
train.to_csv(train_csv_path)

In [10]:
# Persist the training table, including the per-file feature arrays, as a pickle.
train_pickle_path = "jake_data/train_all.pkl"
train.to_pickle(train_pickle_path)

In [15]:
# Number of rows in the test protocol table.
test_row_count = len(test)
print(test_row_count)

29700


In [11]:
# Extract frame-wise features for every test recording and attach them to
# `test` as new columns.
#
# Same processing as for the training set: each .flac file is loaded, cut into
# 1024-sample frames with a hop of 512, and each frame is passed through
# Hann window -> magnitude spectrum -> MFCC. The MFCC coefficients, the
# mel-band energies and the log mel-band energies are kept per file.

# Fresh accumulators: this cell always recomputes the whole test set.
mfccs_files = []
melbands_files = []
melbands_log_files = []
audio_file_names = []

print(len(test))

# Analysis chain, instantiated once and reused for every file.
w = Windowing(type = 'hann')
spectrum = Spectrum()  # FFT() would return the complex FFT, here we just want the magnitude spectrum
mfcc = MFCC()
logNorm = UnaryOperator(type='log')
loader = essentia.standard.MonoLoader()

count = 0
for audio_name in test['audio_name']:
    # progress indicator, one line per 1000 rows
    if count % 1000 == 0:
        print(count)
    count = count + 1

    this_path = pa_test_path + audio_name + ".flac"

    # Always point the loader at this row's file before loading. Exactly one
    # entry is appended per row, so the features must come from this row's
    # own file and the four lists must end up as long as `test`.
    loader.configure(filename = this_path)
    audio = loader()

    mfccs = []
    melbands = []
    melbands_log = []

    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512, startFromZero=True):
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
        mfccs.append(mfcc_coeffs)
        melbands.append(mfcc_bands)
        melbands_log.append(logNorm(mfcc_bands))

    # Convert each list of per-frame vectors to an essentia.array
    # (== numpy.array of floats) and transpose it, so that frames run along
    # the second axis.
    mfccs = essentia.array(mfccs).T
    melbands = essentia.array(melbands).T
    melbands_log = essentia.array(melbands_log).T

    mfccs_files.append(mfccs)
    melbands_files.append(melbands)
    melbands_log_files.append(melbands_log)
    audio_file_names.append(this_path)

test["audio_file_name"] = audio_file_names
test["mfccs_files"] = mfccs_files
test["melbands_file"] = melbands_files
test["melbands_log_file"] =  melbands_log_files

29700
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000


In [13]:
# Persist the test table (protocol columns plus extracted features) in two
# formats: CSV first, then pickle.
# NOTE(review): the feature columns hold arrays, which CSV can only store as
# text -- confirm the CSV is meant for inspection only and that the pickle is
# the copy to reload from.
test_csv_path = "jake_data/test_all.csv"
test_pickle_path = "jake_data/test_all.pkl"

test.to_csv(test_csv_path)
test.to_pickle(test_pickle_path)