In [1]:
import pandas as pd
from importlib import resources
import numpy as np
import os
from SoundArtificialityDetection.ASVspoof2019protocol import PA_CM, PA_CM_Audio

# データセットのセッティング

- すでに ASVspoof2019のPA.zip を持っているならば，--> 1 へ
- 持っていないならば，--> 2 へ

1. 利用する python の `lib/site-packages/SoundArtificialityDetection` フォルダに `PA.zip` を置く．
   - そこで展開する．`PA`フォルダが作成され，その下に各種データが置かれる．
   - その`PA` フォルダを `lib/site-packages/SoundArtificialityDetection/_data/` フォルダに移動させる．
   
   結果として，`lib/site-packages/SoundArtificialityDetection/_data/` にデータが展開される形になる．

2. SoundArtificialityDetection.ASVspoof2019protocol の set_data() メソッドを実行して，自動的にダウンロード・展開させる．（データセットが巨大なため，30時間ほどかかります．）

In [2]:
# Download and extract the ASVspoof2019 PA dataset (only needed for option 2 above).
# `set_data` is not among the names imported in the first cell, so on a fresh
# kernel this call would fail with NameError. It is imported here from the module
# that the setup instructions above name as its home.
from SoundArtificialityDetection.ASVspoof2019protocol import set_data

set_data()

Downloading PA.zip:   1%|          | 220M/17.7G [15:31<17:32:32, 276kB/s]  

KeyboardInterrupt: 

Downloading PA.zip:   1%|          | 220M/17.7G [15:50<17:32:32, 276kB/s]

# Physical Access (PA), Countermeasure (CM)



## protocolごとのデータリストの読み込み

In [3]:
# Load the data list of each protocol, constructed in the order train -> eval -> dev.
# NOTE(review): the name `eval` shadows Python's built-in eval(); it is kept here
# only because the following cell refers to it by that name.
train, eval, dev = (PA_CM(protocol=split) for split in ('train', 'eval', 'dev'))


In [4]:
def protocol_frame(protocol):
    """Build a metadata table for one protocol object.

    Parameters
    ----------
    protocol : PA_CM
        Loaded protocol object. Its ``SPEAKER``, ``ENVIRONMENT``, ``ATTACK`` and
        ``KEY`` arrays are used as indices into the matching ``*_names`` arrays
        to turn IDs into readable labels; ``BASE_names`` is used as-is.

    Returns
    -------
    pandas.DataFrame
        Columns ``speaker``, ``sfile``, ``environment``, ``attack``, ``key``.
    """
    return pd.DataFrame(dict(
        speaker=protocol.SPEAKER_names[protocol.SPEAKER],
        sfile=protocol.BASE_names,
        environment=protocol.ENVIRONMENT_names[protocol.ENVIRONMENT],
        attack=protocol.ATTACK_names[protocol.ATTACK],
        key=protocol.KEY_names[protocol.KEY],
    ))


# One frame per protocol, then a single combined frame with a fresh 0..N-1 index.
df_train = protocol_frame(train)
df_eval = protocol_frame(eval)
df_dev = protocol_frame(dev)
df = pd.concat([df_train, df_eval, df_dev], ignore_index=True)

# Preview 10 random rows. Sampling n rows directly avoids shuffling the whole
# frame, and the fixed seed makes the preview reproducible across re-runs.
# min() keeps the preview working when the frame has fewer than 10 rows.
df.sample(n=min(10, len(df)), random_state=0)

Unnamed: 0,speaker,sfile,environment,attack,key
35242,PA_0094,PA_T_0035243,bcb,BB,spoof
125354,PA_0009,PA_E_0072766,bba,AA,spoof
68623,PA_0014,PA_E_0014908,bba,AB,spoof
84568,PA_0007,PA_E_0031176,aab,AA,spoof
150901,PA_0006,PA_E_0098843,cba,-,bonafide
96087,PA_0021,PA_E_0042921,ccb,CA,spoof
48717,PA_0084,PA_T_0048718,cca,AA,spoof
180106,PA_0044,PA_E_0128660,abc,CC,spoof
63075,PA_0028,PA_E_0009246,caa,AC,spoof
139432,PA_0025,PA_E_0087139,acc,CA,spoof


## 条件によるファイルリスト獲得

In [None]:
# Select the train files that satisfy all three conditions below. Per the
# original note, the matches come back as an NDArray of PA_CM_Audio objects.
a = train.query_byIDname(
    key_name='spoof',
    attack_name='CC',
    environment_name='ccc',
)

queries=array([None, 'ccc', 'CC', 'spoof'], dtype=object),qid={'environment': array([26]), 'attack': array([9]), 'key': array([1])}
sfile='PA_T_0053729'=>
=>sfile='PA_T_0053729'
sfile='PA_T_0053730'=>
=>sfile='PA_T_0053730'
sfile='PA_T_0053731'=>
=>sfile='PA_T_0053731'
sfile='PA_T_0053732'=>
=>sfile='PA_T_0053732'
sfile='PA_T_0053733'=>
=>sfile='PA_T_0053733'
sfile='PA_T_0053734'=>
=>sfile='PA_T_0053734'
sfile='PA_T_0053735'=>
=>sfile='PA_T_0053735'
sfile='PA_T_0053736'=>
=>sfile='PA_T_0053736'
sfile='PA_T_0053737'=>
=>sfile='PA_T_0053737'
sfile='PA_T_0053738'=>
=>sfile='PA_T_0053738'
sfile='PA_T_0053739'=>
=>sfile='PA_T_0053739'
sfile='PA_T_0053740'=>
=>sfile='PA_T_0053740'
sfile='PA_T_0053741'=>
=>sfile='PA_T_0053741'
sfile='PA_T_0053742'=>
=>sfile='PA_T_0053742'
sfile='PA_T_0053743'=>
=>sfile='PA_T_0053743'
sfile='PA_T_0053744'=>
=>sfile='PA_T_0053744'
sfile='PA_T_0053745'=>
=>sfile='PA_T_0053745'
sfile='PA_T_0053746'=>
=>sfile='PA_T_0053746'
sfile='PA_T_0053747'=>
=>sfile='PA_T_005

In [None]:
# Number of entries returned by the query above.
len(a)

272

In [None]:
# Print the metadata (protocol, path, sfile and ID names) of the first match.
a[0].show()

protocol: train
path: datadir/ASVspoof2019_PA_train/flac/PA_T_0053729.flac
sfile: PA_T_0053729
speaker_name: ['PA_0079']
attack_name: ['CC']
environment_name: ['ccc']
key_name: ['spoof']


In [None]:
# Print the metadata (protocol, path, sfile and ID names) of the second match.
a[1].show()

protocol: train
path: datadir/ASVspoof2019_PA_train/flac/PA_T_0053730.flac
sfile: PA_T_0053730
speaker_name: ['PA_0079']
attack_name: ['CC']
environment_name: ['ccc']
key_name: ['spoof']


## 音響信号ファイルの読み込み

In [None]:
# Create a PA_CM_Audio object for an audio file by specifying its ID names.
snd = PA_CM_Audio(
    train,
    speaker_name='PA_0079',
    environment_name='aaa',
    attack_name='CC',
    key_name='spoof',
)

# Create a PA_CM_Audio object directly from the audio file's name.
snd2 = PA_CM_Audio(train, sfile='PA_T_0007085')

# Call read() to actually load the audio file; per the original note, the data
# is stored in self.x.
snd2.read()

0:PA_T_0007085
1:PA_T_0007086
2:PA_T_0007087
3:PA_T_0007088
4:PA_T_0007089
=>sfile='PA_T_0007086'
=>sfile='PA_T_0007085'
libsndfile echoes "Error : flac decoder lost sync.". then use audioread


## PA_CM_Audio objectの情報を表示

In [None]:
# Print the metadata of the PA_CM_Audio object that was created from ID names.
snd.show()

protocol: train
path: datadir/ASVspoof2019_PA_train/flac/PA_T_0007086.flac
sfile: PA_T_0007086
speaker_name: ['PA_0079']
attack_name: ['CC']
environment_name: ['aaa']
key_name: ['spoof']
