## Environment

Based on the Google Colab environment as of July 2020.

Ubuntu 18.04.3 LTS<br>
Python version 3.6.9<br>
pandas version 1.0.5<br>
praat-parselmouth 0.3.3<br>

### Install praat-parselmouth

In [None]:
!pip install praat-parselmouth

Collecting praat-parselmouth
[?25l  Downloading https://files.pythonhosted.org/packages/09/7b/9fa1172a63b6277603d27bb5613559b5a8888f58e68c1698017b87b0061d/praat_parselmouth-0.3.3-cp36-cp36m-manylinux1_x86_64.whl (9.0MB)
[K     |████████████████████████████████| 9.0MB 2.7MB/s 
Installing collected packages: praat-parselmouth
Successfully installed praat-parselmouth-0.3.3


In [None]:
import glob
import json
import os

import pandas as pd
import parselmouth

### Get the file directory (Colab example)

In [None]:
# Mount Google Drive into the Colab runtime; the call prompts for an
# authorization code the first time it runs.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
# The .wav files live in the "sound" folder under the current working directory
# (877 files in the recorded run).
# NOTE(review): the recorded outputs list paths under the mounted Drive folder, so
# the working directory was presumably changed in a cell that is not shown here;
# confirm before relying on Restart & Run All.
wav_pattern = os.getcwd()+'/sound/*.wav'
len(glob.glob(wav_pattern))

877

In [None]:
# Collect the path of every .wav file in the sound folder. Ordering by
# (length, text) makes numerically named files sort naturally, e.g. "2.wav"
# comes before "10.wav".
lists = sorted(glob.glob(os.getcwd()+'/sound/*.wav'), key = lambda p: (len(p), p))

# Preview the first ten paths.
lists[:10]

['/content/drive/My Drive/성대계정/sound/0.wav',
 '/content/drive/My Drive/성대계정/sound/1.wav',
 '/content/drive/My Drive/성대계정/sound/2.wav',
 '/content/drive/My Drive/성대계정/sound/3.wav',
 '/content/drive/My Drive/성대계정/sound/4.wav',
 '/content/drive/My Drive/성대계정/sound/5.wav',
 '/content/drive/My Drive/성대계정/sound/6.wav',
 '/content/drive/My Drive/성대계정/sound/7.wav',
 '/content/drive/My Drive/성대계정/sound/8.wav',
 '/content/drive/My Drive/성대계정/sound/9.wav']

In [None]:
# Prefix of the sound directory, including the trailing slash. The extraction
# loop splits each full path on this prefix to recover the bare file name.
path = f"{os.getcwd()}/sound/"

In [None]:
# Names of the files that could not be analysed, in processing order.
failed_list = []

# Formant tracks to extract (F1..F5). A label's 1-based position in this list is
# the formant number passed to parselmouth.
formants_value = ['F1','F2',"F3","F4","F5"]

# Column order of the result table.
RESULT_COLUMNS = ["times", "F0(pitch)"] + formants_value + ["filename"]

FORMANT_TIME_STEP = 0.1   # time step for the formant analysis (0.1-second frames)
LOG_EVERY = 10            # print a progress line for every 10th file
CHECKPOINT_EVERY = 100    # write an intermediate CSV after every 100 files


def _extract_features(soundpath, filename):
    """Return the pitch/formant samples of a single sound file.

    Args:
        soundpath: Path of the sound file handed to ``parselmouth.Sound``.
        filename: Label stored in the ``filename`` column of every row.

    Returns:
        A DataFrame with one row per formant-analysis timestamp and the columns
        ``times``, ``F1``..``F5``, ``F0(pitch)`` and ``filename``.

    Raises:
        Any exception parselmouth raises while reading or analysing the file;
        the caller records the file as failed.
    """
    # Convert the .wav file into an analysable Sound object.
    sound = parselmouth.Sound(soundpath)

    # Formant analysis on a fixed time step, and the pitch (F0) track.
    formant = sound.to_formant_burg(time_step = FORMANT_TIME_STEP)
    pitch = sound.to_pitch()

    # The formant timestamps are the sampling grid for every column.
    df = pd.DataFrame({"times": formant.ts()})

    # F1..F5 at each timestamp. The formant number is bound as a default
    # argument so the lambda does not depend on the loop variable's later value.
    for number, col in enumerate(formants_value, 1):
        df[col] = df['times'].map(
            lambda t, number=number: formant.get_value_at_time(formant_number = number, time = t))

    # Pitch sampled at the same timestamps.
    df['F0(pitch)'] = df['times'].map(lambda t: pitch.get_value_at_time(time = t))

    df['filename'] = filename
    return df


def _combine(frames):
    """Stack the per-file frames into one table ordered as RESULT_COLUMNS.

    Each file keeps its own 0-based row index, exactly as repeated
    ``DataFrame.append`` calls would have produced.
    """
    if not frames:
        return pd.DataFrame({col: [] for col in RESULT_COLUMNS})
    return pd.concat(frames, sort = False).reindex(columns = RESULT_COLUMNS)


# Per-file results are collected in a list and concatenated on demand; growing a
# DataFrame with append() inside the loop copies all previous rows on every file.
frames = []
data = _combine(frames)

# Process every sound file in turn.
for iters, soundpath in enumerate(lists, 1):
    # Bare file name: the part of the path after the sound-directory prefix.
    filename = soundpath.split(path)[-1]

    try:
        frames.append(_extract_features(soundpath, filename))

        # Progress log.
        if iters % LOG_EVERY == 0:
            print("success: ", filename)

    except Exception as e:
        # Best-effort batch: report the cause, remember the file, and carry on.
        print(e)
        failed_list.append(filename)
        print("fail: ", filename)

    # Periodic checkpoint: the Colab runtime can be recycled during a long run,
    # which would discard everything computed so far. This runs outside the
    # try block so that it is not skipped when this particular file failed, and
    # so that a write error is not recorded as an audio-file failure.
    if iters % CHECKPOINT_EVERY == 0:
        data = _combine(frames)
        print("checkpoint: ", iters, "files processed,", len(data), "rows")
        try:
            data.to_csv(str(iters) + ".csv")
        except OSError as e:
            print("checkpoint not written: ", e)

# When everything is done, save the final table as finish.csv.
data = _combine(frames)
data.to_csv('finish.csv')

Not an audio file.
Sound not read from sound file “/content/drive/My Drive/성대계정/sound/0.wav”.
fail:  0.wav
Not an audio file.
Sound not read from sound file “/content/drive/My Drive/성대계정/sound/1.wav”.
fail:  1.wav
Not an audio file.
Sound not read from sound file “/content/drive/My Drive/성대계정/sound/2.wav”.
fail:  2.wav
Not an audio file.
Sound not read from sound file “/content/drive/My Drive/성대계정/sound/3.wav”.
fail:  3.wav
Not an audio file.
Sound not read from sound file “/content/drive/My Drive/성대계정/sound/4.wav”.
fail:  4.wav
Not an audio file.
Sound not read from sound file “/content/drive/My Drive/성대계정/sound/5.wav”.
fail:  5.wav
Not an audio file.
Sound not read from sound file “/content/drive/My Drive/성대계정/sound/6.wav”.
fail:  6.wav
Not an audio file.
Sound not read from sound file “/content/drive/My Drive/성대계정/sound/7.wav”.
fail:  7.wav
Not an audio file.
Sound not read from sound file “/content/drive/My Drive/성대계정/sound/8.wa

In [None]:
# Display the accumulated pitch/formant table.
data

Unnamed: 0,times,F0(pitch),F1,F2,F3,F4,F5,filename
0,0.060,,668.956919,1573.228732,2716.867429,3584.648268,,21.wav
1,0.160,235.742471,481.937873,2317.762229,3289.896208,3725.789058,,21.wav
2,0.260,200.052051,509.083329,1341.888272,1567.194317,3386.816847,3905.968303,21.wav
3,0.360,191.497127,721.980693,1718.222394,3060.782565,3561.439324,,21.wav
4,0.460,192.958886,413.574857,1452.808235,2201.513314,3574.820304,4006.706467,21.wav
...,...,...,...,...,...,...,...,...
1297,129.735,,724.083957,2025.094259,2964.735146,5449.009564,,22.wav
1298,129.835,,1499.099465,2490.380936,5437.141035,,,22.wav
1299,129.935,,1499.100308,2490.410592,5437.140847,,,22.wav
1300,130.035,,1499.100445,2490.441877,5437.140680,,,22.wav


In [None]:
# Save the names of the files that failed as a JSON array, so they can be
# inspected or re-processed later. (json is imported in the import cell at the
# top of the notebook.)
with open('failed_file.json', 'w') as f:
    json.dump(failed_list, f, indent = 4)