# "Match a determiner with F0"

> "Towards picking the most prominent determiner using mean F0"

- toc: false
- branch: master
- hidden: true
- categories: [pyin, f0, determiners]

In [1]:
import librosa

In [2]:
import numpy as np

In [3]:
WAVFILE = "/content/spkslt_98.wav"

In [4]:
audio, sr = librosa.load(WAVFILE)

In [5]:
# Estimate the fundamental-frequency (F0) track of `audio` with librosa's pYIN.
# The call returns three per-frame arrays: the F0 estimate, a voiced/unvoiced
# flag, and a voicing probability. The search range is bounded by the pitches
# of the notes C2 (fmin) and C7 (fmax).
# NOTE(review): no frame_length/hop_length is passed, so librosa's defaults
# apply; the time_to_frames calls later in the notebook also rely on defaults,
# so the two must be changed together if either is ever overridden.
# NOTE(review): the later np.nanmean presumably relies on unvoiced frames
# being NaN in `f0` (librosa's default fill value) — confirm.
f0, voiced_flag, voiced_probs = librosa.pyin(y=audio,
                                             fmin=librosa.note_to_hz('C2'),
                                             fmax=librosa.note_to_hz('C7'),
                                             pad_mode='constant',
                                             n_thresholds = 10,
                                             max_transition_rate = 100,
                                             sr=sr)

In [6]:
onsets = librosa.onset.onset_detect(y=audio, sr=sr)

# Helpers

In [7]:
def load_tsv(filename):
    """Read a three-column, tab-separated file of timed labels.

    Every non-blank line is expected to look like ``start<TAB>end<TAB>label``.
    Leading/trailing whitespace on the line is stripped before splitting;
    any columns after the third are ignored.

    Parameters
    ----------
    filename : str or path-like
        Path of the TSV file to read.

    Returns
    -------
    list of tuple
        One ``(float(start), float(end), label)`` tuple per non-blank line,
        in file order.

    Raises
    ------
    ValueError
        If a start or end field cannot be parsed as a float.
    IndexError
        If a non-blank line has fewer than three tab-separated fields.
    """
    output = []
    with open(filename) as inf:
        # Iterate the handle lazily instead of materialising readlines().
        for line in inf:
            stripped = line.strip()
            if not stripped:
                # Blank lines (typically a trailing newline at end of file)
                # carry no record; without this guard they reach float('')
                # and abort the whole load with a ValueError.
                continue
            parts = stripped.split("\t")
            output.append((float(parts[0]), float(parts[1]), parts[2]))
    return output

In [8]:
def get_detdem(tsvish, determiners=("this", "that", "these", "those")):
    """Select the rows whose label is a demonstrative determiner.

    Parameters
    ----------
    tsvish : iterable of sequence
        Rows whose third element (index 2) is the word label, e.g. the
        ``(start, end, label)`` tuples produced by ``load_tsv``.
    determiners : iterable of str, optional
        Labels to keep. Defaults to the four English demonstratives
        ``this``, ``that``, ``these`` and ``those``. Pass a collection of
        words (not a bare string) to look for a different set.

    Returns
    -------
    list
        The matching rows, unchanged and in their original order.

    Notes
    -----
    Matching is an exact, case-sensitive comparison against the label, so a
    capitalised ``"This"`` is not selected.
    """
    # Materialise once so a one-shot iterator can be tested against every row.
    wanted = tuple(determiners)
    return [part for part in tsvish if part[2] in wanted]

# TSV data

In [9]:
tsvcontent = load_tsv("/content/spkslt_98.tsv")

In [10]:
get_detdem(tsvcontent)

[(0.78, 1.04, 'this'), (4.84, 5.05, 'that'), (21.58, 21.97, 'this')]

In [11]:
detdem = get_detdem(tsvcontent)

In [12]:
starts = np.array([x[0] for x in detdem])

In [13]:
ends = np.array([x[1] for x in detdem])

In [14]:
detdem

[(0.78, 1.04, 'this'), (4.84, 5.05, 'that'), (21.58, 21.97, 'this')]

# Frames vs. times

In [15]:
!ffprobe -i {WAVFILE} 2>&1|grep Duration

  Duration: 00:00:27.44, bitrate: 1058 kb/s


In [16]:
librosa.time_to_frames(np.array([0.0, 24.62, 27.44]), sr=sr)

array([   0, 1060, 1181])

In [17]:
len(f0)

1182

# Frames

In [18]:
# Convert the determiner start times to frame indices.
# NOTE(review): no hop_length is passed, so librosa's default is used; it has
# to equal the hop used when f0 was computed for these indices to address f0
# correctly — confirm if either call is ever given an explicit hop_length.
frstarts = librosa.time_to_frames(starts, sr=sr)

In [19]:
# Convert the determiner end times to frame indices.
# NOTE(review): relies on librosa's default hop_length, which has to equal the
# hop used when f0 was computed — confirm if either is ever overridden.
# These indices are later used as exclusive slice ends into f0.
frends = librosa.time_to_frames(ends, sr=sr)

In [20]:
frstarts, frends

(array([ 33, 208, 929]), array([ 44, 217, 946]))

In [21]:
# Print the mean F0 over each determiner's frame span [start, end);
# np.nanmean leaves NaN entries out of the average.
for fr_start, fr_end in zip(frstarts, frends):
    segment_f0 = f0[fr_start:fr_end]
    print(np.nanmean(segment_f0))

193.80076757544543
172.74165512508745
201.8456084757203
