# Imports e definições

In [1]:
import plotly.express as px
import plotly.graph_objects as go
# from sklearn.model_selection import train_test_split
from tqdm import tqdm
# from torch.utils.data import Dataset, DataLoader
# import torch.optim as optim
# from torcheval.metrics.functional import multiclass_f1_score
# from torcheval.metrics.functional import binary_accuracy
# from torchinfo import summary
import torch
from torch import nn

In [2]:
import numpy as np
import pandas as pd
from scipy import signal
from scipy.io import wavfile
from scipy.signal import find_peaks, peak_widths
from scipy.special import softmax
import os
import pickle

In [3]:
def sigmoid(z):
    """Element-wise logistic function, 1 / (1 + exp(-z))."""
    decay = np.exp(-z)
    return 1/(1 + decay)

In [140]:
def modelFon(x, W1, b1, W2, b2):
    """Forward pass of a one-hidden-layer network: tanh hidden layer, softmax output.

    ``x`` is transposed before the first product and the result is transposed
    back, so row ``i`` of the returned array corresponds to row ``i`` of ``x``
    and each returned row sums to 1.
    """
    n_rows = x.shape[0]
    ones_row = np.ones(n_rows)
    # np.outer repeats each bias vector once per input row.
    hidden = np.tanh(np.dot(W1, x.T) + np.outer(b1, ones_row))
    scores = np.dot(W2, hidden) + np.outer(b2, ones_row)
    return softmax(scores, axis=0).T

In [141]:
def modelLet(x, W1, b1, W2, b2):
    """Forward pass of a one-hidden-layer network: tanh hidden layer, sigmoid output.

    ``x`` is transposed before the first product; the sigmoid activations are
    returned flattened to one dimension.
    """
    n_rows = x.shape[0]
    ones_row = np.ones(n_rows)
    # np.outer repeats each bias vector once per input row.
    hidden = np.tanh(np.dot(W1, x.T) + np.outer(b1, ones_row))
    pre_activation = np.dot(W2, hidden) + np.outer(b2, ones_row)
    return sigmoid(pre_activation).flatten()

In [4]:
def LPC3janelas(s, fa):
    """Fit linear-prediction coefficients on three windows of ``s``.

    The windows have length round(N/3) and start at the beginning, near the
    middle and at the end of ``s``. The prediction order is round(0.003*fa).

    Returns an (order, 3) array with one coefficient column per window, or an
    empty list when the length of ``s`` is outside
    [round(0.02*fa), round(0.2*fa)] samples.
    """
    n_total = s.shape[0]
    shortest = int(np.round(0.02*fa))
    longest = int(np.round(0.2*fa))
    if not ((n_total >= shortest) & (n_total <= longest)):
        return []

    win = int(np.round(n_total/3))
    order = int(np.round(0.003*fa))
    starts = [0, int(np.round((n_total-win)/2))-1, n_total-win]
    coefs = np.zeros((order, 3))
    lagged = np.zeros((win-order-1, order+1))
    for col, start in enumerate(starts):
        frame = s[start:start+win]
        # Column `lag` holds the frame delayed by `lag` samples.
        for lag in range(order+1):
            lagged[:, lag] = frame[lag:lag-order-1]
        # Least-squares prediction of the last column from the earlier ones.
        coefs[:, col] = np.linalg.pinv(lagged[:, :-1]).dot(lagged[:, -1])
    return coefs

In [5]:
def CP2vec(CP, fa):
    """Turn prediction-coefficient columns into one normalized spectral vector.

    For each column of ``CP`` the magnitude of 1/A(e^{jw}) is evaluated on a
    grid of angular frequencies running from 5000/fa*pi down to (excluding)
    50/fa*pi in steps of 100/fa*pi, where A is built from 1 followed by the
    negated, reversed coefficients. The magnitudes are log-compressed,
    flattened column by column and scaled to unit Euclidean norm.
    """
    freqs = np.arange(5000/fa*np.pi, 50/fa*np.pi, -100/fa*np.pi)
    n_cols = CP.shape[1]
    spectrum = np.zeros((len(freqs), n_cols))
    for col in range(n_cols):
        poly = np.hstack((1, -np.flipud(CP[:, col])))
        taps = np.arange(len(poly))
        spectrum[:, col] = [
            abs(1/np.sum(np.exp(-1j*omega*taps)*poly)) for omega in freqs
        ]
    # Relative log magnitude; the +0.01 floor and +2 offset keep values non-negative.
    spectrum = np.log10(spectrum/spectrum.max() + 0.01) + 2
    norm = np.sqrt(np.sum(spectrum.flatten()**2))
    return spectrum.flatten(order='F')/norm

In [6]:
def lempelziv76(s):
    """Parse a symbol sequence into Lempel-Ziv phrases and score its complexity.

    Parameters
    ----------
    s : sequence of str
        Symbols to parse. They are concatenated with ``''.join`` for the
        substring search.

    Returns
    -------
    dic : list of str
        The phrases found, in order. The first phrase is always ``s[0]``.
    cLZ : float
        ``len(dic) * (log2(len(dic)) + 1) / len(s)``, the Lempel-Ziv
        complexity in bits per symbol. An empty ``s`` yields ``([], 0.0)``.
    """
    N = len(s)
    if N == 0:
        # Nothing to parse; previously this raised IndexError on s[0].
        return [], np.float64(0.0)
    dic = [s[0]]
    p = 1
    # The very first candidate is two symbols long; later phrases restart at
    # length 1. Kept as in the original so existing results are unchanged.
    L = 2
    while p+L < N:
        candidate = ''.join(s[p:p+L])
        # Search the history, which may overlap the candidate except for its last symbol.
        if ''.join(s[:p+L-1]).find(candidate) == -1:
            dic.append(candidate)
            p = p+L
            L = 1
        else:
            L = L+1
    if p < N:
        # Remaining symbols form the final phrase; skipped when nothing is left
        # (one-symbol input), which used to add an empty phrase.
        dic.append(''.join(s[p:]))
    # Lempel-Ziv complexity, in bits per symbol.
    cLZ = len(dic)*(np.log2(len(dic))+1)/N
    return dic, cLZ

In [7]:
def perfEner(s, fa):
    """Short-time energy profile of ``s``.

    Frames are round(0.1*fa) samples long and advance by round(0.03*fa)
    samples. Returns one sum of squares per frame, for
    (len(s) - frame_length) // hop frames.
    """
    window = int(np.round(0.1*fa))
    hop = int(np.round(0.03*fa))
    n_frames = (s.shape[0]-window)//hop
    energy = np.zeros(n_frames)
    for idx, start in enumerate(range(0, n_frames*hop, hop)):
        frame = s[start:start+window]
        energy[idx] = np.sum(frame**2)
    return energy

In [8]:
def segmenta(E, pfala):
    """Split each stretch of the energy profile into segments around its peaks.

    Parameters
    ----------
    E : 1-D array
        Energy profile.
    pfala : (2, n) integer array
        Start (row 0) and end (row 1) indices into ``E`` of each stretch.

    Returns
    -------
    segs : (2, m) integer array
        Rounded left/right positions of every kept peak, measured by
        ``peak_widths`` at 70 % of the prominence, as indices into ``E``.
    qp : (n,) array
        Number of kept peaks per stretch (0 when no peak is found).
    """
    n_stretches = pfala.shape[1]
    pieces = [np.empty((2, 0), dtype=int)]
    qp = np.zeros(n_stretches)
    for i in range(n_stretches):
        offset = pfala[0, i]
        aux = E[offset:pfala[1, i]]
        peaks, _ = find_peaks(aux, distance=4)
        if peaks.size == 0:
            continue
        proe = signal.peak_prominences(aux/max(aux), peaks)[0]
        # NOTE(review): the test uses prominences relative to the largest one,
        # while the filter compares the raw prominences -- confirm this is intended.
        if np.any(proe/max(proe) < 0.01):
            peaks = peaks[proe > 0.01]
        widths = peak_widths(aux, peaks, rel_height=0.7)
        ini = np.round(widths[2]).astype(int)
        fim = np.round(widths[3]).astype(int)
        qp[i] = len(peaks)
        # Shift from stretch-local indices back to indices into E.
        pieces.append(np.stack((ini, fim)) + offset)
    return np.hstack(pieces), qp

In [149]:
def wav2ener2fon2(s, fa):
    """Label every energy frame of ``s`` with a predicted phoneme symbol.

    Steps: energy profile -> stretches above 1 % of the peak energy ->
    peak-based segments (``segmenta``) -> three-window LPC features per segment
    (``CP2vec``) -> ``modelFon`` classification -> symbol lookup in ``F0``.

    Reads the globals ``Wf1``, ``bf1``, ``Wf2``, ``bf2`` and ``F0``.

    Returns an array with one string per energy frame; frames outside every
    segment hold '0'. When the recording has no energy frame or no stretch
    above the threshold, the all-'0' array is returned instead of raising.
    """
    E = perfEner(s, fa)
    # NOTE(review): dtype np.str_ creates a one-character string array, so a
    # label longer than one character is truncated on assignment -- confirm
    # this is intended.
    fonemas = np.zeros(E.shape[0], dtype=np.str_)
    fonemas[:] = '0'
    if E.size == 0:
        return fonemas

    limiar = E.max()/100
    # pz: frames where the energy rises above the threshold; nz: where it falls below.
    pz = np.nonzero(np.logical_and(E[1:] > limiar, E[:-1] < limiar))[0]
    nz = np.nonzero(np.logical_and(E[1:] < limiar, E[:-1] > limiar))[0] + 1
    # Drop a leading fall / trailing rise so the stretches pair up as (rise, fall).
    if pz.size and nz.size and nz[0] <= pz[0]:
        nz = nz[1:]
    if pz.size and nz.size and nz[-1] <= pz[-1]:
        pz = pz[:-1]
    if pz.size == 0 or nz.size == 0:
        return fonemas

    pfala = np.stack((pz, nz))
    segs, _ = segmenta(E, pfala)
    Ordem = int(np.round(0.003*fa))
    P2 = np.zeros((segs.shape[1], 150))
    for i in range(segs.shape[1]):
        # Convert frame indices to sample positions.
        na = np.round(segs[0,i]*fa*0.03).astype(int)
        N = np.round((segs[1,i]-segs[0,i])*fa*0.03).astype(int)
        janela = int(np.round(N/3))
        if janela-Ordem-1 <= 0:
            # Segment too short for the LPC fit (e.g. zero length after
            # rounding); its feature row stays zero instead of crashing.
            continue
        marcas = [0, int(np.round((N-janela)/2))-1, N-janela]
        CP = np.zeros((Ordem,3))
        S = np.zeros((janela-Ordem-1,Ordem+1))
        for cont, k in enumerate(marcas):
            saux = s[na+k:na+k+janela]
            for m in range(Ordem+1):
                S[:,m] = saux[m:m-Ordem-1]
            # Least-squares prediction of the last lag column from the others.
            CP[:,cont] = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
        P2[i,] = CP2vec(CP, fa).transpose()
    yp = modelFon(P2, Wf1, bf1, Wf2, bf2)
    inds = yp.argmax(axis=1)
    aux = F0.values[inds].flatten()
    for i in range(segs.shape[1]):
        fonemas[segs[0,i]:segs[1,i]] = aux[i]
    return fonemas

In [10]:
def filtrarSilencios(s, fa):
    """Shorten long pauses in ``s``.

    Stretches are the runs of energy frames above 1 % of the peak energy.
    A pause longer than twice the mean stretch length is cut so that only
    2*int(mean stretch length) frames of it remain after the preceding stretch.

    Returns the signal with the removed samples dropped. When the recording
    has no energy frame or no stretch above the threshold, ``s`` is returned
    unchanged instead of raising.
    """
    E = perfEner(s, fa)
    if E.size == 0:
        return s
    passo = np.round(0.03*fa).astype(int)
    limiar = E.max()/100
    # pz: frames where the energy rises above the threshold; nz: where it falls below.
    pz = np.nonzero(np.logical_and(E[1:] > limiar, E[:-1] < limiar))[0]
    nz = np.nonzero(np.logical_and(E[1:] < limiar, E[:-1] > limiar))[0] + 1
    # Drop a leading fall / trailing rise so the stretches pair up as (rise, fall).
    if pz.size and nz.size and nz[0] <= pz[0]:
        nz = nz[1:]
    if pz.size and nz.size and nz[-1] <= pz[-1]:
        pz = pz[:-1]
    if pz.size == 0 or nz.size == 0:
        return s

    fmed = (nz-pz).mean()
    pausas = pz[1:] - nz[:-1]
    flags = np.ones(len(s), dtype=bool)
    for i in np.nonzero(pausas > fmed*2)[0]:
        # Frame indices are converted to sample positions with the hop size.
        ini = (nz[i]+2*int(fmed))*passo
        fin = int(pz[i+1])*passo
        flags[ini:fin] = False
    return s[flags]

In [151]:
def taxaLetras(s, fa):
    """Fraction of stretches of ``s`` that ``modelLet`` scores above 0.5.

    Stretches are the runs of energy frames above 1 % of the peak energy. Each
    one is described by three-window LPC features (``CP2vec``) and scored by
    ``modelLet``.

    Reads the globals ``Wl1``, ``bl1``, ``Wl2`` and ``bl2``.

    Returns a NumPy float in [0, 1]. When the recording has no energy frame or
    no stretch above the threshold, 0.0 is returned instead of raising.
    """
    E = perfEner(s, fa)
    if E.size == 0:
        return np.float64(0.0)
    limiar = E.max()/100
    # pz: frames where the energy rises above the threshold; nz: where it falls below.
    pz = np.nonzero(np.logical_and(E[1:] > limiar, E[:-1] < limiar))[0]
    nz = np.nonzero(np.logical_and(E[1:] < limiar, E[:-1] > limiar))[0] + 1
    # Drop a leading fall / trailing rise so the stretches pair up as (rise, fall).
    if pz.size and nz.size and nz[0] <= pz[0]:
        nz = nz[1:]
    if pz.size and nz.size and nz[-1] <= pz[-1]:
        pz = pz[:-1]
    if pz.size == 0 or nz.size == 0:
        return np.float64(0.0)

    pfala = np.stack((pz, nz))
    Ordem = int(np.round(0.003*fa))
    P2 = np.zeros((pfala.shape[1], 150))
    for i in range(pfala.shape[1]):
        # Convert frame indices to sample positions.
        na = np.round(pfala[0,i]*fa*0.03).astype(int)
        N = np.round((pfala[1,i]-pfala[0,i])*fa*0.03).astype(int)
        janela = int(np.round(N/3))
        marcas = [0, int(np.round((N-janela)/2))-1, N-janela]
        CP = np.zeros((Ordem,3))
        S = np.zeros((janela-Ordem-1,Ordem+1))
        for cont, k in enumerate(marcas):
            saux = s[na+k:na+k+janela]
            for m in range(Ordem+1):
                S[:,m] = saux[m:m-Ordem-1]
            # Least-squares prediction of the last lag column from the others.
            CP[:,cont] = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
        P2[i,] = CP2vec(CP, fa).transpose()
    yp = modelLet(P2, Wl1, bl1, Wl2, bl2)
    return (yp>0.5).sum()/len(yp)

In [12]:
class mlpFon(nn.Module):
    """MLP: Flatten -> Linear -> Tanh -> Linear -> Softmax over dim 1.

    The layers live in ``all_layers`` at indices 0..4, so parameter keys are
    ``all_layers.1.*`` and ``all_layers.3.*``.
    """

    def __init__(self, raw_dim, dim1, dim2):
        super().__init__()
        stages = [
            nn.Flatten(start_dim=1),
            nn.Linear(raw_dim, dim1),
            nn.Tanh(),
            nn.Linear(dim1, dim2),
            nn.Softmax(dim=1),
        ]
        self.all_layers = nn.Sequential(*stages)

    def forward(self, x):
        # The output has already passed through Softmax.
        return self.all_layers(x)

In [13]:
class mlpLetras(nn.Module):
    """MLP: Flatten -> Linear -> Tanh -> Linear -> Sigmoid.

    The layers live in ``all_layers`` at indices 0..4, so parameter keys are
    ``all_layers.1.*`` and ``all_layers.3.*``.
    """

    def __init__(self, raw_dim, dim1, dim2):
        super().__init__()
        stages = [
            nn.Flatten(start_dim=1),
            nn.Linear(raw_dim, dim1),
            nn.Tanh(),
            nn.Linear(dim1, dim2),
            nn.Sigmoid(),
        ]
        self.all_layers = nn.Sequential(*stages)

    def forward(self, x):
        # The output has already passed through Sigmoid.
        return self.all_layers(x)

In [116]:
# Build the phoneme classifier and load its trained weights.
raw_dim = 150
dim1 = 150
dim2 = 174
# `device` is not defined by any earlier cell, so select it here; this lets the
# cell run on a fresh kernel.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE: this rebinds the name `modelFon`, which an earlier cell defines as the
# NumPy forward-pass function with the same name.
modelFon = mlpFon(raw_dim, dim1, dim2).to(device)
# map_location lets a checkpoint saved on another device load on this one.
modelFon.load_state_dict(torch.load('modeloMSE.pht', map_location=device))
modelFon.eval()

mlpFon(
  (all_layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=150, out_features=150, bias=True)
    (2): Tanh()
    (3): Linear(in_features=150, out_features=174, bias=True)
    (4): Softmax(dim=1)
  )
)

In [114]:
# Build the letter classifier and load its trained weights.
raw_dim = 150
dim1 = 30
dim2 = 1
# `device` is not defined by any earlier cell, so select it here; this lets the
# cell run on a fresh kernel.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE: this rebinds the name `modelLet`, which an earlier cell defines as the
# NumPy forward-pass function with the same name.
modelLet = mlpLetras(raw_dim, dim1, dim2).to(device)
# map_location lets a checkpoint saved on another device load on this one.
modelLet.load_state_dict(torch.load('modeloLETRAS2.pht', map_location=device))
modelLet.eval()

mlpLetras(
  (all_layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=150, out_features=30, bias=True)
    (2): Tanh()
    (3): Linear(in_features=30, out_features=1, bias=True)
    (4): Sigmoid()
  )
)

In [None]:
# Export the weights and biases of the two Linear layers of modelLet
# (Sequential indices 1 and 3) as NumPy arrays and pickle them to 'paramLet.pkl'.
Wl1 = modelLet.all_layers[1].state_dict()['weight'].cpu().detach().numpy()
bl1 = modelLet.all_layers[1].state_dict()['bias'].cpu().detach().numpy()
Wl2 = modelLet.all_layers[3].state_dict()['weight'].cpu().detach().numpy()
bl2 = modelLet.all_layers[3].state_dict()['bias'].cpu().detach().numpy()
with open('paramLet.pkl', 'wb') as f:
    pickle.dump([Wl1, bl1, Wl2, bl2], f)

In [None]:
# Export the weights and biases of the two Linear layers of modelFon
# (Sequential indices 1 and 3) as NumPy arrays and pickle them to 'paramFon.pkl'.
Wf1 = modelFon.all_layers[1].state_dict()['weight'].cpu().detach().numpy()
bf1 = modelFon.all_layers[1].state_dict()['bias'].cpu().detach().numpy()
Wf2 = modelFon.all_layers[3].state_dict()['weight'].cpu().detach().numpy()
bf2 = modelFon.all_layers[3].state_dict()['bias'].cpu().detach().numpy()
with open('paramFon.pkl', 'wb') as f:
    pickle.dump([Wf1, bf1, Wf2, bf2], f)

In [None]:
# Reload the four letter-classifier arrays from 'paramLet.pkl'.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('paramLet.pkl', 'rb') as f:
    Wl1, bl1, Wl2, bl2 = pickle.load(f)

In [None]:
# Reload the four phoneme-classifier arrays from 'paramFon.pkl'.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('paramFon.pkl', 'rb') as f:
    Wf1, bf1, Wf2, bf2 = pickle.load(f)

In [16]:
# Read the phoneme set (second file name in `lista`) as a tab-separated table
# without a header row. The other three file names are listed but not read here.
# NOTE: `path` is an absolute path on the author's machine.
path = 'C:\\Meu Drive\\dados\\'
lista = ['BaseFonemas.csv',
         'ConjuntoFonemas.csv',
         'RotulosFonemas.csv',
         'RotulosNumericosFonemas.csv']
F0 = pd.read_csv(path+lista[1], sep='\t', header=None)

In [17]:
# Load the pickled pair (B, F) from 'files.pkl'; its contents are not shown in this notebook.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('files.pkl', 'rb') as f:
    B, F = pickle.load(f)

In [18]:
# Load the pickled pair (vects, rotul) from 'vects.pkl'.
# NOTE: `rotul` is reassigned by a later cell that lists the labelled audio files.
with open('vects.pkl', 'rb') as f:
    vects, rotul = pickle.load(f)

# Linha de produção

In [19]:
# Read three example recordings (s1, s2, s3), printing each sampling rate and
# duration in seconds, then resample all three to fa = 16000 Hz.
# NOTE: the paths are absolute paths on the author's machine; the resampling
# uses the `fs` of the last file read for all three signals.
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\Audios\\Fluente\\1 - Fluente.wav'
fs, s1 = wavfile.read(path)
print(fs, s1.shape[0]/fs)
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\Audios\\Silabou\\1-Silabou.wav'
fs, s2 = wavfile.read(path)
print(fs, s2.shape[0]/fs)
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\Audios\\Soletrou\\1 - Soletrou.wav'
fs, s3 = wavfile.read(path)
print(fs, s3.shape[0]/fs)

fa = 16000
s1 = signal.resample(s1, int(s1.shape[0]*fa/fs))
s2 = signal.resample(s2, int(s2.shape[0]*fa/fs))
s3 = signal.resample(s3, int(s3.shape[0]*fa/fs))

48000 55.14
48000 60.0
48000 59.7


In [26]:
# Walk every sub-folder of `path`, read each file as WAV, resample it to fa and
# collect the signals in `dados` and the file names in `rotul`.
path = 'C:\\Meu Drive\\dados\\audios rotulados\\'
pastas = os.listdir(path)
dados = []
rotul = []
fa = 16000
for pasta in pastas:
    files = os.listdir(path+pasta)
    for file in files:
        fs, s = wavfile.read(path+pasta+'\\'+file)
        dados.append(signal.resample(s, int(s.shape[0]*fa/fs)))
        # Drop the last four characters of the file name (presumably the '.wav' extension).
        rotul.append(file[:-4])

In [29]:
# Compute the letter rate (taxaLetras) of every recording in `dados`, with a progress bar.
letras = []
for s in tqdm(dados):
    letras.append(taxaLetras(s, fa))

                                                 

In [31]:
# Scatter plot of the letter rate per recording, with the recording names as x ticks.
fig = px.scatter(letras)
fig.update_layout(
    xaxis = dict(
        # One tick per label; derived from `rotul` instead of a hard-coded count.
        tickvals = np.arange(len(rotul)),
        ticktext = rotul
    )
)
fig.show()

In [34]:
# Show the labels of the recordings whose letter rate is above 0.4.
np.array(rotul)[np.array(letras)>0.4]

array(['fluente 02', 'fluente silabando 02', 'fluente silabando 03',
       'fluente silabando 05', 'fluente silabando 07',
       'fluente silabando 09', 'fluente silabando 10',
       'fluente silabando 11', 'fluente silabando 12',
       'fluente silabando 13', 'fluente silabando 15', 'silabou 03',
       'silabou 04', 'silabou 05', 'silabou 06', 'silabou 07',
       'silabou 09', 'silabou 10', 'silabou 12', 'silabou 13',
       'silabou 15', 'silabou 18', 'silabou 19', 'silabou 20',
       'fluente 01', 'fluente 15', 'fluente 16', 'silabou 01',
       'silabou 02', 'silabou 13', 'soletrou 05', 'soletrou 06',
       'soletrou 08', 'fluente 01', 'fluente 08', 'fluente 16',
       'silabou 01', 'soletrou 01', 'soletrou 04'], dtype='<U20')

In [38]:
# Take recordings 40..79 (labels in y, signals in X) as the subset to classify.
y = rotul[40:80]
X = dados[40:80]

In [46]:
# Classify every recording in X and print the predicted class.
# T1 is the Lempel-Ziv complexity of the phoneme sequence; T2 is the letter rate.
for s in X:
    s = filtrarSilencios(s, fa)
    fonemas = wav2ener2fon2(s, fa)
    _, T1 = lempelziv76(fonemas)
    T2 = taxaLetras(s, fa)
    # A high letter rate wins regardless of T1; otherwise T1 separates the other two classes.
    if T2>0.52:
        yp = 'soletrou'
    elif T1>0.65:
        yp = 'fluente'
    else:
        yp = 'silabou'
    print(yp)

soletrou
silabou
fluente
silabou
fluente
fluente
fluente
fluente
silabou
fluente
fluente
silabou
silabou
fluente
fluente
soletrou
fluente
fluente
silabou
silabou
silabou
silabou
silabou
silabou
silabou
silabou
silabou
silabou
silabou
silabou
silabou
silabou
silabou
fluente
silabou
silabou
silabou
soletrou
silabou
soletrou


In [48]:
# Predictions for the 40 recordings in X, typed in by hand
# (they match the output printed by the classification loop).
yp = [
    'soletrou', 'silabou', 'fluente', 'silabou',
    'fluente', 'fluente', 'fluente', 'fluente',
    'silabou', 'fluente', 'fluente', 'silabou',
    'silabou', 'fluente', 'fluente', 'soletrou',
    'fluente', 'fluente', 'silabou', 'silabou',
    'silabou', 'silabou', 'silabou', 'silabou',
    'silabou', 'silabou', 'silabou', 'silabou',
    'silabou', 'silabou', 'silabou', 'silabou',
    'silabou', 'fluente', 'silabou', 'silabou',
    'silabou', 'soletrou', 'silabou', 'soletrou',
]

In [53]:
# Sanity check: position of 'fluente' in the first label (0 means the label starts with it).
y[0].find('fluente')

0

In [55]:
# Sanity check: position of 'silabou' in label 20 (0 means the label starts with it).
y[20].find('silabou')

0

In [56]:
# Confusion matrix: rows are the true class, columns the predicted class,
# both in the order soletrou, silabou, fluente.
classes = ['soletrou', 'silabou', 'fluente']


def _indiceClasse(rotulo):
    """Return the index in `classes` of the class name that `rotulo` starts with."""
    for k, nome in enumerate(classes):
        if rotulo.startswith(nome):
            return k
    # Previously an unmatched label silently reused the index of the last match.
    raise ValueError('label does not start with a known class: ' + rotulo)


conf = np.zeros((3,3))
for i, real in enumerate(y):
    conf[_indiceClasse(real), _indiceClasse(yp[i])] += 1

In [61]:
# Print the column header, then display the confusion matrix (rows: true, columns: predicted).
print('        Sol  Sil  Flu')
conf

        Sol  Sil  Flu


array([[ 2.,  5.,  1.],
       [ 0., 13.,  0.],
       [ 2.,  6., 11.]])

In [20]:
# Step-by-step version of the stretch detection for s3.
E = perfEner(s3, fa)
# pz: frames where the energy rises above 1 % of the maximum; nz: where it falls below.
pz = np.logical_and(E[1:] > E.max()/100, E[:-1] < E.max()/100)
nz = np.logical_and(E[1:] < E.max()/100, E[:-1] > E.max()/100)
pz = np.nonzero(pz)[0]
nz = np.nonzero(nz)[0] + 1
# Drop a leading fall / trailing rise so the stretches pair up as (rise, fall).
if nz[0]<=pz[0]: nz = nz[1:]
if nz[-1]<=pz[-1]: pz = pz[:-1]
pfala = np.stack((pz,nz))
# One 150-value feature row per stretch, filled by the next cell.
P2 = np.zeros((pfala.shape[1], 150))

In [21]:
# Compute the three-window LPC features of every stretch of s3 and score them
# with modelLet, called here with a single tensor argument (the torch module).
for i in range(pfala.shape[1]):
    # Convert frame indices to sample positions.
    na = np.round(pfala[0,i]*fa*0.03).astype(int)
    N = np.round((pfala[1,i]-pfala[0,i])*fa*0.03).astype(int)
    janela = int(np.round(N/3))
    marcas = [0, int(np.round((N-janela)/2))-1, N-janela]
    Ordem = int(np.round(0.003*fa))
    CP = np.zeros((Ordem,3))
    S = np.zeros((janela-Ordem-1,Ordem+1))
    cont = 0
    for k in marcas:
        saux = s3[na+k:na+k+janela]
        # Column m holds the window delayed by m samples.
        for m in range(Ordem+1):
            S[:,m] = saux[m:m-Ordem-1]
        # Least-squares prediction of the last column from the earlier ones.
        C = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
        CP[:,cont] = C
        cont += 1
    P2[i,] = CP2vec(CP, fa).transpose()
# NOTE(review): `device` is not defined in this cell -- it must exist in the kernel.
x1 = torch.tensor(P2, dtype=torch.float32, device=device).detach()
yp = modelLet(x1)

In [115]:
# Export the weights and biases of the two Linear layers of modelLet
# (Sequential indices 1 and 3) as NumPy arrays and pickle them to 'paramLet.pkl'.
# NOTE: same code as an earlier cell of this notebook.
Wl1 = modelLet.all_layers[1].state_dict()['weight'].cpu().detach().numpy()
bl1 = modelLet.all_layers[1].state_dict()['bias'].cpu().detach().numpy()
Wl2 = modelLet.all_layers[3].state_dict()['weight'].cpu().detach().numpy()
bl2 = modelLet.all_layers[3].state_dict()['bias'].cpu().detach().numpy()
with open('paramLet.pkl', 'wb') as f:
    pickle.dump([Wl1, bl1, Wl2, bl2], f)

In [118]:
# Reload the four letter-classifier arrays from 'paramLet.pkl'.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('paramLet.pkl', 'rb') as f:
    Wl1, bl1, Wl2, bl2 = pickle.load(f)

In [117]:
# Export the weights and biases of the two Linear layers of modelFon
# (Sequential indices 1 and 3) as NumPy arrays and pickle them to 'paramFon.pkl'.
# NOTE: same code as an earlier cell of this notebook.
Wf1 = modelFon.all_layers[1].state_dict()['weight'].cpu().detach().numpy()
bf1 = modelFon.all_layers[1].state_dict()['bias'].cpu().detach().numpy()
Wf2 = modelFon.all_layers[3].state_dict()['weight'].cpu().detach().numpy()
bf2 = modelFon.all_layers[3].state_dict()['bias'].cpu().detach().numpy()
with open('paramFon.pkl', 'wb') as f:
    pickle.dump([Wf1, bf1, Wf2, bf2], f)

In [119]:
# Reload the four phoneme-classifier arrays from 'paramFon.pkl'.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('paramFon.pkl', 'rb') as f:
    Wf1, bf1, Wf2, bf2 = pickle.load(f)

# Tutor virtual

## Inicialização

In [190]:
import numpy as np
from scipy import signal
from scipy.io import wavfile
from scipy.signal import find_peaks, peak_widths
from scipy.special import softmax
import os
import pickle
import sys

# Load the NumPy parameters of both classifiers from their pickle files.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('paramLet.pkl', 'rb') as f:
    Wl1, bl1, Wl2, bl2 = pickle.load(f)

with open('paramFon.pkl', 'rb') as f:
    Wf1, bf1, Wf2, bf2 = pickle.load(f)

# Phoneme set read as a NumPy string array from a tab-separated file in the working directory.
F0 = np.genfromtxt('ConjuntoFonemas.csv', delimiter='\t', dtype=str)

def sigmoid(z):
    """Element-wise logistic function, 1 / (1 + exp(-z))."""
    decay = np.exp(-z)
    return 1/(1 + decay)

def modelFon(x, W1, b1, W2, b2):
    """Forward pass of a one-hidden-layer network: tanh hidden layer, softmax output.

    ``x`` is transposed before the first product and the result is transposed
    back, so row ``i`` of the returned array corresponds to row ``i`` of ``x``
    and each returned row sums to 1.
    """
    n_rows = x.shape[0]
    ones_row = np.ones(n_rows)
    # np.outer repeats each bias vector once per input row.
    hidden = np.tanh(np.dot(W1, x.T) + np.outer(b1, ones_row))
    scores = np.dot(W2, hidden) + np.outer(b2, ones_row)
    return softmax(scores, axis=0).T

def modelLet(x, W1, b1, W2, b2):
    """Forward pass of a one-hidden-layer network: tanh hidden layer, sigmoid output.

    ``x`` is transposed before the first product; the sigmoid activations are
    returned flattened to one dimension.
    """
    n_rows = x.shape[0]
    ones_row = np.ones(n_rows)
    # np.outer repeats each bias vector once per input row.
    hidden = np.tanh(np.dot(W1, x.T) + np.outer(b1, ones_row))
    pre_activation = np.dot(W2, hidden) + np.outer(b2, ones_row)
    return sigmoid(pre_activation).flatten()

def LPC3janelas(s, fa):
    """Fit linear-prediction coefficients on three windows of ``s``.

    The windows have length round(N/3) and start at the beginning, near the
    middle and at the end of ``s``. The prediction order is round(0.003*fa).

    Returns an (order, 3) array with one coefficient column per window, or an
    empty list when the length of ``s`` is outside
    [round(0.02*fa), round(0.2*fa)] samples.
    """
    n_total = s.shape[0]
    shortest = int(np.round(0.02*fa))
    longest = int(np.round(0.2*fa))
    if not ((n_total >= shortest) & (n_total <= longest)):
        return []

    win = int(np.round(n_total/3))
    order = int(np.round(0.003*fa))
    starts = [0, int(np.round((n_total-win)/2))-1, n_total-win]
    coefs = np.zeros((order, 3))
    lagged = np.zeros((win-order-1, order+1))
    for col, start in enumerate(starts):
        frame = s[start:start+win]
        # Column `lag` holds the frame delayed by `lag` samples.
        for lag in range(order+1):
            lagged[:, lag] = frame[lag:lag-order-1]
        # Least-squares prediction of the last column from the earlier ones.
        coefs[:, col] = np.linalg.pinv(lagged[:, :-1]).dot(lagged[:, -1])
    return coefs
    
def CP2vec(CP, fa):
    """Turn prediction-coefficient columns into one normalized spectral vector.

    For each column of ``CP`` the magnitude of 1/A(e^{jw}) is evaluated on a
    grid of angular frequencies running from 5000/fa*pi down to (excluding)
    50/fa*pi in steps of 100/fa*pi, where A is built from 1 followed by the
    negated, reversed coefficients. The magnitudes are log-compressed,
    flattened column by column and scaled to unit Euclidean norm.
    """
    freqs = np.arange(5000/fa*np.pi, 50/fa*np.pi, -100/fa*np.pi)
    n_cols = CP.shape[1]
    spectrum = np.zeros((len(freqs), n_cols))
    for col in range(n_cols):
        poly = np.hstack((1, -np.flipud(CP[:, col])))
        taps = np.arange(len(poly))
        spectrum[:, col] = [
            abs(1/np.sum(np.exp(-1j*omega*taps)*poly)) for omega in freqs
        ]
    # Relative log magnitude; the +0.01 floor and +2 offset keep values non-negative.
    spectrum = np.log10(spectrum/spectrum.max() + 0.01) + 2
    norm = np.sqrt(np.sum(spectrum.flatten()**2))
    return spectrum.flatten(order='F')/norm

def lempelziv76(s):
    """Parse a symbol sequence into Lempel-Ziv phrases and score its complexity.

    Parameters
    ----------
    s : sequence of str
        Symbols to parse. They are concatenated with ``''.join`` for the
        substring search.

    Returns
    -------
    dic : list of str
        The phrases found, in order. The first phrase is always ``s[0]``.
    cLZ : float
        ``len(dic) * (log2(len(dic)) + 1) / len(s)``, the Lempel-Ziv
        complexity in bits per symbol. An empty ``s`` yields ``([], 0.0)``.
    """
    N = len(s)
    if N == 0:
        # Nothing to parse; previously this raised IndexError on s[0].
        return [], np.float64(0.0)
    dic = [s[0]]
    p = 1
    # The very first candidate is two symbols long; later phrases restart at
    # length 1. Kept as in the original so existing results are unchanged.
    L = 2
    while p+L < N:
        candidate = ''.join(s[p:p+L])
        # Search the history, which may overlap the candidate except for its last symbol.
        if ''.join(s[:p+L-1]).find(candidate) == -1:
            dic.append(candidate)
            p = p+L
            L = 1
        else:
            L = L+1
    if p < N:
        # Remaining symbols form the final phrase; skipped when nothing is left
        # (one-symbol input), which used to add an empty phrase.
        dic.append(''.join(s[p:]))
    # Lempel-Ziv complexity, in bits per symbol.
    cLZ = len(dic)*(np.log2(len(dic))+1)/N
    return dic, cLZ

def perfEner(s, fa):
    """Short-time energy profile of ``s``.

    Frames are round(0.1*fa) samples long and advance by round(0.03*fa)
    samples. Returns one sum of squares per frame, for
    (len(s) - frame_length) // hop frames.
    """
    window = int(np.round(0.1*fa))
    hop = int(np.round(0.03*fa))
    n_frames = (s.shape[0]-window)//hop
    energy = np.zeros(n_frames)
    for idx, start in enumerate(range(0, n_frames*hop, hop)):
        frame = s[start:start+window]
        energy[idx] = np.sum(frame**2)
    return energy

def segmenta(E, pfala):
    """Split each stretch of the energy profile into segments around its peaks.

    Parameters
    ----------
    E : 1-D array
        Energy profile.
    pfala : (2, n) integer array
        Start (row 0) and end (row 1) indices into ``E`` of each stretch.

    Returns
    -------
    segs : (2, m) integer array
        Rounded left/right positions of every kept peak, measured by
        ``peak_widths`` at 70 % of the prominence, as indices into ``E``.
    qp : (n,) array
        Number of kept peaks per stretch (0 when no peak is found).
    """
    n_stretches = pfala.shape[1]
    pieces = [np.empty((2, 0), dtype=int)]
    qp = np.zeros(n_stretches)
    for i in range(n_stretches):
        offset = pfala[0, i]
        aux = E[offset:pfala[1, i]]
        peaks, _ = find_peaks(aux, distance=4)
        if peaks.size == 0:
            continue
        proe = signal.peak_prominences(aux/max(aux), peaks)[0]
        # NOTE(review): the test uses prominences relative to the largest one,
        # while the filter compares the raw prominences -- confirm this is intended.
        if np.any(proe/max(proe) < 0.01):
            peaks = peaks[proe > 0.01]
        widths = peak_widths(aux, peaks, rel_height=0.7)
        ini = np.round(widths[2]).astype(int)
        fim = np.round(widths[3]).astype(int)
        qp[i] = len(peaks)
        # Shift from stretch-local indices back to indices into E.
        pieces.append(np.stack((ini, fim)) + offset)
    return np.hstack(pieces), qp

def filtrarSilencios(s, fa):
    """Shorten long pauses in ``s``.

    Stretches are the runs of energy frames above 1 % of the peak energy.
    A pause longer than twice the mean stretch length is cut so that only
    2*int(mean stretch length) frames of it remain after the preceding stretch.

    Returns the signal with the removed samples dropped. When the recording
    has no energy frame or no stretch above the threshold, ``s`` is returned
    unchanged instead of raising.
    """
    E = perfEner(s, fa)
    if E.size == 0:
        return s
    passo = np.round(0.03*fa).astype(int)
    limiar = E.max()/100
    # pz: frames where the energy rises above the threshold; nz: where it falls below.
    pz = np.nonzero(np.logical_and(E[1:] > limiar, E[:-1] < limiar))[0]
    nz = np.nonzero(np.logical_and(E[1:] < limiar, E[:-1] > limiar))[0] + 1
    # Drop a leading fall / trailing rise so the stretches pair up as (rise, fall).
    if pz.size and nz.size and nz[0] <= pz[0]:
        nz = nz[1:]
    if pz.size and nz.size and nz[-1] <= pz[-1]:
        pz = pz[:-1]
    if pz.size == 0 or nz.size == 0:
        return s

    fmed = (nz-pz).mean()
    pausas = pz[1:] - nz[:-1]
    flags = np.ones(len(s), dtype=bool)
    for i in np.nonzero(pausas > fmed*2)[0]:
        # Frame indices are converted to sample positions with the hop size.
        ini = (nz[i]+2*int(fmed))*passo
        fin = int(pz[i+1])*passo
        flags[ini:fin] = False
    return s[flags]

def wav2ener2fon2(s, fa):
    """Label every energy frame of ``s`` with a predicted phoneme symbol.

    Steps: energy profile -> stretches above 1 % of the peak energy ->
    peak-based segments (``segmenta``) -> three-window LPC features per segment
    (``CP2vec``) -> ``modelFon`` classification -> symbol lookup in ``F0``.

    Reads the globals ``Wf1``, ``bf1``, ``Wf2``, ``bf2`` and ``F0``.

    Returns an array with one string per energy frame; frames outside every
    segment hold '0'. When the recording has no energy frame or no stretch
    above the threshold, the all-'0' array is returned instead of raising.
    """
    E = perfEner(s, fa)
    # NOTE(review): dtype np.str_ creates a one-character string array, so a
    # label longer than one character is truncated on assignment -- confirm
    # this is intended.
    fonemas = np.zeros(E.shape[0], dtype=np.str_)
    fonemas[:] = '0'
    if E.size == 0:
        return fonemas

    limiar = E.max()/100
    # pz: frames where the energy rises above the threshold; nz: where it falls below.
    pz = np.nonzero(np.logical_and(E[1:] > limiar, E[:-1] < limiar))[0]
    nz = np.nonzero(np.logical_and(E[1:] < limiar, E[:-1] > limiar))[0] + 1
    # Drop a leading fall / trailing rise so the stretches pair up as (rise, fall).
    if pz.size and nz.size and nz[0] <= pz[0]:
        nz = nz[1:]
    if pz.size and nz.size and nz[-1] <= pz[-1]:
        pz = pz[:-1]
    if pz.size == 0 or nz.size == 0:
        return fonemas

    pfala = np.stack((pz, nz))
    segs, _ = segmenta(E, pfala)
    Ordem = int(np.round(0.003*fa))
    P2 = np.zeros((segs.shape[1], 150))
    for i in range(segs.shape[1]):
        # Convert frame indices to sample positions.
        na = np.round(segs[0,i]*fa*0.03).astype(int)
        N = np.round((segs[1,i]-segs[0,i])*fa*0.03).astype(int)
        janela = int(np.round(N/3))
        if janela-Ordem-1 <= 0:
            # Segment too short for the LPC fit (e.g. zero length after
            # rounding); its feature row stays zero instead of crashing.
            continue
        marcas = [0, int(np.round((N-janela)/2))-1, N-janela]
        CP = np.zeros((Ordem,3))
        S = np.zeros((janela-Ordem-1,Ordem+1))
        for cont, k in enumerate(marcas):
            saux = s[na+k:na+k+janela]
            for m in range(Ordem+1):
                S[:,m] = saux[m:m-Ordem-1]
            # Least-squares prediction of the last lag column from the others.
            CP[:,cont] = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
        P2[i,] = CP2vec(CP, fa).transpose()
    yp = modelFon(P2, Wf1, bf1, Wf2, bf2)
    inds = yp.argmax(axis=1)
    aux = F0[inds].flatten()
    for i in range(segs.shape[1]):
        fonemas[segs[0,i]:segs[1,i]] = aux[i]
    return fonemas

def taxaLetras(s, fa):
    """Fraction of stretches of ``s`` that ``modelLet`` scores above 0.5.

    Stretches are the runs of energy frames above 1 % of the peak energy. Each
    one is described by three-window LPC features (``CP2vec``) and scored by
    ``modelLet``.

    Reads the globals ``Wl1``, ``bl1``, ``Wl2`` and ``bl2``.

    Returns a NumPy float in [0, 1]. When the recording has no energy frame or
    no stretch above the threshold, 0.0 is returned instead of raising.
    """
    E = perfEner(s, fa)
    if E.size == 0:
        return np.float64(0.0)
    limiar = E.max()/100
    # pz: frames where the energy rises above the threshold; nz: where it falls below.
    pz = np.nonzero(np.logical_and(E[1:] > limiar, E[:-1] < limiar))[0]
    nz = np.nonzero(np.logical_and(E[1:] < limiar, E[:-1] > limiar))[0] + 1
    # Drop a leading fall / trailing rise so the stretches pair up as (rise, fall).
    if pz.size and nz.size and nz[0] <= pz[0]:
        nz = nz[1:]
    if pz.size and nz.size and nz[-1] <= pz[-1]:
        pz = pz[:-1]
    if pz.size == 0 or nz.size == 0:
        return np.float64(0.0)

    pfala = np.stack((pz, nz))
    Ordem = int(np.round(0.003*fa))
    P2 = np.zeros((pfala.shape[1], 150))
    for i in range(pfala.shape[1]):
        # Convert frame indices to sample positions.
        na = np.round(pfala[0,i]*fa*0.03).astype(int)
        N = np.round((pfala[1,i]-pfala[0,i])*fa*0.03).astype(int)
        janela = int(np.round(N/3))
        marcas = [0, int(np.round((N-janela)/2))-1, N-janela]
        CP = np.zeros((Ordem,3))
        S = np.zeros((janela-Ordem-1,Ordem+1))
        for cont, k in enumerate(marcas):
            saux = s[na+k:na+k+janela]
            for m in range(Ordem+1):
                S[:,m] = saux[m:m-Ordem-1]
            # Least-squares prediction of the last lag column from the others.
            CP[:,cont] = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
        P2[i,] = CP2vec(CP, fa).transpose()
    yp = modelLet(P2, Wl1, bl1, Wl2, bl2)
    return (yp>0.5).sum()/len(yp)

## Testes

In [203]:
# Classify every .wav file of one labelled folder and accumulate a confusion
# table `df` (rows: class taken from the first three letters of the file name,
# columns: predicted class).
dataPath = 'C:\\Meu Drive\\dados\\audios rotulados\\indaiatuba 2ano texto'
# dataPath = 'C:\\Meu Drive\\dados\\audios rotulados\\interna 1ano palavras'
# dataPath = 'C:\\Meu Drive\\dados\\audios rotulados\\interna 1ano texto'
files = os.listdir(dataPath)
fa = 16000
colunas = ['sol', 'sil', 'flu']
df = pd.DataFrame(np.zeros((3,3)), columns=colunas, index=colunas)
for arq in files:
    if '.wav' not in arq:
        continue
    print('Analisando o arquivo '+arq)
    y = colunas.index(arq[:3])
    fs, s = wavfile.read(dataPath+'\\'+arq)
    s = signal.resample(s, int(s.shape[0]*fa/fs))
    fonemas = wav2ener2fon2(s, fa)
    _, T1 = lempelziv76(fonemas)
    T1 = T1.round(2)
    T2 = taxaLetras(s, fa).round(2)
    # A high letter rate wins regardless of T1; otherwise T1 separates the other two classes.
    if T2>0.52:
        yp = colunas.index('sol')
    elif T1>0.65:
        yp = colunas.index('flu')
    else:
        yp = colunas.index('sil')
    df.iloc[y,yp] += 1

Analisando o arquivo fluente 01.wav
Analisando o arquivo fluente 02.wav
Analisando o arquivo fluente silabando 01.wav
Analisando o arquivo fluente silabando 02.wav
Analisando o arquivo fluente silabando 03.wav
Analisando o arquivo fluente silabando 04.wav
Analisando o arquivo fluente silabando 05.wav
Analisando o arquivo fluente silabando 06.wav
Analisando o arquivo fluente silabando 07.wav
Analisando o arquivo fluente silabando 08.wav
Analisando o arquivo fluente silabando 09.wav
Analisando o arquivo fluente silabando 10.wav
Analisando o arquivo fluente silabando 11.wav
Analisando o arquivo fluente silabando 12.wav
Analisando o arquivo fluente silabando 13.wav
Analisando o arquivo fluente silabando 14.wav
Analisando o arquivo fluente silabando 15.wav
Analisando o arquivo silabou 01.wav
Analisando o arquivo silabou 02.wav
Analisando o arquivo silabou 03.wav
Analisando o arquivo silabou 04.wav
Analisando o arquivo silabou 05.wav
Analisando o arquivo silabou 06.wav
Analisando o arquivo s

In [207]:
indaiatuba = df
indaiatuba

Unnamed: 0,sol,sil,flu
sol,0.0,0.0,0.0
sil,1.0,20.0,2.0
flu,0.0,0.0,17.0


In [208]:
# Classify every .wav file of one labelled folder and accumulate a confusion
# table `df` (rows: class taken from the first three letters of the file name,
# columns: predicted class).
# dataPath = 'C:\\Meu Drive\\dados\\audios rotulados\\indaiatuba 2ano texto'
dataPath = 'C:\\Meu Drive\\dados\\audios rotulados\\interna 1ano palavras'
# dataPath = 'C:\\Meu Drive\\dados\\audios rotulados\\interna 1ano texto'
files = os.listdir(dataPath)
fa = 16000
colunas = ['sol', 'sil', 'flu']
df = pd.DataFrame(np.zeros((3,3)), columns=colunas, index=colunas)
for arq in files:
    if '.wav' not in arq:
        continue
    print('Analisando o arquivo '+arq)
    y = colunas.index(arq[:3])
    fs, s = wavfile.read(dataPath+'\\'+arq)
    s = signal.resample(s, int(s.shape[0]*fa/fs))
    fonemas = wav2ener2fon2(s, fa)
    _, T1 = lempelziv76(fonemas)
    T1 = T1.round(2)
    T2 = taxaLetras(s, fa).round(2)
    # A high letter rate wins regardless of T1; otherwise T1 separates the other two classes.
    if T2>0.52:
        yp = colunas.index('sol')
    elif T1>0.65:
        yp = colunas.index('flu')
    else:
        yp = colunas.index('sil')
    df.iloc[y,yp] += 1
df

Analisando o arquivo fluente 01.wav
Analisando o arquivo fluente 02.wav
Analisando o arquivo fluente 03.wav
Analisando o arquivo fluente 04.wav
Analisando o arquivo fluente 05.wav
Analisando o arquivo fluente 06.wav
Analisando o arquivo fluente 07.wav
Analisando o arquivo fluente 08.wav
Analisando o arquivo fluente 09.wav
Analisando o arquivo fluente 10.wav
Analisando o arquivo fluente 11.wav
Analisando o arquivo fluente 12.wav
Analisando o arquivo fluente 13.wav
Analisando o arquivo fluente 14.wav
Analisando o arquivo fluente 15.wav
Analisando o arquivo fluente 16.wav
Analisando o arquivo fluente 17.wav
Analisando o arquivo fluente 18.wav
Analisando o arquivo fluente 19.wav
Analisando o arquivo silabou 01.wav
Analisando o arquivo silabou 02.wav
Analisando o arquivo silabou 03.wav
Analisando o arquivo silabou 04.wav
Analisando o arquivo silabou 05.wav
Analisando o arquivo silabou 06.wav
Analisando o arquivo silabou 07.wav
Analisando o arquivo silabou 08.wav
Analisando o arquivo silabou

Unnamed: 0,sol,sil,flu
sol,7.0,1.0,0.0
sil,2.0,11.0,0.0
flu,1.0,7.0,11.0


In [209]:
# Classify every .wav file of one labelled folder and accumulate a confusion
# table `df` (rows: class taken from the first three letters of the file name,
# columns: predicted class).
# dataPath = 'C:\\Meu Drive\\dados\\audios rotulados\\indaiatuba 2ano texto'
# dataPath = 'C:\\Meu Drive\\dados\\audios rotulados\\interna 1ano palavras'
dataPath = 'C:\\Meu Drive\\dados\\audios rotulados\\interna 1ano texto'
files = os.listdir(dataPath)
fa = 16000
colunas = ['sol', 'sil', 'flu']
df = pd.DataFrame(np.zeros((3,3)), columns=colunas, index=colunas)
for arq in files:
    if '.wav' not in arq:
        continue
    print('Analisando o arquivo '+arq)
    y = colunas.index(arq[:3])
    fs, s = wavfile.read(dataPath+'\\'+arq)
    s = signal.resample(s, int(s.shape[0]*fa/fs))
    fonemas = wav2ener2fon2(s, fa)
    _, T1 = lempelziv76(fonemas)
    T1 = T1.round(2)
    T2 = taxaLetras(s, fa).round(2)
    # A high letter rate wins regardless of T1; otherwise T1 separates the other two classes.
    if T2>0.52:
        yp = colunas.index('sol')
    elif T1>0.65:
        yp = colunas.index('flu')
    else:
        yp = colunas.index('sil')
    df.iloc[y,yp] += 1
df

Analisando o arquivo fluente 01.wav
Analisando o arquivo fluente 02.wav
Analisando o arquivo fluente 03.wav
Analisando o arquivo fluente 04.wav
Analisando o arquivo fluente 05.wav
Analisando o arquivo fluente 06.wav
Analisando o arquivo fluente 07.wav
Analisando o arquivo fluente 08.wav
Analisando o arquivo fluente 09.wav
Analisando o arquivo fluente 10.wav
Analisando o arquivo fluente 11.wav
Analisando o arquivo fluente 12.wav
Analisando o arquivo fluente 13.wav
Analisando o arquivo fluente 14.wav
Analisando o arquivo fluente 15.wav
Analisando o arquivo fluente 16.wav
Analisando o arquivo fluente 17.wav
Analisando o arquivo fluente 18.wav
Analisando o arquivo fluente 19.wav
Analisando o arquivo fluente 20.wav
Analisando o arquivo fluente 21.wav
Analisando o arquivo fluente 22.wav
Analisando o arquivo fluente 23.wav
Analisando o arquivo silabou 01.wav
Analisando o arquivo silabou 02.wav
Analisando o arquivo silabou 03.wav
Analisando o arquivo silabou 04.wav
Analisando o arquivo silabou

Unnamed: 0,sol,sil,flu
sol,3.0,1.0,0.0
sil,1.0,4.0,0.0
flu,0.0,0.0,23.0
