# Imports e definições

In [1]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from scipy import signal
from scipy.io import wavfile
import os
import pickle
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import random

In [2]:
def meuLPC(s,fa):
	Janela = int(round(0.03*fa))
	Avanco = int(round(0.01*fa))
	Ordem = int(round(0.001*fa))
	marcas = np.arange(0, s.shape[0]-Janela-1, Avanco)
	CP = np.zeros((Ordem, len(marcas)))
	Pot = np.zeros(len(marcas))	
	TCZ = np.zeros(len(marcas))
	S = np.zeros((Janela-Ordem-1, Ordem+1))
	cont = 0
	for k in marcas:
		saux = s[k:k+Janela]
		Pot[cont] = np.sum(saux**2)/Janela
		pz = np.logical_and(saux[1:] > 0, saux[:-1] < 0)
		TCZ[cont] = np.sum(pz)*fa/Janela
		for m in range(Ordem+1):
			S[:,m] = saux[m:-(Ordem+1-m)]
		C = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
		CP[:,cont] = C
		cont = cont +1
	return CP, Pot, TCZ

In [3]:
def specLPC(CP, fa):
    w = np.arange(5000/fa*np.pi, 150/fa*np.pi, -50/fa*np.pi)
    P = np.zeros(len(w), CP.shape[1])
    for k in range(CP.shape[1]):
        h = np.hstack((1, -np.flipud(CP[:,k])))
        for i in range(len(w)):
            P[i,k] = abs(1/np.sum(np.exp(-1j*w[i]*np.arange(len(h)))*h))
        # P[:,k] = P[:,k]/np.sum(P[:,k])
    # P = P/np.max(P)
    for k in range(len(w)):
        P[k,:] = P[k,:]/np.max(P[k,:])
    return P

In [4]:
def espectrograma(x, fa):
	p = np.arange(np.log2(80), np.log2(fa/2), 4/24)
	f = (2*np.ones(len(p)))**p
	Janela = int(np.round(0.025*fa))
	blackman = np.blackman(Janela)
	B = np.zeros((len(f), Janela), dtype=complex)
	t = np.arange(Janela)/fa
	for k in range(len(f)):
		B[k,:] = np.exp(1j*2*np.pi*f[k]*t)*blackman
	avanco = int(np.round(Janela/4))
	marcas = np.arange(0, len(x)-Janela, avanco)
	M = len(marcas)
	S = np.zeros((len(f), M))
	cont = 0
	for n in marcas:
		S[:,cont] = abs(B.dot(x[n:n+Janela]))
		cont += 1
	for canal in range(len(f)):
		S[canal,:] = S[canal,:] - np.min(S[canal,:]) + 1e-6
		S[canal,:] = (1/np.max(S))*S[canal,:]
	return S

In [5]:
def geraFrases(S):
	n = S.shape[0]
	q = np.zeros(n, dtype=str)
	frase = np.empty(S.shape[1], dtype='<U'+str(n))
	for col in range(S.shape[1]):		
		pp = np.nonzero(S[:,col] > 0.5)[0]
		q[pp] = "a"
		pp = np.nonzero(np.logical_and(S[:,col] > 0.25, S[:,col] <= 0.5))[0]
		q[pp] = "b"
		pp = np.nonzero(np.logical_and(S[:,col] > 0.125, S[:,col] <= 0.25))[0]
		q[pp] = "c"
		pp = np.nonzero(S[:,col] <= 0.125)[0]
		q[pp] = "d"
		palavra = ""
		for k in range(len(q)):
			palavra = palavra+q[k]
		frase[col] = palavra
	return frase

In [6]:
def lempelziv76(s):
	K = len(np.unique(s))
	N = len(s)
	L = 1
	dic = [s[0]]
	p = 1
	L = L+1
	while p+L < N:
		pos = ''.join(s[:p+L-1]).find(''.join(s[p:p+L]))
		if pos == -1:
			dic.append(''.join(s[p:p+L]))
			p = p+L
			L = 1
		else:
			L = L+1
	dic.append(''.join(s[p:]))
	# Complexidade de Lempel-Ziv, em bits por símbolo:
	#cLZ = length(dic)/C
	cLZ = len(dic)*(np.log2(len(dic))+1)/N
	return dic, cLZ

# Carregando dados

In [7]:
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\Audios\\Fluente\\1 - Fluente.wav'
fs, s1 = wavfile.read(path)
print(fs, s1.shape[0]/fs)
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\Audios\\Silabou\\1-Silabou.wav'
fs, s2 = wavfile.read(path)
print(fs, s2.shape[0]/fs)
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\Audios\\Soletrou\\1 - Soletrou.wav'
fs, s3 = wavfile.read(path)
print(fs, s3.shape[0]/fs)

fa = 16000
s1 = signal.resample(s1, int(s1.shape[0]*fa/fs))
s2 = signal.resample(s2, int(s2.shape[0]*fa/fs))
s3 = signal.resample(s3, int(s3.shape[0]*fa/fs))

48000 55.14
48000 60.0
48000 59.7


In [159]:
P = espectrograma(s1, fa)
frase = geraFrases(P)
dic, c = lempelziv76(frase)
print(c)

4.75425255670369


In [295]:
P = espectrograma(s2, fa)
frase = geraFrases(P)
dic, c = lempelziv76(frase)
print(c)

3.161218904228805


In [296]:
P = espectrograma(s3, fa)
frase = geraFrases(P)
dic, c = lempelziv76(frase)
print(c)

2.4517375430872232


In [301]:
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\Audios\\Fluente'
files = os.listdir(path)
vals = []
for file in files:
    fs, s = wavfile.read(path+'\\'+file)
    fa = 16000
    s = signal.resample(s, int(s.shape[0]*fa/fs))
    P = espectrograma(s, fa)
    frase = geraFrases(P)
    dic, c = lempelziv76(frase)
    vals.append(c)
print(vals)

[4.75425255670369, 4.906803891982043, 3.006616321680851, 4.185558759933058, 3.370511138175557, 3.6664325621267695, 3.9543777017535544]


In [302]:
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\Audios\\Silabou'
files = os.listdir(path)
vals = []
for file in files:
    fs, s = wavfile.read(path+'\\'+file)
    fa = 16000
    s = signal.resample(s, int(s.shape[0]*fa/fs))
    P = espectrograma(s, fa)
    frase = geraFrases(P)
    dic, c = lempelziv76(frase)
    vals.append(c)
print(vals)

[3.161218904228805, 1.7231486464807513, 2.5470621435083296, 2.027551999772481, 3.1069552046587345, 1.5808831387832014, 1.769033966887016]


In [303]:
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\Audios\\Soletrou'
files = os.listdir(path)
vals = []
for file in files:
    fs, s = wavfile.read(path+'\\'+file)
    fa = 16000
    s = signal.resample(s, int(s.shape[0]*fa/fs))
    P = espectrograma(s, fa)
    frase = geraFrases(P)
    dic, c = lempelziv76(frase)
    vals.append(c)
print(vals)

[2.4517375430872232, 2.075541477965096, 2.2077200635478484, 2.5890963375992273, 2.342405615366654, 2.230418150758207, 2.6522767622695422]


# Lendos as labels da spoltech

In [9]:
def ler_fon(path):
    df = pd.read_csv(path, delimiter=' ')  
    ini = df.index[1:].values.astype(int)
    fim = df.iloc[1:,0].values.astype(int)
    fon = df.iloc[1:,1].values
    return ini, fim, fon

In [13]:
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\labels\\'
ini, fim, fon = ler_fon(path+'BR_00001'+'\\'+'BR_00001.balsen1.phn')

In [345]:
path = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\github\\projeto-leitura\\labels\\'
pastas = os.listdir(path)
dur = []
for pasta in pastas:
    files = os.listdir(path+pasta)
    for file in files:
        ini, fim, fon = ler_fon(path+pasta+'\\'+file)
        if ini.size>0:
            dur.append(fim-ini)

In [346]:
vals = np.array(dur[0])
for i in dur[1:]:
    vals = np.concatenate((vals, np.array(i)))

In [347]:
px.scatter(vals)

In [8]:
px.histogram(vals)

NameError: name 'vals' is not defined

In [349]:
fon

array(['.pau', 'dc', 'd', 'u', 'kc', 'k', 'i', 'tc', 't', 'e', 'r', 'a',
       'kc', 'k', 'E', 'l', 'a', 'v', 'E', 'L', 'o', 'pc', 'p', 'i', 'n',
       'i~', 'axw~', 'f', '>', 'x', 'm', 'a', 'dc', 'd', 'a', 's', 'o',
       'bc', 'b', 'r', 'i', 'tc', 't', 'u', 'dc', 'd', 'u', '.pau'],
      dtype=object)

In [None]:
vals = np.array(dur[0])
for i in dur[1:]:
    vals = np.concatenate((vals, np.array(i)))

# Novos passos 29jul

In [8]:
def LPC3janelas(s, fa):
    N = s.shape[0]
    if (N >= int(np.round(0.02*fa))) & (N <= int(np.round(0.2*fa))):
        Janela = int(np.round(N/3))
        marcas = [0, int(np.round((N-Janela)/2))-1, N-Janela]
        Ordem = int(np.round(0.003*fa))
        CP = np.zeros((Ordem,3))
        S = np.zeros((Janela-Ordem-1,Ordem+1))
        cont = 0
        for k in marcas:
            saux = s[k:k+Janela]
            for m in range(Ordem+1):
                S[:,m] = saux[m:m-Ordem-1]
            C = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
            CP[:,cont] = C
            cont += 1
        return CP
    else:
        return []

In [9]:
def CP2vec(CP, fa):
    w = np.arange(5000/fa*np.pi, 50/fa*np.pi, -100/fa*np.pi)
    P = np.zeros((len(w), CP.shape[1]))
    for k in range(CP.shape[1]):
        h = np.hstack((1, -np.flipud(CP[:,k])))
        for i in range(len(w)):
            P[i,k] = abs(1/np.sum(np.exp(-1j*w[i]*np.arange(len(h)))*h))
    P = np.log10(P/P.max() + 0.01) + 2
    P = P.flatten(order='F')/np.sqrt(np.sum(P.flatten()**2))
    return P

In [12]:
def ler_wavs():
    path = 'C:\\Meu Drive\\dados\\cslu_spolltech_port_LDC2006S16\\'
    pastas = os.listdir(path+'labels\\')
    B = []
    F = []
    for pasta in pastas:
        files = os.listdir(path+'labels\\'+pasta)
        for file in files:
            ini, fim, fon = ler_fon(path+'labels\\'+pasta+'\\'+file)
            if ini.size>0:
                print(file[:-4])
                try:
                    fs, s = wavfile.read(path+'speech\\'+pasta+'\\'+file[:-3]+'wav')
                    fa = 16000
                    s = signal.resample(s, int(s.shape[0]*fa/fs))
                    for i in range(len(fon)):
                        p1 = (ini[i]*fa/1000).round().astype(int)
                        p2 = (fim[i]*fa/1000).round().astype(int)
                        p2 = min(p2, len(s))
                        saux = s[p1:p2]
                        CP = LPC3janelas(saux, fa)
                        if len(CP) > 0:
                            B.append(CP2vec(CP, fa))
                            F.append(fon[i])
                except:
                    print('Arquivo inexistente.')
    return B, F

In [72]:
B, F = ler_wavs()

BR_00001.age
BR_00001.balsen1
BR_00001.balsen2
BR_00001.balsen3
BR_00001.balsen4
BR_00001.balsen5
BR_00001.balsen6
BR_00001.balsen7
BR_00001.birtdate
BR_00001.birtplac
BR_00001.date
BR_00001.didulike
BR_00001.doustudy
BR_00001.douwork
BR_00001.food
BR_00001.time
BR_00001.travel
BR_00001.zipcode
BR_00002.age
BR_00002.balsen1
BR_00002.balsen2
BR_00002.balsen3
BR_00002.balsen4
BR_00002.balsen5
BR_00002.balsen6
BR_00002.balsen7
BR_00002.birtdate
BR_00002.birtplac
BR_00002.date
BR_00002.didulike
BR_00002.doustudy
BR_00002.douwork
BR_00002.food
BR_00002.question065
BR_00002.time
BR_00002.travel
BR_00002.zipcode
BR_00003.age
BR_00003.balsen1
BR_00003.balsen2
BR_00003.balsen3
BR_00003.balsen4
BR_00003.balsen5
BR_00003.balsen6
BR_00003.balsen7
BR_00003.birtdate
BR_00003.birtplac
BR_00003.date
BR_00003.didulike
BR_00003.doustudy
BR_00003.douwork
BR_00003.food
BR_00003.time
BR_00003.travel
BR_00003.zipcode
BR_00004.age
BR_00004.balsen1
BR_00004.balsen2
BR_00004.balsen3
BR_00004.balsen4
BR_00004.b

In [480]:
with open('files.pkl', 'wb') as f:
    pickle.dump([B, F], f)

In [17]:
with open('files.pkl', 'rb') as f:
    B, F = pickle.load(f)

## Dados de Jugurta

In [10]:
path = 'C:\\Meu Drive\\dados\\'
lista = ['BaseFonemas.csv',
         'ConjuntoFonemas.csv',
         'RotulosFonemas.csv',
         'RotulosNumericosFonemas.csv']
B = pd.read_csv(path+lista[0], sep='\t', header=None)
F0 = pd.read_csv(path+lista[1], sep='\t', header=None)
F = pd.read_csv(path+lista[2], sep='\t', header=None)
F2 = pd.read_csv(path+lista[3], sep='\t', header=None)

In [19]:
px.histogram(F)

# Vamos construir a rede neural

## init

In [12]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
# import torchvision
# from torchvision import datasets
# from torchvision.transforms import ToTensor
from torcheval.metrics.functional import multiclass_f1_score
from torcheval.metrics.functional import binary_accuracy
from torchinfo import summary

In [13]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


In [142]:
class mlpFon(nn.Module):
    def __init__(self, raw_dim, dim1, dim2):
        super().__init__()
        self.all_layers = torch.nn.Sequential(
            # Encoder
            nn.Flatten(start_dim=1),
            nn.Linear(raw_dim, dim1),
            nn.Tanh(),
            nn.Linear(dim1, dim2),
            # nn.Softmax(dim=1)
        )

    def forward(self, x):
        logits = self.all_layers(x)
        return logits

In [9]:
y = np.eye(F2.values.astype(int).max())[F2.values.astype(int)-1].reshape(106741,174)
X_train, X_test, y_train, y_test = train_test_split(B.values.transpose(), y, test_size=0.3, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32, device=device).detach()
X_test = torch.tensor(X_test, dtype=torch.float32, device=device).detach()
y_train = torch.tensor(y_train, dtype=torch.float32, device=device).detach()
y_test = torch.tensor(y_test, dtype=torch.float32, device=device).detach()
N = X_train.shape[0]

In [10]:
raw_dim = 150
dim1 = 150
dim2 = 174
model = mlpFon(raw_dim, dim1, dim2).to(device)
batch_size = 512
summary(model, input_size=(batch_size, 150))

NameError: name 'mlpFon' is not defined

In [146]:
loss_fn = nn.CrossEntropyLoss()
# loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
n_epochs = 50
batch_size = 4096
batch_start = torch.arange(0, N, batch_size)
trainLoss = []
valLoss = []

for epoch in range(n_epochs):
    model.train()
    inds = np.random.permutation(N)
    with tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[inds,:][start:start+batch_size,:]
            y_batch = y_train[inds,:][start:start+batch_size,:]
            # forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # print progress
            bar.set_postfix(loss=float(loss))
    # evaluate accuracy at end of each epoch
    model.eval()
    y_pred = model(X_test)
    y_pred = nn.functional.softmax(y_pred, dim=1)
    inds = y_pred.argmax(dim=1)
    target = y_test.argmax(dim=1)
    valLoss.append(float(multiclass_f1_score(inds, target, num_classes=174)))
    # valLoss.append(float(loss_fn(y_pred, y_test)))
    y_pred = model(X_train)
    trainLoss.append(float(loss_fn(y_pred, y_train)))
fig = go.Figure()
fig.add_trace(go.Scatter(y=valLoss, mode="lines", showlegend=True, name='validação'))
fig.add_trace(go.Scatter(y=trainLoss, mode="lines", showlegend=True, name='treino'))
fig.show()

Epoch 0: 100%|██████████| 19/19 [00:00<00:00, 24.89batch/s, loss=2.59]
Epoch 1: 100%|██████████| 19/19 [00:00<00:00, 26.44batch/s, loss=2.64]
Epoch 2: 100%|██████████| 19/19 [00:00<00:00, 25.86batch/s, loss=2.53]
Epoch 3: 100%|██████████| 19/19 [00:00<00:00, 29.71batch/s, loss=2.49]
Epoch 4: 100%|██████████| 19/19 [00:00<00:00, 28.28batch/s, loss=2.45]
Epoch 5: 100%|██████████| 19/19 [00:00<00:00, 24.17batch/s, loss=2.45]
Epoch 6: 100%|██████████| 19/19 [00:00<00:00, 30.14batch/s, loss=2.48]
Epoch 7: 100%|██████████| 19/19 [00:00<00:00, 29.13batch/s, loss=2.41]
Epoch 8: 100%|██████████| 19/19 [00:00<00:00, 31.49batch/s, loss=2.28]
Epoch 9: 100%|██████████| 19/19 [00:00<00:00, 28.63batch/s, loss=2.37]
Epoch 10: 100%|██████████| 19/19 [00:00<00:00, 31.07batch/s, loss=2.37]
Epoch 11: 100%|██████████| 19/19 [00:00<00:00, 28.56batch/s, loss=2.37]
Epoch 12: 100%|██████████| 19/19 [00:00<00:00, 31.43batch/s, loss=2.25]
Epoch 13: 100%|██████████| 19/19 [00:00<00:00, 27.94batch/s, loss=2.21]
Ep

In [147]:
px.line(valLoss)

In [133]:
px.line(valLoss)

In [134]:
px.line(trainLoss)

In [20]:
def ler_wav():
    path = 'C:\\Meu Drive\\dados\\cslu_spolltech_port_LDC2006S16\\'
    pastas = os.listdir(path+'labels\\')
    pasta = random.choice(pastas)
    files = os.listdir(path+'labels\\'+pasta)
    file = random.choice(files)
    ini, fim, fon = ler_fon(path+'labels\\'+pasta+'\\'+file)
    B = []
    F = []
    if ini.size>0:
        print(file[:-4])
        fs, s = wavfile.read(path+'speech\\'+pasta+'\\'+file[:-3]+'wav')
        fa = 16000
        s = signal.resample(s, int(s.shape[0]*fa/fs))
        for i in range(len(fon)):
            p1 = (ini[i]*fa/1000).round().astype(int)
            p2 = (fim[i]*fa/1000).round().astype(int)
            p2 = min(p2, len(s))
            saux = s[p1:p2]
            CP = LPC3janelas(saux, fa)
            if len(CP) > 0:
                B.append(CP2vec(CP, fa))
                F.append(fon[i])
    return np.array(B), np.array(F), s

In [21]:
x1, y1, s1 = ler_wav()
x1 = torch.tensor(x1, dtype=torch.float32, device=device).detach()
yp = model(x1)
inds = yp.argmax(dim=1)
target = np.zeros_like(y1)
for i in range(len(y1)):
    target[i] = np.where(F0 == y1[i])[0][0]
print(target)
print(y1)
print(inds)

BR_00289.food
['22' '52' '29' '31' '13' '9' '47']
['f' 'ej' 'Z' 'o' 'a' 'dc' 'ax~']
tensor([ 3, 52, 29, 16, 13,  9,  3])


In [27]:
F0.iloc[inds.cpu().detach().numpy().astype(int)].values.flatten()

array(['s', 'ej', 'Z', 'u', 'a', 'dc', 's'], dtype=object)

In [22]:
multiclass_f1_score(inds, torch.tensor(target.astype(int)), num_classes=174)

tensor(0.5714)

In [29]:
N1 = len(s3)
janela = np.round(0.1*fa).astype(int)
J = np.floor(N1/janela).astype(int)
P = np.zeros((J-1, 150))
for i in range(J-1):
    saux = s3[i*janela:i*janela+janela]
    CP = LPC3janelas(saux, fa)
    P[i,:] = CP2vec(CP, fa).transpose()

In [30]:
P = torch.tensor(P, dtype=torch.float32, device=device).detach()
yp = model(P)
inds = yp.argmax(dim=1)

In [31]:
fonemas = F0.values[inds.cpu().detach().numpy()].flatten()
''.join(fonemas)

'tSiirakumsi.paubkrtcktSSSiejejeeEkaadajeEi~urdcdctckkctcdcdcdca.pau.pau.pautctc.paukctciwtSttdc.paukcdcojei~uo~ewuuueeeewurEeo~imdctcewn~mmuauii~uiunz>wordummtcaaammmaaatSEEEEEmeeei~erZeeewuumreeiuuatfkcttcaam.pau.pau.pau.pau.pau.pau.pau.paueeEewuroooouuuumejieewrimtctkcukmazakckcddcsudcojeeeuiinimn.paukcddcuuumitcmatcamkcmtcStSim.pauojSi~>mavadauktcaatctc.pau.paupoo>louojSi~mn~in~muaauuewooukuuuarm.pau.pau.paummrakci~muaavSi~iieeeErmmrrtc.pau.pautckmiiiuuuZEeeEi~uiuudcui.pauskckckcauuien~ejeiiuuumtcdcdcatm.pauudcdcmukkkckc.paukcktcaratc.pau.pautctctc.pautcdctc.pau.pautcautStSiiitcaatcrSZiEejEEEramaatc.pau.pau.pau.pautc.pautc.paurEEEEEEadcmtc.paukckc.paudcdca.pauaiittcar.paumEEEEEi~rt.pautcrk.pau.pauo.paukamkcmuauruurSiiiiouurmdcgouuidcumdcrmm.paui.paum.pautmmk.pautctc.paummkcdueuumuojiiEei~ermmatmm.paummoo>>>oErmamtcitcmm.pauEiEEoruuueieEEEojtStSiiejeiu.pau.pautcfdcdctcautcummrutc'

In [28]:
def histo(fones):
    hist = np.zeros(F0.values.shape[0])
    for i in range(F0.values.shape[0]):
        hist[i] = np.sum(fonemas==F0.values[i])
    return hist

In [59]:
F0.values[np.argsort(hist)[-20:]]

array([['oj'],
       ['d'],
       ['ew'],
       ['S'],
       ['tS'],
       ['t'],
       ['i~'],
       ['k'],
       ['o'],
       ['kc'],
       ['dc'],
       ['r'],
       ['E'],
       ['e'],
       ['tc'],
       ['i'],
       ['a'],
       ['.pau'],
       ['m'],
       ['u']], dtype=object)

In [55]:
hist = histo(fonemas)
px.line(x=F0.values.reshape(174), y=hist)

## duas redes

In [17]:
class mlpFon(nn.Module):
    def __init__(self, raw_dim, dim1, dim2):
        super().__init__()
        self.all_layers = torch.nn.Sequential(
            # Encoder
            nn.Flatten(start_dim=1),
            nn.Linear(raw_dim, dim1),
            nn.Tanh(),
            nn.Linear(dim1, dim2),
            # nn.Softmax(dim=1)
        )

    def forward(self, x):
        logits = self.all_layers(x)
        return logits

In [16]:
y = np.eye(F2.values.astype(int).max())[F2.values.astype(int)-1].reshape(106741,174)
X_train, X_test, y_train, y_test = train_test_split(B.values.transpose(), y, test_size=0.3, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32, device=device).detach()
X_test = torch.tensor(X_test, dtype=torch.float32, device=device).detach()
y_train = torch.tensor(y_train, dtype=torch.float32, device=device).detach()
y_test = torch.tensor(y_test, dtype=torch.float32, device=device).detach()
N = X_train.shape[0]
raw_dim = 150
dim1 = 150
dim2 = 174
model = mlpFon(raw_dim, dim1, dim2).to(device)
batch_size = 512
summary(model, input_size=(batch_size, 150))

Layer (type:depth-idx)                   Output Shape              Param #
mlpFon                                   [512, 174]                --
├─Sequential: 1-1                        [512, 174]                --
│    └─Flatten: 2-1                      [512, 150]                --
│    └─Linear: 2-2                       [512, 150]                22,650
│    └─Tanh: 2-3                         [512, 150]                --
│    └─Linear: 2-4                       [512, 174]                26,274
│    └─Softmax: 2-5                      [512, 174]                --
Total params: 48,924
Trainable params: 48,924
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 25.05
Input size (MB): 0.31
Forward/backward pass size (MB): 1.33
Params size (MB): 0.20
Estimated Total Size (MB): 1.83

In [24]:
loss_fn = nn.CrossEntropyLoss()
# loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
n_epochs = 50
batch_size = 4096
batch_start = torch.arange(0, N, batch_size)
trainLoss = []
valLoss = []

for epoch in range(n_epochs):
    model.train()
    inds = np.random.permutation(N)
    with tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[inds,:][start:start+batch_size,:]
            y_batch = y_train[inds,:][start:start+batch_size,:]
            # forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # print progress
            bar.set_postfix(loss=float(loss))
    # evaluate accuracy at end of each epoch
    model.eval()
    y_pred = model(X_test)
    y_pred = nn.functional.softmax(y_pred, dim=1)
    inds = y_pred.argmax(dim=1)
    target = y_test.argmax(dim=1)
    valLoss.append(float(multiclass_f1_score(inds, target, num_classes=174)))
    # valLoss.append(float(loss_fn(y_pred, y_test)))
    y_pred = model(X_train)
    trainLoss.append(float(loss_fn(y_pred, y_train)))
fig = go.Figure()
fig.add_trace(go.Scatter(y=valLoss, mode="lines", showlegend=True, name='validação'))
fig.add_trace(go.Scatter(y=trainLoss, mode="lines", showlegend=True, name='treino'))
fig.show()

  0%|          | 0/19 [00:00<?, ?batch/s]

TypeError: 'tqdm' object does not support the context manager protocol

In [14]:
class mlpFon(nn.Module):
    def __init__(self, raw_dim, dim1, dim2):
        super().__init__()
        self.all_layers = torch.nn.Sequential(
            # Encoder
            nn.Flatten(start_dim=1),
            nn.Linear(raw_dim, dim1),
            nn.Tanh(),
            nn.Linear(dim1, dim2),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        logits = self.all_layers(x)
        return logits

In [20]:
model = mlpFon(raw_dim, dim1, dim2).to(device)
summary(model, input_size=(batch_size, 150))

Layer (type:depth-idx)                   Output Shape              Param #
mlpFon                                   [512, 174]                --
├─Sequential: 1-1                        [512, 174]                --
│    └─Flatten: 2-1                      [512, 150]                --
│    └─Linear: 2-2                       [512, 150]                22,650
│    └─Tanh: 2-3                         [512, 150]                --
│    └─Linear: 2-4                       [512, 174]                26,274
│    └─Softmax: 2-5                      [512, 174]                --
Total params: 48,924
Trainable params: 48,924
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 25.05
Input size (MB): 0.31
Forward/backward pass size (MB): 1.33
Params size (MB): 0.20
Estimated Total Size (MB): 1.83

In [21]:
# loss_fn = nn.CrossEntropyLoss()
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
n_epochs = 50
batch_size = 4096
batch_start = torch.arange(0, N, batch_size)
trainLoss = []
valLoss = []

for epoch in range(n_epochs):
    model.train()
    inds = np.random.permutation(N)
    with tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[inds,:][start:start+batch_size,:]
            y_batch = y_train[inds,:][start:start+batch_size,:]
            # forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # print progress
            bar.set_postfix(loss=float(loss))
    # evaluate accuracy at end of each epoch
    model.eval()
    y_pred = model(X_test)
    # y_pred = nn.functional.softmax(y_pred, dim=1)
    inds = y_pred.argmax(dim=1)
    target = y_test.argmax(dim=1)
    valLoss.append(float(multiclass_f1_score(inds, target, num_classes=174)))
    # valLoss.append(float(loss_fn(y_pred, y_test)))
    y_pred = model(X_train)
    trainLoss.append(float(loss_fn(y_pred, y_train)))
fig = go.Figure()
fig.add_trace(go.Scatter(y=valLoss, mode="lines", showlegend=True, name='validação'))
fig.add_trace(go.Scatter(y=trainLoss, mode="lines", showlegend=True, name='treino'))
fig.show()

  0%|          | 0/19 [00:00<?, ?batch/s]

TypeError: 'tqdm' object does not support the context manager protocol

In [22]:
# loss_fn = nn.CrossEntropyLoss()
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
n_epochs = 50
batch_size = 4096
batch_start = torch.arange(0, N, batch_size)
trainLoss = []
valLoss = []

for epoch in tqdm(range(n_epochs)):
    model.train()
    inds = np.random.permutation(N)
    for start in batch_start:
        # take a batch
        X_batch = X_train[inds,:][start:start+batch_size,:]
        y_batch = y_train[inds,:][start:start+batch_size,:]
        # forward pass
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        # update weights
        optimizer.step()
    # evaluate accuracy at end of each epoch
    model.eval()
    y_pred = model(X_test)
    # y_pred = nn.functional.softmax(y_pred, dim=1)
    inds = y_pred.argmax(dim=1)
    target = y_test.argmax(dim=1)
    valLoss.append(float(multiclass_f1_score(inds, target, num_classes=174)))
    # valLoss.append(float(loss_fn(y_pred, y_test)))
    y_pred = model(X_train)
    trainLoss.append(float(loss_fn(y_pred, y_train)))
fig = go.Figure()
fig.add_trace(go.Scatter(y=valLoss, mode="lines", showlegend=True, name='validação'))
fig.add_trace(go.Scatter(y=trainLoss, mode="lines", showlegend=True, name='treino'))
fig.show()

                                               

In [23]:
torch.save(model.state_dict(), 'modeloMSE.pht')

In [17]:
model = mlpFon(raw_dim, dim1, dim2).to(device)
model.load_state_dict(torch.load('modeloMSE.pht'))
model.eval()

mlpFon(
  (all_layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=150, out_features=150, bias=True)
    (2): Tanh()
    (3): Linear(in_features=150, out_features=174, bias=True)
    (4): Softmax(dim=1)
  )
)

# Testes com a base de dados silabando

In [67]:
path = 'C:\\Meu Drive\\dados\\audios rotulados\\'
pastas = os.listdir(path)
dados = []
rotul = []
fa = 16000
for pasta in pastas:
    files = os.listdir(path+pasta)
    for file in files:
        fs, s = wavfile.read(path+pasta+'\\'+file)
        dados.append(signal.resample(s, int(s.shape[0]*fa/fs)))
        rotul.append(file[:-7])

In [40]:
def wav2vec(s, fa):
    N = s.shape[0]
    janela = np.round(0.1*fa).astype(int)
    passo = np.round(0.03*fa).astype(int)
    N2 = (N-janela)//passo
    P = np.zeros((N2, 150))
    for i in range(N2):
        saux = s[i*passo:i*passo+janela]
        CP = LPC3janelas(saux, fa)
        P[i,:] = CP2vec(CP, fa).transpose()
    return P

In [77]:
vects = []
for s in tqdm(dados):
    vects.append(wav2vec(s, fa))

                                                 

In [116]:
with open('vects.pkl', 'wb') as f:
    pickle.dump([vects, rotul], f)

In [93]:
with open('vects.pkl', 'rb') as f:
    vects, rotul = pickle.load(f)

In [29]:
hists = []
for vec in vects:
    x1 = torch.tensor(vec, dtype=torch.float32, device=device).detach()
    yp = model(x1)
    inds = yp.argmax(dim=1).cpu().detach().numpy()
    fonemas = F0.values[inds].flatten()
    hists.append(histo(fonemas))  

In [30]:
px.line(np.array(hists).transpose())

In [31]:
fig = go.Figure()
cores = ['blue', 'brown', 'green', 'gold']
labels = np.unique(rotul)
for i in range(len(hists)):
    vals = np.argsort(hists[i][:60])[-5:]
    fig.add_trace(go.Scatter(y=vals, mode="markers", name=rotul[i], line=dict(color=cores[np.nonzero(labels == rotul[i])[0][0]])))
fig.show()

In [81]:
fonemas = F0.values[inds.cpu().detach().numpy()].flatten()
hist = histo(fonemas)
F0.values[np.argsort(hist)[-20:]]

array([['aw'],
       ['tc'],
       ['tS'],
       ['kc'],
       ['ej'],
       ['ew'],
       ['t'],
       ['r'],
       ['oj'],
       ['o~'],
       ['>w'],
       ['o'],
       ['i'],
       ['E'],
       ['>'],
       ['m'],
       ['k'],
       ['u'],
       ['a'],
       ['.pau']], dtype=object)

In [76]:
''.join(F0.iloc[inds.cpu().detach().numpy().astype(int)].values.flatten())

'>aakkk>>>aaaajaEE>kakguuuuuooo>rooooo~>a>a>>auuuukkl>axw~ojmewgurLiiejojiiimi~mtkaaaaauaa>wuuuuuooozooouuaaka.pau.pau.pau.paukca.paum>>>>a>wuuuostkkktckumtb>wuuojejriiummuiiiiiiitSitStStStStSi~E.pauttmtcakkuzdaaaaaaaaakckckc.pau.pautStSe~Ei~i~mmaaakuuo>>a>akaar.pau.paupEEEEEEEEEEEEaaaaarrrEEErrejZeEEEEojatttdb>>kuaa>>waxw~axw~ojmd>ako~kkiewewouojmdduuojojuuouuojukguimm>EEkkki~mmtStStSiSi~i~iwb>wo~oouuojuejuejzuaauojeeewoomuoeeuejiejeeeuukckc.pau.pau.pauraaa.pau.pau.pau.pauojm>kak.paumaaakm>aajkktkk>kkaaka.pau.pau.pautckcm>w>>>waxw~axw~ojojiiejiuiiewoa>>l>w>wojooj.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.paukmmuvtStc.pauffiwmkttduaxw~vojejejitStStSSSEEEmewl>wojojejojojuaEE>wo~o~>w>w>w>>o~EE>aaaaxw~ajojabboojuuuuuuuukuejii~eguuuuiuuLkc.pau.pau.paudcdc.paudcdcdcdcdcs.pau.pautctcf.pautctc.pau.paupb>wawkojkudduumu>akkawaxw~ojartcstbb>w>w>l>wouuaxw~ojajojrEpEew>ooewuuuuuiiiiiiiuojojiiii~nummmdl>w>w>woojunpctSStStStStSSStStSttc.paukdmmuejejejiiiimi~>mmkkawkruumooao~rrrmst.pau.pau.

In [141]:
for i in range(len(hists)):
    hists[i] = hists[i]/hists[i].sum()

In [152]:
fig = px.line(y=-np.sum(np.array(hists) * np.log2(np.array(hists)+np.finfo(float).eps), axis=1))
fig.update_layout(
    xaxis = dict(
        tickvals = np.arange(112),
        ticktext = rotul
    )
)
fig.show()

In [168]:
np.array(rotul)[np.array(rotul)=='silabou']

array(['silabou', 'silabou', 'silabou', 'silabou', 'silabou', 'silabou',
       'silabou', 'silabou', 'silabou', 'silabou', 'silabou', 'silabou',
       'silabou', 'silabou', 'silabou', 'silabou', 'silabou', 'silabou',
       'silabou', 'silabou', 'silabou', 'silabou', 'silabou', 'silabou',
       'silabou', 'silabou', 'silabou', 'silabou', 'silabou', 'silabou',
       'silabou', 'silabou', 'silabou', 'silabou', 'silabou', 'silabou',
       'silabou', 'silabou', 'silabou', 'silabou', 'silabou'],
      dtype='<U17')

In [170]:
rotul = np.array(rotul)
hists = np.array(hists)

In [199]:
X1 = hists[rotul=='silabou'][:4,:60]
y1 = np.ones(4)
X2 = hists[rotul=='soletrou'][:4,:60]
y2 = -np.ones(4)
X = np.vstack((X1, X2))
y = np.hstack((y1,y2))

In [200]:
w = np.linalg.pinv(X).dot(y)

In [201]:
fig = px.scatter(hists[:,:60].dot(w))
fig.update_layout(
    xaxis = dict(
        tickvals = np.arange(112),
        ticktext = rotul
    )
)
fig.show()

In [39]:
''.join(np.array(F0)[inds].flatten())

'kckcstckcstakckckctc.pau.pausSestStSiSieiiiieeeeeeiwiwlllaaaarrrrrrreEEaureeuieewooooooooouuuooEEEeeejeEEereurrsss.pautcrSs.pau.paudZtiiiiiiiidcmmmdcdc.pautc.pau.pautc.paukc.pau.pautcftckcttca.pau.pau.pau.pau.pautctckc.pau.paurdczdcdcdcaaakcdZtSss.pau.pautctcsdZiiiiidZiiSSitcsrsdcs.pautcarsdtaaaadduzrtcaaa.pau.pausst.pau.pau.pau.paus.paurtctcartStSt.pautcrdamawawoooooo~rEEaatctcrss.paustc.pau.pau.pau.pau.pau.pau.pautckeeeeeEe~eejiiitdmmmtctcmadcaakctc.pautc.pau.pauajtSEeeeeeejejejZeSdZdZdcdcfpdctcrftddctcaaakcmoEo~o~ooooooooaaaaaaa.pau.pau.pau.pautc.pau.pau.pau.pau.pau.pau.pau.paudZiiiiiiizinumumtcdctcaurtcssdmddctctc.pautc.pau.pau.pauZewooooooouuzdcrrrrSeejSSSSSSSe.paus.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pautc.pau.pau.pau.pautctSdZejiiiiiiiiinnntcaojStt.pautc.pausttctctcdcrstcdaaaaaadcdcdcdcdcdcdckckctctc.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pau.pautcrdtadcrsdZi~iiitcsaaatduuuuuuuiiuzinudczndcdcdctctctcadckckcdZstctc.pautSEeeeejejetcr.pautSdZtiiziinntcaaaa

In [94]:
lempC = []
for vec in tqdm(vects):
    x1 = torch.tensor(vec, dtype=torch.float32, device=device).detach()
    yp = model(x1)
    inds = yp.argmax(dim=1).cpu().detach().numpy()
    dic, c = lempelziv76(np.array(F0)[inds].flatten())
    lempC.append(c)

                                                 

In [44]:
fig = px.scatter(lempC)
fig.update_layout(
    xaxis = dict(
        tickvals = np.arange(112),
        ticktext = rotul
    )
)
fig.show()

# Perfil de energia

In [18]:
def perfEner(s, fa):
    N = s.shape[0]
    janela = np.round(0.1*fa).astype(int)
    passo = np.round(0.03*fa).astype(int)
    N2 = (N-janela)//passo
    E = np.zeros(N2)
    for i in range(N2):
        saux = s[i*passo:i*passo+janela]
        E[i] = (saux**2).sum()
    return E

In [23]:
E = perfEner(s1, fa)
pz = np.logical_and(E[1:] > E.max()/100, E[:-1] < E.max()/100)
pz = np.logical_or(pz, np.logical_and(E[1:] < E.max()/100, E[:-1] > E.max()/100))
pontos = np.nonzero(pz)[0]
fig = px.line(E)
fig.add_trace(go.Scatter(x=pontos, y=E[pontos], mode="markers"))
fig.show()

In [77]:
pz = np.logical_and(E[1:] > E.max()/100, E[:-1] < E.max()/100)
print(np.diff(np.nonzero(pz)[0]).mean())
px.histogram(np.diff(np.nonzero(pz)[0]), nbins=10)

33.129629629629626


In [24]:
E = perfEner(s2, fa)
pz = np.logical_and(E[1:] > E.max()/100, E[:-1] < E.max()/100)
pz = np.logical_or(pz, np.logical_and(E[1:] < E.max()/100, E[:-1] > E.max()/100))
pontos = np.nonzero(pz)[0]
fig = px.line(E)
fig.add_trace(go.Scatter(x=pontos, y=E[pontos], mode="markers"))
fig.show()

In [25]:
pz = np.logical_and(E[1:] > E.max()/100, E[:-1] < E.max()/100)
print(np.diff(np.nonzero(pz)[0]).mean())
px.histogram(np.diff(np.nonzero(pz)[0]), nbins=10)

37.90384615384615


In [28]:
E = perfEner(s3, fa)
pz = np.logical_and(E[1:] > E.max()/100, E[:-1] < E.max()/100)
nz = np.logical_and(E[1:] < E.max()/100, E[:-1] > E.max()/100)
zc = np.logical_or(pz, nz)
pz = np.nonzero(pz)[0]
nz = np.nonzero(nz)[0]
fig = px.line(E)
fig.add_trace(go.Scatter(x=pz, y=E[pz], mode="markers"))
fig.add_trace(go.Scatter(x=nz, y=E[nz], mode="markers"))
fig.show()

In [81]:
pz = np.logical_and(E[1:] > E.max()/100, E[:-1] < E.max()/100)
print(np.diff(np.nonzero(pz)[0]).mean())
px.histogram(np.diff(np.nonzero(pz)[0]), nbins=10)

36.283018867924525


In [43]:
if nz[0]<pz[0]: nz = nz[1:]
if nz[-1]<pz[-1]: pz = pz[:-1]
pfala = np.stack((pz,nz))

In [41]:
P = wav2vec(s3, fa)

In [61]:
P2 = np.zeros((pfala.shape[1], 150))
for i in range(pfala.shape[1]):
    na = np.round(pfala[0,i]*fa*0.03).astype(int)
    N = np.round((pfala[1,i]-pfala[0,i])*fa*0.03).astype(int)
    janela = int(np.round(N/3))
    marcas = [0, int(np.round((N-janela)/2))-1, N-janela]
    Ordem = int(np.round(0.003*fa))
    CP = np.zeros((Ordem,3))
    S = np.zeros((janela-Ordem-1,Ordem+1))
    cont = 0
    for k in marcas:
        saux = s3[na+k:na+k+janela]
        for m in range(Ordem+1):
            S[:,m] = saux[m:m-Ordem-1]
        C = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
        CP[:,cont] = C
        cont += 1
    P2[i,] = CP2vec(CP, fa).transpose()

In [65]:
x1 = torch.tensor(P2, dtype=torch.float32, device=device).detach()
yp = model(x1)
inds = yp.argmax(dim=1).cpu().detach().numpy()
fonemas2 = F0.values[inds].flatten()
''.join(fonemas2)

'eeki~kctcewieumeoEei~eee~oivtcai~kcSewoi~vtceie~kci~kreE.pauejEkci~ewie>Eei~k'

In [63]:
x1 = torch.tensor(P, dtype=torch.float32, device=device).detach()
yp = model(x1)
inds = yp.argmax(dim=1).cpu().detach().numpy()
fonemas = F0.values[inds].flatten()
''.join(fonemas)

'tSiejejiejZiiirZpewauakkkummmtctcstctcmitcmtctctcumuojkttctckckckctcpckkukcdZtSSStctStSStStSieeeeeeejeeeEeeeojEEewkkaaaaaaaaddaaaajreeeeEeei~i~ewuuuuriiidcdcdcdcdcntckcaakckckckckckcttckckctaakckckcdctdckc.pau.paukc.pau.pau.pau.pau.pau.paus.pautctctc.pau.pautctctc.pau.pau.pau.paukcmmtckcdZmtctStStStSffstc.pau.pau.pautcdcdc.pau.paukcskckctctcdctc.pauaZeeeiimewnuuo~ouojEewewuvuunumuuzieeei~eeeeieeewuouuureEErEei~ewewo~ueueii~uumdcdckcuviewuavn~uimnmmmmmakkaamdcuveiejei~i~ewoouLiiunumnutckczmoo~o~xooooruuzauuuakmudcmmmtctctcaaaaauaaaaukcmmmmmmmmaaaaaaatctcrojtSe~EEEEEEEEEEEEEEEEaamuvieeeeeeeeei~eweirurrojojZi~eeeeeeeeeewrriuuuzmmmtcojreeeeeeeeii~iuuurakruajrzdcdctcsf.paukckckcaaaskctctcaaaaadckcmtc.pautctctc.pautckc.pautc.pau.pautctctckckc.paukctckctc.paukckctc.pauojeieieeEEEEeEewooooolooooooooooouoouuuumuuruuuuruumvieeiiiii~eiuuuuojriiiuzmmmmtctckckcddckckckckcukckckckckcmtc.pauadcdctczdkaakckckckckckctcfddcdcdcdckcsstmummudcmmojZi~eeeeejeeeeeeuuuiuuiiinZtctSii~mmmmntctckc.paukckckckcfd

In [66]:
fig = px.line(E)
fig.add_trace(go.Scatter(x=pz, y=E[pz], mode="markers"))
fig.add_trace(go.Scatter(x=nz, y=E[nz], mode="markers"))
fig.update_layout(
    xaxis = dict(
        tickvals = np.arange(len(E)),
        ticktext = fonemas
    )
)
fig.add_trace(go.Scatter(
    x=pfala[0,:],
    y=E[pfala[0,:]],
    mode="markers+text",
    name="Markers and Text",
    text=fonemas2,
    textposition="bottom center"
))
fig.show()

In [85]:
def wav2ener2fon(s, fa):
    E = perfEner(s, fa)
    pz = np.logical_and(E[1:] > E.max()/100, E[:-1] < E.max()/100)
    nz = np.logical_and(E[1:] < E.max()/100, E[:-1] > E.max()/100)
    pz = np.nonzero(pz)[0]
    nz = np.nonzero(nz)[0]
    if nz[0]<pz[0]: nz = nz[1:]
    if nz[-1]<pz[-1]: pz = pz[:-1]
    pfala = np.stack((pz,nz))
    P2 = np.zeros((pfala.shape[1], 150))
    for i in range(pfala.shape[1]):
        na = np.round(pfala[0,i]*fa*0.03).astype(int)
        N = np.round((pfala[1,i]-pfala[0,i])*fa*0.03).astype(int)
        janela = int(np.round(N/3))
        marcas = [0, int(np.round((N-janela)/2))-1, N-janela]
        Ordem = int(np.round(0.003*fa))
        CP = np.zeros((Ordem,3))
        S = np.zeros((janela-Ordem-1,Ordem+1))
        cont = 0
        for k in marcas:
            saux = s[na+k:na+k+janela]
            for m in range(Ordem+1):
                S[:,m] = saux[m:m-Ordem-1]
            C = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
            CP[:,cont] = C
            cont += 1
        P2[i,] = CP2vec(CP, fa).transpose()
    x1 = torch.tensor(P2, dtype=torch.float32, device=device).detach()
    yp = model(x1)
    inds = yp.argmax(dim=1).cpu().detach().numpy()
    aux = F0.values[inds].flatten()
    fonemas = np.zeros(E.shape[0], dtype=np.str_)
    fonemas[:] = '0'
    for i in range(pfala.shape[1]):
        fonemas[pfala[0,i]:pfala[1,i]] = aux[i]
    return fonemas

In [88]:
vals = []
for s in tqdm(dados):
    fonemas = wav2ener2fon(s, fa)
    dic, c = lempelziv76(fonemas)
    vals.append(c)

                                                 

In [96]:
fig = px.scatter(vals)
fig.update_layout(
    xaxis = dict(
        tickvals = np.arange(len(vals)),
        ticktext = rotul
    )
)

In [97]:
fig = px.scatter(lempC)
fig.update_layout(
    xaxis = dict(
        tickvals = np.arange(len(vals)),
        ticktext = rotul
    )
)

In [99]:
fig = px.scatter(np.array(vals)*np.array(lempC))
fig.update_layout(
    xaxis = dict(
        tickvals = np.arange(len(vals)),
        ticktext = rotul
    )
)