# Inicio

In [5]:
import numpy as np
import pandas as pd
import os
import pickle
import plotly.express as px
import plotly.graph_objects as go
from tqdm import tqdm
from scipy.io import wavfile
from IPython.display import Audio

import torch
from torch import nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torchsummary import summary

In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


In [26]:
def LPC3janelas(s, fa):
    N = s.shape[0]
    Janela = int(np.round(N/3))
    marcas = [0, int(np.round((N-Janela)/2))-1, N-Janela]
    Ordem = int(np.round(0.003*fa))
    CP = np.zeros((Ordem,3))
    S = np.zeros((Janela-Ordem-1,Ordem+1))
    cont = 0
    for k in marcas:
        saux = s[k:k+Janela]
        for m in range(Ordem+1):
            S[:,m] = saux[m:m-Ordem-1]
        C = np.linalg.pinv(S[:,:-1]).dot(S[:,-1])
        CP[:,cont] = C
        cont += 1
    return CP

In [4]:
def CP2vec(CP, fa):
    w = np.arange(5000/fa*np.pi, 50/fa*np.pi, -100/fa*np.pi)
    P = np.zeros((len(w), CP.shape[1]))
    for k in range(CP.shape[1]):
        h = np.hstack((1, -np.flipud(CP[:,k])))
        for i in range(len(w)):
            P[i,k] = abs(1/np.sum(np.exp(-1j*w[i]*np.arange(len(h)))*h))
    P = np.log10(P/P.max() + 0.01) + 2
    P = P.flatten(order='F')/np.sqrt(np.sum(P.flatten()**2))
    return P

In [8]:
pasta = '/content/base'
arqs = os.listdir(pasta)

In [19]:
fs, s = wavfile.read(pasta + '/' + arqs[-100])
print(arqs[-100])
Audio(s,rate=fs)

-1296.wav


In [33]:
aux = []
for arq in tqdm(arqs):
    fs, s = wavfile.read(pasta + '/' + arq)
    CP = LPC3janelas(s, fs)
    P = CP2vec(CP, fs)
    aux.append(P)

100%|██████████| 2136/2136 [01:30<00:00, 23.48it/s]


In [45]:
rotulos = [i[0] for i in arqs]
letras = np.unique(rotulos)
inds = [np.where(letras == i)[0][0] for i in rotulos]
y = np.zeros((len(rotulos), len(letras)))
y[np.arange(len(rotulos)), inds] = 1

In [48]:
X = np.array(aux)

In [51]:
class mlp(nn.Module):
    def __init__(self, raw_dim, dim1, dim2):
        super().__init__()
        self.all_layers = torch.nn.Sequential(
            # Encoder
            nn.Flatten(start_dim=1),
            nn.Linear(raw_dim, dim1),
            nn.Tanh(),
            nn.Linear(dim1, dim2),
            # nn.Softmax(dim=1)
        )

    def forward(self, x):
        logits = self.all_layers(x)
        return logits

In [53]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32, device=device).detach()
X_test = torch.tensor(X_test, dtype=torch.float32, device=device).detach()
y_train = torch.tensor(y_train, dtype=torch.float32, device=device).detach()
y_test = torch.tensor(y_test, dtype=torch.float32, device=device).detach()
N = X_train.shape[0]

In [66]:
raw_dim = 150
dim1 = 150
dim2 = letras.shape[0]
model = mlp(raw_dim, dim1, dim2).to(device)
batch_size = 128
summary(model, (150,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                  [-1, 150]               0
            Linear-2                  [-1, 150]          22,650
              Tanh-3                  [-1, 150]               0
            Linear-4                   [-1, 18]           2,718
Total params: 25,368
Trainable params: 25,368
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.10
Estimated Total Size (MB): 0.10
----------------------------------------------------------------


In [70]:
from torcheval.metrics.functional import multiclass_f1_score

In [72]:
loss_fn = nn.CrossEntropyLoss()
# loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
n_epochs = 50
batch_size = 128
batch_start = torch.arange(0, N, batch_size)
trainLoss = []
valLoss = []

for epoch in range(n_epochs):
    model.train()
    inds = np.random.permutation(N)
    with tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[inds,:][start:start+batch_size,:]
            y_batch = y_train[inds,:][start:start+batch_size,:]
            # forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # print progress
            bar.set_postfix(loss=float(loss))
    # evaluate accuracy at end of each epoch
    model.eval()
    y_pred = model(X_test)
    y_pred = nn.functional.softmax(y_pred, dim=1)
    inds = y_pred.argmax(dim=1)
    target = y_test.argmax(dim=1)
    valLoss.append(float(multiclass_f1_score(inds, target, num_classes=18)))
    # valLoss.append(float(loss_fn(y_pred, y_test)))
    y_pred = model(X_train)
    trainLoss.append(float(loss_fn(y_pred, y_train)))
fig = go.Figure()
fig.add_trace(go.Scatter(y=valLoss, mode="lines", showlegend=True, name='validação'))
fig.add_trace(go.Scatter(y=trainLoss, mode="lines", showlegend=True, name='treino'))
fig.show()

Epoch 0: 100%|██████████| 14/14 [00:00<00:00, 113.30batch/s, loss=0.397]
Epoch 1: 100%|██████████| 14/14 [00:00<00:00, 105.31batch/s, loss=0.468]
Epoch 2: 100%|██████████| 14/14 [00:00<00:00, 91.42batch/s, loss=0.374]
Epoch 3: 100%|██████████| 14/14 [00:00<00:00, 82.19batch/s, loss=0.0406]
Epoch 4: 100%|██████████| 14/14 [00:00<00:00, 91.20batch/s, loss=0.031] 
Epoch 5: 100%|██████████| 14/14 [00:00<00:00, 88.46batch/s, loss=0.303]
Epoch 6: 100%|██████████| 14/14 [00:00<00:00, 89.86batch/s, loss=0.201]
Epoch 7: 100%|██████████| 14/14 [00:00<00:00, 85.10batch/s, loss=0.296]
Epoch 8: 100%|██████████| 14/14 [00:00<00:00, 94.25batch/s, loss=0.261]
Epoch 9: 100%|██████████| 14/14 [00:00<00:00, 83.91batch/s, loss=0.104]
Epoch 10: 100%|██████████| 14/14 [00:00<00:00, 93.16batch/s, loss=0.196]
Epoch 11: 100%|██████████| 14/14 [00:00<00:00, 79.99batch/s, loss=0.298]
Epoch 12: 100%|██████████| 14/14 [00:00<00:00, 86.85batch/s, loss=0.264]
Epoch 13: 100%|██████████| 14/14 [00:00<00:00, 95.12batch