# Inicialização

In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.optim as optim
from torcheval.metrics.functional import multiclass_f1_score
from torchinfo import summary

import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import copy
from tqdm import tqdm
import time
import os
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from natsort import natsorted

In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [3]:
# Vamos definir a pasta que está o dataset
workspace = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\Dataset\\'
root = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\Dataset\\RealWorld\\'

# Lista de usuários e atividades
users = natsorted(os.listdir(root))
tarefas = ['climbingdown', 'climbingup', 'jumping', 'lying', 'running', 'sitting', 'standing', 'walking']
SAC = ['sitting', 'standing', 'walking', 'climbingup', 'climbingdown', 'running']
posicao = ['chest', 'forearm', 'head', 'shin', 'thigh', 'upperarm', 'waist']
posicaopt = ['peito', 'antebraço', 'cabeça', 'canela', 'coxa', 'braço', 'cintura']

In [4]:
pasta = 'C:\\Meu Drive\\Doutorado Unicamp\\Projeto\\Dataset\\realworld views\\'
D = [None] * 7
D[0] = pd.read_csv(pasta+posicao[0]+'_nova50hz.csv')
D[1] = pd.read_csv(pasta+posicao[1]+'_nova50hz.csv')
D[2] = pd.read_csv(pasta+posicao[2]+'_nova50hz.csv')
D[3] = pd.read_csv(pasta+posicao[3]+'_nova50hz.csv')
D[4] = pd.read_csv(pasta+posicao[4]+'_nova50hz.csv')
D[5] = pd.read_csv(pasta+posicao[5]+'_nova50hz.csv')
D[6] = pd.read_csv(pasta+posicao[6]+'_nova50hz.csv')

# Definição dos dados e modelos

In [5]:
X = D[0].values[:,:450] / 30
y = D[0].values[:,-3]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=1, stratify=y_train)
X_train = torch.tensor(X_train, dtype=torch.float32, device=device).detach()
X_test = torch.tensor(X_test, dtype=torch.float32, device=device).detach()
X_val = torch.tensor(X_val, dtype=torch.float32, device=device).detach()

In [6]:
class automlpacc(nn.Module):
    def __init__(self, dim):
        super().__init__()
        raw_dim = 450
        self.encoder = torch.nn.Sequential(

            nn.Flatten(start_dim=1),
            nn.Linear(raw_dim, dim),
            # nn.ReLU(),
            # nn.LeakyReLU(),
            nn.Tanh()
        )

        self.decoder = nn.Linear(dim, raw_dim)

    def forward(self, x):
        x = self.encoder(x)
        logits = self.decoder(x)
        return logits

In [161]:
class automlp(nn.Module):
    def __init__(self):
        super().__init__()
        dim = 600
        self.encoder = torch.nn.Sequential(

            nn.Flatten(start_dim=1),
            nn.Linear(900, dim),
            # nn.ReLU(),
            # nn.LeakyReLU(),
            nn.Tanh()
        )

        self.decoder = nn.Linear(dim, 900)

    def forward(self, x):
        x = self.encoder(x)
        logits = self.decoder(x)
        return logits

In [52]:
model = automlpacc().to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
n_epochs = 20   # number of epochs to run
batch_size = 150  # size of each batch
batch_start = torch.arange(0, len(X_train), batch_size)
trainLoss = []
valLoss = []

In [64]:
summary(model, input_size=(batch_size, 450))

Layer (type:depth-idx)                   Output Shape              Param #
automlpacc                               [150, 450]                --
├─Sequential: 1-1                        [150, 100]                --
│    └─Flatten: 2-1                      [150, 450]                --
│    └─Linear: 2-2                       [150, 100]                45,100
│    └─Tanh: 2-3                         [150, 100]                --
├─Linear: 1-2                            [150, 450]                45,450
Total params: 90,550
Trainable params: 90,550
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 13.58
Input size (MB): 0.27
Forward/backward pass size (MB): 0.66
Params size (MB): 0.36
Estimated Total Size (MB): 1.29

# Treinamento de modelos automlp de uma camada

In [8]:
lat_dim = [10, 20, 30, 50, 70, 100, 150, 200, 250, 300, 350, 400]
models1 = [None] * len(lat_dim)

In [None]:
for i, dim in enumerate(lat_dim):
    models1[i] = automlpacc(dim).to(device)
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(models1[i].parameters(), lr=0.001)
    n_epochs = 20   # number of epochs to run
    batch_size = 150  # size of each batch
    batch_start = torch.arange(0, len(X_train), batch_size)
    trainLoss = []
    valLoss = []
    for epoch in range(n_epochs):
        models1[i].train()
        with tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
            bar.set_description(f"Epoch {epoch}")
            for start in bar:
                # take a batch
                X_batch = X_train[start:start+batch_size,:]
                # forward pass
                Xh = models1[i](X_batch)
                loss = loss_fn(Xh, X_batch)
                # backward pass
                optimizer.zero_grad()
                loss.backward()
                # update weights
                optimizer.step()
                # print progress
                bar.set_postfix(loss=float(loss))
        # evaluate accuracy at end of each epoch
        models1[i].eval()
        Xh = models1[i](X_val)
        valLoss.append(float(loss_fn(Xh, X_val)))
        Xh = models1[i](X_train)
        trainLoss.append(float(loss_fn(Xh, X_train)))
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=valLoss, mode="lines", showlegend=True, name='validação'))
    fig.add_trace(go.Scatter(y=trainLoss, mode="lines", showlegend=True, name='treino'))
    fig.show()

In [10]:
valLoss = []
for model in models1:
    model.eval()
    Xh = model(X_val)
    valLoss.append(float(loss_fn(Xh, X_val)))
px.bar(x=lat_dim, y=valLoss, title='MSE de reconstrução por dimensão', labels={'y':'MSE', 'x':'Dimensão'}, width=1000)

In [11]:
aux = X_val.cpu().detach().numpy()
i = 12
print(tarefas[int(y_val[i])])
fig = go.Figure()
fig.add_trace(go.Scatter(y=aux[i,:], mode="lines", showlegend=True, name='original'))
for model in models1:
    Xh = model(X_val).cpu().detach().numpy()
    fig.add_trace(go.Scatter(y=Xh[i,:], mode="lines", showlegend=True, name='reconstrução '+str(lat_dim[models1.index(model)])))
fig.show()

running


# Treinamento de modelos a partir do espaço latente dos automlp

In [12]:
emb_train = [None] * len(lat_dim)
emb_val = [None] * len(lat_dim)
for i, model in enumerate(models1):
    emb_train[i] = model.encoder(X_train).detach()
    emb_val[i] = model.encoder(X_val).detach()

In [12]:
lat_dim = [10, 20, 30, 50, 70, 100, 150, 200, 250, 300, 350, 400]
models2 = [None] * len(lat_dim)

In [13]:
class automlp2(nn.Module):
    def __init__(self, dim1, dim2):
        super().__init__()
        self.encoder = torch.nn.Sequential(

            nn.Flatten(start_dim=1),
            nn.Linear(dim1, dim2),
            # nn.ReLU(),
            # nn.LeakyReLU(),
            nn.Tanh()
        )

        self.decoder = nn.Linear(dim2, dim1)

    def forward(self, x):
        x = self.encoder(x)
        logits = self.decoder(x)
        return logits

In [None]:
lossMat = np.zeros((len(lat_dim), len(lat_dim)))
for i, dim1 in enumerate(lat_dim):
    X_train = emb_train[i]
    X_val = emb_val[i]
    for j, dim2 in enumerate(lat_dim):
        print(dim1,dim2)
        models2[j] = automlp2(dim1, dim2).to(device)
        loss_fn = nn.MSELoss()
        optimizer = optim.Adam(models2[j].parameters(), lr=0.001)
        n_epochs = 20   # number of epochs to run
        batch_size = 150  # size of each batch
        batch_start = torch.arange(0, len(X_train), batch_size)
        trainLoss = []
        valLoss = []
        for epoch in range(n_epochs):
            models2[j].train()
            with tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
                bar.set_description(f"Epoch {epoch}")
                for start in bar:
                    # take a batch
                    X_batch = X_train[start:start+batch_size,:]
                    # forward pass
                    Xh = models2[j](X_batch)
                    loss = loss_fn(Xh, X_batch)
                    # backward pass
                    optimizer.zero_grad()
                    loss.backward()
                    # update weights
                    optimizer.step()
                    # print progress
                    bar.set_postfix(loss=float(loss))
            # evaluate accuracy at end of each epoch
            models2[j].eval()
            Xh = models2[j](X_val)
            valLoss.append(float(loss_fn(Xh, X_val)))
            Xh = models2[j](X_train)
            trainLoss.append(float(loss_fn(Xh, X_train)))
        # fig = go.Figure()
        # fig.add_trace(go.Scatter(y=valLoss, mode="lines", showlegend=True, name='validação'))
        # fig.add_trace(go.Scatter(y=trainLoss, mode="lines", showlegend=True, name='treino'))
        # fig.show()
    valLoss = []
    for model in models2:
        model.eval()
        Xh = model(X_val)
        valLoss.append(float(loss_fn(Xh, X_val)))
    lossMat[:,i] = np.array(valLoss)
    # px.bar(x=lat_dim, y=valLoss, title='MSE de reconstrução por dimensão', labels={'y':'MSE', 'x':'Dimensão'}, width=1000)

In [34]:
px.imshow(lossMat, zmin=0, width=800, height=600,
    title='MSEs',
    labels=dict(x="Dimensão 1", y="Dimensão 2", color="MSE"),
        y=np.array(lat_dim).astype(str),
        x=np.array(lat_dim).astype(str))

In [25]:
dim1 = 350
dim2 = 200
model2 = automlp2(dim1, dim2).to(device)
X_train = emb_train[lat_dim.index(dim1)]
X_val = emb_val[lat_dim.index(dim1)]
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model2.parameters(), lr=0.001)
n_epochs = 20   # number of epochs to run
batch_size = 150  # size of each batch
batch_start = torch.arange(0, len(X_train), batch_size)
trainLoss = []
valLoss = []


for epoch in range(n_epochs):
    model2.train()
    with tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[start:start+batch_size,:]
            # forward pass
            Xh = model2(X_batch)
            loss = loss_fn(Xh, X_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # print progress
            bar.set_postfix(loss=float(loss))
    # evaluate accuracy at end of each epoch
    model2.eval()
    Xh = model2(X_val)
    valLoss.append(float(loss_fn(Xh, X_val)))
    Xh = model2(X_train)
    trainLoss.append(float(loss_fn(Xh, X_train)))
fig = go.Figure()
fig.add_trace(go.Scatter(y=valLoss, mode="lines", showlegend=True, name='validação'))
fig.add_trace(go.Scatter(y=trainLoss, mode="lines", showlegend=True, name='treino'))
fig.show()

Epoch 0:   2%|▏         | 2/109 [00:00<00:01, 94.25batch/s, loss=0.00993]

Epoch 0: 100%|██████████| 109/109 [00:00<00:00, 155.09batch/s, loss=0.00178]
Epoch 1: 100%|██████████| 109/109 [00:00<00:00, 194.32batch/s, loss=0.000949]
Epoch 2: 100%|██████████| 109/109 [00:00<00:00, 232.60batch/s, loss=0.000571]
Epoch 3: 100%|██████████| 109/109 [00:00<00:00, 236.55batch/s, loss=0.000389]
Epoch 4: 100%|██████████| 109/109 [00:00<00:00, 230.70batch/s, loss=0.000302]
Epoch 5: 100%|██████████| 109/109 [00:00<00:00, 234.33batch/s, loss=0.000257]
Epoch 6: 100%|██████████| 109/109 [00:00<00:00, 232.66batch/s, loss=0.000232]
Epoch 7: 100%|██████████| 109/109 [00:00<00:00, 235.99batch/s, loss=0.000216]
Epoch 8: 100%|██████████| 109/109 [00:00<00:00, 203.34batch/s, loss=0.000206]
Epoch 9: 100%|██████████| 109/109 [00:00<00:00, 230.23batch/s, loss=0.0002]  
Epoch 10: 100%|██████████| 109/109 [00:00<00:00, 228.45batch/s, loss=0.000195]
Epoch 11: 100%|██████████| 109/109 [00:00<00:00, 222.09batch/s, loss=0.000192]
Epoch 12: 100%|██████████| 109/109 [00:00<00:00, 208.36batch/s,

In [23]:
type(emb_train)

list

In [17]:
model.parameters.weights

AttributeError: 'function' object has no attribute 'weights'

# Agora um modelo com 4 camadas (2 encod e 2 decod)

In [26]:
class automlp4c(nn.Module):
    def __init__(self, dim1, dim2):
        super().__init__()
        raw_dim = 450
        self.all_layers = torch.nn.Sequential(

            # Encoder
            nn.Flatten(start_dim=1),
            nn.Linear(raw_dim, dim1),
            # nn.ReLU(),
            # nn.LeakyReLU(),
            nn.Tanh(),
            nn.Linear(dim1, dim2),
            nn.Tanh(),

            # Decoder
            nn.Linear(dim2, dim1),
            nn.Tanh(),
            nn.Linear(dim1, raw_dim),
        )

    def forward(self, x):
        logits = self.all_layers(x)
        return logits

In [27]:
X = D[0].values[:,:450] / 30
y = D[0].values[:,-3]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=1, stratify=y_train)
X_train = torch.tensor(X_train, dtype=torch.float32, device=device).detach()
X_test = torch.tensor(X_test, dtype=torch.float32, device=device).detach()
X_val = torch.tensor(X_val, dtype=torch.float32, device=device).detach()

In [28]:
dim1 = 350
dim2 = 200
model = automlp4c(dim1, dim2).to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
n_epochs = 20   # number of epochs to run
batch_size = 150  # size of each batch
batch_start = torch.arange(0, len(X_train), batch_size)
trainLoss = []
valLoss = []


for epoch in range(n_epochs):
    model.train()
    with tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[start:start+batch_size,:]
            # forward pass
            Xh = model(X_batch)
            loss = loss_fn(Xh, X_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # print progress
            bar.set_postfix(loss=float(loss))
    # evaluate accuracy at end of each epoch
    model.eval()
    Xh = model(X_val)
    valLoss.append(float(loss_fn(Xh, X_val)))
    Xh = model(X_train)
    trainLoss.append(float(loss_fn(Xh, X_train)))
fig = go.Figure()
fig.add_trace(go.Scatter(y=valLoss, mode="lines", showlegend=True, name='validação'))
fig.add_trace(go.Scatter(y=trainLoss, mode="lines", showlegend=True, name='treino'))
fig.show()

Epoch 0: 100%|██████████| 109/109 [00:00<00:00, 154.45batch/s, loss=0.00171]
Epoch 1: 100%|██████████| 109/109 [00:00<00:00, 182.79batch/s, loss=0.00103]
Epoch 2: 100%|██████████| 109/109 [00:00<00:00, 204.06batch/s, loss=0.000673]
Epoch 3: 100%|██████████| 109/109 [00:00<00:00, 192.86batch/s, loss=0.000487]
Epoch 4: 100%|██████████| 109/109 [00:00<00:00, 196.41batch/s, loss=0.000395]
Epoch 5: 100%|██████████| 109/109 [00:00<00:00, 193.38batch/s, loss=0.000329]
Epoch 6: 100%|██████████| 109/109 [00:00<00:00, 193.23batch/s, loss=0.000284]
Epoch 7: 100%|██████████| 109/109 [00:00<00:00, 204.00batch/s, loss=0.00026] 
Epoch 8: 100%|██████████| 109/109 [00:00<00:00, 204.31batch/s, loss=0.000235]
Epoch 9: 100%|██████████| 109/109 [00:00<00:00, 202.01batch/s, loss=0.000223]
Epoch 10: 100%|██████████| 109/109 [00:00<00:00, 191.91batch/s, loss=0.000207]
Epoch 11: 100%|██████████| 109/109 [00:00<00:00, 188.80batch/s, loss=0.000194]
Epoch 12: 100%|██████████| 109/109 [00:00<00:00, 182.83batch/s, 