### import important packages

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils import data

import numpy as np

import os
import sys

import matplotlib.pyplot as plt

sys.path.append(os.path.abspath('..'))
from LoadRealRunningJumping import *
from LoadSyntheticRunningJumping import *

from GANModels import *

from dataLoader import *

import torch.fft as fft
from torchsummary import summary

from EvaluationFunctions import EncoderEvaluation

from sklearn.manifold import TSNE

# Encoder Study

## Load data and GAN generator and discriminator from checkpoint

In [None]:
models_path = '../pre-trained-models/daghar_split_dataset/seq_len_30/3axis/'
data_path = '../DAGHAR_split_25_10/train/data/KuHar_DAGHAR_Multiclass.csv'
label_path = '../DAGHAR_split_25_10/train/label/KuHar_Label_Multiclass.csv'

original_set = daghar_load_dataset_with_label(class_name= "kuhar",
                                              seq_len=30,
                                              data_path=data_path,
                                              label_path=label_path,
                                              channels=3)

gen_model = Generator(seq_len=30, channels=3).cuda()
running_model = Discriminator(seq_len=30, in_channels=3)
running_ckp = torch.load(models_path + 'KuHar_DAGHAR_Multiclass_50000_D_30_2024_10_25_02_42_43/Model/checkpoint')
running_model.load_state_dict(running_ckp['dis_state_dict'])

In [None]:
summary(gen_model, (1, 3, 100))

In [None]:
model = Discriminator(seq_len=30, in_channels=3).cuda()
summary(model, (3, 1, 30))

## Set discriminator to encoder

In [None]:
keys_to_remove = list(running_ckp['dis_state_dict'].keys())[-4:]  # Obtém as últimas 4 chaves

print(f"original keys: {running_ckp['dis_state_dict'].keys()}")
# Remove as chaves do dicionário original
for key in keys_to_remove:
    del running_ckp['dis_state_dict'][key]

# Verificando o resultado
print(f"ajusted keys: {running_ckp['dis_state_dict'].keys()}")

In [None]:
class Encoder(nn.Sequential):
    def __init__(self, 
                 in_channels=3,
                 patch_size=15,
                 emb_size=50, 
                 seq_len = 150,
                 depth=3, 
                 n_classes=1, 
                 **kwargs):
        super().__init__(
            PatchEmbedding_Linear(in_channels, patch_size, emb_size, seq_len),
            Dis_TransformerEncoder(depth, emb_size=emb_size, drop_p=0.5, forward_drop_p=0.5, **kwargs)
        )

In [None]:
encoder = Encoder(in_channels=3, seq_len=30).cuda()
summary(encoder, (3, 1, 30))

In [None]:
encoder.load_state_dict(running_ckp['dis_state_dict'])

In [None]:
samples = torch.from_numpy(original_set[:][0]).float().to('cuda')
labels = torch.from_numpy(original_set[:][1])
print(samples.shape)
forward = encoder(samples)
print(forward.shape)

In [None]:
def TSNE_visualization(data, labels, title='t-SNE plot', show=True, save_path=''):
    '''

    Espera-se que os dados originais estejam na forma (batch, channels, timeframe), por exemplo, (2784, 3, 50)
    
    '''
    colors = [
        "#FF5733",  # Vermelho
        "#337BFF",  # Azul
        "#33FF57",  # Verde
        "#FFD133",  # Amarelo
        "#9B33FF",  # Roxo
        "#33FFF6"   # Ciano
        ]
    actions = ['sit', 'stand', 'walk', 'upstairs', 'downstairs', 'run']

    # Garantir que os dados são numpy array e embaralhar com labels
    data = np.asarray(data)
    labels = np.asarray(labels)
    l = len(data)
    idx = np.random.permutation(l)
    data, labels = data[idx], labels[idx]

        # Pré-processamento: média ao longo da dimensão dos canais (dim=1)
        # Para cada batch, reduzimos para uma representação média de forma (2784, 50)
    prep = np.mean(data, axis=1)
    print("Shape após a média por canal:", prep.shape)

    # Análise TSNE
    tsne = TSNE(n_components=2, verbose=0, perplexity=40, n_iter=300)
    tsne_results = tsne.fit_transform(prep)
    print("Shape do resultado TSNE:", tsne_results.shape)

    # Plotagem
    if not show:
        return tsne_results

    f, ax = plt.subplots(1, figsize=(12,6))

    scatter = plt.scatter(tsne_results[:, 0], tsne_results[:, 1], 
                          c=[colors[label] for label in labels], alpha=0.6)
        # Criar uma legenda customizada para as classes
    handles = [plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[i], markersize=10) for i in range(len(actions))]
    ax.legend(handles, actions, title="Actions")

    ax.set_title(title)
    ax.set_xlabel('x-tsne')
    ax.set_ylabel('y-tsne')
    if save_path:
        f.savefig(save_path + title + '.png')

    return tsne_results

In [None]:
TSNE_visualization(forward.cpu().detach().numpy(),labels, title = 'tsne: kuhar', show=True,
                   save_path='')

# Observing output

In [139]:
class Dis_TransformerEncoderBlock(nn.Sequential):
    def __init__(self,
                 emb_size=100,
                 num_heads=5,
                 drop_p=0.,
                 forward_expansion=4,
                 forward_drop_p=0.):
        super().__init__(
            ResidualAdd(nn.Sequential(nn.LayerNorm(emb_size),
                                      MultiHeadAttention(emb_size, num_heads, drop_p),
                                      nn.Dropout(drop_p))),

            ResidualAdd(nn.Sequential(nn.LayerNorm(emb_size),
                                      FeedForwardBlock(emb_size, expansion=forward_expansion, drop_p=forward_drop_p),
                                      nn.Dropout(drop_p)))
                )



class Dis_TransformerEncoder(nn.Sequential):
    def __init__(self, depth=8, **kwargs):
        super().__init__(*[Dis_TransformerEncoderBlock(**kwargs) for _ in range(depth)])

class PatchEmbedding_Linear(nn.Module):
    #what are the proper parameters set here?
    def __init__(self, in_channels = 21, patch_size = 16, emb_size = 100, seq_len = 1024):
        # self.patch_size = patch_size
        super().__init__()
        #change the conv2d parameters here
        self.rearrange = Rearrange('b c (h s1) (w s2) -> b (h w) (s1 s2 c)',s1 = 1, s2 = patch_size)
        self.linear = nn.Linear(patch_size*in_channels, emb_size)
        self.projection = nn.Sequential(
            Rearrange('b c (h s1) (w s2) -> b (h w) (s1 s2 c)',s1 = 1, s2 = patch_size),
            nn.Linear(patch_size*in_channels, emb_size)
        )
        self.cls_token = nn.Parameter(torch.randn(1, 1, emb_size))
        self.positions = nn.Parameter(torch.randn((seq_len // patch_size) + 1, emb_size))

    def forward(self, x: Tensor) -> Tensor:
        print(f'forward in shape: {x.shape}')
        b, _, _, _ = x.shape
        x = self.rearrange(x)
        print(f'After REARRANGE shape: {x.shape}')
        x = self.linear(x)
        print(f'after Linear layer shape: {x.shape}')
        print(f'\n cls_token initial shape: {self.cls_token.shape}')
        cls_tokens = repeat(self.cls_token, '() n e -> b n e', b=b)
        #cls_tokens = self.cls_token.repeat(b, 1, 1) #Personal repeat from pytorch to transfer from einops
        #prepend the cls token to the input
        print(f' cls_tokens shape: {cls_tokens.shape}')
        x = torch.cat([cls_tokens, x], dim=1)
        # position
        x += self.positions
        return x        
        
class Encoder(nn.Sequential):
    def __init__(self, 
                 in_channels=3,
                 patch_size=15,
                 emb_size=50, 
                 seq_len = 150,
                 depth=3,  
                 **kwargs):
        super().__init__(
            PatchEmbedding_Linear(in_channels, patch_size, emb_size, seq_len),
            Dis_TransformerEncoder(depth, emb_size=emb_size, drop_p=0.5, forward_drop_p=0.5, **kwargs),
        )

In [140]:
torch.manual_seed(0)
test_encoder = Encoder(in_channels=3, patch_size=15, emb_size=50, seq_len=30, depth=3)

In [141]:
torch.manual_seed(0)
print(f'data original shape in: {original_set[:2][0].shape}')
loggits = test_encoder(torch.from_numpy(original_set[:2][0]).float())
loggits

data original shape in: (2, 3, 1, 30)
forward in shape: torch.Size([2, 3, 1, 30])
After REARRANGE shape: torch.Size([2, 2, 45])
after Linear layer shape: torch.Size([2, 2, 50])

 cls_token initial shape: torch.Size([1, 1, 50])
 cls_tokens shape: torch.Size([2, 1, 50])


tensor([[[ 2.0571e+00, -5.8690e-01, -7.2749e-01,  6.6194e-02,  1.8601e-01,
           6.1001e-01, -1.7726e+00, -4.5558e+00, -4.3648e-01, -2.1551e+00,
          -1.5407e+00, -7.6107e-01, -9.7441e-01,  1.7416e+00, -9.8297e-01,
           3.5182e-01, -1.7474e-01, -9.9338e-01, -5.4105e-01, -1.8193e+00,
           5.0723e-03,  2.4914e+00,  1.7360e-01, -3.5390e-01, -9.0261e-01,
          -6.7927e-01,  2.3131e+00, -1.0954e+00,  2.6914e+00,  9.8820e-01,
           2.7738e-01, -5.4304e-01,  5.8782e-01,  4.9031e-01, -1.6927e+00,
          -2.2878e+00,  4.8508e+00, -1.8930e+00,  3.9537e-01, -9.9192e-01,
          -5.8826e-01, -1.0439e+00,  5.4816e-01, -1.0574e+00, -1.1124e-01,
          -1.0655e+00,  1.9237e-02,  3.0961e+00,  8.4294e-01,  3.8517e-01],
         [ 2.1679e-01, -4.0678e-01, -2.6577e-01,  6.6641e-01, -1.5241e+00,
          -5.5468e-01, -1.0177e+00, -2.4124e-01, -5.1565e-01, -4.2608e+00,
           1.1483e+00,  7.5558e-01,  1.2840e+00, -1.0479e+00,  1.0254e+00,
          -7.2666e-01,  

In [142]:
lol == loggits

tensor([[[True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True],
         [True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True],
         [True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True, True, True, True, True, True,
          True, True, True, True, True, True, True, True, True, True, 

# Encoder Evaluation

In [None]:
import re

# Função para converter até o primeiro número
def convert_string(s):
    # Encontrar a parte da string antes do primeiro número
    match = re.match(r'([^\d]+)(\d+)', s)
    if match:
        # Extrair a parte antes e o número
        prefix = match.group(1).lower()  # Parte antes do número, convertida para minúsculas
        number = match.group(2)          # Primeiro número encontrado
        return f"{prefix}{number}"       # Retorna no formato desejado
    return s  # Retorna a string original se não houver números

## 3axis seq_len 30 split dataset

In [None]:
models_path = '../pre-trained-models/daghar_split_dataset/seq_len_30/3axis/'
data_path = '../DAGHAR_split_25_10/train/'
directorys = os.listdir(models_path)

first_parts = [item.split("Multiclass")[0] + "Multiclass" for item in directorys]
class_names = [item.split("_DAGHAR")[0] for item in directorys]
label_paths = [s.replace('DAGHAR', 'Label') for s in first_parts] 
directorys, class_names, first_parts, label_paths

In [None]:
print('Starting Encoder Evaluation')
print('---------------------------------------------------------------------------------------')
for i in range(len(class_names)):
    Eval = EncoderEvaluation(
                 models_path = models_path + directorys[i] + '/Model/checkpoint',
                 class_name = class_names[i],
                 seq_len = 30,
                 channels = 3,
                 save_path = '../Notebooks/Encoder_View/split_dataset/seq_len_30/3axis/' ,
                 data_path = data_path + 'data/' + first_parts[i] + '.csv',
                 label_path = data_path + 'label/' + label_paths[i] + '.csv',
                 show=False)
    print('---------------------------------------------------------------------------------------')

## 6axis seq_len 30 split dataset

In [None]:
models_path = '../pre-trained-models/daghar_split_dataset/seq_len_30/6axis/'
data_path = '../DAGHAR_split_25_10/train/'
directorys = os.listdir(models_path)

first_parts = [item.split("Multiclass")[0] + "Multiclass" for item in directorys]
class_names = [item.split("_DAGHAR")[0] for item in directorys]
label_paths = [s.replace('DAGHAR', 'Label') for s in first_parts] 
directorys, class_names, first_parts, label_paths

In [None]:
print('Starting Encoder Evaluation')
print('---------------------------------------------------------------------------------------')
for i in range(len(class_names)):
    Eval = EncoderEvaluation(
                 models_path = models_path + directorys[i] + '/Model/checkpoint',
                 class_name = class_names[i],
                 seq_len = 30,
                 channels = 6,
                 save_path = '../Notebooks/Encoder_View/split_dataset/seq_len_30/6axis/' ,
                 data_path = data_path + 'data/' + first_parts[i] + '.csv',
                 label_path = data_path + 'label/' + label_paths[i] + '.csv',
                 show=False)
    print('---------------------------------------------------------------------------------------')

## 3axis seq_len 60 split dataset

In [None]:
models_path = '../pre-trained-models/daghar_split_dataset/seq_len_60/3axis/'
data_path = '../DAGHAR_split_25_10/train/'
directorys = os.listdir(models_path)

first_parts = [item.split("Multiclass")[0] + "Multiclass" for item in directorys]
class_names = [item.split("_DAGHAR")[0] for item in directorys]
label_paths = [s.replace('DAGHAR', 'Label') for s in first_parts] 
directorys, class_names, first_parts, label_paths

In [18]:
print('Starting Encoder Evaluation')
print('---------------------------------------------------------------------------------------')
for i in range(len(class_names)):
    Eval = EncoderEvaluation(
                 models_path = models_path + directorys[i] + '/Model/checkpoint',
                 class_name = class_names[i],
                 seq_len = 60,
                 channels = 3,
                 save_path = '../Notebooks/Encoder_View/split_dataset/seq_len_60/3axis/' ,
                 data_path = data_path + 'data/' + first_parts[i] + '.csv',
                 label_path = data_path + 'label/' + label_paths[i] + '.csv',
                 show=False)
    print('---------------------------------------------------------------------------------------')

Shape do resultado TSNE: (8748, 2)
---------------------------------------------------------------------------------------
class name : UCI
Data path is located in: ../DAGHAR_split_25_10/train/data/UCI_DAGHAR_Multiclass.csv
Label path is located in: ../DAGHAR_split_25_10/train/label/UCI_Label_Multiclass.csv
Models path is located in: ../pre-trained-models/daghar_split_dataset/seq_len_60/3axis/UCI_DAGHAR_Multiclass_50000_D_60_2024_10_25_15_53_51/Model/checkpoint
dataset: Daghar
 
 Starting Encoder Evaluation
Original Set:
return single class data and labels, class is UCI
data shape is (2420, 3, 1, 60)
label shape is (2420,)
1: torch.Size([2420, 3, 1, 60])
2: torch.Size([2420, 4, 50])
forward shape: (2420, 5, 50)
Shape após a média por canal: (2420, 50)


  ckp = torch.load(self.models_path, map_location=torch.device("cpu"))


Shape do resultado TSNE: (2420, 2)
---------------------------------------------------------------------------------------


## 6axis seq_len 60 split dataset

In [19]:
models_path = '../pre-trained-models/daghar_split_dataset/seq_len_60/6axis/'
data_path = '../DAGHAR_split_25_10/train/'
directorys = os.listdir(models_path)

first_parts = [item.split("Multiclass")[0] + "Multiclass" for item in directorys]
class_names = [item.split("_DAGHAR")[0] for item in directorys]
label_paths = [s.replace('DAGHAR', 'Label') for s in first_parts] 
directorys, class_names, first_parts, label_paths

(['KuHar_DAGHAR_Multiclass_50000_D_60_2024_10_30_03_01_46',
  'MotionSense_DAGHAR_Multiclass_50000_D_60_2024_10_30_00_07_49',
  'RealWorld_thigh_DAGHAR_Multiclass_50000_D_60_2024_10_30_00_43_14',
  'UCI_DAGHAR_Multiclass_50000_D_60_2024_10_30_01_50_17',
  'WISDM_DAGHAR_Multiclass_50000_D_60_2024_10_30_01_16_54',
  'RealWorld_waist_DAGHAR_Multiclass_50000_D_60_2024_10_30_02_28_08'],
 ['KuHar',
  'MotionSense',
  'RealWorld_thigh',
  'UCI',
  'WISDM',
  'RealWorld_waist'],
 ['KuHar_DAGHAR_Multiclass',
  'MotionSense_DAGHAR_Multiclass',
  'RealWorld_thigh_DAGHAR_Multiclass',
  'UCI_DAGHAR_Multiclass',
  'WISDM_DAGHAR_Multiclass',
  'RealWorld_waist_DAGHAR_Multiclass'],
 ['KuHar_Label_Multiclass',
  'MotionSense_Label_Multiclass',
  'RealWorld_thigh_Label_Multiclass',
  'UCI_Label_Multiclass',
  'WISDM_Label_Multiclass',
  'RealWorld_waist_Label_Multiclass'])

In [20]:
print('Starting Encoder Evaluation')
print('---------------------------------------------------------------------------------------')
for i in range(len(class_names)):
    Eval = EncoderEvaluation(
                 models_path = models_path + directorys[i] + '/Model/checkpoint',
                 class_name = class_names[i],
                 seq_len = 60,
                 channels = 6,
                 save_path = '../Notebooks/Encoder_View/split_dataset/seq_len_60/6axis/' ,
                 data_path = data_path + 'data/' + first_parts[i] + '.csv',
                 label_path = data_path + 'label/' + label_paths[i] + '.csv',
                 show=False)
    print('---------------------------------------------------------------------------------------')

Starting Encoder Evaluation
---------------------------------------------------------------------------------------
class name : KuHar
Data path is located in: ../DAGHAR_split_25_10/train/data/KuHar_DAGHAR_Multiclass.csv
Label path is located in: ../DAGHAR_split_25_10/train/label/KuHar_Label_Multiclass.csv
Models path is located in: ../pre-trained-models/daghar_split_dataset/seq_len_60/6axis/KuHar_DAGHAR_Multiclass_50000_D_60_2024_10_30_03_01_46/Model/checkpoint
dataset: Daghar
 
 Starting Encoder Evaluation
Original Set:
return single class data and labels, class is KuHar
data shape is (1392, 6, 1, 60)
label shape is (1392,)
1: torch.Size([1392, 6, 1, 60])
2: torch.Size([1392, 4, 50])
forward shape: (1392, 5, 50)
Shape após a média por canal: (1392, 50)


  ckp = torch.load(self.models_path, map_location=torch.device("cpu"))


Shape do resultado TSNE: (1392, 2)
---------------------------------------------------------------------------------------
class name : MotionSense
Data path is located in: ../DAGHAR_split_25_10/train/data/MotionSense_DAGHAR_Multiclass.csv
Label path is located in: ../DAGHAR_split_25_10/train/label/MotionSense_Label_Multiclass.csv
Models path is located in: ../pre-trained-models/daghar_split_dataset/seq_len_60/6axis/MotionSense_DAGHAR_Multiclass_50000_D_60_2024_10_30_00_07_49/Model/checkpoint
dataset: Daghar
 
 Starting Encoder Evaluation
Original Set:
return single class data and labels, class is MotionSense
data shape is (3558, 6, 1, 60)
label shape is (3558,)
1: torch.Size([3558, 6, 1, 60])
2: torch.Size([3558, 4, 50])
forward shape: (3558, 5, 50)
Shape após a média por canal: (3558, 50)


  ckp = torch.load(self.models_path, map_location=torch.device("cpu"))


Shape do resultado TSNE: (3558, 2)
---------------------------------------------------------------------------------------
class name : RealWorld_thigh
Data path is located in: ../DAGHAR_split_25_10/train/data/RealWorld_thigh_DAGHAR_Multiclass.csv
Label path is located in: ../DAGHAR_split_25_10/train/label/RealWorld_thigh_Label_Multiclass.csv
Models path is located in: ../pre-trained-models/daghar_split_dataset/seq_len_60/6axis/RealWorld_thigh_DAGHAR_Multiclass_50000_D_60_2024_10_30_00_43_14/Model/checkpoint
dataset: Daghar
 
 Starting Encoder Evaluation
Original Set:
return single class data and labels, class is RealWorld_thigh
data shape is (10338, 6, 1, 60)
label shape is (10338,)
1: torch.Size([10338, 6, 1, 60])
2: torch.Size([10338, 4, 50])
forward shape: (10338, 5, 50)
Shape após a média por canal: (10338, 50)


  ckp = torch.load(self.models_path, map_location=torch.device("cpu"))


Shape do resultado TSNE: (10338, 2)
---------------------------------------------------------------------------------------
class name : UCI
Data path is located in: ../DAGHAR_split_25_10/train/data/UCI_DAGHAR_Multiclass.csv
Label path is located in: ../DAGHAR_split_25_10/train/label/UCI_Label_Multiclass.csv
Models path is located in: ../pre-trained-models/daghar_split_dataset/seq_len_60/6axis/UCI_DAGHAR_Multiclass_50000_D_60_2024_10_30_01_50_17/Model/checkpoint
dataset: Daghar
 
 Starting Encoder Evaluation
Original Set:
return single class data and labels, class is UCI
data shape is (2420, 6, 1, 60)
label shape is (2420,)
1: torch.Size([2420, 6, 1, 60])
2: torch.Size([2420, 4, 50])
forward shape: (2420, 5, 50)
Shape após a média por canal: (2420, 50)


  ckp = torch.load(self.models_path, map_location=torch.device("cpu"))


Shape do resultado TSNE: (2420, 2)
---------------------------------------------------------------------------------------
class name : WISDM
Data path is located in: ../DAGHAR_split_25_10/train/data/WISDM_DAGHAR_Multiclass.csv
Label path is located in: ../DAGHAR_split_25_10/train/label/WISDM_Label_Multiclass.csv
Models path is located in: ../pre-trained-models/daghar_split_dataset/seq_len_60/6axis/WISDM_DAGHAR_Multiclass_50000_D_60_2024_10_30_01_16_54/Model/checkpoint
dataset: Daghar
 
 Starting Encoder Evaluation
Original Set:
return single class data and labels, class is WISDM
data shape is (8748, 6, 1, 60)
label shape is (8748,)
1: torch.Size([8748, 6, 1, 60])
2: torch.Size([8748, 4, 50])
forward shape: (8748, 5, 50)
Shape após a média por canal: (8748, 50)


  ckp = torch.load(self.models_path, map_location=torch.device("cpu"))


Shape do resultado TSNE: (8748, 2)
---------------------------------------------------------------------------------------
class name : RealWorld_waist
Data path is located in: ../DAGHAR_split_25_10/train/data/RealWorld_waist_DAGHAR_Multiclass.csv
Label path is located in: ../DAGHAR_split_25_10/train/label/RealWorld_waist_Label_Multiclass.csv
Models path is located in: ../pre-trained-models/daghar_split_dataset/seq_len_60/6axis/RealWorld_waist_DAGHAR_Multiclass_50000_D_60_2024_10_30_02_28_08/Model/checkpoint
dataset: Daghar
 
 Starting Encoder Evaluation
Original Set:
return single class data and labels, class is RealWorld_waist
data shape is (10332, 6, 1, 60)
label shape is (10332,)
1: torch.Size([10332, 6, 1, 60])
2: torch.Size([10332, 4, 50])
forward shape: (10332, 5, 50)
Shape após a média por canal: (10332, 50)


  ckp = torch.load(self.models_path, map_location=torch.device("cpu"))


Shape do resultado TSNE: (10332, 2)
---------------------------------------------------------------------------------------
