In [3]:
import torch
print(torch.cuda.is_available())  
print(torch.cuda.current_device())  
print(torch.cuda.get_device_name(torch.cuda.current_device())) 

True
0
NVIDIA GeForce RTX 4060 Laptop GPU


In [4]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
else:
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

CUDA is available. Using GPU.


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from pytorch_tabnet.pretraining import TabNetPretrainer
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import numpy as np


In [6]:
df_train = pd.DataFrame(np.random.rand(700, 42), columns=[f"feature_{i}" for i in range(42)])
df_test = pd.DataFrame(np.random.rand(300, 42), columns=[f"feature_{i}" for i in range(42)])

y_train = (np.random.rand(700) > 0.5).astype(np.float32)
y_test = (np.random.rand(300) > 0.5).astype(np.float32)


scaler = MinMaxScaler()
X_train = scaler.fit_transform(df_train.values)
X_test = scaler.transform(df_test.values)

# y_train = y_train.values.astype(np.float32)
# y_test = y_test.values.astype(np.float32)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


torch.Size([700, 42]) torch.Size([700]) torch.Size([300, 42]) torch.Size([300])


In [7]:
y_train

tensor([1., 1., 0., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1., 0., 1., 1.,
        0., 0., 1., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1.,
        1., 0., 1., 0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 1.,
        0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 1., 1., 1., 1., 0., 0., 0., 1., 0., 1., 1., 0., 1., 0., 1., 0.,
        1., 0., 0., 1., 0., 1., 0., 0., 0., 1., 1., 0., 1., 1., 1., 1., 0., 0.,
        1., 0., 1., 0., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 1., 1.,
        0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 1., 0., 1.,
        0., 0., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1.,
        0., 0., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 1.,
        1., 1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 1., 1., 1., 0., 0.,
        1., 0., 0., 1., 0., 0., 1., 1., 

In [8]:
from torch.utils.data import TensorDataset, DataLoader
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [9]:
tabnet_params = {
    "n_d": 16,
    "n_a": 16,
    "n_steps": 3,
    "n_shared": 2,
    "n_independent": 2,
    "gamma": 1.3,
    "epsilon": 1e-15,
    "momentum": 0.98,
    "mask_type": "sparsemax",
    "lambda_sparse": 1e-3,
    "device_name": "cuda" if torch.cuda.is_available() else "cpu"
}


unsupervised_model = TabNetPretrainer(
    optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-2),
    **tabnet_params
)
 

unsupervised_model.fit(
    X_train,
    eval_set=[X_test],  
    pretraining_ratio=0.8,
    max_epochs=101,
    patience=10,
    batch_size=1024,
    virtual_batch_size=128,
    num_workers=0,
    drop_last=False
)



epoch 0  | loss: 60.91917| val_0_unsup_loss_numpy: 12.30090045928955|  0:00:00s
epoch 1  | loss: 43.68689| val_0_unsup_loss_numpy: 10.328229904174805|  0:00:00s
epoch 2  | loss: 32.23457| val_0_unsup_loss_numpy: 7.11821985244751|  0:00:00s
epoch 3  | loss: 24.33612| val_0_unsup_loss_numpy: 5.316720008850098|  0:00:00s
epoch 4  | loss: 17.89371| val_0_unsup_loss_numpy: 4.35368013381958|  0:00:00s
epoch 5  | loss: 13.73012| val_0_unsup_loss_numpy: 4.090789794921875|  0:00:00s
epoch 6  | loss: 11.33558| val_0_unsup_loss_numpy: 3.6928300857543945|  0:00:00s
epoch 7  | loss: 9.38129 | val_0_unsup_loss_numpy: 2.977440118789673|  0:00:00s
epoch 8  | loss: 7.46308 | val_0_unsup_loss_numpy: 2.6637299060821533|  0:00:01s
epoch 9  | loss: 5.88853 | val_0_unsup_loss_numpy: 2.575589895248413|  0:00:01s
epoch 10 | loss: 4.74838 | val_0_unsup_loss_numpy: 2.1205599308013916|  0:00:01s
epoch 11 | loss: 4.06542 | val_0_unsup_loss_numpy: 1.9705699682235718|  0:00:01s
epoch 12 | loss: 3.24445 | val_0_unsu



In [10]:
# Truy cập vào mô hình TabNet bên trong
from torchinfo import summary

tabnet_model = unsupervised_model.network.to(device)

summary(tabnet_model, input_size=X_train.shape) 

Layer (type:depth-idx)                                       Output Shape              Param #
TabNetPretraining                                            [700, 42]                 --
├─EmbeddingGenerator: 1-1                                    [700, 42]                 --
├─TabNetEncoder: 1-2                                         [700, 16]                 --
│    └─BatchNorm1d: 2-1                                      [700, 42]                 84
│    └─FeatTransformer: 2-2                                  [700, 32]                 4,352
│    │    └─GLU_Block: 3-1                                   [700, 32]                 4,992
│    └─ModuleList: 2-12                                      --                        (recursive)
│    │    └─FeatTransformer: 3-17                            --                        (recursive)
│    └─FeatTransformer: 2-6                                  --                        (recursive)
│    │    └─GLU_Block: 3-5                                   -

In [11]:
encoder = tabnet_model.encoder

print("\nEncoder Summary:")
print(encoder)




Encoder Summary:
TabNetEncoder(
  (initial_bn): BatchNorm1d(42, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)
  (initial_splitter): FeatTransformer(
    (shared): GLU_Block(
      (shared_layers): ModuleList(
        (0): Linear(in_features=42, out_features=64, bias=False)
        (1): Linear(in_features=32, out_features=64, bias=False)
      )
      (glu_layers): ModuleList(
        (0): GLU_Layer(
          (fc): Linear(in_features=42, out_features=64, bias=False)
          (bn): GBN(
            (bn): BatchNorm1d(64, eps=1e-05, momentum=0.98, affine=True, track_running_stats=True)
          )
        )
        (1): GLU_Layer(
          (fc): Linear(in_features=32, out_features=64, bias=False)
          (bn): GBN(
            (bn): BatchNorm1d(64, eps=1e-05, momentum=0.98, affine=True, track_running_stats=True)
          )
        )
      )
    )
    (specifics): GLU_Block(
      (glu_layers): ModuleList(
        (0-1): 2 x GLU_Layer(
          (fc): Linear(in_fea

In [12]:
decoder = tabnet_model.decoder

print("\nDecoder Summary:")
print(decoder)


Decoder Summary:
TabNetDecoder(
  (feat_transformers): ModuleList(
    (0-2): 3 x FeatTransformer(
      (shared): GLU_Block(
        (shared_layers): ModuleList(
          (0): Linear(in_features=16, out_features=32, bias=False)
        )
        (glu_layers): ModuleList(
          (0): GLU_Layer(
            (fc): Linear(in_features=16, out_features=32, bias=False)
            (bn): GBN(
              (bn): BatchNorm1d(32, eps=1e-05, momentum=0.98, affine=True, track_running_stats=True)
            )
          )
        )
      )
      (specifics): GLU_Block(
        (glu_layers): ModuleList(
          (0): GLU_Layer(
            (fc): Linear(in_features=16, out_features=32, bias=False)
            (bn): GBN(
              (bn): BatchNorm1d(32, eps=1e-05, momentum=0.98, affine=True, track_running_stats=True)
            )
          )
        )
      )
    )
  )
  (reconstruction_layer): Linear(in_features=16, out_features=42, bias=False)
)


In [13]:
sample_input = torch.tensor(X_train[:5]).to(device)  

try:
    result = tabnet_model.encoder(sample_input)
    if isinstance(result, tuple):
        print(f'TabNet encoder trả về {len(result)} giá trị.')
        for i, res in enumerate(result):
            print(f'Giá trị {i + 1} shape: {res.shape}')
    else:
        print('TabNet encoder chỉ trả về một giá trị.')
        print(f'Giá trị shape: {result.shape}')
except Exception as e:
    print(f'Đã xảy ra lỗi: {e}')

TabNet encoder trả về 2 giá trị.
Đã xảy ra lỗi: 'list' object has no attribute 'shape'


  sample_input = torch.tensor(X_train[:5]).to(device)


In [14]:
class Sampling(nn.Module):
    def __init__(self, seed=1337):
        super(Sampling, self).__init__()
        self.seed = seed

    def forward(self, inputs):
        z_mean, z_log_var = inputs
        batch = z_mean.size(0)
        dim = z_mean.size(1)
        # print(batch, dim)
        epsilon = torch.randn(batch, dim, generator=torch.Generator().manual_seed(self.seed)).to(device)
        return z_mean + torch.exp(0.5 * z_log_var) * epsilon

In [15]:
class VAE_Encoder(nn.Module):
    def __init__(self, latent_dim):
        super(VAE_Encoder, self).__init__()
        self.tabnet_encoder = tabnet_model.encoder
        self.mlp = nn.Sequential(
            nn.Linear(16, 128),  
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 96),
            nn.ReLU(),
            nn.Linear(96, latent_dim)
        ).to(device)
        self.fc_mean = nn.Linear(latent_dim, latent_dim).to(device)
        self.fc_log_var = nn.Linear(latent_dim, latent_dim).to(device)
        self.sampling = Sampling().to(device)

    def forward(self, x):
        x = x.to(device)
        steps_output, _ = self.tabnet_encoder(x)
        encoded = steps_output[-1]
        # print("Shape of encoded tensor:", encoded.shape)
        encoded = self.mlp(encoded)
        z_mean = self.fc_mean(encoded)
        z_log_var = self.fc_log_var(encoded)
        z = self.sampling((z_mean, z_log_var))
        # print(f'Shape of z: {z.shape} - {z_log_var.shape} -{z_log_var.shape}')
        return z_mean, z_log_var, z


In [16]:
class VAE_Decoder(nn.Module):
    def __init__(self, latent_dim,encoded_dim, output_dim):
        super(VAE_Decoder, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(latent_dim, 32),   
            nn.ReLU(),
            nn.Linear(32, 96),
            nn.ReLU(),
            nn.Linear(96, 96),
            nn.ReLU(),
            nn.Linear(96, encoded_dim),  
        )
        self.tabnet_decoder = tabnet_model.decoder
        self.reshape = nn.Unflatten(1, (encoded_dim,))
        self.output_dim=output_dim


    def forward(self, z):
        x = F.relu(self.mlp(z))

        # print("Shape before reshape:", x.shape)
        # x = self.reshape(x)
        x = x[None, ...]

        # print("Shape after reshape:", x.shape)
        # x = x.view(x.size(0), output_dim)
        
        output = self.tabnet_decoder(x)
        # print(output.shape)
        # print("Shape of output from tabnet_decoder:", output.shape)
        output = torch.sigmoid(output)
        output = output.view(-1, self.output_dim)
        return output

In [17]:
def check_data_range(tensor, name):
    if not torch.all((tensor >= 0) & (tensor <= 1)):
        print(f"{name} contains values outside the range [0, 1]")
        print(f"{name} min: {tensor.min()}, max: {tensor.max()}")

In [None]:
class VAE_Tabnet_MLPS(nn.Module):
    def __init__(self, encoder, decoder, classifier):
        super(VAE_Tabnet_MLPS, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.classifier = classifier
        self.total_loss_tracker = []
        self.reconstruction_loss_tracker = []
        self.kl_loss_tracker = []
        self.classification_loss_tracker = []
        self.accuracy_tracker = []

    def forward(self, x):
        z_mean, z_log_var, z = self.encoder(x)
        reconstruction = self.decoder(z)
        classification_output = self.classifier(z)
        return reconstruction, z_mean, z_log_var, classification_output

    def train_step(self, data, labels, optimizer):
        optimizer.zero_grad()
        # z_mean, z_log_var, z = self.encoder(data)
        # reconstruction = self.decoder(z)
        reconstruction, z_mean, z_log_var, classification_output = self.forward(data)
        # print('classifi',classification_output.shape)
        # print(check_data_range(data, 'data'))
        # print(check_data_range(reconstruction, 'reconstruction'))
        # reconstruction_loss = torch.mean(
        #     torch.sum(
        #         F.binary_cross_entropy(reconstruction, data, reduction='none'),
        #         dim=1
        #     )
        # )
        reconstruction_loss = torch.mean(
            torch.sum(
                F.binary_cross_entropy_with_logits(reconstruction, data, reduction='none'),
                dim=1
                # dim=(1, 2)
                )  
        )
        classification_loss = torch.mean(
            torch.sum(
                F.binary_cross_entropy_with_logits(classification_output, labels, reduction='none'),
                # dim=1
                # dim=(1, 2)
                )  
        )
        kl_loss = -0.5 * torch.sum(1 + z_log_var - z_mean.pow(2) - z_log_var.exp(), dim=1)
        kl_loss = torch.mean(torch.sum(kl_loss))
        total_loss = reconstruction_loss + kl_loss + classification_loss
        total_loss.backward()
        optimizer.step()

        self.total_loss_tracker.append(total_loss.item())
        self.reconstruction_loss_tracker.append(reconstruction_loss.item())
        self.kl_loss_tracker.append(kl_loss.item())
        self.classification_loss_tracker.append(classification_loss.item())

        preds = torch.sigmoid(classification_output)
        correct = ((preds > 0.5) == labels).float().sum()
        accuracy = correct / labels.size(0)
        self.accuracy_tracker.append(accuracy.item())

        return {
            "loss": total_loss.item(),
            "reconstruction_loss": reconstruction_loss.item(),
            "kl_loss": kl_loss.item(),
            "classification_loss": classification_loss.item(),
            "accuracy": accuracy.item()
        }

In [19]:
latent_dim = 64
encoded_dim = 16
output_dim = X_train.shape[1]
input_dim = X_train.shape[1]
print(input_dim)


42


In [20]:
class SimpleClassifier(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleClassifier, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, output_dim),
            nn.Sigmoid()  
        )

    def forward(self, x):
        # print('input: ',x.shape)
        output = self.fc(x)
        output = output.view(-1)
        # print('output',output.shape)
        return output

In [21]:
classifier = SimpleClassifier(latent_dim, output_dim=1).to(device)


In [22]:
def check_output(model, input_tensor):
    with torch.no_grad():  
        output = model(input_tensor)
        print(f"Input size: {input_tensor.size()}")
        print(f"Output size: {output.size()}")
        print(f"Output: {output}")

model = SimpleClassifier(latent_dim, output_dim=1)

input_tensor = torch.randn(32,latent_dim)  

check_output(model, input_tensor)

Input size: torch.Size([32, 64])
Output size: torch.Size([32])
Output: tensor([0.4634, 0.4261, 0.4812, 0.4578, 0.4735, 0.4766, 0.4581, 0.4933, 0.4660,
        0.4409, 0.4369, 0.4910, 0.4414, 0.4114, 0.4475, 0.4577, 0.4821, 0.4838,
        0.4424, 0.4795, 0.4572, 0.4620, 0.4737, 0.4427, 0.4271, 0.4568, 0.4764,
        0.4853, 0.4551, 0.4886, 0.4910, 0.4757])


In [23]:
vae_encoder = VAE_Encoder(latent_dim=latent_dim)
print("Encoder Summary:")
# vae_encoder.to(device)

summary(vae_encoder, input_size=(32, input_dim), device=device)

Encoder Summary:


Layer (type:depth-idx)                                       Output Shape              Param #
VAE_Encoder                                                  [32, 64]                  --
├─TabNetEncoder: 1-1                                         [32, 16]                  --
│    └─BatchNorm1d: 2-1                                      [32, 42]                  84
│    └─FeatTransformer: 2-2                                  [32, 32]                  4,352
│    │    └─GLU_Block: 3-1                                   [32, 32]                  4,992
│    └─ModuleList: 2-12                                      --                        (recursive)
│    │    └─FeatTransformer: 3-17                            --                        (recursive)
│    └─FeatTransformer: 2-6                                  --                        (recursive)
│    │    └─GLU_Block: 3-5                                   --                        (recursive)
│    └─ModuleList: 2-12                              

In [24]:
x = torch.randn(800, 42).to(device)
steps_output, _ = tabnet_model.encoder(x)
encoded = steps_output[-1]
print(f"Encoded shape: {encoded.shape}")

Encoded shape: torch.Size([800, 16])


In [25]:
import torch

x = torch.randn(800, 42).to(device)  # Đầu vào có kích thước (batch_size, features)

steps_output, _ = tabnet_model.encoder(x)
print("Shape of encoder output:", [output.shape for output in steps_output])

decoder_input = steps_output[-1]  
decoder_input = decoder_input[None, ...]
try:
    decoder_output = tabnet_model.decoder(decoder_input)
    print(f"Decoder shape: {decoder_output.shape}")
except ValueError as e:
    print(f"Error: {e}")


Shape of encoder output: [torch.Size([800, 16]), torch.Size([800, 16]), torch.Size([800, 16])]
Decoder shape: torch.Size([800, 42])


In [26]:
vae_decoder = VAE_Decoder(latent_dim=latent_dim, encoded_dim=encoded_dim, output_dim=output_dim).to(device)
print("Decoder Summary:")
summary(vae_decoder, input_size=(32, latent_dim), device=device)

Decoder Summary:


Layer (type:depth-idx)                                       Output Shape              Param #
VAE_Decoder                                                  [32, 42]                  --
├─Sequential: 1-1                                            [32, 16]                  --
│    └─Linear: 2-1                                           [32, 32]                  2,080
│    └─ReLU: 2-2                                             [32, 32]                  --
│    └─Linear: 2-3                                           [32, 96]                  3,168
│    └─ReLU: 2-4                                             [32, 96]                  --
│    └─Linear: 2-5                                           [32, 96]                  9,312
│    └─ReLU: 2-6                                             [32, 96]                  --
│    └─Linear: 2-7                                           [32, 16]                  1,552
├─TabNetDecoder: 1-2                                         [32, 42]              

In [27]:
vae = VAE_Tabnet_MLPS(encoder=vae_encoder, decoder=vae_decoder,classifier=classifier).to(device)
summary(vae, input_size=(32, input_dim), device=device)

Layer (type:depth-idx)                                            Output Shape              Param #
VAE_Tabnet_MLPS                                                   [32, 42]                  --
├─VAE_Encoder: 1-1                                                [32, 64]                  --
│    └─TabNetEncoder: 2-1                                         [32, 16]                  --
│    │    └─BatchNorm1d: 3-1                                      [32, 42]                  84
│    │    └─FeatTransformer: 3-2                                  [32, 32]                  9,344
│    │    └─ModuleList: 3-12                                      --                        (recursive)
│    │    └─FeatTransformer: 3-6                                  --                        (recursive)
│    │    └─ModuleList: 3-12                                      --                        (recursive)
│    │    └─FeatTransformer: 3-6                                  --                        (recursive)
│    │

In [28]:
learning_rate = 0.0001
optimizer = optim.Adam(vae.parameters(), lr=learning_rate)
num_epochs = 10

for epoch in range(num_epochs):
    vae.train()
    train_loss = 0
    rec_loss = 0
    kl_loss = 0
    classification_loss = 0
    accuracy = 0

    for batch_data, batch_labels in train_loader:
        batch_data = batch_data.to(device)
        batch_labels = batch_labels.to(device)
        results = vae.train_step(batch_data, batch_labels, optimizer)
        
        train_loss += results["loss"]
        rec_loss += results["reconstruction_loss"]
        kl_loss += results["kl_loss"]
        classification_loss += results["classification_loss"]
        accuracy += results["accuracy"]

    train_loss /= len(train_loader)
    rec_loss /= len(train_loader)
    kl_loss /= len(train_loader)
    classification_loss /= len(train_loader)
    accuracy /= len(train_loader)

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss:.4f}, Reconstruction Loss: {rec_loss:.4f}, KL Loss: {kl_loss:.4f}, Classification Loss: {classification_loss:.4f}, Accuracy: {accuracy:.4f}")


Epoch 1/10, Loss: 61.4475, Reconstruction Loss: 30.6784, KL Loss: 7.7328, Classification Loss: 23.0363, Accuracy: 0.5002
Epoch 2/10, Loss: 58.9608, Reconstruction Loss: 30.6223, KL Loss: 5.3508, Classification Loss: 22.9877, Accuracy: 0.5006
Epoch 3/10, Loss: 57.0393, Reconstruction Loss: 30.5961, KL Loss: 3.4634, Classification Loss: 22.9797, Accuracy: 0.4996
Epoch 4/10, Loss: 55.3225, Reconstruction Loss: 30.5742, KL Loss: 1.8678, Classification Loss: 22.8805, Accuracy: 0.5008
Epoch 5/10, Loss: 54.2315, Reconstruction Loss: 30.5489, KL Loss: 0.8267, Classification Loss: 22.8559, Accuracy: 0.4992
Epoch 6/10, Loss: 53.6408, Reconstruction Loss: 30.5359, KL Loss: 0.3066, Classification Loss: 22.7983, Accuracy: 0.5006
Epoch 7/10, Loss: 53.4424, Reconstruction Loss: 30.5177, KL Loss: 0.1460, Classification Loss: 22.7787, Accuracy: 0.5004
Epoch 8/10, Loss: 53.3401, Reconstruction Loss: 30.5035, KL Loss: 0.0918, Classification Loss: 22.7447, Accuracy: 0.5000
Epoch 9/10, Loss: 53.2159, Recon

In [29]:
for param in vae.encoder.parameters():
    param.requires_grad = False

In [30]:
vae_new = VAE_Tabnet_MLPS(vae.encoder, vae.decoder, vae.classifier).to(device)
for param in vae_new.encoder.parameters():
    param.requires_grad = False

optimizer = optim.Adam(filter(lambda p: p.requires_grad, vae_new.parameters()), lr=learning_rate)
for epoch in range(num_epochs):
    vae_new.train()
    train_loss = 0
    rec_loss = 0
    kl_loss = 0
    classification_loss = 0
    accuracy = 0

    for batch_data, batch_labels in train_loader:
        batch_data = batch_data.to(device)
        batch_labels = batch_labels.to(device)
        results = vae.train_step(batch_data, batch_labels, optimizer)
        
        train_loss += results["loss"]
        rec_loss += results["reconstruction_loss"]
        kl_loss += results["kl_loss"]
        classification_loss += results["classification_loss"]
        accuracy += results["accuracy"]

    train_loss /= len(train_loader)
    rec_loss /= len(train_loader)
    kl_loss /= len(train_loader)
    classification_loss /= len(train_loader)
    accuracy /= len(train_loader)

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss:.4f}, Reconstruction Loss: {rec_loss:.4f}, KL Loss: {kl_loss:.4f}, Classification Loss: {classification_loss:.4f}, Accuracy: {accuracy:.4f}")

Epoch 1/10, Loss: 53.1120, Reconstruction Loss: 30.4644, KL Loss: 0.0400, Classification Loss: 22.6076, Accuracy: 0.4992
Epoch 2/10, Loss: 53.0209, Reconstruction Loss: 30.4537, KL Loss: 0.0387, Classification Loss: 22.5284, Accuracy: 0.5002
Epoch 3/10, Loss: 52.9978, Reconstruction Loss: 30.4485, KL Loss: 0.0430, Classification Loss: 22.5062, Accuracy: 0.4990
Epoch 4/10, Loss: 52.9031, Reconstruction Loss: 30.4347, KL Loss: 0.0404, Classification Loss: 22.4280, Accuracy: 0.4998
Epoch 5/10, Loss: 52.9155, Reconstruction Loss: 30.4229, KL Loss: 0.0395, Classification Loss: 22.4531, Accuracy: 0.4998
Epoch 6/10, Loss: 52.8807, Reconstruction Loss: 30.4122, KL Loss: 0.0409, Classification Loss: 22.4276, Accuracy: 0.5000
Epoch 7/10, Loss: 52.8405, Reconstruction Loss: 30.4025, KL Loss: 0.0428, Classification Loss: 22.3952, Accuracy: 0.5002
Epoch 8/10, Loss: 52.8140, Reconstruction Loss: 30.3933, KL Loss: 0.0420, Classification Loss: 22.3787, Accuracy: 0.5000
Epoch 9/10, Loss: 52.7752, Recon

In [102]:
class conbr_block(nn.Module):
    def __init__(self, in_layer, out_layer, kernel_size, stride, dilation):
        super(conbr_block, self).__init__()

        self.conv1 = nn.Conv1d(in_layer, out_layer, kernel_size=kernel_size, stride=stride, dilation = dilation, padding = 3, bias=True)
        self.bn = nn.BatchNorm1d(out_layer)
        self.relu = nn.ReLU()
    
    def forward(self,x):
        x = self.conv1(x)
        x = self.bn(x)
        out = self.relu(x)
        
        return out       

class se_block(nn.Module):
    def __init__(self,in_layer, out_layer):
        super(se_block, self).__init__()
        
        self.conv1 = nn.Conv1d(in_layer, out_layer//8, kernel_size=1, padding=0)
        self.conv2 = nn.Conv1d(out_layer//8, in_layer, kernel_size=1, padding=0)
        self.fc = nn.Linear(1,out_layer//8)
        self.fc2 = nn.Linear(out_layer//8,out_layer)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    
    def forward(self,x):

        x_se = nn.functional.adaptive_avg_pool1d(x,1)
        x_se = self.conv1(x_se)
        x_se = self.relu(x_se)
        x_se = self.conv2(x_se)
        x_se = self.sigmoid(x_se)
        
        x_out = torch.add(x, x_se)
        return x_out

class re_block(nn.Module):
    def __init__(self, in_layer, out_layer, kernel_size, dilation):
        super(re_block, self).__init__()
        
        self.cbr1 = conbr_block(in_layer,out_layer, kernel_size, 1, dilation)
        self.cbr2 = conbr_block(out_layer,out_layer, kernel_size, 1, dilation)
        self.seblock = se_block(out_layer, out_layer)
    
    def forward(self,x):

        x_re = self.cbr1(x)
        x_re = self.cbr2(x_re)
        x_re = self.seblock(x_re)
        x_out = torch.add(x, x_re)
        return x_out          

class UNET_1D(nn.Module):
    def __init__(self, input_dim, layer_n, kernel_size, depth):
        super(UNET_1D, self).__init__()
        self.input_dim = input_dim
        self.layer_n = layer_n
        self.kernel_size = kernel_size
        self.depth = depth

        # AvgPool layers
        self.AvgPool1D1 = nn.AvgPool1d(input_dim, stride=5)
        self.AvgPool1D2 = nn.AvgPool1d(input_dim, stride=25)
        self.AvgPool1D3 = nn.AvgPool1d(input_dim, stride=125)

        # Encoder layers
        self.layer1 = self.down_layer(self.input_dim, self.layer_n, self.kernel_size, 1, 2)
        self.layer2 = self.down_layer(self.layer_n, int(self.layer_n*2), self.kernel_size, 5, 2)
        self.layer3 = self.down_layer(int(self.layer_n*2)+int(self.input_dim), int(self.layer_n*3), self.kernel_size, 5, 2)
        self.layer4 = self.down_layer(int(self.layer_n*3)+int(self.input_dim), int(self.layer_n*4), self.kernel_size, 5, 2)
        self.layer5 = self.down_layer(int(self.layer_n*4)+int(self.input_dim), int(self.layer_n*5), self.kernel_size, 4, 2)

        # Decoder layers
        self.cbr_up1 = conbr_block(int(self.layer_n*7), int(self.layer_n*3), self.kernel_size, 1, 1)
        self.cbr_up2 = conbr_block(int(self.layer_n*5), int(self.layer_n*2), self.kernel_size, 1, 1)
        self.cbr_up3 = conbr_block(int(self.layer_n*3), self.layer_n, self.kernel_size, 1, 1)

        self.upsample = nn.Upsample(scale_factor=5, mode='nearest')
        self.upsample1 = nn.Upsample(scale_factor=5, mode='nearest')

        self.outcov = nn.Conv1d(self.layer_n, 11, kernel_size=self.kernel_size, stride=1, padding=3)

    def down_layer(self, input_layer, out_layer, kernel, stride, depth):
        layers = []
        # Layer đầu tiên
        layers.append(conbr_block(input_layer, out_layer, kernel, stride, 1))
        # Các re_block tiếp theo
        for _ in range(depth):
            layers.append(re_block(out_layer, out_layer, kernel, 1))
        return nn.ModuleList(layers)

    def forward(self, x, t_emb):
        pool_x1 = self.AvgPool1D1(x)
        pool_x2 = self.AvgPool1D2(x)
        pool_x3 = self.AvgPool1D3(x)

        ############# Encoder #####################
        # layer1
        x_enc = x
        for layer in self.layer1:
            if isinstance(layer, conbr_block):
                x_enc = layer(x_enc, t_emb)
            else:
                x_enc = layer(x_enc)  # nếu có lớp khác không cần t_emb

        out_0 = x_enc

        # layer2
        x_enc = out_0
        for layer in self.layer2:
            if isinstance(layer, conbr_block):
                x_enc = layer(x_enc, t_emb)
            else:
                x_enc = layer(x_enc)

        out_1 = x_enc

        # layer3
        x = torch.cat([out_1, pool_x1], dim=1)
        x_enc = x
        for layer in self.layer3:
            if isinstance(layer, conbr_block):
                x_enc = layer(x_enc, t_emb)
            else:
                x_enc = layer(x_enc)

        out_2 = x_enc

        # layer4
        x = torch.cat([out_2, pool_x2], dim=1)
        x_enc = x
        for layer in self.layer4:
            if isinstance(layer, conbr_block):
                x_enc = layer(x_enc, t_emb)
            else:
                x_enc = layer(x_enc)

        x = x_enc

        ############# Decoder ####################
        up = self.upsample1(x)
        up = torch.cat([up, out_2], dim=1)
        up = self.cbr_up1(up, t_emb)

        up = self.upsample(up)
        up = torch.cat([up, out_1], dim=1)
        up = self.cbr_up2(up, t_emb)

        up = self.upsample(up)
        up = torch.cat([up, out_0], dim=1)
        up = self.cbr_up3(up, t_emb)

        out = self.outcov(up)
        noise_pred = out.squeeze(1)  # shape: [B, latent_dim]
        return noise_pred

In [149]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
class conbr_block(nn.Module):
    def __init__(self, in_layer, out_layer, kernel_size, stride, dilation):
        super(conbr_block, self).__init__()
        self.conv1 = nn.Conv1d(in_layer, out_layer, kernel_size=kernel_size,
                               stride=stride, dilation=dilation, padding=3, bias=True)
        self.bn = nn.BatchNorm1d(out_layer)
        self.relu = nn.ReLU()

        # Time embedding projection
        self.time_mlp = nn.Linear(128, out_layer)

    def forward(self, x, t_emb):
        x = self.conv1(x)
        x = self.bn(x)
        t_emb = self.time_mlp(t_emb)
        print('conbr_block t_emb shape:', t_emb.shape)
        # print(t_emb.shape, x.shape)  # Check shapes before broadcasting
        t_emb_proj = t_emb.unsqueeze(-1) 
        print('t_emb_proj shape:', t_emb_proj.shape)
        
        print(t_emb_proj.shape, x.shape)  # Project time embedding to match out_layer
        x = x + t_emb_proj  # broadcasting
        out = self.relu(x)
        print('conbr_block out shape:', out.shape)
        return out


class re_block(nn.Module):
    def __init__(self, in_layer, out_layer, kernel_size, dilation):
        super(re_block, self).__init__()
        self.cbr1 = conbr_block(in_layer, out_layer, kernel_size, 1, dilation)
        self.cbr2 = conbr_block(out_layer, out_layer, kernel_size, 1, dilation)
        self.seblock = se_block(out_layer, out_layer)

    def forward(self, x, t_emb):
        x_re = self.cbr1(x, t_emb)
        x_re = self.cbr2(x_re, t_emb)
        x_re = self.seblock(x_re)

        # Resize x_re nếu temporal dim không khớp
        if x_re.shape[-1] != x.shape[-1]:
            x_re = F.interpolate(x_re, size=x.shape[-1], mode='nearest')

        x_out = torch.add(x, x_re)
        return x_out

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=10000):
        super().__init__()
        self.d_model = d_model

    def forward(self, time):
        device = time.device
        half_d = self.d_model // 2
        embeddings = math.log(10000) / (half_d - 1)
        embeddings = torch.exp(torch.arange(half_d, device=device) * -embeddings)
        embeddings = time[:, None] * embeddings[None, :]
        embeddings = torch.cat([torch.sin(embeddings), torch.cos(embeddings)], dim=-1)
        return embeddings 

class UNET_1D_Diffusion(nn.Module):
    def __init__(self, latent_dim, time_emb_dim=128):
        super().__init__()
        self.latent_dim = latent_dim

        # Time embedding
        self.time_mlp = nn.Sequential(
            PositionalEncoding(time_emb_dim),
            nn.Linear(time_emb_dim, time_emb_dim),
            nn.ReLU(),
            nn.Linear(time_emb_dim, time_emb_dim)
        )

        # Encoder
        self.enc1_cbr = conbr_block(1, 64, kernel_size=5, stride=1, dilation=1)
        self.enc1_re = re_block(64, 64, kernel_size=5, dilation=1)

        self.enc2_cbr = conbr_block(64, 128, kernel_size=5, stride=5, dilation=1)
        self.enc2_re = re_block(128, 128, kernel_size=5, dilation=1)

        self.enc3_cbr = conbr_block(128, 192, kernel_size=5, stride=5, dilation=1)
        self.enc3_re = re_block(192, 192, kernel_size=5, dilation=1)

        # Bottleneck
        self.bottleneck_cbr = conbr_block(192, 256, kernel_size=3, stride=4, dilation=1)
        self.bottleneck_re = re_block(256, 256, kernel_size=3, dilation=1)

        # Decoder
        self.dec3_up = nn.Upsample(scale_factor=5, mode='nearest')
        self.dec3_cbr = conbr_block(256 + 192, 192, kernel_size=3, stride=1, dilation=1)
        self.dec3_re = re_block(192, 192, kernel_size=3, dilation=1)

        self.dec2_up = nn.Upsample(scale_factor=5, mode='nearest')
        self.dec2_cbr = conbr_block(192 + 128, 128, kernel_size=5, stride=1, dilation=1)
        self.dec2_re = re_block(128, 128, kernel_size=5, dilation=1)

        self.dec1_up = nn.Upsample(scale_factor=5, mode='nearest')
        self.dec1_cbr = conbr_block(128 + 64, 64, kernel_size=5, stride=1, dilation=1)
        self.dec1_re = re_block(64, 64, kernel_size=5, dilation=1)

        self.final_conv = nn.Conv1d(64, 1, kernel_size=1)

        # Beta schedule
        beta = torch.linspace(0.0001, 0.02, 1000)
        alpha = 1. - beta
        alpha_bar = torch.cumprod(alpha, dim=0)
        self.register_buffer('beta', beta)
        self.register_buffer('alpha', alpha)
        self.register_buffer('alpha_bar', alpha_bar)

    def forward(self, z_t, t):
        # Embedding timestep
        t_emb = self.time_mlp(t)
        print('t_emb shape diffusion:', t_emb.shape)
        print('z_t shape:', z_t.shape)

        # Thêm chiều channel
        x = z_t.unsqueeze(1)  # shape: [B, 1, latent_dim]
        print('x shape:', x.shape)

        # Encoder
        e1 = self.enc1_cbr(x, t_emb)
        e1 = self.enc1_re(e1, t_emb)
        # print('e1 shape:', e1.shape)
        print('e1 diffusion shape:', e1.shape)

        e2 = self.enc2_cbr(e1, t_emb)
        print("lỗi 1")
        e2 = self.enc2_re(e2, t_emb)
        print("lỗi 2")
        print('e2 diffusion shape:', e2.shape)
        print('t_emv', t_emb.shape)

        e3 = self.enc3_cbr(e2, t_emb)
        print('lỗi 3')
        e3 = self.enc3_re(e3, t_emb)
        print('e3 diffusion shape:', e3.shape)
        print('lỗi 4')
        b = self.bottleneck_cbr(e3, t_emb)
        print('bottleneck shape:', b.shape)
        print('lỗi 5')
        b = self.bottleneck_re(b, t_emb)

        # Decoder
        up3 = self.dec3_up(b)
        print('up3 shape:', up3.shape)
        print('e3 shape:', e3.shape)
        if e3.shape[-1] != up3.shape[-1]:
            e3 = F.interpolate(e3, size=up3.shape[-1], mode='nearest')
        up3 = torch.cat([up3, e3], dim=1)
        d3 = self.dec3_cbr(up3, t_emb)
        d3 = self.dec3_re(d3, t_emb)

        up2 = self.dec2_up(d3)
        print('up2 shape:', up2.shape)
        print('e2 shape:', e2.shape)
        if e2.shape[-1] != up2.shape[-1]:
            e2 = F.interpolate(e2, size=up2.shape[-1], mode='nearest')
        up2 = torch.cat([up2, e2], dim=1)
        d2 = self.dec2_cbr(up2, t_emb)
        d2 = self.dec2_re(d2, t_emb)

        up1 = self.dec1_up(d2)

        print('up1 shape:', up1.shape)
        print('e1 shape:', e1.shape)
        if e1.shape[-1] != up1.shape[-1]:
            e1 = F.interpolate(e1, size=up1.shape[-1], mode='nearest')
        up1 = torch.cat([up1, e1], dim=1)
        d1 = self.dec1_cbr(up1, t_emb)
        d1 = self.dec1_re(d1, t_emb)

        out = self.final_conv(d1).squeeze(1)  # shape: [B, latent_dim]
        return out

In [83]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleDiffusionModel(nn.Module):
    def __init__(self, latent_dim, time_steps=1000):
        super().__init__()
        self.time_steps = time_steps
        self.latent_dim = latent_dim
        
        # Tạo các beta_schedule tuyến tính
        beta = torch.linspace(0.0001, 0.02, time_steps)
        alpha = 1. - beta
        alpha_bar = torch.cumprod(alpha, dim=0)

        self.register_buffer('beta', beta)
        self.register_buffer('alpha', alpha)
        self.register_buffer('alpha_bar', alpha_bar)

        # Mạng neural đơn giản để dự đoán nhiễu
        self.model = nn.Sequential(
            nn.Linear(latent_dim + 1, 512),
            nn.ReLU(),
            nn.Linear(512, latent_dim)
        )

    def forward(self, z, t):
        noise = torch.randn_like(z)
        
        # Đảm bảo t là long type và shape phù hợp
        if isinstance(t, torch.Tensor):
            t = t.to(dtype=torch.long)
        else:
            t = torch.tensor([t], device=z.device, dtype=torch.long).expand(z.shape[0])

        sqrt_alpha_bar = torch.sqrt(self.alpha_bar[t])[:, None]
        sqrt_one_minus_alpha_bar = torch.sqrt(1 - self.alpha_bar[t])[:, None]
        noisy_z = sqrt_alpha_bar * z + sqrt_one_minus_alpha_bar * noise

        predicted_noise = self.model(torch.cat([noisy_z, t.unsqueeze(1)], dim=1))
        loss = F.mse_loss(predicted_noise, noise)
        return loss

    def sample(self, num_samples):
        z = torch.randn(num_samples, self.latent_dim).to(next(self.parameters()).device)
        for i in reversed(range(self.time_steps)):
            t = torch.full((num_samples,), i, device=z.device, dtype=torch.long)
            z = self.denoise_step(z, t)
        return z
    
    def denoise_step(self, z, t):
        timestep = t.item() if isinstance(t, torch.Tensor) else t
        t_batch = torch.full((z.shape[0],), timestep, device=z.device, dtype=torch.long)

        predicted_noise = self.model(torch.cat([z, t_batch.unsqueeze(1)], dim=1))

        alpha = self.alpha[timestep]
        alpha_bar = self.alpha_bar[timestep]
        beta = self.beta[timestep]

        z = (1 / torch.sqrt(alpha)) * (z - ((1 - alpha) / torch.sqrt(1 - alpha_bar)) * predicted_noise)
        if timestep > 0:
            noise = torch.randn_like(z)
            z += torch.sqrt(beta) * noise
        return z

In [65]:
# Thay đoạn lỗi này:
# latent_dim = vae.encoder[-1].out_features

# Bằng đoạn này:
with torch.no_grad():
    vae_new.encoder.eval()
    dummy_input = torch.randn(1, input_dim).to(device)  # Thay input_dim theo đúng dữ liệu của bạn
    z_mean, z_log_var, _ = vae_new.encoder(dummy_input)
    latent_dim = z_mean.shape[1]

In [66]:
# latent_dim = vae.encoder[-1].out_features  # Kích thước latent z
diffusion_model = SimpleDiffusionModel(latent_dim=latent_dim).to(device)
diffusion_optimizer = torch.optim.Adam(diffusion_model.parameters(), lr=1e-3)

In [87]:
from tqdm import tqdm
def train_diffusion(vae, diffusion_model, dataloader, optimizer, device, time_steps=1000, epochs=20):
    diffusion_model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch_data, _ in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
            batch_data = batch_data.to(device)
            with torch.no_grad():
                z_mean, _, z = vae.encoder(batch_data)

            t = torch.randint(0, time_steps, (z.shape[0],), device=device).long()
            noise = torch.randn_like(z)
            sqrt_alpha_bar = torch.sqrt(diffusion_model.alpha_bar[t])[:, None]
            sqrt_one_minus_alpha_bar = torch.sqrt(1 - diffusion_model.alpha_bar[t])[:, None]
            noisy_z = sqrt_alpha_bar * z + sqrt_one_minus_alpha_bar * noise

            predicted_noise = diffusion_model(noisy_z, t)
            loss = F.mse_loss(predicted_noise, noise)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"[Diffusion Train] Epoch {epoch+1}, Loss: {total_loss/len(dataloader):.4f}")

In [None]:
# from tqdm import tqdm
# def train_diffusion(vae, diffusion_model, dataloader, optimizer, device, time_steps=1000, epochs=20):
#     diffusion_model.train()
#     for epoch in range(epochs):
#         total_loss = 0
#         for batch_data, _ in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
#             batch_data = batch_data.to(device)
#             with torch.no_grad():
#                 z_mean, z_log_var, z = vae.encoder(batch_data)

#             t = torch.randint(0, time_steps, (z.shape[0],), device=device).long()
#             loss = diffusion_model(z, t)

#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             total_loss += loss.item()

#         print(f"Epoch {epoch+1}, Diffusion Loss: {total_loss/len(dataloader):.4f}")

In [88]:
def evaluate_diffusion_with_classifier(vae, diffusion_model, classifier, test_loader, device, time_steps=1000):
    diffusion_model.eval()
    classifier.eval()
    
    correct = 0
    total = 0
    
    with torch.no_grad():
        for batch_data, batch_labels in test_loader:
            batch_data = batch_data.to(device)
            batch_labels = batch_labels.to(device)

            # Lấy z thật từ encoder
            z_mean, _, z = vae.encoder(batch_data)

            # Forward diffusion
            noisy_z = torch.sqrt(diffusion_model.alpha_bar[-1]) * z \
                      + torch.sqrt(1 - diffusion_model.alpha_bar[-1]) * torch.randn_like(z)

            # Reverse diffusion
            z_recovered = noisy_z
            for t in reversed(range(time_steps)):
                t_tensor = torch.full((z.shape[0],), t, device=z.device, dtype=torch.long)
                noise_pred = diffusion_model(z_recovered, t_tensor)
                alpha = diffusion_model.alpha[t]
                alpha_bar = diffusion_model.alpha_bar[t]
                beta = diffusion_model.beta[t]

                noise_term = torch.sqrt(beta) * torch.randn_like(z_recovered) if t > 0 else 0
                z_recovered = (1 / torch.sqrt(alpha)) * (z_recovered - ((1 - alpha) / torch.sqrt(1 - alpha_bar)) * noise_pred) + noise_term

            # Phân loại
            logits = classifier(z_recovered)
            preds = torch.argmax(logits, dim=1)
            correct += (preds == batch_labels).sum().item()
            total += batch_labels.size(0)

    accuracy = correct / total
    print(f"[Diffusion → Classifier] Accuracy: {accuracy * 100:.2f}%")

In [None]:
# def evaluate_diffusion_with_classifier(vae, diffusion_model, classifier, test_loader, device, time_steps=1000):
#     vae.eval()
#     diffusion_model.eval()
#     classifier.eval()
    
#     correct = 0
#     total = 0
    
#     with torch.no_grad():
#         for batch_data, batch_labels in test_loader:
#             batch_data = batch_data.to(device)
#             batch_labels = batch_labels.to(device)

#             # Lấy z từ encoder
#             z_mean, z_log_var, z = vae.encoder(batch_data)

#             # Forward diffusion
#             t_forward = time_steps - 1
#             sqrt_alpha_bar = torch.sqrt(diffusion_model.alpha_bar[t_forward])
#             sqrt_one_minus_alpha_bar = torch.sqrt(1 - diffusion_model.alpha_bar[t_forward])
#             noisy_z = sqrt_alpha_bar * z + sqrt_one_minus_alpha_bar * torch.randn_like(z)

#             # Reverse diffusion (hoàn nhiễu)
#             z_recovered = noisy_z
#             for t in reversed(range(time_steps)):
#                 z_recovered = diffusion_model.denoise_step(z_recovered, t)

#             # Phân loại trên z đã hoàn nhiễu
#             logits = classifier(z_recovered)

#             # Kiểm tra shape của logits
#             print("Logits shape:", logits.shape)  # Debug

#             if len(logits.shape) == 1:
#                 # Trường hợp: binary classification với output shape [batch_size]
#                 preds = (torch.sigmoid(logits) > 0.5).float()
#             elif len(logits.shape) == 2:
#                 # Trường hợp: multi-class classification
#                 preds = torch.argmax(logits, dim=1)
#             else:
#                 raise ValueError(f"Unexpected logits shape: {logits.shape}")

#             # Cập nhật accuracy
#             if len(batch_labels.shape) == 2 and batch_labels.shape[1] == 1:
#                 batch_labels = batch_labels.squeeze(1)  # về shape [batch_size]

#             correct += (preds == batch_labels).sum().item()
#             total += batch_labels.size(0)

#     accuracy = correct / total
#     print(f"Accuracy on recovered z: {accuracy:.4f}")

In [73]:
device

device(type='cuda')

In [74]:
# latent_dim = vae.encoder[-1].out_features
diffusion_model = SimpleDiffusionModel(latent_dim=latent_dim).to(device)
diffusion_optimizer = optim.Adam(diffusion_model.parameters(), lr=1e-3)

train_diffusion(vae_new, diffusion_model, train_loader, diffusion_optimizer, device)

evaluate_diffusion_with_classifier(vae, diffusion_model, vae.classifier, test_loader, device)

Epoch 1/20: 100%|██████████| 22/22 [00:00<00:00, 92.62it/s]


Epoch 1, Diffusion Loss: 42.7793


Epoch 2/20: 100%|██████████| 22/22 [00:00<00:00, 111.39it/s]


Epoch 2, Diffusion Loss: 4.0952


Epoch 3/20: 100%|██████████| 22/22 [00:00<00:00, 108.44it/s]


Epoch 3, Diffusion Loss: 1.3671


Epoch 4/20: 100%|██████████| 22/22 [00:00<00:00, 109.31it/s]


Epoch 4, Diffusion Loss: 1.0407


Epoch 5/20: 100%|██████████| 22/22 [00:00<00:00, 108.88it/s]


Epoch 5, Diffusion Loss: 0.9676


Epoch 6/20: 100%|██████████| 22/22 [00:00<00:00, 107.51it/s]


Epoch 6, Diffusion Loss: 0.9269


Epoch 7/20: 100%|██████████| 22/22 [00:00<00:00, 105.55it/s]


Epoch 7, Diffusion Loss: 0.8888


Epoch 8/20: 100%|██████████| 22/22 [00:00<00:00, 105.90it/s]


Epoch 8, Diffusion Loss: 0.8463


Epoch 9/20: 100%|██████████| 22/22 [00:00<00:00, 108.87it/s]


Epoch 9, Diffusion Loss: 0.8085


Epoch 10/20: 100%|██████████| 22/22 [00:00<00:00, 101.10it/s]


Epoch 10, Diffusion Loss: 0.7707


Epoch 11/20: 100%|██████████| 22/22 [00:00<00:00, 100.04it/s]


Epoch 11, Diffusion Loss: 0.7214


Epoch 12/20: 100%|██████████| 22/22 [00:00<00:00, 118.32it/s]


Epoch 12, Diffusion Loss: 0.6964


Epoch 13/20: 100%|██████████| 22/22 [00:00<00:00, 116.84it/s]


Epoch 13, Diffusion Loss: 0.6584


Epoch 14/20: 100%|██████████| 22/22 [00:00<00:00, 119.06it/s]


Epoch 14, Diffusion Loss: 0.6505


Epoch 15/20: 100%|██████████| 22/22 [00:00<00:00, 117.38it/s]


Epoch 15, Diffusion Loss: 0.6098


Epoch 16/20: 100%|██████████| 22/22 [00:00<00:00, 112.84it/s]


Epoch 16, Diffusion Loss: 0.5598


Epoch 17/20: 100%|██████████| 22/22 [00:00<00:00, 112.00it/s]


Epoch 17, Diffusion Loss: 0.5508


Epoch 18/20: 100%|██████████| 22/22 [00:00<00:00, 122.61it/s]


Epoch 18, Diffusion Loss: 0.5294


Epoch 19/20: 100%|██████████| 22/22 [00:00<00:00, 112.93it/s]


Epoch 19, Diffusion Loss: 0.5032


Epoch 20/20: 100%|██████████| 22/22 [00:00<00:00, 110.51it/s]


Epoch 20, Diffusion Loss: 0.4683
Logits shape: torch.Size([32])
Logits shape: torch.Size([32])
Logits shape: torch.Size([32])
Logits shape: torch.Size([32])
Logits shape: torch.Size([32])
Logits shape: torch.Size([32])
Logits shape: torch.Size([32])
Logits shape: torch.Size([32])
Logits shape: torch.Size([32])
Logits shape: torch.Size([12])
Accuracy on recovered z: 0.5433


In [150]:
# Dummy input để lấy latent_dim
with torch.no_grad():
    dummy_input = torch.randn(1, input_dim).to(device)  # input_dim = số chiều đầu vào của VAE encoder
    z_mean, _, _ = vae.encoder(dummy_input)
    latent_dim = z_mean.shape[1]

# Khởi tạo base UNET_1D với input_dim = latent_dim
unet_base = UNET_1D(
    input_dim=latent_dim,
    layer_n=32,               # có thể điều chỉnh theo nhu cầu
    kernel_size=5,
    depth=2
)

# Wrap bằng lớp hỗ trợ diffusion
diffusion_model = UNET_1D_Diffusion(latent_dim=latent_dim).to(device)

# Optimizer
diffusion_optimizer = optim.Adam(diffusion_model.parameters(), lr=1e-3)

# Huấn luyện mô hình diffusion"
train_diffusion(vae_new, diffusion_model, train_loader, diffusion_optimizer, device)

# Đánh giá độ chính xác phân loại trên z đã phục hồi
evaluate_diffusion_with_classifier(vae, diffusion_model, vae.classifier, test_loader, device)

  loss = F.mse_loss(predicted_noise, noise)
Epoch 1/20:   0%|          | 0/22 [00:00<?, ?it/s]

t_emb shape diffusion: torch.Size([32, 128])
z_t shape: torch.Size([32, 64])
x shape: torch.Size([32, 1, 64])
conbr_block t_emb shape: torch.Size([32, 64])
t_emb_proj shape: torch.Size([32, 64, 1])
torch.Size([32, 64, 1]) torch.Size([32, 64, 66])
conbr_block out shape: torch.Size([32, 64, 66])
conbr_block t_emb shape: torch.Size([32, 64])
t_emb_proj shape: torch.Size([32, 64, 1])
torch.Size([32, 64, 1]) torch.Size([32, 64, 68])
conbr_block out shape: torch.Size([32, 64, 68])
conbr_block t_emb shape: torch.Size([32, 64])
t_emb_proj shape: torch.Size([32, 64, 1])
torch.Size([32, 64, 1]) torch.Size([32, 64, 70])
conbr_block out shape: torch.Size([32, 64, 70])
e1 diffusion shape: torch.Size([32, 64, 66])
conbr_block t_emb shape: torch.Size([32, 128])
t_emb_proj shape: torch.Size([32, 128, 1])
torch.Size([32, 128, 1]) torch.Size([32, 128, 14])
conbr_block out shape: torch.Size([32, 128, 14])
lỗi 1
conbr_block t_emb shape: torch.Size([32, 128])
t_emb_proj shape: torch.Size([32, 128, 1])
torc




RuntimeError: The size of tensor a (362) must match the size of tensor b (64) at non-singleton dimension 1