In [153]:
import wandb
wandb.login()

True

In [154]:
import numpy as np
import pandas as pd
import torch.nn as nn
import h5py
import pickle
import torch
import time
import yaml
import copy
import math
import sys
import time
import os
import torch.nn.functional as F
import torchvision
import transformers
from torch import optim
from torch.utils.data import Dataset
from torch.utils.data.dataloader import DataLoader

In [155]:
import yaml
config_wbd = {}
with open('/home/quang/Documents/XAI_env-main/Code/config.yml', 'r') as f:
    config_wdb = yaml.safe_load(f)

In [156]:
config_wdb

{'method': 'bayes',
 'metric': {'goal': 'minimize', 'name': 'test_loss_avg'},
 'parameters': {'batch_size': {'values': [128]},
  'd_model': {'value': 16},
  'dff': {'values': [128, 256]},
  'dropout': {'distribution': 'uniform', 'min': 0.05, 'max': 0.15},
  'l_win': {'distribution': 'int_uniform', 'min': 120, 'max': 125},
  'lr': {'distribution': 'log_uniform', 'min': -6.5, 'max': -5.5},
  'n_epochs': {'distribution': 'int_uniform', 'min': 60, 'max': 100},
  'n_head': {'value': 4},
  'num_layers': {'distribution': 'int_uniform', 'min': 1, 'max': 3},
  'weight_decay': {'distribution': 'log_uniform', 'min': -6, 'max': -4},
  'noise_level': {'distribution': 'uniform', 'min': 0.01, 'max': 0.05},
  'embed_dim': {'value': 16},
  'result_dir': {'value': '/home/quang/Documents/XAI_env-main/results/'},
  'data_dir': {'value': '/home/quang/Documents/XAI_env-main/data/processed/'}}}

In [157]:
class CustomDataset(Dataset):
    def __init__(self, config, x_path, y_path):
        super().__init__()
        self.config = config
        self.x_path = x_path
        self.y_path = y_path
        self.load_dataset()

    def __len__(self):
        return self.data_.shape[0]

    def __getitem__(self, idx):
        input_tensor = torch.from_numpy(self.data_[idx]).float()
        label_tensor = torch.from_numpy(np.array(self.labels[idx])).float()
        return {'input': input_tensor, 'labels': label_tensor}
    
    def load_dataset(self):
        x_data = {}
        y_data = {}
        x_path_ = os.path.join(self.config['data_dir'], self.x_path)
        y_path_ = os.path.join(self.config['data_dir'], self.y_path)

        with h5py.File(x_path_, 'r') as x_file:
            x_data_key = list(x_file.keys())[0]
            x_data['data'] = np.array(x_file[x_data_key])

        with h5py.File(y_path_, 'r') as y_file:
            y_data_key = list(y_file.keys())[0]
            y_data['label'] = np.array(y_file[y_data_key])

        self.data_ = x_data['data'].transpose(1,0)
        self.data_ = np.expand_dims(self.data_, axis=0)
        self.labels = y_data['label']

        
        
        self.config['data_shape'] = self.data_.shape[1:]

    def getshape(self):
            return self.config['data_shape']


In [158]:
class Autoencoder(nn.Module):
    def __init__(self, input_size, hidden_dim,embed_dim, noise_level):
        super(Autoencoder, self).__init__()
        self.input_size, self.hidden_dim, self.noise_level = input_size, embed_dim,noise_level
        self.embed_dim = embed_dim
        self.fc1 = nn.Linear(self.input_size, self.hidden_dim)
        self.fc2 = nn.Linear(self.hidden_dim, self.input_size)
        
    def encoder(self,x):
        x = self.fc1(x)
        h1 = F.relu(x)
        return h1
    
    def mask(self,x):
        corrupted_x = x + self.noise_level + torch.randn_like(x)   # randn_like  Initializes a tensor where all the elements are sampled from a normal distribution.
        return corrupted_x
    
    def decoder(self, x):
        h2 = self.fc2(x)
        return h2
    
    def forward (self, x):
        out = self.mask(x) # Adding noise to feed the network
        encoder = self.encoder(out)
        decoder = self.decoder(encoder)
        return encoder, decoder 
    
    ## Transformer 
    ### Positional encoding
class PositionalEncoding(nn.Module):
        def __init__(self,d_model, dropout=0.0,max_len=16):
            super(PositionalEncoding, self).__init__()
            pe = torch.zeros(max_len,d_model)
            position = torch.arange(0,max_len, dtype = torch.float).unsqueeze(1)
            
            div_term = torch.exp(torch.arange(0,d_model,2).float()*(-math.log(10000.0) / d_model))
            
            pe[:, 0::2] = torch.sin(position * div_term)
            pe[:, 1::2] = torch.cos(position * div_term)

            pe = pe.unsqueeze(0).transpose(0, 1)

            self.register_buffer('pe', pe)
            
        def forward(self, x):
            x = x + self.pe[:x.size(1), :].squeeze(1)
            return x
        
class Net(nn.Module):
        def __init__(self,feature_size,num_layers,n_head,dropout,noise_level,embed_dim):
            super(Net,self).__init__()
            self.embed_dim = embed_dim
            self.hidden_dim = 4*embed_dim
            self.auto_hidden = int(feature_size / 2)
            input_size = self.auto_hidden
            self.pos = PositionalEncoding(d_model=input_size, max_len=input_size)
            encoder_layers = nn.TransformerEncoderLayer(d_model=input_size, nhead=n_head, dim_feedforward=self.hidden_dim, dropout=dropout)
            self.cell = nn.TransformerEncoder(encoder_layers,num_layers=num_layers)
            self.linear = nn.Linear(input_size,1)
            self.autoencoder = Autoencoder(input_size = feature_size, hidden_dim = self.auto_hidden,embed_dim = embed_dim, noise_level=noise_level)
              
        def forward(self,x):
            batch_size, feature_num, feature_size = x.shape
            encode, decode = self.autoencoder(x.view(batch_size,-1).float()) # Equals batch_size * seq_len
            out = encode.reshape(batch_size,-1,self.auto_hidden)
            out = self.pos(out)
            out = out.reshape(1,batch_size,-1)  #(1,batch_size,feature_size)
            out = self.cell(out)
            out = out.reshape(batch_size,-1)
            out = self.linear(out)
            
            return out,decode
        

In [159]:
#Trainer 
class ModelTrainer():
    def __init__(self, model, train_data, criterion, optimizer, device, config):
        self.model = model
        self.train_data = train_data
        self.device = device
        self.config = config
        self.train_loss_list = list()
        self.min_loss = float('inf')
        self.best_model = None
        self.best_optimizer = None
        self.optimizer = optimizer
        self.criterion = criterion

    def train_epoch(self, epoch):
        train_loss = 0.0
        self.model.train()
        for x, rul in self.train_data:
            self.model.zero_grad()
            out = self.model(x.to(self.device).float())
            loss = torch.sqrt(self.criterion(out.float(), rul.to(self.device).float())) # RMSE
            loss.backward()
            self.optimizer.step()
            train_loss += loss

        train_loss = train_loss / len(self.train_data)
        wandb.log({"train loss": train_loss})
        self.train_loss_list.append(train_loss)

        if train_loss < self.min_loss:
            self.min_loss = train_loss
            self.best_model = deepcopy(self.model.state_dict())
            self.best_optimizer = deepcopy(self.optimizer.state_dict())
            self.best_epoch_in_round = epoch
    # Set up the epoch for trainning process
    def train(self):
        self.model.to(self.device)

        for epoch in range(1, self.config['n_epochs'] + 1):
            self.train_epoch(epoch)
            wandb.log({"epoch": epoch})

        self.config['train_loss_list'] = self.train_loss_list

    def update_config(self):
        return self.config




In [160]:
sweep_id = wandb.sweep(sweep=config_wdb, project='RUL_Bearing')



Create sweep with ID: ijn5cszy
Sweep URL: https://wandb.ai/zhukov01/RUL_Bearing/sweeps/ijn5cszy


In [161]:
#Train
torch.manual_seed(42)
def training():
    with wandb.init(config = config_wbd):
        config = wandb.config
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        train_data = CustomDataset(config, '/home/quang/Documents/XAI_env-main/data/processed/IMS/x_train.hdf5','/home/quang/Documents/XAI_env-main/data/processed/IMS/y_train.hdf5')
        train_loader = DataLoader(train_data,
                                  batch_size=128,
                                  shuffle=True)
        shape = train_data.getshape()
        model = Net(feature_size = shape[1],
                    noise_level = config['noise_level'],   
                    num_layers=config['num_layers'],
                    embed_dim = config['embed_dim'],
                    n_head = config['n_head'],
                    dropout=config['dropout'])

        optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config['weight_decay'])
        criterion = nn.MSELoss()
        trainer = ModelTrainer(model = model, 
                               train_data = train_loader, 
                               criterion = criterion , 
                               optimizer = optimizer, 
                               device = device, 
                               config = config)
        trainer.train()

        test_data = CustomDataset(config,'/home/quang/Documents/XAI_env-main/data/processed/IMS/x_test.hdf5','/home/quang/Documents/XAI_env-main/data/processed/IMS/y_test.hdf5')
        test_loader = DataLoader(test_data,
                                  batch_size=128,
                                  shuffle=True)
        model.to(device)
        test_loss = 0.0
        test_loss_list = list()
        pred_list = list()
        with torch.no_grad():
            for x, rul in test_loader:
                out = model(x.to(device).float())
                loss = torch.sqrt(criterion(out.float(), rul.to(device).float()))
                test_loss += loss
                test_loss_list.append(loss)
                pred_list.append(out.float())

        test_loss_avg = test_loss / len(test_loader)
        config['truth_list'] = truth_list
        config['pred_list'] = pred_list
        config['test_loss_avg'] = test_loss_avg
        config['test_loss_list_per_id'] = test_loss_list
        wandb.log({"test_loss_avg": test_loss_avg})





        val_data = CustomDataset(config,'/home/quang/Documents/XAI_env-main/data/processed/IMS/x_val.hdf5','/home/quang/Documents/XAI_env-main/data/processed/IMS/y_val.hdf5' )
        val_loader = DataLoader(val_data,
                                 batch_size=128,
                                 shuffle=True)
        model.to(device)
        val_loss = 0.0
        val_loss_list = list()
        with torch.no_grad():
            for x, rul in val_loader:
                out = model(x.to(device).float())
                loss = torch.sqrt(criterion(out.float(), rul.to(device).float()))
                val_loss += loss
                val_loss_list.append(loss)
        val_loss_avg = val_loss / len(test_loader)
        config['val_loss_avg'] = test_loss_avg
        config['val_loss_list_per_id'] = test_loss_list
        wandb.log({"val_loss_avg": test_loss_avg})

In [162]:
!date
wandb.agent(sweep_id, function=training ,count = 200)

Thứ hai, 12 Tháng 6 năm 2023 17:11:30 +07


[34m[1mwandb[0m: Agent Starting Run: jjrv7l4a with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	d_model: 16
[34m[1mwandb[0m: 	data_dir: /home/quang/Documents/XAI_env-main/data/processed/
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.0923934647328226
[34m[1mwandb[0m: 	embed_dim: 16
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.0034024401507932464
[34m[1mwandb[0m: 	n_epochs: 72
[34m[1mwandb[0m: 	n_head: 4
[34m[1mwandb[0m: 	noise_level: 0.013989830032011836
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	result_dir: /home/quang/Documents/XAI_env-main/results/
[34m[1mwandb[0m: 	weight_decay: 0.017274217276255673


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.526558…

Run jjrv7l4a errored: AttributeError("'str' object has no attribute 'to'")
[34m[1mwandb[0m: [32m[41mERROR[0m Run jjrv7l4a errored: AttributeError("'str' object has no attribute 'to'")
[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
