# Seismic Transformer V4.0 train book

*Author: Jason Jiang (Xunfun Lee)*

*Date: 2024.02.01*

In [1]:
from PythonScripts.utility import SetDevice

device = SetDevice()

GPU: cuda
CUDA device numbers:  1


## Data preparation

From SeT-4 we are using totally different data and data format. The data is stored in `h5` format, which is a binary format. And we have 132GB data in total, which is too large to be loaded into memory. So we need to load the data in batches.

In [2]:
from PythonScripts.data_preparation import DynamicDatasetV1
import torch

BATCH_SIZE = 256

In [3]:
# initialize the dataset
gm_file_path = 'D:/SesimicTransformerData/All_GMs/GMs_knet_3474_AF_57.h5'
building_files_dir = 'D:/SeismicTransformerData/SeT-4.0'

dataset = DynamicDatasetV1(gm_file_path=gm_file_path, 
                           building_files_dir=building_files_dir, 
                           device=device)

In [4]:
def custom_collate(batch):
    gm_data_list, building_attributes_list, acc_floor_response_list, blg_damage_state_list = zip(*batch)

    # Stack ground motion data, floor response data, and damage state data
    gm_data_batch = torch.stack(gm_data_list)
    acc_floor_response_batch = torch.stack(acc_floor_response_list)
    blg_damage_state_batch = torch.stack(blg_damage_state_list)

    # Combine building attributes into a batched format
    batched_building_attributes = {}
    for key in building_attributes_list[0].keys():
        key_tensor_list = [d[key] for d in building_attributes_list]
        # Since batch size is 1, we can directly extract the single tensor
        # If batch size were greater than 1, we would use torch.stack(key_tensor_list)
        batched_building_attributes[key] = key_tensor_list[0]

    return gm_data_batch, batched_building_attributes, acc_floor_response_batch, blg_damage_state_batch

# Now, use the custom collate function with the DataLoader
# dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=custom_collate)

In [5]:
import torch
from torch.utils.data import DataLoader, random_split

# 计算训练集和测试集的大小
train_size = int(0.8 * len(dataset))
validation_size = len(dataset) - train_size

# 使用random_split来随机分割数据集
train_dataset, validation_dataset = random_split(dataset, [train_size, validation_size])

# 创建两个DataLoader实例
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=custom_collate)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=custom_collate)

In [6]:
# import matplotlib.pyplot as plt

# def test_dataloader(dataloader):
#     # Get a single batch from the dataloader
#     for gm_data, building_attributes, acc_floor_response, blg_damage_state in dataloader:
#         # Loop through all items in the batch
#         for i in range(len(gm_data)):
#             gm_data_sample = gm_data[i].squeeze()  # Remove unneeded dimensions
#             acc_floor_response_sample = acc_floor_response[i].squeeze()
#             blg_damage_state_sample = blg_damage_state[i].item()

#             # Plot ground motion data
#             plt.figure(figsize=(12, 4))
#             plt.plot(gm_data_sample.cpu().numpy())
#             plt.title('Ground Motion Data')
#             plt.xlabel('Time Steps')
#             plt.ylabel('Acceleration')
#             plt.show()

#             # Plot top floor acceleration response
#             plt.figure(figsize=(12, 4))
#             plt.plot(acc_floor_response_sample.cpu().numpy())
#             plt.title('Top Floor Acceleration Response ')
#             plt.xlabel('Time Steps')
#             plt.ylabel('Acceleration')
#             plt.show()

#             # Print building attributes
#             print('Building Attributes:', building_attributes)

#             # Print building damage state
#             print('Building Damage State:', blg_damage_state_sample)

#         # For demonstration, we only process the first batch
#         break

# # Create DataLoader with custom collate function if needed
# # dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)

# # Test the dataloader
# test_dataloader(dataloader)

## Build SeT-4

In [7]:
from PythonScripts.transformer import SeismicTransformerV4

In [8]:
model = SeismicTransformerV4(len_gm=3000,
                              patch_size=250,
                              hidden_size=768,
                              num_heads=12,
                              num_layers=12,
                              dropout_attn=0.1,
                              dropout_mlp=0.1,
                              dropout_embed=0.1,
                              num_of_classes=5).to(device)

### * test data

In [9]:
# key_padding_mask must be match the sequence = 15
key_padding_mask = torch.zeros(BATCH_SIZE, 15, dtype=torch.bool).to(device)
attn_mask = torch.triu(torch.ones(12, 12), diagonal=1).bool().to(device)

In [10]:
gm_data, building_attributes, acc_floor_response, blg_damage_state = next(iter(train_dataloader))

gm_data = gm_data.to(device)
acc_floor_response = acc_floor_response.to(device)
blg_damage_state = blg_damage_state.to(device)

gm_data.shape, building_attributes, acc_floor_response.shape, blg_damage_state.shape

(torch.Size([256, 3000, 1]),
 {'IM': tensor([2], device='cuda:0'),
  'height': tensor([15.], device='cuda:0'),
  'stories': tensor([5], device='cuda:0'),
  'struct_type': tensor([0], device='cuda:0')},
 torch.Size([256, 3000, 1]),
 torch.Size([256, 1]))

In [11]:
damage_state, dynamic_response = model(encoder_input=gm_data, 
                                        struct_info=building_attributes,
                                        decoder_input=acc_floor_response, 
                                        key_padding_mask=key_padding_mask, 
                                        attn_mask=attn_mask)

In [12]:
damage_state.shape, dynamic_response.shape

(torch.Size([256, 5]), torch.Size([256, 3000, 1]))

## Train the SeT-4 model

In [13]:
from torch.nn import CrossEntropyLoss, MSELoss
from transformers import get_linear_schedule_with_warmup

loss_fn_classification = CrossEntropyLoss()
loss_fn_regression = MSELoss()

# tring adamW in SeT-3
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.0)
# optimizer = torch.optim.Adam(params=SeismicTransformerV3_instance.parameters(), 
#                              lr=0.001,
#                              betas=(0.9, 0.999),
#                              weight_decay=0.0)

NUM_EPOCH = 5

num_training_steps = (198018*30 / BATCH_SIZE) * NUM_EPOCH         # total steps = len(train_dataset) / batch_size * epochs
num_warmup_steps = num_training_steps * 0.2  # warmup_ratio usually is 20% of the total steps

lr_scheduler_warmup = get_linear_schedule_with_warmup(optimizer,
                                                      num_warmup_steps=num_warmup_steps,
                                                      num_training_steps=num_training_steps)

# Set up the learning rate scheduler for decay, work inside train()
lr_scheduler_decay = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                                mode='max', # set the min or max lr
                                                                patience=10, # how many epoch loss don't change
                                                                factor=0.1, # new_lr = old_lr * factor
                                                                threshold=0.1, # loss change
                                                                threshold_mode='rel', # compare mode
                                                                cooldown=10, # how many epoch to wait
                                                                min_lr=1e-7, # minimun of lr
                                                                verbose=True)     # print something if useful

In [14]:
from PythonScripts.utility import CreateOutputFolder, CreateLogFileV3, CountNumOfTraining

CLASSIFICATION_WEIGHT = 0.2
HIDDEN_SIZE = 768
NUM_HEADS = 24
NUM_LAYERS = 24

In [15]:
# Count the number of csv file "training_results.csv"
num_of_training = CountNumOfTraining()

# Create output folder
save_dir = CreateOutputFolder(num_of_training=num_of_training+1,
                              hidden_size=HIDDEN_SIZE,
                              num_of_layer=NUM_LAYERS,
                              num_of_head=NUM_HEADS,
                              num_of_epoch=NUM_EPOCH)

# Create log file, a csv file                  
log_filename = CreateLogFileV3(save_dir=save_dir)

In [16]:
from PythonScripts.train import train_set4
import time

# Caculate the start time of the training
strat_time = time.time()

results = train_set4(model=model,
                    train_loader=train_dataloader,
                    val_loader=validation_dataloader,
                    loss_fn_classification=loss_fn_classification,
                    loss_fn_regression=loss_fn_regression,
                    loss_fn_weight_classification=CLASSIFICATION_WEIGHT,
                    optimizer=optimizer,
                    lr_scheduler_warmup=lr_scheduler_warmup,
                    lr_scheduler_decay=lr_scheduler_decay,
                    num_warmup_steps=num_warmup_steps,
                    num_epochs=NUM_EPOCH,
                    device=device,
                    log_filename=log_filename)

end_time = time.time()
total_time = end_time - strat_time
print(f"Training time: {total_time:.3f}秒")

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 000 : teacher forcing ratio = 1.0


KeyboardInterrupt: 