In [1]:
import torch
import sys
sys.path.append('/remote/tychodata/ftairli/work/Projects/TraceHexConv/Models/')
import torch.optim as optim
import torch.nn as nn
import torch.utils.data as data
import gc

from torch.profiler import profile, record_function, ProfilerActivity

In [2]:
N = 100
L = 120
C = 3
H = 11
W = 11
d_main = torch.rand((N,L,C,H,W),dtype=torch.float32,device='cuda')
# d_main = d_main.permute(0, 3, 4, 1, 2).reshape(N, H * W, L, C).reshape(N * H * W, L, C)

In [3]:

class Trace_Block(nn.Module):
    def __init__(self):
        super(Trace_Block, self).__init__()

        self.bi_lstm = nn.LSTM(input_size=3, 
                                hidden_size=10, 
                                num_layers=2, 
                                batch_first=True, 
                                bidirectional=True)
        
        self.lstm = nn.LSTM(input_size=20, # 2 for bidirection
                            hidden_size=10,
                            num_layers=2,
                            batch_first=True)

    def forward(self, x):
        # Read parameters of input
        # N, L, C, H, W = x.shape
        # N = 10
        # L = 120
        # C = 3
        # H = 11
        # W = 11

        # # Flatten spatial dimensions and transpose to (N, H*W, L, C) and Reshape again to (N*H*W, L, C)
        x = x.permute(0, 3, 4, 1, 2).reshape(N, H * W, L, C).reshape(N * H * W, L, C)

        

        # Now you can process all traces in parallel
        out, _ = self.bi_lstm(x)  # Shape = N*H*W, L, 2*hidden_size
        out, _ = self.lstm(out)   # Shape = N*H*W, L, hidden_size
        out = out[:, -1, :]       # Shape = N*H*W, hidden_size

        # Finally, reshape and permute the output to the desired shape
        out = out.reshape(N, H, W, -1).permute(0, 3, 1, 2)  # Shape = N, hidden_size, H, W

        return out


In [4]:
class Recurrent_Block(nn.Module):
    def __init__(self, input_dim=3, hidden_dim=10, num_layers=1, dropout_rate=0.5, num_features=10):
        super(Recurrent_Block, self).__init__()

        # Bidirectional LSTM layer
        self.bidirectional_lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout_rate, bidirectional=True)
        
        # LSTM layers
        self.lstm = nn.LSTM(hidden_dim*2, hidden_dim, num_layers, batch_first=True, dropout=dropout_rate)

        # Linear layer to transform output to desired number of features
        self.fc = nn.Linear(hidden_dim, num_features)

        # Initialize weights
        self.init_weights()

    def init_weights(self):
        for name, param in self.bidirectional_lstm.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                nn.init.xavier_normal_(param)

        for name, param in self.lstm.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                nn.init.xavier_normal_(param)

        nn.init.xavier_normal_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0.0)

    def forward(self, x):
        # input shape: (batch_size, sequence_length, num_channels, width, height)

        batch_size, sequence_length, num_channels, width, height = x.shape

        # rearrange input to shape: (batch_size*width*height, sequence_length, num_channels)
        x = x.permute(0, 3, 4, 1, 2).contiguous().view(-1, sequence_length, num_channels)

        # pass data through Bidirectional LSTM layer
        bidir_lstm_out, _ = self.bidirectional_lstm(x)  # output shape: (batch_size*width*height, sequence_length, hidden_dim*2)

        # pass data through LSTM layers
        lstm_out, _ = self.lstm(bidir_lstm_out)  # output shape: (batch_size*width*height, sequence_length, hidden_dim)

        # apply linear layer to every time step
        features = self.fc(lstm_out[:, -1, :])  # output shape: (batch_size*width*height, num_features)

        # reshape features to original width and height, shape: (batch_size, height, width, num_features)
        features = features.view(batch_size, -1, width, height)

        return features


In [5]:
class Transformer_Block(nn.Module):
    def __init__(self, d_model=360, nhead=4, num_layers=1, dim_feedforward=512, dropout=0.1,num_features=10):
        super(Transformer_Block, self).__init__()

        # Transformer layers
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)

        # Linear layer to transform output to desired number of features
        self.fc = nn.Linear(d_model,num_features)
        
    def forward(self, x):
        # input shape: (batch_size, sequence_length, num_channels, width, height)

        batch_size, sequence_length, num_channels, width, height = x.shape

        # rearrange input to shape: (batch_size*width*height, sequence_length*num_channels)
        x = x.permute(0, 3, 4, 1, 2).contiguous().view(-1, sequence_length * num_channels)

        # pass data through Transformer layers
        transformer_out = self.transformer_encoder(x)  # output shape: (batch_size*width*height, sequence_length, d_model)

        # apply linear layer to every time step
        features = self.fc(transformer_out)  # output shape: (batch_size*width*height, sequence_length, d_model)

        # reshape features to original width and height, and permute dimensions
        # final shape: (batch_size, sequence_length*d_model, height, width)
        features = features.view(batch_size, width, height, -1).permute(0, 3, 1, 2)

        return features


In [6]:
model = Transformer_Block().cuda()

In [7]:
model.train()
with profile(activities=[ ProfilerActivity.CUDA],profile_memory=True, record_shapes=True) as prof:
    
    predictions = model(d_main)
    torch.cuda.empty_cache()


strings = ((prof.key_averages().table(sort_by="self_cuda_memory_usage", row_limit=10))).split('\n')
len1 = len('-------------------------------------------------------  ')
len2 = len('-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------')
len3 = len('')
for s in strings:
    print(s[:len1],end='')
    print(s[len2:])

STAGE:2023-07-05 22:13:09 22753:22753 ActivityProfilerController.cpp:311] Completed Stage: Warm Up


-------------------------------------------------------    ------------  ------------  ------------  
                                                   Name  m      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------    ------------  ------------  ------------  
                                               [memory]         5.12 Gb       5.12 Gb            59  
                                       cudaLaunchKernel             0 b           0 b            23  
void at::native::elementwise_kernel<128, 2, void at:...             0 b           0 b             2  
                                  cudaStreamIsCapturing             0 b           0 b            14  
                                             cudaMalloc             0 b           0 b            13  
                                               cudaFree             0 b           0 b             2  
                                 cudaDeviceGetAttribute             0 b         

STAGE:2023-07-05 22:13:09 22753:22753 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-07-05 22:13:09 22753:22753 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [8]:
predictions.shape

torch.Size([100, 10, 11, 11])