In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from net.utils import get_model_memory_nolog
# Define the MLP Gaussian Decoder
class MLP_GaussianDecoder(nn.Module):
    def __init__(self, input_dim, num_gaussians, output_h, output_w):
        super().__init__()
        self.num_gaussians = num_gaussians
        self.output_h = output_h
        self.output_w = output_w
        
        # MLP to generate Gaussian parameters
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, num_gaussians * 6 *2),  # Input -> hidden layer 0.72G
            nn.SiLU(),
            nn.Linear(num_gaussians * 6 *2, num_gaussians * 6),       # Hidden -> hidden
            nn.SiLU(),
            nn.Linear(num_gaussians * 6, num_gaussians * 6)  # Hidden -> Gaussian parameters
        )
        #     nn.Linear(input_dim, 512),  # Input -> hidden layer 0.30G
        #     nn.ReLU(),
        #     nn.Linear(512, 256),       # Hidden -> hidden
        #     nn.ReLU(),
        #     nn.Linear(256, num_gaussians * 6)  # Hidden -> Gaussian parameters
        # )

    def forward(self, z):
        """
        z: Input encoded vector of shape [batch, length, channel]
        Returns: Decoded Gaussian-based image of shape [batch, output_h, output_w]
        """
        batch_size, length, channel = z.shape

        # Flatten the input for MLP (combine length and channel dimensions)
        z_flat = z.view(batch_size, -1)  # Shape: [batch, length * channel]

        # Generate Gaussian parameters
        params = self.mlp(z_flat)  # Shape: [batch, num_gaussians * 6]
        params = params.view(batch_size, self.num_gaussians, 6)  # [batch, num_gaussians, 6]

        # Separate Gaussian parameters: x, y, sigma_x, sigma_y, rho, intensity
        xc, yc, sigma_x, sigma_y, rho, intensity = torch.chunk(params, 6, dim=-1)

        # Normalize xc, yc to [-1, 1] (image coordinate range)
        xc = torch.tanh(xc)
        yc = torch.tanh(yc)

        # Ensure sigma_x, sigma_y > 0 (use softplus activation)
        sigma_x = F.softplus(sigma_x) + 1e-6
        sigma_y = F.softplus(sigma_y) + 1e-6

        # Ensure rho is within a valid range [-1, 1] (correlation coefficient)
        rho = torch.tanh(rho)

        # Intensity can be scaled to [0, 1] using sigmoid
        intensity = torch.sigmoid(intensity)

        # Create a 2D grid for the output image
        x = torch.linspace(-1, 1, self.output_w, device=z.device)
        y = torch.linspace(-1, 1, self.output_h, device=z.device)
        X, Y = torch.meshgrid(x, y, indexing="ij")  # X, Y shape: [output_w, output_h]

        # Expand X, Y to match batch and num_gaussians dimensions
        X = X.unsqueeze(0).unsqueeze(0)  # Shape: [1, 1, output_w, output_h]
        Y = Y.unsqueeze(0).unsqueeze(0)  # Shape: [1, 1, output_w, output_h]

        # Initialize an empty output image
        output = torch.zeros(batch_size, self.output_h, self.output_w, device=z.device)

        # Loop over each batch to generate the Gaussian-based image
        for b in range(batch_size):
            gaussian_map = torch.zeros(self.output_h, self.output_w, device=z.device)
            for i in range(self.num_gaussians):
                # Extract parameters for the current Gaussian
                x_c, y_c = xc[b, i], yc[b, i]
                sig_x, sig_y = sigma_x[b, i], sigma_y[b, i]
                p_rho = rho[b, i]
                inten = intensity[b, i]

                # Compute Gaussian function
                gaussian = inten * torch.exp(
                    -(
                        ((X - x_c) ** 2) / (2 * sig_x ** 2)
                        + ((Y - y_c) ** 2) / (2 * sig_y ** 2)
                        + p_rho * (X - x_c) * (Y - y_c) / (sig_x * sig_y)
                    )
                )
                gaussian_map += gaussian.squeeze().t()  # Add to the final map
            output[b] = gaussian_map

        return output

# Hyperparameters
batch_size = 10
length = 281
channel = 576
num_gaussians = 100  # Number of Gaussians
output_h = 360       # Height of output image
output_w = 720       # Width of output image

# Input encoded vector (randomly generated)
encoded = torch.randn(batch_size, length, channel)  # Shape: [10, 281, 576]

# Initialize the decoder
decoder = MLP_GaussianDecoder(input_dim=length * channel, num_gaussians=num_gaussians, output_h=output_h, output_w=output_w)
get_model_memory_nolog(decoder)
# Forward pass
output = decoder(encoded)  # Shape: [10, 360, 720]

# Print output shape
print("Output shape:", output.shape)  # Should be [10, 360, 720]

'''

'''

模型占用0.7276GB
Output shape: torch.Size([10, 360, 720])


''

In [5]:
print(output)

tensor([[[6.5424, 6.5797, 6.6170,  ..., 6.4938, 6.4569, 6.4200],
         [6.6159, 6.6536, 6.6913,  ..., 6.5673, 6.5299, 6.4927],
         [6.6898, 6.7279, 6.7660,  ..., 6.6412, 6.6034, 6.5657],
         ...,
         [6.3503, 6.3867, 6.4232,  ..., 6.5373, 6.5006, 6.4640],
         [6.2784, 6.3144, 6.3505,  ..., 6.4641, 6.4278, 6.3916],
         [6.2070, 6.2426, 6.2782,  ..., 6.3913, 6.3554, 6.3197]],

        [[6.3316, 6.3672, 6.4028,  ..., 6.1968, 6.1617, 6.1267],
         [6.4046, 6.4405, 6.4765,  ..., 6.2687, 6.2332, 6.1978],
         [6.4779, 6.5143, 6.5507,  ..., 6.3410, 6.3051, 6.2692],
         ...,
         [6.7817, 6.8203, 6.8590,  ..., 6.8897, 6.8510, 6.8124],
         [6.7065, 6.7446, 6.7829,  ..., 6.8140, 6.7757, 6.7376],
         [6.6317, 6.6694, 6.7073,  ..., 6.7387, 6.7009, 6.6631]],

        [[6.2473, 6.2832, 6.3192,  ..., 6.4367, 6.4003, 6.3641],
         [6.3195, 6.3558, 6.3923,  ..., 6.5099, 6.4732, 6.4365],
         [6.3921, 6.4289, 6.4657,  ..., 6.5836, 6.5464, 6.

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from net.utils import get_model_memory_nolog
# Define the MLP Gaussian Decoder
class MLP_GaussianDecoder(nn.Module):
    def __init__(self, input_dim, num_gaussians, output_h, output_w):
        super().__init__()
        self.num_gaussians = num_gaussians
        self.output_h = output_h
        self.output_w = output_w
        
        # MLP to generate Gaussian parameters
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, num_gaussians * 6 *2),  # Input -> hidden layer 0.72G
            nn.SiLU(),
            nn.Linear(num_gaussians * 6 *2, num_gaussians * 6),       # Hidden -> hidden
            nn.SiLU(),
            nn.Linear(num_gaussians * 6, num_gaussians * 6)  # Hidden -> Gaussian parameters
        )
        #     nn.Linear(input_dim, 512),  # Input -> hidden layer 0.30G
        #     nn.ReLU(),
        #     nn.Linear(512, 256),       # Hidden -> hidden
        #     nn.ReLU(),
        #     nn.Linear(256, num_gaussians * 6)  # Hidden -> Gaussian parameters
        # )

    def forward(self, z):
        """
        z: Input encoded vector of shape [batch, length, channel]
        Returns: Decoded Gaussian-based image of shape [batch, output_h, output_w]
        """
        batch_size, length, channel = z.shape

        # Flatten the input for MLP (combine length and channel dimensions)
        z_flat = z.view(batch_size, -1)  # Shape: [batch, length * channel]

        # Generate Gaussian parameters
        params = self.mlp(z_flat)  # Shape: [batch, num_gaussians * 6]
        params = params.view(batch_size, self.num_gaussians, 6)  # [batch, num_gaussians, 6]

        # Separate Gaussian parameters: x, y, sigma_x, sigma_y, rho, intensity
        xc, yc, sigma_x, sigma_y, rho, intensity = torch.chunk(params, 6, dim=-1)

        # Normalize xc, yc to [-1, 1] (image coordinate range)
        xc = torch.tanh(xc)
        yc = torch.tanh(yc)

        # Ensure sigma_x, sigma_y > 0 (use softplus activation)
        sigma_x = F.softplus(sigma_x) + 1e-6
        sigma_y = F.softplus(sigma_y) + 1e-6

        # Ensure rho is within a valid range [-1, 1] (correlation coefficient)
        rho = torch.tanh(rho)

        # Intensity can be scaled to [0, 1] using sigmoid
        intensity = torch.sigmoid(intensity)

        # Create a 2D grid for the output image
        x = torch.linspace(-1, 1, self.output_w, device=z.device)
        y = torch.linspace(-1, 1, self.output_h, device=z.device)
        X, Y = torch.meshgrid(x, y, indexing="ij")  # X, Y shape: [output_w, output_h]

        # Expand X, Y to support broadcasting across batches and gaussians
        X = X.unsqueeze(0).unsqueeze(0)  # Shape: [1, 1, output_w, output_h]
        Y = Y.unsqueeze(0).unsqueeze(0)  # Shape: [1, 1, output_w, output_h]

        # Expand Gaussian parameters to match grid dimensions
        xc = xc.unsqueeze(-1).unsqueeze(-1)  # [batch, num_gaussians, 1, 1]
        yc = yc.unsqueeze(-1).unsqueeze(-1)
        sigma_x = sigma_x.unsqueeze(-1).unsqueeze(-1)
        sigma_y = sigma_y.unsqueeze(-1).unsqueeze(-1)
        rho = rho.unsqueeze(-1).unsqueeze(-1)
        intensity = intensity.unsqueeze(-1).unsqueeze(-1)

        # Compute Gaussian function for all Gaussians in parallel
        X_diff = X - xc  # [batch, num_gaussians, output_w, output_h]
        Y_diff = Y - yc  # [batch, num_gaussians, output_w, output_h]

        gaussians = intensity * torch.exp(
            -(
                (X_diff ** 2) / (2 * sigma_x ** 2)
                + (Y_diff ** 2) / (2 * sigma_y ** 2)
                + rho * (X_diff * Y_diff) / (sigma_x * sigma_y)
            )
        )  # [batch, num_gaussians, output_w, output_h]

        # Sum over all Gaussians
        output = gaussians.sum(dim=1)  # Shape: [batch, output_w, output_h]

        return output

# Hyperparameters
batch_size = 10
length = 281
channel = 576
num_gaussians = 100  # Number of Gaussians
output_h = 360       # Height of output image
output_w = 720       # Width of output image

# Input encoded vector (randomly generated)
# encoded = torch.randn(batch_size, length, channel)  # Shape: [10, 281, 576]

# Initialize the decoder
decoder = MLP_GaussianDecoder(input_dim=length * channel, num_gaussians=num_gaussians, output_h=output_h, output_w=output_w)
get_model_memory_nolog(decoder)
# Forward pass
output = decoder(encoded)  # Shape: [10, 360, 720]

# Print output shape
print("Output shape:", output.shape)  # Should be [10, 360, 720]

print(output)
#怎么output不一样了。。我笑死 不过好像问题不大，只要形状对上，能训练就行

模型占用0.7276GB


NameError: name 'encoded' is not defined

In [9]:
import time

# Hyperparameters
batch_size = 10
length = 281
channel = 576
output_h = 360
output_w = 720
device=torch.device('cuda:0')
# Input encoded vector (randomly generated)
encoded = torch.randn(batch_size, length, channel)  # Shape: [10, 281, 576]

# Range of num_gaussians to test
num_gaussians_list = [300, 400, 500, 600, 1000] #1000在当前单元格或上一个单元格中执行代码时 Kernel 崩溃。 根本就用不了。。

# Results storage
results = []

for num_gaussians in num_gaussians_list:
    # Initialize the decoder with current num_gaussians
    decoder = MLP_GaussianDecoder(input_dim=length * channel, num_gaussians=num_gaussians, output_h=output_h, output_w=output_w)

    # Measure model size
    model_memory = get_model_memory_nolog(decoder)

    # Measure forward pass time
    start_time = time.time()
    output = decoder(encoded)  # Forward pass
    end_time = time.time()
    forward_time = end_time - start_time

    # Record results
    results.append((num_gaussians, model_memory, forward_time))
    print(f"num_gaussians: {num_gaussians}, Model Memory: {model_memory:.2f} GB, Forward Time: {forward_time:.4f} s")

# Print all results
print("\nResults Summary:")
for num_gaussians, model_memory, forward_time in results:
    print(f"num_gaussians: {num_gaussians}, Model Memory: {model_memory:.2f} GB, Forward Time: {forward_time:.4f} s")


模型占用2.2069GB
num_gaussians: 300, Model Memory: 2.21 GB, Forward Time: 8.2478 s
模型占用2.9586GB
num_gaussians: 400, Model Memory: 2.96 GB, Forward Time: 8.1051 s
模型占用3.7184GB
num_gaussians: 500, Model Memory: 3.72 GB, Forward Time: 11.7652 s
模型占用4.4862GB
num_gaussians: 600, Model Memory: 4.49 GB, Forward Time: 15.4920 s
模型占用7.6379GB


: 

In [2]:
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from net.utils import get_model_memory_nolog
# Define the MLP Gaussian Decoder
class MLP_GaussianDecoder(nn.Module):
    def __init__(self, input_dim, num_gaussians, output_h, output_w):
        super().__init__()
        self.num_gaussians = num_gaussians
        self.output_h = output_h
        self.output_w = output_w
        
        # MLP to generate Gaussian parameters
        self.mlp = nn.Sequential(
        #     nn.Linear(input_dim, num_gaussians * 6 *2),  # Input -> hidden layer 0.72G
        #     nn.SiLU(),
        #     nn.Linear(num_gaussians * 6 *2, num_gaussians * 6),       # Hidden -> hidden
        #     nn.SiLU(),
        #     nn.Linear(num_gaussians * 6, num_gaussians * 6)  # Hidden -> Gaussian parameters
        # )
            nn.Linear(input_dim, 512),  # Input -> hidden layer 0.30G
            nn.ReLU(),
            nn.Linear(512, 256),       # Hidden -> hidden
            nn.ReLU(),
            nn.Linear(256, num_gaussians * 6)  # Hidden -> Gaussian parameters
        )

    def forward(self, z):
        """
        z: Input encoded vector of shape [batch, length, channel]
        Returns: Decoded Gaussian-based image of shape [batch, output_h, output_w]
        """
        batch_size, length, channel = z.shape

        # Flatten the input for MLP (combine length and channel dimensions)
        z_flat = z.view(batch_size, -1)  # Shape: [batch, length * channel]

        # Generate Gaussian parameters
        params = self.mlp(z_flat)  # Shape: [batch, num_gaussians * 6]
        params = params.view(batch_size, self.num_gaussians, 6)  # [batch, num_gaussians, 6]

        # Separate Gaussian parameters: x, y, sigma_x, sigma_y, rho, intensity
        xc, yc, sigma_x, sigma_y, rho, intensity = torch.chunk(params, 6, dim=-1)

        # Normalize xc, yc to [-1, 1] (image coordinate range)
        xc = torch.tanh(xc)
        yc = torch.tanh(yc)

        # Ensure sigma_x, sigma_y > 0 (use softplus activation)
        sigma_x = F.softplus(sigma_x) + 1e-6
        sigma_y = F.softplus(sigma_y) + 1e-6

        # Ensure rho is within a valid range [-1, 1] (correlation coefficient)
        rho = torch.tanh(rho)

        # Intensity can be scaled to [0, 1] using sigmoid
        intensity = torch.sigmoid(intensity)

        # Create a 2D grid for the output image
        x = torch.linspace(-1, 1, self.output_w, device=z.device)
        y = torch.linspace(-1, 1, self.output_h, device=z.device)
        X, Y = torch.meshgrid(x, y, indexing="ij")  # X, Y shape: [output_w, output_h]

        # Expand X, Y to support broadcasting across batches and gaussians
        X = X.unsqueeze(0).unsqueeze(0)  # Shape: [1, 1, output_w, output_h]
        Y = Y.unsqueeze(0).unsqueeze(0)  # Shape: [1, 1, output_w, output_h]

        # Expand Gaussian parameters to match grid dimensions
        xc = xc.unsqueeze(-1).unsqueeze(-1)  # [batch, num_gaussians, 1, 1]
        yc = yc.unsqueeze(-1).unsqueeze(-1)
        sigma_x = sigma_x.unsqueeze(-1).unsqueeze(-1)
        sigma_y = sigma_y.unsqueeze(-1).unsqueeze(-1)
        rho = rho.unsqueeze(-1).unsqueeze(-1)
        intensity = intensity.unsqueeze(-1).unsqueeze(-1)

        # Compute Gaussian function for all Gaussians in parallel
        X_diff = X - xc  # [batch, num_gaussians, output_w, output_h]
        Y_diff = Y - yc  # [batch, num_gaussians, output_w, output_h]

        gaussians = intensity * torch.exp(
            -(
                (X_diff ** 2) / (2 * sigma_x ** 2)
                + (Y_diff ** 2) / (2 * sigma_y ** 2)
                + rho * (X_diff * Y_diff) / (sigma_x * sigma_y)
            )
        )  # [batch, num_gaussians, output_w, output_h]

        # Sum over all Gaussians
        output = gaussians.sum(dim=1)  # Shape: [batch, output_w, output_h]

        return output
    
# Hyperparameters
batch_size = 10
length = 281
channel = 576
output_h = 360
output_w = 720
device=torch.device('cuda:0')
# Input encoded vector (randomly generated)
encoded = torch.randn(batch_size, length, channel).to(device)  # Shape: [10, 281, 576]

# Range of num_gaussians to test
num_gaussians_list = [10, 50, 100, 200, 250] #1000在当前单元格或上一个单元格中执行代码时 Kernel 崩溃。 根本就用不了。。

# Results storage
results = []

for num_gaussians in num_gaussians_list:
    # Initialize the decoder with current num_gaussians
    decoder = MLP_GaussianDecoder(input_dim=length * channel, num_gaussians=num_gaussians, output_h=output_h, output_w=output_w).to(device)

    # Measure model size
    model_memory = get_model_memory_nolog(decoder)

    # Measure forward pass time
    start_time = time.time()
    output = decoder(encoded)  # Forward pass
    end_time = time.time()
    forward_time = end_time - start_time

    # Record results
    results.append((num_gaussians, model_memory, forward_time))
    print(f"num_gaussians: {num_gaussians}, Model Memory: {model_memory:.2f} GB, Forward Time: {forward_time:.4f} s")

    del decoder, output  # Delete references to the model and output
    torch.cuda.empty_cache()  # Clear cached memor

# Print all results
print("\nResults Summary:")
for num_gaussians, model_memory, forward_time in results:
    print(f"num_gaussians: {num_gaussians}, Model Memory: {model_memory:.2f} GB, Forward Time: {forward_time:.4f} s")


模型占用0.3093GB
num_gaussians: 10, Model Memory: 0.31 GB, Forward Time: 0.0017 s
模型占用0.3095GB
num_gaussians: 50, Model Memory: 0.31 GB, Forward Time: 0.0063 s
模型占用0.3098GB
num_gaussians: 100, Model Memory: 0.31 GB, Forward Time: 0.0035 s
模型占用0.3104GB


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.93 GiB. GPU 