In [1]:
def estimate_gpu_memory(batch_size, input_shape=(224, 224, 3), dtype_bytes=4,
                        model_param_MB=98, optimizer_MB=294, activation_factor=5):
    """
    Estimate GPU memory usage for CNN training (e.g., ResNet-50).

    Parameters:
    - batch_size: Number of input images
    - input_shape: Tuple of (height, width, channels)
    - dtype_bytes: Bytes per pixel value (4 for float32, 2 for float16)
    - model_param_MB: Size of model parameters in MB
    - optimizer_MB: Memory used by optimizer states in MB
    - activation_factor: Multiplier for activation and gradient memory

    Returns:
    - Input memory (MB)
    - Total estimated memory (MB)
    - Total estimated memory (GB)
    """
    height, width, channels = input_shape
    input_MB = batch_size * height * width * channels * dtype_bytes / (1024**2)
    activation_MB = activation_factor * input_MB
    total_MB = input_MB + activation_MB + model_param_MB + optimizer_MB
    total_GB = total_MB / 1024
    return input_MB, total_MB, total_GB


In [3]:
estimate_gpu_memory(batch_size=512)

(294.0, 2156.0, 2.10546875)

In [7]:
def estimate_lstm_gpu_memory(batch_size, seq_length, input_dim, hidden_dim, num_layers,
                              dtype_bytes=4, bidirectional=False, model_param_MB_fixed=5):
    """
    Estimate GPU memory usage for an LSTM model on time series data.

    Parameters:
    - batch_size: Number of sequences in a batch
    - seq_length: Number of time steps per sequence
    - input_dim: Number of features per time step
    - hidden_dim: Number of hidden units in the LSTM
    - num_layers: Number of LSTM layers
    - dtype_bytes: 4 for float32, 2 for float16
    - bidirectional: True for bidirectional LSTM
    - model_param_MB_fixed: Estimate of model parameters in MB

    Returns:
    - Input memory (MB)
    - Activation memory (MB)
    - Total memory (MB)
    - Total memory (GB)
    """
    # Input memory
    input_MB = batch_size * seq_length * input_dim * dtype_bytes / (1024 ** 2)

    # Activation memory
    direction_multiplier = 2 if bidirectional else 1
    activation_MB = batch_size * seq_length * hidden_dim * direction_multiplier * num_layers * dtype_bytes / (1024 ** 2)

    # Total memory
    total_MB = input_MB + activation_MB + model_param_MB_fixed
    total_GB = total_MB / 1024

    return input_MB, activation_MB, total_MB, total_GB

# Example usage
if __name__ == "__main__":
    input_MB, act_MB, total_MB, total_GB = estimate_lstm_gpu_memory(
        batch_size=64,
        seq_length=100,
        input_dim=3,
        hidden_dim=128,
        num_layers=2,
        bidirectional=False
    )
    print(f"Input Memory:       {input_MB:.2f} MB")
    print(f"Activation Memory:  {act_MB:.2f} MB")
    print(f"Total Memory:       {total_MB:.2f} MB ({total_GB:.2f} GB)")

estimate_lstm_gpu_memory(500)

SyntaxError: invalid syntax (4053244093.py, line 49)