```
This notebook gives a rough estimate of the expected memory consumption of PIConGPU per GPU, under the assumption that nothing moves.

Copyright 2017-2018 PIConGPU contributors
Authors: Marco Garten
License: LGPLv3+
```

# Memory requirement calculator for PIConGPU <a class="tocSkip">

## Imports

In [2]:
import h5py as h5
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import LogNorm
%matplotlib inline
import sys
import os
from scipy import constants as sc

## Override Matplotlib Defaults

In [3]:
# Notebook-wide Matplotlib defaults: every entry below replaces the
# corresponding rcParams value for all figures created afterwards.
params = {
    # figure geometry
    'figure.figsize': [12,8],
    # text sizes (general font, axis labels, tick labels)
    'font.size' : 20,
    'axes.labelsize' : 20,
    'xtick.labelsize' : 20,
    'ytick.labelsize' : 20,
    # line appearance
    'lines.linewidth' : 3,
    # legend appearance
    'legend.fontsize' : 20,
    'legend.frameon' : False,
    'legend.numpoints': 1
}
# apply all overrides in one go
mpl.rcParams.update(params)

# Implementation

## Field Memory

In [4]:
def mem_req_by_fields(Lx,Ly,Lz,FieldTMPSlots = 1,particle_shape_order=2,sim_dim=3):
    """ Memory reserved for fields on each GPU

        Params:

            Lx, Ly, Lz : local grid size in cells; `Lz` is not used if `sim_dim == 2`
            FieldTMPSlots : number of temporary field slots, each counted as one
                            additional field component
            particle_shape_order : number of cells by which the local grid is padded
                                   when estimating the double buffer region
            sim_dim : dimensionality of the simulation, either 2 or 3

        Returns:
            req_mem required memory {unit: bytes}

        Raises:
            ValueError if `sim_dim` is neither 2 nor 3
    """
    # reject unsupported dimensionalities up front with a readable message
    # (the values are formatted into one string instead of being passed to the
    #  exception as separate arguments, which would render as a tuple)
    if sim_dim not in (2, 3):
        raise ValueError(
            "PIConGPU only runs in either 2D or 3D: {} =/= {{2, 3}}".format(sim_dim)
        )

    # guard size in super cells in x, y, z
    guard_size_SC = np.array([1, 1, 1])
    # super cell size in cells in x, y, z
    SC_size = np.array([8,8,4])

    pso = particle_shape_order

    # the guard is added on both sides of each direction, hence the factor 2
    if sim_dim == 2:
        local_cells =     (Lx + SC_size[0] * 2 * guard_size_SC[0]) \
                        * (Ly + SC_size[1] * 2 * guard_size_SC[1])

        # cells around core-border region due to particle shape
        # NOTE(review): this branch pads each direction by `pso` while the 3D
        # branch pads by `2 * pso` - confirm which of the two is intended
        double_buffer_cells = (Lx + pso) * (Ly + pso) \
                            -  Lx        *  Ly
    else:
        local_cells =     (Lx + SC_size[0] * 2 * guard_size_SC[0]) \
                        * (Ly + SC_size[1] * 2 * guard_size_SC[1]) \
                        * (Lz + SC_size[2] * 2 * guard_size_SC[2])

        # cells around core-border region due to particle shape
        double_buffer_cells = (Lx + 2 * pso) * (Ly + 2 * pso) * (Lz + 2 * pso) \
                            -  Lx        *  Ly        *  Lz

    # size of a data entry in bytes
    data_size = np.float32().itemsize
    # number of fields: 3 * 3 = x,y,z for E,B,J
    num_fields = 3 * 3 + FieldTMPSlots
    # double buffer memory
    double_buffer_mem = double_buffer_cells * num_fields * data_size
    # total: fields on the guarded local grid plus the double buffer
    req_mem = data_size * num_fields * local_cells + double_buffer_mem
    return req_mem

## Particles

In [5]:
def mem_req_by_particles(
    Lx,
    Ly,
    Lz,
    num_additional_attributes = 0,
    particles_per_cell = 2
):
    """ Estimate of the memory taken by all particles of one species on a GPU.
        The constant (per-species) memory is not included.

        Params:

            Lx, Ly, Lz : extent of the region filled with particles; the three
                         values are multiplied to obtain the number of cells
            num_additional_attributes : number of additional attributes like e.g. `boundElectrons`
            particles_per_cell : number of particles of this species in each cell

        Returns:
            req_mem required memory {unit: bytes} per GPU and species
    """
    # sizes assumed for the standard attributes of a single particle
    # NOTE(review): the three 8-entries below are taken over unchanged -
    # confirm that they are really meant as bytes
    momentum_mem   = 3 * 4
    position_mem   = 3 * 4
    multimask_mem  = 1 * 8
    cell_index_mem = 1 * 8 # cell index in supercell {lcellId_t}
    weighting_mem  = 1 * 8
    standard_mem_per_particle = np.array([
        momentum_mem,
        position_mem,
        multimask_mem,
        cell_index_mem,
        weighting_mem
    ]).sum()

    # every additional attribute is assumed to take 4 bytes - TODO confirm
    extra_mem_per_particle = num_additional_attributes * 4

    num_cells = Lx * Ly * Lz
    mem_per_particle = standard_mem_per_particle + extra_mem_per_particle

    return num_cells * mem_per_particle * particles_per_cell
    

## RNG states

In [6]:
def mem_req_by_rng(Lx,Ly,Lz):
    """ Estimate of the memory taken by the random number generator states on a GPU.
        One state of 6 * 8 bytes is counted for every cell.
        The generator is named "MRG32ka" in the original note
        (presumably MRG32k3a - TODO confirm).

        Params:

            Lx, Ly, Lz : local grid size; the three values are multiplied
                         to obtain the number of cells

        Returns:
            req_mem required memory {unit: bytes} per GPU
    """
    state_mem_per_cell = 6 * 8 # bytes
    num_cells = Lx * Ly * Lz
    return state_mem_per_cell * num_cells

# Cu 30nm foil setup

## `20 x 3 x 40` case

In [7]:
# local grid size in cells
Lx, Ly, Lz = 272, 1864, 128

# region that holds particles: full local extent in x and z, 17 cells in y
target_x, target_y, target_z = Lx, 17, Lz

# divisor that converts the byte counts into the "MB" values printed below
bytes_per_MB = 1024 * 1024

# --- compute ---------------------------------------------------------------

# field memory per GPU
field_gpu = mem_req_by_fields(Lx, Ly, Lz, FieldTMPSlots=2)

# particle memory per GPU - only the target area contributes here
# electrons: one contribution over the full target thickness and
# two contributions from a layer that is a single cell thick in y
e_bulk_gpu = mem_req_by_particles(target_x, target_y, target_z,
                                  num_additional_attributes=0,
                                  particles_per_cell=26)
e_layer_10_gpu = mem_req_by_particles(target_x, 1, target_z,
                                      num_additional_attributes=0,
                                      particles_per_cell=10)
e_layer_30_gpu = mem_req_by_particles(target_x, 1, target_z,
                                      num_additional_attributes=0,
                                      particles_per_cell=(5 * 6))
e_gpu = e_bulk_gpu + e_layer_10_gpu + e_layer_30_gpu

# ion species, each with one additional attribute
H_gpu  = mem_req_by_particles(target_x, 1, target_z,
                              num_additional_attributes=1,
                              particles_per_cell=10)
C_gpu  = mem_req_by_particles(target_x, 1, target_z,
                              num_additional_attributes=1,
                              particles_per_cell=5)
Cu_gpu = mem_req_by_particles(target_x, target_y, target_z,
                              num_additional_attributes=1,
                              particles_per_cell=1)

# RNG states are counted for the whole local grid
rng_gpu = mem_req_by_rng(Lx, Ly, Lz)

# total of all contributions
mem_sum = field_gpu + e_gpu + H_gpu + C_gpu + Cu_gpu + rng_gpu

# --- report ----------------------------------------------------------------

print("Memory requirement per GPU for fields: ",field_gpu / bytes_per_MB,"MB")
print("Memory requirement per GPU and species:")
print("e: ", e_gpu / bytes_per_MB,"MB")
print("H: ", H_gpu / bytes_per_MB,"MB")
print("C: ", C_gpu / bytes_per_MB,"MB")
print("Cu: ",Cu_gpu / bytes_per_MB,"MB")
print("Memory requirement per GPU for RNG states: ",rng_gpu / bytes_per_MB,"MB")

print("Sum of required GPU memory: ",mem_sum / bytes_per_MB,"MB")

Memory requirement per GPU for fields:  3222.39575195 MB
Memory requirement per GPU and species:
e:  768.1875 MB
H:  17.265625 MB
C:  8.6328125 MB
Cu:  29.3515625 MB
Memory requirement per GPU for RNG states:  2970.75 MB
Sum of required GPU memory:  7016.58325195 MB


## 2D Test with `20 x 3` GPUs

The idea is to reuse the same setup, but reduced to 2D by dropping the GPUs in the third direction.
That way I can make an estimate for the full case without having to change the domain decomposition again.

In [8]:
# local grid size in cells - a single cell in z for the 2D test
Lx, Ly, Lz = 272, 1864, 1

# region that holds particles: full local extent in x and z, 17 cells in y
target_x, target_y, target_z = Lx, 17, Lz

# divisor that converts the byte counts into the "MB" values printed below
bytes_per_MB = 1024 * 1024

# --- compute ---------------------------------------------------------------

# field memory per GPU, evaluated with the 2D formula
field_gpu = mem_req_by_fields(Lx, Ly, Lz, FieldTMPSlots=2,sim_dim=2)

# particle memory per GPU - only the target area contributes here
# electrons: one contribution over the full target thickness and
# two contributions from a layer that is a single cell thick in y
e_bulk_gpu = mem_req_by_particles(target_x, target_y, target_z,
                                  num_additional_attributes=0,
                                  particles_per_cell=26)
e_layer_10_gpu = mem_req_by_particles(target_x, 1, target_z,
                                      num_additional_attributes=0,
                                      particles_per_cell=10)
e_layer_30_gpu = mem_req_by_particles(target_x, 1, target_z,
                                      num_additional_attributes=0,
                                      particles_per_cell=(5 * 6))
e_gpu = e_bulk_gpu + e_layer_10_gpu + e_layer_30_gpu

# ion species, each with one additional attribute
H_gpu  = mem_req_by_particles(target_x, 1, target_z,
                              num_additional_attributes=1,
                              particles_per_cell=10)
C_gpu  = mem_req_by_particles(target_x, 1, target_z,
                              num_additional_attributes=1,
                              particles_per_cell=5)
Cu_gpu = mem_req_by_particles(target_x, target_y, target_z,
                              num_additional_attributes=1,
                              particles_per_cell=1)

# RNG states are counted for the whole local grid
rng_gpu = mem_req_by_rng(Lx, Ly, Lz)

# total of all contributions
mem_sum = field_gpu + e_gpu + H_gpu + C_gpu + Cu_gpu + rng_gpu

# --- report ----------------------------------------------------------------

print("Memory requirement per GPU for fields: ",field_gpu / bytes_per_MB,"MB")
print("Memory requirement per GPU and species:")
print("e: ", e_gpu / bytes_per_MB,"MB")
print("H: ", H_gpu / bytes_per_MB,"MB")
print("C: ", C_gpu / bytes_per_MB,"MB")
print("Cu: ",Cu_gpu / bytes_per_MB,"MB")
print("Memory requirement per GPU for RNG states: ",rng_gpu / bytes_per_MB,"MB")

print("Sum of required GPU memory: ",mem_sum / bytes_per_MB,"MB")

Memory requirement per GPU for fields:  22.8991546631 MB
Memory requirement per GPU and species:
e:  6.00146484375 MB
H:  0.134887695312 MB
C:  0.0674438476562 MB
Cu:  0.229309082031 MB
Memory requirement per GPU for RNG states:  23.208984375 MB
Sum of required GPU memory:  52.5412445068 MB
