# Profiling Splopter's Memory Usage
Creating instances of splopter has always consumed a lot of memory - mostly because of the files involved. This is now causing significant problems so I'm trying to figure out a method of minimising memory usage.

In [19]:
import sys
import pathlib as pth
import numpy as np
# Make the local flopter checkout importable. The entry must be an absolute
# path: without the leading '/' it is resolved against the kernel's current
# working directory, so the import only works when launched from '/'.
sys.path.append('/home/jleland/coding/projects/flopter')
import flopter.spice.splopter as spl
import flopter.spice.tdata as td
from pympler import asizeof as szf

In [38]:
# Directory of the SPICE run that is loaded in this notebook.
spl_path = pth.Path(
    "/home/jleland/data/external_big/spice/marconi/spice2/sheath_exp/angled_1/alpha_yz_-1.0"
)

# Extra variable names to request on top of td.DEFAULT_REDUCED_DATASET.
non_standard_variables = {
    't', 'ProbePot', 'npartproc',
    'Nz', 'Nzmax', 'Ny',
    'count', 'Npc', 'snumber',
    'nproc', 'version',
}

# Final selection: (defaults + extras) with the two OBJECTSCURRENTFLUX* names removed.
excluded_variables = {td.OBJECTSCURRENTFLUXE, td.OBJECTSCURRENTFLUXI}
desired_variables = (td.DEFAULT_REDUCED_DATASET | non_standard_variables) - excluded_variables

In [39]:
# Display the resulting collection of variable names that will be requested.
desired_variables

{'Npc',
 'Ny',
 'Nz',
 'Nzmax',
 'Pot',
 'Potvac',
 'ProbePot',
 'Temp',
 'alphaxz',
 'alphayz',
 'count',
 'dt',
 'dz',
 'm',
 'npartproc',
 'nproc',
 'objectscurrente',
 'objectscurrenti',
 'objectsenum',
 'q',
 'snumber',
 't',
 'version'}

In [40]:
# Instantiate the Splopter for the run at spl_path, passing the variable
# selection through the `reduce` argument.
splopter = spl.Splopter(
    spl_path,
    reduce=desired_variables,
    ignore_tzero_fl=True,
)

Spice data directory is not valid, attempting to auto-fix.
Passed Spice directory (/home/jleland/data/external_big/spice/marconi/spice2/sheath_exp/angled_1/alpha_yz_-1.0) doesn't seem to be valid.
Continuing anyway.


In [47]:
# Only the last expression of a cell is displayed, so the squeezed t_dict entry
# below is evaluated (and would raise if 'version' were missing) but not shown.
np.squeeze(splopter.tdata.t_dict['version'])
# Displayed value: the version attribute on the loaded matlab_data object.
splopter.tdata.matlab_data.version

'1.0'

## Look at the splopter object in a bit more detail

In [33]:
# Walk every attribute stored on the tdata instance and, for those exposing
# `nbytes`, print pympler's size estimate alongside the reported `nbytes`,
# accumulating the latter into nbytes_tot__dict__.
nbytes_tot__dict__ = 0
tdata_attrs = vars(splopter.tdata)

for name, thing in tdata_attrs.items():
    # Look the entry up again by key so the iterated value and the accessed
    # value can be sized side by side.
    named_arr = tdata_attrs[name]
    if not hasattr(named_arr, 'nbytes'):
        continue

    report = (f"{name} ({type(thing)}) \n"
              f"  value: \t {szf.asizeof(thing)} \n"
              f"  accessed: \t {szf.asizeof(named_arr)} \n"
              f"  nbytes: \t {named_arr.nbytes}")
    print(report)
    nbytes_tot__dict__ += named_arr.nbytes

dt (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 8
dz (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 8
t (<class 'numpy.ndarray'>) 
  value: 	 16112 
  accessed: 	 16112 
  nbytes: 	 16000
q (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 16
m (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 16
temp (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 16
objectscurrenti (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 291634560
objectscurrente (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 291634560
objectspowerfluxi (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 291634560
objectspowerfluxe (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 291634560
pot (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 1038248
potvac (<class 'numpy.ndar

In [24]:
# Same size report as for tdata's attributes, but over the entries of
# tdata.t_dict; the `nbytes` values are summed into nbytes_tot_t_dict.
t_dict = splopter.tdata.t_dict
nbytes_tot_t_dict = 0

for name, thing in t_dict.items():
    named_arr = t_dict[name]
    # Skip entries that do not report a byte count.
    if not hasattr(named_arr, 'nbytes'):
        continue

    print(f"{name} ({type(thing)}) \n",
          f"  value: \t {szf.asizeof(thing)} \n",
          f"  accessed: \t {szf.asizeof(named_arr)} \n",
          f"  nbytes: \t {named_arr.nbytes}",
          sep="")
    nbytes_tot_t_dict += named_arr.nbytes

Nz (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 4
Nzmax (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 4
Ny (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 4
count (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 4
Npc (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 4
dt (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 8
dz (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 8
nproc (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 4
q (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 16
m (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 16
Temp (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 16
Pot (<class 'numpy.ndarray'>) 
  value: 	 112 
  accessed: 	 112 
  nbytes: 	 1038248
Potvac (<class 'numpy.ndarray'>)

In [26]:
# Compare the byte totals accumulated from tdata.__dict__ and from tdata.t_dict.
print(nbytes_tot__dict__, nbytes_tot_t_dict)

1169549964 1242458612


In [29]:
# Sum of both byte totals, converted using binary (1024-based) multiples.
total_nbytes = nbytes_tot__dict__ + nbytes_tot_t_dict
bytes_per_mb = 1024 * 1024
bytes_per_gb = 1024 * bytes_per_mb

print(total_nbytes / bytes_per_mb)  # in MB
print(total_nbytes / bytes_per_gb)  # in GB

2300.2706298828125
2.246358036994934


---

To summarise:
 - The arrays in t_dict are not duplicated in memory by the attributes on tdata
 - The arrays objectscurrent and objectspowerflux are HUGE in their raw form, which is the default output in version 2.14 - this is why it has become such a big problem now.
 
What I can take from this is that we shouldn't be storing redundant arrays in memory, so I can probably do without the objectspowerflux arrays for now. Implementing this change (in the cells above) halves the amount of memory consumed. This is good.

I could also garbage collect the tdata object after homogenisation in splopter, or at least downsample the raw current arrays to make it a bit more tractable. 