In [12]:
# https://code.tutsplus.com/tutorials/understand-how-much-memory-your-python-objects-use--cms-25609
from collections import Mapping, Container
from sys import getsizeof
 
def deep_getsizeof(o, ids):
    """Find the memory footprint of a Python object
 
    This is a recursive function that drills down a Python object graph
    like a dictionary holding nested dictionaries with lists of lists
    and tuples and sets.
 
    The sys.getsizeof function does a shallow size of only. It counts each
    object inside a container as pointer only regardless of how big it
    really is.
 
    :param o: the object
    :param ids:
    :return:
    """
    d = deep_getsizeof
    if id(o) in ids:
        return 0
 
    r = getsizeof(o)
    ids.add(id(o))
 
    if isinstance(o, str) or isinstance(o, bytes): # or isinstance(0, unicode)
        return r
 
    if isinstance(o, Mapping):
        return r + sum(d(k, ids) + d(v, ids) for k, v in o.iteritems())
 
    if isinstance(o, Container):
        return r + sum(d(x, ids) for x in o)
 
    return r 

In [16]:
import fr_core_news_md, spacy
# !pip install pympler
from pympler.asizeof import asizeof

In [23]:
nlp = spacy.load("fr_core_news_md")

In [24]:
print(asizeof(nlp))

16168720


In [27]:
map_dict = nlp.vocab.prune_vectors(10000)

  "__main__", mod_spec)


TypeError: deep_getsizeof() missing 1 required positional argument: 'ids'

In [28]:
print(asizeof(nlp))

16168720


In [29]:
# V2  https://goshippo.com/blog/measure-real-size-any-python-object/
import sys

def get_size(obj, seen=None):
    """Recursively finds size of objects"""
    size = sys.getsizeof(obj)
    if seen is None:
        seen = set()
    obj_id = id(obj)
    if obj_id in seen:
        return 0
    # Important mark as seen *before* entering recursion to gracefully handle
    # self-referential objects
    seen.add(obj_id)
    if isinstance(obj, dict):
        size += sum([get_size(v, seen) for v in obj.values()])
        size += sum([get_size(k, seen) for k in obj.keys()])
    elif hasattr(obj, '__dict__'):
        size += get_size(obj.__dict__, seen)
    elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
        size += sum([get_size(i, seen) for i in obj])
    return size

In [35]:
nlp = spacy.load("fr_core_news_md")
print(get_size(nlp))
print(get_size(nlp.vocab.vectors))

16186569
18542384


In [37]:
print(get_size(nlp.vocab.vectors))

18542384


In [38]:
map_dict = nlp.vocab.prune_vectors(10000)
print(get_size(nlp))

  "__main__", mod_spec)


16186569


In [40]:
print(get_size(nlp.vocab.vectors))

20712860


In [45]:
nlp.vocab.vectors

<spacy.vectors.Vectors at 0x16c5bc601d8>

In [47]:
import numpy as np
get_size(np.random.random((300,)))*10000

25600000