In [2]:
import psutil
import sys
import os
import numpy as np

In [3]:
# report the amount of memory psutil currently lists as available, in units of 1024**3 bytes
bytes_per_gb = 1024 ** 3
available_gb = psutil.virtual_memory().available / bytes_per_gb
print(f"Available RAM: {available_gb:.2f} GB")

Available RAM: 6.91 GB


In [4]:
# create a numpy array of numbers using more than 100 MB of RAM
# The dtype is pinned to int64 (8 bytes per element, 20 million elements = 160 million bytes)
# so the footprint does not depend on the platform's default integer width.
big_array = np.arange(20_000_000, dtype=np.int64)

In [5]:
# Report the size of the array's element buffer in units of 1024**2 bytes.
# ndarray.nbytes is used instead of sys.getsizeof because getsizeof only counts
# the buffer when the array owns its data (a view would report just the header).
print(f"Memory used by big_array: {big_array.nbytes / (1024 ** 2):.3f} MB")

Memory used by big_array: 152.588 MB


In [6]:
# add one integer at the end of the array, then check its memory usage
# np.append returns a new array, so the name is rebound to the result.
big_array = np.append(big_array, 1)
# A single extra element adds only a few bytes, which is below the 0.001 MB
# resolution printed here, so the figure can look unchanged.
# ndarray.nbytes is used because sys.getsizeof under-reports arrays that do not own their data.
print(f"Memory used by big_array after appending one element: {big_array.nbytes / (1024 ** 2):.3f} MB")

Memory used by big_array after appending one element: 152.588 MB


In [7]:
# append one more integer to the array and check its memory usage again
# np.append returns a new array, so the name is rebound to the result.
big_array = np.append(big_array, 1)
# ndarray.nbytes measures the element buffer directly; sys.getsizeof would only
# include the buffer when the array owns its data.
print(f"Memory used by big_array after appending one element: {big_array.nbytes / (1024 ** 2):.3f} MB")

Memory used by big_array after appending one element: 152.588 MB


In [8]:
# try appending 100 times
# NOTE: np.append does not grow an array in place; each call produces a new
# array holding the previous contents plus the appended value, so this loop
# copies the whole (roughly 150 MB) buffer on every one of the 100 iterations.
for i in range(100):
    # footprint as reported by sys.getsizeof just before this iteration's append
    # NOTE(review): this value is not read in any cell visible here — TODO confirm it is still needed
    size_of_big_array = sys.getsizeof(big_array)
    big_array = np.append(big_array, 1)

In [9]:
# Report the array's footprint after the preceding loop of 100 appends.
# The message now says 100 elements rather than "one element".
# ndarray.nbytes is used because sys.getsizeof only counts the buffer for arrays that own their data.
print(f"Memory used by big_array after appending 100 more elements: {big_array.nbytes / (1024 ** 2):.3f} MB")

Memory used by big_array after appending one element: 152.589 MB


In [10]:
# Write the array, uncompressed, into a .npz archive under the key "big_array".
output_file = 'big_array.npz'
np.savez(output_file, big_array=big_array)

# Compare the archive's size on disk with the in-memory element buffer
# (both expressed in units of 1024**2 bytes).
# ndarray.nbytes is used for the in-memory figure because sys.getsizeof only
# includes the buffer when the array owns its data.
print(f"Output file size on disk: {os.path.getsize(output_file) / (1024 ** 2):.3f} MB")
print(f"Memory used by big_array: {big_array.nbytes / (1024 ** 2):.3f} MB")

Output file size on disk: 152.589 MB
Memory used by big_array: 152.589 MB


In [11]:
# Write the same array into a compressed .npz archive under the key "big_array".
output_file_compressed = 'big_array_compressed.npz'
np.savez_compressed(output_file_compressed, big_array=big_array)

# Compare the compressed archive's size on disk with the in-memory element buffer
# (both expressed in units of 1024**2 bytes).
# ndarray.nbytes is used for the in-memory figure because sys.getsizeof only
# includes the buffer when the array owns its data.
print(f"Output file size on disk (compressed): {os.path.getsize(output_file_compressed) / (1024 ** 2):.3f} MB")
print(f"Memory used by big_array: {big_array.nbytes / (1024 ** 2):.3f} MB")

Output file size on disk (compressed): 28.873 MB
Memory used by big_array: 152.589 MB


In [12]:
# TODO: explain why repeating `np.append(big_array, 1)` is inefficient.
# Exercise: increase the size of `big_array` until it reaches 10 GB, printing the memory usage each time it increases.