# Setup

In [1]:
!ls -lh build/hetero1*

-rw-r--r-- 1 vscode vscode 8.4M Jan  4 09:53 build/hetero1.csv
-rw-r--r-- 1 vscode vscode 6.2M Jan  4 10:05 build/hetero1.h5


#### Code 1: Slice from csv file

In [15]:
%%writefile src/01A_slice_from_csv.py

from memory_profiler import profile
import numpy as np
import h5py
import sys

#@profile
def slice_from_csv(csv_filename, start=1000, stop=None):
    """Parse a CSV table with NumPy and return a contiguous slice of its rows.

    The complete file is parsed by ``np.genfromtxt`` before the slice is
    taken, so the cost of the call is dominated by reading the whole file.

    Parameters
    ----------
    csv_filename : str
        Path of a comma-delimited file whose first line is a header row.
    start : int, optional
        Index of the first row to return (default 1000).
    stop : int, optional
        Index one past the last row to return.  When omitted it is taken
        from the first command-line argument, ``sys.argv[1]``.

    Returns
    -------
    numpy.ndarray
        Structured array holding rows ``start:stop`` of the parsed table.

    Raises
    ------
    IndexError
        If ``stop`` is omitted and no command-line argument was supplied.
    ValueError
        If the command-line argument cannot be converted to ``int``.
    """
    # Resolve the stop row first so that a missing or malformed argument is
    # reported before the (comparatively slow) parse of the whole file.
    if stop is None:
        stop = int(sys.argv[1])

    # Two byte-string columns of at most 20 bytes and three float64 columns.
    dt = np.dtype([('name', 'S20'),
                   ('city', 'S20'),
                   ('x', 'f8'),
                   ('y', 'f8'),
                   ('z', 'f8')])

    # names=True makes genfromtxt consume the first line as the header row.
    np_data = np.genfromtxt(csv_filename, delimiter=',', dtype=dt, names=True)
    return np_data[start:stop]
   

if __name__ == "__main__":

    # slice_from_csv takes its stop row from sys.argv[1]; check for it up
    # front so a missing argument yields a usage message instead of an
    # IndexError raised only after the whole CSV has been parsed.
    if len(sys.argv) < 2:
        sys.exit("usage: python {} STOP_ROW".format(sys.argv[0]))

    csv_filename = 'build/hetero1.csv'
    x_csv = slice_from_csv(csv_filename)

Overwriting src/01A_slice_from_csv.py


#### Code 2: Slice from h5 file

In [16]:
%%writefile src/01B_slice_from_h5.py

from memory_profiler import profile
import h5py
import numpy as np
import sys


#@profile
def slice_from_h5(h5_filename, start=1000, stop=None):
    """Return a contiguous slice of rows from the ``'table'`` dataset of an HDF5 file.

    Parameters
    ----------
    h5_filename : str
        Path of the HDF5 file; it must contain a dataset named ``'table'``.
    start : int, optional
        Index of the first row to return (default 1000).
    stop : int, optional
        Index one past the last row to return.  When omitted it is taken
        from the first command-line argument, ``sys.argv[1]``.

    Returns
    -------
    The rows ``start:stop`` of the dataset, read into memory before the
    file is closed (h5py returns a NumPy array for a slice selection).

    Raises
    ------
    IndexError
        If ``stop`` is omitted and no command-line argument was supplied.
    ValueError
        If the command-line argument cannot be converted to ``int``.
    """
    # Resolve the stop row before touching the file so that a bad argument
    # fails without opening anything.
    if stop is None:
        stop = int(sys.argv[1])

    # Open explicitly read-only: older h5py releases default to a
    # read/write-or-create mode, which is wrong for a pure read and would
    # create an empty file when the path does not exist.  The context
    # manager closes the file even if the dataset lookup or slice raises.
    with h5py.File(h5_filename, 'r') as f:
        return f['table'][start:stop]
   

if __name__ == "__main__":

    # slice_from_h5 takes its stop row from sys.argv[1]; check for it up
    # front so a missing argument yields a usage message instead of an
    # IndexError traceback from inside the function.
    if len(sys.argv) < 2:
        sys.exit("usage: python {} STOP_ROW".format(sys.argv[0]))

    h5_filename = 'build/hetero1.h5'
    x_h5 = slice_from_h5(h5_filename)

Overwriting src/01B_slice_from_h5.py


#### Task 1: Find the number of read and memory allocation (mmap) system calls made to the operating system

In [17]:
%%bash
# -c prints a per-syscall summary (calls, errors, time); only read and mmap are traced.
strace -c -e trace=read,mmap python src/01A_slice_from_csv.py 1000

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 55.32    0.051256          19      2615           read
 44.68    0.041405          34      1208           mmap
------ ----------- ----------- --------- --------- ----------------
100.00    0.092661          24      3823           total


In [18]:
%%bash
# -c prints a per-syscall summary (calls, errors, time); only read and mmap are traced.
strace -c -e trace=read,mmap python src/01B_slice_from_h5.py 1000

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 54.97    0.039565          25      1541           read
 45.03    0.032407          28      1135           mmap
------ ----------- ----------- --------- --------- ----------------
100.00    0.071972          26      2676           total


In [None]:
%%bash
# -p: succeed silently when logs/ already exists, so the cell can be re-run.
mkdir -p logs
# Log every read() call of each script to its own file (-o).
# -t prefixes each line with the wall-clock time, -r with the time elapsed
# since the previous traced system call.
strace -o logs/st_csv.log -t -r -e read python src/01A_slice_from_csv.py 1000
strace -o logs/st_h5.log -t -r -e read python src/01B_slice_from_h5.py 1000