In [1]:
import csv
import numpy as np
import matplotlib.pyplot as plt

### process file

In [8]:
def parse_key(key: str) -> int:
    """Translate one CSV field into its integer code.

    Boolean words and operation names are mapped through a fixed table;
    anything else is parsed with ``int()``.

    Args:
        key: Raw field text from the trace CSV.

    Returns:
        The mapped or parsed integer, or ``-1`` when the field is empty or
        cannot be parsed as an integer.

    Note:
        Known limitation: float-valued fields (e.g. a lifetime) are not valid
        ``int()`` input, so they currently come back as ``-1``.
    """
    codes = {
        'true': 1,
        'false': 0,
        'exec()': 0,
        'fork()': 1,
        'exit()': 2,
        'malloc()': 3,
        'free()': 4,
        'realloc()': 5
    }
    if key:
        code = codes.get(key)
        if code is not None:
            return code
        try:
            return int(key)
        except ValueError:
            pass  # fall through to the "unknown" sentinel below
    return -1

def add_stats_line(arr: np.ndarray | None, line: list[str]) -> np.ndarray:
    if not line:
        return arr if arr is not None else np.empty((0, 0), dtype=int)

    vals = [parse_key(word) for word in line]
    row = np.array(values, dtype=np.int64)[None, :] # trick to add a dimension to row

    if arr is None or arr.size == 0:
        #print('arr is None')
        return row
    if arr.dtype != np.int64:
        arr = arr.astype(np.int64, copy=False)
    if arr.ndim == 1: # if dimension of arr is 1
        #print('arr is dimension 1')
        arr = arr[None, :]
    if arr.shape[1] != row.shape[1]:
        raise ValueError(f'Column mismatch: arr has {arr.shape[1]} cols, row has {row.shape[1]} cols.')
    return np.vstack((arr,row))

# Load trace_results.csv into `stats`, a 2-D int64 array with one row per CSV record.
# newline='' is what the csv module expects so it can handle line endings itself.
with open("trace_results.csv", 'r', newline='') as trace_csv:
    trace_reader = csv.reader(trace_csv)
    next(trace_reader, None)  # skip the header row; the default avoids StopIteration on an empty file

    # Parse every record into its own 1-row array and stack them once at the end.
    # Stacking onto the growing array for each line re-copies all previous rows
    # every time, which is quadratic in the number of records.
    parsed_rows = [add_stats_line(None, line) for line in trace_reader if line]

# `stats` stays None when the file holds no data rows. np.vstack raises
# ValueError if the records do not all have the same number of columns.
stats = np.vstack(parsed_rows) if parsed_rows else None

In [9]:
# Quick sanity check of the parsed trace: container type, shape, rank, last five rows.
print(type(stats))
for field in ("shape", "ndim"):
    print(getattr(stats, field))
print(stats[-5:])

<class 'numpy.ndarray'>
(130269, 8)
2
[[2570223624792         10655             1             4          9380
            152             4            -1]
 [2570223630824         10651             1             4          9340
            152            40            -1]
 [2570223636992         10644             1             4          9260
             72            80            -1]
 [2570223643189         10644             1             4          9252
             64             8            -1]
 [2570223927952         10644             1             2    2120208366
             -1            -1            -1]]


### plotting dynamic memory

In [10]:
# list format: timestamp, tid, isMain, operation, stat1, stat2, stat3, stat4
# operations: exec() marks the start of the main thread, fork() marks the start of a child thread;
# 		the remaining operations are exit(), malloc(), realloc(), free()
# isMain: 0 == False, 1 == True
# represent operations by int:
# 		exec() = 0, fork() = 1, exit() = 2, malloc() = 3, free() = 4, realloc() = 5
# stats by operation:
# 		malloc(): stat1 = process heap size, stat2 = thread heap size, stat3 = size of malloc()
# 		realloc(): stat1 = process heap size, stat2 = thread heap size, stat3 = old size, stat4 = new size
# 		free(): stat1 = process heap size, stat2 = thread heap size, stat3 = size freed

In [26]:
# ---- collect per-thread and process-wide heap samples ----
threads_map = {}  # tid -> (times_ms, thread_heap_bytes); the main thread is keyed as 0
program_mem = []  # (time_ms, process_heap_bytes), one entry per malloc/free/realloc row
start_ms = stats[0][0] // 1000000  # first timestamp scaled down by 1e6 (assumed ns -> ms, matching the x-axis label)
threads = set()

for row in stats:
    # Rows flagged isMain are folded into the pseudo thread id 0.
    tid = 0 if row[2] == 1 else int(row[1])
    operation = row[3]
    threads.add(tid)
    # exec() / fork() start tracking a thread; every series starts at (0 ms, 0 bytes).
    if operation in (0, 1) and tid not in threads_map:
        threads_map[tid] = ([0], [0])  # times_ms, memory_bytes
    if tid in threads_map and operation in (3, 4, 5):  # malloc(), free(), realloc()
        timestamp = row[0] // 1000000 - start_ms
        threads_map[tid][0].append(timestamp)
        threads_map[tid][1].append(row[5])  # stat2: thread heap size
        program_mem.append((timestamp, row[4]))  # stat1: process heap size

print(f'All threads recorded are: {threads}.')
print(f'Relevant threads are: {[int(tid) for tid in threads_map.keys()]}.')

# ---- plotting ----
# One figure/axes pair is reused for all three images. Axes.cla() also wipes the
# title and axis labels, so they are set again before each plot; otherwise the
# second and third images are saved without labels.
# NOTE(review): savefig writes into "plots/"; nothing here creates that directory.
x_label = 'time since program starts (ms)'
y_label = 'memory allocated (bytes)'
fig, ax = plt.subplots(figsize=(10, 5))

# all threads dynamic memory plot (moving average over `smooth_window` samples)
ax.set(xlabel=x_label, ylabel=y_label, title='dynamic memory by time')
smooth_window = 3
# np.convolve(mode='valid') returns len(input) - (smooth_window - 1) points, so the
# series is edge-padded with smooth_window - 1 samples in total to keep the smoothed
# curve the same length as the time axis.
pad_right = smooth_window // 2
pad_left = smooth_window - 1 - pad_right
kernel = np.ones(smooth_window) / smooth_window
lines = {}
for tid, (time, mem) in threads_map.items():
    mem_padded = np.pad(mem, (pad_left, pad_right), mode='edge')
    # each point is now the mean of a smooth_window-sample neighbourhood of mem
    mem_smooth = np.convolve(mem_padded, kernel, mode='valid')
    line, = ax.plot(time, mem_smooth, label=tid)  # plot() returns a list; unpack its single Line2D
    lines[tid] = line
ax.legend([lines[0]], ["main"])  # assumes the main thread (tid 0) was recorded
fig.savefig("plots/dynamic_memory.png")
ax.cla()

# selective threads dynamic memory plot (raw, unsmoothed series)
ax.set(xlabel=x_label, ylabel=y_label, title='dynamic memory by time')
ax.plot(threads_map[0][0], threads_map[0][1])
child_line_count = 1  # index into threads_map's insertion order; 0 is the first thread seen
child_thread = list(threads_map.keys())[child_line_count]
ax.plot(threads_map[child_thread][0], threads_map[child_thread][1])
ax.legend(["main", child_thread])
fig.savefig("plots/two_threads_memory.png")
ax.cla()

# process dynamic memory
ax.set(xlabel=x_label, ylabel=y_label, title='process heap memory by time')
time, mem = zip(*program_mem)  # transpose the (time, mem) pairs into two sequences
print(len(time), len(mem))
ax.plot(time, mem, label='process')
fig.savefig('plots/process_memory.png')

plt.close(fig)

All threads recorded are: {0, 10651, 10652, 10653, 10654, 10655}.
Relevant threads are: [0, 10651, 10652, 10653, 10654, 10655].
130257 130257


## no owner version

In [2]:
def parse_key(key: str) -> int:
    """Convert a raw CSV field to an integer code.

    Known words (booleans and operation names) are looked up in a fixed
    table; every other non-empty field is handed to ``int()``.

    Args:
        key: Field text as read from the trace CSV.

    Returns:
        The table value or parsed integer; ``-1`` for an empty field or one
        that ``int()`` rejects.

    Note:
        Known limitation: a float-valued field (e.g. a lifetime) is rejected
        by ``int()`` and therefore reported as ``-1``.
    """
    unknown = -1
    if not key:
        return unknown

    known_words = dict([
        ('true', 1),
        ('false', 0),
        ('exec()', 0),
        ('fork()', 1),
        ('exit()', 2),
        ('malloc()', 3),
        ('free()', 4),
        ('realloc()', 5),
    ])
    try:
        return known_words[key]
    except KeyError:
        pass  # not a known word; try a plain integer next

    try:
        parsed = int(key)
    except ValueError:
        parsed = unknown
    return parsed

def add_stats_line(arr: np.ndarray | None, line: list[str]) -> np.ndarray:
    if not line:
        return arr if arr is not None else np.empty((0, 0), dtype=int)

    vals = [parse_key(word) for word in line]
    row = np.array(vals, dtype=np.int64)[None, :] # trick to add a dimension to row

    if arr is None or arr.size == 0:
        #print('arr is None')
        return row
    if arr.dtype != np.int64:
        arr = arr.astype(np.int64, copy=False)
    if arr.ndim == 1: # if dimension of arr is 1
        #print('arr is dimension 1')
        arr = arr[None, :]
    if arr.shape[1] != row.shape[1]:
        raise ValueError(f'Column mismatch: arr has {arr.shape[1]} cols, row has {row.shape[1]} cols.')
    return np.vstack((arr,row))

# Load trace_no_owner_results.csv into `stats`, a 2-D int64 array with one row per CSV record.
# newline='' is what the csv module expects so it can handle line endings itself.
with open("trace_no_owner_results.csv", 'r', newline='') as trace_csv:
    trace_reader = csv.reader(trace_csv)
    next(trace_reader, None)  # skip the header row; the default avoids StopIteration on an empty file

    # Parse every record into its own 1-row array and stack them once at the end.
    # Stacking onto the growing array for each line re-copies all previous rows
    # every time, which is quadratic in the number of records.
    parsed_rows = [add_stats_line(None, line) for line in trace_reader if line]

# `stats` stays None when the file holds no data rows. np.vstack raises
# ValueError if the records do not all have the same number of columns.
stats = np.vstack(parsed_rows) if parsed_rows else None

In [3]:
# Summarise the no-owner trace: container type, then shape and rank, then the tail rows.
print(type(stats))
for attribute in ("shape", "ndim"):
    print(getattr(stats, attribute))
tail_rows = stats[-5:]
print(tail_rows)

<class 'numpy.ndarray'>
(130337, 8)
2
[[769945563858         2608            1            4         8444
        -68958            4           -1]
 [769945575844         2608            1            4         8404
        -68998           40           -1]
 [769945588285         2608            1            4         8324
        -69078           80           -1]
 [769945601631         2608            1            4         8316
        -69086            8           -1]
 [769945866461         2608            1            2   2023985484
            -1           -1           -1]]


In [7]:
# ---- collect per-thread and process-wide heap samples ----
threads_map = {}  # tid -> (times_ms, thread_heap_bytes); the main thread is keyed as 0
program_mem = []  # (time_ms, process_heap_bytes), one entry per malloc/free/realloc row
start_ms = stats[0][0] // 1000000  # first timestamp scaled down by 1e6 (assumed ns -> ms, matching the x-axis label)
threads = set()

# mem_used: total bytes requested through malloc();
# with_realloc: the same total plus the net size change of every realloc().
mem_used, with_realloc = 0, 0

for row in stats:
    # Rows flagged isMain are folded into the pseudo thread id 0.
    tid = 0 if row[2] == 1 else int(row[1])
    operation = row[3]
    threads.add(tid)
    # exec() / fork() start tracking a thread; series start empty in this version.
    if operation in (0, 1) and tid not in threads_map:
        threads_map[tid] = ([], [])  # times_ms, memory_bytes
    if tid in threads_map and operation in (3, 4, 5):  # malloc(), free(), realloc()
        if operation == 3:
            mem_used += row[6]  # stat3: size of malloc()
            with_realloc += row[6]
        if operation == 5:
            with_realloc += row[7] - row[6]  # stat4 (new size) - stat3 (old size)
        timestamp = row[0] // 1000000 - start_ms
        threads_map[tid][0].append(timestamp)
        threads_map[tid][1].append(row[5])  # stat2: thread heap size
        program_mem.append((timestamp, row[4]))  # stat1: process heap size
print(f'{mem_used=}')
print(f'{with_realloc=}')
print(f'All threads recorded are: {threads}.')
print(f'Relevant threads are: {[int(tid) for tid in threads_map.keys()]}.')

# ---- plotting ----
# One figure/axes pair is reused for all three images. Axes.cla() also wipes the
# title and axis labels, so they are set again before each plot; otherwise the
# two-thread image is saved without labels.
# NOTE(review): savefig writes into "plots/"; nothing here creates that directory.
x_label = 'time since program starts (ms)'
y_label = 'memory allocated (bytes)'
fig, ax = plt.subplots(figsize=(10, 5))

# all threads dynamic memory plot (moving average over `smooth_window` samples)
ax.set(xlabel=x_label, ylabel=y_label, title='dynamic memory by time')
smooth_window = 3
# np.convolve(mode='valid') returns len(input) - (smooth_window - 1) points, so the
# series is edge-padded with smooth_window - 1 samples in total to keep the smoothed
# curve the same length as the time axis.
pad_right = smooth_window // 2
pad_left = smooth_window - 1 - pad_right
kernel = np.ones(smooth_window) / smooth_window
lines = {}
for tid, (time, mem) in threads_map.items():
    if not mem:
        # A thread with an exec()/fork() row but no allocation samples has nothing
        # to draw, and np.pad(mode='edge') raises on an empty series.
        continue
    mem_padded = np.pad(mem, (pad_left, pad_right), mode='edge')
    # each point is now the mean of a smooth_window-sample neighbourhood of mem
    mem_smooth = np.convolve(mem_padded, kernel, mode='valid')
    line, = ax.plot(time, mem_smooth, label=tid, alpha=0.8, lw=2)  # plot() returns a list; unpack its single Line2D
    lines[tid] = line
if 0 in lines:  # only label the main thread when it was actually drawn
    ax.legend([lines[0]], ["main"])
#ax.set_yscale('symlog')
fig.savefig("plots/no_owner_dynamic_memory.png", dpi=300)
ax.cla()

# selective threads dynamic memory plot (raw, unsmoothed series)
ax.set(xlabel=x_label, ylabel=y_label, title='dynamic memory by time')
ax.plot(threads_map[0][0], threads_map[0][1])
child_line_count = 1  # index into threads_map's insertion order; 0 is the first thread seen
child_thread = list(threads_map.keys())[child_line_count]
ax.plot(threads_map[child_thread][0], threads_map[child_thread][1])
ax.legend(["main", child_thread])
fig.savefig("plots/no_owner_two_threads_memory.png", dpi=300)
ax.cla()

# process dynamic memory
ax.set(xlabel=x_label, ylabel=y_label, title='process heap memory by time')
time, mem = zip(*program_mem)  # transpose the (time, mem) pairs into two sequences
print(len(time), len(mem))
ax.plot(time, mem, label='process')
fig.savefig('plots/no_owner_process_memory.png', dpi=300)

plt.close(fig)

All threads recorded are: {0, 2609, 2610, 2611, 2612, 2613}.
Relevant threads are: [0, 2609, 2610, 2611, 2612, 2613].
130325 130325
