In [28]:
%load_ext autoreload
%autoreload 2
import csv
import numpy as np
import pandas as pd
import math
import altair as alt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [120]:
# Width of the page grid: number of pages drawn per chart row.
COLUMN_COUNT = 50
# Divisor used to turn a fault offset (and a file size) into a page index.
PAGE_SIZE = 4 * 1024

# Compared against the "file_name" field of the loaded CSV rows.
FILE_NAME = "/product/app/YouTube/YouTube.apk"
# NOTE(review): not referenced by the cells visible here — confirm it is still needed.
ZIP_ENTRY_NAME = "classes.dex"

In [37]:
def _read_csv_rows(path, int_column):
    """Read a CSV file with a header row and return its rows as a list of dicts.

    The value stored under ``int_column`` is converted to ``int`` in every
    row; all other values stay as the strings ``csv.DictReader`` produced.
    """
    rows = []
    # newline="" hands newline handling to the csv module, as its
    # documentation recommends for file objects passed to a reader.
    with open(path, newline="") as csv_file:
        for row in csv.DictReader(csv_file):
            row[int_column] = int(row[int_column])
            rows.append(row)
    return rows


# One dict per recorded fault; "offset" is an int.
mapped_faults = _read_csv_rows("mapped_faults.csv", "offset")

# One dict per file / zip entry; "size" is an int.
file_sizes = _read_csv_rows("file_sizes.csv", "size")

In [139]:

def page_fault_chart(file_name, zip_entry_name, file_sizes, mapped_faults):
    """Build an Altair grid chart marking which pages of one zip entry faulted.

    The entry is laid out row by row on a grid COLUMN_COUNT pages wide, so
    page ``p`` sits at column ``p % COLUMN_COUNT`` and row
    ``p // COLUMN_COUNT``. A cell gets ``z == 1`` when at least one matching
    fault offset falls inside that page and ``z == 0`` otherwise. The last
    row may contain cells past the final page of the entry; they stay 0.

    Args:
        file_name: Value matched against the "file_name" key of both inputs.
        zip_entry_name: Value matched against the "zip_entry_name" key of
            both inputs.
        file_sizes: Iterable of dicts with "file_name", "zip_entry_name" and
            "size" keys; the first matching entry supplies the size.
        mapped_faults: Iterable of dicts with "file_name", "zip_entry_name"
            and an integer "offset" key.

    Returns:
        An ``alt.Chart`` of rectangles, one per grid cell, coloured by ``z``
        and with a tooltip showing the offset at which the cell's page starts.

    Raises:
        IndexError: If ``file_sizes`` has no entry for the requested
            file / zip entry pair.
    """
    matching_sizes = [
        file["size"]
        for file in file_sizes
        if file["file_name"] == file_name
        and file["zip_entry_name"] == zip_entry_name
    ]
    if not matching_sizes:
        # Same exception type the bare ``[0]`` lookup raised, with context.
        raise IndexError(f"no size recorded for {file_name} - {zip_entry_name}")
    file_size = matching_sizes[0]

    rows = math.ceil(math.ceil(file_size / PAGE_SIZE) / COLUMN_COUNT)
    # x[row, col] == col and y[row, col] == row; both have shape (rows, COLUMN_COUNT).
    x, y = np.meshgrid(range(0, COLUMN_COUNT), range(0, rows))

    # Collect faulted page numbers straight from the records. Going through a
    # DataFrame first fails with KeyError on "offset" when nothing matches.
    pages_accessed = {
        entry["offset"] // PAGE_SIZE
        for entry in mapped_faults
        if entry["file_name"] == file_name
        and entry["zip_entry_name"] == zip_entry_name
    }

    # Mark the faulted pages. The arrays are indexed [row, column], i.e.
    # [y, x]; looking cells up as (x, y) would transpose the picture.
    z = np.zeros_like(x)
    for page in pages_accessed:
        row_idx, col_idx = divmod(page, COLUMN_COUNT)
        # Ignore offsets that fall outside the grid derived from the file size.
        if 0 <= row_idx < rows:
            z[row_idx, col_idx] = 1

    # Convert this grid to columnar data expected by Altair
    source = pd.DataFrame({'x': x.ravel(),
                           'y': y.ravel(),
                           'z': z.ravel()})
    return alt.Chart(source).mark_rect().transform_calculate(
        # Derived from the constants so the tooltip stays correct if they change.
        offset=f'(datum.x + datum.y * {COLUMN_COUNT}) * {PAGE_SIZE}',
    ).encode(
        x='x:O',
        y='y:O',
        color='z:N',
        tooltip=['offset:Q'],
    ).properties(title=f"Page Faults for {file_name} - {zip_entry_name}")

# Names of every .dex entry listed for the target file (empty names are skipped).
dex_entry_names = [
    entry["zip_entry_name"]
    for entry in file_sizes
    if entry["file_name"] == FILE_NAME
    and entry["zip_entry_name"]
    and entry["zip_entry_name"].endswith(".dex")
]

# One page-fault grid per .dex entry, in the order the entries were listed.
charts = [
    page_fault_chart(FILE_NAME, dex_entry_name, file_sizes, mapped_faults)
    for dex_entry_name in dex_entry_names
]

# Stack the per-entry charts vertically and render them in the cell output.
alt.vconcat(*charts).display()