In [1]:
import sqlite3

import numpy as np
import pandas as pd
from numpy import linalg

import db

In [2]:
# Short alias for pandas.DataFrame, used in the type annotations of the cells below.
Df = pd.DataFrame

In [3]:
# SQLite database file that the connection cell opens with sqlite3.connect.
DB_FILE = "android-base.db"
# Git ref passed to db.fetch_children_of when listing the target's child entities.
LEAD_REF = "refs/tags/android-13.0.0_r24"
# Path of the file under study; no other visible cell reads this constant.
TARGET_FILE = "core/java/android/view/View.java"
TARGET_ID = 102495 # The id of the top-level class of the target file

In [4]:
# Connection shared by the `db` helper calls below; no visible cell closes it.
# NOTE(review): sqlite3.connect creates an empty database when the path does not
# exist, so a wrong DB_FILE would only surface later as missing tables — confirm
# the file is present before running.
CON = sqlite3.connect(DB_FILE)

In [5]:
# Prepare the temporary tables before running any fetch helper.
db.create_temp_tables(CON)

# Dependencies fetched for the target, restricted to the "Call" and "Use" kinds.
deps_df = db.fetch_internal_deps(CON, TARGET_ID).loc[
    lambda deps: deps["kind"].isin(["Call", "Use"])
]

# Child entities of the target at LEAD_REF; the frame's index supplies the
# entity ids, and their order in `entity_ids` defines each entity's position.
entities_df = db.fetch_children_of(CON, LEAD_REF, TARGET_ID)
entity_ids = [*entities_df.index]

In [6]:
def to_entity_id(ix: int) -> int:
    """Return the entity id stored at position ``ix`` of ``entity_ids``.

    Raises:
        IndexError: If ``ix`` is outside the bounds of ``entity_ids``.
    """
    entity_id = entity_ids[ix]
    return entity_id

def to_entity_ix(id: int) -> int:
    """Return the position of entity ``id`` within ``entity_ids``.

    This is a linear scan of the list, so each call costs O(len(entity_ids)).

    Raises:
        ValueError: If ``id`` is not present in ``entity_ids``.
    """
    position = entity_ids.index(id)
    return position

In [7]:
def create_sym_mat(n_entities, deps_df: Df, ids=None) -> np.ndarray:
    """Build a symmetric 0/1 adjacency matrix from dependency rows.

    Each row of ``deps_df`` links the entity in its ``src_id`` column to the
    entity in its ``tgt_id`` column. The link is written in both directions,
    so edge direction is discarded and duplicate rows have no extra effect.

    Args:
        n_entities: Side length of the returned square matrix.
        deps_df: Frame with ``src_id`` and ``tgt_id`` columns of entity ids.
        ids: Optional sequence of entity ids whose positions define the
            matrix rows and columns. When omitted, the notebook-level
            ``entity_ids`` list is used (the ordering ``to_entity_ix`` uses).

    Returns:
        A float array of shape ``(n_entities, n_entities)`` holding 1.0 at
        ``[i, j]`` and ``[j, i]`` for every dependency between the entities at
        positions ``i`` and ``j``, and 0.0 elsewhere.

    Raises:
        ValueError: If a ``src_id`` or ``tgt_id`` value is not found in ``ids``.
        IndexError: If a resolved position is not smaller than ``n_entities``.
    """
    if ids is None:
        ids = entity_ids

    # Build the id -> position map once instead of scanning the id list for
    # every endpoint. setdefault keeps the first position of a repeated id,
    # which is what list.index would return.
    ix_of = {}
    for ix, entity_id in enumerate(ids):
        ix_of.setdefault(entity_id, ix)

    def positions(column: str) -> np.ndarray:
        """Translate one id column of ``deps_df`` into matrix positions."""
        try:
            found = [ix_of[entity_id] for entity_id in deps_df[column].tolist()]
        except KeyError as err:
            # Keep the exception type callers saw from the list.index lookup.
            raise ValueError(
                f"{column} value {err.args[0]!r} is not a known entity id"
            ) from err
        # Explicit integer dtype so an empty frame still yields a valid index array.
        return np.asarray(found, dtype=np.intp)

    src_ix = positions("src_id")
    tgt_ix = positions("tgt_id")

    arr = np.zeros((n_entities, n_entities))
    arr[src_ix, tgt_ix] = 1.0
    arr[tgt_ix, src_ix] = 1.0
    return arr

In [8]:
# Display the child entities fetched for TARGET_ID (the frame is indexed by entity id).
entities_df

Unnamed: 0_level_0,parent_id,name,kind,start_row,end_row
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
102496,102495,DBG,field,841,842
102497,102495,DEBUG_DRAW,field,845,845
102498,102495,VIEW_LOG_TAG,field,850,850
102499,102495,AUTOFILL_LOG_TAG,field,857,857
102500,102495,CONTENT_CAPTURE_LOG_TAG,field,862,862
...,...,...,...,...,...
104116,102495,onVirtualViewTranslationResponses,method,31657,31660
104117,102495,dispatchCreateViewTranslationRequest,method,31689,31706
104118,102495,ViewTranslationRequestConsumer,class,31708,31732
104119,102495,generateDisplayHash,method,31748,31798


In [9]:
# Display the dependency rows kept after filtering to the "Call" and "Use" kinds.
deps_df

Unnamed: 0,src_id,tgt_id,kind
1,104029,103046,Use
19,104036,103046,Use
21,104037,103046,Use
23,104038,103046,Use
25,104039,103046,Use
...,...,...,...
4021,103336,102594,Use
4022,103336,102899,Use
4023,103334,102578,Use
4024,103334,102579,Use


In [10]:
# Symmetric adjacency matrix over the target's child entities.
M = create_sym_mat(len(entities_df), deps_df)

# Diagonal matrix holding the column sums of M.
D = np.diag(M.sum(axis=0))

# Graph Laplacian and its pseudo-inverse; L is symmetric, hence hermitian=True.
L = D - M
L_inv = np.linalg.pinv(L, hermitian=True)

In [25]:
# NOTE: despite its name, `arb_id` is a row position, not an entity id — it is
# used with .iloc here and as a row index into L_inv in the ranking cell.
arb_id = 430
# Show which entity sits at that position.
entities_df.iloc[arb_id]

parent_id               102495
name         mUserPaddingStart
kind                     field
start_row                 4554
end_row                   4555
Name: 102926, dtype: object

In [29]:
# Attach the L_inv row of the chosen entity as a "sim" column on a copy of
# entities_df, leaving the original frame untouched.
df = entities_df.assign(sim=L_inv[arb_id])

# Show the 15 entities with the largest "sim" values, highest first.
df.sort_values("sim", ascending=False).head(15)

Unnamed: 0_level_0,parent_id,name,kind,start_row,end_row,sim
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
102926,102495,mUserPaddingStart,field,4554,4555,0.13287
103880,102495,isPaddingRelative,method,25366,25368,0.059107
103265,102495,applyInsets,method,11523,11529,0.029944
103870,102495,setPadding,method,25148,25161,0.029268
103882,102495,resetPaddingToInitialValues,method,25377,25391,0.028022
103872,102495,setPaddingRelative,method,25242,25262,0.024029
102930,102495,UNDEFINED_PADDING,field,4583,4583,0.023391
102927,102495,mUserPaddingEnd,field,4561,4562,0.021759
102929,102495,mUserPaddingRightInitial,field,4578,4578,0.018601
102928,102495,mUserPaddingLeftInitial,field,4570,4570,0.018601


In [12]:
# Display row 0 of the Laplacian pseudo-inverse (the values for the entity at position 0).
L_inv[0]

array([ 2.53825870e-01, -1.82150322e-04,  6.09640055e-04, ...,
       -5.24684254e-03,  3.65990279e-04,  9.71607622e-04])