Analysis pinned to Kernel version 5.10

In [None]:
import re
import itertools
from itertools import islice
from subprocess import run

from pathlib import Path
import sqlite3

# Kernel source tree to analyse (the current working directory).
KERNEL = Path(".")
# Directory for the survey's data files (cscope dumps, SQLite database).
OUTDIR = Path("../function_survey/output/")
# Receives the output of `cscope -RL2 ".*"` (written by a later cell).
all_calls = OUTDIR/"cscope_all_calls.txt"
# NOTE(review): not referenced in the visible cells; presumably a ctags index. Confirm.
kernel_tags = OUTDIR/"kernel_tags"
# Receives the output of `cscope -RL2 ".*test.*"` (written by a later cell).
test_targets = OUTDIR/"cscope_test_targets"
# NOTE(review): not referenced in the visible cells; confirm what produces it.
all_c_code = OUTDIR/"all_c_code.txt"
# NOTE(review): not referenced in the visible cells; confirm what it holds.
blame_files = Path("../blame")


# Survey database; later cells query its ctags, cregit_functions and
# cregit_calls tables through this shared cursor.
connection = sqlite3.connect(OUTDIR/"function_survey.db")
cursor = connection.cursor()

def head(iterable, n=10):
    """Return the first ``n`` items of ``iterable`` as a list.

    Fewer than ``n`` items come back when the iterable runs out early.
    Only the returned items are consumed from the iterable.
    """
    leading = islice(iterable, n)
    return [*leading]

# Comparing Ctags and Cregit

In [None]:
# Column names of each survey table: field 1 of every PRAGMA table_info row
# is the column's name.
surveyed_tables = ("ctags", "cregit_functions", "cregit_calls")
[
    (table, [column[1] for column in cursor.execute("PRAGMA table_info([%s])" % table)])
    for table in surveyed_tables
]

In [None]:
# Number of ctags rows whose token_type is 'function'.
# NOTE(review): the previous inline note read "includes variables, macros etc.";
# that presumably describes the ctags table as a whole, since this query keeps
# only functions. Confirm.
ctags_function_count_query = "SELECT COUNT(*) FROM ctags WHERE token_type='function'"
head(cursor.execute(ctags_function_count_query))

In [None]:
# Number of rows in cregit_functions.
cregit_count_query = "SELECT COUNT(*) FROM cregit_functions"
head(cursor.execute(cregit_count_query))

In [None]:
# Count the joined rows where ctags and cregit_functions agree on both
# file and name.
matched_query = """
SELECT COUNT(*) FROM ctags JOIN cregit_functions ON
ctags.file = cregit_functions.file
AND ctags.name = cregit_functions.name
"""
matched_rows = cursor.execute(matched_query)
head(matched_rows)

In [None]:
# Sample of (file, name) pairs present in cregit_functions but absent from ctags.
cregit_only_query = """
SELECT file, name FROM cregit_functions
EXCEPT SELECT file, name FROM ctags"""
head(cursor.execute(cregit_only_query))

In [None]:
# Sample of the (file, name) pairs stored in cregit_functions.
cregit_sample_query = "SELECT file, name FROM cregit_functions"
head(cursor.execute(cregit_sample_query))

In [None]:
# Sample of (file, name) pairs present in ctags but absent from cregit_functions.
ctags_only_query = "SELECT file, name FROM ctags EXCEPT SELECT file, name FROM cregit_functions"
head(cursor.execute(ctags_only_query))

In [None]:
# Sample of (file, name) pairs present in cregit_functions but absent from ctags
# (the reverse direction of the ctags-minus-cregit difference).
cregit_not_in_ctags_query = "SELECT file, name FROM cregit_functions EXCEPT SELECT file, name FROM ctags"
head(cursor.execute(cregit_not_in_ctags_query))

## Cscope

In [None]:
def parse_cscope(filename):
    """Apply ``parse`` to ``filename`` using the pattern for cscope output lines.

    A line is matched as ``<path> <funcname> <linenum> <usage_line>``: the
    first two fields contain no spaces, ``linenum`` is all digits, and
    ``usage_line`` is the remainder of the line. The named groups are
    ``path``, ``funcname``, ``linenum`` and ``usage_line``.

    Returns whatever ``parse`` returns; that helper is defined elsewhere in
    the notebook.
    """
    return parse(
        filename,
        r"(?P<path>^[^ ]+) (?P<funcname>[^ ]+) (?P<linenum>\d+) (?P<usage_line>.*)",
    )

In [None]:
# Get all function calls: -R searches subdirectories recursively, -L2 runs a
# single line-oriented "functions called by this function" query, and ".*"
# matches every caller. The output is redirected into the all_calls file.
!cscope -RL2 ".*" > {all_calls}
# Get all calls made from a function with "test" in its name; the output is
# redirected into the test_targets file.
!cscope -RL2 ".*test.*" > {test_targets}

In [None]:
# Every (funcname, path) pair parsed from the all-calls dump, and the bare
# function names taken from those pairs.
called_functions = {
    (match["funcname"], match["path"]) for match in parse_cscope(all_calls)
}
called_function_names = {funcname for funcname, _ in called_functions}

In [None]:
# Every (funcname, path) pair parsed from the test-targets dump, and the bare
# function names taken from those pairs.
tested_functions = {
    (match["funcname"], match["path"]) for match in parse_cscope(test_targets)
}
tested_function_names = {funcname for funcname, _ in tested_functions}

In [None]:
# Names found in the test-targets dump but missing from the all-calls dump,
# shown together with how many there are.
# before the math.c fix, there were 187 functions in this set
only_in_test_targets = tested_function_names - called_function_names
len(only_in_test_targets), only_in_test_targets

In [None]:
# functions detected by cscope (getting called by the tests) but not by ctags
# NOTE(review): func_names and functions are defined outside the visible
# cells; presumably the ctags-derived name set and pair set. Confirm.
names_missing_from_ctags = tested_function_names - func_names
functions_missing_from_ctags = tested_functions - functions
len(names_missing_from_ctags), len(functions_missing_from_ctags)

In [None]:
# Directory in which to run cscope for this check.
target = KERNEL

# cd into the target and run a recursive (-R), line-oriented (-L) cscope
# symbol search (-0) for every symbol (".*"); output goes to the cell.
!(cd {target}; cscope -RL0 ".*")

In [None]:
# The results of this cell seem to indicate that a single directory (arch/sh/math-emu) is tripping up cscope.
# Removing that file lets cscope run unimpeded.
def test_cscope(path):
    """Recursively report the subdirectories of ``path`` where cscope complains.

    ``cscope -RL0 ".*"`` is run inside every immediate subdirectory of
    ``path``. A subdirectory counts as failing when cscope writes anything to
    stderr other than nothing at all or its "no source files found" notice.
    Each failing subdirectory is printed with the stderr text and is then
    searched the same way, narrowing down where the problem lives.

    Args:
        path: ``pathlib.Path`` of the directory whose subdirectories are
            checked. If it is not a directory, a message is printed and
            nothing else happens.

    Returns:
        None. All findings are printed.

    Raises:
        FileNotFoundError: if the ``cscope`` executable cannot be found.
    """
    if not path.is_dir():
        print("fail on file:", path)
        return

    # stderr contents that do not indicate a problem.
    harmless_stderr = {b'', b'cscope: no source files found\n'}

    for child in path.iterdir():
        if not child.is_dir():
            continue
        print(child)
        # An argument list avoids spawning a shell and needs no quoting for
        # the ".*" pattern; the arguments cscope receives are unchanged.
        result = run(["cscope", "-RL0", ".*"], cwd=child, capture_output=True)
        # TODO: the return code is a better way to identify errors.
        # Could just list all dirs/files in the kernel (find -d gives the
        # list of directories) and echo stderr for each.
        if result.stderr in harmless_stderr:
            continue
        print(child, repr(result.stderr))
        test_cscope(child)

# Start the search at the kernel root; only its subdirectories are probed.
test_cscope(KERNEL)

In [None]:
# Run the recursive cscope symbol search from the current directory and show
# the CompletedProcess (return code, stdout, stderr). An argument list is
# used so no shell is spawned; cscope receives the same three arguments.
result = run(["cscope", "-RL0", ".*"], capture_output=True)
result