In [26]:
import sqlite3
import itertools

Notebook that computes various metrics for an AOSP project, namely:
- Number of AOCs
- Types of AOCs
- AOCs per Class
- AOCs per File
- AOCs per LOC (file-wise)
- AOCs per LOC (repo-wise)
- AOCs per repo

In [2]:
# Shared SQLite connection used by the query helpers defined below.
# The path is relative, so it is resolved against the kernel's working directory.
connection = sqlite3.connect("../aosp_ck_output.db")

In [38]:
def get_projects():
    """Return the distinct project names found in the ``files`` table.

    The ``files`` table is used on purpose: it lists more projects than the
    ``projects`` table does.
    """
    rows = connection.execute("SELECT DISTINCT project_name FROM files").fetchall()
    return [row[0] for row in rows]

def report_types_of_aocs(project_name) -> dict:
    """Count the AOC reports of a project, grouped by AOC type.

    Args:
        project_name: Project to inspect. It is compared with SQL ``LIKE``,
            so ``%`` and ``_`` inside the value act as wildcards.

    Returns:
        Mapping of AOC type to number of reports, most frequent type first.
    """
    # Bind the name as a query parameter instead of formatting it into the
    # SQL text: a name containing a quote would otherwise break the statement.
    sql = (
        "SELECT aoc, count(id) AS qty FROM aoc_reports "
        "WHERE project_name LIKE ? GROUP BY aoc ORDER BY qty DESC"
    )
    # Each row is an (aoc, qty) pair, so the cursor converts straight to a dict.
    return dict(connection.execute(sql, (project_name,)))

def report_aocs_per_file(project_name) -> float:
    """Return the average number of AOC reports per file of a project.

    Args:
        project_name: Project to inspect. It is compared with SQL ``LIKE``,
            so ``%`` and ``_`` inside the value act as wildcards.

    Returns:
        Number of rows in ``aoc_reports`` divided by the number of rows in
        ``files`` for the project. When the project has no files the division
        is by zero, which SQLite evaluates to NULL, so ``None`` is returned.
    """
    # Parameters are bound rather than interpolated so that names containing
    # quotes cannot break (or inject into) the statement.
    sql = (
        "SELECT (qty_aocs * 1.0) / qty_files FROM "
        "(SELECT count(path) AS qty_files FROM files WHERE project_name LIKE ?), "
        "(SELECT count(id) AS qty_aocs FROM aoc_reports WHERE project_name LIKE ?)"
    )
    cursor = connection.execute(sql, (project_name, project_name))
    # Both sub-selects are plain aggregates, so exactly one row comes back.
    return next(cursor)[0]

def report_aocs_per_loc_filewise(project_name) -> dict:
    """Return, per file of a project, the file's LOC divided by its AOC count.

    Note that despite the function name the value is ``loc / qty_aocs``
    (lines of code per AOC), so a smaller number means a denser file.

    Args:
        project_name: Project to inspect. It is compared with SQL ``LIKE``,
            so ``%`` and ``_`` inside the value act as wildcards.

    Returns:
        Mapping of file path to the ratio, ordered by ascending ratio. Only
        files that have at least one row in ``aoc_reports`` appear, because
        the two sub-selects are joined on the path.
    """
    # Parameters are bound rather than interpolated so that names containing
    # quotes cannot break (or inject into) the statement.
    sql = (
        "SELECT files_path, ((loc * 1.0) / qty_aocs) AS aocs_per_loc FROM "
        "(SELECT path AS aoc_reports_path, count(id) AS qty_aocs FROM aoc_reports "
        "WHERE project_name LIKE ? GROUP BY path ORDER BY qty_aocs DESC), "
        "(SELECT path AS files_path, loc FROM files WHERE project_name LIKE ?) "
        "WHERE aoc_reports_path = files_path ORDER BY aocs_per_loc ASC"
    )
    cursor = connection.execute(sql, (project_name, project_name))
    # Rows are (path, ratio) pairs; dict() keeps the query's ordering.
    return dict(cursor)

def report_aocs_per_loc(file_name) -> float:
    """Return the LOC of one file divided by its number of AOC reports.

    Note that despite the function name the value is ``loc / aocs``
    (lines of code per AOC).

    Args:
        file_name: Value of the ``path`` column identifying the file.

    Returns:
        ``loc`` from the ``files`` table divided by the count of matching
        rows in ``aoc_reports``.

    Raises:
        StopIteration: If no row in ``files`` has this path.
        ZeroDivisionError: If the file has no rows in ``aoc_reports``.
    """
    # The path is bound as a parameter: file paths may legitimately contain
    # quotes, which would break a statement built by string formatting.
    loc_row = next(connection.execute("SELECT loc FROM files WHERE path = ?", (file_name,)))
    loc = loc_row[0]
    count_row = next(
        connection.execute("SELECT count(id) FROM aoc_reports WHERE path = ?", (file_name,))
    )
    aocs = count_row[0]
    return loc / aocs

def report_aocs_per_loc_repowise() -> dict:
    """Return, for every project, its summed LOC divided by its AOC count.

    Only projects present in both ``files`` and ``aoc_reports`` are included.
    The mapping is ordered by ascending ratio.
    """
    query = "SELECT files_project_name, ((sum_locs * 1.0) / qty_aocs) AS aocs_per_loc FROM (SELECT project_name as files_project_name, sum(loc) as sum_locs FROM files GROUP BY project_name), (SELECT project_name as aoc_reports_project_name, count(id) as qty_aocs FROM aoc_reports GROUP BY project_name) WHERE files_project_name = aoc_reports_project_name ORDER BY aocs_per_loc"
    return {project: ratio for project, ratio in connection.execute(query)}

def report_aocs_per_repo(project_name) -> float:
    """Return the number of AOC reports recorded for one project.

    Args:
        project_name: Exact value of the ``project_name`` column.

    Returns:
        The row count from ``aoc_reports`` for that project (0 if none).
    """
    # Bind the name as a parameter so that quotes in it cannot break the SQL.
    cursor = connection.execute(
        "SELECT count(id) FROM aoc_reports WHERE project_name = ?",
        (project_name,),
    )
    # An aggregate without GROUP BY always yields exactly one row.
    return next(cursor)[0]


def report_aocs_per_repo_aospwise():
    """Return the number of AOC reports for every project in ``aoc_reports``.

    Returns:
        Mapping of project name to its count of AOC reports.
    """
    # The project name must be selected alongside the count: selecting only
    # count(id) yields one-column rows, which cannot be unpacked into a
    # (name, count) pair.
    sql = "SELECT project_name, count(id) FROM aoc_reports GROUP BY project_name"
    return dict(connection.execute(sql))

def report_number_of_files(project_name) -> int:
    """Return how many rows of the ``files`` table belong to one project.

    Args:
        project_name: Exact value of the ``project_name`` column.

    Returns:
        Count of non-NULL ``path`` values for that project (0 if none).
    """
    # Bind the name as a parameter so that quotes in it cannot break the SQL.
    cursor = connection.execute(
        "SELECT count(path) FROM files WHERE project_name = ?",
        (project_name,),
    )
    # An aggregate without GROUP BY always yields exactly one row.
    return next(cursor)[0]

def report_files_with_aocs(project_name) -> list:
    """Return the distinct file paths of a project that have AOC reports.

    Args:
        project_name: Exact value of the ``project_name`` column.

    Returns:
        List of distinct ``path`` values from ``aoc_reports`` for the project.
    """
    # Bind the name as a parameter so that quotes in it cannot break the SQL.
    cursor = connection.execute(
        "SELECT DISTINCT path FROM aoc_reports WHERE project_name = ?",
        (project_name,),
    )
    return [row[0] for row in cursor]

def report_files_without_aocs(project_name) -> list:
    """Return the file paths of a project that have no AOC report.

    Args:
        project_name: Exact value of the ``project_name`` column.

    Returns:
        Paths from the ``files`` table for the project that are not returned
        by ``report_files_with_aocs`` for the same project.
    """
    # Bind the name as a parameter so that quotes in it cannot break the SQL.
    cursor = connection.execute(
        "SELECT path FROM files WHERE project_name = ?",
        (project_name,),
    )
    all_files = [row[0] for row in cursor]
    # A set gives constant-time membership tests; scanning a list for every
    # file would be quadratic on large projects.
    files_with_aoc = set(report_files_with_aocs(project_name))
    return [path for path in all_files if path not in files_with_aoc]

def report_list_of_repos() -> list:
    """Return the ``name`` column of every row in the ``projects`` table."""
    names = []
    for (name,) in connection.execute("SELECT name FROM projects"):
        names.append(name)
    return names



def make_report(project_name):
    """Build a plain-text metrics report for one project.

    The report contains the AOC counts per type, the first ten entries of the
    per-file LOC/AOC ratio, and a block of general metrics (number of atoms,
    repo-wise ratio, files with/without atoms, number of files).

    Args:
        project_name: Project name passed on to the ``report_*`` helpers.

    Returns:
        The report as a single string with lines joined by newlines.
    """
    lines = [f"Report for {project_name}", "", "TYPE OF AOCS", ""]
    lines.extend(
        f"{aoc_type}: {count}"
        for aoc_type, count in report_types_of_aocs(project_name).items()
    )

    lines.append("")
    lines.append("Atom rate per Line of Code (Top 10 files)")
    # The per-file mapping is already ordered by ascending ratio, so the
    # first ten items are the "top 10".
    top_files = itertools.islice(report_aocs_per_loc_filewise(project_name).items(), 10)
    lines.extend(f"{path}: {ratio}" for path, ratio in top_files)

    lines.extend(["", "General Metrics", ""])
    lines.append(f"Number of Atoms: {report_aocs_per_repo(project_name)}")
    # The repo-wise mapping only contains projects that have AOC reports, so
    # a project without any would raise KeyError on direct indexing. Show
    # "n/a" for it instead of aborting the whole report.
    repo_ratio = report_aocs_per_loc_repowise().get(project_name, "n/a")
    lines.append(f"Atom rate per Line of Code (repo wise): {repo_ratio}")
    lines.append(f"Files with at least one atom: {len(report_files_with_aocs(project_name))}")
    lines.append(f"Files without atoms: {len(report_files_without_aocs(project_name))}")
    lines.append(f"Number of java files: {report_number_of_files(project_name)}")
    return "\n".join(lines)


In [39]:
# Print the text report for one project; print() is used so the newlines in
# the returned string are rendered as line breaks.
print(make_report("external/zxing"))

Report for external/zxing

TYPE OF AOCS

Logic as Control Flow: 508
Infix Operator Precedence: 350
Conditional Operator: 295
Type Conversion: 261
Post Increment Decrement: 52
Change of Literal Encoding: 22
Pre Increment Decrement: 18
Repurposed Variables: 12
Arithmetic as Logic: 1

Atom rate per Line of Code (Top 10 files)
/mnt/4846A54B46A53A98/AOSP/external/zxing/core/src/main/java/com/google/zxing/common/PerspectiveTransform.java: 2.343283582089552
/mnt/4846A54B46A53A98/AOSP/external/zxing/core/src/test/java/com/google/zxing/datamatrix/decoder/DecodedBitStreamParserTestCase.java: 4.7
/mnt/4846A54B46A53A98/AOSP/external/zxing/core/src/test/java/com/google/zxing/common/StringUtilsTestCase.java: 6.2105263157894735
/mnt/4846A54B46A53A98/AOSP/external/zxing/core/src/main/java/com/google/zxing/datamatrix/encoder/MinimalEncoder.java: 8.85593220338983
/mnt/4846A54B46A53A98/AOSP/external/zxing/core/src/test/java/com/google/zxing/qrcode/decoder/DataMaskTestCase.java: 9.5
/mnt/4846A54B46A53A98/

In [None]:
# Spot check: LOC divided by AOC count for a single file, looked up by its stored path.
report_aocs_per_loc('/mnt/4846A54B46A53A98/AOSP/external/grpc-grpc-java/alts/src/test/java/io/grpc/alts/internal/AltsChannelCrypterTest.java')

In [None]:
# Spot check: AOC counts per type for one project.
report_types_of_aocs('frameworks/native')

In [None]:
# Spot check: average number of AOC reports per file for one project.
report_aocs_per_file('external/grpc-grpc-java')

In [None]:
# Spot check: per-file LOC/AOC ratio for one project, in ascending order.
report_aocs_per_loc_filewise('frameworks/native')

In [25]:
# Keep only the projects whose name starts with "external"; the comprehension
# preserves the ascending-ratio order of the repo-wise mapping.
{k:v for k,v in report_aocs_per_loc_repowise().items() if k.startswith("external")}

{'external/brotli': 3.5577078288942694,
 'external/conscrypt': 9.142527287993284,
 'external/sonic': 20.892857142857142,
 'external/flatbuffers': 21.15270935960591,
 'external/libese': 21.72460008646779,
 'external/s2-geometry-library-java': 35.14791666666667,
 'external/bouncycastle': 36.90100339732954,
 'external/lzma': 37.27368421052632,
 'external/cbor-java': 37.408256880733944,
 'external/google-smali': 40.361798536075284,
 'external/universal-tween-engine': 41.47787610619469,
 'external/jackson-core': 42.505014749262536,
 'external/noto-fonts': 44.85,
 'external/geojson-jackson': 44.90243902439025,
 'external/owasp/java-encoder': 45.05909090909091,
 'external/smali': 45.23227383863081,
 'external/cldr': 47.82247940368772,
 'external/icu': 48.6857797135288,
 'external/v4l2_codec2': 49.0,
 'external/zxing': 49.617511520737324,
 'external/apache-commons-math': 53.8125,
 'external/mp4parser': 55.189473684210526,
 'external/MPAndroidChart': 56.689473684210526,
 'external/obex': 57.837

In [None]:
# Spot check: per-file LOC/AOC ratio for the project with the lowest repo-wise ratio shown above.
report_aocs_per_loc_filewise('external/brotli')