In [1]:
import logging
import os
from typing import List
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import subprocess
from subprocess import PIPE, STDOUT
from datetime import datetime
import git
import functools
from threading import Lock
# Root directory of the AOSP sync on this machine. The trailing slash is
# load-bearing: project paths are rebuilt by plain string concatenation
# (f"{AOSP_SYNC_PATH}{project_name}") and by str.replace against this value.
AOSP_SYNC_PATH = "/mnt/4846A54B46A53A98/AOSP/"
# Placeholder that stands in for AOSP_SYNC_PATH in git_dirs_NORM.txt; it is
# swapped back for the real root when that file is loaded.
AOSP_ROOT_MATCHER = "<AOSP_ROOT>/"



In [2]:
# Root-logger configuration for the notebook.
# basicConfig only takes effect when the root logger has no handlers yet, so it
# is harmless to re-run; the extra FileHandler below is NOT idempotent by
# itself, hence the explicit "already attached?" check.
logging.basicConfig(
    filename="HISTORYlistener.log",
    level=logging.DEBUG,
    format='%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

logger = logging.getLogger()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Reuse the handler from a previous execution of this cell instead of stacking
# a new one each time (which would write every record N times).
_run_log_path = os.path.abspath('run_notebook.log')
fhandler = next(
    (
        handler
        for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
        and handler.baseFilename == _run_log_path
    ),
    None,
)
if fhandler is None:
    fhandler = logging.FileHandler(filename='run_notebook.log', mode='a')
    logger.addHandler(fhandler)
fhandler.setFormatter(formatter)
logger.setLevel(logging.DEBUG)

In [3]:
# git_repos = []
# for dirpath, dirnames, filenames in os.walk(AOSP_SYNC_PATH):
#     if ".git" in dirnames:
#         git_repos.append(dirpath)

In [8]:
# with open("git_dirs_NORM.txt", "w") as txt_file:
#     for path in git_repos:
#         masked_path = path.replace(AOSP_SYNC_PATH, "<AOSP_ROOT>/")
#         txt_file.write(masked_path + "\n")

In [3]:
def get_project_name(path: str) -> str:
    """Return ``path`` with every occurrence of ``AOSP_ROOT_MATCHER`` removed."""
    pieces = path.split(AOSP_ROOT_MATCHER)
    return "".join(pieces)

def get_java_files_from_path(path: str) -> List[str]:
    """Collect every ``*.java`` file found under ``path``.

    Args:
        path: Directory to walk recursively.

    Returns:
        ``"<dirpath>/<filename>"`` strings in ``os.walk`` order; an empty list
        when no Java file is found.
    """
    logger.info(f"Checking {path}...")
    return [
        f"{current_dir}/{entry}"
        for current_dir, _subdirs, entries in os.walk(path)
        for entry in entries
        if entry.endswith(".java")
    ]

def save_to_file(path: str, content: List[str], mode="a"):
    """Write each item of ``content`` to ``path`` as its own line.

    Args:
        path: Destination file.
        content: Lines to write, without trailing newline.
        mode: ``open`` mode; appends by default.
    """
    with open(path, mode) as out_file:
        logger.info(f"Saving to {path}")
        # Build the full payload first so a bad item fails before anything is written.
        payload = "".join(entry + "\n" for entry in content)
        out_file.write(payload)





In [6]:
def save_report(real_path):
    """Append the Java files found under ``real_path`` to ``aosp_java_files.txt``.

    Logs a warning instead of writing when the directory holds no Java files.
    """
    found = get_java_files_from_path(real_path)
    if not found:
        logger.warning(f"{real_path} does not have java files.")
        return
    save_to_file("aosp_java_files.txt", found)
# Absolute paths of the git project directories, rebuilt from the masked
# listing in git_dirs_NORM.txt (placeholder -> real sync root).
GIT_DIRS = []
with open("git_dirs_NORM.txt", "r") as txt_file:
    for path in txt_file:
        real_path = path.strip().replace(AOSP_ROOT_MATCHER, AOSP_SYNC_PATH)
        # Skip blank lines: an empty entry is a prefix of every path, so it
        # must never end up in GIT_DIRS.
        if real_path:
            GIT_DIRS.append(real_path)
def get_project_from_path(java_file_path: str, project_dirs=None, sync_root=None) -> str:
    """Return the project (relative to the sync root) that owns ``java_file_path``.

    A project owns a file only when the file lies inside the project directory,
    i.e. the match ends on a path-separator boundary. This keeps
    ``external/libcxx`` from claiming files of ``external/libcxxabi``. When
    several projects contain the file (nested repositories), the deepest one
    wins.

    Args:
        java_file_path: Absolute path of the Java file.
        project_dirs: Candidate project directories; defaults to ``GIT_DIRS``.
        sync_root: Prefix stripped from the result; defaults to ``AOSP_SYNC_PATH``.

    Returns:
        The matching project path with ``sync_root`` removed.

    Raises:
        Exception: if no project directory contains the file.
    """
    if project_dirs is None:
        project_dirs = GIT_DIRS
    if sync_root is None:
        sync_root = AOSP_SYNC_PATH

    best_match = None
    best_length = -1
    for project_path in project_dirs:
        prefix = project_path.rstrip("/")
        if not prefix:
            # An empty entry would match every path; ignore it.
            continue
        inside = java_file_path == prefix or java_file_path.startswith(prefix + "/")
        if inside and len(prefix) > best_length:
            best_match, best_length = project_path, len(prefix)

    if best_match is None:
        raise Exception(f"{java_file_path = } does not have a project!")
    return best_match.replace(sync_root, "")


In [None]:
# with ThreadPoolExecutor(max_workers=6) as executor:
#     futures = []
#     for dir in GIT_DIRS:
#         print(f"Submit {dir}")
#         fut = executor.submit(save_report, dir)
#         fut.add_done_callback(lambda x: print(f"{dir} is done"))
#         futures.append(fut)
#     print("Running...")
#     while not all([f.done() for f in futures]):
#         pass
#     print("All done.")


## Categorization

In [14]:
# df_java_files = pd.DataFrame()
# buffer = []
# with open("aosp_java_files.txt", "r") as txt_file:
#     for line in txt_file.readlines():
#         line_stripped = line.strip()
#         buffer.append({
#             "aosp_project": get_project_from_path(line_stripped),
#             "java_file": line_stripped
#         })
# df_java_files = pd.DataFrame.from_records(buffer)
# df_java_files.to_csv("categorized_aosp_java_files.csv")


In [15]:
# df_java_files.to_csv("categorized_aosp_java_files.csv", index=False)

In [5]:
# Load the categorized Java file list from disk. The cells below read its
# "java_file" and "aosp_project" columns.
df_java_files = pd.read_csv("categorized_aosp_java_files.csv")

## Run BOHR

In [9]:
# Text file with one already-processed Java file path per line.
CHECKPOINTS_PATH = "checkpoints.txt"
# Guards the "does the summary CSV exist?" check and the following write, so
# worker threads do not race on the per-project summary files.
CHECK_CSV_PATH_MUTEX = Lock()

def timestamp():
    """Return the current time as whole milliseconds since the epoch (int)."""
    epoch_ms = datetime.now().timestamp() * 1000
    return int(epoch_ms // 1)


def load_checkpoints():
    """Return the stripped lines of ``CHECKPOINTS_PATH``, or ``[]`` if the file is missing."""
    if not os.path.exists(CHECKPOINTS_PATH):
        return []
    with open(CHECKPOINTS_PATH, "r") as checkpoint_file:
        return [entry.strip() for entry in checkpoint_file.readlines()]


def save_checkpoint(path):
    """Append ``path`` as a new line to ``CHECKPOINTS_PATH`` and log it."""
    record = path + "\n"
    with open(CHECKPOINTS_PATH, "a") as checkpoint_file:
        checkpoint_file.writelines([record])
    logger.info(f"saved {path}")
def log_subprocess_output(pipe):
    """Forward every line read from the binary stream ``pipe`` to the logger.

    Reads until ``readline`` returns ``b''`` (end of stream).
    """
    while True:
        line = pipe.readline()
        if line == b'':
            break
        logger.info('got line from subprocess: %r', line)

def run_bohr(java_file_path, file_name):
    """Run ``../bohr.jar`` on one Java file and return the report's CSV path.

    Args:
        java_file_path: Java source file passed to BOHR.
        file_name: Prefix of the report name; a timestamp and a random suffix
            are appended to it.

    Returns:
        The report name, with ``.csv`` appended unless it already ends with it.
    """
    import uuid  # local import: only this function needs it

    ms = timestamp()
    # The millisecond timestamp alone is not unique when several worker threads
    # start at the same time; the random suffix keeps their reports apart.
    bohr_report_name = f"{file_name}_{ms}_{uuid.uuid4().hex}"
    command = ["java", "-jar", "../bohr.jar", java_file_path, bohr_report_name]
    # The context manager closes the pipe AND waits for the child, so the
    # process is reaped and its exit code becomes available.
    with subprocess.Popen(command, stdout=PIPE, stderr=STDOUT) as process:
        log_subprocess_output(process.stdout)
    if process.returncode != 0:
        logger.error(f"BOHR exited with code {process.returncode} for {java_file_path}")
    if bohr_report_name.endswith(".csv"):
        return bohr_report_name
    return bohr_report_name + ".csv"


def gen_bohr_report_path(java_file_path):
    """Return the per-project summary CSV path for the project owning ``java_file_path``.

    Slashes in the project name are turned into dashes so it fits in one
    file name.
    """
    project = get_project_from_path(java_file_path)
    projpath = "-".join(project.split("/"))
    return f"./reports/summary/bohr_report_aosp_{projpath}.csv"


def get_loc(java_file_path):
    """Return the line count of ``java_file_path`` plus one.

    Undecodable bytes are replaced rather than raising, so a source file in an
    unexpected encoding cannot abort the run; replacement never adds or removes
    line breaks.

    NOTE(review): the ``+ 1`` makes the result one higher than the number of
    lines read. It is kept unchanged to preserve existing output; confirm
    whether it is intended.
    """
    with open(java_file_path, "r", errors="replace") as java_file:
        # Stream the file instead of materializing every line.
        line_count = sum(1 for _ in java_file)
    return line_count + 1


@functools.cache
def get_commit(project_path):
    """Return the hex SHA of the commit that HEAD points to in ``project_path``.

    The result is memoized per ``project_path`` by ``functools.cache``.
    """
    head_commit = git.Repo(project_path).head.commit
    return head_commit.hexsha


def execute_bohr_report_generation(java_file_path):
    """Run BOHR on one Java file and merge its findings into the project summary CSV.

    The temporary per-file report is read and then deleted. When it has no
    rows, only a warning is logged. Otherwise the rows are annotated with the
    file name, project name, project HEAD commit and ``get_loc`` value, then
    appended to the project's summary CSV (created with a header on first use).

    Args:
        java_file_path: Java source file to analyze.

    Returns:
        ``java_file_path``, so the caller can record it as processed.
    """
    filename = run_bohr(java_file_path, "./reports/bohr_report")
    try:
        df_bohr_report = pd.read_csv(filename)
    finally:
        # Remove the temporary report even when parsing fails, so failed runs
        # do not pile up stray files.
        if os.path.exists(filename):
            os.remove(filename)
    if df_bohr_report.empty:
        logger.warning(f"No AoCs found in {java_file_path}")
        return java_file_path

    project_name = get_project_from_path(java_file_path)
    commit = get_commit(f"{AOSP_SYNC_PATH}{project_name}")
    loc = get_loc(java_file_path)
    df_bohr_report = df_bohr_report.assign(
        filename=java_file_path, project_name=project_name, commit=commit, loc=loc
    )

    bohr_report_path = gen_bohr_report_path(java_file_path)
    # The summary directory may not exist on a fresh checkout.
    os.makedirs(os.path.dirname(bohr_report_path), exist_ok=True)
    with CHECK_CSV_PATH_MUTEX:
        # Header only for a new file; check and write stay under the same lock
        # so two threads cannot both decide to write the header.
        write_header = not os.path.exists(bohr_report_path)
        df_bohr_report.to_csv(bohr_report_path, index=False, header=write_header, mode='a')
    return java_file_path

In [8]:
# First file of the frame; kept as a handy sample value, not used by the run below.
java_file_path = df_java_files.loc[0]["java_file"]
# NOTE(review): this compares against an absolute path, while
# get_project_from_path strips AOSP_SYNC_PATH from its result. Confirm that the
# CSV's "aosp_project" column really holds absolute paths.
filtered = df_java_files.loc[df_java_files["aosp_project"] == "/mnt/4846A54B46A53A98/AOSP/developers/build"]
futures = []
future_sources = {}  # Future -> java file it was submitted for (for error logs)
checkpoints = load_checkpoints()
processed = set(checkpoints)  # constant-time membership test
with ThreadPoolExecutor(max_workers=6) as executor:
    try:
        for idx, row in filtered.iterrows():
            java_file = row["java_file"]
            if java_file in processed:
                logger.info(f"Java file {java_file} already processed, skipping...")
                continue
            f = executor.submit(execute_bohr_report_generation, java_file)
            future_sources[f] = java_file
            futures.append(f)
        for f in as_completed(futures):
            try:
                # The worker returns the path it processed; checkpoint THAT
                # path, not an unrelated module-level variable.
                path = f.result()
            except Exception:
                # Leave failed files un-checkpointed so a later run retries them.
                logger.exception(f"BOHR failed on {future_sources[f]}.")
                continue
            save_checkpoint(path)
            logger.info(f"Completed BOHR on {path}.")
    except KeyboardInterrupt:
        logger.error("Keyboard error received, stopping...")
        print("Keyboard error received, stopping...")
        executor.shutdown(wait=True, cancel_futures=True)