In [1]:
import pandas as pd
import logging
# Root logging stays at ERROR so output from other libraries remains quiet.
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.ERROR, datefmt='%H:%M:%S')

# The notebook's own logger is lowered to WARNING: the lookup helpers report
# ambiguous matches through logger.warning(), and with the inherited ERROR
# threshold those messages would be dropped without a trace. Records from this
# logger still propagate to the root handler configured above.
logger = logging.getLogger("analysis")
logger.setLevel(logging.WARNING)

## Load data + utilities

In [2]:
# Table of applications; the lookup helpers read its "name" and "clone_url" columns.
df_apps = pd.read_csv(filepath_or_buffer="data/java_github_top20.csv")
def map_repo_to_project_name(repo_name) -> str:
    """Look up the project name whose clone URL points at ``repo_name``.

    The lookup runs against the module-level ``df_apps`` frame and compares
    ``repo_name`` plus a ``.git`` suffix with the end of each ``clone_url``.

    Args:
        repo_name: Repository name without the ``.git`` suffix.

    Returns:
        The matching project name, or ``""`` when no project matches (an error
        is logged). If several projects match, a warning is logged and the
        list of all matching names is returned instead of a single string.
    """
    urls = df_apps["clone_url"]
    git_name = f"{repo_name}.git"
    # Require the repository name to start at a path boundary ("/" for URLs,
    # ":" for scp-style remotes): a bare suffix test would make "android" also
    # match ".../my-android.git".
    # na=False keeps rows with a missing clone_url out of the mask instead of
    # producing NaN entries that cannot be used for boolean indexing.
    match_filter = (
        urls.str.endswith(f"/{git_name}", na=False)
        | urls.str.endswith(f":{git_name}", na=False)
        | (urls == git_name)
    )
    names = list(df_apps.loc[match_filter, "name"])
    if not names:
        logger.error(f"No project matched \"{repo_name}\"")
        return ""
    if len(names) > 1:
        logger.warning("Matched more than one project")
        return names
    return names[0]
def transform_path_to_repo_name(path: str) -> str:
    """Extract the bare repository name from a clone URL or path.

    Args:
        path: Clone URL or slash-separated path, e.g.
            ``"https://github.com/nextcloud/android.git"``.

    Returns:
        The last path component with a trailing ``.git`` removed, e.g.
        ``"android"``.
    """
    # Ignore trailing slashes so ".../repo.git/" still yields "repo" rather than "".
    repo_name = path.rstrip("/").split("/")[-1]
    # Strip only a *trailing* ".git": splitting on the first ".git" would cut
    # names such as "user.github.io" down to "user".
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    return repo_name
def map_project_name_to_repo(project) -> str:
    """Look up the repository name for the project called ``project``.

    The lookup runs against the module-level ``df_apps`` frame: rows whose
    ``name`` equals ``project`` are selected and their ``clone_url`` is reduced
    to a bare repository name with ``transform_path_to_repo_name``.

    Args:
        project: Project name exactly as it appears in the ``name`` column.

    Returns:
        The repository name, or ``""`` when no project matches (an error is
        logged). If several rows match, a warning is logged and the list of
        all matching repository names is returned instead of a single string.
    """
    clone_urls = list(df_apps.loc[df_apps["name"] == project, "clone_url"])
    if not clone_urls:
        logger.error(f"No repo matched \"{project}\"")
        return ""
    if len(clone_urls) > 1:
        logger.warning("Matched more than one repo")
        # clone_urls is already a plain list of URL strings, so iterate it
        # directly; a list cannot be indexed by a column name.
        return [transform_path_to_repo_name(url) for url in clone_urls]
    return transform_path_to_repo_name(clone_urls[0])

In [7]:
BOHR_REPORTS_PATH = "./bohr_reports"
PROJECT = "Nextcloud Dev"

# map_project_name_to_repo returns "" when no project matches. Only a single,
# non-empty repository name gives a meaningful report path, so fail early with
# a clear message rather than trying to open a malformed "_consolidated.csv" file.
project_repo = map_project_name_to_repo(PROJECT)
if not isinstance(project_repo, str) or not project_repo:
    raise ValueError(
        f"Could not resolve a single repository for project {PROJECT!r}: got {project_repo!r}"
    )

df_project_aocs = pd.read_csv(f"{BOHR_REPORTS_PATH}/{project_repo}_consolidated.csv")

In [9]:
# Summarise the "Class" column of the loaded report.
df_project_aocs.loc[:, "Class"].describe()

count                                          1110
unique                                          166
top       com.owncloud.android.files.FileMenuFilter
freq                                             81
Name: Class, dtype: object