In [None]:
import jemma_utils as ju
from enum import Enum
import pandas as pd

class Metrics(Enum):
	"""Names of the per-project metrics computed in this notebook.

	Each member's value is the string used as the dictionary key / column
	name for that metric in the consolidated output.
	"""

	# Count of rows per project in the classes metadata.
	NUMBER_OF_CLASSES_IN_PROJECT = "number_of_classes_in_project"
	# Sum of `num_parameters` over a project's methods.
	NUMBER_OF_PARAMETERS = "number_of_parameters"
	# Count of rows per project in the methods metadata.
	NUMBER_OF_METHODS = "number_of_methods"

# Locations of the Jemma CSV files read by this notebook (relative to the
# current working directory).
method_parameters = "./jemma_datasets/properties/Jemma_Properties_Methods_NMPR.csv"
classes = "./jemma_datasets/metadata/Jemma_Metadata_Classes.csv"
methods = "./jemma_datasets/metadata/Jemma_Metadata_Methods.csv"
projects = "./jemma_datasets/metadata/Jemma_Metadata_Projects.csv"

# Load every CSV into its own DataFrame; the files are read one after another
# in the order listed in the tuple.
method_parameters_df, classes_df, methods_df, projects_df = (
	pd.read_csv(csv_path)
	for csv_path in (method_parameters, classes, methods, projects)
)

# Number of rows per project_id in each metadata table. The resulting Series
# are indexed by project_id, which is what the later per-project lookups rely on.
# NOTE(review): presumably one row per class / per method -- confirm against the dataset.
classes_counts = classes_df.value_counts("project_id")
methods_counts = methods_df.value_counts("project_id")


In [None]:
# Attach the per-method properties to the methods metadata. The inner join
# keeps only method_ids that appear in both tables.
method_parameters_merged = methods_df.merge(
	method_parameters_df,
	on='method_id',
	how='inner',
)

In [None]:
# Force `num_parameters` to a numeric dtype; values that cannot be parsed
# become NaN (errors='coerce') and are therefore skipped by the sum below.
num_parameters_numeric = pd.to_numeric(
	method_parameters_merged['num_parameters'],
	errors='coerce',
)
method_parameters_merged['num_parameters'] = num_parameters_numeric

# Total number of parameters per project, as a Series indexed by project_id.
parameters_by_project = method_parameters_merged.groupby("project_id")
method_parameters_merged_sum = parameters_by_project["num_parameters"].sum()

In [None]:

def extract_metrics_from_project(project_id):
	"""Collect the per-project metrics for a single project.

	Reads the module-level series `classes_counts`, `methods_counts` and
	`method_parameters_merged_sum`, all of which are looked up by project id.

	Args:
		project_id: Identifier of the project to look up.

	Returns:
		A dict with the key "project_id" plus one key per `Metrics` member
		value. A metric is -1 when the project is absent from the
		corresponding series.
	"""
	def value_or_missing(per_project_series):
		# `in` on a pandas Series tests the index labels, not the values.
		if project_id in per_project_series:
			return per_project_series[project_id]
		return -1

	return {
		"project_id": project_id,
		Metrics.NUMBER_OF_CLASSES_IN_PROJECT.value: value_or_missing(classes_counts),
		Metrics.NUMBER_OF_METHODS.value: value_or_missing(methods_counts),
		Metrics.NUMBER_OF_PARAMETERS.value: value_or_missing(method_parameters_merged_sum),
	}



# Column order of the consolidated table; the same strings are the keys of the
# dict returned by extract_metrics_from_project.
columns = [
	"project_id",
	Metrics.NUMBER_OF_CLASSES_IN_PROJECT.value,
	Metrics.NUMBER_OF_METHODS.value,
	Metrics.NUMBER_OF_PARAMETERS.value,
]

# Build one row per project listed in the projects metadata.
rows = []
for project_id in projects_df['project_id']:
	print(f'Extracting insights for project {project_id}')
	metrics = extract_metrics_from_project(project_id)
	# Pull the values out in `columns` order so each row lines up with the header.
	rows.append([metrics[column] for column in columns])

consolidated_metrics_df = pd.DataFrame(rows, columns=columns)

# Persist the table (to_csv defaults apply, so the DataFrame index is written
# as the first column) and echo it to the cell output.
consolidated_metrics_df.to_csv("50kc_insights.csv")
print(consolidated_metrics_df)
