In [None]:
# Mount Google Drive (Colab only); the metrics archive is copied from it in the next cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp drive/MyDrive/collected_metrics.zip collected_metrics.zip
!unzip collected_metrics
!rm collected_metrics.zip

In [None]:
# List the working directory; the extracted collected_metrics folder should be present.
!ls

collected_metrics  drive  __MACOSX  sample_data


In [None]:
# Show everything under collected_metrics as an indented tree: the first sed expression
# rewrites each parent-directory component of a path to " |", the second puts "-" in
# front of the entry name.
!find collected_metrics | sed -e "s/[^-][^\/]*\// |/g" -e "s/|\([^ ]\)/|-\1/"

In [None]:
import numpy as np
def optional_load_agg(filename, columns):
    """Return the per-column mean of the numeric columns of a CSV file.

    Args:
        filename: Path of the CSV file to read.
        columns: Column labels used to index the fallback result.

    Returns:
        A Series holding the mean of every numeric column. If pandas reports
        the file as empty (``EmptyDataError``), a float64 Series of NaN
        indexed by ``columns`` is returned instead.
    """
    try:
        numeric_part = pd.read_csv(filename)._get_numeric_data()
    except pd.errors.EmptyDataError:
        # Nothing to aggregate: hand back one NaN per expected column.
        return pd.Series([np.nan] * len(columns), index=columns, dtype = 'float64')
    else:
        return numeric_part.agg("mean")

def optional_load(filename, columns, max_rows=100):
    """Load the numeric columns of the leading rows of a CSV file as floats.

    Args:
        filename: Path of the CSV file to read.
        columns: Column labels used to build the placeholder frame returned
            for an empty file.
        max_rows: Maximum number of leading rows to keep. Defaults to 100,
            the limit that used to be hard-coded.

    Returns:
        A float DataFrame with at most ``max_rows`` rows; a frame with a
        single all-NaN row labelled ``columns`` when pandas reports the file
        as empty; ``None`` when the file does not exist.
    """
    try:
        loaded = pd.read_csv(filename)
    except pd.errors.EmptyDataError:
        # Keep the expected columns so downstream code still sees the schema.
        placeholder = pd.DataFrame(columns=columns, dtype='float64')
        placeholder.loc[0] = [np.nan] * len(columns)
        return placeholder
    except FileNotFoundError:
        return None
    return loaded._get_numeric_data().head(max_rows).astype(float)

In [None]:
import glob
import pandas as pd

# For each metric granularity: collect the per-repository CSV paths and take the
# numeric column labels from the first file found. These labels are the fallback
# schema handed to optional_load() when a repository's file is empty.
all_repo_class_metrics_file_paths = glob.glob("collected_metrics/*/class.csv")
class_column = pd.read_csv(all_repo_class_metrics_file_paths[0])._get_numeric_data().columns
all_repo_method_metrics_file_paths = glob.glob("collected_metrics/*/method.csv")
method_column = pd.read_csv(all_repo_method_metrics_file_paths[0])._get_numeric_data().columns
all_repo_field_metrics_file_paths = glob.glob("collected_metrics/*/field.csv")
field_column = pd.read_csv(all_repo_field_metrics_file_paths[0])._get_numeric_data().columns
all_repo_variable_metrics_file_paths = glob.glob("collected_metrics/*/variable.csv")
# Must be read from a variable.csv file; reading field.csv here would make
# variable_column a copy of field_column.
variable_column = pd.read_csv(all_repo_variable_metrics_file_paths[0])._get_numeric_data().columns

In [None]:
class Repo:
    """Metric tables of one repository stored under ``collected_metrics/<name>/``.

    The four tables (class, method, field, variable) are read by ``load()``
    and exposed through accessor methods of the same names.
    """

    def __init__(self, name):
        self.name = name
        self.is_load = False  # set to True once load() has read the files
        self.composite_df = None

    def load(self):
        """Read the four metric CSV files once; return ``self`` for chaining."""
        if not self.is_load:
            self._class_df = optional_load(f"collected_metrics/{self.name}/class.csv", class_column)
            self._method_df = optional_load(f"collected_metrics/{self.name}/method.csv", method_column)
            self._field_df = optional_load(f"collected_metrics/{self.name}/field.csv", field_column)
            self._variable_df = optional_load(f"collected_metrics/{self.name}/variable.csv", variable_column)
            self.is_load = True
        return self

    def class_df(self):
        """Return the class-level table stored by ``load()``."""
        return self._class_df

    def method_df(self):
        """Return the method-level table stored by ``load()``."""
        return self._method_df

    def field_df(self):
        """Return the field-level table stored by ``load()``."""
        return self._field_df

    def variable_df(self):
        """Return the variable-level table stored by ``load()``."""
        return self._variable_df

## Loading Each Repository

In [None]:
import os
# One loaded Repo per immediate sub-directory of collected_metrics. Only the top
# level is listed (no walk of the whole tree), and the names are sorted because
# directory listing order is platform dependent and this list is later fed to a
# seeded train_test_split.
repos = [
    Repo(name).load()
    for name in sorted(entry.name for entry in os.scandir("collected_metrics") if entry.is_dir())
]

## Objective function definition

In [None]:
import tensorflow as tf

def kruskal_stress(n_df, l_df, distance_tf):
    """Compute the stress between the pairwise distances of two tables.

    Both tables are converted to float64 tensors and ``distance_tf`` is
    applied to every pair of rows of each table. The result is
    ``sqrt(sum((d_n - d_l)^2)) / sqrt(sum(d_n^2))`` where ``d_n`` and ``d_l``
    are the pairwise distances of ``n_df`` and ``l_df``.

    Args:
        n_df: Reference table (its distances form the denominator).
        l_df: Table compared against the reference; must have as many rows.
        distance_tf: Callable taking two broadcastable tensors and returning
            their pairwise distances.

    Returns:
        The stress as a NumPy value.
    """
    reference_points = tf.convert_to_tensor(n_df.values, dtype=tf.float64)
    candidate_points = tf.convert_to_tensor(l_df.values, dtype=tf.float64)

    def pairwise(points):
        # Broadcasting (rows, 1, cols) against (1, rows, cols) pairs every row with every row.
        return distance_tf(points[:, None, :], points[None, :, :])

    reference_distances = pairwise(reference_points)
    candidate_distances = pairwise(candidate_points)

    residual = tf.reduce_sum(tf.square(reference_distances - candidate_distances))
    scale = tf.reduce_sum(tf.square(reference_distances))

    return (tf.sqrt(residual) / tf.sqrt(scale)).numpy()

In [None]:
def eq_distance(vec1, vec2):
    """Euclidean distance between two tensors, reduced over axis 2.

    Args:
        vec1: First operand; converted to a tensor.
        vec2: Second operand; converted to a tensor and broadcast against
            ``vec1``.

    Returns:
        A tensor holding the square root of the squared differences summed
        along axis 2.
    """
    left = tf.convert_to_tensor(vec1)
    right = tf.convert_to_tensor(vec2)
    summed_squares = tf.reduce_sum(tf.square(left - right), axis=2)
    return tf.sqrt(summed_squares)

In [None]:
from sklearn.impute import SimpleImputer

class ImputedRepo:
    """Repository view whose metric tables have missing values mean-imputed.

    Wraps any object exposing ``class_df``/``method_df``/``field_df``/
    ``variable_df`` and returns, for each of them, a new DataFrame in which
    NaN entries are replaced by the mean of their column.
    """

    def __init__(self, repo):
        self.repo = repo

    @staticmethod
    def _mean_imputed(frame):
        """Return a new DataFrame with the NaN values of ``frame`` mean-imputed."""
        imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
        values = imputer.fit(frame).transform(frame)
        # NOTE(review): if the imputer returns fewer columns than `frame` has
        # (scikit-learn documents dropping features that are entirely missing
        # unless keep_empty_features is set), this constructor raises
        # ValueError. fitness_class catches ValueError and skips the
        # repository -- confirm that this is the intended handling.
        return pd.DataFrame(values, columns = frame.columns)

    def class_df(self):
        """Mean-imputed copy of the wrapped repository's class table."""
        return self._mean_imputed(self.repo.class_df())

    def method_df(self):
        """Mean-imputed copy of the wrapped repository's method table."""
        return self._mean_imputed(self.repo.method_df())

    def field_df(self):
        """Mean-imputed copy of the wrapped repository's field table."""
        return self._mean_imputed(self.repo.field_df())

    def variable_df(self):
        """Mean-imputed copy of the wrapped repository's variable table."""
        return self._mean_imputed(self.repo.variable_df())

In [None]:
class CachedRepo:
    """Memoising wrapper around a repository-like object.

    Each accessor forwards to the wrapped repository and remembers the
    result. A result of ``None`` counts as "not cached", so the wrapped
    accessor is asked again on the next call.
    """

    def __init__(self, repo):
        self.repo = repo
        self._class_df = self._method_df = self._field_df = self._variable_df = None

    def class_df(self):
        """Class table of the wrapped repository, fetched at most once if not None."""
        cached = self._class_df
        if cached is None:
            cached = self._class_df = self.repo.class_df()
        return cached

    def method_df(self):
        """Method table of the wrapped repository, fetched at most once if not None."""
        cached = self._method_df
        if cached is None:
            cached = self._method_df = self.repo.method_df()
        return cached

    def field_df(self):
        """Field table of the wrapped repository, fetched at most once if not None."""
        cached = self._field_df
        if cached is None:
            cached = self._field_df = self.repo.field_df()
        return cached

    def variable_df(self):
        """Variable table of the wrapped repository, fetched at most once if not None."""
        cached = self._variable_df
        if cached is None:
            cached = self._variable_df = self.repo.variable_df()
        return cached

In [None]:
class ErrorRepositoryResults:
    """Evaluate an error function on a repository table versus a column subset.

    ``error`` is called as ``error(full_table, full_table[columns], distance)``.
    """

    def __init__(self, repo, error, distance):
        self.repo, self.error, self.distance = repo, error, distance

    def class_error(self, columns):
        """Error of the class table restricted to ``columns`` against the full class table."""
        full_table = self.repo.class_df()
        reduced_table = self.repo.class_df()[columns]
        return self.error(full_table, reduced_table, self.distance)

    def method_error(self, columns):
        """Error of the method table restricted to ``columns`` against the full method table."""
        full_table = self.repo.method_df()
        reduced_table = self.repo.method_df()[columns]
        return self.error(full_table, reduced_table, self.distance)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the repositories; the fixed random_state keeps the split repeatable.
# NOTE(review): fitness_class iterates over `repos`, not `repos_train` -- confirm whether
# the held-out repositories are meant to take part in the search.
repos_train, repos_test = train_test_split(repos, test_size=0.2, random_state=1)

# Genetic algorithm

### Class

In [None]:
# Genetic-algorithm settings.
# Number of chromosomes created initially and kept after each replacement.
POPULATION_SIZE = 20
# Upper bound on the mother/father pairs selected per evolution step.
N_OFFSPRINGS = 10
# Per-gene probability that mutate() replaces the gene.
MUTATION_RATE = 0.1
# Evolution steps run for each subset size.
MAX_ITERATIONS = 10
# Stop early once the best fitness has stayed the same for this many consecutive steps.
MAX_REPEATS = 10

In [None]:
# Candidate metric names: the columns of the first training repository's class table.
class_columns = list(repos_train[0].class_df().columns)

In [None]:
def get_initial_population(labels: list[str], n: int, population_size=None) -> list:
    """Draw random, pairwise distinct subsets of ``labels`` as the first generation.

    Args:
        labels: Candidate genes (metric names); must not contain duplicates.
        n: Number of genes per chromosome.
        population_size: Number of chromosomes to create. Defaults to the
            module-level ``POPULATION_SIZE``.

    Returns:
        A list of chromosomes, each a list of ``n`` distinct labels. No two
        chromosomes contain the same set of labels. If fewer distinct subsets
        exist than requested, all of them are returned instead of looping
        forever.

    Raises:
        ValueError: If ``n`` is negative or larger than ``len(labels)``.
    """
    import math  # function-scope import: no import cell precedes this definition

    if not 0 <= n <= len(labels):
        raise ValueError(f"cannot draw {n} genes from {len(labels)} labels")

    requested = POPULATION_SIZE if population_size is None else population_size
    # Rejection sampling below can only terminate if enough distinct subsets exist.
    target = min(requested, math.comb(len(labels), n))

    population = []
    seen_subsets = set()
    while len(population) < target:
        # Sample positions rather than labels so any indexable container works.
        indices = random.sample(range(len(labels)), n)
        chromosome = [labels[index] for index in indices]

        # check that all metrics are unique
        assert len(set(chromosome)) == len(chromosome) == n

        # Gene order does not make a subset different, so compare as sets.
        subset = frozenset(chromosome)
        if subset not in seen_subsets:
            seen_subsets.add(subset)
            population.append(chromosome)

    return population

In [None]:
def fitness_class(columns, repositories=None):
    """Mean stress of the class tables when restricted to ``columns``.

    Every repository is wrapped in ``CachedRepo(ImputedRepo(...))`` and scored
    with ``kruskal_stress`` using ``eq_distance``. Lower values are better
    (callers sort ascending and treat the first entry as the best).

    Args:
        columns: Column labels of the subset to evaluate.
        repositories: Repositories to score. Defaults to the module-level
            ``repos``.
            NOTE(review): that default includes the held-out repositories;
            pass ``repos_train`` if the search should not see them.

    Returns:
        The mean of the per-repository errors, or ``float("inf")`` when no
        repository could be scored (a mean over nothing would be NaN, which
        cannot be ordered against other fitness values).
    """
    if repositories is None:
        repositories = repos

    errors = []
    for raw_repo in repositories:
        repo = CachedRepo(ImputedRepo(raw_repo))

        try:
            error = ErrorRepositoryResults(repo, kruskal_stress, eq_distance).class_error(columns)
        except ValueError:
            # A repository whose table cannot be imputed or scored is skipped
            # rather than aborting the whole evaluation.
            continue

        errors.append(error)

    if not errors:
        return float("inf")

    return np.mean(np.array(errors))

In [None]:
def get_parents(population):
    """Split the head of ``population`` into alternating mothers and fathers.

    Mothers are taken from the even positions below ``2 * N_OFFSPRINGS``,
    fathers from the odd positions below ``2 * N_OFFSPRINGS + 1``.

    Returns:
        A ``(mothers, fathers)`` tuple.
    """
    span = 2 * N_OFFSPRINGS
    even_ranked = population[:span][::2]
    odd_ranked = population[1:span + 1][::2]
    return even_ranked, odd_ranked

In [None]:
import random


def crossover(mother, father):
    """Pick one offspring uniformly among all valid two-point crossovers.

    For every pair of cut points ``left <= right`` the segment
    ``[left:right]`` of one parent is spliced into the other parent, in both
    directions. Candidates containing a repeated gene are discarded. The two
    parents themselves are always candidates.

    Args:
        mother: First parent chromosome (list of genes).
        father: Second parent chromosome, same length as ``mother``.

    Returns:
        The selected chromosome. It may be one of the parent objects itself.
    """
    assert len(mother) == len(father)
    length = len(mother)

    candidates = [mother, father]

    for left in range(length + 1):
        for right in range(left, length + 1):
            # Same candidate order as before: mother-based child first, then father-based.
            for outer, inner in ((mother, father), (father, mother)):
                child = outer[:left] + inner[left:right] + outer[right:]
                if len(set(child)) == len(child):
                    candidates.append(child)

    offspring = random.choice(candidates)

    assert len(set(offspring)) == len(offspring)

    return offspring

In [None]:
def mutate(chromosome, gene_pool=None, mutation_rate=None):
    """Return a copy of ``chromosome`` with genes randomly replaced.

    Each position is replaced, with probability ``mutation_rate``, by a gene
    drawn uniformly from the pool entries not currently in the chromosome.
    When no such gene exists the position is left unchanged (instead of
    retrying forever).

    Args:
        chromosome: List of distinct genes; it is not modified.
        gene_pool: Genes that may be introduced. Defaults to the module-level
            ``class_columns``.
        mutation_rate: Per-gene replacement probability. Defaults to the
            module-level ``MUTATION_RATE``.

    Returns:
        The mutated copy, still free of repeated genes.
    """
    pool = class_columns if gene_pool is None else gene_pool
    rate = MUTATION_RATE if mutation_rate is None else mutation_rate

    mutated = chromosome[:]

    for position in range(len(mutated)):
        if random.random() < rate:
            # Recomputed per position because earlier replacements change what is unused.
            unused = [gene for gene in pool if gene not in mutated]
            if unused:
                mutated[position] = random.choice(unused)

    assert len(set(mutated)) == len(mutated)

    return mutated

In [None]:
def get_population_fitness_class(population):
    """Score every chromosome with ``fitness_class`` and rank them.

    Returns:
        A list of ``(chromosome, fitness)`` tuples sorted by ascending
        fitness; ties keep their order from ``population``.
    """
    scored = [(chromosome, fitness_class(chromosome)) for chromosome in population]
    return sorted(scored, key=lambda pair: pair[1])

In [None]:
def replace_population(population, new_individuals):
    """Merge offspring into the population and keep the best chromosomes.

    The combined candidates are ranked with ``get_population_fitness_class``
    (ascending fitness), the best score is printed, and the first
    ``POPULATION_SIZE`` chromosomes are kept.

    Args:
        population: Current chromosomes. The list is not modified.
        new_individuals: Chromosomes to add before selection.

    Returns:
        A new list with at most ``POPULATION_SIZE`` chromosomes, best first.
    """
    # Work on a copy so the caller's list is not extended as a side effect.
    candidates = list(population)
    candidates.extend(new_individuals)

    # Sort them by fitness score
    ranked = get_population_fitness_class(candidates)

    print(f"Fitness score: {ranked[0][1]}")

    # Drop the chromosomes ranked beyond the population size.
    return [chromosome for chromosome, _ in ranked][:POPULATION_SIZE]

In [None]:
def evolution_step(population):
    """Run one generation: select parents, breed, mutate and re-select.

    Parents come from ``get_parents``; every mother/father pair yields one
    offspring through ``crossover`` followed by ``mutate``. The offspring are
    then merged into the population by ``replace_population``.

    Returns:
        The population returned by ``replace_population``.
    """
    mothers, fathers = get_parents(population)

    # One mutated child per mother/father pair.
    offsprings = [
        mutate(crossover(mother, father))
        for mother, father in zip(mothers, fathers)
    ]

    return replace_population(population, offsprings)

In [None]:
import random

In [None]:
# For every subset size, evolve a population of class-metric subsets and report
# the subset with the lowest fitness (stress).
for subset_size in range(1, len(class_columns)):
    # Seed from class_columns: the same gene list that bounds this loop and
    # that mutate() draws replacements from.
    population = get_initial_population(class_columns, subset_size)

    prev_value = -1
    repeats = 0

    for iteration in range(MAX_ITERATIONS):
        population = evolution_step(population)

        fitness_scores = get_population_fitness_class(population)
        best_fitness = fitness_scores[0][1]
        best_result = fitness_scores[0][0]

        # Count how many consecutive iterations produced the same best fitness.
        if prev_value != best_fitness:
            prev_value = best_fitness
            repeats = 1
        else:
            repeats += 1

        # Converged: the best fitness has not changed for MAX_REPEATS iterations.
        if repeats == MAX_REPEATS:
            break

        print(f"Best fitness: {best_fitness}")
        print(f"Best result: {best_result}")
        print(f"Repeats: {repeats}")
        print(f"Previous value: {prev_value}")
        print()

    fitness_scores = get_population_fitness_class(population)
    print(f"subset size: {subset_size}")
    print(f"Final best fitness: {fitness_scores[0][1]}")
    print(f"Final best result: {fitness_scores[0][0]}")
    print("-------------")
    print()

In [None]:
class_data = {
    1: {"fitness": 0.728992671638118, "subset": ["modifiers"]},
    2: {"fitness": 0.6160998820251482, "subset": ["rfc", "lcom"]},
    3: {"fitness": 0.3148567906030604, "subset": ["dit", "lcom", "modifiers"]},
    4: {
        "fitness": 0.3129877907088629,
        "subset": ["publicFieldsQty", "finalFieldsQty", "modifiers", "lcom"],
    },
    5: {
        "fitness": 0.11383694800707322,
        "subset": ["totalMethodsQty", "staticFieldsQty", "modifiers", "lcom", "loc"],
    },
    6: {
        "fitness": 0.07841392454910531,
        "subset": [
            "lcom",
            "variablesQty",
            "loc",
            "assignmentsQty",
            "modifiers",
            "stringLiteralsQty",
        ],
    },
    7: {
        "fitness": 0.23605238764495703,
        "subset": [
            "visibleMethodsQty",
            "maxNestedBlocksQty",
            "modifiers",
            "lcom",
            "finalMethodsQty",
            "cbo",
            "uniqueWordsQty",
        ],
    },
    8: {
        "fitness": 0.10359928193838702,
        "subset": [
            "noc",
            "numbersQty",
            "lcom",
            "modifiers",
            "loopQty",
            "tryCatchQty",
            "loc",
            "rfc",
        ],
    },
    9: {
        "fitness": 0.08255658136872673,
        "subset": [
            "tcc",
            "modifiers",
            "mathOperationsQty",
            "publicFieldsQty",
            "innerClassesQty",
            "stringLiteralsQty",
            "cboModified",
            "lcom",
            "loc",
        ],
    },
    10: {
        "fitness": 0.10106787309240577,
        "subset": [
            "fanin",
            "loc",
            "parenthesizedExpsQty",
            "lcom",
            "fanout",
            "modifiers",
            "cbo",
            "publicFieldsQty",
            "cboModified",
            "variablesQty",
        ],
    },
    11: {
        "fitness": 0.03669048322739602,
        "subset": [
            "lcom",
            "stringLiteralsQty",
            "defaultFieldsQty",
            "uniqueWordsQty",
            "loc",
            "assignmentsQty",
            "abstractMethodsQty",
            "mathOperationsQty",
            "privateMethodsQty",
            "modifiers",
            "lambdasQty",
        ],
    },
    12: {
        "fitness": 0.03783079120177472,
        "subset": [
            "visibleMethodsQty",
            "uniqueWordsQty",
            "tryCatchQty",
            "lcom",
            "loc",
            "finalFieldsQty",
            "innerClassesQty",
            "defaultFieldsQty",
            "stringLiteralsQty",
            "protectedFieldsQty",
            "modifiers",
            "variablesQty",
        ],
    },
    13: {
        "fitness": 0.030371317436994647,
        "subset": [
            "finalFieldsQty",
            "stringLiteralsQty",
            "visibleMethodsQty",
            "totalMethodsQty",
            "cboModified",
            "assignmentsQty",
            "lcom",
            "loc",
            "modifiers",
            "innerClassesQty",
            "uniqueWordsQty",
            "loopQty",
            "synchronizedMethodsQty",
        ],
    },
    14: {
        "fitness": 0.10229859076213801,
        "subset": [
            "parenthesizedExpsQty",
            "modifiers",
            "defaultMethodsQty",
            "totalFieldsQty",
            "rfc",
            "noc",
            "fanin",
            "maxNestedBlocksQty",
            "lcc",
            "lcom",
            "innerClassesQty",
            "cbo",
            "privateMethodsQty",
            "loc",
        ],
    },
    15: {
        "fitness": 0.04595272589940584,
        "subset": [
            "numbersQty",
            "lambdasQty",
            "totalFieldsQty",
            "wmc",
            "lcom",
            "variablesQty",
            "rfc",
            "comparisonsQty",
            "mathOperationsQty",
            "visibleMethodsQty",
            "loc",
            "uniqueWordsQty",
            "modifiers",
            "finalMethodsQty",
            "fanin",
        ],
    },
    16: {
        "fitness": 0.028369752534867287,
        "subset": [
            "stringLiteralsQty",
            "synchronizedFieldsQty",
            "modifiers",
            "numbersQty",
            "defaultFieldsQty",
            "loc",
            "noc",
            "privateFieldsQty",
            "fanin",
            "dit",
            "mathOperationsQty",
            "rfc",
            "defaultMethodsQty",
            "uniqueWordsQty",
            "lcom",
            "abstractMethodsQty",
        ],
    },
    17: {
        "fitness": 0.029906601614760418,
        "subset": [
            "totalMethodsQty",
            "lcc",
            "modifiers",
            "synchronizedFieldsQty",
            "variablesQty",
            "nosi",
            "returnQty",
            "maxNestedBlocksQty",
            "loc",
            "numbersQty",
            "uniqueWordsQty",
            "privateFieldsQty",
            "mathOperationsQty",
            "loopQty",
            "cbo",
            "lcom",
            "stringLiteralsQty",
        ],
    },
    18: {
        "fitness": 0.030488120285550806,
        "subset": [
            "uniqueWordsQty",
            "modifiers",
            "publicFieldsQty",
            "lcc",
            "totalFieldsQty",
            "stringLiteralsQty",
            "privateMethodsQty",
            "tryCatchQty",
            "lcom",
            "loc",
            "wmc",
            "maxNestedBlocksQty",
            "privateFieldsQty",
            "totalMethodsQty",
            "rfc",
            "cbo",
            "finalFieldsQty",
            "loopQty",
        ],
    },
    19: {
        "fitness": 0.030426303134509596,
        "subset": [
            "anonymousClassesQty",
            "loc",
            "uniqueWordsQty",
            "modifiers",
            "stringLiteralsQty",
            "lcom*",
            "tcc",
            "dit",
            "staticFieldsQty",
            "publicMethodsQty",
            "lambdasQty",
            "synchronizedFieldsQty",
            "totalMethodsQty",
            "lcom",
            "totalFieldsQty",
            "numbersQty",
            "innerClassesQty",
            "synchronizedMethodsQty",
            "assignmentsQty",
        ],
    },
    20: {
        "fitness": 0.030970079434672892,
        "subset": [
            "publicMethodsQty",
            "lambdasQty",
            "mathOperationsQty",
            "staticFieldsQty",
            "wmc",
            "noc",
            "fanin",
            "lcom",
            "uniqueWordsQty",
            "logStatementsQty",
            "privateFieldsQty",
            "loc",
            "cbo",
            "maxNestedBlocksQty",
            "fanout",
            "defaultFieldsQty",
            "stringLiteralsQty",
            "totalMethodsQty",
            "loopQty",
            "modifiers",
        ],
    },
    21: {
        "fitness": 0.03015717223977876,
        "subset": [
            "rfc",
            "totalMethodsQty",
            "dit",
            "returnQty",
            "tryCatchQty",
            "synchronizedMethodsQty",
            "fanin",
            "loc",
            "privateMethodsQty",
            "totalFieldsQty",
            "protectedMethodsQty",
            "privateFieldsQty",
            "uniqueWordsQty",
            "innerClassesQty",
            "publicFieldsQty",
            "visibleMethodsQty",
            "stringLiteralsQty",
            "publicMethodsQty",
            "tcc",
            "modifiers",
            "lcom",
        ],
    },
    22: {
        "fitness": 0.0201295679700671,
        "subset": [
            "variablesQty",
            "returnQty",
            "lcom",
            "anonymousClassesQty",
            "loc",
            "tcc",
            "stringLiteralsQty",
            "comparisonsQty",
            "modifiers",
            "noc",
            "protectedFieldsQty",
            "visibleMethodsQty",
            "lambdasQty",
            "rfc",
            "uniqueWordsQty",
            "fanin",
            "synchronizedFieldsQty",
            "parenthesizedExpsQty",
            "lcc",
            "cboModified",
            "wmc",
            "publicMethodsQty",
        ],
    },
    23: {
        "fitness": 0.01548689188084787,
        "subset": [
            "rfc",
            "lambdasQty",
            "numbersQty",
            "assignmentsQty",
            "lcom",
            "totalMethodsQty",
            "staticMethodsQty",
            "mathOperationsQty",
            "loc",
            "dit",
            "loopQty",
            "logStatementsQty",
            "cbo",
            "anonymousClassesQty",
            "defaultFieldsQty",
            "variablesQty",
            "wmc",
            "lcom*",
            "fanout",
            "modifiers",
            "nosi",
            "uniqueWordsQty",
            "stringLiteralsQty",
        ],
    },
    24: {
        "fitness": 0.015310090179535198,
        "subset": [
            "fanin",
            "parenthesizedExpsQty",
            "tryCatchQty",
            "logStatementsQty",
            "numbersQty",
            "privateMethodsQty",
            "visibleMethodsQty",
            "stringLiteralsQty",
            "wmc",
            "totalMethodsQty",
            "staticMethodsQty",
            "assignmentsQty",
            "uniqueWordsQty",
            "abstractMethodsQty",
            "maxNestedBlocksQty",
            "privateFieldsQty",
            "synchronizedMethodsQty",
            "rfc",
            "lcom",
            "loc",
            "modifiers",
            "dit",
            "finalFieldsQty",
            "nosi",
        ],
    },
    25: {
        "fitness": 0.011314529859779323,
        "subset": [
            "uniqueWordsQty",
            "maxNestedBlocksQty",
            "publicMethodsQty",
            "comparisonsQty",
            "modifiers",
            "noc",
            "numbersQty",
            "cbo",
            "synchronizedFieldsQty",
            "nosi",
            "parenthesizedExpsQty",
            "variablesQty",
            "rfc",
            "logStatementsQty",
            "defaultMethodsQty",
            "returnQty",
            "stringLiteralsQty",
            "lcom",
            "assignmentsQty",
            "fanout",
            "visibleMethodsQty",
            "cboModified",
            "loc",
            "tryCatchQty",
            "publicFieldsQty",
        ],
    },
    26: {
        "fitness": 0.0177483219969043,
        "subset": [
            "protectedMethodsQty",
            "assignmentsQty",
            "uniqueWordsQty",
            "defaultMethodsQty",
            "loopQty",
            "stringLiteralsQty",
            "publicMethodsQty",
            "fanin",
            "noc",
            "totalMethodsQty",
            "modifiers",
            "abstractMethodsQty",
            "mathOperationsQty",
            "parenthesizedExpsQty",
            "tryCatchQty",
            "visibleMethodsQty",
            "logStatementsQty",
            "synchronizedFieldsQty",
            "cbo",
            "lcc",
            "rfc",
            "totalFieldsQty",
            "loc",
            "lcom",
            "numbersQty",
            "lambdasQty",
        ],
    },
    27: {
        "fitness": 0.014014878360699937,
        "subset": [
            "numbersQty",
            "protectedMethodsQty",
            "anonymousClassesQty",
            "cbo",
            "totalMethodsQty",
            "mathOperationsQty",
            "uniqueWordsQty",
            "privateMethodsQty",
            "cboModified",
            "stringLiteralsQty",
            "protectedFieldsQty",
            "rfc",
            "parenthesizedExpsQty",
            "variablesQty",
            "tryCatchQty",
            "modifiers",
            "lcom*",
            "tcc",
            "loc",
            "lambdasQty",
            "wmc",
            "totalFieldsQty",
            "logStatementsQty",
            "lcom",
            "fanout",
            "privateFieldsQty",
            "publicFieldsQty",
        ],
    },
    28: {
        "fitness": 0.008703069744627414,
        "subset": [
            "lcom*",
            "cboModified",
            "rfc",
            "lcc",
            "assignmentsQty",
            "staticMethodsQty",
            "staticFieldsQty",
            "parenthesizedExpsQty",
            "stringLiteralsQty",
            "uniqueWordsQty",
            "anonymousClassesQty",
            "fanin",
            "variablesQty",
            "protectedMethodsQty",
            "totalFieldsQty",
            "loc",
            "abstractMethodsQty",
            "dit",
            "tryCatchQty",
            "visibleMethodsQty",
            "modifiers",
            "noc",
            "wmc",
            "defaultFieldsQty",
            "defaultMethodsQty",
            "tcc",
            "lcom",
            "numbersQty",
        ],
    },
    29: {
        "fitness": 0.010809475810812662,
        "subset": [
            "loc",
            "loopQty",
            "lcom",
            "defaultFieldsQty",
            "rfc",
            "cboModified",
            "fanout",
            "uniqueWordsQty",
            "stringLiteralsQty",
            "visibleMethodsQty",
            "dit",
            "numbersQty",
            "mathOperationsQty",
            "totalMethodsQty",
            "defaultMethodsQty",
            "variablesQty",
            "fanin",
            "modifiers",
            "comparisonsQty",
            "anonymousClassesQty",
            "nosi",
            "abstractMethodsQty",
            "privateMethodsQty",
            "totalFieldsQty",
            "protectedMethodsQty",
            "noc",
            "synchronizedFieldsQty",
            "wmc",
            "publicFieldsQty",
        ],
    },
    30: {
        "fitness": 0.008403610122878285,
        "subset": [
            "stringLiteralsQty",
            "returnQty",
            "defaultMethodsQty",
            "lcc",
            "mathOperationsQty",
            "dit",
            "assignmentsQty",
            "wmc",
            "loc",
            "privateMethodsQty",
            "maxNestedBlocksQty",
            "lcom*",
            "anonymousClassesQty",
            "numbersQty",
            "lcom",
            "totalMethodsQty",
            "privateFieldsQty",
            "fanout",
            "rfc",
            "modifiers",
            "publicFieldsQty",
            "variablesQty",
            "innerClassesQty",
            "synchronizedFieldsQty",
            "finalMethodsQty",
            "totalFieldsQty",
            "uniqueWordsQty",
            "nosi",
            "fanin",
            "staticFieldsQty",
        ],
    },
    31: {
        "fitness": 0.008997303934143056,
        "subset": [
            "numbersQty",
            "abstractMethodsQty",
            "protectedMethodsQty",
            "defaultFieldsQty",
            "returnQty",
            "cboModified",
            "finalFieldsQty",
            "privateFieldsQty",
            "lambdasQty",
            "privateMethodsQty",
            "variablesQty",
            "publicFieldsQty",
            "tryCatchQty",
            "lcom",
            "stringLiteralsQty",
            "synchronizedFieldsQty",
            "mathOperationsQty",
            "totalFieldsQty",
            "rfc",
            "protectedFieldsQty",
            "synchronizedMethodsQty",
            "assignmentsQty",
            "innerClassesQty",
            "wmc",
            "fanout",
            "staticMethodsQty",
            "modifiers",
            "lcom*",
            "finalMethodsQty",
            "uniqueWordsQty",
            "loc",
        ],
    },
    32: {
        "fitness": 0.0038917908951368358,
        "subset": [
            "protectedMethodsQty",
            "noc",
            "cbo",
            "dit",
            "fanout",
            "variablesQty",
            "tryCatchQty",
            "privateFieldsQty",
            "tcc",
            "uniqueWordsQty",
            "parenthesizedExpsQty",
            "rfc",
            "cboModified",
            "publicFieldsQty",
            "numbersQty",
            "returnQty",
            "defaultMethodsQty",
            "fanin",
            "comparisonsQty",
            "protectedFieldsQty",
            "loc",
            "publicMethodsQty",
            "assignmentsQty",
            "privateMethodsQty",
            "nosi",
            "wmc",
            "modifiers",
            "mathOperationsQty",
            "finalMethodsQty",
            "stringLiteralsQty",
            "staticFieldsQty",
            "lcom",
        ],
    },
    33: {
        "fitness": 0.003253881838403075,
        "subset": [
            "tcc",
            "assignmentsQty",
            "publicFieldsQty",
            "synchronizedFieldsQty",
            "maxNestedBlocksQty",
            "fanout",
            "visibleMethodsQty",
            "mathOperationsQty",
            "loc",
            "staticMethodsQty",
            "lcom*",
            "modifiers",
            "defaultMethodsQty",
            "anonymousClassesQty",
            "logStatementsQty",
            "protectedFieldsQty",
            "lcom",
            "cbo",
            "returnQty",
            "dit",
            "protectedMethodsQty",
            "cboModified",
            "fanin",
            "synchronizedMethodsQty",
            "finalFieldsQty",
            "numbersQty",
            "totalMethodsQty",
            "stringLiteralsQty",
            "noc",
            "defaultFieldsQty",
            "rfc",
            "uniqueWordsQty",
            "lambdasQty",
            "variablesQty",
            "finalMethodsQty",
            "wmc",
            "nosi",
            "loopQty",
            "innerClassesQty",
        ],
    },
    34: {
        "fitness": 0.007047611016713278,
        "subset": [
            "variablesQty",
            "wmc",
            "finalFieldsQty",
            "noc",
            "totalFieldsQty",
            "tcc",
            "cbo",
            "logStatementsQty",
            "totalMethodsQty",
            "rfc",
            "anonymousClassesQty",
            "loopQty",
            "assignmentsQty",
            "numbersQty",
            "publicFieldsQty",
            "loc",
            "mathOperationsQty",
            "privateFieldsQty",
            "comparisonsQty",
            "fanin",
            "defaultMethodsQty",
            "publicMethodsQty",
            "nosi",
            "modifiers",
            "stringLiteralsQty",
            "tryCatchQty",
            "lcom",
            "parenthesizedExpsQty",
            "dit",
            "staticFieldsQty",
            "uniqueWordsQty",
            "returnQty",
            "innerClassesQty",
            "lcom*",
        ],
    },
    35: {
        "fitness": 0.0034466316999683397,
        "subset": [
            "assignmentsQty",
            "uniqueWordsQty",
            "comparisonsQty",
            "visibleMethodsQty",
            "protectedMethodsQty",
            "publicFieldsQty",
            "maxNestedBlocksQty",
            "defaultMethodsQty",
            "innerClassesQty",
            "lcom",
            "rfc",
            "loopQty",
            "fanin",
            "loc",
            "stringLiteralsQty",
            "mathOperationsQty",
            "totalMethodsQty",
            "tryCatchQty",
            "staticMethodsQty",
            "privateFieldsQty",
            "nosi",
            "parenthesizedExpsQty",
            "wmc",
            "cboModified",
            "finalMethodsQty",
            "finalFieldsQty",
            "synchronizedFieldsQty",
            "protectedFieldsQty",
            "fanout",
            "returnQty",
            "numbersQty",
            "staticFieldsQty",
            "modifiers",
            "anonymousClassesQty",
            "variablesQty",
        ],
    },
    36: {
        "fitness": 0.0033667544512478494,
        "subset": [
            "nosi",
            "returnQty",
            "fanin",
            "totalFieldsQty",
            "lcom",
            "tcc",
            "comparisonsQty",
            "loc",
            "abstractMethodsQty",
            "lambdasQty",
            "staticFieldsQty",
            "synchronizedMethodsQty",
            "fanout",
            "loopQty",
            "wmc",
            "rfc",
            "visibleMethodsQty",
            "finalFieldsQty",
            "stringLiteralsQty",
            "dit",
            "privateFieldsQty",
            "cbo",
            "lcc",
            "assignmentsQty",
            "anonymousClassesQty",
            "numbersQty",
            "variablesQty",
            "protectedMethodsQty",
            "uniqueWordsQty",
            "defaultFieldsQty",
            "cboModified",
            "staticMethodsQty",
            "publicFieldsQty",
            "noc",
            "synchronizedFieldsQty",
            "modifiers",
        ],
    },
    37: {
        "fitness": 0.0018571642077977466,
        "subset": [
            "numbersQty",
            "maxNestedBlocksQty",
            "noc",
            "finalFieldsQty",
            "tcc",
            "variablesQty",
            "defaultMethodsQty",
            "uniqueWordsQty",
            "lcom",
            "synchronizedFieldsQty",
            "innerClassesQty",
            "totalMethodsQty",
            "publicMethodsQty",
            "visibleMethodsQty",
            "modifiers",
            "rfc",
            "fanin",
            "mathOperationsQty",
            "lcom*",
            "staticMethodsQty",
            "privateMethodsQty",
            "privateFieldsQty",
            "wmc",
            "assignmentsQty",
            "synchronizedMethodsQty",
            "dit",
            "lcc",
            "comparisonsQty",
            "nosi",
            "totalFieldsQty",
            "lambdasQty",
            "stringLiteralsQty",
            "fanout",
            "loc",
            "cboModified",
            "returnQty",
            "cbo",
        ],
    },
    38: {
        "fitness": 0.0016803774431456253,
        "subset": [
            "lambdasQty",
            "noc",
            "protectedFieldsQty",
            "privateMethodsQty",
            "synchronizedMethodsQty",
            "mathOperationsQty",
            "modifiers",
            "lcom",
            "fanout",
            "totalFieldsQty",
            "nosi",
            "variablesQty",
            "fanin",
            "cboModified",
            "abstractMethodsQty",
            "lcc",
            "finalFieldsQty",
            "rfc",
            "visibleMethodsQty",
            "returnQty",
            "privateFieldsQty",
            "cbo",
            "stringLiteralsQty",
            "staticFieldsQty",
            "publicFieldsQty",
            "loc",
            "defaultFieldsQty",
            "publicMethodsQty",
            "assignmentsQty",
            "parenthesizedExpsQty",
            "tryCatchQty",
            "uniqueWordsQty",
            "comparisonsQty",
            "synchronizedFieldsQty",
            "logStatementsQty",
            "wmc",
            "numbersQty",
            "loopQty",
        ],
    },
    39: {
        "fitness": 0.003253881838403075,
        "subset": [
            "tcc",
            "assignmentsQty",
            "publicFieldsQty",
            "synchronizedFieldsQty",
            "maxNestedBlocksQty",
            "fanout",
            "visibleMethodsQty",
            "mathOperationsQty",
            "loc",
            "staticMethodsQty",
            "lcom*",
            "modifiers",
            "defaultMethodsQty",
            "anonymousClassesQty",
            "logStatementsQty",
            "protectedFieldsQty",
            "lcom",
            "cbo",
            "returnQty",
            "dit",
            "protectedMethodsQty",
            "cboModified",
            "fanin",
            "synchronizedMethodsQty",
            "finalFieldsQty",
            "numbersQty",
            "totalMethodsQty",
            "stringLiteralsQty",
            "noc",
            "defaultFieldsQty",
            "rfc",
            "uniqueWordsQty",
            "lambdasQty",
            "variablesQty",
            "finalMethodsQty",
            "wmc",
            "nosi",
            "loopQty",
            "innerClassesQty",
        ],
    },
    40: {
        "fitness": 0.002261565518048223,
        "subset": [
            "loopQty",
            "visibleMethodsQty",
            "synchronizedFieldsQty",
            "tcc",
            "innerClassesQty",
            "fanin",
            "staticFieldsQty",
            "totalMethodsQty",
            "privateMethodsQty",
            "returnQty",
            "comparisonsQty",
            "logStatementsQty",
            "protectedMethodsQty",
            "numbersQty",
            "lambdasQty",
            "anonymousClassesQty",
            "staticMethodsQty",
            "wmc",
            "abstractMethodsQty",
            "rfc",
            "finalFieldsQty",
            "publicMethodsQty",
            "maxNestedBlocksQty",
            "stringLiteralsQty",
            "parenthesizedExpsQty",
            "mathOperationsQty",
            "cbo",
            "loc",
            "dit",
            "lcom",
            "assignmentsQty",
            "modifiers",
            "cboModified",
            "totalFieldsQty",
            "publicFieldsQty",
            "uniqueWordsQty",
            "variablesQty",
            "protectedFieldsQty",
            "tryCatchQty",
            "nosi",
        ],
    },
    41: {
        "fitness": 0.000806830334148319,
        "subset": [
            "uniqueWordsQty",
            "publicMethodsQty",
            "assignmentsQty",
            "cbo",
            "finalFieldsQty",
            "protectedMethodsQty",
            "lcc",
            "privateFieldsQty",
            "innerClassesQty",
            "tryCatchQty",
            "noc",
            "rfc",
            "staticMethodsQty",
            "stringLiteralsQty",
            "anonymousClassesQty",
            "comparisonsQty",
            "synchronizedMethodsQty",
            "modifiers",
            "defaultMethodsQty",
            "wmc",
            "privateMethodsQty",
            "totalFieldsQty",
            "maxNestedBlocksQty",
            "synchronizedFieldsQty",
            "mathOperationsQty",
            "totalMethodsQty",
            "fanout",
            "visibleMethodsQty",
            "cboModified",
            "publicFieldsQty",
            "defaultFieldsQty",
            "staticFieldsQty",
            "fanin",
            "lcom",
            "returnQty",
            "variablesQty",
            "nosi",
            "lcom*",
            "numbersQty",
            "parenthesizedExpsQty",
            "loc",
        ],
    },
    42: {
        "fitness": 0.0010148543970767334,
        "subset": [
            "cboModified",
            "rfc",
            "fanin",
            "publicFieldsQty",
            "totalFieldsQty",
            "tryCatchQty",
            "lcc",
            "lambdasQty",
            "publicMethodsQty",
            "visibleMethodsQty",
            "maxNestedBlocksQty",
            "dit",
            "lcom",
            "protectedMethodsQty",
            "loc",
            "assignmentsQty",
            "tcc",
            "protectedFieldsQty",
            "cbo",
            "totalMethodsQty",
            "modifiers",
            "parenthesizedExpsQty",
            "privateFieldsQty",
            "uniqueWordsQty",
            "synchronizedMethodsQty",
            "nosi",
            "finalFieldsQty",
            "returnQty",
            "variablesQty",
            "abstractMethodsQty",
            "wmc",
            "comparisonsQty",
            "mathOperationsQty",
            "anonymousClassesQty",
            "numbersQty",
            "fanout",
            "defaultMethodsQty",
            "defaultFieldsQty",
            "lcom*",
            "privateMethodsQty",
            "stringLiteralsQty",
            "noc",
        ],
    },
    43: {
        "fitness": 0.001053925303611058,
        "subset": [
            "defaultMethodsQty",
            "rfc",
            "uniqueWordsQty",
            "comparisonsQty",
            "mathOperationsQty",
            "protectedMethodsQty",
            "synchronizedMethodsQty",
            "innerClassesQty",
            "finalFieldsQty",
            "lcc",
            "staticMethodsQty",
            "privateFieldsQty",
            "lambdasQty",
            "parenthesizedExpsQty",
            "wmc",
            "privateMethodsQty",
            "fanout",
            "modifiers",
            "stringLiteralsQty",
            "totalMethodsQty",
            "finalMethodsQty",
            "numbersQty",
            "nosi",
            "noc",
            "loc",
            "dit",
            "variablesQty",
            "maxNestedBlocksQty",
            "returnQty",
            "staticFieldsQty",
            "cboModified",
            "totalFieldsQty",
            "anonymousClassesQty",
            "abstractMethodsQty",
            "fanin",
            "synchronizedFieldsQty",
            "protectedFieldsQty",
            "cbo",
            "publicMethodsQty",
            "lcom",
            "assignmentsQty",
            "visibleMethodsQty",
            "lcom*",
        ],
    },
    44: {
        "fitness": 0.0004460566699934578,
        "subset": [
            "protectedMethodsQty",
            "staticFieldsQty",
            "nosi",
            "parenthesizedExpsQty",
            "synchronizedMethodsQty",
            "wmc",
            "lcc",
            "maxNestedBlocksQty",
            "totalFieldsQty",
            "defaultFieldsQty",
            "abstractMethodsQty",
            "fanout",
            "synchronizedFieldsQty",
            "defaultMethodsQty",
            "loc",
            "visibleMethodsQty",
            "finalFieldsQty",
            "mathOperationsQty",
            "modifiers",
            "logStatementsQty",
            "privateMethodsQty",
            "staticMethodsQty",
            "tryCatchQty",
            "stringLiteralsQty",
            "returnQty",
            "publicFieldsQty",
            "fanin",
            "privateFieldsQty",
            "cbo",
            "comparisonsQty",
            "uniqueWordsQty",
            "assignmentsQty",
            "lcom",
            "noc",
            "cboModified",
            "variablesQty",
            "finalMethodsQty",
            "numbersQty",
            "innerClassesQty",
            "lambdasQty",
            "tcc",
            "totalMethodsQty",
            "publicMethodsQty",
            "rfc",
        ],
    },
    45: {
        "fitness": 0.0002339458408688818,
        "subset": [
            "dit",
            "parenthesizedExpsQty",
            "returnQty",
            "lcom*",
            "publicFieldsQty",
            "synchronizedFieldsQty",
            "staticFieldsQty",
            "loc",
            "defaultMethodsQty",
            "variablesQty",
            "lcom",
            "staticMethodsQty",
            "lambdasQty",
            "loopQty",
            "cbo",
            "synchronizedMethodsQty",
            "publicMethodsQty",
            "protectedFieldsQty",
            "modifiers",
            "abstractMethodsQty",
            "rfc",
            "anonymousClassesQty",
            "finalFieldsQty",
            "stringLiteralsQty",
            "lcc",
            "fanin",
            "cboModified",
            "noc",
            "logStatementsQty",
            "fanout",
            "assignmentsQty",
            "totalMethodsQty",
            "mathOperationsQty",
            "innerClassesQty",
            "finalMethodsQty",
            "privateFieldsQty",
            "defaultFieldsQty",
            "nosi",
            "comparisonsQty",
            "visibleMethodsQty",
            "uniqueWordsQty",
            "wmc",
            "numbersQty",
            "privateMethodsQty",
            "totalFieldsQty",
        ],
    },
    46: {
        "fitness": 5.664015869078378 * 10 ** (-5),
        "subset": [
            "tryCatchQty",
            "nosi",
            "lcc",
            "logStatementsQty",
            "loc",
            "numbersQty",
            "protectedMethodsQty",
            "totalMethodsQty",
            "defaultFieldsQty",
            "parenthesizedExpsQty",
            "abstractMethodsQty",
            "staticMethodsQty",
            "tcc",
            "cboModified",
            "loopQty",
            "cbo",
            "synchronizedMethodsQty",
            "visibleMethodsQty",
            "comparisonsQty",
            "modifiers",
            "stringLiteralsQty",
            "variablesQty",
            "staticFieldsQty",
            "anonymousClassesQty",
            "synchronizedFieldsQty",
            "publicFieldsQty",
            "privateMethodsQty",
            "wmc",
            "assignmentsQty",
            "privateFieldsQty",
            "publicMethodsQty",
            "returnQty",
            "dit",
            "uniqueWordsQty",
            "rfc",
            "lcom",
            "innerClassesQty",
            "mathOperationsQty",
            "totalFieldsQty",
            "maxNestedBlocksQty",
            "defaultMethodsQty",
            "noc",
            "finalFieldsQty",
            "lambdasQty",
            "fanout",
            "fanin",
        ],
    },
    47: {
        "fitness": 2.1673470385361027 * 10 ** (-5),
        "subset": [
            "assignmentsQty",
            "fanout",
            "protectedMethodsQty",
            "visibleMethodsQty",
            "loopQty",
            "anonymousClassesQty",
            "tcc",
            "uniqueWordsQty",
            "maxNestedBlocksQty",
            "defaultMethodsQty",
            "comparisonsQty",
            "finalFieldsQty",
            "rfc",
            "privateFieldsQty",
            "loc",
            "lcc",
            "returnQty",
            "totalMethodsQty",
            "abstractMethodsQty",
            "lcom*",
            "tryCatchQty",
            "dit",
            "finalMethodsQty",
            "publicFieldsQty",
            "logStatementsQty",
            "stringLiteralsQty",
            "numbersQty",
            "totalFieldsQty",
            "variablesQty",
            "innerClassesQty",
            "parenthesizedExpsQty",
            "modifiers",
            "publicMethodsQty",
            "staticMethodsQty",
            "mathOperationsQty",
            "staticFieldsQty",
            "cboModified",
            "lambdasQty",
            "protectedFieldsQty",
            "lcom",
            "fanin",
            "nosi",
            "cbo",
            "wmc",
            "privateMethodsQty",
            "noc",
            "defaultFieldsQty",
        ],
    },
    48: {
        "fitness": 5.780821274226774 * 10 ** (-17),
        "subset": [
            "nosi",
            "loopQty",
            "comparisonsQty",
            "cboModified",
            "maxNestedBlocksQty",
            "fanin",
            "lcom",
            "logStatementsQty",
            "stringLiteralsQty",
            "finalFieldsQty",
            "mathOperationsQty",
            "noc",
            "protectedFieldsQty",
            "finalMethodsQty",
            "cbo",
            "publicFieldsQty",
            "fanout",
            "synchronizedMethodsQty",
            "defaultFieldsQty",
            "innerClassesQty",
            "assignmentsQty",
            "privateFieldsQty",
            "lambdasQty",
            "uniqueWordsQty",
            "tryCatchQty",
            "rfc",
            "abstractMethodsQty",
            "tcc",
            "lcom*",
            "staticMethodsQty",
            "wmc",
            "numbersQty",
            "defaultMethodsQty",
            "publicMethodsQty",
            "returnQty",
            "staticFieldsQty",
            "privateMethodsQty",
            "loc",
            "parenthesizedExpsQty",
            "modifiers",
            "totalFieldsQty",
            "dit",
            "totalMethodsQty",
            "protectedMethodsQty",
            "lcc",
            "variablesQty",
            "visibleMethodsQty",
            "anonymousClassesQty",
        ],
    },
}

In [None]:
# Validation part
# Wrap every entry of repos_test once (imputation, then caching) so the
# validation loop below can reuse the wrapped objects for all subsets.
cached_test_repos = [
    CachedRepo(ImputedRepo(test_repo)) for test_repo in repos_test
]

In [None]:
# Re-score every subset recorded in class_data on the wrapped test
# repositories and print the result next to the recorded fitness.
for subset_size, record in class_data.items():
    awaited_fitness = record["fitness"]
    subset = record["subset"]

    errors = []

    for repo in cached_test_repos:
        try:
            error = ErrorRepositoryResults(repo, kruskal_stress, eq_distance).class_error(subset)
        except ValueError:
            # A repository whose error computation raises ValueError is
            # simply left out of the mean.
            continue
        errors.append(error)

    errors = np.array(errors)

    # Mean error over the repositories that could be scored.
    actual_fitness = np.mean(errors)

    print(
        f"Subset size: {subset_size}",
        f"Awaited fitness: {awaited_fitness}",
        f"Actual fitness: {actual_fitness}",
        "-" * 100,
        sep="\n",
    )

Subset size: 1
Awaited fitness: 0.728992671638118
Actual fitness: 0.7265723049517221
----------------------------------------------------------------------------------------------------
Subset size: 2
Awaited fitness: 0.6160998820251482
Actual fitness: 0.6046969019962544
----------------------------------------------------------------------------------------------------
Subset size: 3
Awaited fitness: 0.3148567906030604
Actual fitness: 0.27845149646136635
----------------------------------------------------------------------------------------------------
Subset size: 4
Awaited fitness: 0.3129877907088629
Actual fitness: 0.2763754024916449
----------------------------------------------------------------------------------------------------
Subset size: 5
Awaited fitness: 0.11383694800707322
Actual fitness: 0.11072974148598609
----------------------------------------------------------------------------------------------------
Subset size: 6
Awaited fitness: 0.07841392454910531
Actual fitn

### Method

In [None]:
# Hyper-parameters of the genetic search over method-level metric subsets.
# Number of chromosomes in the initial population and kept after each generation.
POPULATION_SIZE_METHOD = 20
# Number of mother/father pairs selected per generation (one offspring each).
N_OFFSPRINGS_METHOD = 10
# Per-gene probability that a gene is replaced by a random unused metric.
MUTATION_RATE_METHOD = 0.1
# Upper bound on the number of generations run for each subset size.
MAX_ITERATIONS_METHOD = 10
# Stop early once the best fitness has been unchanged for this many generations in a row.
MAX_REPEATS_METHOD = 10

In [None]:
# Candidate metric names for the search: the column labels of the method-level
# frame of the first entry in repos_train.
method_columns = list(repos_train[0].method_df().columns)

In [None]:
import random

def get_initial_population_method(labels: list[str], n: int, population_size=None) -> list:
    """Draw a random initial population of pairwise-different chromosomes.

    A chromosome is an ordered list of ``n`` entries of ``labels`` taken at
    distinct positions. Two chromosomes count as different when their lists
    differ, so the same metrics in another order form another chromosome.

    Args:
        labels: Candidate metric names to sample from (expected to be distinct).
        n: Number of metrics in every chromosome.
        population_size: Number of chromosomes to draw. When omitted, the
            module-level ``POPULATION_SIZE_METHOD`` is used.

    Returns:
        A list with ``population_size`` chromosomes, no two of them equal.

    Raises:
        ValueError: If ``n`` is negative or larger than ``len(labels)``, or if
            ``labels`` cannot supply ``population_size`` different chromosomes
            of length ``n``.
    """
    from math import perm

    if population_size is None:
        population_size = POPULATION_SIZE_METHOD

    # The sampling loop below only stops once enough different chromosomes
    # were found. If fewer ordered selections exist than requested (for
    # example n == 0, or very few labels), it would never terminate, so fail
    # loudly up front instead.
    available = perm(len(labels), n)
    if available < population_size:
        raise ValueError(
            f"cannot build {population_size} distinct chromosomes of length {n} "
            f"from {len(labels)} labels (only {available} exist)"
        )

    population = []

    while len(population) < population_size:
        # Sample n positions without replacement and take their labels.
        chromosome = random.sample(labels, n)

        # check that all metrics are unique
        assert len(set(chromosome)) == len(chromosome) == n

        # Only keep the chromosome if it is not in the population yet;
        # otherwise draw again.
        if chromosome not in population:
            population.append(chromosome)

    return population

In [None]:
def fitness_method(columns):
    """Return the mean method-level error of a metric subset over ``repos``.

    Every repository is wrapped as ``CachedRepo(ImputedRepo(...))`` and scored
    with ``ErrorRepositoryResults(...).method_error(columns)`` using
    ``kruskal_stress`` and ``eq_distance``. Repositories for which that call
    raises ``ValueError`` do not contribute to the mean.

    Args:
        columns: The metric names (one chromosome) to evaluate.

    Returns:
        ``np.mean`` of the collected errors; NaN when no repository could be
        scored, since the mean of an empty array is NaN.
    """
    # NOTE(review): this iterates over `repos`, while the candidate columns
    # come from `repos_train` and validation uses `repos_test` - confirm that
    # `repos` does not include the test repositories.
    errors = []

    for raw_repo in repos:
        wrapped_repo = CachedRepo(ImputedRepo(raw_repo))

        try:
            error = ErrorRepositoryResults(wrapped_repo, kruskal_stress, eq_distance).method_error(columns)
        except ValueError:
            # this repository cannot be scored for the given columns
            pass
        else:
            errors.append(error)

    # average over the repositories that produced an error value
    return np.mean(np.array(errors))

In [None]:
def get_parents_method(population):
    """Split the head of the population into mothers and fathers.

    The population is expected to be ordered best-first. Chromosomes at even
    positions become mothers and those at odd positions become fathers, using
    at most the first ``2 * N_OFFSPRINGS_METHOD`` entries.

    Returns:
        A ``(mothers, fathers)`` tuple of two lists.
    """
    even_positions = slice(0, 2 * N_OFFSPRINGS_METHOD, 2)
    odd_positions = slice(1, 2 * N_OFFSPRINGS_METHOD + 1, 2)

    return population[even_positions], population[odd_positions]

In [None]:
def crossover_method(mother, father):
    """Create one offspring from two equal-length parents by segment exchange.

    For every pair of cut points ``left <= right`` two candidates are built:
    the mother with the father's ``[left:right]`` segment and the father with
    the mother's segment. Candidates containing a repeated gene are dropped.
    The offspring is picked with ``random.choice`` from the list of surviving
    candidates plus both parents (the list may hold equal sequences more than
    once).

    Args:
        mother: First parent chromosome (list of metric names).
        father: Second parent chromosome, same length as ``mother``.

    Returns:
        The chosen offspring; this can be one of the parent objects itself.
    """
    size = len(mother)
    assert size == len(father)

    # the parents themselves are always eligible
    candidates = [mother, father]

    cut_pairs = [
        (left, right)
        for left in range(size + 1)
        for right in range(left, size + 1)
    ]

    for left, right in cut_pairs:
        # first the mother as the outer part, then the father
        for outer, inner in ((mother, father), (father, mother)):
            child = outer[:left] + inner[left:right] + outer[right:]
            if len(child) == len(set(child)):
                candidates.append(child)

    offspring = random.choice(candidates)

    assert len(offspring) == len(set(offspring))

    return offspring

In [None]:
def mutate_method(chromosome):
    """Return a mutated copy of ``chromosome``; the input is left untouched.

    Each position is replaced, with probability ``MUTATION_RATE_METHOD``, by an
    entry of ``method_columns`` drawn at random that is not currently part of
    the chromosome.

    Args:
        chromosome: List of metric names without repetitions.

    Returns:
        A new list of the same length, still without repetitions.
    """
    mutated = chromosome[:]

    for position in range(len(mutated)):
        if random.random() >= MUTATION_RATE_METHOD:
            continue

        # Redraw until the gene is not used yet. This only ends if
        # method_columns has at least one entry outside the chromosome.
        replacement = random.choice(method_columns)
        while replacement in mutated:
            replacement = random.choice(method_columns)

        mutated[position] = replacement

    assert len(mutated) == len(set(mutated))

    return mutated

In [None]:
def get_population_fitness_method(population):
    """Score every chromosome and order the results by ascending fitness.

    Args:
        population: Iterable of chromosomes (lists of metric names).

    Returns:
        A list of ``(chromosome, fitness)`` tuples, smallest fitness first.
    """
    scored = [(chromosome, fitness_method(chromosome)) for chromosome in population]
    return sorted(scored, key=lambda pair: pair[1])

In [None]:
def replace_population_method(population, new_individuals):
    """Merge offspring into the population and keep only the fittest.

    The current population and the new individuals are scored together,
    ordered by ascending fitness, and truncated to ``POPULATION_SIZE_METHOD``
    chromosomes. The best fitness of the merged set is printed.

    Args:
        population: Current chromosomes. This list is not modified.
        new_individuals: Offspring to add to the selection.

    Returns:
        A new list with at most ``POPULATION_SIZE_METHOD`` chromosomes,
        smallest fitness first.
    """
    # Work on a fresh list: extending `population` itself would silently grow
    # the list object owned by the caller.
    candidates = list(population)
    candidates.extend(new_individuals)

    # Sort them by fitness score
    fitness_results = get_population_fitness_method(candidates)

    print(f"Fitness score: {fitness_results[0][1]}")

    # Drop the fitness values and remove the chromosomes that rank too low
    survivors = [chromosome for chromosome, _ in fitness_results]
    return survivors[:POPULATION_SIZE_METHOD]

In [None]:
def evolution_step_method(population):
    """Run one generation: select parents, breed, mutate, and reselect.

    Args:
        population: Current chromosomes, expected to be ordered best-first.

    Returns:
        The population returned by ``replace_population_method`` after the
        mutated offspring have been merged in.
    """
    mothers, fathers = get_parents_method(population)

    # One offspring per (mother, father) pair: crossover first, then mutation.
    offsprings = [
        mutate_method(crossover_method(mother, father))
        for mother, father in zip(mothers, fathers)
    ]

    return replace_population_method(population, offsprings)

In [None]:
# Run the genetic search once per subset size (the full column set is excluded)
# and print the best subset found for each size.
for subset_size in range(1, len(method_columns)):
    population = get_initial_population_method(method_columns, subset_size)

    # prev_value == -1 means "no generation evaluated yet".
    prev_value, repeats = -1, 0

    for iteration in range(MAX_ITERATIONS_METHOD):
        population = evolution_step_method(population)

        # NOTE(review): evolution_step_method already scored these chromosomes
        # internally; this evaluates them again - confirm the cost is acceptable.
        fitness_scores = get_population_fitness_method(population)
        best_result, best_fitness = fitness_scores[0]

        # The best fitness must never get worse from one generation to the next.
        assert prev_value == -1 or best_fitness <= prev_value

        # Count for how many consecutive generations the best fitness stayed the same.
        if best_fitness == prev_value:
            repeats += 1
        else:
            prev_value, repeats = best_fitness, 1

        if repeats == MAX_REPEATS_METHOD:
            break

        print(
            f"Best fitness: {best_fitness}",
            f"Best result: {best_result}",
            f"Repeats: {repeats}",
            f"Previous value: {prev_value}",
            "",
            sep="\n",
        )

    fitness_scores = get_population_fitness_method(population)
    print(
        f"subset size: {subset_size}",
        f"Final best fitness: {fitness_scores[0][1]}",
        f"Final best result: {fitness_scores[0][0]}",
        "-------------",
        "",
        sep="\n",
    )

In [None]:
method_data = {
    1: {"fitness": 0.7282976013165373, "subset": ["modifiers"]},
    2: {"fitness": 0.35571045785503425, "subset": ["line", "fanout"]},
    3: {"fitness": 0.05022695587607195, "subset": ["tryCatchQty", "modifiers", "line"]},
    4: {
        "fitness": 0.04996524640566263,
        "subset": ["parenthesizedExpsQty", "modifiers", "line", "comparisonsQty"],
    },
    5: {
        "fitness": 0.026848762012333383,
        "subset": [
            "assignmentsQty",
            "cboModified",
            "line",
            "uniqueWordsQty",
            "modifiers",
        ],
    },
    6: {
        "fitness": 0.02158060639113606,
        "subset": ["modifiers", "wmc", "uniqueWordsQty", "line", "fanin", "loc"],
    },
    7: {
        "fitness": 0.023757525687816968,
        "subset": [
            "fanin",
            "uniqueWordsQty",
            "modifiers",
            "methodsInvokedQty",
            "fanout",
            "numbersQty",
            "line",
        ],
    },
    8: {
        "fitness": 0.024234433217802616,
        "subset": [
            "fanout",
            "stringLiteralsQty",
            "modifiers",
            "fanin",
            "uniqueWordsQty",
            "tryCatchQty",
            "wmc",
            "line",
        ],
    },
    9: {
        "fitness": 0.021252406942167817,
        "subset": [
            "wmc",
            "cboModified",
            "uniqueWordsQty",
            "methodsInvokedQty",
            "hasJavaDoc",
            "modifiers",
            "line",
            "fanout",
            "cbo",
        ],
    },
    10: {
        "fitness": 0.01389775320716382,
        "subset": [
            "loc",
            "uniqueWordsQty",
            "line",
            "modifiers",
            "stringLiteralsQty",
            "variablesQty",
            "methodsInvokedQty",
            "fanin",
            "parenthesizedExpsQty",
            "rfc",
        ],
    },
    11: {
        "fitness": 0.02135418012118047,
        "subset": [
            "numbersQty",
            "uniqueWordsQty",
            "wmc",
            "rfc",
            "variablesQty",
            "fanin",
            "innerClassesQty",
            "assignmentsQty",
            "line",
            "modifiers",
            "fanout",
        ],
    },
    12: {
        "fitness": 0.0054386464427430194,
        "subset": [
            "fanin",
            "loc",
            "assignmentsQty",
            "modifiers",
            "fanout",
            "methodsInvokedQty",
            "stringLiteralsQty",
            "cboModified",
            "line",
            "anonymousClassesQty",
            "methodsInvokedLocalQty",
            "uniqueWordsQty",
        ],
    },
    13: {
        "fitness": 0.009215510732802044,
        "subset": [
            "methodsInvokedQty",
            "fanout",
            "modifiers",
            "parametersQty",
            "loc",
            "anonymousClassesQty",
            "innerClassesQty",
            "line",
            "cbo",
            "cboModified",
            "uniqueWordsQty",
            "numbersQty",
            "wmc",
        ],
    },
    14: {
        "fitness": 0.008939223185484015,
        "subset": [
            "returnsQty",
            "anonymousClassesQty",
            "parametersQty",
            "fanin",
            "cboModified",
            "loc",
            "tryCatchQty",
            "methodsInvokedLocalQty",
            "methodsInvokedQty",
            "modifiers",
            "stringLiteralsQty",
            "cbo",
            "uniqueWordsQty",
            "line",
        ],
    },
    15: {
        "fitness": 0.009062925083877248,
        "subset": [
            "fanout",
            "assignmentsQty",
            "modifiers",
            "methodsInvokedQty",
            "cbo",
            "loc",
            "line",
            "methodsInvokedLocalQty",
            "uniqueWordsQty",
            "comparisonsQty",
            "constructor",
            "cboModified",
            "lambdasQty",
            "parenthesizedExpsQty",
            "variablesQty",
        ],
    },
    16: {
        "fitness": 0.006594338858096854,
        "subset": [
            "loc",
            "uniqueWordsQty",
            "line",
            "cbo",
            "maxNestedBlocksQty",
            "modifiers",
            "cboModified",
            "fanout",
            "methodsInvokedQty",
            "innerClassesQty",
            "mathOperationsQty",
            "parenthesizedExpsQty",
            "numbersQty",
            "stringLiteralsQty",
            "comparisonsQty",
            "methodsInvokedLocalQty",
        ],
    },
    17: {
        "fitness": 0.003528514890139445,
        "subset": [
            "fanout",
            "stringLiteralsQty",
            "comparisonsQty",
            "logStatementsQty",
            "cboModified",
            "fanin",
            "rfc",
            "parametersQty",
            "uniqueWordsQty",
            "anonymousClassesQty",
            "line",
            "methodsInvokedIndirectLocalQty",
            "methodsInvokedQty",
            "wmc",
            "loc",
            "constructor",
            "modifiers",
        ],
    },
    18: {
        "fitness": 0.0016796067128774243,
        "subset": [
            "wmc",
            "stringLiteralsQty",
            "numbersQty",
            "modifiers",
            "assignmentsQty",
            "loopQty",
            "innerClassesQty",
            "line",
            "fanin",
            "methodsInvokedIndirectLocalQty",
            "logStatementsQty",
            "returnsQty",
            "cboModified",
            "uniqueWordsQty",
            "fanout",
            "rfc",
            "loc",
            "methodsInvokedQty",
        ],
    },
    19: {
        "fitness": 0.00288786038628067,
        "subset": [
            "rfc",
            "loopQty",
            "maxNestedBlocksQty",
            "assignmentsQty",
            "line",
            "returnsQty",
            "constructor",
            "methodsInvokedQty",
            "loc",
            "parametersQty",
            "fanin",
            "modifiers",
            "fanout",
            "uniqueWordsQty",
            "tryCatchQty",
            "stringLiteralsQty",
            "cboModified",
            "anonymousClassesQty",
            "hasJavaDoc",
        ],
    },
    20: {
        "fitness": 0.002643855890766759,
        "subset": [
            "cboModified",
            "comparisonsQty",
            "line",
            "returnsQty",
            "stringLiteralsQty",
            "innerClassesQty",
            "modifiers",
            "uniqueWordsQty",
            "rfc",
            "anonymousClassesQty",
            "cbo",
            "lambdasQty",
            "fanin",
            "logStatementsQty",
            "constructor",
            "numbersQty",
            "methodsInvokedQty",
            "loopQty",
            "fanout",
            "loc",
        ],
    },
    21: {
        "fitness": 0.001978123609255512,
        "subset": [
            "methodsInvokedQty",
            "loc",
            "uniqueWordsQty",
            "anonymousClassesQty",
            "variablesQty",
            "methodsInvokedLocalQty",
            "lambdasQty",
            "rfc",
            "parenthesizedExpsQty",
            "fanin",
            "stringLiteralsQty",
            "cbo",
            "constructor",
            "fanout",
            "innerClassesQty",
            "modifiers",
            "parametersQty",
            "assignmentsQty",
            "comparisonsQty",
            "line",
            "cboModified",
        ],
    },
    22: {
        "fitness": 0.0008417085722644984,
        "subset": [
            "assignmentsQty",
            "constructor",
            "loc",
            "maxNestedBlocksQty",
            "tryCatchQty",
            "stringLiteralsQty",
            "rfc",
            "cboModified",
            "line",
            "fanout",
            "anonymousClassesQty",
            "cbo",
            "wmc",
            "fanin",
            "variablesQty",
            "loopQty",
            "modifiers",
            "uniqueWordsQty",
            "numbersQty",
            "methodsInvokedLocalQty",
            "methodsInvokedQty",
            "innerClassesQty",
        ],
    },
    23: {
        "fitness": 0.0014382912325781965,
        "subset": [
            "loc",
            "returnsQty",
            "wmc",
            "fanin",
            "methodsInvokedQty",
            "line",
            "stringLiteralsQty",
            "comparisonsQty",
            "rfc",
            "numbersQty",
            "fanout",
            "methodsInvokedIndirectLocalQty",
            "parenthesizedExpsQty",
            "maxNestedBlocksQty",
            "modifiers",
            "anonymousClassesQty",
            "cbo",
            "innerClassesQty",
            "cboModified",
            "variablesQty",
            "parametersQty",
            "constructor",
            "uniqueWordsQty",
        ],
    },
    24: {
        "fitness": 0.0007295089105523778,
        "subset": [
            "hasJavaDoc",
            "stringLiteralsQty",
            "returnsQty",
            "cbo",
            "line",
            "cboModified",
            "methodsInvokedIndirectLocalQty",
            "fanin",
            "loc",
            "assignmentsQty",
            "loopQty",
            "lambdasQty",
            "logStatementsQty",
            "numbersQty",
            "fanout",
            "parametersQty",
            "mathOperationsQty",
            "variablesQty",
            "rfc",
            "comparisonsQty",
            "modifiers",
            "uniqueWordsQty",
            "methodsInvokedQty",
            "tryCatchQty",
        ],
    },
    25: {
        "fitness": 0.0006903533303983124,
        "subset": [
            "comparisonsQty",
            "assignmentsQty",
            "cbo",
            "line",
            "maxNestedBlocksQty",
            "anonymousClassesQty",
            "fanin",
            "stringLiteralsQty",
            "loc",
            "methodsInvokedQty",
            "loopQty",
            "rfc",
            "constructor",
            "tryCatchQty",
            "parametersQty",
            "mathOperationsQty",
            "cboModified",
            "fanout",
            "wmc",
            "lambdasQty",
            "returnsQty",
            "modifiers",
            "uniqueWordsQty",
            "numbersQty",
            "methodsInvokedLocalQty",
        ],
    },
    26: {
        "fitness": 9.595742008534122 * 10 ** (-5),
        "subset": [
            "line",
            "maxNestedBlocksQty",
            "returnsQty",
            "parametersQty",
            "fanout",
            "fanin",
            "variablesQty",
            "methodsInvokedQty",
            "modifiers",
            "loc",
            "wmc",
            "numbersQty",
            "methodsInvokedLocalQty",
            "mathOperationsQty",
            "constructor",
            "cboModified",
            "stringLiteralsQty",
            "uniqueWordsQty",
            "cbo",
            "parenthesizedExpsQty",
            "rfc",
            "innerClassesQty",
            "methodsInvokedIndirectLocalQty",
            "comparisonsQty",
            "hasJavaDoc",
            "assignmentsQty",
        ],
    },
    27: {
        "fitness": 0.0001293810964654912,
        "subset": [
            "innerClassesQty",
            "assignmentsQty",
            "loc",
            "methodsInvokedLocalQty",
            "methodsInvokedQty",
            "stringLiteralsQty",
            "variablesQty",
            "comparisonsQty",
            "hasJavaDoc",
            "modifiers",
            "parametersQty",
            "returnsQty",
            "maxNestedBlocksQty",
            "rfc",
            "mathOperationsQty",
            "numbersQty",
            "fanout",
            "cbo",
            "uniqueWordsQty",
            "loopQty",
            "fanin",
            "wmc",
            "anonymousClassesQty",
            "cboModified",
            "constructor",
            "line",
            "methodsInvokedIndirectLocalQty",
        ],
    },
    28: {
        "fitness": 0.00010058611458049798,
        "subset": [
            "methodsInvokedQty",
            "innerClassesQty",
            "cboModified",
            "returnsQty",
            "anonymousClassesQty",
            "mathOperationsQty",
            "cbo",
            "loc",
            "fanin",
            "variablesQty",
            "rfc",
            "wmc",
            "constructor",
            "uniqueWordsQty",
            "maxNestedBlocksQty",
            "tryCatchQty",
            "methodsInvokedIndirectLocalQty",
            "stringLiteralsQty",
            "lambdasQty",
            "parametersQty",
            "methodsInvokedLocalQty",
            "line",
            "numbersQty",
            "comparisonsQty",
            "fanout",
            "logStatementsQty",
            "assignmentsQty",
            "modifiers",
        ],
    },
    29: {
        "fitness": 2.0945905381087708 * 10 ** (-5),
        "subset": [
            "modifiers",
            "maxNestedBlocksQty",
            "parenthesizedExpsQty",
            "logStatementsQty",
            "line",
            "cboModified",
            "cbo",
            "variablesQty",
            "anonymousClassesQty",
            "comparisonsQty",
            "mathOperationsQty",
            "methodsInvokedQty",
            "rfc",
            "constructor",
            "hasJavaDoc",
            "fanin",
            "loopQty",
            "loc",
            "methodsInvokedLocalQty",
            "uniqueWordsQty",
            "stringLiteralsQty",
            "wmc",
            "returnsQty",
            "assignmentsQty",
            "parametersQty",
            "fanout",
            "methodsInvokedIndirectLocalQty",
            "numbersQty",
            "lambdasQty",
        ],
    },
    30: {
        "fitness": 5.2776204380851505 * 10 ** (-8),
        "subset": [
            "methodsInvokedIndirectLocalQty",
            "returnsQty",
            "line",
            "cboModified",
            "wmc",
            "comparisonsQty",
            "numbersQty",
            "maxNestedBlocksQty",
            "tryCatchQty",
            "variablesQty",
            "assignmentsQty",
            "mathOperationsQty",
            "uniqueWordsQty",
            "parenthesizedExpsQty",
            "modifiers",
            "rfc",
            "constructor",
            "anonymousClassesQty",
            "hasJavaDoc",
            "loopQty",
            "fanin",
            "parametersQty",
            "lambdasQty",
            "logStatementsQty",
            "methodsInvokedQty",
            "loc",
            "stringLiteralsQty",
            "cbo",
            "fanout",
            "methodsInvokedLocalQty",
        ],
    },
}


In [None]:
# Re-score every stored method-level subset on the cached test repositories and
# print the stored ("awaited") fitness next to the freshly computed mean error.
for subset_size, entry in method_data.items():
    awaited_fitness, subset = entry["fitness"], entry["subset"]

    errors = []
    for repo in cached_test_repos:
        try:
            error = ErrorRepositoryResults(repo, kruskal_stress, eq_distance).method_error(subset)
        except ValueError:
            # Repositories whose error raises ValueError are left out of the mean.
            continue
        errors.append(error)

    errors = np.array(errors)
    actual_fitness = np.mean(errors)

    print(f"Subset size: {subset_size}")
    print(f"Awaited fitness: {awaited_fitness}")
    print(f"Actual fitness: {actual_fitness}")
    print("-" * 100)

Subset size: 1
Awaited fitness: 0.7282976013165373
Actual fitness: 0.7827145667050437
----------------------------------------------------------------------------------------------------
Subset size: 2
Awaited fitness: 0.35571045785503425
Actual fitness: 0.28166155218367783
----------------------------------------------------------------------------------------------------
Subset size: 3
Awaited fitness: 0.05022695587607195
Actual fitness: 0.04783523082969985
----------------------------------------------------------------------------------------------------
Subset size: 4
Awaited fitness: 0.04996524640566263
Actual fitness: 0.04771269835655848
----------------------------------------------------------------------------------------------------
Subset size: 5
Awaited fitness: 0.026848762012333383
Actual fitness: 0.029758748876797855
----------------------------------------------------------------------------------------------------
Subset size: 6
Awaited fitness: 0.02158060639113606
Act

# Particle Swarm Optimization (PSO) subset search

In [None]:
import pandas as pd

def all_nans(dataset):
    """Tell whether a DataFrame contains no non-null value at all.

    Args:
        dataset: pandas DataFrame to inspect.

    Returns:
        bool: True when every cell is null (NaN/None/NaT); also True for a
        frame without rows or without columns. False as soon as one cell
        holds a value.
    """
    # Vectorised null mask instead of a Python-level loop over every cell;
    # bool() converts the NumPy scalar into a plain Python bool.
    return bool(dataset.isna().to_numpy().all())

In [None]:
def total_error_method(columns):
    """Mean method-level error of a metric subset over the training repositories.

    Each repository in ``repos_train`` whose method frame is not entirely null
    is wrapped as ``CachedRepo(ImputedRepo(repo))`` and scored with
    ``ErrorRepositoryResults(..., kruskal_stress, eq_distance).method_error``.
    Repositories for which scoring raises ``ValueError`` are left out.

    Args:
        columns: metric (column) names forming the subset to score.

    Returns:
        The arithmetic mean of the per-repository errors, or ``nan`` when no
        repository could be scored.
    """
    results = []

    for train_repo in repos_train:
        # Skip repositories whose method frame holds no non-null value.
        if all_nans(train_repo.method_df()):
            continue

        # Separate name so the loop variable is not rebound to the wrapper.
        prepared_repo = CachedRepo(ImputedRepo(train_repo))

        try:
            result = ErrorRepositoryResults(prepared_repo, kruskal_stress, eq_distance).method_error(columns)
        except ValueError:
            continue

        results.append(result)

    if not results:
        # np.mean of an empty array would warn ("Mean of empty slice") and
        # yield nan; return the same value explicitly and without the warning.
        return float("nan")

    return np.mean(np.array(results))

In [None]:
from sko.PSO import PSO
import numpy as np
from tqdm import tqdm

# Number of top-ranked positions turned into a metric subset. The objective
# below reads this module-level name at call time.
N = 10

def error_wrapper(repo):
    """Build a PSO objective that scores method-metric subsets on one repository.

    Args:
        repo: repository object handed to ``ErrorRepositoryResults``.

    Returns:
        A function ``error(x)``. ``x`` is a particle position with one weight
        per entry of the module-level ``columns``; the ``N`` largest weights
        pick the subset. The function returns the repository's
        ``method_error`` for that subset, or ``inf`` when the subset is empty
        or scoring raises ``ValueError``.
    """
    def error(x):
        position = np.array(x)

        # Rank descending first, then truncate. Unlike argsort()[-N:][::-1],
        # this selects nothing (instead of every column) when N == 0, so the
        # empty-subset guard below is actually reachable.
        top_indices = position.argsort()[::-1][:N]
        subset = [columns[idx] for idx in top_indices]

        if not subset:
            return float('inf')  # Avoid empty subset

        try:
            return ErrorRepositoryResults(repo, kruskal_stress, eq_distance).method_error(subset)
        except ValueError:
            return float('inf')

    return error

# Search space: one weight in [0, 1] per method metric column. A particle's
# N largest weights select the metric subset that gets scored.
columns = list(repos_train[0].method_df().columns)
lower_bounds = [0] * len(columns)
upper_bounds = [1] * len(columns)

for subset_size in range(1, len(columns) + 1):
    # The objective built by error_wrapper reads the module-level N, so set it
    # once per subset size rather than once per repository.
    N = subset_size

    best_subset = []
    # Start from +inf: with the former start value of 1, any candidate whose
    # error was >= 1 was dropped and "error: 1" was printed with an empty subset.
    best_error = float('inf')

    for i in tqdm(range(len(repos_train))):
        # Skip repositories whose method frame holds no non-null value.
        if all_nans(repos_train[i].method_df()):
            continue

        repo = CachedRepo(ImputedRepo(repos_train[i]))

        # One swarm per training repository, optimised on that repository only.
        pso = PSO(
            func=error_wrapper(repo),
            n_dim=len(columns),
            pop=30,
            max_iter=30,
            lb=lower_bounds,
            ub=upper_bounds,
            w=0.6,
            c1=2,
            c2=2,
            verbose=False,
        )
        pso.record_mode = True
        pso.run()

        x = np.array(pso.gbest_x)

        # Turn the swarm's best position into a subset, then re-score it on
        # all training repositories so candidates are compared on equal terms.
        optimal_subset = [columns[idx] for idx in x.argsort()[-N:][::-1]]
        optimal_subset_error = total_error_method(optimal_subset)

        # A nan error compares False here and is therefore never selected.
        if optimal_subset_error < best_error:
            best_error = optimal_subset_error
            best_subset = optimal_subset

    print(f"subset: {best_subset}")
    print(f"subset size: {len(best_subset)}")
    print(f"error: {best_error}")

In [None]:
def total_error_class(columns):
    """Mean class-level error of a metric subset over the training repositories.

    Each repository in ``repos_train`` whose class frame is not entirely null
    is wrapped as ``CachedRepo(ImputedRepo(repo))`` and scored with
    ``ErrorRepositoryResults(..., kruskal_stress, eq_distance).class_error``.
    Repositories for which scoring raises ``ValueError`` are left out.

    Args:
        columns: metric (column) names forming the subset to score.

    Returns:
        The arithmetic mean of the per-repository errors, or ``nan`` when no
        repository could be scored.
    """
    results = []

    for train_repo in repos_train:
        # Skip repositories whose class frame holds no non-null value.
        if all_nans(train_repo.class_df()):
            continue

        # Separate name so the loop variable is not rebound to the wrapper.
        prepared_repo = CachedRepo(ImputedRepo(train_repo))

        try:
            result = ErrorRepositoryResults(prepared_repo, kruskal_stress, eq_distance).class_error(columns)
        except ValueError:
            continue

        results.append(result)

    if not results:
        # np.mean of an empty array would warn ("Mean of empty slice") and
        # yield nan; return the same value explicitly and without the warning.
        return float("nan")

    return np.mean(np.array(results))

In [None]:
from sko.PSO import PSO
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

# Number of top-ranked positions turned into a metric subset. The objective
# below reads this module-level name at call time.
N = 10


def error_wrapper(repo):
    """Build a PSO objective that scores class-metric subsets on one repository.

    Args:
        repo: repository object handed to ``ErrorRepositoryResults``.

    Returns:
        A function ``error(x)``. ``x`` is a particle position with one weight
        per entry of the module-level ``columns``; the ``N`` largest weights
        pick the subset. The function returns the repository's
        ``class_error`` for that subset, or ``inf`` when the subset is empty
        or scoring raises ``ValueError``.
    """
    def error(x):
        position = np.array(x)

        # Rank descending first, then truncate. Unlike argsort()[-N:][::-1],
        # this selects nothing (instead of every column) when N == 0, so the
        # empty-subset guard below is actually reachable.
        top_indices = position.argsort()[::-1][:N]
        subset = [columns[idx] for idx in top_indices]

        if not subset:
            return float('inf')  # Avoid empty subset

        try:
            return ErrorRepositoryResults(repo, kruskal_stress, eq_distance).class_error(subset)
        except ValueError:
            return float('inf')

    return error


# Search space: one weight in [0, 1] per class metric column. A particle's
# N largest weights select the metric subset that gets scored.
columns = list(repos_train[0].class_df().columns)
lower_bounds = [0] * len(columns)
upper_bounds = [1] * len(columns)

for subset_size in range(1, len(columns) + 1):
    # The objective built by error_wrapper reads the module-level N, so set it
    # once per subset size rather than once per repository.
    N = subset_size

    best_subset = []
    # Start from +inf: with the former start value of 1, any candidate whose
    # error was >= 1 was dropped and "error: 1" was printed with an empty subset.
    best_error = float('inf')

    for i in tqdm(range(len(repos_train))):
        # Skip repositories whose class frame holds no non-null value.
        if all_nans(repos_train[i].class_df()):
            continue

        repo = CachedRepo(ImputedRepo(repos_train[i]))

        # One swarm per training repository, optimised on that repository only.
        pso = PSO(
            func=error_wrapper(repo),
            n_dim=len(columns),
            pop=30,
            max_iter=30,
            lb=lower_bounds,
            ub=upper_bounds,
            w=0.6,
            c1=2,
            c2=2,
            verbose=False,
        )
        pso.record_mode = True
        pso.run()

        x = np.array(pso.gbest_x)

        # Turn the swarm's best position into a subset, then re-score it on
        # all training repositories so candidates are compared on equal terms.
        optimal_subset = [columns[idx] for idx in x.argsort()[-N:][::-1]]
        optimal_subset_error = total_error_class(optimal_subset)

        # A nan error compares False here and is therefore never selected.
        if optimal_subset_error < best_error:
            best_error = optimal_subset_error
            best_subset = optimal_subset

    print(f"subset: {best_subset}")
    print(f"subset size: {len(best_subset)}")
    print(f"error: {best_error}")

In [None]:
class_pso_data = {
    1: {"fitness": 0.6541729290780735, "subset": ["lcom"]},
    2: {"fitness": 0.3229378892063299, "subset": ["modifiers", "lcom"]},
    3: {"fitness": 0.11851858295500113, "subset": ["lcom", "modifiers", "loc"]},
    4: {
        "fitness": 0.0782189580562296,
        "subset": ["modifiers", "lcom", "uniqueWordsQty", "loc"],
    },
    5: {
        "fitness": 0.046962469746672765,
        "subset": ["loc", "stringLiteralsQty", "uniqueWordsQty", "lcom", "modifiers"],
    },
    6: {
        "fitness": 0.03967794563500625,
        "subset": [
            "loc",
            "uniqueWordsQty",
            "lcom",
            "assignmentsQty",
            "stringLiteralsQty",
            "modifiers",
        ],
    },
    7: {
        "fitness": 0.03382083184249842,
        "subset": [
            "uniqueWordsQty",
            "cboModified",
            "lcom",
            "modifiers",
            "loc",
            "stringLiteralsQty",
            "assignmentsQty",
        ],
    },
    8: {
        "fitness": 0.027865428111338023,
        "subset": [
            "modifiers",
            "uniqueWordsQty",
            "cboModified",
            "rfc",
            "lcom",
            "loc",
            "assignmentsQty",
            "stringLiteralsQty",
        ],
    },
    9: {
        "fitness": 0.022638429372050746,
        "subset": [
            "numbersQty",
            "assignmentsQty",
            "loc",
            "lcom",
            "rfc",
            "stringLiteralsQty",
            "modifiers",
            "cboModified",
            "uniqueWordsQty",
        ],
    },
    10: {
        "fitness": 0.019437743097554826,
        "subset": [
            "uniqueWordsQty",
            "cboModified",
            "wmc",
            "rfc",
            "variablesQty",
            "lcom",
            "assignmentsQty",
            "stringLiteralsQty",
            "modifiers",
            "loc",
        ],
    },
    11: {
        "fitness": 0.017749346679945478,
        "subset": [
            "loc",
            "uniqueWordsQty",
            "cboModified",
            "wmc",
            "rfc",
            "lcom",
            "visibleMethodsQty",
            "assignmentsQty",
            "numbersQty",
            "stringLiteralsQty",
            "modifiers",
        ],
    },
    12: {
        "fitness": 0.016589074536587207,
        "subset": [
            "cbo",
            "loc",
            "uniqueWordsQty",
            "cboModified",
            "rfc",
            "lcom",
            "variablesQty",
            "totalFieldsQty",
            "assignmentsQty",
            "numbersQty",
            "stringLiteralsQty",
            "modifiers",
        ],
    },
    13: {
        "fitness": 0.012554123907059333,
        "subset": [
            "cbo",
            "loc",
            "modifiers",
            "lcom",
            "stringLiteralsQty",
            "numbersQty",
            "assignmentsQty",
            "mathOperationsQty",
            "variablesQty",
            "wmc",
            "cboModified",
            "uniqueWordsQty",
            "rfc",
        ],
    },
    14: {
        "fitness": 0.01066923725295219,
        "subset": [
            "rfc",
            "fanin",
            "uniqueWordsQty",
            "lcom",
            "cboModified",
            "loc",
            "variablesQty",
            "wmc",
            "assignmentsQty",
            "numbersQty",
            "stringLiteralsQty",
            "modifiers",
            "lambdasQty",
            "nosi",
        ],
    },
    15: {
        "fitness": 0.010329893989906596,
        "subset": [
            "publicFieldsQty",
            "numbersQty",
            "modifiers",
            "lcom",
            "rfc",
            "loc",
            "visibleMethodsQty",
            "fanin",
            "stringLiteralsQty",
            "wmc",
            "assignmentsQty",
            "cboModified",
            "uniqueWordsQty",
            "lambdasQty",
            "variablesQty",
        ],
    },
    16: {
        "fitness": 0.0088500440092959,
        "subset": [
            "cboModified",
            "uniqueWordsQty",
            "rfc",
            "wmc",
            "synchronizedFieldsQty",
            "loc",
            "variablesQty",
            "lcom",
            "assignmentsQty",
            "numbersQty",
            "stringLiteralsQty",
            "modifiers",
            "totalFieldsQty",
            "fanin",
            "fanout",
            "returnQty",
        ],
    },
    17: {
        "fitness": 0.007896371959570362,
        "subset": [
            "cbo",
            "assignmentsQty",
            "totalFieldsQty",
            "visibleMethodsQty",
            "loc",
            "lcom",
            "modifiers",
            "stringLiteralsQty",
            "numbersQty",
            "rfc",
            "wmc",
            "variablesQty",
            "fanout",
            "fanin",
            "cboModified",
            "lambdasQty",
            "uniqueWordsQty",
        ],
    },
    18: {
        "fitness": 0.00753467588319265,
        "subset": [
            "totalFieldsQty",
            "loc",
            "rfc",
            "lcom",
            "wmc",
            "stringLiteralsQty",
            "numbersQty",
            "assignmentsQty",
            "lcom*",
            "variablesQty",
            "fanin",
            "privateFieldsQty",
            "modifiers",
            "uniqueWordsQty",
            "returnQty",
            "nosi",
            "cboModified",
            "cbo",
        ],
    },
    19: {
        "fitness": 0.0067332801574629874,
        "subset": [
            "publicFieldsQty",
            "stringLiteralsQty",
            "cboModified",
            "fanin",
            "wmc",
            "rfc",
            "lcom",
            "finalMethodsQty",
            "modifiers",
            "finalFieldsQty",
            "loc",
            "cbo",
            "uniqueWordsQty",
            "numbersQty",
            "assignmentsQty",
            "publicMethodsQty",
            "variablesQty",
            "nosi",
            "fanout",
        ],
    },
    20: {
        "fitness": 0.005953306878027833,
        "subset": [
            "cbo",
            "privateFieldsQty",
            "modifiers",
            "nosi",
            "loc",
            "visibleMethodsQty",
            "lcom",
            "comparisonsQty",
            "rfc",
            "wmc",
            "stringLiteralsQty",
            "numbersQty",
            "assignmentsQty",
            "mathOperationsQty",
            "variablesQty",
            "fanout",
            "fanin",
            "cboModified",
            "uniqueWordsQty",
            "staticFieldsQty",
        ],
    },
    21: {
        "fitness": 0.004394752368703655,
        "subset": [
            "cbo",
            "wmc",
            "totalMethodsQty",
            "visibleMethodsQty",
            "staticFieldsQty",
            "modifiers",
            "lcom",
            "rfc",
            "finalFieldsQty",
            "nosi",
            "publicMethodsQty",
            "returnQty",
            "loc",
            "fanout",
            "uniqueWordsQty",
            "stringLiteralsQty",
            "numbersQty",
            "assignmentsQty",
            "cboModified",
            "fanin",
            "variablesQty",
        ],
    },
    22: {
        "fitness": 0.004548714923067559,
        "subset": [
            "cbo",
            "assignmentsQty",
            "modifiers",
            "totalMethodsQty",
            "loc",
            "visibleMethodsQty",
            "wmc",
            "rfc",
            "defaultMethodsQty",
            "maxNestedBlocksQty",
            "fanin",
            "nosi",
            "cboModified",
            "numbersQty",
            "uniqueWordsQty",
            "lcom",
            "mathOperationsQty",
            "variablesQty",
            "stringLiteralsQty",
            "totalFieldsQty",
            "publicMethodsQty",
            "fanout",
        ],
    },
    23: {
        "fitness": 0.004117518890444596,
        "subset": [
            "cbo",
            "stringLiteralsQty",
            "totalMethodsQty",
            "lcc",
            "staticFieldsQty",
            "modifiers",
            "lcom",
            "nosi",
            "returnQty",
            "wmc",
            "loc",
            "fanout",
            "variablesQty",
            "cboModified",
            "comparisonsQty",
            "numbersQty",
            "parenthesizedExpsQty",
            "rfc",
            "uniqueWordsQty",
            "fanin",
            "assignmentsQty",
            "totalFieldsQty",
            "publicMethodsQty",
        ],
    },
    24: {
        "fitness": 0.0034457774363013407,
        "subset": [
            "cbo",
            "mathOperationsQty",
            "staticMethodsQty",
            "totalFieldsQty",
            "nosi",
            "loc",
            "stringLiteralsQty",
            "returnQty",
            "variablesQty",
            "fanin",
            "uniqueWordsQty",
            "cboModified",
            "publicMethodsQty",
            "lcom",
            "visibleMethodsQty",
            "fanout",
            "privateFieldsQty",
            "assignmentsQty",
            "numbersQty",
            "modifiers",
            "wmc",
            "rfc",
            "defaultMethodsQty",
            "totalMethodsQty",
        ],
    },
    25: {
        "fitness": 0.003825820223615073,
        "subset": [
            "cbo",
            "wmc",
            "defaultMethodsQty",
            "publicMethodsQty",
            "totalMethodsQty",
            "modifiers",
            "lcom",
            "rfc",
            "finalFieldsQty",
            "nosi",
            "loc",
            "returnQty",
            "visibleMethodsQty",
            "loopQty",
            "parenthesizedExpsQty",
            "stringLiteralsQty",
            "numbersQty",
            "assignmentsQty",
            "mathOperationsQty",
            "variablesQty",
            "maxNestedBlocksQty",
            "fanout",
            "fanin",
            "cboModified",
            "uniqueWordsQty",
        ],
    },
    26: {
        "fitness": 0.003366554146940225,
        "subset": [
            "staticFieldsQty",
            "wmc",
            "loc",
            "defaultMethodsQty",
            "totalFieldsQty",
            "tcc",
            "modifiers",
            "stringLiteralsQty",
            "assignmentsQty",
            "variablesQty",
            "fanout",
            "cboModified",
            "uniqueWordsQty",
            "finalFieldsQty",
            "fanin",
            "lcom",
            "numbersQty",
            "lambdasQty",
            "privateMethodsQty",
            "publicFieldsQty",
            "rfc",
            "totalMethodsQty",
            "nosi",
            "returnQty",
            "cbo",
            "visibleMethodsQty",
        ],
    },
    27: {
        "fitness": 0.0023846281881736812,
        "subset": [
            "stringLiteralsQty",
            "loc",
            "totalFieldsQty",
            "fanout",
            "tryCatchQty",
            "numbersQty",
            "lcom",
            "maxNestedBlocksQty",
            "fanin",
            "totalMethodsQty",
            "staticFieldsQty",
            "returnQty",
            "variablesQty",
            "cboModified",
            "modifiers",
            "assignmentsQty",
            "rfc",
            "publicMethodsQty",
            "uniqueWordsQty",
            "wmc",
            "nosi",
            "mathOperationsQty",
            "cbo",
            "visibleMethodsQty",
            "finalFieldsQty",
            "defaultMethodsQty",
            "dit",
        ],
    },
    28: {
        "fitness": 0.0019435808783716483,
        "subset": [
            "publicFieldsQty",
            "returnQty",
            "wmc",
            "lcom",
            "publicMethodsQty",
            "visibleMethodsQty",
            "staticFieldsQty",
            "modifiers",
            "synchronizedFieldsQty",
            "loc",
            "cbo",
            "variablesQty",
            "uniqueWordsQty",
            "logStatementsQty",
            "stringLiteralsQty",
            "rfc",
            "finalFieldsQty",
            "numbersQty",
            "assignmentsQty",
            "cboModified",
            "mathOperationsQty",
            "fanin",
            "maxNestedBlocksQty",
            "totalFieldsQty",
            "privateFieldsQty",
            "fanout",
            "nosi",
            "totalMethodsQty",
        ],
    },
    29: {
        "fitness": 0.001879823222649674,
        "subset": [
            "lcom",
            "stringLiteralsQty",
            "privateFieldsQty",
            "visibleMethodsQty",
            "privateMethodsQty",
            "nosi",
            "loc",
            "returnQty",
            "totalMethodsQty",
            "tryCatchQty",
            "modifiers",
            "mathOperationsQty",
            "variablesQty",
            "rfc",
            "fanin",
            "cboModified",
            "uniqueWordsQty",
            "wmc",
            "defaultMethodsQty",
            "staticFieldsQty",
            "tcc",
            "publicMethodsQty",
            "numbersQty",
            "cbo",
            "assignmentsQty",
            "publicFieldsQty",
            "fanout",
            "comparisonsQty",
            "totalFieldsQty",
        ],
    },
    30: {
        "fitness": 0.0017804662757029573,
        "subset": [
            "publicFieldsQty",
            "loc",
            "cboModified",
            "fanout",
            "wmc",
            "rfc",
            "lcom",
            "totalMethodsQty",
            "publicMethodsQty",
            "staticFieldsQty",
            "modifiers",
            "nosi",
            "cbo",
            "numbersQty",
            "uniqueWordsQty",
            "stringLiteralsQty",
            "lambdasQty",
            "maxNestedBlocksQty",
            "parenthesizedExpsQty",
            "mathOperationsQty",
            "fanin",
            "totalFieldsQty",
            "defaultMethodsQty",
            "returnQty",
            "privateFieldsQty",
            "staticMethodsQty",
            "visibleMethodsQty",
            "assignmentsQty",
            "variablesQty",
            "dit",
        ],
    },
    31: {
        "fitness": 0.0015990077779234493,
        "subset": [
            "publicFieldsQty",
            "stringLiteralsQty",
            "staticFieldsQty",
            "modifiers",
            "privateFieldsQty",
            "staticMethodsQty",
            "totalMethodsQty",
            "defaultMethodsQty",
            "wmc",
            "parenthesizedExpsQty",
            "finalMethodsQty",
            "mathOperationsQty",
            "variablesQty",
            "maxNestedBlocksQty",
            "publicMethodsQty",
            "noc",
            "nosi",
            "comparisonsQty",
            "lcom",
            "uniqueWordsQty",
            "returnQty",
            "rfc",
            "fanin",
            "totalFieldsQty",
            "assignmentsQty",
            "numbersQty",
            "fanout",
            "loc",
            "cboModified",
            "cbo",
            "visibleMethodsQty",
        ],
    },
    32: {
        "fitness": 0.0009870149844462511,
        "subset": [
            "rfc",
            "loc",
            "uniqueWordsQty",
            "protectedMethodsQty",
            "cboModified",
            "fanin",
            "fanout",
            "privateMethodsQty",
            "mathOperationsQty",
            "modifiers",
            "numbersQty",
            "privateFieldsQty",
            "totalMethodsQty",
            "lcc",
            "lcom",
            "nosi",
            "visibleMethodsQty",
            "comparisonsQty",
            "finalFieldsQty",
            "stringLiteralsQty",
            "totalFieldsQty",
            "parenthesizedExpsQty",
            "wmc",
            "variablesQty",
            "cbo",
            "assignmentsQty",
            "defaultMethodsQty",
            "lambdasQty",
            "staticFieldsQty",
            "publicMethodsQty",
            "returnQty",
            "publicFieldsQty",
        ],
    },
    33: {
        "fitness": 0.0010057864904123642,
        "subset": [
            "wmc",
            "stringLiteralsQty",
            "publicMethodsQty",
            "modifiers",
            "privateFieldsQty",
            "protectedFieldsQty",
            "loc",
            "comparisonsQty",
            "fanout",
            "totalFieldsQty",
            "lambdasQty",
            "mathOperationsQty",
            "uniqueWordsQty",
            "maxNestedBlocksQty",
            "assignmentsQty",
            "cboModified",
            "rfc",
            "returnQty",
            "staticMethodsQty",
            "finalFieldsQty",
            "fanin",
            "lcom",
            "totalMethodsQty",
            "visibleMethodsQty",
            "publicFieldsQty",
            "numbersQty",
            "variablesQty",
            "cbo",
            "privateMethodsQty",
            "nosi",
            "defaultMethodsQty",
            "staticFieldsQty",
            "defaultFieldsQty",
        ],
    },
    34: {
        "fitness": 0.0008825086085272561,
        "subset": [
            "publicFieldsQty",
            "totalMethodsQty",
            "returnQty",
            "synchronizedMethodsQty",
            "publicMethodsQty",
            "stringLiteralsQty",
            "finalFieldsQty",
            "assignmentsQty",
            "synchronizedFieldsQty",
            "fanout",
            "fanin",
            "lambdasQty",
            "comparisonsQty",
            "lcom",
            "variablesQty",
            "nosi",
            "numbersQty",
            "privateFieldsQty",
            "uniqueWordsQty",
            "staticFieldsQty",
            "logStatementsQty",
            "modifiers",
            "privateMethodsQty",
            "parenthesizedExpsQty",
            "loc",
            "cboModified",
            "defaultMethodsQty",
            "mathOperationsQty",
            "wmc",
            "rfc",
            "visibleMethodsQty",
            "maxNestedBlocksQty",
            "cbo",
            "totalFieldsQty",
        ],
    },
    35: {
        "fitness": 0.0007659803953415644,
        "subset": [
            "logStatementsQty",
            "numbersQty",
            "loc",
            "totalFieldsQty",
            "visibleMethodsQty",
            "defaultMethodsQty",
            "tryCatchQty",
            "publicMethodsQty",
            "stringLiteralsQty",
            "assignmentsQty",
            "staticFieldsQty",
            "totalMethodsQty",
            "variablesQty",
            "maxNestedBlocksQty",
            "lcom",
            "rfc",
            "noc",
            "cboModified",
            "nosi",
            "publicFieldsQty",
            "modifiers",
            "fanin",
            "wmc",
            "fanout",
            "anonymousClassesQty",
            "returnQty",
            "cbo",
            "privateFieldsQty",
            "comparisonsQty",
            "finalFieldsQty",
            "lambdasQty",
            "uniqueWordsQty",
            "mathOperationsQty",
            "dit",
            "loopQty",
        ],
    },
    36: {
        "fitness": 0.0005845998131910809,
        "subset": [
            "publicFieldsQty",
            "comparisonsQty",
            "fanin",
            "fanout",
            "rfc",
            "lcom",
            "publicMethodsQty",
            "defaultMethodsQty",
            "visibleMethodsQty",
            "privateFieldsQty",
            "loc",
            "returnQty",
            "cbo",
            "variablesQty",
            "stringLiteralsQty",
            "lambdasQty",
            "modifiers",
            "totalFieldsQty",
            "mathOperationsQty",
            "lcom*",
            "uniqueWordsQty",
            "dit",
            "finalFieldsQty",
            "loopQty",
            "wmc",
            "staticFieldsQty",
            "numbersQty",
            "tcc",
            "nosi",
            "assignmentsQty",
            "staticMethodsQty",
            "cboModified",
            "maxNestedBlocksQty",
            "totalMethodsQty",
            "parenthesizedExpsQty",
            "logStatementsQty",
        ],
    },
    37: {
        "fitness": 0.0005070561101351682,
        "subset": [
            "publicFieldsQty",
            "assignmentsQty",
            "cboModified",
            "privateMethodsQty",
            "defaultFieldsQty",
            "synchronizedFieldsQty",
            "loopQty",
            "comparisonsQty",
            "cbo",
            "lcc",
            "fanin",
            "variablesQty",
            "synchronizedMethodsQty",
            "parenthesizedExpsQty",
            "visibleMethodsQty",
            "totalFieldsQty",
            "nosi",
            "lcom",
            "loc",
            "totalMethodsQty",
            "dit",
            "rfc",
            "stringLiteralsQty",
            "defaultMethodsQty",
            "mathOperationsQty",
            "numbersQty",
            "uniqueWordsQty",
            "wmc",
            "modifiers",
            "returnQty",
            "staticFieldsQty",
            "fanout",
            "logStatementsQty",
            "finalFieldsQty",
            "privateFieldsQty",
            "lambdasQty",
            "publicMethodsQty",
        ],
    },
    38: {
        "fitness": 0.0003567068532817493,
        "subset": [
            "logStatementsQty",
            "modifiers",
            "abstractMethodsQty",
            "loopQty",
            "privateMethodsQty",
            "tryCatchQty",
            "parenthesizedExpsQty",
            "publicMethodsQty",
            "numbersQty",
            "loc",
            "mathOperationsQty",
            "rfc",
            "dit",
            "wmc",
            "lambdasQty",
            "cboModified",
            "variablesQty",
            "lcom",
            "cbo",
            "totalMethodsQty",
            "stringLiteralsQty",
            "privateFieldsQty",
            "uniqueWordsQty",
            "returnQty",
            "fanout",
            "assignmentsQty",
            "visibleMethodsQty",
            "fanin",
            "defaultFieldsQty",
            "defaultMethodsQty",
            "nosi",
            "anonymousClassesQty",
            "comparisonsQty",
            "totalFieldsQty",
            "publicFieldsQty",
            "staticFieldsQty",
            "staticMethodsQty",
            "finalFieldsQty",
        ],
    },
    39: {
        "fitness": 0.00037902615927096797,
        "subset": [
            "logStatementsQty",
            "defaultFieldsQty",
            "loopQty",
            "comparisonsQty",
            "privateFieldsQty",
            "stringLiteralsQty",
            "visibleMethodsQty",
            "assignmentsQty",
            "protectedMethodsQty",
            "publicMethodsQty",
            "tcc",
            "lcom",
            "fanout",
            "cboModified",
            "protectedFieldsQty",
            "tryCatchQty",
            "rfc",
            "publicFieldsQty",
            "numbersQty",
            "variablesQty",
            "nosi",
            "totalFieldsQty",
            "finalMethodsQty",
            "dit",
            "lambdasQty",
            "returnQty",
            "loc",
            "modifiers",
            "totalMethodsQty",
            "fanin",
            "cbo",
            "parenthesizedExpsQty",
            "defaultMethodsQty",
            "mathOperationsQty",
            "uniqueWordsQty",
            "finalFieldsQty",
            "wmc",
            "staticFieldsQty",
            "noc",
        ],
    },
    40: {
        "fitness": 0.000178256797600767,
        "subset": [
            "rfc",
            "totalMethodsQty",
            "lambdasQty",
            "innerClassesQty",
            "cboModified",
            "maxNestedBlocksQty",
            "staticFieldsQty",
            "modifiers",
            "publicMethodsQty",
            "fanout",
            "stringLiteralsQty",
            "noc",
            "nosi",
            "comparisonsQty",
            "uniqueWordsQty",
            "totalFieldsQty",
            "variablesQty",
            "mathOperationsQty",
            "fanin",
            "cbo",
            "lcom",
            "finalFieldsQty",
            "defaultMethodsQty",
            "privateFieldsQty",
            "returnQty",
            "privateMethodsQty",
            "defaultFieldsQty",
            "publicFieldsQty",
            "loc",
            "staticMethodsQty",
            "wmc",
            "logStatementsQty",
            "numbersQty",
            "visibleMethodsQty",
            "loopQty",
            "anonymousClassesQty",
            "assignmentsQty",
            "dit",
            "parenthesizedExpsQty",
            "tryCatchQty",
        ],
    },
    41: {
        "fitness": 0.00016713574019355405,
        "subset": [
            "logStatementsQty",
            "stringLiteralsQty",
            "rfc",
            "publicMethodsQty",
            "abstractMethodsQty",
            "finalFieldsQty",
            "returnQty",
            "tryCatchQty",
            "parenthesizedExpsQty",
            "publicFieldsQty",
            "cbo",
            "uniqueWordsQty",
            "maxNestedBlocksQty",
            "anonymousClassesQty",
            "lambdasQty",
            "comparisonsQty",
            "variablesQty",
            "modifiers",
            "staticFieldsQty",
            "defaultMethodsQty",
            "lcom",
            "visibleMethodsQty",
            "privateMethodsQty",
            "totalMethodsQty",
            "numbersQty",
            "fanout",
            "fanin",
            "defaultFieldsQty",
            "noc",
            "loc",
            "totalFieldsQty",
            "wmc",
            "nosi",
            "assignmentsQty",
            "staticMethodsQty",
            "privateFieldsQty",
            "loopQty",
            "cboModified",
            "dit",
            "mathOperationsQty",
            "lcc",
        ],
    },
    42: {
        "fitness": 0.00015241097861640838,
        "subset": [
            "publicFieldsQty",
            "stringLiteralsQty",
            "totalFieldsQty",
            "modifiers",
            "privateFieldsQty",
            "defaultFieldsQty",
            "totalMethodsQty",
            "loc",
            "returnQty",
            "noc",
            "maxNestedBlocksQty",
            "wmc",
            "staticMethodsQty",
            "parenthesizedExpsQty",
            "publicMethodsQty",
            "numbersQty",
            "fanin",
            "mathOperationsQty",
            "assignmentsQty",
            "tryCatchQty",
            "defaultMethodsQty",
            "nosi",
            "tcc",
            "uniqueWordsQty",
            "rfc",
            "cboModified",
            "fanout",
            "visibleMethodsQty",
            "cbo",
            "lcom",
            "logStatementsQty",
            "staticFieldsQty",
            "dit",
            "protectedMethodsQty",
            "comparisonsQty",
            "variablesQty",
            "finalFieldsQty",
            "lambdasQty",
            "anonymousClassesQty",
            "lcc",
            "loopQty",
            "innerClassesQty",
        ],
    },
    43: {
        "fitness": 0.00014182674646162998,
        "subset": [
            "dit",
            "comparisonsQty",
            "uniqueWordsQty",
            "lambdasQty",
            "protectedFieldsQty",
            "cboModified",
            "maxNestedBlocksQty",
            "fanout",
            "defaultFieldsQty",
            "finalMethodsQty",
            "defaultMethodsQty",
            "stringLiteralsQty",
            "synchronizedMethodsQty",
            "noc",
            "modifiers",
            "mathOperationsQty",
            "loopQty",
            "rfc",
            "privateFieldsQty",
            "publicFieldsQty",
            "nosi",
            "assignmentsQty",
            "innerClassesQty",
            "totalMethodsQty",
            "wmc",
            "logStatementsQty",
            "privateMethodsQty",
            "variablesQty",
            "staticFieldsQty",
            "loc",
            "numbersQty",
            "parenthesizedExpsQty",
            "visibleMethodsQty",
            "totalFieldsQty",
            "cbo",
            "tryCatchQty",
            "staticMethodsQty",
            "returnQty",
            "lcom",
            "publicMethodsQty",
            "fanin",
            "finalFieldsQty",
            "protectedMethodsQty",
        ],
    },
    44: {
        "fitness": 5.050054510047045 * 10 ** (-5),
        "subset": [
            "publicFieldsQty",
            "noc",
            "defaultFieldsQty",
            "returnQty",
            "wmc",
            "protectedFieldsQty",
            "rfc",
            "staticFieldsQty",
            "nosi",
            "totalFieldsQty",
            "variablesQty",
            "fanin",
            "cboModified",
            "lambdasQty",
            "totalMethodsQty",
            "maxNestedBlocksQty",
            "numbersQty",
            "privateMethodsQty",
            "assignmentsQty",
            "dit",
            "mathOperationsQty",
            "stringLiteralsQty",
            "publicMethodsQty",
            "parenthesizedExpsQty",
            "finalFieldsQty",
            "privateFieldsQty",
            "loc",
            "lcc",
            "tryCatchQty",
            "lcom",
            "uniqueWordsQty",
            "logStatementsQty",
            "defaultMethodsQty",
            "staticMethodsQty",
            "comparisonsQty",
            "cbo",
            "modifiers",
            "tcc",
            "visibleMethodsQty",
            "innerClassesQty",
            "loopQty",
            "anonymousClassesQty",
            "fanout",
            "protectedMethodsQty",
        ],
    },
    45: {
        "fitness": 4.395711421446147 * 10 ** (-5),
        "subset": [
            "logStatementsQty",
            "loopQty",
            "cboModified",
            "dit",
            "rfc",
            "lcom",
            "lcom*",
            "privateMethodsQty",
            "protectedMethodsQty",
            "privateFieldsQty",
            "returnQty",
            "cbo",
            "stringLiteralsQty",
            "maxNestedBlocksQty",
            "tryCatchQty",
            "totalMethodsQty",
            "lcc",
            "mathOperationsQty",
            "wmc",
            "fanin",
            "visibleMethodsQty",
            "totalFieldsQty",
            "staticMethodsQty",
            "publicFieldsQty",
            "parenthesizedExpsQty",
            "finalFieldsQty",
            "fanout",
            "defaultFieldsQty",
            "uniqueWordsQty",
            "assignmentsQty",
            "anonymousClassesQty",
            "noc",
            "loc",
            "tcc",
            "comparisonsQty",
            "numbersQty",
            "lambdasQty",
            "innerClassesQty",
            "modifiers",
            "nosi",
            "staticFieldsQty",
            "publicMethodsQty",
            "variablesQty",
            "defaultMethodsQty",
            "protectedFieldsQty",
        ],
    },
    46: {
        "fitness": 1.7968453817759388 * 10 ** (-5),
        "subset": [
            "nosi",
            "parenthesizedExpsQty",
            "lambdasQty",
            "modifiers",
            "anonymousClassesQty",
            "wmc",
            "finalMethodsQty",
            "mathOperationsQty",
            "protectedMethodsQty",
            "lcom",
            "fanout",
            "assignmentsQty",
            "innerClassesQty",
            "stringLiteralsQty",
            "cbo",
            "totalMethodsQty",
            "rfc",
            "loc",
            "protectedFieldsQty",
            "logStatementsQty",
            "returnQty",
            "cboModified",
            "maxNestedBlocksQty",
            "tcc",
            "uniqueWordsQty",
            "staticMethodsQty",
            "comparisonsQty",
            "variablesQty",
            "numbersQty",
            "visibleMethodsQty",
            "privateFieldsQty",
            "fanin",
            "finalFieldsQty",
            "publicFieldsQty",
            "loopQty",
            "noc",
            "defaultMethodsQty",
            "publicMethodsQty",
            "dit",
            "defaultFieldsQty",
            "tryCatchQty",
            "synchronizedMethodsQty",
            "staticFieldsQty",
            "totalFieldsQty",
            "privateMethodsQty",
            "lcc",
        ],
    },
    47: {
        "fitness": 8.247886395057555 * 10 ** (-6),
        "subset": [
            "lcc",
            "assignmentsQty",
            "protectedMethodsQty",
            "loopQty",
            "comparisonsQty",
            "modifiers",
            "publicMethodsQty",
            "finalFieldsQty",
            "defaultMethodsQty",
            "variablesQty",
            "protectedFieldsQty",
            "lcom",
            "wmc",
            "staticFieldsQty",
            "nosi",
            "synchronizedMethodsQty",
            "visibleMethodsQty",
            "rfc",
            "defaultFieldsQty",
            "cboModified",
            "privateMethodsQty",
            "maxNestedBlocksQty",
            "parenthesizedExpsQty",
            "mathOperationsQty",
            "finalMethodsQty",
            "returnQty",
            "cbo",
            "publicFieldsQty",
            "staticMethodsQty",
            "numbersQty",
            "logStatementsQty",
            "loc",
            "tryCatchQty",
            "privateFieldsQty",
            "lambdasQty",
            "fanin",
            "uniqueWordsQty",
            "fanout",
            "totalFieldsQty",
            "totalMethodsQty",
            "stringLiteralsQty",
            "tcc",
            "lcom*",
            "anonymousClassesQty",
            "innerClassesQty",
            "noc",
            "dit",
        ],
    },
    48: {
        "fitness": 1.0316200887784086 * 10 ** (-17),
        "subset": [
            "cbo",
            "loc",
            "cboModified",
            "lambdasQty",
            "tcc",
            "visibleMethodsQty",
            "mathOperationsQty",
            "assignmentsQty",
            "abstractMethodsQty",
            "finalFieldsQty",
            "parenthesizedExpsQty",
            "tryCatchQty",
            "publicMethodsQty",
            "dit",
            "logStatementsQty",
            "lcc",
            "uniqueWordsQty",
            "numbersQty",
            "defaultFieldsQty",
            "comparisonsQty",
            "protectedFieldsQty",
            "noc",
            "wmc",
            "lcom*",
            "synchronizedMethodsQty",
            "rfc",
            "innerClassesQty",
            "modifiers",
            "lcom",
            "returnQty",
            "staticFieldsQty",
            "privateMethodsQty",
            "loopQty",
            "nosi",
            "fanin",
            "publicFieldsQty",
            "protectedMethodsQty",
            "privateFieldsQty",
            "totalMethodsQty",
            "staticMethodsQty",
            "anonymousClassesQty",
            "maxNestedBlocksQty",
            "defaultMethodsQty",
            "variablesQty",
            "finalMethodsQty",
            "totalFieldsQty",
            "fanout",
            "stringLiteralsQty",
        ],
    },
    49: {
        "fitness": 1.0374014712470986 * 10 ** (-17),
        "subset": [
            "finalFieldsQty",
            "fanin",
            "staticFieldsQty",
            "finalMethodsQty",
            "publicMethodsQty",
            "loopQty",
            "maxNestedBlocksQty",
            "logStatementsQty",
            "uniqueWordsQty",
            "staticMethodsQty",
            "privateMethodsQty",
            "abstractMethodsQty",
            "tcc",
            "modifiers",
            "wmc",
            "protectedFieldsQty",
            "anonymousClassesQty",
            "cboModified",
            "publicFieldsQty",
            "tryCatchQty",
            "lcom",
            "visibleMethodsQty",
            "numbersQty",
            "totalFieldsQty",
            "dit",
            "privateFieldsQty",
            "variablesQty",
            "assignmentsQty",
            "totalMethodsQty",
            "loc",
            "comparisonsQty",
            "parenthesizedExpsQty",
            "synchronizedFieldsQty",
            "stringLiteralsQty",
            "cbo",
            "nosi",
            "mathOperationsQty",
            "fanout",
            "defaultMethodsQty",
            "lambdasQty",
            "protectedMethodsQty",
            "lcom*",
            "defaultFieldsQty",
            "lcc",
            "synchronizedMethodsQty",
            "rfc",
            "noc",
            "returnQty",
            "innerClassesQty",
        ],
    },
}


In [None]:
# For every entry of class_pso_data, recompute the mean class-level error of
# its metric subset over cached_test_repos and print it next to the fitness
# value stored in the entry, so the two can be compared by eye.
for subset_size, pso_entry in class_pso_data.items():
    awaited_fitness = pso_entry["fitness"]
    subset = pso_entry["subset"]

    errors = []
    skipped_repos = 0  # repositories for which class_error raised ValueError

    for repo in cached_test_repos:
        try:
            error = ErrorRepositoryResults(repo, kruskal_stress, eq_distance).class_error(subset)
        except ValueError:
            # Best effort: a failing repository must not abort the whole
            # comparison, but it is counted so the reader can see that the
            # mean below was computed over fewer repositories.
            skipped_repos += 1
            continue
        errors.append(error)

    errors = np.array(errors)

    # np.mean on an empty array emits a RuntimeWarning and yields nan; make
    # the "no repository could be evaluated" case explicit and warning-free.
    actual_fitness = np.mean(errors) if errors.size else np.nan

    print(f"Subset size: {subset_size}")
    print(f"Awaited fitness: {awaited_fitness}")
    print(f"Actual fitness: {actual_fitness}")
    if skipped_repos:
        print(f"Skipped repositories (ValueError): {skipped_repos}")
    print("-" * 100)

Subset size: 1
Awaited fitness: 0.6541729290780735
Actual fitness: 0.6235844236123657
----------------------------------------------------------------------------------------------------
Subset size: 2
Awaited fitness: 0.3229378892063299
Actual fitness: 0.2788109194079246
----------------------------------------------------------------------------------------------------
Subset size: 3
Awaited fitness: 0.11851858295500113
Actual fitness: 0.11187533412286181
----------------------------------------------------------------------------------------------------
Subset size: 4
Awaited fitness: 0.0782189580562296
Actual fitness: 0.06874952030698206
----------------------------------------------------------------------------------------------------
Subset size: 5
Awaited fitness: 0.046962469746672765
Actual fitness: 0.03864488500648484
----------------------------------------------------------------------------------------------------
Subset size: 6
Awaited fitness: 0.03967794563500625
Actual 

In [None]:
method_pco_data = {
    1: {
        "fitness": 0.36959324895916246,
        "subset": ["line"],
    },
    2: {
        "fitness": 0.05595463069500887,
        "subset": ["line", "modifiers"],
    },
    3: {
        "fitness": 0.04035934073646,
        "subset": ["loc", "modifiers", "line"],
    },
    4: {
        "fitness": 0.028780742768303638,
        "subset": ["loc", "modifiers", "uniqueWordsQty", "line"],
    },
    5: {
        "fitness": 0.018701936244252038,
        "subset": ["modifiers", "uniqueWordsQty", "line", "cboModified", "loc"],
    },
    6: {
        "fitness": 0.015213928278104074,
        "subset": [
            "cboModified",
            "modifiers",
            "uniqueWordsQty",
            "line",
            "loc",
            "fanout",
        ],
    },
    7: {
        "fitness": 0.012378932911979943,
        "subset": [
            "fanout",
            "modifiers",
            "uniqueWordsQty",
            "loc",
            "line",
            "cboModified",
            "rfc",
        ],
    },
    8: {
        "fitness": 0.009105007049359399,
        "subset": [
            "stringLiteralsQty",
            "line",
            "fanout",
            "loc",
            "rfc",
            "uniqueWordsQty",
            "modifiers",
            "cboModified",
        ],
    },
    9: {
        "fitness": 0.008002053914751965,
        "subset": [
            "stringLiteralsQty",
            "cboModified",
            "fanout",
            "loc",
            "numbersQty",
            "rfc",
            "line",
            "uniqueWordsQty",
            "modifiers",
        ],
    },
    10: {
        "fitness": 0.006913613933788978,
        "subset": [
            "modifiers",
            "uniqueWordsQty",
            "line",
            "cboModified",
            "maxNestedBlocksQty",
            "loc",
            "stringLiteralsQty",
            "methodsInvokedQty",
            "fanout",
            "rfc",
        ],
    },
    11: {
        "fitness": 0.004986702953593242,
        "subset": [
            "loc",
            "stringLiteralsQty",
            "rfc",
            "fanout",
            "fanin",
            "cboModified",
            "line",
            "numbersQty",
            "modifiers",
            "uniqueWordsQty",
            "assignmentsQty",
        ],
    },
    12: {
        "fitness": 0.0031810510284981555,
        "subset": [
            "modifiers",
            "uniqueWordsQty",
            "line",
            "cbo",
            "cboModified",
            "fanin",
            "fanout",
            "numbersQty",
            "stringLiteralsQty",
            "rfc",
            "loc",
            "methodsInvokedQty",
        ],
    },
    13: {
        "fitness": 0.0020421685546196645,
        "subset": [
            "numbersQty",
            "assignmentsQty",
            "modifiers",
            "methodsInvokedQty",
            "line",
            "rfc",
            "fanout",
            "stringLiteralsQty",
            "uniqueWordsQty",
            "loc",
            "fanin",
            "variablesQty",
            "cboModified",
        ],
    },
    14: {
        "fitness": 0.0018548764642826478,
        "subset": [
            "methodsInvokedQty",
            "line",
            "cbo",
            "cboModified",
            "fanout",
            "uniqueWordsQty",
            "returnsQty",
            "stringLiteralsQty",
            "rfc",
            "loc",
            "fanin",
            "numbersQty",
            "modifiers",
            "assignmentsQty",
        ],
    },
    15: {
        "fitness": 0.0014372523554459927,
        "subset": [
            "constructor",
            "cboModified",
            "methodsInvokedQty",
            "wmc",
            "stringLiteralsQty",
            "loc",
            "assignmentsQty",
            "numbersQty",
            "cbo",
            "line",
            "uniqueWordsQty",
            "modifiers",
            "rfc",
            "fanout",
            "fanin",
        ],
    },
    16: {
        "fitness": 0.0014623292786715492,
        "subset": [
            "numbersQty",
            "fanin",
            "loc",
            "methodsInvokedQty",
            "rfc",
            "wmc",
            "fanout",
            "stringLiteralsQty",
            "variablesQty",
            "cboModified",
            "line",
            "uniqueWordsQty",
            "methodsInvokedLocalQty",
            "returnsQty",
            "modifiers",
            "assignmentsQty",
        ],
    },
    17: {
        "fitness": 0.0009769964206714729,
        "subset": [
            "loc",
            "fanin",
            "uniqueWordsQty",
            "methodsInvokedQty",
            "maxNestedBlocksQty",
            "assignmentsQty",
            "variablesQty",
            "fanout",
            "modifiers",
            "line",
            "rfc",
            "cboModified",
            "numbersQty",
            "wmc",
            "parenthesizedExpsQty",
            "cbo",
            "stringLiteralsQty",
        ],
    },
    18: {
        "fitness": 0.0005102464156062268,
        "subset": [
            "methodsInvokedQty",
            "numbersQty",
            "uniqueWordsQty",
            "line",
            "cboModified",
            "fanin",
            "mathOperationsQty",
            "modifiers",
            "loc",
            "parametersQty",
            "assignmentsQty",
            "rfc",
            "fanout",
            "returnsQty",
            "variablesQty",
            "wmc",
            "stringLiteralsQty",
            "cbo",
        ],
    },
    19: {
        "fitness": 0.0004426583233477651,
        "subset": [
            "loc",
            "assignmentsQty",
            "rfc",
            "line",
            "comparisonsQty",
            "parametersQty",
            "stringLiteralsQty",
            "cbo",
            "lambdasQty",
            "cboModified",
            "modifiers",
            "fanin",
            "wmc",
            "fanout",
            "numbersQty",
            "variablesQty",
            "uniqueWordsQty",
            "mathOperationsQty",
            "methodsInvokedQty",
        ],
    },
    20: {
        "fitness": 0.00034733679118127285,
        "subset": [
            "loc",
            "comparisonsQty",
            "returnsQty",
            "lambdasQty",
            "fanout",
            "variablesQty",
            "cbo",
            "line",
            "cboModified",
            "assignmentsQty",
            "rfc",
            "wmc",
            "numbersQty",
            "uniqueWordsQty",
            "fanin",
            "parametersQty",
            "mathOperationsQty",
            "methodsInvokedQty",
            "stringLiteralsQty",
            "modifiers",
        ],
    },
    21: {
        "fitness": 0.0003015796321330578,
        "subset": [
            "rfc",
            "fanin",
            "modifiers",
            "uniqueWordsQty",
            "lambdasQty",
            "parametersQty",
            "variablesQty",
            "maxNestedBlocksQty",
            "methodsInvokedQty",
            "comparisonsQty",
            "mathOperationsQty",
            "numbersQty",
            "loc",
            "stringLiteralsQty",
            "wmc",
            "line",
            "assignmentsQty",
            "cboModified",
            "returnsQty",
            "cbo",
            "fanout",
        ],
    },
    22: {
        "fitness": 0.000251870828508292,
        "subset": [
            "stringLiteralsQty",
            "cboModified",
            "modifiers",
            "uniqueWordsQty",
            "line",
            "returnsQty",
            "anonymousClassesQty",
            "parametersQty",
            "methodsInvokedLocalQty",
            "wmc",
            "numbersQty",
            "fanin",
            "loc",
            "variablesQty",
            "cbo",
            "mathOperationsQty",
            "rfc",
            "assignmentsQty",
            "maxNestedBlocksQty",
            "comparisonsQty",
            "methodsInvokedQty",
            "fanout",
        ],
    },
    23: {
        "fitness": 0.00017707027452529752,
        "subset": [
            "line",
            "modifiers",
            "fanout",
            "mathOperationsQty",
            "stringLiteralsQty",
            "rfc",
            "parenthesizedExpsQty",
            "variablesQty",
            "comparisonsQty",
            "logStatementsQty",
            "fanin",
            "loc",
            "parametersQty",
            "assignmentsQty",
            "methodsInvokedLocalQty",
            "returnsQty",
            "numbersQty",
            "methodsInvokedQty",
            "maxNestedBlocksQty",
            "cboModified",
            "wmc",
            "uniqueWordsQty",
            "cbo",
        ],
    },
    24: {
        "fitness": 0.00015482343158574498,
        "subset": [
            "loopQty",
            "assignmentsQty",
            "loc",
            "variablesQty",
            "methodsInvokedLocalQty",
            "comparisonsQty",
            "wmc",
            "numbersQty",
            "rfc",
            "mathOperationsQty",
            "fanout",
            "cboModified",
            "uniqueWordsQty",
            "fanin",
            "parenthesizedExpsQty",
            "stringLiteralsQty",
            "parametersQty",
            "line",
            "methodsInvokedQty",
            "modifiers",
            "returnsQty",
            "logStatementsQty",
            "methodsInvokedIndirectLocalQty",
            "cbo",
        ],
    },
    25: {
        "fitness": 9.384559349540012 * 10 ** (-5),
        "subset": [
            "wmc",
            "fanout",
            "modifiers",
            "maxNestedBlocksQty",
            "assignmentsQty",
            "parametersQty",
            "logStatementsQty",
            "cbo",
            "loc",
            "variablesQty",
            "stringLiteralsQty",
            "mathOperationsQty",
            "parenthesizedExpsQty",
            "cboModified",
            "methodsInvokedLocalQty",
            "rfc",
            "methodsInvokedQty",
            "comparisonsQty",
            "fanin",
            "numbersQty",
            "line",
            "uniqueWordsQty",
            "methodsInvokedIndirectLocalQty",
            "returnsQty",
            "loopQty",
        ],
    },
    26: {
        "fitness": 7.007612678280222 * 10 ** (-5),
        "subset": [
            "modifiers",
            "uniqueWordsQty",
            "cboModified",
            "maxNestedBlocksQty",
            "wmc",
            "loc",
            "parametersQty",
            "stringLiteralsQty",
            "fanout",
            "line",
            "constructor",
            "methodsInvokedQty",
            "logStatementsQty",
            "loopQty",
            "cbo",
            "assignmentsQty",
            "comparisonsQty",
            "rfc",
            "mathOperationsQty",
            "parenthesizedExpsQty",
            "methodsInvokedIndirectLocalQty",
            "variablesQty",
            "fanin",
            "numbersQty",
            "methodsInvokedLocalQty",
            "returnsQty",
        ],
    },
    27: {
        "fitness": 4.896424642264191 * 10 ** (-5),
        "subset": [
            "hasJavaDoc",
            "wmc",
            "modifiers",
            "loc",
            "fanout",
            "fanin",
            "cboModified",
            "parenthesizedExpsQty",
            "comparisonsQty",
            "cbo",
            "returnsQty",
            "mathOperationsQty",
            "methodsInvokedQty",
            "methodsInvokedLocalQty",
            "uniqueWordsQty",
            "stringLiteralsQty",
            "line",
            "parametersQty",
            "lambdasQty",
            "rfc",
            "methodsInvokedIndirectLocalQty",
            "numbersQty",
            "assignmentsQty",
            "maxNestedBlocksQty",
            "constructor",
            "logStatementsQty",
            "variablesQty",
        ],
    },
    28: {
        "fitness": 2.7614026092421962 * 10 ** (-5),
        "subset": [
            "constructor",
            "stringLiteralsQty",
            "modifiers",
            "wmc",
            "rfc",
            "parenthesizedExpsQty",
            "parametersQty",
            "hasJavaDoc",
            "fanin",
            "loopQty",
            "methodsInvokedIndirectLocalQty",
            "variablesQty",
            "fanout",
            "assignmentsQty",
            "numbersQty",
            "maxNestedBlocksQty",
            "methodsInvokedQty",
            "lambdasQty",
            "cboModified",
            "cbo",
            "logStatementsQty",
            "methodsInvokedLocalQty",
            "uniqueWordsQty",
            "line",
            "mathOperationsQty",
            "loc",
            "returnsQty",
            "comparisonsQty",
        ],
    },
    29: {
        "fitness": 8.565825486111946 * 10 ** (-6),
        "subset": [
            "hasJavaDoc",
            "stringLiteralsQty",
            "loc",
            "assignmentsQty",
            "uniqueWordsQty",
            "modifiers",
            "line",
            "cbo",
            "variablesQty",
            "wmc",
            "comparisonsQty",
            "lambdasQty",
            "rfc",
            "mathOperationsQty",
            "returnsQty",
            "methodsInvokedLocalQty",
            "logStatementsQty",
            "maxNestedBlocksQty",
            "constructor",
            "methodsInvokedQty",
            "loopQty",
            "numbersQty",
            "parametersQty",
            "methodsInvokedIndirectLocalQty",
            "tryCatchQty",
            "parenthesizedExpsQty",
            "fanout",
            "fanin",
            "cboModified",
        ],
    },
    30: {
        "fitness": 6.610914022443504 * 10 ** (-8),
        "subset": [
            "uniqueWordsQty",
            "lambdasQty",
            "variablesQty",
            "anonymousClassesQty",
            "modifiers",
            "line",
            "returnsQty",
            "cbo",
            "rfc",
            "methodsInvokedQty",
            "stringLiteralsQty",
            "loc",
            "comparisonsQty",
            "tryCatchQty",
            "hasJavaDoc",
            "loopQty",
            "fanout",
            "maxNestedBlocksQty",
            "methodsInvokedIndirectLocalQty",
            "methodsInvokedLocalQty",
            "parametersQty",
            "logStatementsQty",
            "mathOperationsQty",
            "assignmentsQty",
            "constructor",
            "wmc",
            "parenthesizedExpsQty",
            "numbersQty",
            "fanin",
            "cboModified",
        ],
    },
}

In [None]:
# For every stored method-metric subset, recompute the mean error over the
# cached test repositories and print it next to the fitness recorded in
# method_pco_data, so the two values can be compared per subset size.
for subset_size, pco_entry in method_pco_data.items():
    awaited_fitness = pco_entry["fitness"]
    subset = pco_entry["subset"]

    errors = []
    skipped_repos = 0  # repos for which method_error raised ValueError

    for repo in cached_test_repos:
        try:
            error = ErrorRepositoryResults(repo, kruskal_stress, eq_distance).method_error(subset)
        except ValueError:
            # Best-effort evaluation: a repo that cannot be scored is left out
            # of the mean, but it is counted so the omission is visible below.
            # NOTE(review): the cause of the ValueError is not visible in this
            # cell - presumably missing/empty method metrics; confirm.
            skipped_repos += 1
            continue
        errors.append(error)

    evaluated_repos = len(errors)
    errors = np.array(errors)

    # np.mean of an empty array emits a RuntimeWarning and returns nan; make
    # the "no repo could be evaluated" case explicit instead.
    actual_fitness = np.mean(errors) if evaluated_repos else np.nan

    print(f"Subset size: {subset_size}")
    print(f"Awaited fitness: {awaited_fitness}")
    print(f"Actual fitness: {actual_fitness}")
    if skipped_repos:
        print(f"Skipped repos (ValueError): {skipped_repos} of {skipped_repos + evaluated_repos}")
    print("-" * 100)

Subset size: 1
Awaited fitness: 0.36959324895916246
Actual fitness: 0.2853890214058656
----------------------------------------------------------------------------------------------------
Subset size: 2
Awaited fitness: 0.05595463069500887
Actual fitness: 0.04788142601602099
----------------------------------------------------------------------------------------------------
Subset size: 3
Awaited fitness: 0.04035934073646
Actual fitness: 0.03463830459357069
----------------------------------------------------------------------------------------------------
Subset size: 4
Awaited fitness: 0.028780742768303638
Actual fitness: 0.023670020360459846
----------------------------------------------------------------------------------------------------
Subset size: 5
Awaited fitness: 0.018701936244252038
Actual fitness: 0.02030736881151835
----------------------------------------------------------------------------------------------------
Subset size: 6
Awaited fitness: 0.015213928278104074
Act

In [None]:
# Metric names retained for each granularity level, keyed by level name
# ("class", "method", "field", "variable"). List order is kept as authored.
# NOTE(review): the class level uses "returnQty" while the method-level subsets
# earlier in the notebook use "returnsQty" - presumably this mirrors the column
# names of the respective CSV files; verify before "fixing" either spelling.
final_subset = {
    "class": [
        "cboModified", "uniqueWordsQty", "rfc", "wmc",
        "synchronizedFieldsQty", "loc", "variablesQty", "lcom",
        "assignmentsQty", "numbersQty", "stringLiteralsQty", "modifiers",
        "totalFieldsQty", "fanin", "fanout", "returnQty",
    ],
    "method": [
        "stringLiteralsQty", "line", "fanout", "loc",
        "rfc", "uniqueWordsQty", "modifiers", "cboModified",
    ],
    "field": ["usage"],
    "variable": ["usage"],
}