## WORDS

In [1]:
import re
from ast import literal_eval
from collections import defaultdict
from pathlib import Path
from collections import Counter

import numpy as np

# Repository root, taken as the parent of the kernel's working directory.
# NOTE(review): this assumes the notebook is run from a directory one level
# below the repository root (notebooks have no ``__file__`` to anchor on) --
# confirm against the project layout.
REPO_ROOT = Path.cwd().resolve().parent

In [2]:
def collect_test_accuracies(ga: bool) -> defaultdict:
    """Collect the reported test accuracy of every run, grouped by dataset.

    Scans ``models/**/<variant>/logs/*.log`` under ``REPO_ROOT`` (``<variant>``
    is ``ga`` or ``base``) and reads the first ``Test Accuracy: <number>%``
    entry found in each log file.

    Args:
        ga: If True, read the logs under ``ga`` directories; otherwise read the
            logs under ``base`` directories.

    Returns:
        A ``defaultdict(list)`` mapping the name of the directory three levels
        above each log file (the one that contains ``ga``/``base``) to the
        accuracies parsed from its logs, as floats in percent. Log files with
        no matching entry contribute nothing.
    """
    variant = "ga" if ga else "base"
    # Capture the number itself and escape the decimal point: an unescaped "."
    # would accept any character there (and then break float()), and would
    # miss short whole-number percentages such as "90%".
    accuracy_re = re.compile(r"Test Accuracy: (\d+(?:\.\d+)?)%")

    test_accuracies = defaultdict(list)

    # Sorted so the order of the collected values does not depend on the
    # filesystem's directory listing order.
    for log in sorted(REPO_ROOT.glob(f"models/**/{variant}/logs/*.log")):
        # Only ASCII text is searched for, so undecodable bytes elsewhere in
        # the log are replaced rather than aborting the whole collection.
        text = log.read_text(encoding="utf-8", errors="replace")
        match = accuracy_re.search(text)
        if match:
            dataset = log.parent.parent.parent.name
            test_accuracies[dataset].append(float(match.group(1)))

    return test_accuracies

In [3]:
# Parse the run logs once per variant. Each result maps a dataset directory
# name to the list of test accuracies (in percent) found in that variant's logs.
ga_test_accuracies = collect_test_accuracies(ga=True)
base_test_accuracies = collect_test_accuracies(ga=False)

In [4]:
# Report the mean and standard deviation of the GA test accuracies per dataset.
for dataset, accuracies in ga_test_accuracies.items():
    mean_accuracy = np.mean(accuracies)
    accuracy_std = np.std(accuracies)
    print(f"Average GA Test Accuracy - {dataset}: {mean_accuracy:.3f}% +/- {accuracy_std:.3f}%")

Average GA Test Accuracy - iris: 83.833% +/- 8.516%
Average GA Test Accuracy - wine: 94.722% +/- 4.203%
Average GA Test Accuracy - seeds: 89.643% +/- 4.343%


In [5]:
# Report the mean and standard deviation of the base test accuracies per dataset.
for dataset, accuracies in base_test_accuracies.items():
    mean_accuracy = np.mean(accuracies)
    accuracy_std = np.std(accuracies)
    print(f"Average Base Test Accuracy - {dataset}: {mean_accuracy:.3f}% +/- {accuracy_std:.3f}%")

Average Base Test Accuracy - iris: 59.167% +/- 19.347%
Average Base Test Accuracy - wine: 35.972% +/- 10.953%
Average Base Test Accuracy - seeds: 49.881% +/- 18.109%


In [6]:
def collect_test_best_params(ga: bool = True) -> defaultdict:
    """Collect the best-parameter dict of every run, grouped by dataset.

    Scans ``models/**/<variant>/logs/*.log`` under ``REPO_ROOT`` (``<variant>``
    is ``ga`` or ``base``) and parses the first ``Best Parameters: {...}``
    entry found in each log file.

    Args:
        ga: If True (the default), read the logs under ``ga`` directories;
            otherwise read the logs under ``base`` directories. The flag used
            to be ignored and the GA logs were always read.

    Returns:
        A ``defaultdict(list)`` mapping the name of the directory three levels
        above each log file (the one that contains ``ga``/``base``) to the
        parameter dicts parsed from its logs. Log files with no matching entry
        contribute nothing.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when the
            text between the braces is not a valid Python literal.
    """
    variant = "ga" if ga else "base"
    # The dict literal is captured as a group so the code no longer depends on
    # slicing off a prefix of a hard-coded length. "." does not cross newlines,
    # so the greedy match runs to the last "}" on the same line.
    params_re = re.compile(r"Best Parameters: (\{.*\})")

    best_params = defaultdict(list)

    # Sorted so the order of the collected dicts does not depend on the
    # filesystem's directory listing order.
    for log in sorted(REPO_ROOT.glob(f"models/**/{variant}/logs/*.log")):
        text = log.read_text(encoding="utf-8", errors="replace")
        match = params_re.search(text)
        if match:
            dataset = log.parent.parent.parent.name
            # literal_eval only accepts Python literals; it does not execute code.
            best_params[dataset].append(literal_eval(match.group(1)))

    return best_params

In [7]:
# Best-parameter dicts parsed from the GA run logs, grouped by dataset directory name.
ga_best_params = collect_test_best_params(ga=True)

In [8]:
# Report the hidden-layer layouts and activations recorded in the GA best
# parameters, one dataset at a time.
for dataset, runs in ga_best_params.items():
    layer_sizes = [run['clf__estimator__hidden_layer_sizes'] for run in runs]
    print(f"GA Hidden Layer Sizes - {dataset}: {Counter(len(sizes) for sizes in layer_sizes)}")

    # Total neuron count per run, reused by the total/min/max lines below.
    neuron_totals = [sum(sizes) for sizes in layer_sizes]
    print(f"GA Total Neurons - {dataset}: {neuron_totals}")
    print(f"GA Total Neurons by layer - {dataset}: {layer_sizes}")
    print(f"GA Min Total Neurons - {dataset}: {min(neuron_totals)}")
    print(f"GA Max Total Neurons - {dataset}: {max(neuron_totals)}")

    activations = Counter(run['clf__estimator__activation'] for run in runs)
    print(f"GA activation - {dataset}: {activations}")
    print("\n")

GA Hidden Layer Sizes - iris: Counter({3: 11, 4: 7, 2: 2})
GA Total Neurons - iris: [368, 784, 464, 432, 352, 448, 464, 240, 304, 448, 576, 512, 336, 464, 416, 576, 400, 576, 544, 464]
GA Total Neurons by layer - iris: [(64, 144, 128, 32), (176, 240, 256, 112), (96, 224, 144), (176, 176, 80), (80, 256, 16), (112, 224, 80, 32), (48, 176, 240), (208, 32), (144, 80, 80), (80, 128, 240), (80, 240, 144, 112), (240, 112, 160), (256, 80), (144, 224, 96), (128, 176, 112), (160, 256, 112, 48), (176, 160, 64), (80, 240, 192, 64), (32, 224, 208, 80), (64, 240, 160)]
GA Min Total Neurons - iris: 240
GA Max Total Neurons - iris: 784
GA activation - iris: Counter({'tanh': 20})


GA Hidden Layer Sizes - wine: Counter({3: 9, 4: 9, 2: 2})
GA Total Neurons - wine: [240, 320, 320, 528, 560, 512, 608, 512, 464, 432, 432, 464, 608, 608, 416, 544, 608, 512, 464, 560]
GA Total Neurons by layer - wine: [(176, 64), (240, 80), (80, 128, 112), (256, 96, 176), (176, 240, 144), (80, 96, 256, 80), (256, 208, 144), 