In [2]:
from __future__ import annotations

from typing import Any, Callable, Dict, Optional, Tuple, Union

import numpy as np
from numpy.typing import NDArray

from sklearn.cluster import KMeans

#from ._selfcga import SelfCGA
from thefittest.optimizers import SelfCGA
#from thefittest.utils.transformations import GrayCode


class ClusterSelfCGA(SelfCGA):
    """
    SelfCGA with per-generation clustering that overwrites "bad" clusters.

    Idea:
      - on every generation, after the base ``_update_data`` has run, the
        current population is clustered with KMeans (on the phenotype when
        ``genotype_to_phenotype`` is set, otherwise on the binary genotype
        treated as a real vector);
      - the size and the mean fitness of every cluster are computed;
      - a cluster that is both small and worse than average is "bad";
      - individuals of bad clusters are replaced by clones of randomly
        chosen individuals from the remaining clusters (nothing is changed
        when there are no remaining individuals).

    Important:
      - the population size never changes;
      - only ``_update_data`` is overridden, ``run``/``fit`` are untouched.
    """

    def __init__(
        self,
        fitness_function: Callable[[NDArray[Any]], NDArray[np.float64]],
        iters: int,
        pop_size: int,
        str_len: int,
        tour_size: int = 2,
        mutation_rate: float = 0.05,
        parents_num: int = 2,
        elitism: bool = True,
        selections: Tuple[str, ...] = (
            "proportional",
            "rank",
            "tournament_3",
            "tournament_5",
            "tournament_7",
        ),
        crossovers: Tuple[str, ...] = (
            "empty",
            "one_point",
            "two_point",
            "uniform_2",
            "uniform_7",
            "uniform_prop_2",
            "uniform_prop_7",
            "uniform_rank_2",
            "uniform_rank_7",
            "uniform_tour_3",
            "uniform_tour_7",
        ),
        mutations: Tuple[str, ...] = ("weak", "average", "strong"),
        init_population: Optional[NDArray[np.byte]] = None,
        K: float = 2,
        selection_threshold_proba: float = 0.05,
        crossover_threshold_proba: float = 0.05,
        mutation_threshold_proba: float = 0.05,
        genotype_to_phenotype: Optional[Callable[[NDArray[np.byte]], NDArray[Any]]] = None,
        optimal_value: Optional[float] = None,
        termination_error_value: float = 0.0,
        no_increase_num: Optional[int] = None,
        minimization: bool = False,
        show_progress_each: Optional[int] = None,
        keep_history: bool = False,
        n_jobs: int = 1,
        fitness_function_args: Optional[Dict] = None,
        genotype_to_phenotype_args: Optional[Dict] = None,
        random_state: Optional[Union[int, np.random.RandomState]] = None,
        on_generation: Optional[Callable] = None,
        fitness_update_eps: float = 0.0,
        # === clustering parameters added by this subclass ===
        n_clusters: int = 5,
        alpha_N: float = 1.0,
        alpha_F: float = 1.0,
        enable_clustering: bool = True,
    ):
        """
        All parameters up to ``fitness_update_eps`` are forwarded unchanged
        to ``SelfCGA.__init__``. Additional parameters:

        n_clusters : int
            Upper bound on the number of KMeans clusters (it is further
            capped by the number of individuals).
        alpha_N : float
            Size threshold: a cluster is "small" if N_k < alpha_N * N_mean,
            where N_mean is the mean size of the non-empty clusters.
        alpha_F : float
            Fitness threshold against the population mean F_mean:
              - minimization: "worse" if F_k > alpha_F * F_mean;
              - maximization: "worse" if F_k < alpha_F * F_mean.
        enable_clustering : bool
            When False the class behaves like plain SelfCGA.
        """
        super().__init__(
            fitness_function=fitness_function,
            iters=iters,
            pop_size=pop_size,
            str_len=str_len,
            tour_size=tour_size,
            mutation_rate=mutation_rate,
            parents_num=parents_num,
            elitism=elitism,
            selections=selections,
            crossovers=crossovers,
            mutations=mutations,
            init_population=init_population,
            K=K,
            selection_threshold_proba=selection_threshold_proba,
            crossover_threshold_proba=crossover_threshold_proba,
            mutation_threshold_proba=mutation_threshold_proba,
            genotype_to_phenotype=genotype_to_phenotype,
            optimal_value=optimal_value,
            termination_error_value=termination_error_value,
            no_increase_num=no_increase_num,
            minimization=minimization,
            show_progress_each=show_progress_each,
            keep_history=keep_history,
            n_jobs=n_jobs,
            fitness_function_args=fitness_function_args,
            genotype_to_phenotype_args=genotype_to_phenotype_args,
            random_state=random_state,
            on_generation=on_generation,
            fitness_update_eps=fitness_update_eps,
        )

        self._n_clusters: int = n_clusters
        self._alpha_N: float = alpha_N
        self._alpha_F: float = alpha_F
        self._enable_clustering: bool = enable_clustering

    # =====================================================================
    #  Helpers: data preparation for clustering
    # =====================================================================

    def _get_clustering_data(self) -> Optional[NDArray[np.float64]]:
        """
        Return the 2-D float array that is fed to KMeans.

        Returns None when there is no current population or it is empty.
        When a ``genotype_to_phenotype`` callable is set, its output is
        used (1-D output becomes a single column, output with more than
        two axes is flattened per individual); otherwise the genotype
        array itself is cast to float64.
        """
        pop = getattr(self, "_population_g_i", None)
        if pop is None or pop.size == 0:
            return None

        to_phenotype = getattr(self, "_genotype_to_phenotype", None)
        if to_phenotype is None:
            # default: binary 0/1 strings interpreted as real vectors
            return pop.astype(np.float64)

        # Forward the user-supplied keyword arguments so the converter is
        # called the same way it was configured in the constructor.
        # NOTE(review): presumably the base class stores them as
        # `_genotype_to_phenotype_args` - confirm; when the attribute is
        # absent the call is made without extra arguments.
        extra_kwargs = getattr(self, "_genotype_to_phenotype_args", None) or {}
        phen = np.asarray(to_phenotype(pop, **extra_kwargs))
        if phen.ndim == 1:
            phen = phen.reshape(-1, 1)
        elif phen.ndim > 2:
            # flatten everything except the individual axis
            phen = phen.reshape(phen.shape[0], -1)
        return phen.astype(np.float64)

    # =====================================================================
    #  Clustering and filtering
    # =====================================================================

    def _cluster_and_filter(self) -> None:
        """
        Cluster the current population and overwrite "bad" clusters.

        Reads ``self._population_g_i`` and ``self._fitness_i``. Individuals
        that belong to a cluster which is both small and worse than average
        are overwritten in place with the genotype and fitness of randomly
        chosen individuals from the other clusters. The method is a no-op
        when clustering is disabled, when fewer than two individuals or
        clusters are available, when no cluster is bad, or when every
        individual would be flagged.
        """
        if not self._enable_clustering:
            return

        pop = getattr(self, "_population_g_i", None)
        fit = getattr(self, "_fitness_i", None)
        if pop is None or fit is None:
            return

        n_samples = pop.shape[0]
        if n_samples <= 1:
            # clustering is meaningless for zero or one individual
            return

        # the number of clusters is capped by the number of individuals
        n_clusters = max(1, min(self._n_clusters, n_samples))
        if n_clusters == 1:
            # a single cluster leaves nothing to filter
            return

        X = self._get_clustering_data()
        if X is None:
            return

        # Derive randomness from the instance's RandomState when one is
        # available; otherwise fall back to numpy's global generator.
        rs = getattr(self, "_random_state", None)
        seed = int(rs.randint(0, 1_000_000_000)) if hasattr(rs, "randint") else None
        choose = rs.choice if hasattr(rs, "choice") else np.random.choice

        labels = KMeans(
            n_clusters=n_clusters,
            n_init=10,
            random_state=seed,
        ).fit_predict(X)

        # per-cluster size and mean fitness (NaN for empty clusters)
        N_k = np.bincount(labels, minlength=n_clusters).astype(float)
        non_empty = N_k > 0
        if not np.any(non_empty):
            return
        F_k = np.full(n_clusters, np.nan)
        for k in np.flatnonzero(non_empty):
            F_k[k] = float(np.mean(fit[labels == k]))

        N_mean = float(np.mean(N_k[non_empty]))
        F_mean = float(np.mean(fit))

        # NOTE(review): the direction test uses raw `_fitness_i` values; if
        # the base class stores sign-adjusted fitness, revisit - confirm.
        small = N_k < self._alpha_N * N_mean
        threshold = self._alpha_F * F_mean
        with np.errstate(invalid="ignore"):
            worse = (F_k > threshold) if self._minimization else (F_k < threshold)
        bad_mask = non_empty & small & worse

        if not np.any(bad_mask):
            # every cluster is fine
            return

        # Map the cluster verdict back onto individuals. Empty clusters
        # contribute no individuals, so this never concatenates an empty
        # list (which raised ValueError before).
        is_bad = bad_mask[labels]
        bad_indices_all = np.flatnonzero(is_bad)
        good_indices_all = np.flatnonzero(~is_bad)

        if bad_indices_all.size == 0 or good_indices_all.size == 0:
            # nothing to replace, or no donors at all: leave untouched
            return

        # one random donor per bad individual; donors are never modified
        donors = choose(good_indices_all, size=bad_indices_all.size)
        pop[bad_indices_all] = pop[donors]
        fit[bad_indices_all] = fit[donors]

        # Keep a cached phenotype array (if the base class holds one as a
        # separate per-individual array) aligned with the cloned genotypes.
        phen = getattr(self, "_population_ph_i", None)
        if (
            isinstance(phen, np.ndarray)
            and phen is not pop
            and phen.shape[0] == n_samples
        ):
            phen[bad_indices_all] = phen[donors]

        self._population_g_i = pop
        self._fitness_i = fit

    # =====================================================================
    #  Override of _update_data
    # =====================================================================

    def _update_data(self) -> None:
        """
        Run the inherited ``_update_data`` first, then cluster and filter
        the current population via ``_cluster_and_filter``.
        """
        # step 1: inherited behaviour
        super()._update_data()

        # step 2: clustering + filtering
        self._cluster_and_filter()



In [6]:
from __future__ import annotations

from typing import Any, Callable, Dict, Optional, Tuple, Union

import numpy as np
from numpy.typing import NDArray


from thefittest.optimizers import SelfCGA        # OK
from thefittest.utils import numpy_group_by      # OK
from thefittest.utils.random import random_weighted_sample  # OK

#from ._selfcga import SelfCGA            # <-- твой SelfCGA из thefittest
#from ..utils import numpy_group_by
#from ..utils.random import random_weighted_sample


class ClusterSelfCGA(SelfCGA):
    """
    Self-configuring genetic algorithm with additional clustering and
    removal of clusters by size / fitness.

    Idea:
      - after the base ``_update_data`` has run, the current phenotypes are
        clustered and "bad" clusters are dropped from the population:
            N_k < alpha_N * N_mean  and  F_k > alpha_F * F_mean
        (the fitness comparison is reversed for maximization).

    Parameters added by this subclass:
        n_clusters        - upper bound on the number of clusters;
        alpha_N           - cluster size threshold (relative to the mean);
        alpha_F           - cluster mean-fitness threshold;
        enable_clustering - switch for the whole mechanism.
    """

    def __init__(
        self,
        fitness_function: Callable[[NDArray[Any]], NDArray[np.float64]],
        iters: int,
        pop_size: int,
        str_len: int,
        tour_size: int = 2,
        mutation_rate: float = 0.05,
        parents_num: int = 2,
        elitism: bool = True,
        selections: Tuple[str, ...] = (
            "proportional",
            "rank",
            "tournament_3",
            "tournament_5",
            "tournament_7",
        ),
        crossovers: Tuple[str, ...] = (
            "empty",
            "one_point",
            "two_point",
            "uniform_2",
            "uniform_7",
            "uniform_prop_2",
            "uniform_prop_7",
            "uniform_rank_2",
            "uniform_rank_7",
            "uniform_tour_3",
            "uniform_tour_7",
        ),
        mutations: Tuple[str, ...] = ("weak", "average", "strong"),
        init_population: Optional[NDArray[np.byte]] = None,
        K: float = 2,
        selection_threshold_proba: float = 0.05,
        crossover_threshold_proba: float = 0.05,
        mutation_threshold_proba: float = 0.05,
        genotype_to_phenotype: Optional[Callable[[NDArray[np.byte]], NDArray[Any]]] = None,
        optimal_value: Optional[float] = None,
        termination_error_value: float = 0.0,
        no_increase_num: Optional[int] = None,
        minimization: bool = False,
        show_progress_each: Optional[int] = None,
        keep_history: bool = False,
        n_jobs: int = 1,
        fitness_function_args: Optional[Dict] = None,
        genotype_to_phenotype_args: Optional[Dict] = None,
        random_state: Optional[Union[int, np.random.RandomState]] = None,
        on_generation: Optional[Callable] = None,
        fitness_update_eps: float = 0.0,
        # ---- additional clustering parameters ----
        n_clusters: int = 5,
        alpha_N: float = 1.0,
        alpha_F: float = 1.0,
        enable_clustering: bool = True,
    ):
        """
        Forward every standard argument to ``SelfCGA.__init__`` and store
        the four clustering settings on the instance.
        """
        super().__init__(
            fitness_function=fitness_function,
            iters=iters,
            pop_size=pop_size,
            str_len=str_len,
            tour_size=tour_size,
            mutation_rate=mutation_rate,
            parents_num=parents_num,
            elitism=elitism,
            selections=selections,
            crossovers=crossovers,
            mutations=mutations,
            init_population=init_population,
            K=K,
            selection_threshold_proba=selection_threshold_proba,
            crossover_threshold_proba=crossover_threshold_proba,
            mutation_threshold_proba=mutation_threshold_proba,
            genotype_to_phenotype=genotype_to_phenotype,
            optimal_value=optimal_value,
            termination_error_value=termination_error_value,
            no_increase_num=no_increase_num,
            minimization=minimization,
            show_progress_each=show_progress_each,
            keep_history=keep_history,
            n_jobs=n_jobs,
            fitness_function_args=fitness_function_args,
            genotype_to_phenotype_args=genotype_to_phenotype_args,
            random_state=random_state,
            on_generation=on_generation,
            fitness_update_eps=fitness_update_eps,
        )

        # clustering settings
        self._n_clusters: int = n_clusters
        self._alpha_N: float = alpha_N
        self._alpha_F: float = alpha_F
        self._enable_clustering: bool = enable_clustering

    # ------------------------------------------------------------------
    #  Clustering + filtering
    # ------------------------------------------------------------------

    def _cluster_and_filter(self) -> None:
        """
        Split the population into at most ``n_clusters`` phenotype clusters
        and drop every cluster that is both small and worse than average:

            N_k < alpha_N * N_mean  and  F_k > alpha_F * F_mean

        N_mean is the mean size of the non-empty clusters and F_mean is the
        mean of the non-empty clusters' mean fitness values. When the
        instance has no ``_minimization`` attribute minimization is
        assumed. If every individual would be dropped, nothing is changed.
        Otherwise the genotype, phenotype and fitness arrays (and any
        per-individual operator arrays of matching length) are filtered
        with the same mask and ``_pop_size`` is updated.
        """
        if not self._enable_clustering:
            return

        f = self._fitness_i
        # Cluster on a float copy: with integer/byte phenotypes the centroid
        # means would otherwise be truncated when written back.
        X = np.asarray(self._population_ph_i, dtype=float)
        n_pop = X.shape[0]

        if n_pop == 0 or self._n_clusters <= 1:
            # nothing to cluster
            return

        # one feature column per individual for 1-D phenotypes; flatten
        # anything with more than two axes
        X = X.reshape(n_pop, -1)

        # fall back to minimization when the flag is not exposed
        minimization_flag = getattr(self, "_minimization", None)
        if minimization_flag is None:
            minimization_flag = True

        # no more clusters than individuals
        K = min(self._n_clusters, n_pop)

        # Crude k-means in numpy.
        # 1) pick K distinct individuals as initial centres. A RandomState
        #    (or anything exposing `choice`) is used directly, because
        #    `np.random.default_rng` does not accept RandomState instances
        #    on every NumPy version.
        rs = self._random_state
        if hasattr(rs, "choice"):
            centers_idx = rs.choice(np.arange(n_pop), size=K, replace=False)
        else:
            rng = np.random.default_rng(rs)
            centers_idx = rng.choice(np.arange(n_pop), size=K, replace=False)
        centers = X[centers_idx].copy()

        # 2) two Lloyd iterations - enough for a rough partition
        for _ in range(2):
            # pairwise distances, shape (n_pop, K)
            dists = np.linalg.norm(X[:, None, :] - centers[None, :, :], axis=2)
            labels = np.argmin(dists, axis=1)
            for k in range(K):
                members = labels == k
                if members.any():
                    # empty clusters keep their previous centre
                    centers[k] = X[members].mean(axis=0)

        # per-cluster size and mean fitness (NaN marks an empty cluster)
        N_k = np.bincount(labels, minlength=K)
        F_k = np.full(K, np.nan)
        for k in np.flatnonzero(N_k):
            F_k[k] = f[labels == k].mean()

        non_empty = N_k > 0
        if not np.any(non_empty):
            return

        N_mean = N_k[non_empty].mean()
        F_mean = F_k[non_empty].mean()

        # mask of individuals to keep
        keep_mask = np.ones(n_pop, dtype=bool)
        for k in np.flatnonzero(non_empty):
            small = N_k[k] < self._alpha_N * N_mean
            if minimization_flag:
                # minimization: "worse" means a larger mean fitness
                worse = F_k[k] > self._alpha_F * F_mean
            else:
                worse = F_k[k] < self._alpha_F * F_mean
            if small and worse:
                # drop the whole cluster
                keep_mask[labels == k] = False

        # roll back if everything would be dropped; skip no-op filtering
        if not np.any(keep_mask) or keep_mask.all():
            return

        # apply the mask to population, phenotypes and fitness
        self._population_g_i = self._population_g_i[keep_mask]
        self._population_ph_i = self._population_ph_i[keep_mask]
        self._fitness_i = self._fitness_i[keep_mask]

        # Keep per-individual operator arrays aligned with the shrunken
        # population. NOTE(review): assumes these attributes, when present
        # with one entry per individual, are indexed like the population -
        # confirm against the base class.
        for attr in (
            "_selection_operators",
            "_crossover_operators",
            "_mutation_operators",
        ):
            per_individual = getattr(self, attr, None)
            if isinstance(per_individual, np.ndarray) and per_individual.shape[0] == n_pop:
                setattr(self, attr, per_individual[keep_mask])

        # update the population size
        self._pop_size = self._population_g_i.shape[0]

    # ------------------------------------------------------------------
    #  Override of _update_data: add clustering
    # ------------------------------------------------------------------

    def _update_data(self: "ClusterSelfCGA") -> None:
        """
        Run the inherited ``_update_data`` and then cluster and filter the
        current population.
        """
        super()._update_data()
        self._cluster_and_filter()


In [1]:
import numpy as np
from sklearn.cluster import KMeans

from thefittest.optimizers import SelfCGA  # базовый SelfCGA


class ClusterSelfCGA(SelfCGA):
    """
    SelfCGA plus phenotype clustering that removes clusters which are
    both small and worse than average.
    """

    def __init__(
        self,
        *args,
        enable_clustering: bool = True,
        n_clusters: int = 5,
        alpha_N: float = 1.0,
        alpha_F: float = 1.0,
        **kwargs,
    ):
        """
        All standard SelfCGA parameters are passed through *args/**kwargs.

        Additional keyword-only parameters:

        enable_clustering : whether clustering is applied at all
        n_clusters        : upper bound on the number of KMeans clusters
        alpha_N           : cluster size threshold
        alpha_F           : cluster fitness threshold
        """
        super().__init__(*args, **kwargs)

        self._enable_clustering = enable_clustering
        self._n_clusters = n_clusters
        self._alpha_N = alpha_N
        self._alpha_F = alpha_F

        # bookkeeping for logging only
        self._last_clusters_num = 0
        self._last_pop_before = self._pop_size
        self._last_pop_after = self._pop_size

    # -------------------------------
    # override of _update_data
    # -------------------------------
    def _update_data(self) -> None:
        """
        Run the inherited ``_update_data`` first, then cluster and filter
        the population when clustering is enabled.
        """
        super()._update_data()

        if self._enable_clustering:
            self._cluster_and_filter()

    # -------------------------------
    # clustering + filtering
    # -------------------------------
    def _cluster_and_filter(self) -> None:
        """
        Cluster the phenotypes with KMeans and drop small, poor clusters.

        Works on:
          - self._population_ph_i  (phenotypes)
          - self._fitness_i        (fitness values)
          - self._population_g_i   (genotypes)
          - self._selection_operators,
            self._crossover_operators,
            self._mutation_operators

        A cluster is dropped when N_k < alpha_N * N_mean and its mean
        fitness is worse than alpha_F * F_mean (F_mean is the population
        mean; "worse" means larger when minimizing, smaller otherwise).
        All arrays listed above are filtered with the same mask and
        ``_pop_size`` is updated. If the criteria would remove every
        individual, the population is left untouched. The ``_last_*``
        attributes record the outcome of the most recent call.
        """
        F = self._fitness_i             # one value per individual
        N = self._population_ph_i.shape[0]

        self._last_pop_before = N

        # too few individuals or clusters - nothing to do
        if N <= 1 or self._n_clusters <= 1:
            self._last_clusters_num = 0
            self._last_pop_after = N
            return

        K = min(self._n_clusters, N)

        # KMeans needs a 2-D array: one row per individual. A 1-D phenotype
        # array becomes a single column, extra axes are flattened.
        X = np.asarray(self._population_ph_i).reshape(N, -1)

        # Derive the KMeans seed from the instance's RandomState when it
        # has one; otherwise let KMeans use its default (None).
        if hasattr(self._random_state, "randint"):
            rs = int(self._random_state.randint(0, 1_000_000_000))
        else:
            rs = None

        kmeans = KMeans(
            n_clusters=K,
            n_init=10,
            random_state=rs,
        )
        labels = kmeans.fit_predict(X)

        # cluster sizes and mean fitness (NaN marks an empty cluster)
        N_k = np.bincount(labels, minlength=K)
        F_k = np.full(K, np.nan)
        for k in np.flatnonzero(N_k):
            F_k[k] = F[labels == k].mean()

        non_empty = N_k > 0
        if not np.any(non_empty):
            self._last_clusters_num = 0
            self._last_pop_after = N
            return

        clusters_found = int(np.sum(non_empty))
        N_mean = N_k[non_empty].mean()
        F_mean = F.mean()

        # minimization is assumed when the flag is not exposed
        # NOTE(review): compares raw `_fitness_i`; if the base class stores
        # sign-adjusted fitness the direction needs revisiting - confirm.
        is_minimization = getattr(self, "_minimization", True)

        # mask of individuals to keep
        keep = np.ones(N, dtype=bool)

        for k in np.flatnonzero(non_empty):
            small = N_k[k] < self._alpha_N * N_mean

            if is_minimization:
                # minimizing: "worse" means larger F
                worse = F_k[k] > self._alpha_F * F_mean
            else:
                # maximizing: "worse" means smaller F
                worse = F_k[k] < self._alpha_F * F_mean

            # small and poor -> drop
            if small and worse:
                keep[labels == k] = False

        # Nothing dropped, or everything flagged: leave the population
        # as is. The second case would otherwise empty the population
        # and set `_pop_size` to 0.
        if keep.all() or not keep.any():
            self._last_clusters_num = clusters_found
            self._last_pop_after = N
            return

        # === apply the mask to ALL related arrays ===
        self._population_g_i = self._population_g_i[keep]
        self._population_ph_i = self._population_ph_i[keep]
        self._fitness_i = self._fitness_i[keep]

        # same indices for the per-individual SelfCGA operator arrays
        self._selection_operators = self._selection_operators[keep]
        self._crossover_operators = self._crossover_operators[keep]
        self._mutation_operators = self._mutation_operators[keep]

        # update the population size
        N_new = self._population_g_i.shape[0]
        self._pop_size = int(N_new)

        self._last_clusters_num = clusters_found
        self._last_pop_after = int(N_new)


In [1]:
import os
import sys
import warnings

# Limit OpenMP to one thread so sklearn's KMeans does not emit its MKL
# warning. This is done before numpy / cec2017 / sklearn are imported in
# this cell because the variable is presumably only read when the OpenMP
# runtime starts - NOTE(review): it has no effect if those libraries were
# already loaded earlier in the same process.
os.environ["OMP_NUM_THREADS"] = "1"

# make the folder holding the cec2017 package and the notebook importable
sys.path.append(os.getcwd())

from cec2017 import basic, transforms, hybrid

print(hybrid.__file__)  # sanity check: where the package was loaded from

import numpy as np

from sklearn.exceptions import ConvergenceWarning

# silence the main warning categories
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)


def _shuffle_and_partition(x, shuffle, partitions):
    """
    First applies the given permutation, then splits x into partitions given
    the percentages.

    Args:
        x (array): Input vector.
        shuffle (array): Shuffle vector.
        partitions (list): List of percentages. Assumed to add up to 1.0.

    Returns:
        (list of arrays): The partitions of x after shuffling.
    """
    nx = len(x)
    # shuffle
    xs = np.zeros(x.shape)
    for i in range(0, nx):
        xs[i] = x[shuffle[i]]
    # and partition
    parts = []
    start, end = 0, 0
    for p in partitions[:-1]:
        end = start + int(np.ceil(p * nx))
        parts.append(xs[start:end])
        start = end
    parts.append(xs[end:])
    return parts



def f1(x, rotation=None, shift=None):
    """
    Shifted and Rotated Bent Cigar Function

    ``x`` is converted with ``np.array`` and the length of its second axis
    is used as the problem dimension.

    Args:
        x (array): Input vector(s) of dimension 2, 10, 20, 30, 50 or 100.
        rotation (matrix): Optional rotation matrix. If None (default),
            ``transforms.rotations[nx][0]`` is used.
        shift (array): Optional shift vector. If None (default),
            ``transforms.shifts[0][:nx]`` is used.

    Returns:
        ``basic.bent_cigar`` of the shifted and rotated input, plus 100.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    # fall back to the suite's own transform data when none is supplied
    rotation = transforms.rotations[dim][0] if rotation is None else rotation
    shift = transforms.shifts[0][:dim] if shift is None else shift

    return basic.bent_cigar(transforms.shift_rotate(points, shift, rotation)) + 100.0


def f2(x, rotation=None, shift=None):
    """
    (Deprecated) Shifted and Rotated Sum of Different Power Function

    Prints a deprecation notice on the first call only; the fact that the
    notice was shown is remembered as the ``warned`` attribute of ``f2``.

    Args:
        x (array): Input vector(s) of dimension 2, 10, 20, 30, 50 or 100.
        rotation (matrix): Optional rotation matrix. If None (default),
            ``transforms.rotations[nx][1]`` is used.
        shift (array): Optional shift vector. If None (default),
            ``transforms.shifts[1][:nx]`` is used.

    Returns:
        ``basic.sum_diff_pow`` of the shifted and rotated input, plus 200.0.
    """
    if not hasattr(f2, 'warned'):
        f2.warned = True
        print('WARNING: f2 has been deprecated from the CEC 2017 benchmark suite')

    points = np.array(x)
    dim = points.shape[1]

    # fall back to the suite's own transform data when none is supplied
    rotation = transforms.rotations[dim][1] if rotation is None else rotation
    shift = transforms.shifts[1][:dim] if shift is None else shift

    return basic.sum_diff_pow(transforms.shift_rotate(points, shift, rotation)) + 200.0


def f3(x, rotation=None, shift=None):
    """
    Shifted and Rotated Zakharov Function

    ``x`` is converted with ``np.array`` and the length of its second axis
    is used as the problem dimension.

    Args:
        x (array): Input vector(s) of dimension 2, 10, 20, 30, 50 or 100.
        rotation (matrix): Optional rotation matrix. If None (default),
            ``transforms.rotations[nx][2]`` is used.
        shift (array): Optional shift vector. If None (default),
            ``transforms.shifts[2][:nx]`` is used.

    Returns:
        ``basic.zakharov`` of the shifted and rotated input, plus 300.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    # fall back to the suite's own transform data when none is supplied
    rotation = transforms.rotations[dim][2] if rotation is None else rotation
    shift = transforms.shifts[2][:dim] if shift is None else shift

    return basic.zakharov(transforms.shift_rotate(points, shift, rotation)) + 300.0


def f4(x, rotation=None, shift=None):
    """
    Shifted and Rotated Rosenbrock's Function

    ``x`` is converted with ``np.array`` and the length of its second axis
    is used as the problem dimension.

    Args:
        x (array): Input vector(s) of dimension 2, 10, 20, 30, 50 or 100.
        rotation (matrix): Optional rotation matrix. If None (default),
            ``transforms.rotations[nx][3]`` is used.
        shift (array): Optional shift vector. If None (default),
            ``transforms.shifts[3][:nx]`` is used.

    Returns:
        ``basic.rosenbrock`` of the shifted and rotated input, plus 400.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    # fall back to the suite's own transform data when none is supplied
    rotation = transforms.rotations[dim][3] if rotation is None else rotation
    shift = transforms.shifts[3][:dim] if shift is None else shift

    return basic.rosenbrock(transforms.shift_rotate(points, shift, rotation)) + 400.0


def f5(x, rotation=None, shift=None):
    """
    Shifted and Rotated Rastrigin's Function

    ``x`` is converted with ``np.array`` and the length of its second axis
    is used as the problem dimension.

    Args:
        x (array): Input vector(s) of dimension 2, 10, 20, 30, 50 or 100.
        rotation (matrix): Optional rotation matrix. If None (default),
            ``transforms.rotations[nx][4]`` is used.
        shift (array): Optional shift vector. If None (default),
            ``transforms.shifts[4][:nx]`` is used.

    Returns:
        ``basic.rastrigin`` of the shifted and rotated input, plus 500.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    # fall back to the suite's own transform data when none is supplied
    rotation = transforms.rotations[dim][4] if rotation is None else rotation
    shift = transforms.shifts[4][:dim] if shift is None else shift

    return basic.rastrigin(transforms.shift_rotate(points, shift, rotation)) + 500.0


def f6(x, rotation=None, shift=None):
    """
    Shifted and Rotated Schaffer's F7 Function

    ``x`` is converted with ``np.array`` and the length of its second axis
    is used as the problem dimension.

    Args:
        x (array): Input vector(s) of dimension 2, 10, 20, 30, 50 or 100.
        rotation (matrix): Optional rotation matrix. If None (default),
            ``transforms.rotations[nx][5]`` is used.
        shift (array): Optional shift vector. If None (default),
            ``transforms.shifts[5][:nx]`` is used.

    Returns:
        ``basic.schaffers_f7`` of the shifted and rotated input, plus 600.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    # fall back to the suite's own transform data when none is supplied
    rotation = transforms.rotations[dim][5] if rotation is None else rotation
    shift = transforms.shifts[5][:dim] if shift is None else shift

    return basic.schaffers_f7(transforms.shift_rotate(points, shift, rotation)) + 600.0


def f7(x, rotation=None, shift=None):
    """
    Shifted and Rotated Lunacek Bi-Rastrigin's Function (offset 700).

    Unlike most of its siblings, this function does not transform the input
    itself: the shift and rotation are handed to
    ``basic.lunacek_bi_rastrigin`` together with the raw points.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][6]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[6]`` are used.

    Returns:
        The value of ``basic.lunacek_bi_rastrigin`` plus 700.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][6] if rotation is None else rotation
    offset = transforms.shifts[6][:dim] if shift is None else shift
    # The basic function applies shift and rotation internally.
    return basic.lunacek_bi_rastrigin(points, offset, rot) + 700.0


def f8(x, rotation=None, shift=None):
    """
    Shifted and Rotated Non-Continuous Rastrigin's Function (offset 800).

    The shift and rotation are not applied here; they are handed to
    ``basic.non_cont_rastrigin`` together with the raw points.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][7]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[7]`` are used.

    Returns:
        The value of ``basic.non_cont_rastrigin`` plus 800.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][7] if rotation is None else rotation
    offset = transforms.shifts[7][:dim] if shift is None else shift
    # The basic function applies shift and rotation internally.
    return basic.non_cont_rastrigin(points, offset, rot) + 800.0


def f9(x, rotation=None, shift=None):
    """
    Shifted and Rotated Levy Function (offset 900).

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][8]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[8]`` are used.

    Returns:
        ``basic.levy`` evaluated on the shifted and rotated points, plus
        900.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][8] if rotation is None else rotation
    offset = transforms.shifts[8][:dim] if shift is None else shift
    return basic.levy(transforms.shift_rotate(points, offset, rot)) + 900.0


def f10(x, rotation=None, shift=None):
    """
    Shifted and Rotated Schwefel's Function (offset 1000).

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][9]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[9]`` are used.

    Returns:
        ``basic.modified_schwefel`` evaluated on the shifted and rotated
        points, plus 1000.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][9] if rotation is None else rotation
    offset = transforms.shifts[9][:dim] if shift is None else shift
    return basic.modified_schwefel(transforms.shift_rotate(points, offset, rot)) + 1000.0



def f11(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 1 (N=3), offset 1100.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.2, 0.4, 0.4]. The resulting parts are fed, in order, to
    the Zakharov, Rosenbrock and Rastrigin basic functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][10]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[10]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][0]`` is used.

    Returns:
        Sum of the component values plus 1100.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][10] if rotation is None else rotation
    offset = transforms.shifts[10][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][0] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.2, 0.4, 0.4])

    total = basic.zakharov(parts[0])
    total += basic.rosenbrock(parts[1])
    total += basic.rastrigin(parts[2])
    return total + 1100.0


def f12(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 2 (N=3), offset 1200.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.3, 0.3, 0.4]. The resulting parts are fed, in order, to
    the high-conditioned elliptic, modified Schwefel and bent cigar basic
    functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][11]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[11]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][1]`` is used.

    Returns:
        Sum of the component values plus 1200.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][11] if rotation is None else rotation
    offset = transforms.shifts[11][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][1] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.3, 0.3, 0.4])

    total = basic.high_conditioned_elliptic(parts[0])
    total += basic.modified_schwefel(parts[1])
    total += basic.bent_cigar(parts[2])
    return total + 1200.0


def f13(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 3 (N=3), offset 1300.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.3, 0.3, 0.4]. The resulting parts are fed, in order, to
    the bent cigar, Rosenbrock and Lunacek bi-Rastrigin basic functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][12]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[12]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][2]`` is used.

    Returns:
        Sum of the component values plus 1300.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][12] if rotation is None else rotation
    offset = transforms.shifts[12][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][2] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.3, 0.3, 0.4])

    total = basic.bent_cigar(parts[0])
    total += basic.rosenbrock(parts[1])
    # Called with the part only: no extra shift/rotation is passed here.
    total += basic.lunacek_bi_rastrigin(parts[2])
    return total + 1300.0


def f14(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 4 (N=4), offset 1400.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.2, 0.2, 0.2, 0.4]. The resulting parts are fed, in order,
    to the high-conditioned elliptic, Ackley, Schaffer's F7 and Rastrigin
    basic functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][13]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[13]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][3]`` is used.

    Returns:
        Sum of the component values plus 1400.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][13] if rotation is None else rotation
    offset = transforms.shifts[13][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][3] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.2, 0.2, 0.2, 0.4])

    total = basic.high_conditioned_elliptic(parts[0])
    total += basic.ackley(parts[1])
    total += basic.schaffers_f7(parts[2])
    total += basic.rastrigin(parts[3])
    return total + 1400.0


def f15(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 5 (N=4), offset 1500.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.2, 0.2, 0.3, 0.3]. The resulting parts are fed, in order,
    to the bent cigar, HGBat, Rastrigin and Rosenbrock basic functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][14]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[14]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][4]`` is used.

    Returns:
        Sum of the component values plus 1500.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][14] if rotation is None else rotation
    offset = transforms.shifts[14][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][4] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.2, 0.2, 0.3, 0.3])

    total = basic.bent_cigar(parts[0])
    total += basic.h_g_bat(parts[1])
    total += basic.rastrigin(parts[2])
    total += basic.rosenbrock(parts[3])
    return total + 1500.0


def f16(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 6 (N=4), offset 1600.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.2, 0.2, 0.3, 0.3]. The resulting parts are fed, in order,
    to the expanded Schaffer's F6, HGBat, Rosenbrock and modified Schwefel
    basic functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][15]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[15]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][5]`` is used.

    Returns:
        Sum of the component values plus 1600.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][15] if rotation is None else rotation
    offset = transforms.shifts[15][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][5] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.2, 0.2, 0.3, 0.3])

    total = basic.expanded_schaffers_f6(parts[0])
    total += basic.h_g_bat(parts[1])
    total += basic.rosenbrock(parts[2])
    total += basic.modified_schwefel(parts[3])
    return total + 1600.0


def f17(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 7 (N=5), offset 1700.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.1, 0.2, 0.2, 0.2, 0.3]. The resulting parts are fed, in
    order, to the Katsuura, Ackley, expanded Griewank-plus-Rosenbrock,
    modified Schwefel and Rastrigin basic functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][16]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[16]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][6]`` is used.

    Returns:
        Sum of the component values plus 1700.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][16] if rotation is None else rotation
    offset = transforms.shifts[16][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][6] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.1, 0.2, 0.2, 0.2, 0.3])

    total = basic.katsuura(parts[0])
    total += basic.ackley(parts[1])
    total += basic.expanded_griewanks_plus_rosenbrock(parts[2])
    total += basic.modified_schwefel(parts[3])
    total += basic.rastrigin(parts[4])
    return total + 1700.0


def f18(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 8 (N=5), offset 1800.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.2, 0.2, 0.2, 0.2, 0.2]. The resulting parts are fed, in
    order, to the high-conditioned elliptic, Ackley, Rastrigin, HGBat and
    discus basic functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][17]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[17]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][7]`` is used.

    Returns:
        Sum of the component values plus 1800.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][17] if rotation is None else rotation
    offset = transforms.shifts[17][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][7] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.2, 0.2, 0.2, 0.2, 0.2])

    total = basic.high_conditioned_elliptic(parts[0])
    total += basic.ackley(parts[1])
    total += basic.rastrigin(parts[2])
    total += basic.h_g_bat(parts[3])
    total += basic.discus(parts[4])
    return total + 1800.0


def f19(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 9 (N=5), offset 1900.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.2, 0.2, 0.2, 0.2, 0.2]. The resulting parts are fed, in
    order, to the bent cigar, Rastrigin, expanded Griewank-plus-Rosenbrock,
    Weierstrass and expanded Schaffer's F6 basic functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][18]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[18]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][8]`` is used.

    Returns:
        Sum of the component values plus 1900.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][18] if rotation is None else rotation
    offset = transforms.shifts[18][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][8] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.2, 0.2, 0.2, 0.2, 0.2])

    total = basic.bent_cigar(parts[0])
    total += basic.rastrigin(parts[1])
    total += basic.expanded_griewanks_plus_rosenbrock(parts[2])
    total += basic.weierstrass(parts[3])
    total += basic.expanded_schaffers_f6(parts[4])
    return total + 1900.0


def f20(x, rotation=None, shift=None, shuffle=None):
    """
    Hybrid Function 10 (N=6), offset 2000.

    The points are shifted and rotated, then passed to
    ``transforms.shuffle_and_partition`` with the shuffle vector and the
    proportions [0.1, 0.1, 0.2, 0.2, 0.2, 0.2]. The resulting parts are fed,
    in order, to the HappyCat, Katsuura, Ackley, Rastrigin, modified Schwefel
    and Schaffer's F7 basic functions.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotation (matrix): Optional rotation matrix. When None (default),
            ``transforms.rotations[D][19]`` is used.
        shift (array): Optional shift vector. When None (default), the first
            D entries of ``transforms.shifts[19]`` are used.
        shuffle (array): Optional shuffle vector. When None (default),
            ``transforms.shuffles[D][9]`` is used.

    Returns:
        Sum of the component values plus 2000.0.
    """
    points = np.array(x)
    dim = points.shape[1]

    rot = transforms.rotations[dim][19] if rotation is None else rotation
    offset = transforms.shifts[19][:dim] if shift is None else shift
    perm = transforms.shuffles[dim][9] if shuffle is None else shuffle

    moved = transforms.shift_rotate(points, offset, rot)
    parts = transforms.shuffle_and_partition(moved, perm, [0.1, 0.1, 0.2, 0.2, 0.2, 0.2])

    total = basic.happy_cat(parts[0])
    total += basic.katsuura(parts[1])
    total += basic.ackley(parts[2])
    total += basic.rastrigin(parts[3])
    total += basic.modified_schwefel(parts[4])
    total += basic.schaffers_f7(parts[5])
    return total + 2000.0

def _calc_w(x, sigma):
    """
    Compute the per-row composition weight of already-shifted points.

    For each row with squared norm ``d = sum(x_j ** 2)`` the weight is
    ``(1 / d) ** 0.5 * exp(-d / (2 * nx * sigma ** 2))`` where ``nx`` is the
    number of columns. Rows whose squared norm is exactly zero get an
    infinite weight.

    The formula is evaluated only on rows with a non-zero norm, so no
    divide-by-zero warning is raised, and the computation is done in float
    so that integer input neither truncates the weights nor fails when the
    infinite weight is stored.

    Args:
        x (array): 2-D array of shifted points, one point per row.
        sigma (float): Width parameter of the exponential term.

    Returns:
        1-D float array with one weight per row of ``x``.
    """
    nx = x.shape[1]
    sq_norm = np.sum(x * x, axis=1).astype(float)

    # Start from the "exactly on the shift" value and overwrite the rest.
    w = np.full(sq_norm.shape, np.inf)
    nzmask = sq_norm != 0
    d = sq_norm[nzmask]
    w[nzmask] = (1.0 / d) ** 0.5 * np.exp(-d / (2.0 * nx * sigma * sigma))
    return w


def _composition(x, rotations, shifts, funcs, sigmas, lambdas, biases):
    """
    Blend several shifted and rotated basic functions into one value per row.

    For each component ``i`` the points are transformed with
    ``transforms.shift_rotate`` and evaluated by ``funcs[i]``; the component
    weight comes from ``_calc_w`` applied to the shifted (unrotated) points.
    Weights are normalised per row and the result is the weighted sum of
    ``lambdas * vals + biases``.

    A point lying exactly on a component's shift receives an infinite weight
    from ``_calc_w``. Normalising that directly gives inf / inf = NaN, so
    such rows are handled explicitly: all weight is given to the
    component(s) with infinite weight, which is the limit of the normalised
    weights as the point approaches the shift.

    Args:
        x (array): 2-D array of points, shape (nv, nx).
        rotations: Indexable; ``rotations[i]`` is used for component ``i``.
        shifts: Indexable; the first ``nx`` entries of ``shifts[i]`` are
            used for component ``i``.
        funcs (list): Callables, one per component.
        sigmas (array): Weight widths, one per component.
        lambdas (array): Scaling factors applied to the component values.
        biases (array): Offsets added to the scaled component values.

    Returns:
        1-D array of length nv with the composed function values.
    """
    nv = x.shape[0]
    nx = x.shape[1]

    N = len(funcs)
    vals = np.zeros((nv, N))
    w = np.zeros((nv, N))
    for i in range(N):
        shift_i = shifts[i][:nx]
        x_shifted = x - np.expand_dims(shift_i, 0)
        x_t = transforms.shift_rotate(x, shift_i, rotations[i])
        vals[:, i] = funcs[i](x_t)
        w[:, i] = _calc_w(x_shifted, sigmas[i])

    # Rows sitting exactly on a component optimum: replace the infinite
    # weights by 1 and all finite ones by 0 before normalising.
    inf_mask = np.isinf(w)
    on_optimum = inf_mask.any(axis=1)
    w[on_optimum, :] = inf_mask[on_optimum, :].astype(float)

    w_sm = np.sum(w, axis=1)
    nz_mask = w_sm != 0.0
    w[nz_mask, :] /= w_sm[nz_mask, None]
    # All weights vanished: fall back to a uniform blend.
    w[~nz_mask, :] = 1 / N

    return np.sum(w * (lambdas * vals + biases), axis=1)


def _compose_hybrids(x, rotations, shifts, shuffles, funcs, sigmas, offsets, biases):
    """
    Blend several hybrid functions into one value per row.

    Each ``funcs[i]`` is called on the raw points with keyword arguments
    ``rotation``, ``shift`` and ``shuffle``; ``offsets[i]`` is subtracted from
    its result. The component weight comes from ``_calc_w`` applied to the
    shifted points. Weights are normalised per row and the result is the
    weighted sum of ``vals + biases``.

    A point lying exactly on a component's shift receives an infinite weight
    from ``_calc_w``. Normalising that directly gives inf / inf = NaN, so
    such rows are handled explicitly: all weight is given to the
    component(s) with infinite weight, which is the limit of the normalised
    weights as the point approaches the shift.

    Args:
        x (array): 2-D array of points, shape (nv, nx).
        rotations: Indexable; ``rotations[i]`` is passed to ``funcs[i]``.
        shifts: Indexable; the first ``nx`` entries of ``shifts[i]`` are
            passed to ``funcs[i]`` and used for the weight.
        shuffles: Indexable; ``shuffles[i]`` is passed to ``funcs[i]``.
        funcs (list): Hybrid functions, one per component.
        sigmas (array): Weight widths, one per component.
        offsets (array): Values subtracted from each component result.
        biases (array): Offsets added to the component values.

    Returns:
        1-D array of length nv with the composed function values.
    """
    nv = x.shape[0]
    nx = x.shape[1]

    N = len(funcs)
    vals = np.zeros((nv, N))
    w = np.zeros((nv, N))
    for i in range(N):
        shift_i = shifts[i][:nx]
        x_shifted = x - np.expand_dims(shift_i, 0)
        vals[:, i] = funcs[i](x, rotation=rotations[i], shift=shift_i, shuffle=shuffles[i]) - offsets[i]
        w[:, i] = _calc_w(x_shifted, sigmas[i])

    # Rows sitting exactly on a component optimum: replace the infinite
    # weights by 1 and all finite ones by 0 before normalising.
    inf_mask = np.isinf(w)
    on_optimum = inf_mask.any(axis=1)
    w[on_optimum, :] = inf_mask[on_optimum, :].astype(float)

    w_sm = np.sum(w, axis=1)
    nz_mask = w_sm != 0.0
    w[nz_mask, :] /= w_sm[nz_mask, None]
    # All weights vanished: fall back to a uniform blend.
    w[~nz_mask, :] = 1 / N

    return np.sum(w * (vals + biases), axis=1)


def f21(x, rotations=None, shifts=None):
    """
    Composition Function 1 (N=3), offset 2100.

    Components, in order: Rosenbrock, high-conditioned elliptic, Rastrigin.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][0]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[0]`` is used.

    Returns:
        The value of ``_composition`` for these components plus 2100.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][0] if rotations is None else rotations
    offs = transforms.shifts_cf[0] if shifts is None else shifts

    return _composition(
        points,
        rots,
        offs,
        funcs=[basic.rosenbrock, basic.high_conditioned_elliptic, basic.rastrigin],
        sigmas=np.array([10.0, 20.0, 30.0]),
        lambdas=np.array([1.0, 1.0e-6, 1.0]),
        biases=np.array([0.0, 100.0, 200.0]),
    ) + 2100


def f22(x, rotations=None, shifts=None):
    """
    Composition Function 2 (N=3), offset 2200.

    Components, in order: Rastrigin, Griewank, modified Schwefel.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][1]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[1]`` is used.

    Returns:
        The value of ``_composition`` for these components plus 2200.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][1] if rotations is None else rotations
    offs = transforms.shifts_cf[1] if shifts is None else shifts

    return _composition(
        points,
        rots,
        offs,
        funcs=[basic.rastrigin, basic.griewank, basic.modified_schwefel],
        sigmas=np.array([10.0, 20.0, 30.0]),
        lambdas=np.array([1.0, 10.0, 1.0]),
        biases=np.array([0.0, 100.0, 200.0]),
    ) + 2200


def f23(x, rotations=None, shifts=None):
    """
    Composition Function 3 (N=4), offset 2300.

    Components, in order: Rosenbrock, Ackley, modified Schwefel, Rastrigin.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][2]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[2]`` is used.

    Returns:
        The value of ``_composition`` for these components plus 2300.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][2] if rotations is None else rotations
    offs = transforms.shifts_cf[2] if shifts is None else shifts

    return _composition(
        points,
        rots,
        offs,
        funcs=[basic.rosenbrock, basic.ackley, basic.modified_schwefel, basic.rastrigin],
        sigmas=np.array([10.0, 20.0, 30.0, 40.0]),
        lambdas=np.array([1.0, 10.0, 1.0, 1.0]),
        biases=np.array([0.0, 100.0, 200.0, 300.0]),
    ) + 2300


def f24(x, rotations=None, shifts=None):
    """
    Composition Function 4 (N=4), offset 2400.

    Components, in order: Ackley, high-conditioned elliptic, Griewank,
    Rastrigin.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][3]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[3]`` is used.

    Returns:
        The value of ``_composition`` for these components plus 2400.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][3] if rotations is None else rotations
    offs = transforms.shifts_cf[3] if shifts is None else shifts

    return _composition(
        points,
        rots,
        offs,
        funcs=[basic.ackley, basic.high_conditioned_elliptic, basic.griewank, basic.rastrigin],
        sigmas=np.array([10.0, 20.0, 30.0, 40.0]),
        lambdas=np.array([1.0, 1.0e-6, 10.0, 1.0]),
        biases=np.array([0.0, 100.0, 200.0, 300.0]),
    ) + 2400


def f25(x, rotations=None, shifts=None):
    """
    Composition Function 5 (N=5), offset 2500.

    Components, in order: Rastrigin, HappyCat, Ackley, discus, Rosenbrock.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][4]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[4]`` is used.

    Returns:
        The value of ``_composition`` for these components plus 2500.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][4] if rotations is None else rotations
    offs = transforms.shifts_cf[4] if shifts is None else shifts

    return _composition(
        points,
        rots,
        offs,
        funcs=[basic.rastrigin, basic.happy_cat, basic.ackley, basic.discus, basic.rosenbrock],
        sigmas=np.array([10.0, 20.0, 30.0, 40.0, 50.0]),
        lambdas=np.array([10.0, 1.0, 10.0, 1.0e-6, 1.0]),
        biases=np.array([0.0, 100.0, 200.0, 300.0, 400.0]),
    ) + 2500


def f26(x, rotations=None, shifts=None):
    """
    Composition Function 6 (N=5), offset 2600.

    Components, in order: expanded Schaffer's F6, modified Schwefel,
    Griewank, Rosenbrock, Rastrigin.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][5]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[5]`` is used.

    Returns:
        The value of ``_composition`` for these components plus 2600.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][5] if rotations is None else rotations
    offs = transforms.shifts_cf[5] if shifts is None else shifts

    # NOTE: the lambdas below are the ones used in the code; the problem
    # definitions specify [1.0e-26, 10.0, 1.0e-6, 10.0, 5.0e-4] instead.
    return _composition(
        points,
        rots,
        offs,
        funcs=[
            basic.expanded_schaffers_f6,
            basic.modified_schwefel,
            basic.griewank,
            basic.rosenbrock,
            basic.rastrigin,
        ],
        sigmas=np.array([10.0, 20.0, 20.0, 30.0, 40.0]),
        lambdas=np.array([5.0e-4, 1.0, 10.0, 1.0, 10.0]),
        biases=np.array([0.0, 100.0, 200.0, 300.0, 400.0]),
    ) + 2600


def f27(x, rotations=None, shifts=None):
    """
    Composition Function 7 (N=6), offset 2700.

    Components, in order: HGBat, Rastrigin, modified Schwefel, bent cigar,
    high-conditioned elliptic, expanded Schaffer's F6.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][6]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[6]`` is used.

    Returns:
        The value of ``_composition`` for these components plus 2700.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][6] if rotations is None else rotations
    offs = transforms.shifts_cf[6] if shifts is None else shifts

    components = [
        basic.h_g_bat,
        basic.rastrigin,
        basic.modified_schwefel,
        basic.bent_cigar,
        basic.high_conditioned_elliptic,
        basic.expanded_schaffers_f6,
    ]
    return _composition(
        points,
        rots,
        offs,
        funcs=components,
        sigmas=np.array([10.0, 20.0, 30.0, 40.0, 50.0, 60.0]),
        lambdas=np.array([10.0, 10.0, 2.5, 1.0e-26, 1.0e-6, 5.0e-4]),
        biases=np.array([0.0, 100.0, 200.0, 300.0, 400.0, 500.0]),
    ) + 2700


def f28(x, rotations=None, shifts=None):
    """
    Composition Function 8 (N=6), offset 2800.

    Components, in order: Ackley, Griewank, discus, Rosenbrock, HappyCat,
    expanded Schaffer's F6.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][7]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[7]`` is used.

    Returns:
        The value of ``_composition`` for these components plus 2800.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][7] if rotations is None else rotations
    offs = transforms.shifts_cf[7] if shifts is None else shifts

    components = [
        basic.ackley,
        basic.griewank,
        basic.discus,
        basic.rosenbrock,
        basic.happy_cat,
        basic.expanded_schaffers_f6,
    ]
    return _composition(
        points,
        rots,
        offs,
        funcs=components,
        sigmas=np.array([10.0, 20.0, 30.0, 40.0, 50.0, 60.0]),
        lambdas=np.array([10.0, 10.0, 1.0e-6, 1.0, 1.0, 5.0e-4]),
        biases=np.array([0.0, 100.0, 200.0, 300.0, 400.0, 500.0]),
    ) + 2800


def f29(x, rotations=None, shifts=None, shuffles=None):
    """
    Composition Function 9 (N=3), offset 2900.

    Components, in order: ``hybrid.f15``, ``hybrid.f16``, ``hybrid.f17``. The
    offsets those hybrids add to their own result (1500, 1600, 1700) are
    subtracted again before blending.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][8]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[8]`` is used.
        shuffles (array): Optional shuffle vectors (NxD). When None
            (default), ``transforms.shuffles_cf[D][0]`` is used.

    Returns:
        The value of ``_compose_hybrids`` for these components plus 2900.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][8] if rotations is None else rotations
    offs = transforms.shifts_cf[8] if shifts is None else shifts
    perms = transforms.shuffles_cf[dim][0] if shuffles is None else shuffles

    return _compose_hybrids(
        points,
        rots,
        offs,
        perms,
        funcs=[hybrid.f15, hybrid.f16, hybrid.f17],
        sigmas=np.array([10.0, 30.0, 50.0]),
        # Remove the F* each hybrid adds at the end of its own evaluation.
        offsets=np.array([1500, 1600, 1700]),
        biases=np.array([0.0, 100.0, 200.0]),
    ) + 2900


def f30(x, rotations=None, shifts=None, shuffles=None):
    """
    Composition Function 10 (N=3), offset 3000.

    Components, in order: ``hybrid.f15``, ``hybrid.f18``, ``hybrid.f19``. The
    offsets those hybrids add to their own result (1500, 1800, 1900) are
    subtracted again before blending.

    Args:
        x (array): Points to evaluate. Converted with ``np.array`` and read
            as a 2-D array whose second axis has length D, the problem
            dimension (2, 10, 20, 30, 50 or 100 for the bundled defaults).
        rotations (matrix): Optional rotation matrices (NxDxD). When None
            (default), ``transforms.rotations_cf[D][9]`` is used.
        shifts (array): Optional shift vectors (NxD). When None (default),
            ``transforms.shifts_cf[9]`` is used.
        shuffles (array): Optional shuffle vectors (NxD). When None
            (default), ``transforms.shuffles_cf[D][1]`` is used.

    Returns:
        The value of ``_compose_hybrids`` for these components plus 3000.
    """
    points = np.array(x)
    dim = points.shape[1]

    rots = transforms.rotations_cf[dim][9] if rotations is None else rotations
    offs = transforms.shifts_cf[9] if shifts is None else shifts
    perms = transforms.shuffles_cf[dim][1] if shuffles is None else shuffles

    return _compose_hybrids(
        points,
        rots,
        offs,
        perms,
        funcs=[hybrid.f15, hybrid.f18, hybrid.f19],
        sigmas=np.array([10.0, 30.0, 50.0]),
        # Remove the F* each hybrid adds at the end of its own evaluation.
        offsets=np.array([1500, 1800, 1900]),
        biases=np.array([0.0, 100.0, 200.0]),
    ) + 3000




C:\Users\ivan\WORK\math\!NEW\cec2017\hybrid.py


In [12]:
# Benchmark functions used by the experiments, in numeric order.
# f1, f2 and f3 are currently disabled (commented out).
all_functions = [
    # f1, f2, f3,
    f4, f5, f6, f7, f8, f9, f10,
    f11, f12, f13, f14, f15, f16, f17, f18, f19, f20,
    f21, f22, f23, f24, f25, f26, f27, f28, f29, f30,
]

In [3]:
import numpy as np
from sklearn.cluster import KMeans

from thefittest.optimizers import SelfCGA  # базовый SelfCGA


class ClusterSelfCGA(SelfCGA):
    """
    SelfCGA with phenotype clustering that discards small, poorly performing
    clusters (by analogy with AGA-DSP).

    After each regular ``_update_data`` step the current phenotypes are
    clustered with KMeans. Every cluster that is both smaller than
    ``alpha_N`` times the mean cluster size and worse than ``alpha_F`` times
    the mean population fitness is removed, together with the matching
    entries of the per-individual operator arrays. The population size
    therefore shrinks whenever a cluster is dropped.
    """

    def __init__(
        self,
        *args,
        enable_clustering: bool = True,
        n_clusters: int = 5,
        alpha_N: float = 1.0,
        alpha_F: float = 1.0,
        **kwargs,
    ):
        """
        All standard SelfCGA parameters are forwarded through
        ``*args``/``**kwargs``. Additional keyword-only parameters:

        enable_clustering : whether clustering/filtering is performed
        n_clusters        : maximum number of KMeans clusters
        alpha_N           : threshold factor for the cluster size
        alpha_F           : threshold factor for the cluster fitness
        """
        super().__init__(*args, **kwargs)

        self._enable_clustering = enable_clustering
        self._n_clusters = n_clusters
        self._alpha_N = alpha_N
        self._alpha_F = alpha_F

        # Kept for logging only.
        self._last_clusters_num = 0
        self._last_pop_before = self._pop_size
        self._last_pop_after = self._pop_size

    # -------------------------------
    # _update_data override
    # -------------------------------
    def _update_data(self) -> None:
        """
        Run the base-class ``_update_data`` first, then cluster and filter
        the population when clustering is enabled.
        """
        super()._update_data()

        if self._enable_clustering:
            self._cluster_and_filter()

    # -------------------------------
    # clustering + filtering
    # -------------------------------
    def _cluster_and_filter(self) -> None:
        """
        Cluster the current phenotypes and drop small, bad clusters.

        Reads and (when something is dropped) rewrites:
          - self._population_ph_i  (phenotypes)
          - self._fitness_i        (fitness values)
          - self._population_g_i   (genotypes)
          - self._selection_operators,
            self._crossover_operators,
            self._mutation_operators

        All of these arrays are indexed with the same boolean mask and
        ``self._pop_size`` is updated to the new length. If the criteria
        would remove every individual, the population is left untouched,
        since an empty population cannot be evolved further.

        The ``_last_*`` attributes are updated on every call for logging.
        """
        X = self._population_ph_i
        F = self._fitness_i
        N = X.shape[0]

        self._last_pop_before = N

        # Too few individuals or clusters: nothing to cluster.
        if N <= 1 or self._n_clusters <= 1:
            self._last_clusters_num = 0
            self._last_pop_after = N
            return

        K = min(self._n_clusters, N)

        # Derive the KMeans seed from the optimizer's random state when it
        # offers ``randint``; otherwise let KMeans seed itself.
        if hasattr(self._random_state, "randint"):
            rs = int(self._random_state.randint(0, 1_000_000_000))
        else:
            rs = None

        kmeans = KMeans(
            n_clusters=K,
            n_init=10,
            random_state=rs,
        )
        labels = kmeans.fit_predict(X)

        # Cluster sizes and mean fitness per cluster (NaN for empty ones).
        N_k = np.bincount(labels, minlength=K)
        F_k = np.full(K, np.nan)
        for k in range(K):
            idx_k = (labels == k)
            if np.any(idx_k):
                F_k[k] = F[idx_k].mean()

        non_empty = N_k > 0
        if not np.any(non_empty):
            self._last_clusters_num = 0
            self._last_pop_after = N
            return

        clusters_num = int(np.sum(non_empty))
        N_mean = N_k[non_empty].mean()
        F_mean = F.mean()

        # NOTE(review): this assumes ``_fitness_i`` holds values in the
        # direction indicated by ``_minimization`` and that the base class
        # exposes that attribute (defaulting to minimization otherwise).
        # The relative threshold ``alpha_F * F_mean`` is also sign-sensitive.
        # Confirm both against the base optimizer.
        is_minimization = getattr(self, "_minimization", True)

        # Mask of individuals to keep.
        keep = np.ones(N, dtype=bool)

        for k in range(K):
            if N_k[k] == 0:
                continue

            small = N_k[k] < self._alpha_N * N_mean

            if is_minimization:
                # Minimization: "worse" means a larger mean fitness.
                worse = F_k[k] > self._alpha_F * F_mean
            else:
                # Maximization: "worse" means a smaller mean fitness.
                worse = F_k[k] < self._alpha_F * F_mean

            # Small and bad -> drop the whole cluster.
            if small and worse:
                keep[labels == k] = False

        # Nothing dropped, or everything would be dropped (possible e.g.
        # with alpha_N > 1): leave the population as it is.
        if keep.all() or not keep.any():
            self._last_clusters_num = clusters_num
            self._last_pop_after = N
            return

        # Apply the same mask to every per-individual array.
        self._population_g_i = self._population_g_i[keep]
        self._population_ph_i = self._population_ph_i[keep]
        self._fitness_i = self._fitness_i[keep]

        self._selection_operators = self._selection_operators[keep]
        self._crossover_operators = self._crossover_operators[keep]
        self._mutation_operators = self._mutation_operators[keep]

        # Record the new population size.
        N_new = self._population_g_i.shape[0]
        self._pop_size = int(N_new)

        self._last_clusters_num = clusters_num
        self._last_pop_after = int(N_new)


In [7]:
import os
import time
import numpy as np

# наш новый метод с кластеризацией
#from thefittest.optimizers import ClusterSelfCGA
from thefittest.utils.transformations import GrayCode

# =========================
#  Импорт CEC-функций
# =========================
# Здесь подключи свои функции и сформируй список all_functions.
# Например, если у тебя файл simple.py как раньше:
#
# from cec2017.simple import all_functions
#
# или явно:
# from cec2017.simple import f1, f2, ..., f10
# all_functions = [f1, f2, ..., f10]

# all_functions = [...]  # <-- не забудь реально импортировать!


# =========================
#  Обёртка для CEC-функции
# =========================
def make_cluster_selfcga_fitness(func):
    """
    Adapt a CEC benchmark function to the batch interface used by ClusterSelfCGA.

    The returned callable converts its argument to a float array, promotes a
    single 1-D point to a one-row matrix, calls ``func`` on it and returns the
    result flattened to a 1-D float vector.
    """
    def fitness(X):
        raw = np.asarray(X, dtype=float)
        # A lone point is treated as a population of size one.
        batch = raw.reshape(1, -1) if raw.ndim == 1 else raw
        return np.asarray(func(batch), dtype=float).reshape(-1)
    return fitness


# =========================
#  Основной эксперимент для ClusterSelfCGA
# =========================
def run_experiments_cluster_selfcga():
    """
    Run the ClusterSelfCGA benchmark over every function in ``all_functions``.

    For each dimension and each function the optimizer is run ``n_runs`` times
    with a deterministic per-run seed. Progress is printed, and the per-generation
    best-fitness history of every run is written to
    ``results_cluster_selfcga/D<dim>/<func_name>/run_<id>.txt``.

    Relies on ``all_functions``, ``ClusterSelfCGA`` and ``GrayCode`` being
    available at module level before the call.
    """
    dims = [10, 30]  # 50 and 100 are currently disabled

    n_runs = 20          # independent runs per function
    n_generations = 200  # raise (e.g. to 2000) for longer experiments
    pop_size = 2000

    # Search-space bounds, same as in the AGA / SelfCGA scripts.
    left_border = -100.0
    right_border = 100.0

    # Quantisation step handed to GrayCode.
    step = 1e-3

    # Root folder for the logs of this method.
    base_dir = "results_cluster_selfcga"
    os.makedirs(base_dir, exist_ok=True)

    for dim in dims:
        print(f"\n=== ClusterSelfCGA | DIMENSION {dim} ===")

        dim_dir = os.path.join(base_dir, f"D{dim}")
        os.makedirs(dim_dir, exist_ok=True)

        # Fit the Gray-code transformer for this dimension; the total number
        # of bits becomes the genotype length.
        gc = GrayCode()
        gc.fit(
            left_border=left_border,
            right_border=right_border,
            num_variables=dim,
            h_per_variable=step,
        )
        bits_per_var = gc.get_bits_per_variable()
        str_len = int(bits_per_var.sum())

        print(f"GrayCode: bits per var = {bits_per_var}, total str_len = {str_len}")

        for fi, f in enumerate(all_functions, start=1):
            func_name = getattr(f, "__name__", f"f{fi}")
            func_dir = os.path.join(dim_dir, func_name)
            os.makedirs(func_dir, exist_ok=True)

            print(f"\n===== {func_name} (ClusterSelfCGA, D={dim}) =====")

            for run_id in range(1, n_runs + 1):
                # Seed is unique per (dim, function, run) for these ranges.
                seed = 100000 * dim + 1000 * fi + run_id
                np.random.seed(seed)

                print(f"--- START {func_name} | D={dim} | "
                      f"run {run_id:02d} | seed {seed} ---")

                fitness_fn = make_cluster_selfcga_fitness(f)

                # perf_counter is monotonic, so the measured duration cannot
                # be distorted by system clock adjustments.
                start_time = time.perf_counter()
                optimizer = ClusterSelfCGA(
                    fitness_function=fitness_fn,
                    iters=n_generations,
                    pop_size=pop_size,
                    str_len=str_len,
                    tour_size=2,
                    mutation_rate=0.05,
                    parents_num=2,
                    elitism=True,
                    genotype_to_phenotype=gc.transform,
                    minimization=True,
                    optimal_value=None,
                    show_progress_each=10,
                    keep_history=True,
                    random_state=seed,
                    # clustering-specific parameters:
                    enable_clustering=True,
                    n_clusters=8,   # 8, as in AGA
                    alpha_N=1.0,
                    alpha_F=1.0,
                )

                optimizer.fit()
                elapsed = time.perf_counter() - start_time

                fittest = optimizer.get_fittest()
                stats = optimizer.get_stats()

                # Per-generation best values of the objective, or None when
                # the optimizer exposes no (or an empty) history.
                best_hist = None
                if "max_fitness" in stats:
                    internal_best = np.asarray(stats["max_fitness"], dtype=float).ravel()
                    if internal_best.size > 0:
                        # Assumption carried over from the SelfCGA script: with
                        # minimization=True the optimizer works on -f(x), so
                        # f(x) = -internal. TODO confirm against the library.
                        best_hist = -internal_best

                if best_hist is not None:
                    best_fitness = float(best_hist.min())
                else:
                    # Fallback when no usable history is available.
                    best_fitness = float(fittest["fitness"])

                print(f"--- DONE {func_name} | D={dim} | run {run_id:02d} | "
                      f"best = {best_fitness:.6e} | time = {elapsed:.2f}s")

                # The pop_before / pop_after columns are both written as the
                # configured pop_size and the clusters column as the optimizer's
                # configured _n_clusters (0 if absent); none of them is a
                # per-generation measurement.
                clusters = getattr(optimizer, "_n_clusters", 0)

                if best_hist is not None:
                    rows = [
                        f"{gen_idx}\t{best_g:.12e}\t"
                        f"{pop_size}\t{pop_size}\t{clusters}\n"
                        for gen_idx, best_g in enumerate(best_hist, 1)
                    ]
                else:
                    # No history: record at least the final result.
                    rows = [
                        f"{n_generations}\t{best_fitness:.12e}\t"
                        f"{pop_size}\t{pop_size}\t{clusters}\n"
                    ]

                log_path = os.path.join(func_dir, f"run_{run_id:02d}.txt")
                with open(log_path, "w", encoding="utf-8") as fp:
                    fp.write(f"# {func_name}, D={dim}, run {run_id}\n")
                    fp.write(f"# best_fitness = {best_fitness:.12e}\n")
                    fp.write("# gen\tbest_fitness\tpop_before\tpop_after\tclusters\n")
                    fp.writelines(rows)


# =========================
#  Entry point
# =========================
# Run the whole benchmark only when this code is executed directly
# (as a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    run_experiments_cluster_selfcga()



=== ClusterSelfCGA | DIMENSION 10 ===
GrayCode: bits per var = [18 18 18 18 18 18 18 18 18 18], total str_len = 180

===== f1 (ClusterSelfCGA, D=10) =====
--- START f1 | D=10 | run 01 | seed 1001001 ---
0-th iteration with the best fitness = 94125367798.2765
10-th iteration with the best fitness = 5187003713.0560055
20-th iteration with the best fitness = 346012815.62008065
30-th iteration with the best fitness = 35730712.49574092
40-th iteration with the best fitness = 7244050.740037011
50-th iteration with the best fitness = 697035.255380064
60-th iteration with the best fitness = 276656.8700203203
70-th iteration with the best fitness = 67182.85037459037
80-th iteration with the best fitness = 19687.188943168425
90-th iteration with the best fitness = 9143.09152172367
100-th iteration with the best fitness = 2122.0599566050505
110-th iteration with the best fitness = 634.1715614697389
120-th iteration with the best fitness = 338.7249260525224
130-th iteration with the best fitness 

AssertionError: 

In [8]:
import numpy as np
from numpy.typing import NDArray
from typing import Any, Dict, Optional, Tuple, Union

from thefittest.optimizers import SelfCGA
from sklearn.cluster import KMeans


class ClusterSelfCGA(SelfCGA):
    """
    SelfCGA with phenotype clustering and replacement of weak clusters.

    After the base-class bookkeeping of every generation the current
    phenotypes are clustered with KMeans. Clusters that are both small and of
    low mean fitness are "cleaned" by overwriting the genotypes of their
    members:
      * with a genotype drawn at random from the elite archive, or
      * with a random bit string when the archive is empty.

    The population size is never changed; only rows of ``_population_g_i``
    are overwritten.
    """

    def __init__(
        self,
        *args,
        enable_clustering: bool = True,
        n_clusters: int = 5,
        alpha_N: float = 1.0,
        alpha_F: float = 1.0,
        elite_archive_size: int = 20,
        replace_prob: float = 1.0,
        **kwargs,
    ):
        """
        Extra parameters (everything else is forwarded to SelfCGA):
          enable_clustering  - whether to run the clustering step at all.
          n_clusters         - requested number of clusters (capped by the
                               population size).
          alpha_N            - size factor: a cluster is "small" when its size
                               is below alpha_N * mean cluster size.
          alpha_F            - fitness factor: a cluster is "weak" when its
                               mean fitness is below the median cluster fitness
                               scaled by alpha_F. Larger values always flag
                               more clusters, whatever the sign of the median.
          elite_archive_size - how many best individuals to keep in the archive
                               (negative values are treated as 0).
          replace_prob       - probability of replacing each individual of a
                               flagged cluster.
        """
        super().__init__(*args, **kwargs)

        self._enable_clustering: bool = enable_clustering
        self._n_clusters: int = n_clusters
        self._alpha_N: float = alpha_N
        self._alpha_F: float = alpha_F
        self._elite_archive_size: int = elite_archive_size
        self._replace_prob: float = replace_prob

        # Elite archive: genotypes and their fitness values.
        self._elite_bits: Optional[NDArray[np.byte]] = None
        self._elite_fit: Optional[NDArray[np.float64]] = None

    # ------------------------------------------------------------------
    # Hook into the standard GA loop
    # ------------------------------------------------------------------

    def _update_data(self) -> None:
        """
        Run the base-class update, then refresh the elite archive and, if
        enabled, cluster the population and replace weak clusters.
        """
        super()._update_data()

        # The archive is refreshed first so replacement can draw from it.
        self._update_elite_archive()

        if self._enable_clustering:
            self._cluster_and_replace()

    # ------------------------------------------------------------------
    # Elite archive
    # ------------------------------------------------------------------

    def _update_elite_archive(self) -> None:
        """
        Merge the best individuals of the current population into the archive,
        keeping at most ``elite_archive_size`` entries with the highest fitness
        (higher fitness is treated as better).
        """
        pop_g = self._population_g_i
        fit = self._fitness_i.astype(float)

        if pop_g.shape[0] == 0:
            return

        # A negative size would otherwise turn the slices below into
        # "everything except the last |size| entries".
        capacity = max(0, self._elite_archive_size)

        # Indices sorted by descending fitness.
        order = np.argsort(fit)[::-1]
        best_idx = order[:min(capacity, len(order))]

        cand_bits = pop_g[best_idx].copy()
        cand_fit = fit[best_idx].copy()

        if self._elite_bits is None:
            self._elite_bits = cand_bits
            self._elite_fit = cand_fit
            return

        merged_bits = np.vstack([self._elite_bits, cand_bits])
        merged_fit = np.concatenate([self._elite_fit, cand_fit])

        order2 = np.argsort(merged_fit)[::-1]
        keep = order2[:min(capacity, len(order2))]

        self._elite_bits = merged_bits[keep]
        self._elite_fit = merged_fit[keep]

    # ------------------------------------------------------------------
    # Clustering + replacement of weak clusters
    # ------------------------------------------------------------------

    @staticmethod
    def _fitness_threshold(reference: float, alpha: float) -> float:
        """
        Scale ``reference`` by ``alpha`` so that a larger ``alpha`` always
        yields a higher threshold.

        For a non-negative reference this is ``alpha * reference``. A negative
        reference is mirrored, because plain multiplication would lower the
        threshold as ``alpha`` grows. With ``alpha == 1`` the reference is
        returned unchanged in both cases.
        """
        if reference >= 0:
            return alpha * reference
        return (2.0 - alpha) * reference

    def _cluster_and_replace(self) -> None:
        """
        Cluster the phenotypes with KMeans and overwrite the genotypes of
        individuals that belong to small, low-fitness clusters.

        Each affected individual is replaced (with probability
        ``replace_prob``) by a random archive member, or by a random bit string
        when the archive is empty. The length of ``_population_g_i`` is not
        changed.
        """
        X = self._population_ph_i
        fit = self._fitness_i
        n = X.shape[0]

        if n < 2:
            return  # nothing to cluster

        # Never request more clusters than there are points.
        k = min(self._n_clusters, n)
        if k < 2:
            return

        kmeans = KMeans(
            n_clusters=k,
            n_init=10,
            random_state=None,
        )
        labels = kmeans.fit_predict(X)

        # Member indices of every non-empty cluster.
        cluster_indices = []
        for c in range(k):
            idx = np.where(labels == c)[0]
            if len(idx) > 0:
                cluster_indices.append(idx)

        if not cluster_indices:
            return

        N_k = np.array([len(idx) for idx in cluster_indices], dtype=float)
        F_k = np.array(
            [float(fit[idx].mean()) for idx in cluster_indices], dtype=float
        )

        size_limit = self._alpha_N * float(N_k.mean())
        # Median over clusters, scaled in a sign-invariant way.
        fit_limit = self._fitness_threshold(
            float(np.median(F_k)), self._alpha_F
        )

        # A cluster is replaced only when it is BOTH small and weak.
        bad_clusters = [
            idx_c
            for idx_c, Nk, Fk in zip(cluster_indices, N_k, F_k)
            if Nk < size_limit and Fk < fit_limit
        ]

        if not bad_clusters:
            return

        # NOTE(review): only genotypes are overwritten below; the matching rows
        # of _population_ph_i and _fitness_i keep their old values. Confirm
        # that the base class does not read them for these individuals before
        # the next evaluation.
        for idx_c in bad_clusters:
            for i in idx_c:
                if np.random.rand() > self._replace_prob:
                    continue

                if (self._elite_bits is not None) and (self._elite_bits.shape[0] > 0):
                    # Preferred donor: a random member of the elite archive.
                    j = np.random.randint(0, self._elite_bits.shape[0])
                    self._population_g_i[i] = self._elite_bits[j].copy()
                else:
                    # No elite available: fall back to a random genotype.
                    rand_bits = np.random.randint(
                        0, 2, size=self._str_len, dtype=np.byte
                    )
                    self._population_g_i[i] = rand_bits


In [None]:
import os
import time
import numpy as np

# наш новый метод с кластеризацией
#from thefittest.optimizers import ClusterSelfCGA
from thefittest.utils.transformations import GrayCode

# =========================
#  Импорт CEC-функций
# =========================
# Здесь подключи свои функции и сформируй список all_functions.
# Например, если у тебя файл simple.py как раньше:
#
# from cec2017.simple import all_functions
#
# или явно:
# from cec2017.simple import f1, f2, ..., f10
# all_functions = [f1, f2, ..., f10]

# all_functions = [...]  # <-- не забудь реально импортировать!


# =========================
#  Обёртка для CEC-функции
# =========================
def make_cluster_selfcga_fitness(func):
    """
    Wrap a CEC benchmark function for use as a ClusterSelfCGA fitness function.

    Input of the wrapper: anything convertible to a float array; a 1-D vector
    is interpreted as a single individual and turned into a one-row matrix.
    Output: the values returned by ``func`` as a flat 1-D float array.
    """
    def fitness(X):
        population = np.asarray(X, dtype=float)
        if population.ndim == 1:
            # Single individual -> population with one row.
            population = population.reshape(1, -1)
        values = np.asarray(func(population), dtype=float)
        return np.ravel(values)
    return fitness


# =========================
#  Основной эксперимент для ClusterSelfCGA
# =========================
def run_experiments_cluster_selfcga():
    """
    Run the ClusterSelfCGA benchmark over every function in ``all_functions``.

    For each dimension and each function the optimizer is run ``n_runs`` times
    with a deterministic per-run seed. Progress is printed, and the per-generation
    best-fitness history of every run is written to
    ``results_cluster_selfcga/D<dim>/<func_name>/run_<id>.txt``.

    Relies on ``all_functions``, ``ClusterSelfCGA`` and ``GrayCode`` being
    available at module level before the call.
    """
    dims = [10]  # 30, 50 and 100 are currently disabled

    n_runs = 20          # independent runs per function
    n_generations = 200  # raise (e.g. to 2000) for longer experiments
    pop_size = 2000

    # Search-space bounds, same as in the AGA / SelfCGA scripts.
    left_border = -100.0
    right_border = 100.0

    # Quantisation step handed to GrayCode.
    step = 1e-3

    # Root folder for the logs of this method.
    base_dir = "results_cluster_selfcga"
    os.makedirs(base_dir, exist_ok=True)

    for dim in dims:
        print(f"\n=== ClusterSelfCGA | DIMENSION {dim} ===")

        dim_dir = os.path.join(base_dir, f"D{dim}")
        os.makedirs(dim_dir, exist_ok=True)

        # Fit the Gray-code transformer for this dimension; the total number
        # of bits becomes the genotype length.
        gc = GrayCode()
        gc.fit(
            left_border=left_border,
            right_border=right_border,
            num_variables=dim,
            h_per_variable=step,
        )
        bits_per_var = gc.get_bits_per_variable()
        str_len = int(bits_per_var.sum())

        print(f"GrayCode: bits per var = {bits_per_var}, total str_len = {str_len}")

        for fi, f in enumerate(all_functions, start=1):
            func_name = getattr(f, "__name__", f"f{fi}")
            func_dir = os.path.join(dim_dir, func_name)
            os.makedirs(func_dir, exist_ok=True)

            print(f"\n===== {func_name} (ClusterSelfCGA, D={dim}) =====")

            for run_id in range(1, n_runs + 1):
                # Seed is unique per (dim, function, run) for these ranges.
                seed = 100000 * dim + 1000 * fi + run_id
                np.random.seed(seed)

                print(f"--- START {func_name} | D={dim} | "
                      f"run {run_id:02d} | seed {seed} ---")

                fitness_fn = make_cluster_selfcga_fitness(f)

                # perf_counter is monotonic, so the measured duration cannot
                # be distorted by system clock adjustments.
                start_time = time.perf_counter()
                optimizer = ClusterSelfCGA(
                    fitness_function=fitness_fn,
                    iters=n_generations,
                    pop_size=pop_size,
                    str_len=str_len,
                    tour_size=2,
                    mutation_rate=0.05,
                    parents_num=2,
                    elitism=True,
                    genotype_to_phenotype=gc.transform,
                    minimization=True,
                    optimal_value=None,
                    show_progress_each=10,
                    keep_history=True,
                    random_state=seed,
                    # clustering-specific parameters:
                    enable_clustering=True,
                    n_clusters=8,
                    alpha_N=1.0,
                    alpha_F=1.0,
                    elite_archive_size=20,
                    replace_prob=1.0,
                )

                optimizer.fit()
                elapsed = time.perf_counter() - start_time

                fittest = optimizer.get_fittest()
                stats = optimizer.get_stats()

                # Per-generation best values of the objective, or None when
                # the optimizer exposes no (or an empty) history.
                best_hist = None
                if "max_fitness" in stats:
                    internal_best = np.asarray(stats["max_fitness"], dtype=float).ravel()
                    if internal_best.size > 0:
                        # Assumption carried over from the SelfCGA script: with
                        # minimization=True the optimizer works on -f(x), so
                        # f(x) = -internal. TODO confirm against the library.
                        best_hist = -internal_best

                if best_hist is not None:
                    best_fitness = float(best_hist.min())
                else:
                    # Fallback when no usable history is available.
                    best_fitness = float(fittest["fitness"])

                print(f"--- DONE {func_name} | D={dim} | run {run_id:02d} | "
                      f"best = {best_fitness:.6e} | time = {elapsed:.2f}s")

                # The pop_before / pop_after columns are both written as the
                # configured pop_size and the clusters column as the optimizer's
                # configured _n_clusters (0 if absent); none of them is a
                # per-generation measurement.
                clusters = getattr(optimizer, "_n_clusters", 0)

                if best_hist is not None:
                    rows = [
                        f"{gen_idx}\t{best_g:.12e}\t"
                        f"{pop_size}\t{pop_size}\t{clusters}\n"
                        for gen_idx, best_g in enumerate(best_hist, 1)
                    ]
                else:
                    # No history: record at least the final result.
                    rows = [
                        f"{n_generations}\t{best_fitness:.12e}\t"
                        f"{pop_size}\t{pop_size}\t{clusters}\n"
                    ]

                log_path = os.path.join(func_dir, f"run_{run_id:02d}.txt")
                with open(log_path, "w", encoding="utf-8") as fp:
                    fp.write(f"# {func_name}, D={dim}, run {run_id}\n")
                    fp.write(f"# best_fitness = {best_fitness:.12e}\n")
                    fp.write("# gen\tbest_fitness\tpop_before\tpop_after\tclusters\n")
                    fp.writelines(rows)


# =========================
#  Entry point
# =========================
# Run the whole benchmark only when this code is executed directly
# (as a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    run_experiments_cluster_selfcga()



=== ClusterSelfCGA | DIMENSION 10 ===
GrayCode: bits per var = [18 18 18 18 18 18 18 18 18 18], total str_len = 180

===== f4 (ClusterSelfCGA, D=10) =====
--- START f4 | D=10 | run 01 | seed 1001001 ---
0-th iteration with the best fitness = 1436.8582900851325
10-th iteration with the best fitness = 415.0189718814635
20-th iteration with the best fitness = 407.0960338682402
30-th iteration with the best fitness = 404.42858754145186
40-th iteration with the best fitness = 402.57595026579
50-th iteration with the best fitness = 401.76544121427395
60-th iteration with the best fitness = 401.36717482604604
70-th iteration with the best fitness = 401.094910778829
80-th iteration with the best fitness = 400.9190127054572
90-th iteration with the best fitness = 400.8702092132714
100-th iteration with the best fitness = 400.83221187151634
110-th iteration with the best fitness = 400.80852185907446
120-th iteration with the best fitness = 400.79222059141745
130-th iteration with the best fitne