In [2]:
import random
import operator
import copy
import pprint
import numpy as np
import pandas as pd
# import gp
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import KFold
import deap.gp as gp
from deap import creator, base, tools, algorithms
from deap.gp import cxOnePoint as cx_simple
from deap.gp import PrimitiveSet
from data import get_embeddings

In [19]:
def protected_div(x, y):
    mask = y == 0
    safe_y = np.where(mask, 1, y)
    return np.where(mask, 1, x / safe_y)


def protected_sqrt(x):
    x = np.abs(x)
    return np.sqrt(x)

In [None]:
class GP:
    def __init__(self, embedding_model, pop_size, dim, cx_method, mut_pb, cx_pb, n_gen, data, embeddings, x, y):
        self.embeddings_model = embedding_model
        self.pop_size = pop_size
        self.dim = dim
        self.cx_method = cx_method
        self.mut_pb = mut_pb
        self.cx_pb = cx_pb
        self.n_gen = n_gen
        self.pop = None
        self.data = data
        self.embeddings = embeddings
        self.inputword = x
        self.realword = y
        self.eval_count = 0

    def register(self):
        # 定義算術表達式的原語集（Primitive Set）
        self.pset = gp.PrimitiveSet("MAIN", 5)
        self.pset.addPrimitive(np.add, 2)
        self.pset.addPrimitive(np.subtract, 2)
        self.pset.addPrimitive(np.multiply, 2)
        self.pset.addPrimitive(protected_div, 2)  ##確認一次ok
        # self.pset.addPrimitive(np.sqrt, 1)
        self.pset.addPrimitive(protected_sqrt, 1)
        self.pset.addPrimitive(np.square, 1)
        self.pset.renameArguments(ARG0="a", ARG1="b", ARG2="c", ARG3="d", ARG4="e")
        # print("Attributes of gp.PrimitiveSet:", dir(self.pset))
        # 創建適應度類和個體類
        creator.create("FitnessMax", base.Fitness, weights=(1,))
        creator.create(
            "Individual", gp.PrimitiveTree, fitness=creator.FitnessMax, pset=self.pset
        )  # output不算fitness
        # creator.create("Individual", gp.PrimitiveTree) #output不算fitness
        # 初始化工具箱
        self.toolbox = base.Toolbox()
        self.toolbox.register("expr", gp.genHalfAndHalf, pset=self.pset, min_=1, max_=5)
        # self.toolbox.register("individual", creator.Individual, fitness=creator.FitnessMax, expr=self.toolbox.expr) #gene_gen=toolbox.gene_gen, n_genes=n_genes
        self.toolbox.register(
            "individual", tools.initIterate, creator.Individual, self.toolbox.expr
        )  # gene_gen=toolbox.gene_gen, n_genes=n_genes
        self.toolbox.register(
            "population",
            tools.initRepeat,
            list,
            self.toolbox.individual,
            n=self.pop_size,
        )  # population數ok
        # 註冊operators
        self.toolbox.register("select", tools.selRandom, k=3)
        self.toolbox.register("cx_simple", gp.cxOnePoint)  # simple crossover
        # self.toolbox.register("cx_uniform", self.cx_uniform)
        # self.toolbox.register("cx_fair", self.cx_fair)
        self.toolbox.register("cx_one", self.cxOnePoint)

        self.toolbox.register(
            "mutate", gp.mutUniform, expr=self.toolbox.expr, pset=self.pset
        )
        self.toolbox.decorate(
            "mutate", gp.staticLimit(operator.attrgetter("height"), max_value=5)
        )
        self.toolbox.register("evaluate", self.evaluate)  #
        # self.toolbox.register("compile", gep.compile_, pset=self.pset)
        # 註冊record工具
        self.stats = tools.Statistics(
            key=lambda ind: ind.fitness.values
        )  #!!!ind: ind.fitness.values[0] fitness???
        self.stats.register("avg", np.mean)
        self.stats.register("std", np.std)
        self.stats.register("min", np.min)
        self.stats.register("max", np.max)
        self.hof = tools.HallOfFame(10) #hall of fame size

    def initialize_pop(self):
        self.register()
        # print(self.pop_size)
        self.pop = self.toolbox.population(n=self.pop_size)
        # evaluate the entire population
        fitnesses = map(self.toolbox.evaluate, self.pop)
        for ind, fit in zip(self.pop, fitnesses):
            ind.fitness.values = fit

    def subtree_height(self, tree, index):
        # """Calculate the height of the subtree starting at the given index."""
        def _height(node_index):
            node = tree[node_index]
            if node.arity == 0:  # Leaf node
                return 1
            else:
                return 1 + max(
                    _height(child_index)
                    for child_index in range(
                        node_index + 1, node_index + 1 + node.arity
                    )
                )
        return _height(index)

    def searchSubtree_idx(self, tree, begin):
        end = begin + 1
        total = tree[begin].arity
        while total > 0:
            total += tree[end].arity - 1
            end += 1
        return begin, end

    def clean_data(self, data):
        data = np.where(np.isinf(data), np.finfo(np.float32).max, data)
        data = np.nan_to_num(data, nan=0.0)
        return data

    def evaluate(self, individual):
        """Evalute the fitness of an individual"""
        # print(f"individual種類:{type(individual)}")
        func = gp.compile(individual, self.pset)
        total_similarity = 0.0
        for data_index in range(len(self.inputword)):
            words = self.inputword.iloc[data_index]
            in_vectors = [self.embeddings[word] for word in words]
            a, b, c, d, e = in_vectors[:5]
            #print(f"in_vectors: {in_vectors}")
            y = self.realword.iloc[data_index]
            #print(f"y: {y}")
            out_vector = self.embeddings[y]
            #print(f"out_vector: {out_vector}")
            #if out_vector.ndim == 3:
                #out_vector = out_vector.reshape(1, -1)

            predict = self.clean_data(func(a, b, c, d, e))
            #print(predict)
            # if isinstance(predict, tuple):
            #     predict = np.array(predict)
            #     print(predict)
            # if predict.ndim == 3:
            #     print(predict)
            #     predict = predict.reshape(1, -1)

            #print("predict after clean:", np.array(predict).shape)
            #print("Shape of out_vector:", np.array(out_vector).shape)

            similarity = cosine_similarity([predict], [out_vector])[0][0]
            total_similarity += similarity
        fitness = total_similarity / len(self.inputword)
        ftiness = self.clean_data(fitness)
        self.eval_count += 1
        return (fitness,)

    def initialize_pop(self):
        self.register()
        # print(self.pop_size)
        self.pop = self.toolbox.population(n=self.pop_size)
        # evaluate the entire population
        fitnesses = map(self.toolbox.evaluate, self.pop)
        for ind, fit in zip(self.pop, fitnesses):
            ind.fitness.values = fit

    def subtree_height(self, tree, index):
        # """Calculate the height of the subtree starting at the given index."""
        def _height(node_index):
            node = tree[node_index]
            if node.arity == 0:  # Leaf node
                return 1
            else:
                return 1 + max(
                    _height(child_index)
                    for child_index in range(
                        node_index + 1, node_index + 1 + node.arity
                    )
                )
        return _height(index)

    def searchSubtree_idx(self, tree, begin):
        end = begin + 1
        total = tree[begin].arity
        while total > 0:
            total += tree[end].arity - 1
            end += 1
        return begin, end

    def clean_data(self, data):
        data = np.where(np.isinf(data), np.finfo(np.float32).max, data)
        data = np.nan_to_num(data, nan=0.0)
        return data

    def evaluate(self, individual):
        """Evalute the fitness of an individual"""
        # print(f"individual種類:{type(individual)}")
        func = gp.compile(individual, self.pset)
        total_similarity = 0.0
        for data_index in range(len(self.inputword)):
            words = self.inputword.iloc[data_index]
            in_vectors = [self.embeddings[word] for word in words]
            a, b, c, d, e = in_vectors[:5]
            #print(f"in_vectors: {in_vectors}")
            y = self.realword.iloc[data_index]
            #print(f"y: {y}")
            out_vector = self.embeddings[y]
            #print(f"out_vector: {out_vector}")
            #if out_vector.ndim == 3:
                #out_vector = out_vector.reshape(1, -1)

            predict = self.clean_data(func(a, b, c, d, e))
            #print(predict)
            # if isinstance(predict, tuple):
            #     predict = np.array(predict)
            #     print(predict)
            # if predict.ndim == 3:
            #     print(predict)
            #     predict = predict.reshape(1, -1)

            #print("predict after clean:", np.array(predict).shape)
            #print("Shape of out_vector:", np.array(out_vector).shape)

            similarity = cosine_similarity([predict], [out_vector])[0][0]
            total_similarity += similarity
        fitness = total_similarity / len(self.inputword)
        ftiness = self.clean_data(fitness)
        self.eval_count += 1
        return (fitness,)

    def traverse_tree(stack, res, parent, idx):
        while (res != 0):
            # arity1 += 1
            # print(f"arity1: {arity1}")

            res -= 1
            # print(f"[WHILE -1] res: {res}")

            idx += 1
            stack.append((parent[idx], [], idx))
            # print(f"[WHILE] append stack: {parent[idx1].name}")
            res += parent[idx].arity
            # print(f"[WHILE +arity] res: {res}")

            # print(f"stack: {stack}")
        return stack, res, idx

    def cxOnePoint(self, ind1, ind2):
        #print(f"ind1: {ind1.__str__()}\n, ind2: {ind2.__str__()}")

        idx1 = 0
        idx2 = 0
        # To track the trees
        stack1 = []
        stack2 = []
        # Store the common region
        region1 = []
        region2 = []

        # Start traversing the trees
        while idx1 < len(ind1) and idx2 < len(ind2):
            # Push the nodes to the stack
            stack1.append((ind1[idx1], [], idx1))
            stack2.append((ind2[idx2], [], idx2))

            region1.append([ind1[idx1], idx1])
            region2.append([ind2[idx2], idx2])
            # Not the same arity, then traverse the subtrees
            if stack1[-1][0].arity != stack2[-1][0].arity:
                res1 = stack1[-1][0].arity
                res2 = stack2[-1][0].arity
                stack1, res1, idx1 = self.traverse_tree(stack1, res1, ind1, idx1)
                stack2, res2, idx2 = self.traverse_tree(stack2, res2, ind2, idx2)

            print(f"stack1: {stack1}\nstack2: {stack2}")
            idx1 += 1
            idx2 += 1


        for pri, idx in region1:
            print(f"{idx}: {pri.name}")

        # Select crossover point
        if len(region1) > 0:
            point = random.randint(0, len(region1) - 1)
            print(f"crossover point: {point}")
            print(f"crossover point for trees: {region1[point]}, {region2[point]}")

        # Swap subtrees
        if len(region1) > 0:
            slice1 = ind1.searchSubtree(region1[point][1])
            slice2 = ind2.searchSubtree(region2[point][1])
            ind1[slice1], ind2[slice2] = ind2[slice2], ind1[slice1]

        return ind1, ind2

    def crossover(self, ind1, ind2):
        print(f"crossover num: {self.cx_method}")
        if random.uniform(0, 1) < self.cx_pb:
            if self.cx_method == 5:
                choice = random.choice(
                    [
                        self.toolbox.cx_simple,
                        self.toolbox.cx_uniform,
                        self.toolbox.cx_fair,
                        self.toolbox.cx_one,
                    ]
                )
                try:
                    ind1, ind2 = choice( ind1, ind2)
                except:
                    pass
            #print(f"choice:{choice}")
            if self.cx_method == 1:
                try:
                    ind1, ind2 = self.toolbox.cx_simple( ind1, ind2)
                except:
                    pass
            # if self.cx_method == 2:
            #     try:
            #         ind1, ind2 = self.toolbox.cx_uniform( ind1, ind2)
            #     except:
            #         pass
            # if self.cx_method == 3:
            #     try:
            #         ind1, ind2 = self.toolbox.cx_fair( ind1, ind2)
            #     except:
            #         pass
            if self.cx_method == 4:
                try:
                    ind1, ind2 = self.toolbox.cx_one( ind1, ind2)
                except:
                    pass

        fitness_ind1 = self.toolbox.evaluate(ind1)
        fitness_ind2 = self.toolbox.evaluate(ind2)
        if fitness_ind1 <= fitness_ind2:
            return ind1
        else:
            return ind2

    def mutate(self, child):
        if random.random() < self.mut_pb:
            try:
                self.toolbox.mutate(child)
            except:
                pass
            child.fitness.values = self.toolbox.evaluate(child)
        return child

    def select(self):
        candidates = self.toolbox.select(self.pop)
        parents = candidates[0:3]
        sorted_parents = sorted(parents, key=lambda ind: ind.fitness.values) #小到大排序
        sorted_fitness = [ind.fitness.values for ind in sorted_parents]
        offspring = self.crossover(sorted_parents[1], sorted_parents[2])
        offspring = self.mutate(offspring)
        off_fit = self.toolbox.evaluate(offspring)
        if off_fit[0] >= sorted_fitness[0]:
            idx = self.pop.index(candidates[0])
            #print(self.pop[idx])
            self.pop[idx] = offspring
            #print(f"篩選後的：{self.pop[idx]}")
            #print(off_fit[0])
            self.pop[idx].fitness.values = self.toolbox.evaluate(offspring)
        return

    def evolving(self, model):
        # for g in range(self.n_gen):
        print("開始進化！")
        with open('record.csv', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["eval_count", "avg", "std", "min", "max", "best_individual"])
            while self.eval_count < 1000:
                self.select()
                if self.eval_count % 50 == 0:
                    # self.write_record(writer)
                    print(f"ＥＶＡＬ次數：{self.eval_count}")
                    record = self.stats.compile(self.pop)
                    self.hof.update(self.pop)
                    print(record)
                    print(f"最佳個體：{self.hof[0]}")


In [24]:
import fasttext
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from deap import creator, base, tools, algorithms
import operator
import deap.gp as gp
from deap.gp import PrimitiveSet, genGrow
import csv

# import math
import random
import copy
from data import get_embeddings, load_model


def protected_div(x, y):
    mask = y == 0
    safe_y = np.where(mask, 1, y)
    return np.where(mask, 1, x / safe_y)


def protected_sqrt(x):
    x = np.abs(x)
    return np.sqrt(x)


class GP:
    def __init__(self, embedding_model, pop_size, dim, cx_method, mut_pb, cx_pb, n_gen, data, embeddings, x, y):
        self.embeddings_model = embedding_model
        self.pop_size = pop_size
        self.dim = dim
        self.cx_method = cx_method
        self.mut_pb = mut_pb
        self.cx_pb = cx_pb
        self.n_gen = n_gen
        self.pop = None
        self.data = data
        self.embeddings = embeddings
        self.inputword = x
        self.realword = y
        self.eval_count = 0

    def register(self):
        # 定義算術表達式的原語集（Primitive Set）
        self.pset = gp.PrimitiveSet("MAIN", 5)
        self.pset.addPrimitive(np.add, 2)
        self.pset.addPrimitive(np.subtract, 2)
        self.pset.addPrimitive(np.multiply, 2)
        self.pset.addPrimitive(protected_div, 2)  ##確認一次ok
        # self.pset.addPrimitive(np.sqrt, 1)
        self.pset.addPrimitive(protected_sqrt, 1)
        self.pset.addPrimitive(np.square, 1)
        self.pset.renameArguments(ARG0="a", ARG1="b", ARG2="c", ARG3="d", ARG4="e")
        # print("Attributes of gp.PrimitiveSet:", dir(self.pset))
        # 創建適應度類和個體類
        creator.create("FitnessMax", base.Fitness, weights=(1,))
        creator.create(
            "Individual", gp.PrimitiveTree, fitness=creator.FitnessMax, pset=self.pset
        )  # output不算fitness
        # creator.create("Individual", gp.PrimitiveTree) #output不算fitness
        # 初始化工具箱
        self.toolbox = base.Toolbox()
        self.toolbox.register("expr", gp.genHalfAndHalf, pset=self.pset, min_=1, max_=5)
        # self.toolbox.register("individual", creator.Individual, fitness=creator.FitnessMax, expr=self.toolbox.expr) #gene_gen=toolbox.gene_gen, n_genes=n_genes
        self.toolbox.register(
            "individual", tools.initIterate, creator.Individual, self.toolbox.expr
        )  # gene_gen=toolbox.gene_gen, n_genes=n_genes
        self.toolbox.register(
            "population",
            tools.initRepeat,
            list,
            self.toolbox.individual,
            n=self.pop_size,
        )  # population數ok
        # 註冊operators
        self.toolbox.register("select", tools.selRandom, k=3)
        self.toolbox.register("cx_simple", gp.cxOnePoint)  # simple crossover
        self.toolbox.register("cx_uniform", self.cx_uniform)
        self.toolbox.register("cx_fair", self.cx_fair)
        self.toolbox.register("cx_one", self.cxOnePoint)

        self.toolbox.register(
            "mutate", gp.mutUniform, expr=self.toolbox.expr, pset=self.pset
        )
        self.toolbox.decorate(
            "mutate", gp.staticLimit(operator.attrgetter("height"), max_value=5)
        )
        self.toolbox.register("evaluate", self.evaluate)  #
        # self.toolbox.register("compile", gep.compile_, pset=self.pset)
        # 註冊record工具
        self.stats = tools.Statistics(
            key=lambda ind: ind.fitness.values
        )  #!!!ind: ind.fitness.values[0] fitness???
        self.stats.register("avg", np.mean)
        self.stats.register("std", np.std)
        self.stats.register("min", np.min)
        self.stats.register("max", np.max)
        self.hof = tools.HallOfFame(10) #hall of fame size

        # print("reg done!")

    def initialize_pop(self):
        self.register()
        # print(self.pop_size)
        self.pop = self.toolbox.population(n=self.pop_size)
        # evaluate the entire population
        fitnesses = map(self.toolbox.evaluate, self.pop)
        for ind, fit in zip(self.pop, fitnesses):
            ind.fitness.values = fit

    def subtree_height(self, tree, index):
        # """Calculate the height of the subtree starting at the given index."""
        def _height(node_index):
            node = tree[node_index]
            if node.arity == 0:  # Leaf node
                return 1
            else:
                return 1 + max(
                    _height(child_index)
                    for child_index in range(
                        node_index + 1, node_index + 1 + node.arity
                    )
                )
        return _height(index)

    def searchSubtree_idx(self, tree, begin):
        end = begin + 1
        total = tree[begin].arity
        while total > 0:
            total += tree[end].arity - 1
            end += 1
        return begin, end

    def clean_data(self, data):
        data = np.where(np.isinf(data), np.finfo(np.float32).max, data)
        data = np.nan_to_num(data, nan=0.0)
        return data

    def evaluate(self, individual):
        """Evalute the fitness of an individual"""
        # print(f"individual種類:{type(individual)}")
        func = gp.compile(individual, self.pset)
        total_similarity = 0.0
        for data_index in range(len(self.inputword)):
            words = self.inputword.iloc[data_index]
            in_vectors = [self.embeddings[word] for word in words]
            a, b, c, d, e = in_vectors[:5]
            #print(f"in_vectors: {in_vectors}")
            y = self.realword.iloc[data_index]
            #print(f"y: {y}")
            out_vector = self.embeddings[y]
            #print(f"out_vector: {out_vector}")
            #if out_vector.ndim == 3:
                #out_vector = out_vector.reshape(1, -1)

            predict = self.clean_data(func(a, b, c, d, e))
            #print(predict)
            # if isinstance(predict, tuple):
            #     predict = np.array(predict)
            #     print(predict)
            # if predict.ndim == 3:
            #     print(predict)
            #     predict = predict.reshape(1, -1)

            #print("predict after clean:", np.array(predict).shape)
            #print("Shape of out_vector:", np.array(out_vector).shape)

            similarity = cosine_similarity([predict], [out_vector])[0][0]
            total_similarity += similarity
        fitness = total_similarity / len(self.inputword)
        ftiness = self.clean_data(fitness)
        self.eval_count += 1
        return (fitness,)

    def cx_uniform(self, ind1, ind2):

        idx1 = 0
        idx2 = 0
        # To track the trees
        stack1 = []
        stack2 = []
        # Store the common region
        region1 = []
        region2 = []

        # Start traversing the trees
        while idx1 < len(ind1) and idx2 < len(ind2):
            # Push the nodes to the stack
            stack1.append((ind1[idx1], [], idx1))
            stack2.append((ind2[idx2], [], idx2))

            region1.append([ind1[idx1], idx1])
            region2.append([ind2[idx2], idx2])
            # Not the same arity, then traverse the subtrees
            if stack1[-1][0].arity != stack2[-1][0].arity:
                res1 = stack1[-1][0].arity
                res2 = stack2[-1][0].arity
                stack1, res1, idx1 = self.traverse_tree(stack1, res1, ind1, idx1)
                stack2, res2, idx2 = self.traverse_tree(stack2, res2, ind2, idx2)

            print(f"stack1: {stack1}\nstack2: {stack2}")
            idx1 += 1
            idx2 += 1


        for pri, idx in region1:
            print(f"{idx}: {pri.name}")

        # Select points for swapping
        if len(region1) > 0:
            points = np.random..choice([0, 1], size=len(region1))
            print(f"crossover point: {points}")
            print(f"crossover point for trees: {region1[point]}, {region2[point]}")
            # Swap subtrees
            ind1[region1[point][1]], ind2[region2[point][1]] = ind2[region2[point][1]], ind1[region1[point][1]

        return ind1, ind2

    def cx_fair(self, ind1, ind2):

        if len(ind1) < 2 or len(ind2) < 2:
            # No crossover on single node tree
            return ind1, ind2

        # List all available primitive types in each individual
        types1 = gp.defaultdict(list)
        types2 = gp.defaultdict(list)
        if ind1.root.ret == gp.__type__:
            # Not STGP optimization
            types1[gp.__type__] = list(range(1, len(ind1)))
            types2[gp.__type__] = list(range(1, len(ind2)))
            common_types = [gp.__type__]
        else:
            for idx, node in enumerate(ind1[1:], 1):
                types1[node.ret].append(idx)
            for idx, node in enumerate(ind2[1:], 1):
                types2[node.ret].append(idx)
            common_types = set(types1.keys()).intersection(set(types2.keys()))

        if len(common_types) > 0:
            type_ = random.choice(list(common_types))

        index1 = random.choice(types1[type_])
        height1 = self.subtree_height(ind1, index1)

        while 1:
            index2 = random.choice(types2[type_])
            height2 = self.subtree_height(ind2, index2)
            if height2 <= height1:
                break
        slice1 = ind1.searchSubtree(index1)
        slice2 = ind2.searchSubtree(index2)
        ind1[slice1], ind2[slice2] = ind2[slice2], ind1[slice1]
        return ind1, ind2


    def traverse_tree(self, stack, res, parent, idx):
        while res != 0:
            res -= 1
            idx += 1
            stack.append((parent[idx], [], idx))
            res += parent[idx].arity
        return stack, res, idx

    def cxOnePoint(self, ind1, ind2):
        #print(f"ind1: {ind1.__str__()}\n, ind2: {ind2.__str__()}")

        idx1 = 0
        idx2 = 0
        # To track the trees
        stack1 = []
        stack2 = []
        # Store the common region
        region1 = []
        region2 = []

        # Start traversing the trees
        while idx1 < len(ind1) and idx2 < len(ind2):
            # Push the nodes to the stack
            stack1.append((ind1[idx1], [], idx1))
            stack2.append((ind2[idx2], [], idx2))

            region1.append([ind1[idx1], idx1])
            region2.append([ind2[idx2], idx2])
            # Not the same arity, then traverse the subtrees
            if stack1[-1][0].arity != stack2[-1][0].arity:
                res1 = stack1[-1][0].arity
                res2 = stack2[-1][0].arity
                stack1, res1, idx1 = self.traverse_tree(stack1, res1, ind1, idx1)
                stack2, res2, idx2 = self.traverse_tree(stack2, res2, ind2, idx2)

            print(f"stack1: {stack1}\nstack2: {stack2}")
            idx1 += 1
            idx2 += 1


        for pri, idx in region1:
            print(f"{idx}: {pri.name}")

        # Select crossover point
        if len(region1) > 0:
            point = random.randint(0, len(region1) - 1)
            print(f"crossover point: {point}")
            print(f"crossover point for trees: {region1[point]}, {region2[point]}")

        # Swap subtrees
        if len(region1) > 0:
            slice1 = ind1.searchSubtree(region1[point][1])
            slice2 = ind2.searchSubtree(region2[point][1])
            ind1[slice1], ind2[slice2] = ind2[slice2], ind1[slice1]

        return ind1, ind2


    def crossover(self, ind1, ind2):
        if random.uniform(0, 1) < self.cx_pb:
            if self.cx_method == 5:
                choice = random.choice(
                    [
                        self.toolbox.cx_simple,
                        self.toolbox.cx_uniform,
                        self.toolbox.cx_fair,
                        self.toolbox.cx_one,
                    ]
                )
                try:
                    ind1, ind2 = choice( ind1, ind2)
                except:
                    pass
            #print(f"choice:{choice}")
            if self.cx_method == 1:
                try:
                    ind1, ind2 = self.toolbox.cx_simple( ind1, ind2)
                except:
                    pass
            if self.cx_method == 2:
                try:
                    ind1, ind2 = self.toolbox.cx_uniform( ind1, ind2)
                except:
                    pass
            if self.cx_method == 3:
                try:
                    ind1, ind2 = self.toolbox.cx_fair( ind1, ind2)
                except:
                    pass
            if self.cx_method == 4:
                try:
                    ind1, ind2 = self.toolbox.cx_one( ind1, ind2)
                except:
                    pass

        fitness_ind1 = self.toolbox.evaluate(ind1)
        fitness_ind2 = self.toolbox.evaluate(ind2)
        if fitness_ind1 <= fitness_ind2:
            return ind1
        else:
            return ind2

    def mutate(self, child):
        if random.random() < self.mut_pb:
            try:
                self.toolbox.mutate(child)
            except:
                pass
            child.fitness.values = self.toolbox.evaluate(child)
        return child

    def select(self):
        candidates = self.toolbox.select(self.pop)
        parents = candidates[0:3]
        sorted_parents = sorted(parents, key=lambda ind: ind.fitness.values) #小到大排序
        sorted_fitness = [ind.fitness.values for ind in sorted_parents]
        offspring = self.crossover(sorted_parents[1], sorted_parents[2])
        offspring = self.mutate(offspring)
        off_fit = self.toolbox.evaluate(offspring)
        if off_fit[0] >= sorted_fitness[0]:
            idx = self.pop.index(candidates[0])
            #print(self.pop[idx])
            self.pop[idx] = offspring
            #print(f"篩選後的：{self.pop[idx]}")
            #print(off_fit[0])
            self.pop[idx].fitness.values = self.toolbox.evaluate(offspring)
        return


    def write_record(self, writer):
        print(f"ＥＶＡＬ次數：{self.eval_count}")
        record = self.stats.compile(self.pop)
        self.hof.update(self.pop)
        print(record)
        print(f"最佳個體：{self.hof[0]}")
        # best_ind = str(self.hof[0])
        # row = [self.eval_count] + list(record.values()) + [best_ind]
        # writer.writerow(row)
        # func_best = gp.compile(self.hof[0], self.pset)
        # a, b, c, d ,e = [self.embeddings[word] for word in self.inputword.iloc[1]]
        # predict_out = func_best(a, b, c, d, e)
        # print(f"預測結果：{predict_out}")
        # if self.embeddings_model == "word2vec":
        #     outword = model.wv.most_similar(positive=[predict_out], topn=1)
        # elif self.embeddings_model == "glove":
        #     outword = model.wv.most_similar(positive=[predict_out], topn=1)
        # elif self.embeddings_model == "fasttext":
        #     outword = model.get_nearest_neighbors(predict_out, k=1)
        # print(f"預測結果：{outword}")


    def evolving(self, model):
        # for g in range(self.n_gen):
        print("開始進化！")
        with open('record.csv', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["eval_count", "avg", "std", "min", "max", "best_individual"])
            while self.eval_count < 1000:
                self.select()
                if self.eval_count % 1 == 0:
                    self.write_record(writer)

In [25]:
embeddings_model = "word2vec"
population_size = 100
dimension = 10
cx_num = 4
mut_prob = 0.1
cross_prob = 1
num_generations = 100

In [27]:
data, embeddings, model = get_embeddings(embeddings_model, dimension, 1)

x = data[0].str.split(" ").apply(lambda x: x[:5])
y = data[0].str.split(" ").str.get(5)



In [28]:
# print(x.iloc[1],y.iloc[1])
gpp = GP(model, population_size, dimension, cx_num, mut_prob, cross_prob, num_generations, data, embeddings, x, y)
gpp.initialize_pop()
gpp.evolving(model)



開始進化！
stack1: [(<deap.gp.Primitive object at 0x7f33c09e3720>, [], 0)]
stack2: [(<deap.gp.Primitive object at 0x7f33c09e3f90>, [], 0)]
stack1: [(<deap.gp.Primitive object at 0x7f33c09e3720>, [], 0), (<deap.gp.Terminal object at 0x7f33bb8aefc0>, [], 1)]
stack2: [(<deap.gp.Primitive object at 0x7f33c09e3f90>, [], 0), (<deap.gp.Primitive object at 0x7f33c09e37c0>, [], 1), (<deap.gp.Primitive object at 0x7f33c09e3720>, [], 2), (<deap.gp.Terminal object at 0x7f33bb8aeec0>, [], 3), (<deap.gp.Primitive object at 0x7f33c09e3450>, [], 4), (<deap.gp.Terminal object at 0x7f33bb8aef40>, [], 5), (<deap.gp.Terminal object at 0x7f33bb8aef40>, [], 6), (<deap.gp.Terminal object at 0x7f33bb8aee80>, [], 7)]
stack1: [(<deap.gp.Primitive object at 0x7f33c09e3720>, [], 0), (<deap.gp.Terminal object at 0x7f33bb8aefc0>, [], 1), (<deap.gp.Terminal object at 0x7f33bb8aefc0>, [], 2)]
stack2: [(<deap.gp.Primitive object at 0x7f33c09e3f90>, [], 0), (<deap.gp.Primitive object at 0x7f33c09e37c0>, [], 1), (<deap.gp.Pr