# In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# 定义神经网络模型
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: number of input features fed to the first linear layer.
        hidden_size: width of the hidden layer.
        output_size: number of output units of the second linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are registered in this order: fc1, relu, fc2.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the raw output of the second linear layer for input ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# 定义遗传算法的相关函数
def initialize_population(population_size, num_features):
    """Create a random binary population.

    Args:
        population_size: number of chromosomes (rows).
        num_features: number of genes per chromosome (columns).

    Returns:
        A ``float32`` tensor of shape ``(population_size, num_features)``
        whose entries are drawn from ``{0., 1.}``.
    """
    shape = (population_size, num_features)
    # Integers are sampled from [0, 2) and stored as float32.
    return torch.randint(low=0, high=2, size=shape, dtype=torch.float32)

def evaluate_population(population, train_data, test_data, num_features, hidden_size, output_size, epochs=10):
    """Score each chromosome by the test accuracy of an MLP trained on its features.

    For every chromosome, the columns whose gene is non-zero are taken from
    ``train_data`` and ``test_data``; a fresh ``MLP`` is trained on the training
    columns for ``epochs`` full-batch Adam steps (lr=0.01, cross-entropy loss),
    and its accuracy on the test columns is the chromosome's fitness.

    NOTE: labels are not parameters. This function reads the module-level
    globals ``train_labels`` and ``test_labels``, which must be defined
    before it is called.

    Args:
        population: iterable of 1-D chromosomes (e.g. the rows of a 2-D tensor).
        train_data: 2-D tensor of training samples, one column per feature.
        test_data: 2-D tensor of test samples, one column per feature.
        num_features: unused here; kept so existing callers keep working.
        hidden_size: hidden-layer width passed to ``MLP``.
        output_size: number of outputs passed to ``MLP``.
        epochs: number of full-batch optimisation steps per chromosome.

    Returns:
        ``np.ndarray`` with one fitness value per chromosome. A chromosome
        that selects no feature gets ``0.0``.
    """
    fitness_scores = []
    for chromosome in population:
        # flatten() (instead of squeeze()) keeps a lone selected index as a
        # one-element list. squeeze() collapsed it to a bare int, which made
        # "only feature 0 selected" look empty (``not 0``) and sent every
        # other single-feature chromosome down a branch that never assigned
        # ``model``.
        selected_features = torch.nonzero(chromosome).flatten().tolist()
        if not selected_features:
            # No feature selected: nothing to train on.
            fitness_scores.append(0.0)
            continue

        # List indexing keeps both feature matrices 2-D, even for one column.
        train_features = train_data[:, selected_features]
        test_features = test_data[:, selected_features]

        # A new model per chromosome, sized to the number of selected features.
        model = MLP(len(selected_features), hidden_size, output_size)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        for _ in range(epochs):
            outputs = model(train_features)
            loss = criterion(outputs, train_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Fitness is the fraction of correctly classified test samples.
        with torch.no_grad():
            predicted = model(test_features).argmax(dim=1)
            correct = (predicted == test_labels).sum().item()
            fitness_scores.append(correct / len(test_labels))

    return np.array(fitness_scores)

# def select_parents(population, fitness_scores):
#     sorted_indices = np.argsort(fitness_scores)[::-1]
#     selected_indices = sorted_indices[:2]  # 选择适应度最高的两个个体作为父母
#     return population[selected_indices]

def select_parents(population, fitness_scores):
    """Pick the two fittest chromosomes as parents.

    Args:
        population: 2-D tensor of chromosomes; ``.numpy()`` is called on it.
        fitness_scores: array-like of fitness values, one per chromosome.

    Returns:
        A tensor holding the two highest-scoring rows of ``population``,
        best first.
    """
    # argsort is ascending, so flip it and keep the first two positions.
    ranking = np.flip(np.argsort(fitness_scores))
    top_two = ranking[:2]

    # Index through NumPy, then hand the picked rows back as a tensor.
    rows = population.numpy()[top_two]
    return torch.from_numpy(rows)

def crossover(parents):
    """Single-point crossover of two parent chromosomes.

    A cut point is drawn uniformly from ``1 .. num_genes - 1``; each child
    takes the genes before the cut from one parent and the rest from the
    other.

    Args:
        parents: 2-D tensor whose rows 0 and 1 are the parent chromosomes.

    Returns:
        ``(child1, child2)`` as two new 1-D tensors. When the chromosomes have
        fewer than two genes there is no interior cut point, so the children
        are copies of the parents.
    """
    num_genes = parents.shape[1]
    if num_genes < 2:
        # torch.randint(1, num_genes, ...) needs low < high and would raise.
        return parents[0].clone(), parents[1].clone()

    # Convert to a plain int so the slices below use an ordinary index.
    cut = int(torch.randint(1, num_genes, (1,)).item())
    child1 = torch.cat((parents[0, :cut], parents[1, cut:]))
    child2 = torch.cat((parents[1, :cut], parents[0, cut:]))
    return child1, child2

def mutate(child, mutation_rate):
    """Randomly flip genes of a binary chromosome.

    Args:
        child: tensor of genes; for 0/1 values, a flipped gene becomes the
            other value.
        mutation_rate: per-gene threshold; a gene is flipped when its
            uniform random draw in ``[0, 1)`` is below this value.

    Returns:
        A new tensor equal to ``(child + flips) % 2``.
    """
    # 1.0 where the gene is to be flipped, 0.0 elsewhere.
    flips = torch.rand(child.shape).lt(mutation_rate).to(torch.float32)
    # Adding 1 modulo 2 swaps 0 <-> 1; adding 0 leaves the gene untouched.
    return torch.remainder(child + flips, 2)

# 主要的遗传算法循环
def genetic_algorithm(train_data, test_data, num_features, hidden_size, output_size, population_size=50, generations=50, mutation_rate=0.01):
    """Run a genetic search over binary feature masks and return the best one.

    Each generation is scored with ``evaluate_population``; the two fittest
    chromosomes (``select_parents``) breed the whole next generation through
    ``crossover`` and ``mutate``.

    The returned chromosome is the highest-scoring one that was actually
    evaluated. Previously the index of the best score in the *old* population
    was used to pick a row from the freshly bred, never-evaluated population.
    To make the last offspring count, the final population is scored too, so
    ``generations + 1`` evaluations are performed.

    Args:
        train_data: training samples, forwarded to ``evaluate_population``.
        test_data: test samples, forwarded to ``evaluate_population``.
        num_features: number of genes per chromosome.
        hidden_size: hidden-layer width forwarded to ``evaluate_population``.
        output_size: number of outputs forwarded to ``evaluate_population``.
        population_size: number of chromosomes per generation (at least 2).
        generations: number of breeding rounds; ``0`` scores only the
            initial population.
        mutation_rate: per-gene flip threshold passed to ``mutate``.

    Returns:
        The best evaluated chromosome as a 1-D tensor.

    Raises:
        ValueError: if ``population_size`` is smaller than 2.
    """
    if population_size < 2:
        raise ValueError("population_size must be at least 2 to select two parents")

    breeding_rounds = max(generations, 0)
    population = initialize_population(population_size, num_features)

    best_individual = None
    best_score = float("-inf")

    for generation in range(breeding_rounds + 1):
        fitness_scores = evaluate_population(population, train_data, test_data, num_features, hidden_size, output_size)

        # Remember the champion together with the population it was scored in.
        top = int(np.argmax(fitness_scores))
        if fitness_scores[top] > best_score:
            best_score = fitness_scores[top]
            best_individual = population[top].clone()

        if generation == breeding_rounds:
            # The final population has been scored; no more offspring needed.
            break

        parents = select_parents(population, fitness_scores)

        # Breed in pairs until the generation is full; trimming afterwards
        # keeps an odd population_size from shrinking by one.
        new_population = []
        while len(new_population) < population_size:
            child1, child2 = crossover(parents)
            new_population.append(mutate(child1, mutation_rate))
            new_population.append(mutate(child2, mutation_rate))

        population = torch.stack(new_population[:population_size])

    return best_individual

# Example usage
# train_data and test_data stand in for your training and test sets
# (random tensors here).
# NOTE: train_labels and test_labels must be module-level names:
# evaluate_population reads them as globals instead of taking them as arguments.
train_data = torch.randn(20000, 11)
test_data = torch.randn(10000, 11)
train_labels = torch.randint(0, 2, (20000,))
test_labels = torch.randint(0, 2, (10000,))

# num_features matches the 11 columns of the data tensors above.
best_individual = genetic_algorithm(train_data, test_data, num_features=11, hidden_size=8, output_size=2)

# Print the best individual
print("Best Individual:", best_individual)


# Output: Best Individual: tensor([1., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0.])
