In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

# 生成模拟数据
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, n_classes=2, random_state=42)
X_train, X_pool, y_train, y_pool = train_test_split(X, y, test_size=0.9, random_state=42)

# 转换为PyTorch张量
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.LongTensor(y_train)
X_pool_tensor = torch.FloatTensor(X_pool)
y_pool_tensor = torch.LongTensor(y_pool)

# 定义简单的神经网络
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# 初始化神经网络和优化器
input_size = X_train.shape[1]
hidden_size = 10
output_size = 2
model = SimpleNN(input_size, hidden_size, output_size)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# 定义数据集类
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# 初始化训练数据集和池子数据集
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
pool_dataset = CustomDataset(X_pool_tensor, y_pool_tensor)

# 定义主动学习循环
num_iterations = 80
batch_size = 32

for i in range(num_iterations):
    # 训练模型
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # 在池子数据中选择不确定性最大的样本
    with torch.no_grad():
        pool_loader = DataLoader(pool_dataset, batch_size=batch_size, shuffle=False)
        uncertainty_scores = []
        for inputs, _ in pool_loader:
            outputs = model(inputs)
            uncertainty = -torch.max(nn.functional.softmax(outputs, dim=1), dim=1)[0]
            uncertainty_scores.extend(uncertainty.numpy())

        # 选择不确定性最大的样本
        selected_indices = np.argsort(uncertainty_scores)[:batch_size]

    # 将选择的样本添加到训练数据集中，并从池子中移除
    for idx in selected_indices:
        if idx < len(pool_dataset):
            train_dataset.features = torch.cat([train_dataset.features, pool_dataset.features[idx].unsqueeze(0)], dim=0)
            train_dataset.labels = torch.cat([train_dataset.labels, pool_dataset.labels[idx].unsqueeze(0)], dim=0)

            pool_dataset.features = torch.cat([pool_dataset.features[:idx], pool_dataset.features[idx + 1:]], dim=0)
            pool_dataset.labels = torch.cat([pool_dataset.labels[:idx], pool_dataset.labels[idx + 1:]], dim=0)

    # 打印每次迭代的准确性
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    all_preds = []
    all_labels = []
    for inputs, labels in train_loader:
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.numpy())
        all_labels.extend(labels.numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    print(f"Iteration {i + 1}/{num_iterations}, Accuracy: {accuracy:.4f}")

# 最终测试模型的性能
test_dataset = CustomDataset(X_pool_tensor, y_pool_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.numpy())
        all_labels.extend(labels.numpy())

test_accuracy = accuracy_score(all_labels, all_preds)
print(f"Final Test Accuracy: {test_accuracy:.4f}")


Iteration 1/80, Accuracy: 0.3817
Iteration 2/80, Accuracy: 0.3951
Iteration 3/80, Accuracy: 0.4093
Iteration 4/80, Accuracy: 0.4574
Iteration 5/80, Accuracy: 0.4643


Iteration 6/80, Accuracy: 0.5248
Iteration 7/80, Accuracy: 0.5495
Iteration 8/80, Accuracy: 0.5814
Iteration 9/80, Accuracy: 0.6356
Iteration 10/80, Accuracy: 0.6691
Iteration 11/80, Accuracy: 0.7107
Iteration 12/80, Accuracy: 0.7489
Iteration 13/80, Accuracy: 0.7725
Iteration 14/80, Accuracy: 0.8049
Iteration 15/80, Accuracy: 0.8195
Iteration 16/80, Accuracy: 0.8509
Iteration 17/80, Accuracy: 0.8615
Iteration 18/80, Accuracy: 0.8832
Iteration 19/80, Accuracy: 0.8871
Iteration 20/80, Accuracy: 0.8864
Iteration 21/80, Accuracy: 0.8977
Iteration 22/80, Accuracy: 0.8951
Iteration 23/80, Accuracy: 0.9027
Iteration 24/80, Accuracy: 0.8989
Iteration 25/80, Accuracy: 0.9068
Iteration 26/80, Accuracy: 0.9097
Iteration 27/80, Accuracy: 0.9102
Iteration 28/80, Accuracy: 0.9117
Iteration 29/80, Accuracy: 0.9138
Iteration 30/80, Accuracy: 0.9143
Iteration 31/80, Accuracy: 0.9149
Iteration 32/80, Accuracy: 0.9130
Iteration 33/80, Accuracy: 0.9130
Iteration 34/80, Accuracy: 0.9150
Iteration 35/80, A