In [3]:
import numpy as np

def generate_classification_dataset(size, feature_dim, mean_diff, rng=None):
    """Generate a shuffled two-class Gaussian dataset.

    Samples labelled 0 are drawn from a normal distribution with mean
    ``+mean_diff`` and unit scale in every feature; samples labelled 1 use
    mean ``-mean_diff``.

    Args:
        size: Total number of samples to return. When ``size`` is odd the
            extra sample goes to class 1, so exactly ``size`` rows come back.
        feature_dim: Number of features per sample.
        mean_diff: Offset of each class mean from zero.
        rng: Optional random source exposing ``normal`` and ``permutation``
            (for example ``np.random.default_rng(seed)``). When ``None`` the
            global ``np.random`` state is used.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(size, feature_dim)`` and
        ``y`` has shape ``(size,)`` holding the labels 0.0 / 1.0.
    """
    sampler = np.random if rng is None else rng

    # Split explicitly so the two halves always add up to `size`; using
    # size // 2 for both halves loses a row whenever `size` is odd.
    n_class0 = size // 2
    n_class1 = size - n_class0

    X_class0 = sampler.normal(loc=mean_diff, scale=1, size=(n_class0, feature_dim))
    X_class1 = sampler.normal(loc=-mean_diff, scale=1, size=(n_class1, feature_dim))

    # Stack the two classes and build the matching label vector.
    X = np.vstack((X_class0, X_class1))
    y = np.hstack((np.zeros(n_class0), np.ones(n_class1)))

    # Shuffle features and labels with the same permutation.
    idx = sampler.permutation(size)
    return X[idx], y[idx]

# Experiment configuration: sample counts, feature dimensionality and class separation.
data_size = 10000  # not referenced by the visible cells
train_size, test_size = 10, 3000
feature_dim = 200
mean_difference = 2.0  # passed as mean_diff to the dataset generator

# Draw the training and the test split with the same generator settings.
X_train1, y_train1 = generate_classification_dataset(
    size=train_size, feature_dim=feature_dim, mean_diff=mean_difference
)
X_test1, y_test1 = generate_classification_dataset(
    size=test_size, feature_dim=feature_dim, mean_diff=mean_difference
)

# Print the shape of every array as a quick sanity check.
for caption, array in (
    ("Dataset 1 - Train Features shape:", X_train1),
    ("Dataset 1 - Train Labels shape:", y_train1),
    ("Dataset 1 - Test Features shape:", X_test1),
    ("Dataset 1 - Test Labels shape:", y_test1),
):
    print(caption, array.shape)


Dataset 1 - Train Features shape: (10, 200)
Dataset 1 - Train Labels shape: (10,)
Dataset 1 - Test Features shape: (3000, 200)
Dataset 1 - Test Labels shape: (3000,)


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class FeedforwardNN(nn.Module):
    """Feed-forward network with one hidden layer: Linear -> ReLU -> Linear.

    The forward pass returns the raw output of the second linear layer; no
    activation is applied to it.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Pass ``x`` through fc1, ReLU and fc2 and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Model hyperparameters.
input_size = feature_dim  # one input unit per feature
hidden_size = 100
output_size = 1  # a single logit, as expected by BCEWithLogitsLoss below
learning_rate = 0.01
num_epochs = 5

# Build the network, then the loss and the optimizer that act on it.
model = FeedforwardNN(
    input_size=input_size, hidden_size=hidden_size, output_size=output_size
)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)

# Convert the NumPy arrays into float tensors.
X_train, y_train = (
    torch.tensor(array, dtype=torch.float) for array in (X_train1, y_train1)
)
X_test, y_test = (
    torch.tensor(array, dtype=torch.float) for array in (X_test1, y_test1)
)

# Full-batch training: each epoch performs one gradient step on the whole training set.
train_targets = y_train.unsqueeze(1)  # add a trailing dimension to line up with the model output
for epoch in range(num_epochs):
    # Clear gradients from the previous step before the new backward pass.
    optimizer.zero_grad()

    # Forward pass and loss.
    outputs = model(X_train)
    loss = criterion(outputs, train_targets)

    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
        

# Score the test set; gradient tracking is switched off for evaluation.
test_targets = y_test.unsqueeze(1)
with torch.no_grad():
    test_outputs = model(X_test)
    test_loss = criterion(test_outputs, test_targets)

def accuracy(predictions, labels):
    """Return the fraction of samples whose thresholded prediction equals its label.

    Logits are passed through a sigmoid and rounded to 0/1 before being
    compared with ``labels``. Both tensors are flattened first, so ``(N,)``
    and ``(N, 1)`` layouts can be mixed freely; comparing ``(N,)`` against
    ``(N, 1)`` directly would broadcast to an ``N x N`` matrix and inflate
    the count of correct predictions.

    Args:
        predictions: Tensor of raw logits, one per sample.
        labels: Tensor of 0/1 targets, one per sample.

    Returns:
        The accuracy as a Python float.

    Raises:
        ValueError: If the two tensors hold a different number of elements.
        ZeroDivisionError: If ``labels`` is empty.
    """
    predicted_classes = torch.round(torch.sigmoid(predictions)).reshape(-1)
    flat_labels = labels.reshape(-1)
    if predicted_classes.numel() != flat_labels.numel():
        raise ValueError(
            f"predictions and labels must have the same number of elements, "
            f"got {predicted_classes.numel()} and {flat_labels.numel()}"
        )

    correct = (predicted_classes == flat_labels).sum().item()
    total = flat_labels.numel()
    return correct / total

# Compute the test accuracy from the logits and report it together with the test loss.
test_accuracy = accuracy(predictions=test_outputs, labels=y_test)
print(f'Test Loss: {test_loss.item():.4f}, Test Accuracy: {test_accuracy:.4f}')


Epoch [1/5], Loss: 0.5602
Epoch [2/5], Loss: 0.3914
Epoch [3/5], Loss: 0.2922
Epoch [4/5], Loss: 0.2293
Epoch [5/5], Loss: 0.1864
Test Loss: 0.1653, Test Accuracy: 1.0000


In [5]:
# Inspect the shape of the test-label tensor.
y_test.size()

torch.Size([3000])