In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import optuna
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score


In [4]:
# !pip install optuna

In [5]:


# Step 1: Create a simple MLP model
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# Step 2: Define the objective function for Optuna
def objective(trial):
    # Step 2.1: Generate synthetic data (replace with your actual dataset)
    X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

    # Step 2.2: Split the data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_data = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))

    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=False)

    # Step 2.3: Define the model
    input_size = X.shape[1]
    hidden_size = trial.suggest_int("hidden_size", 32, 256)  # Hyperparameter to tune
    output_size = 2  # Binary classification

    model = MLP(input_size, hidden_size, output_size)

    # Step 2.4: Define optimizer and loss function
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)  # Learning rate range
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # Step 2.5: Train the model
    epochs = 5
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for batch in train_loader:
            inputs, targets = batch
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Step 2.6: Evaluate the model on the validation set
        model.eval()
        val_predictions = []
        val_labels = []
        with torch.no_grad():
            for batch in val_loader:
                inputs, targets = batch
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                val_predictions.extend(predicted.cpu().numpy())
                val_labels.extend(targets.cpu().numpy())

        # Step 2.7: Calculate validation accuracy
        accuracy = accuracy_score(val_labels, val_predictions)
        trial.report(accuracy, epoch)

        # Step 2.8: Early stopping based on validation accuracy
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy


# Step 3: Run the Optuna optimization
def optimize():
    # Step 3.1: Create a study and specify the optimization goal
    study = optuna.create_study(direction="maximize")

    # Step 3.2: Optimize the objective function
    study.optimize(objective, n_trials=20)

    # Step 3.3: Print the best parameters and best value
    print(f"Best Trial: {study.best_trial.params}")
    print(f"Best Accuracy: {study.best_value}")


In [6]:
# Step 4: Execute the optimization
optimize()

"""
Optuna优化：

创建 study 对象并指定优化目标为最大化验证准确率（direction="maximize"）。
使用 study.optimize 方法开始优化过程，进行多次超参数试验。
最后，输出最佳的超参数组合和对应的验证准确率
"""

[I 2024-12-25 14:13:46,687] A new study created in memory with name: no-name-8f012565-fb7c-43d6-a99f-16a3312a750e
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)  # Learning rate range
[I 2024-12-25 14:13:47,598] Trial 0 finished with value: 0.795 and parameters: {'hidden_size': 165, 'learning_rate': 2.2934478632163862e-05}. Best is trial 0 with value: 0.795.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)  # Learning rate range
[I 2024-12-25 14:13:47,725] Trial 1 finished with value: 0.64 and parameters: {'hidden_size': 160, 'learning_rate': 1.2778131005718543e-05}. Best is trial 0 with value: 0.795.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)  # Learning rate range
[I 2024-12-25 14:13:47,860] Trial 2 finished with value: 0.79 and parameters: {'hidden_size': 144, 'learning_rate': 2.2437153839122087e-05}. Best is trial 0 with value: 0.795.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-2) 

Best Trial: {'hidden_size': 250, 'learning_rate': 0.0022569882664130085}
Best Accuracy: 0.875


'\nOptuna优化：\n\n创建 study 对象并指定优化目标为最大化验证准确率（direction="maximize"）。\n使用 study.optimize 方法开始优化过程，进行多次超参数试验。\n最后，输出最佳的超参数组合和对应的验证准确率\n'