In [None]:
!pip install torchtext==0.4.0

Collecting torchtext==0.4.0
  Downloading torchtext-0.4.0-py3-none-any.whl.metadata (5.0 kB)
Downloading torchtext-0.4.0-py3-none-any.whl (53 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchtext
Successfully installed torchtext-0.4.0


In [None]:
import torch
from torchtext import data, datasets
import random
import numpy as np

# 设置随机种子，保证可复现
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# 定义 Field
TEXT = data.Field(tokenize = 'spacy',
                  tokenizer_language = 'en_core_web_sm',
                  include_lengths = True, # 源代码用到了 include_lengths
                  pad_first=True)         # 源代码 padding 在前
LABEL = data.LabelField(dtype = torch.float)

print("Loading Data...")
# 加载 IMDB 数据
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

# 划分验证集 (注意：源代码这里 random.seed(SEED) 返回 None，实际上是用默认随机状态，
# 我们这里显式调用 random.seed 确保每次运行切分一致)
train_data, valid_data = train_data.split(random_state = random.seed(SEED))

print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')

# 构建词汇表
MAX_VOCAB_SIZE = 25_000

TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
LABEL.build_vocab(train_data)

print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

# 创建迭代器
BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size = BATCH_SIZE,
    sort_within_batch = True, # 必须为 True 因为用到了 include_lengths
    device = device)

print("Data Prepared.")

Loading Data...
downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:01<00:00, 52.2MB/s]


Number of training examples: 17500
Number of validation examples: 7500
Number of testing examples: 25000
Unique tokens in TEXT vocabulary: 25002
Unique tokens in LABEL vocabulary: 2
Data Prepared.


In [None]:
import torch.nn as nn
import time

class RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text, text_lengths):
        # text = [sent len, batch size]
        embedded = self.embedding(text)
        # embedded = [sent len, batch size, emb dim]

        # PyTorch 的 RNN 实际上不需要 text_lengths (除非用 pack_padded_sequence，源代码没用)
        # 但为了保持接口一致，我们保留参数输入
        output, hidden = self.rnn(embedded)

        # hidden = [1, batch size, hid dim]
        return self.fc(hidden.squeeze(0))

# 计算准确率
def binary_accuracy(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc

# 训练函数
def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.train()

    for batch in iterator:
        optimizer.zero_grad()
        text, text_lengths = batch.text
        predictions = model(text, text_lengths).squeeze(1)
        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

# 评估函数
def evaluate(model, iterator, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.eval()

    with torch.no_grad():
        for batch in iterator:
            text, text_lengths = batch.text
            predictions = model(text, text_lengths).squeeze(1)
            loss = criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

# 计时函数
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

# 每次获取一个新模型的函数 (确保参数重置)
def get_fresh_model():
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 100
    HIDDEN_DIM = 256
    OUTPUT_DIM = 1
    model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
    return model.to(device)

In [None]:
import torch.optim as optim
import pandas as pd

# 定义参数
N_EPOCHS = 20
optimizers_to_test = ['SGD', 'Adam', 'Adagrad']
results = []

print(f"Starting comparison with {N_EPOCHS} epochs per optimizer...")

for opt_name in optimizers_to_test:
    print(f"\n{'='*20}\nRunning Optimizer: {opt_name}\n{'='*20}")

    # 1. 初始化新模型
    model = get_fresh_model()
    criterion = nn.BCEWithLogitsLoss().to(device)

    # 2. 定义优化器
    if opt_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=1e-3)
    elif opt_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
    elif opt_name == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=1e-3)

    best_valid_loss = float('inf')

    # 用于记录最佳时刻的数据
    best_record = {}
    total_time = 0

    for epoch in range(N_EPOCHS):
        start_time = time.time()

        # 训练和验证
        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        total_time += (end_time - start_time)

        # 打印部分 Epoch 信息 (比如每隔 1 或 2 个 epoch)
        print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

        # 保存最佳模型逻辑
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            # 保存此时的模型权重
            torch.save(model.state_dict(), f'best_model_{opt_name}.pt')
            # 记录此时的所有指标
            best_record = {
                'Best Epoch': epoch + 1,
                'Train Loss': train_loss,
                'Train Acc': train_acc,
                'Val Loss': valid_loss,
                'Val Acc': valid_acc
            }

    # 3. 训练结束，加载最佳模型进行 Test
    print(f"\nLoading best model for {opt_name} to test...")
    model.load_state_dict(torch.load(f'best_model_{opt_name}.pt'))
    test_loss, test_acc = evaluate(model, test_iterator, criterion)

    # 4. 汇总结果
    results.append({
        'Optimizer': opt_name,
        'Best Epoch': best_record['Best Epoch'],
        'Train Loss': best_record['Train Loss'],
        'Train Acc (%)': best_record['Train Acc'] * 100,
        'Val Loss': best_record['Val Loss'],
        'Val Acc (%)': best_record['Val Acc'] * 100,
        'Test Loss': test_loss,
        'Test Acc (%)': test_acc * 100,
        'Avg Time/Epoch (s)': total_time / N_EPOCHS
    })

print("\nAll experiments finished.")

Starting comparison with 20 epochs per optimizer...

Running Optimizer: SGD
Epoch: 01 | Epoch Time: 0m 3s
	Train Loss: 0.696 | Train Acc: 50.30%
	 Val. Loss: 0.693 |  Val. Acc: 50.59%
Epoch: 02 | Epoch Time: 0m 3s
	Train Loss: 0.693 | Train Acc: 51.89%
	 Val. Loss: 0.692 |  Val. Acc: 52.11%
Epoch: 03 | Epoch Time: 0m 3s
	Train Loss: 0.691 | Train Acc: 52.44%
	 Val. Loss: 0.690 |  Val. Acc: 53.27%
Epoch: 04 | Epoch Time: 0m 3s
	Train Loss: 0.690 | Train Acc: 53.07%
	 Val. Loss: 0.689 |  Val. Acc: 53.45%
Epoch: 05 | Epoch Time: 0m 3s
	Train Loss: 0.689 | Train Acc: 53.55%
	 Val. Loss: 0.688 |  Val. Acc: 53.69%
Epoch: 06 | Epoch Time: 0m 3s
	Train Loss: 0.689 | Train Acc: 53.60%
	 Val. Loss: 0.688 |  Val. Acc: 53.98%
Epoch: 07 | Epoch Time: 0m 3s
	Train Loss: 0.688 | Train Acc: 54.03%
	 Val. Loss: 0.687 |  Val. Acc: 54.07%
Epoch: 08 | Epoch Time: 0m 3s
	Train Loss: 0.688 | Train Acc: 53.95%
	 Val. Loss: 0.687 |  Val. Acc: 54.10%
Epoch: 09 | Epoch Time: 0m 3s
	Train Loss: 0.687 | Train Acc

In [None]:
# 创建 DataFrame
df = pd.DataFrame(results)

# 设置显示格式，保留 4 位小数
pd.options.display.float_format = '{:,.2f}'.format

print("\n=== Final Experiment Results ===")
display(df)



=== Final Experiment Results ===


Unnamed: 0,Optimizer,Best Epoch,Train Loss,Train Acc (%),Val Loss,Val Acc (%),Test Loss,Test Acc (%),Avg Time/Epoch (s)
0,SGD,20,0.68,56.47,0.68,55.76,0.68,55.03,3.59
1,Adam,6,0.54,72.54,0.63,66.03,0.67,63.08,3.78
2,Adagrad,20,0.53,74.63,0.56,71.76,0.56,71.48,3.7


In [None]:
import torch.optim as optim
import pandas as pd
import time

# --- 这里单独定义 Adam 的配置 ---
opt_name = 'Adam'
LEARNING_RATE = 1e-3
N_EPOCHS = 20

print(f"Retrying {opt_name} with Learning Rate {LEARNING_RATE}...")

# 1. 初始化新模型 (使用之前定义的函数)
# 确保你之前的 cell 运行过，内存里有 get_fresh_model 和 device
model = get_fresh_model()
criterion = nn.BCEWithLogitsLoss().to(device)

# 2. 定义优化器 (使用新的学习率)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

best_valid_loss = float('inf')
best_record = {}
total_time = 0

# 3. 训练循环
for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()
    total_time += (end_time - start_time)

    # 打印 log 方便观察
    print(f'Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')

    # 保存最佳模型
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), f'best_model_{opt_name}_fixed.pt')
        best_record = {
            'Best Epoch': epoch + 1,
            'Train Loss': train_loss,
            'Train Acc': train_acc,
            'Val Loss': valid_loss,
            'Val Acc': valid_acc
        }

# 4. 测试
print(f"\nLoading best model for {opt_name} to test...")
model.load_state_dict(torch.load(f'best_model_{opt_name}_fixed.pt'))
test_loss, test_acc = evaluate(model, test_iterator, criterion)

# 5. 展示结果
adam_result = {
    'Optimizer': opt_name,
    'Best Epoch': best_record['Best Epoch'],
    'Train Loss': best_record['Train Loss'],
    'Train Acc (%)': best_record['Train Acc'] * 100,
    'Val Loss': best_record['Val Loss'],
    'Val Acc (%)': best_record['Val Acc'] * 100,
    'Test Loss': test_loss,
    'Test Acc (%)': test_acc * 100,
    'Avg Time/Epoch (s)': total_time / N_EPOCHS
}

print("\nNew Adam Result:")
display(pd.DataFrame([adam_result]))

Retrying Adam with Learning Rate 0.001...
Epoch: 01 | Train Loss: 0.682 | Val. Loss: 0.675 | Val. Acc: 56.89%
Epoch: 02 | Train Loss: 0.671 | Val. Loss: 0.703 | Val. Acc: 51.08%
Epoch: 03 | Train Loss: 0.680 | Val. Loss: 0.666 | Val. Acc: 58.81%
Epoch: 04 | Train Loss: 0.603 | Val. Loss: 0.630 | Val. Acc: 65.00%
Epoch: 05 | Train Loss: 0.599 | Val. Loss: 0.607 | Val. Acc: 66.46%
Epoch: 06 | Train Loss: 0.591 | Val. Loss: 0.685 | Val. Acc: 55.74%
Epoch: 07 | Train Loss: 0.678 | Val. Loss: 0.675 | Val. Acc: 56.77%
Epoch: 08 | Train Loss: 0.673 | Val. Loss: 0.660 | Val. Acc: 60.69%
Epoch: 09 | Train Loss: 0.620 | Val. Loss: 0.620 | Val. Acc: 65.02%
Epoch: 10 | Train Loss: 0.507 | Val. Loss: 0.559 | Val. Acc: 70.86%
Epoch: 11 | Train Loss: 0.448 | Val. Loss: 0.568 | Val. Acc: 72.75%
Epoch: 12 | Train Loss: 0.424 | Val. Loss: 0.542 | Val. Acc: 72.77%
Epoch: 13 | Train Loss: 0.378 | Val. Loss: 0.595 | Val. Acc: 75.64%
Epoch: 14 | Train Loss: 0.364 | Val. Loss: 0.564 | Val. Acc: 73.26%
Epoch:

Unnamed: 0,Optimizer,Best Epoch,Train Loss,Train Acc (%),Val Loss,Val Acc (%),Test Loss,Test Acc (%),Avg Time/Epoch (s)
0,Adam,12,0.424177,81.659769,0.542477,72.77101,0.550962,72.081202,3.697536


新的运行的adam如上。因为想要搞到好一点的best epoch


新的运行adagrad

In [None]:
import torch.optim as optim
import pandas as pd
import time

# --- 单独定义 Adagrad 的配置 ---
opt_name = 'Adagrad'
LEARNING_RATE = 1e-3  # 保持和之前一致的标准学习率
N_EPOCHS = 20

print(f"Re-running {opt_name} experiment...")

# 1. 初始化新模型 (使用之前定义的函数，确保环境纯净)
# 确保内存里还有 get_fresh_model, train, evaluate 等函数
model = get_fresh_model()
criterion = nn.BCEWithLogitsLoss().to(device)

# 2. 定义 Adagrad 优化器
optimizer = optim.Adagrad(model.parameters(), lr=LEARNING_RATE)

best_valid_loss = float('inf')
best_record = {}
total_time = 0

# 用于记录 Epoch 5, 10, 15, 20 的数据 (方便填 Table 2)
epoch_checkpoints = {}

# 3. 训练循环
for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()
    total_time += (end_time - start_time)

    # 打印 Log
    print(f'Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')

    # 自动记录关键 Epoch 的数据 (用于 Table 2)
    if (epoch + 1) in [5, 10, 15, 20]:
        epoch_checkpoints[epoch + 1] = valid_acc * 100

    # 保存最佳模型 (用于 Table 1)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), f'best_model_{opt_name}_rerun.pt')
        best_record = {
            'Best Epoch': epoch + 1,
            'Train Loss': train_loss,
            'Train Acc': train_acc,
            'Val Loss': valid_loss,
            'Val Acc': valid_acc
        }

# 4. 测试 (读取最佳模型)
print(f"\nLoading best model for {opt_name} to test...")
model.load_state_dict(torch.load(f'best_model_{opt_name}_rerun.pt'))
test_loss, test_acc = evaluate(model, test_iterator, criterion)

# 5. 展示结果
adagrad_result = {
    'Optimizer': opt_name,
    'Best Epoch': best_record['Best Epoch'],
    'Train Loss': best_record['Train Loss'],
    'Train Acc (%)': best_record['Train Acc'] * 100,
    'Val Loss': best_record['Val Loss'],
    'Val Acc (%)': best_record['Val Acc'] * 100,
    'Test Loss': test_loss,
    'Test Acc (%)': test_acc * 100,
    'Avg Time/Epoch (s)': total_time / N_EPOCHS
}

print("\n=== New Adagrad Result (Table 1) ===")
display(pd.DataFrame([adagrad_result]))

print("\n=== Checkpoints for Table 2 ===")
print(epoch_checkpoints)

Re-running Adagrad experiment...
Epoch: 01 | Train Loss: 0.677 | Val. Loss: 0.844 | Val. Acc: 50.20%
Epoch: 02 | Train Loss: 0.633 | Val. Loss: 0.618 | Val. Acc: 66.14%
Epoch: 03 | Train Loss: 0.610 | Val. Loss: 0.614 | Val. Acc: 66.51%
Epoch: 04 | Train Loss: 0.605 | Val. Loss: 0.650 | Val. Acc: 61.75%
Epoch: 05 | Train Loss: 0.614 | Val. Loss: 0.600 | Val. Acc: 68.21%
Epoch: 06 | Train Loss: 0.593 | Val. Loss: 0.593 | Val. Acc: 69.29%
Epoch: 07 | Train Loss: 0.580 | Val. Loss: 0.588 | Val. Acc: 69.59%
Epoch: 08 | Train Loss: 0.573 | Val. Loss: 0.584 | Val. Acc: 69.65%
Epoch: 09 | Train Loss: 0.571 | Val. Loss: 0.579 | Val. Acc: 69.85%
Epoch: 10 | Train Loss: 0.564 | Val. Loss: 0.575 | Val. Acc: 70.63%
Epoch: 11 | Train Loss: 0.560 | Val. Loss: 0.570 | Val. Acc: 71.00%
Epoch: 12 | Train Loss: 0.555 | Val. Loss: 0.571 | Val. Acc: 70.97%
Epoch: 13 | Train Loss: 0.558 | Val. Loss: 0.579 | Val. Acc: 70.30%
Epoch: 14 | Train Loss: 0.548 | Val. Loss: 0.566 | Val. Acc: 71.46%
Epoch: 15 | Tra

Unnamed: 0,Optimizer,Best Epoch,Train Loss,Train Acc (%),Val Loss,Val Acc (%),Test Loss,Test Acc (%),Avg Time/Epoch (s)
0,Adagrad,20,0.541213,73.454608,0.555671,72.303143,0.564365,71.720748,3.619664



=== Checkpoints for Table 2 ===
{5: 68.20709745762711, 10: 70.6302966101695, 15: 70.4758121805676, 20: 72.30314268904218}


In [None]:
# ... (前面的 Setup 和 get_model 函数保持不变) ...

optimizers_to_test = ['SGD', 'Adam', 'Adagrad']
results = []

N_EPOCHS = 20

print(f"Starting experiments with {N_EPOCHS} epochs each...")

for opt_name in optimizers_to_test:
    print(f"\nTraining with Optimizer: {opt_name}...")

    # 1. 重置模型
    model = get_model()
    criterion = nn.BCEWithLogitsLoss().to(device)

    # 2. 设置优化器
    if opt_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=1e-3)
    elif opt_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
    elif opt_name == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=1e-3)

    best_valid_loss = float('inf')

    # 用于临时存储最佳Val时刻对应的Train指标
    best_epoch_train_loss = 0
    best_epoch_train_acc = 0
    best_epoch_idx = 0

    total_train_time = 0

    # 3. 训练循环
    for epoch in range(N_EPOCHS):
        start_time = time.time()

        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

        end_time = time.time()
        total_train_time += (end_time - start_time)

        # 核心逻辑：只在 Validation Loss 变好的时候记录一切
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_epoch_idx = epoch + 1
            # 记录此时的 Train 指标
            best_epoch_train_loss = train_loss
            best_epoch_train_acc = train_acc
            # 保存此时的模型参数，以便稍后测试
            torch.save(model.state_dict(), f'best_model_{opt_name}.pt')

        # 打印过程 (可选)
        # print(f'Epoch: {epoch+1:02} | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')

    # 4. 循环结束，加载最佳模型跑 Test Set (只跑一次)
    model.load_state_dict(torch.load(f'best_model_{opt_name}.pt'))
    test_loss, test_acc = evaluate(model, test_iterator, criterion)

    # 5. 存入结果
    results.append({
        'Optimizer': opt_name,
        'Best Epoch': best_epoch_idx,
        'Train Loss': best_epoch_train_loss,
        'Train Acc (%)': best_epoch_train_acc * 100,
        'Val Loss': best_valid_loss,
        'Val Acc (%)': best_valid_acc * 100,
        'Test Loss': test_loss,
        'Test Acc (%)': test_acc * 100,
        'Avg Time/Epoch (s)': total_train_time / N_EPOCHS
    })

# 生成表格
df_results = pd.DataFrame(results)
pd.options.display.float_format = '{:,.4f}'.format
print("\nSummary of Task 2 Experiments:")
display(df_results)

Starting experiments with 20 epochs each...

Training with Optimizer: SGD...

Training with Optimizer: Adam...

Training with Optimizer: Adagrad...

Summary of Task 2 Experiments:


Unnamed: 0,Optimizer,Best Epoch,Train Loss,Train Acc (%),Val Loss,Val Acc (%),Test Loss,Test Acc (%),Avg Time/Epoch (s)
0,SGD,19,0.6925,51.3409,0.6926,69.1472,0.6941,50.4588,3.666
1,Adam,18,0.6918,52.813,0.6901,69.1472,0.6853,54.5444,3.7635
2,Adagrad,20,0.5876,70.2595,0.6108,69.1472,0.6115,67.7653,3.7004
