In [99]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

In [101]:
# Mini-batch size handed to both the training and the test DataLoader.
batch_size = 128
# Upper bound on the number of training epochs; the training loop may stop
# earlier through its early-stopping check.
NUM_EPOCH = 300

In [107]:
class OttoDataset(Dataset):
    """Otto CSV data exposed as standardized ``(features, label)`` tensor pairs.

    The constructor reads ``otto_train.csv.zip`` (training) or
    ``otto_test.csv.zip`` (test) from the working directory, drops the ``id``
    column and z-score-normalizes every remaining feature column.

    Args:
        is_train_set: ``True`` loads the training file, maps the string class
            names to integer labels and computes the normalization statistics
            from the data. ``False`` loads the test file and normalizes it with
            the statistics passed in.
        feature_means: Per-feature means to use for the test set (ignored and
            recomputed for the training set).
        feature_stds: Per-feature standard deviations to use for the test set
            (ignored and recomputed for the training set).

    Attributes:
        features: ``float32`` tensor holding the normalized feature columns.
        labels: ``long`` tensor of class indices (0-8) for the training set;
            the placeholder ``-1`` for a test file without a ``target`` column.
        feature_means / feature_stds: Statistics used for normalization, so a
            training instance can pass them on to a test instance.
        id: Copy of the ``id`` column; only set when ``is_train_set`` is False.
        df: The preprocessed DataFrame (normalized features plus ``target``).
        len: Number of rows.

    Raises:
        ValueError: If the training data has no ``target`` column or contains a
            label outside ``Class_1`` .. ``Class_9``, or if a test set is built
            without ``feature_means`` / ``feature_stds``.
    """

    # String class names -> zero-based integer labels.
    CLASS_TO_INDEX = {f'Class_{i}': i - 1 for i in range(1, 10)}

    def __init__(self, is_train_set=True, feature_means=None, feature_stds=None):
        self.is_train_set = is_train_set
        filepath = 'otto_train.csv.zip' if is_train_set else 'otto_test.csv.zip'
        self.df = pd.read_csv(filepath, compression='zip')

        # Keep the row ids of the test file before _preprocess() drops them.
        if not is_train_set:
            self.id = self.df['id'].copy()

        self.feature_means = feature_means
        self.feature_stds = feature_stds

        self._preprocess()

        # _preprocess() always leaves a 'target' column in self.df, so the
        # features are simply every other column.
        features_df = self.df.drop('target', axis=1)
        self.features = torch.tensor(features_df.values, dtype=torch.float32)
        self.labels = torch.tensor(self.df['target'].values, dtype=torch.long)
        self.len = len(self.df)

    def _preprocess(self):
        """Drop ``id``, encode the labels and standardize the feature columns in ``self.df``."""
        self.df = self.df.drop('id', axis=1)

        if self.is_train_set:
            if 'target' not in self.df.columns:
                raise ValueError("Training set requires a 'target' column")
            encoded = self.df['target'].map(self.CLASS_TO_INDEX)
            # Series.map yields NaN for any value missing from the mapping;
            # casting that to an integer label later would silently produce
            # garbage, so fail loudly instead.
            unknown_mask = encoded.isna()
            if unknown_mask.any():
                unknown = sorted(self.df.loc[unknown_mask, 'target'].astype(str).unique())
                raise ValueError(f"Unknown target labels: {unknown}")
            self.df['target'] = encoded.astype('int64')
        elif 'target' not in self.df.columns:
            # Placeholder label so __getitem__ can always return a pair.
            self.df['target'] = -1

        features = self.df.drop('target', axis=1)
        targets = self.df['target']

        if self.is_train_set:
            self.feature_means = features.mean(axis=0)
            # A constant column has std 0; use 1 so it normalizes to 0 instead
            # of dividing by (almost) zero.
            self.feature_stds = features.std(axis=0).replace(0, 1)
        elif self.feature_means is None or self.feature_stds is None:
            raise ValueError("Test set requires precomputed feature means/stds")

        # The small epsilon is kept from the original formula so the
        # normalized values stay numerically identical.
        features = (features - self.feature_means) / (self.feature_stds + 1e-8)

        self.df = pd.concat([features, targets], axis=1)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index``."""
        return self.features[index], self.labels[index]

    def __len__(self):
        """Return the number of rows in the dataset."""
        return self.len

In [115]:
# Training split: its normalization statistics are computed from the data itself.
train_set = OttoDataset(is_train_set=True)
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)

# Test split: standardized with the statistics taken from the training split.
# shuffle=False keeps the rows in file order.
test_set = OttoDataset(False, train_set.feature_means, train_set.feature_stds)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

In [117]:
class Model(torch.nn.Module):
    """Fully connected classifier that returns raw class logits.

    Architecture: ``input_dim -> 256 -> 128 -> 128 -> 64 -> 32 -> num_classes``.
    The first three hidden layers use batch normalization, every hidden layer
    uses ReLU, and dropout follows the first four hidden layers.

    Args:
        input_dim: Number of features per sample (default 93).
        num_classes: Number of output logits (default 9).
        dropout_p: Dropout probability (default 0.3).
    """

    def __init__(self, input_dim=93, num_classes=9, dropout_p=0.3):
        super().__init__()
        self.input_dim = input_dim
        # Attribute names and creation order are unchanged, so state_dict keys
        # (fc1..fc6, bn1..bn3) and seeded initialization match the original.
        self.fc1 = torch.nn.Linear(input_dim, 256)
        self.fc2 = torch.nn.Linear(256, 128)
        self.fc3 = torch.nn.Linear(128, 128)
        self.fc4 = torch.nn.Linear(128, 64)
        self.fc5 = torch.nn.Linear(64, 32)
        self.fc6 = torch.nn.Linear(32, num_classes)
        self.bn1 = torch.nn.BatchNorm1d(256)
        self.bn2 = torch.nn.BatchNorm1d(128)
        self.bn3 = torch.nn.BatchNorm1d(128)
        self.relu = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(dropout_p)

    def forward(self, x):
        """Map a batch of feature vectors to logits of shape ``(batch, num_classes)``."""
        # Flatten whatever leading shape arrives into (batch, input_dim).
        x = x.view(-1, self.input_dim)
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = self.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.relu(self.fc4(x))
        x = self.dropout(x)
        x = self.relu(self.fc5(x))
        # No softmax here: the output is the raw fc6 activation.
        x = self.fc6(x)
        return x

model = Model()

In [122]:
# Classification loss applied to the model's raw logits.
criterion = torch.nn.CrossEntropyLoss()

# Adam optimizer plus a plateau scheduler. mode='max' because the training
# loop feeds it an accuracy, which should increase; the learning rate is
# halved (factor=0.5) when that metric stalls, with patience=5.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=5,
)

# Early-stopping bookkeeping read and updated by the training loop.
best_accuracy = 0
no_improve_count = 0
patience = 10

In [126]:
def train(epoch):
    """Run one training epoch over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``train_loader``. Gradients are clipped to a total norm of 1.0 before each
    optimizer step, and a progress line is printed every 100 batches.

    Args:
        epoch: Zero-based epoch index; only used in the progress message.

    Returns:
        The mean of the per-batch losses over the whole epoch, or ``0.0`` when
        the loader yields no batches.
    """
    model.train()
    total_batches = len(train_loader)
    window_loss = 0.0  # accumulated since the last progress print
    epoch_loss = 0.0   # accumulated over the whole epoch

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()

        # The logits are used as returned, shape (batch, classes). Squeezing
        # them would collapse a batch of size 1 to a 1-D tensor that no longer
        # matches the 1-D label tensor.
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        batch_loss = loss.item()
        window_loss += batch_loss
        epoch_loss += batch_loss
        if batch_idx % 100 == 99:
            avg_loss = window_loss / 100
            current_lr = optimizer.param_groups[0]['lr']
            print(f'Epoch [{epoch+1}], Batch [{batch_idx+1}/{total_batches}], Loss: {avg_loss:.4f}, LR: {current_lr:.6f}')
            # Only the logging window is reset; the epoch total keeps growing
            # so the returned value covers every batch.
            window_loss = 0.0

    return epoch_loss / total_batches if total_batches > 0 else 0.0


def evaluate(loader=None):
    """Compute accuracy and mean batch loss of ``model`` in eval mode.

    Uses the module-level ``model`` and ``criterion``. By default the data
    comes from the module-level ``train_loader``, i.e. the accuracy is measured
    on the training set.

    Args:
        loader: Optional iterable of ``(inputs, labels)`` batches supporting
            ``len()``. Defaults to ``train_loader``.

    Returns:
        ``(accuracy, avg_loss)``: accuracy in percent and the mean of the
        per-batch losses. Both are ``0.0`` when the loader is empty.
    """
    if loader is None:
        loader = train_loader

    model.eval()
    correct = 0
    total = 0
    eval_loss = 0.0

    with torch.no_grad():
        for inputs, labels in loader:
            # Logits stay (batch, classes); squeezing would break a batch of
            # size 1 for both the loss and the max over dim 1.
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            eval_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    num_batches = len(loader)
    accuracy = 100 * correct / total if total > 0 else 0.0
    avg_loss = eval_loss / num_batches if num_batches > 0 else 0.0
    print(f'Evaluation Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')
    return accuracy, avg_loss

# Per-epoch history. evaluate() is called without arguments and iterates
# train_loader, so the values stored in val_accuracies are training-set
# accuracies rather than held-out validation scores.
train_losses = []
val_accuracies = []

for epoch in range(NUM_EPOCH):
    epoch_loss = train(epoch)
    train_losses.append(epoch_loss)

    accuracy, eval_loss = evaluate()
    # Store the accuracy evaluate() just returned. The name used here before
    # (train_accuracy) is never assigned anywhere, which raises NameError on a
    # fresh kernel.
    val_accuracies.append(accuracy)

    # The scheduler was created with mode='max', so it is stepped on accuracy.
    scheduler.step(accuracy)

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        no_improve_count = 0
        # Checkpoint the best weights so far; the inference cell reloads them.
        torch.save(model.state_dict(), 'best_model.pth')
        print(f'New best model saved with accuracy: {best_accuracy:.2f}%')
    else:
        no_improve_count += 1
        print(f'No improvement for {no_improve_count}/{patience} epochs')

        # Early stopping: give up after `patience` epochs without a new best.
        if no_improve_count >= patience:
            print(f'Early stopping at epoch {epoch+1}')
            break

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{NUM_EPOCH}], Train Loss: {epoch_loss:.4f}, Acc: {accuracy: .2f}%')

print('Finished Training')

Epoch [1], Batch [100/484], Loss: 1.3803, LR: 0.001000
Epoch [1], Batch [200/484], Loss: 0.9031, LR: 0.001000
Epoch [1], Batch [300/484], Loss: 0.7976, LR: 0.001000
Epoch [1], Batch [400/484], Loss: 0.7638, LR: 0.001000
Evaluation Loss: 0.6269, Accuracy: 75.31%
New best model saved with accuracy: 75.31%
Epoch [2], Batch [100/484], Loss: 0.7031, LR: 0.001000
Epoch [2], Batch [200/484], Loss: 0.6996, LR: 0.001000
Epoch [2], Batch [300/484], Loss: 0.6785, LR: 0.001000
Epoch [2], Batch [400/484], Loss: 0.6876, LR: 0.001000
Evaluation Loss: 0.5749, Accuracy: 77.33%
New best model saved with accuracy: 77.33%
Epoch [3], Batch [100/484], Loss: 0.6429, LR: 0.001000
Epoch [3], Batch [200/484], Loss: 0.6598, LR: 0.001000
Epoch [3], Batch [300/484], Loss: 0.6521, LR: 0.001000
Epoch [3], Batch [400/484], Loss: 0.6471, LR: 0.001000
Evaluation Loss: 0.5557, Accuracy: 78.44%
New best model saved with accuracy: 78.44%
Epoch [4], Batch [100/484], Loss: 0.6533, LR: 0.001000
Epoch [4], Batch [200/484], Lo

In [136]:
# Restore the checkpoint written during training and switch to inference mode.
model.load_state_dict(torch.load('best_model.pth'))
model.eval()

# Row ids kept by the test dataset; fall back to an empty list if it has none.
if hasattr(test_set, 'id'):
    test_id = test_set.id.tolist()
else:
    test_id = []

# Collect the predicted class index of every test batch, in loader order.
all_predictions = []
with torch.no_grad():
    for inputs, _ in test_loader:
        outputs = model(inputs)
        predicted = torch.max(outputs, dim=1).indices
        all_predictions.append(predicted)

# Join the per-batch index tensors into one NumPy array.
all_predictions = torch.cat(all_predictions).numpy()


In [138]:
# Submission column names: Class_1 ... Class_9.
class_columns = [f'Class_{i}' for i in range(1, 10)]

# Row k of the identity matrix is the one-hot vector for class index k, so
# indexing it with the predictions gives one one-hot row per sample.
one_hot_predictions = np.eye(len(class_columns))[all_predictions]

# Assemble the table and put the id column first.
submission = pd.DataFrame(one_hot_predictions, columns=class_columns)
submission.insert(0, 'id', test_id[:len(all_predictions)])

In [140]:
# Write the submission table to CSV; index=False leaves out the DataFrame's
# row index so only the id and class columns are written.
submission.to_csv('otto_submission.csv', index=False)
print('Submission file created')

Submission file created
