In [4]:
%pip install torch

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Downloading torch-2.2.1-cp310-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting filelock (from torch)
  Downloading filelock-3.13.1-py3-none-any.whl.metadata (2.8 kB)
Collecting sympy (from torch)
  Downloading sympy-1.12-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Downloading networkx-3.2.1-py3-none-any.whl.metadata (5.2 kB)
Collecting jinja2 (from torch)
  Downloading Jinja2-3.1.3-py3-none-any.whl.metadata (3.3 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2024.3.0-py3-none-any.whl.metadata (6.8 kB)
Collecting mpmath>=0.19 (from sympy->torch)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.2.1-cp310-none-macosx_11_0_arm64.whl (59.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.7/59.7 MB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading filelock-3.13.1-py3-none-any

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from scipy import stats
import multiprocessing
from tensorflow.keras.datasets import mnist

In [3]:
# Define the SVM model using PyTorch
class SVM(nn.Module):
    def __init__(self):
        super(SVM, self).__init__()
        self.linear = nn.Linear(28 * 28, 10)  # 28x28 input, 10 classes

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the input
        return self.linear(x)

# Initialize the SVM model
svm_model = SVM()

# Define optimizer and loss function
optimizer = optim.SGD(svm_model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

In [4]:
# Load the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [5]:

# Convert data to PyTorch tensors
train_images_tensor = torch.from_numpy(train_images).float()
train_labels_tensor = torch.from_numpy(train_labels).long()
test_images_tensor = torch.from_numpy(test_images).float()
test_labels_tensor = torch.from_numpy(test_labels).long()


In [6]:

# Create DataLoader for training data
train_dataset = TensorDataset(train_images_tensor, train_labels_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)


In [10]:

# Train the SVM model
def train_model(model, criterion, optimizer, train_loader, num_epochs=10):
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()


In [11]:

# Train the model
train_model(svm_model, criterion, optimizer, train_loader)


In [21]:

# Test the model
def test_model(model, test_images, test_labels):
    outputs = model(test_images)
    _, predicted = torch.max(outputs, 1)
    correct = (predicted == test_labels).sum().item()
    accuracy = correct / test_labels.size(0)
    return accuracy

# Calculate accuracy on the original test set
accuracy = test_model(svm_model, test_images_tensor.view(-1, 28 * 28), test_labels_tensor)
print("Accuracy:", accuracy)


Accuracy: 0.8618


In [32]:
# Number of permutation test iterations
n_iterations = 20

# List to store accuracies from permutation tests
permutation_test_accuracies = []

for i in range(n_iterations):
    print(i)
    # Permute labels of the training set
    permuted_train_labels = np.random.permutation(train_labels)
    permuted_train_labels_tensor = torch.from_numpy(permuted_train_labels).long()
    # Create DataLoader for training data

    permuted_train_dataset = TensorDataset(train_images_tensor, permuted_train_labels_tensor)
    permuted_train_loader = DataLoader(permuted_train_dataset, batch_size=64, shuffle=True)
    
    # Retrain SVM model on permuted training labels
    train_model(svm_model, criterion, optimizer, permuted_train_loader)
    
    # Calculate accuracy on the original test set
    accuracy = test_model(svm_model, test_images_tensor.view(-1, 28 * 28), test_labels_tensor)
    permutation_test_accuracies.append(accuracy)
    print("Permutation test accuracy:", accuracy)

# Observed accuracy from the original model
train_model(svm_model, criterion, optimizer, train_loader)

observed_accuracy = test_model(svm_model, test_images_tensor.view(-1, 28 * 28), test_labels_tensor)

# Calculate the p-value
p_value = (np.sum(np.array(permutation_test_accuracies) >= observed_accuracy) + 1) / (n_iterations + 1)

# Compute the z-score corresponding to the significance level (alpha)
alpha = 0.05
z_score = stats.norm.ppf(1 - alpha / 2)

# Compute the standard error of the permutation test accuracies
std_error = np.std(permutation_test_accuracies) / np.sqrt(n_iterations)

# Compute the margin of error
margin_of_error = z_score * std_error

# Calculate the lower and upper bounds of the confidence interval
lower_bound = observed_accuracy - margin_of_error
upper_bound = observed_accuracy + margin_of_error

print("p-value:", p_value)
print("95% Confidence Interval:", (lower_bound, upper_bound))

0
Permutation test accuracy: 0.0793
1
Permutation test accuracy: 0.1585
2
Permutation test accuracy: 0.0281
3
Permutation test accuracy: 0.1732
4
Permutation test accuracy: 0.1025
5
Permutation test accuracy: 0.0918
6
Permutation test accuracy: 0.13
7
Permutation test accuracy: 0.0852
8
Permutation test accuracy: 0.1032
9
Permutation test accuracy: 0.0884
10
Permutation test accuracy: 0.1314
11
Permutation test accuracy: 0.1219
12
Permutation test accuracy: 0.0871
13
Permutation test accuracy: 0.1017
14
Permutation test accuracy: 0.1145
15
Permutation test accuracy: 0.0899
16
Permutation test accuracy: 0.0712
17
Permutation test accuracy: 0.0938
18
Permutation test accuracy: 0.1382
19
Permutation test accuracy: 0.0402
p-value: 0.047619047619047616
95% Confidence Interval: (0.8574704786112861, 0.887529521388714)
