In [1]:
import requests
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
import time
import base64
import json
import io

In [39]:
# 1. Constants
TOKEN = "REDACTED"
SEED = "REDACTED"
PORT = "REDACTED"
MAX_QUERIES = 10  # Max 10k images (10 queries × 1000 images)
NUM_SAMPLES = 2  # Average 2 queries per batch to reduce noise

In [34]:
# 2. Dataset Class with RGB Conversion
class TaskDataset(Dataset):
    def __init__(self, images, transform=None):
        self.images = images
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]
        if img.mode != 'RGB':
            img = img.convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img

In [36]:
# 5. Noise-Averaged Query Function
def get_averaged_representations(images, port, num_samples=NUM_SAMPLES):
    all_reps = []
    for _ in range(num_samples):
        pil_images = [transforms.ToPILImage()(img) for img in images]
        payload = encode_images(pil_images)
        response = requests.get(
            f"http://34.122.51.94:{port}/query",
            files={"file": payload},
            headers={"token": TOKEN}
        )
        if response.status_code == 200:
            reps = torch.tensor(response.json()["representations"], dtype=torch.float32)
            all_reps.append(reps)
        else:
            raise Exception(f"Query failed: {response.status_code}, {response.json()}")
        time.sleep(60)  # Rate limit
    return torch.mean(torch.stack(all_reps), dim=0)

In [23]:
# 4. Launch API & Load Data
public_dataset = torch.load("ModelStealingPub.pt", weights_only=False)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [24]:

dataset = TaskDataset(public_dataset.imgs, transform=transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [25]:
# 6. Collect Data with Noise Averaging
all_images = []
all_representations = []

for i in range(MAX_QUERIES):
    start_idx = i * 1000
    batch_images = [dataset[start_idx + j] for j in range(1000)]
    batch_tensor = torch.stack(batch_images)  # Shape: (1000, 3, 32, 32)

    # Query the same batch multiple times and average
    avg_reps = get_averaged_representations(batch_images, PORT, num_samples=NUM_SAMPLES)

    all_images.append(batch_tensor)
    all_representations.append(avg_reps)
    print(f"Collected batch {i+1}/{MAX_QUERIES}")



Collected batch 1/10
Collected batch 2/10
Collected batch 3/10
Collected batch 4/10
Collected batch 5/10
Collected batch 6/10
Collected batch 7/10
Collected batch 8/10
Collected batch 9/10
Collected batch 10/10


In [26]:
# Flatten collected data
all_images = torch.cat(all_images)
all_representations = torch.cat(all_representations)

# 7. Define Model and Loss
model = nn.Sequential(
    nn.Conv2d(3, 32, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(32, 64, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Flatten(),
    nn.Linear(64 * 8 * 8, 1024)
)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [30]:
# 8. Train Loop
train_dataset = torch.utils.data.TensorDataset(all_images, all_representations)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

for epoch in range(20):  # Train for 5 epochs
    model.train()
    total_loss = 0
    for images, targets in train_loader:
        stolen_reps = model(images.float())  # Ensure float32
        loss = criterion(stolen_reps, targets.float())  # Ensure targets are float32
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1, Loss: 0.0014
Epoch 2, Loss: 0.0014
Epoch 3, Loss: 0.0014
Epoch 4, Loss: 0.0013
Epoch 5, Loss: 0.0013
Epoch 6, Loss: 0.0012
Epoch 7, Loss: 0.0012
Epoch 8, Loss: 0.0012
Epoch 9, Loss: 0.0011
Epoch 10, Loss: 0.0011
Epoch 11, Loss: 0.0011
Epoch 12, Loss: 0.0010
Epoch 13, Loss: 0.0010
Epoch 14, Loss: 0.0011
Epoch 15, Loss: 0.0010
Epoch 16, Loss: 0.0010
Epoch 17, Loss: 0.0009
Epoch 18, Loss: 0.0009
Epoch 19, Loss: 0.0009
Epoch 20, Loss: 0.0008


In [31]:
path = "submission.onnx"
dummy_input = torch.randn(1, 3, 32, 32)
torch.onnx.export(
    model,
    dummy_input,
    path,
    input_names=["x"],
    output_names=["output"],
    dynamic_axes={"x": {0: "batch_size"}},
    opset_version=13
)

In [32]:

# 10. Submit to Server
response = requests.post(
    "http://34.122.51.94:9090/stealing",
    files={"file": open(path, "rb")},
    headers={"token": TOKEN, "seed": SEED}
)
print(response.json())

{'L2': 5.882108211517334}
