# Installing Necessary Libraries

In [None]:
!pip install onnx -q
!pip install onnxruntime -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m109.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m108.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m490.8 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h

# Configuration and Remote Embedding Extraction from Victim API

In [None]:
import requests, torch, json, io, base64, pickle, os, time
import numpy as np
from PIL import Image

# --- Victim API credentials and endpoint settings ---
TOKEN = "93145372"
SEED = "53027020"
PORT = "9935"

# --- Local batching / on-disk cache settings ---
BATCH_SIZE = 1000
SAVE_DIR = "embedding_batches"

# Load the dataset object serialized in a PyTorch .pt file.
# The rest of this notebook reads `dataset.imgs` (presumably a list of PIL
# images -- confirm against the dataset definition).
# NOTE: weights_only=False unpickles arbitrary Python objects; only load
# files from a trusted source.
dataset = torch.load("ModelStealingPub.pt", weights_only=False)

# Directory that holds one pickle per queried batch.
os.makedirs(SAVE_DIR, exist_ok=True)

image_count = len(dataset.imgs)
print(f"Loaded dataset with {image_count} images.")

# Victim API Query Function
def model_stealing(images, port, timeout=120):
    """Query the victim encoder API and return its output for ``images``.

    Each image is saved as PNG into an in-memory buffer, base64-encoded, and
    the resulting list is sent as a JSON string in the ``file`` field of an
    HTTP GET request to ``/query`` on the given port. The module-level
    ``TOKEN`` is sent in the ``token`` header.

    Args:
        images: Iterable of objects exposing ``save(fp, format=...)``
            (presumably PIL images -- confirm against the dataset).
        port: Port of the victim API; interpolated into the URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The value stored under ``"representations"`` in the JSON response.

    Raises:
        RuntimeError: If the request cannot be completed, the server answers
            with a non-200 status, or the response body is not the expected
            JSON. All failures use this one type so callers can handle them
            with a single ``except RuntimeError``.
    """
    url = f"http://34.122.51.94:{port}/query"  # API endpoint

    # Encode every image as base64 PNG; getvalue() returns the whole buffer,
    # so no seek is required.
    image_data = []
    for img in images:
        buffer = io.BytesIO()
        img.save(buffer, format='PNG')
        image_data.append(base64.b64encode(buffer.getvalue()).decode('utf-8'))

    # Send the encoded image list as a JSON string.
    try:
        r = requests.get(
            url,
            files={"file": json.dumps(image_data)},
            headers={"token": TOKEN},
            timeout=timeout,
        )
    except requests.RequestException as err:
        # Connection errors and timeouts become RuntimeError so the caller's
        # retry logic sees them too.
        raise RuntimeError(f"Query failed: {err}") from err

    if r.status_code != 200:
        raise RuntimeError(f"Query failed: {r.status_code} - {r.text}")

    try:
        return r.json()["representations"]
    except (ValueError, KeyError, TypeError) as err:
        raise RuntimeError(
            f"Query failed: malformed response - {r.text[:200]}"
        ) from err

# Query dataset in BATCH_SIZE-image batches
# Iterate over the dataset and query the victim model in chunks of BATCH_SIZE.
# Each successful batch is cached on disk, so re-running the cell resumes
# where it stopped. A failed batch is retried in place (the previous version
# moved on to the next batch despite announcing a retry).

total_images = len(dataset.imgs)
num_batches = (total_images + BATCH_SIZE - 1) // BATCH_SIZE  # Ceiling division
MAX_ATTEMPTS = 5  # Upper bound on queries per batch so a dead server cannot loop forever

for i in range(num_batches):
    path = os.path.join(SAVE_DIR, f"embeddings_batch_{i}.pickle")

    # Skip already processed batches to avoid redundant work
    if os.path.exists(path):
        print(f"Skipping batch {i}, already saved.")
        continue

    start = i * BATCH_SIZE
    end = min(start + BATCH_SIZE, total_images)
    batch_imgs = dataset.imgs[start:end]

    # Query the victim model, retrying the SAME batch on API errors
    batch_embeddings = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        print(f"Querying batch {i+1}/{num_batches} [{start}:{end}]...")
        try:
            batch_embeddings = model_stealing(batch_imgs, PORT)
            break
        except RuntimeError as e:
            print(f"Error for batch {i}: {e}")
            print("Sleeping 65 seconds before retrying batch...")
            time.sleep(65)  # Wait before the next request to avoid rate-limiting

    if batch_embeddings is None:
        # Best effort: leave this batch for a later re-run instead of crashing.
        print(f"Giving up on batch {i} after {MAX_ATTEMPTS} attempts.")
        continue

    # Save the dataset indices together with their embeddings using pickle
    with open(path, "wb") as f:
        pickle.dump((list(range(start, end)), batch_embeddings), f)
    print(f"Saved batch {i} to {path}")

    # Delay between batches to prevent hitting the server too quickly;
    # nothing follows the final batch, so no wait is needed there.
    if i < num_batches - 1:
        print("Waiting 65 seconds before next query to avoid rate limit...")
        time.sleep(65)

# Training an Encoder to Mimic API Embedding

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import onnxruntime as ort
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet18
import pickle, os

# --- Dataset pairing each image with its target embedding ---
class DatasetWithTargets(Dataset):
    """Index-aligned collection of images and target embeddings.

    Item ``idx`` is ``(image, target)`` where ``image`` is ``imgs[idx]``
    (passed through ``transform`` when one is given) and ``target`` is
    ``embeddings[idx]`` converted to a float32 tensor.
    """

    def __init__(self, imgs, embeddings, transform=None):
        self.imgs, self.embeddings, self.transform = imgs, embeddings, transform

    def __len__(self):
        # The dataset length follows the image list.
        return len(self.imgs)

    def __getitem__(self, idx):
        sample = self.imgs[idx]
        if self.transform:
            sample = self.transform(sample)
        target = torch.tensor(self.embeddings[idx], dtype=torch.float32)
        return sample, target

# --- ResNet18 variant that emits 1024-dimensional embeddings ---
class ResNet18Encoder(nn.Module):
    """ResNet18 feature extractor followed by a linear projection to 1024 dims.

    The stock network is altered in two places: the first convolution becomes
    a 3x3, stride-1 layer and the max-pool is replaced by an identity, so
    less spatial resolution is discarded early (intended for small input
    images). The original classification layer is dropped.
    """

    def __init__(self):
        super().__init__()
        resnet = resnet18(weights=None)  # Randomly initialized, no pretrained weights
        resnet.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        resnet.maxpool = nn.Identity()

        # Keep every child module except the last one (the final FC layer).
        feature_layers = list(resnet.children())[:-1]
        self.backbone = nn.Sequential(*feature_layers)

        # Flatten the pooled features and project 512 -> 1024.
        self.head = nn.Sequential(nn.Flatten(), nn.Linear(512, 1024))

    def forward(self, x):
        features = self.backbone(x)
        return self.head(features)

# --- Hybrid loss: weighted sum of cosine distance and mean squared error ---
class CosineL2Loss(nn.Module):
    """Blend of ``1 - mean cosine similarity`` and MSE.

    ``alpha`` weights the cosine term and ``1 - alpha`` weights the MSE term.
    """

    def __init__(self, alpha=0.5):
        super().__init__()
        self.alpha = alpha

    def forward(self, pred, target):
        # Cosine similarity is taken per row (dim=1) and averaged over rows.
        similarity = F.cosine_similarity(pred, target, dim=1)
        cosine_term = 1 - similarity.mean()
        mse_term = F.mse_loss(pred, target)
        return self.alpha * cosine_term + (1 - self.alpha) * mse_term

# --- Load images and their stolen embeddings from disk ---
embedding_dir = "embedding_batches"
# NOTE: weights_only=False (and pickle.load below) unpickle arbitrary Python
# objects; only use files from a trusted source.
dataset = torch.load("ModelStealingPub.pt", weights_only=False)
all_imgs, all_embeddings = [], []

# Only read the batch files this notebook writes; other directory entries
# (e.g. notebook checkpoint folders or stray files) would otherwise make
# open()/pickle.load fail.
batch_files = sorted(
    name for name in os.listdir(embedding_dir)
    if name.startswith("embeddings_batch_") and name.endswith(".pickle")
)

for file in batch_files:
    with open(os.path.join(embedding_dir, file), "rb") as f:
        indices, embeddings = pickle.load(f)

    # A length mismatch would silently shift every following image/target pair.
    if len(indices) != len(embeddings):
        raise ValueError(
            f"{file}: {len(indices)} indices but {len(embeddings)} embeddings"
        )

    all_imgs.extend(dataset.imgs[i] for i in indices)  # Original images by dataset index
    all_embeddings.extend(embeddings)  # Corresponding stolen embeddings

print(f"Loaded {len(all_imgs)} images and embeddings.")

# --- Data augmentation and normalization for training ---
def _to_rgb(img):
    """Return ``img`` in RGB mode, converting only when its mode differs."""
    return img if img.mode == "RGB" else img.convert("RGB")


# The RGB conversion runs first so every augmentation sees a 3-channel image,
# matching the order used by the evaluation pipeline. A named function is
# used instead of a lambda so the pipeline stays picklable if DataLoader
# workers are enabled.
train_transforms = transforms.Compose([
    transforms.Lambda(_to_rgb),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),  # Maps [0, 1] channels to [-1, 1]
])

# --- Create DataLoader for training ---
train_dataset = DatasetWithTargets(all_imgs, all_embeddings, transform=train_transforms)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# --- Model, optimizer, and learning scheduler setup ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet18Encoder().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Multiply the learning rate by 0.3 every 10 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.3)
criterion = CosineL2Loss(alpha=0.5)  # Loss combining cosine and L2

# --- Training loop ---
# Single source of truth for the epoch count (used by the loop and the log).
NUM_EPOCHS = 40

model.train()
for epoch in range(NUM_EPOCHS):
    total_loss = 0.0
    for imgs, targets in train_loader:
        imgs, targets = imgs.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = criterion(model(imgs), targets)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch figure is a per-sample mean even
        # when the last batch is smaller.
        total_loss += loss.item() * imgs.size(0)
    scheduler.step()
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {total_loss / len(train_loader.dataset):.4f}")

# Exporting the Stolen Encoder to ONNX and Evaluating Its Accuracy

In [None]:
# --- Export to ONNX ---
# Switch to inference mode BEFORE exporting: the model is still in train mode
# after the training loop, and layers such as BatchNorm behave differently
# (and update their running statistics) in that mode.
model.eval()

dummy_input = torch.randn(1, 3, 32, 32).to(device)
onnx_path = "stolen_encoder.onnx"
torch.onnx.export(model, dummy_input, onnx_path,
    input_names=["x"], output_names=["output"],
    # Mark the first axis as dynamic so any batch size is accepted.
    dynamic_axes={"x": {0: "batch_size"}, "output": {0: "batch_size"}},
    export_params=True, opset_version=11)

# --- ONNX Test ---
# Best-effort smoke test: run the exported graph once and compare it with the
# PyTorch model on the same input. Failures are reported, not raised.
try:
    ort_session = ort.InferenceSession(onnx_path)
    dummy_np = dummy_input.cpu().numpy().astype(np.float32)
    onnx_out = ort_session.run(None, {"x": dummy_np})[0]
    print("ONNX model output shape:", onnx_out.shape)

    with torch.no_grad():
        torch_out = model(dummy_input).cpu().numpy()
    print("Max abs difference ONNX vs PyTorch:", float(np.abs(onnx_out - torch_out).max()))
except Exception as e:
    print("ONNX test failed:", e)

# --- Evaluate L2 Distance ---
# Deterministic preprocessing (no augmentation): RGB conversion, tensor
# conversion, then the same normalization as in training.
val_transforms = transforms.Compose([
    transforms.Lambda(lambda im: im if im.mode == "RGB" else im.convert("RGB")),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

# The comparison runs over the same images/embeddings that were loaded for
# training, in their stored order (shuffle=False keeps rows aligned).
val_dataset = DatasetWithTargets(all_imgs, all_embeddings, transform=val_transforms)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

model.eval()
with torch.no_grad():
    # Collect the stolen model's outputs batch by batch and stack them.
    stolen_outputs = np.concatenate(
        [model(batch.to(device)).cpu().numpy() for batch, _ in val_loader],
        axis=0,
    )

victim_outputs = np.array(all_embeddings)

# Per-sample Euclidean distance, plus the same distance relative to the
# norm of the victim embedding.
l2_distances = np.linalg.norm(victim_outputs - stolen_outputs, axis=1)
victim_norms = np.linalg.norm(victim_outputs, axis=1)
print(f"\n Raw Average L2 distance: {l2_distances.mean():.4f}")
print(f" Normalized L2 distance: {(l2_distances / victim_norms).mean():.4f}")

# Submitting the Model to the Server

In [None]:
# --- Submit the ONNX model to the server ---
# The token and seed travel as HTTP headers; the model file is uploaded as
# the multipart field "file".
headers = dict(token=TOKEN, seed=SEED)
submit_url = "http://34.122.51.94:9090/stealing"

try:
    with open(onnx_path, "rb") as model_file:
        response = requests.post(submit_url, files={"file": model_file}, headers=headers)
        print("Submission status code:", response.status_code)
        print("Server response:", response.text)
except Exception as e:
    # Top-level boundary of the notebook: report the failure, do not raise.
    print("Submission failed:", e)