In [None]:
import numpy as np

def targeted_class_label_flip(X, y, target_class=0, flip_to_class=1, flip_fraction=0.5, seed=42):
    """
    Flip a fraction of the labels of one class to another class.

    This simulates a targeted label flipping (data poisoning) attack: a random
    subset of the samples labelled ``target_class`` is relabelled as
    ``flip_to_class``. The features are not modified.

    Args:
        X: Feature data. Returned unchanged (the very same object).
        y: Array-like of labels. Converted with ``np.asarray`` so that plain
            Python lists are compared element-wise as well.
        target_class: Label value whose samples are candidates for flipping.
        flip_to_class: Label value written to the selected samples.
        flip_fraction: Fraction in [0, 1] of the ``target_class`` samples to
            flip. The resulting count is truncated towards zero.
        seed: Seed of the private random generator that picks the samples.

    Returns:
        Tuple ``(X, y_poisoned)``. ``y_poisoned`` is a new array; ``y`` itself
        is left untouched.

    Raises:
        ValueError: If ``flip_fraction`` is not within [0, 1].
    """
    if not 0.0 <= flip_fraction <= 1.0:
        raise ValueError(f"flip_fraction must be between 0 and 1, got {flip_fraction}")

    # Without this, a list `y` would make `y == target_class` a single bool
    # and no label would ever be selected.
    y = np.asarray(y)

    # A private RandomState yields the same draws as np.random.seed(seed)
    # followed by np.random.choice, but does not reseed the global generator.
    rng = np.random.RandomState(seed)

    # Find indices of the target class
    target_indices = np.where(y == target_class)[0]

    # Determine number of samples to flip
    n_to_flip = int(len(target_indices) * flip_fraction)

    # Randomly choose which target_class samples to flip
    flip_indices = rng.choice(target_indices, n_to_flip, replace=False)

    # Copy labels to avoid modifying original
    y_poisoned = np.copy(y)
    y_poisoned[flip_indices] = flip_to_class

    return X, y_poisoned


In [None]:
import numpy as np
import json
import requests
from sklearn.linear_model import LogisticRegression
import os

# --- Notebook configuration -------------------------------------------------
dataset_filename = "label_flipping_dataset.npz"
random_seed = 1337  # Seed for reproducibility in attack & model training
np.random.seed(random_seed)  # Apply seed globally if needed, or pass to functions

# >>> IMPORTANT: SET THIS VARIABLE TO YOUR SPAWNED INSTANCE IP AND PORT<<<
evaluator_base_url = "http://94.237.54.192:49383"  # CHANGE THIS
# Example: evaluator_base_url = "http://10.10.10.1:5555"

# Attack Configuration
TARGET_CLASS_TO_POISON = 0  # We want to make the model bad at identifying Class 0
NEW_LABEL_FOR_POISONED = 1  # We want it to predict Class 1 instead
POISON_FRACTION = 0.7

# --- Load Data --------------------------------------------------------------
# The archive is opened with a context manager so its file handle is released
# even when one of the key lookups below raises.
print(f"Loading data from: {dataset_filename}")
try:
    with np.load(dataset_filename) as data:
        X_train = data["Xtr"]
        y_train = data["ytr"]
        X_test = data["Xte"]
        y_test = data["yte"]
    print("Data loaded successfully.")
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    unique_classes_train = np.unique(y_train)
    print(f"Unique classes in training data: {unique_classes_train}")
    # Only a warning: the attack is still attempted if a class is missing.
    if (
        TARGET_CLASS_TO_POISON not in unique_classes_train
        or NEW_LABEL_FOR_POISONED not in unique_classes_train
    ):
        print("Warning: Target or new label class not found in training data.")
except FileNotFoundError:
    print(f"Error: Dataset file '{dataset_filename}' not found.")
    raise
except KeyError as e:
    print(f"Error: Could not find expected array key '{e}' in the .npz file.")
    raise
except Exception as e:
    print(f"An unexpected error occurred during data loading: {e}")
    raise

In [None]:
def targeted_class_label_flip(y, target_class=0, new_label=1, poison_fraction=0.7, seed=1337):
    """
    Relabel a random fraction of one class as another class.

    Args:
        y: Array-like of labels. Converted with ``np.asarray`` so that plain
            Python lists are compared element-wise as well.
        target_class: Label value whose samples are candidates for flipping.
        new_label: Label value written to the selected samples.
        poison_fraction: Fraction in [0, 1] of the ``target_class`` samples to
            flip. The resulting count is truncated towards zero.
        seed: Seed of the private random generator that picks the samples.

    Returns:
        Tuple ``(y_poisoned, flip_indices)``: a modified copy of the labels
        (``y`` is left untouched) and the positions that were relabelled.

    Raises:
        ValueError: If ``poison_fraction`` is not within [0, 1].
    """
    if not 0.0 <= poison_fraction <= 1.0:
        raise ValueError(f"poison_fraction must be between 0 and 1, got {poison_fraction}")

    # Without this, a list `y` would make `y == target_class` a single bool
    # and no label would ever be selected.
    y = np.asarray(y)

    # A private RandomState yields the same draws as np.random.seed(seed)
    # followed by np.random.choice, but does not reseed the global generator.
    rng = np.random.RandomState(seed)

    target_indices = np.where(y == target_class)[0]
    n_to_flip = int(len(target_indices) * poison_fraction)
    flip_indices = rng.choice(target_indices, n_to_flip, replace=False)

    y_poisoned = np.copy(y)
    y_poisoned[flip_indices] = new_label
    return y_poisoned, flip_indices

# Poison the training labels with the values from the attack configuration.
# Keyword arguments are used deliberately: this notebook also contains an
# older (X, y, ...) variant of the function, and keywords make a signature
# mismatch fail loudly instead of silently mis-binding arguments.
y_train_poisoned, flipped_idx = targeted_class_label_flip(
    y_train,
    target_class=TARGET_CLASS_TO_POISON,
    new_label=NEW_LABEL_FOR_POISONED,
    poison_fraction=POISON_FRACTION,
    seed=random_seed,
)

# flipped_idx holds the positions in y_train whose label was changed.
print(f"✅ Flipped {len(flipped_idx)} labels in training data.")


In [None]:
# %%
random_seed = 1337  # Same value as the configuration cell; set again so it is certainly defined

print("\n--- Training Model on Poisoned Labels ---")
model = LogisticRegression(random_state=random_seed, max_iter=1000, solver="lbfgs")

# Fit on the clean features paired with the poisoned labels. Failures are
# reported and then re-raised so the notebook run stops here.
try:
    model.fit(X_train, y_train_poisoned)
except Exception as fit_error:
    print(f"Error during model training: {fit_error}")
    raise
else:
    print("Logistic Regression model trained successfully.")


In [None]:
# Fit a plain logistic regression classifier on the poisoned training labels.
# The fit call is kept as the last expression so the cell displays the estimator.
model = LogisticRegression(
    random_state=random_seed,
    max_iter=1000,
    solver="lbfgs",
)
model.fit(X_train, y_train_poisoned)


In [None]:
import numpy as np

# Reload the four dataset arrays from the .npz archive.
dataset_filename = "label_flipping_dataset.npz"

# The context manager closes the archive even if a key is missing, which the
# previous explicit close() call did not guarantee.
with np.load(dataset_filename) as data:
    X_train = data["Xtr"]
    y_train = data["ytr"]
    X_test = data["Xte"]
    y_test = data["yte"]


In [None]:
def targeted_class_label_flip(y, target_class=0, new_label=1, poison_fraction=0.7, seed=1337):
    """
    Relabel a random fraction of one class as another class.

    Args:
        y: Array-like of labels. Converted with ``np.asarray`` so that plain
            Python lists are compared element-wise as well.
        target_class: Label value whose samples are candidates for flipping.
        new_label: Label value written to the selected samples.
        poison_fraction: Fraction in [0, 1] of the ``target_class`` samples to
            flip. The resulting count is truncated towards zero.
        seed: Seed of the private random generator that picks the samples.

    Returns:
        Tuple ``(y_poisoned, flip_indices)``: a modified copy of the labels
        (``y`` is left untouched) and the positions that were relabelled.

    Raises:
        ValueError: If ``poison_fraction`` is not within [0, 1].
    """
    if not 0.0 <= poison_fraction <= 1.0:
        raise ValueError(f"poison_fraction must be between 0 and 1, got {poison_fraction}")

    # Without this, a list `y` would make `y == target_class` a single bool
    # and no label would ever be selected.
    y = np.asarray(y)

    # A private RandomState yields the same draws as np.random.seed(seed)
    # followed by np.random.choice, but does not reseed the global generator.
    rng = np.random.RandomState(seed)

    target_indices = np.where(y == target_class)[0]
    n_to_flip = int(len(target_indices) * poison_fraction)
    flip_indices = rng.choice(target_indices, n_to_flip, replace=False)

    y_poisoned = np.copy(y)
    y_poisoned[flip_indices] = new_label
    return y_poisoned, flip_indices


In [None]:
# Build the poisoned labels that the model below is trained on.
# The attack parameters come from the configuration cell rather than being
# repeated as literals, so that tuning POISON_FRACTION (as the submission
# hints suggest) actually changes the submitted model. The configured values
# (0, 1, 0.7, 1337) are the same ones this cell previously hard-coded.
y_train_poisoned, flipped_idx = targeted_class_label_flip(
    y_train,
    target_class=TARGET_CLASS_TO_POISON,
    new_label=NEW_LABEL_FOR_POISONED,
    poison_fraction=POISON_FRACTION,
    seed=random_seed,
)


In [None]:
from sklearn.linear_model import LogisticRegression

random_seed = 1337

print("\n--- Training Model on Poisoned Labels ---")
model = LogisticRegression(
    random_state=random_seed,
    max_iter=1000,
    solver="lbfgs",
)

# Fit on the clean features paired with the poisoned labels; report any
# failure and re-raise it so execution does not continue with an unfitted model.
try:
    model.fit(X_train, y_train_poisoned)
except Exception as training_error:
    print(f"Error during model training: {training_error}")
    raise
else:
    print("Logistic Regression model trained successfully.")


In [None]:
# Pull the fitted coefficients out of the model as plain nested lists so they
# can be serialised to JSON. `parameters_extracted` records whether that
# worked; on any failure both lists are reset to None.
print("\n--- Extracting Model Parameters ---")
try:
    weights = model.coef_
    intercept = model.intercept_
    print(f"Extracted weights shape: {weights.shape}")
    print(f"Extracted intercept shape: {intercept.shape}")
    weights_list, intercept_list = weights.tolist(), intercept.tolist()
except Exception as extraction_error:
    print(f"An unexpected error occurred during parameter extraction: {extraction_error}")
    weights_list, intercept_list = None, None
    parameters_extracted = False
else:
    parameters_extracted = True

In [None]:
# Probe the evaluator's /health endpoint before attempting a submission.
health_check_url = f"{evaluator_base_url}/health"
print(f"Checking evaluator health at: {health_check_url}")
# Refuse to send a request while the URL still contains a template placeholder
# (both placeholders are checked, matching the submission cell).
if "<EVALUATOR_IP>" in evaluator_base_url or "<PORT>" in evaluator_base_url:
    print("\n--- WARNING ---")
    print(
        "Please update the 'evaluator_base_url' variable with the correct IP and Port before running!"
    )
    print("-------------")
else:
    try:
        response = requests.get(health_check_url, timeout=10)
        response.raise_for_status()
        try:
            health_status = response.json()
        except ValueError:
            # Handled right at the call site: with current versions of
            # requests the decode error is itself a RequestException, so a
            # handler placed after the generic RequestException one below
            # would never run. ValueError covers every JSON decode error type.
            print("\nError: Could not decode JSON response from health check.")
            print("The server might have sent an invalid response.")
            print(
                f"Raw response status: {response.status_code}, Raw response text: {response.text}"
            )
        else:
            print("\n--- Health Check Response ---")
            print(f"Status: {health_status.get('status', 'N/A')}")
            print(f"Message: {health_status.get('message', 'No message received.')}")
            if health_status.get("status") != "healthy":
                print(
                    "\nWarning: Evaluator service reported an unhealthy status. It might still be starting up or encountered an issue (like loading data)."
                )
    except requests.exceptions.ConnectionError:
        print(f"\nConnection Error: Could not connect to {health_check_url}.")
        print("Please check:")
        print("  1. The evaluator URL (IP address and port) is correct.")
        print("  2. The evaluator Docker container is running.")
        print(
            "  3. There are no network issues (firewalls, etc.) blocking the connection."
        )
    except requests.exceptions.Timeout:
        print(f"\nTimeout Error: The request to {health_check_url} timed out.")
        print(
            "The server might be taking too long to respond or there could be network issues."
        )
    except requests.exceptions.RequestException as e:
        # Covers HTTP error statuses from raise_for_status() and malformed URLs.
        print(f"\nError during health check request: {e}")
        print("Check the URL format and ensure the server is running.")
    except Exception as e:
        # Best-effort check: report anything else instead of aborting the notebook.
        print(f"\nAn unexpected error occurred during health check: {e}")

In [None]:
def _format_metric(value):
    """Return a numeric metric formatted with four decimals, or "N/A" otherwise."""
    if isinstance(value, (int, float)):
        return f"{value:.4f}"
    return "N/A"


# Submit the extracted model parameters to the evaluator and report its verdict.
evaluator_url = f"{evaluator_base_url}/evaluate_targeted"
print(f"\nAttempting submission to: {evaluator_url}")

if not parameters_extracted:
    print("Error: Cannot submit - parameters not extracted.")
elif "<EVALUATOR_IP>" in evaluator_base_url or "<PORT>" in evaluator_base_url:
    print("\n--- WARNING: Update evaluator_base_url ---")
else:
    payload = {"coef": weights_list, "intercept": intercept_list}
    print(f"Payload preview: {json.dumps(payload, indent=2)}")

    try:
        response = requests.post(evaluator_url, json=payload, timeout=30)
        response.raise_for_status()
        result = response.json()

        # Both branches format the metrics the same way, so a missing or
        # non-numeric value is shown as "N/A" instead of raising while the
        # result is being printed.
        oa_str = _format_metric(result.get("overall_accuracy"))
        c0a_str = _format_metric(result.get("class0_accuracy"))

        print("\n--- Evaluator Response ---")
        if result.get("success"):
            print(f"{'=' * 10} Attack Successful! {'=' * 10}")
            print(f"Overall Accuracy evaluated: {oa_str}")
            print(f"Accuracy on Class 0 samples: {c0a_str}")  # Display Class 0 Accuracy
            print(f"Message: {result.get('message', 'N/A')}")
            print(f"\nFLAG: {result.get('flag')}")
            print(f"{'=' * 38}")
        else:
            print("Evaluation Failed.")
            print(f"Overall Accuracy evaluated: {oa_str}")
            print(f"Accuracy on Class 0 samples: {c0a_str}")  # Display Class 0 Accuracy
            print(f"Message: {result.get('message', 'No message provided.')}")
            print(
                "\nHints: Did the attack significantly reduce accuracy specifically for Class 0 samples?"
            )
            print("Did the overall accuracy remain above the required threshold?")
            print("Consider adjusting the POISON_FRACTION.")

    except requests.exceptions.ConnectionError:
        print(f"\nConnection Error: Could not connect to {evaluator_url}.")
    except requests.exceptions.Timeout:
        print(f"\nTimeout Error: Request to {evaluator_url} timed out.")
    except requests.exceptions.RequestException as e:
        print(f"\nError during submission request: {e}")
        if e.response is not None:
            print(f"Server Response Status Code: {e.response.status_code}")

            # ValueError is the common base of every JSON decode error that
            # Response.json() can raise, whichever JSON backend is in use.
            try:
                print(f"Server Response Body: {e.response.json()}")
            except ValueError:
                print(f"Server Response Body (non-JSON): {e.response.text}")
    except Exception as e:
        print(f"\nAn unexpected error occurred during submission: {e}")

# Targeted Label Flipping Attack

This notebook demonstrates a targeted label flipping attack against a logistic regression model.

## Overview

Label flipping is a type of data poisoning attack where an adversary intentionally modifies the labels of a subset of training data to compromise the integrity of a machine learning model. In a *targeted* attack, the goal is to cause the model to misclassify specific instances or classes, rather than simply degrading overall performance.

This project focuses on a targeted attack where the goal is to make a logistic regression model perform poorly on samples belonging to a specific target class (Class 0 in this case) by flipping a fraction of their labels to another class (Class 1).

## Dataset

The project uses a synthetic dataset loaded from `label_flipping_dataset.npz`. The archive stores the features under the keys `Xtr` and `Xte` and the corresponding labels under `ytr` and `yte`; the notebook loads them into `X_train`, `X_test`, `y_train`, and `y_test`. The dataset is designed to be simple enough to demonstrate the attack effectively.

## Attack Methodology

1.  **Data Loading:** Load the training and testing data.
2.  **Targeted Label Flipping:** Implement a function (`targeted_class_label_flip`) to select a fraction of samples from the `target_class` and change their labels to the `new_label`.
3.  **Model Training:** Train a logistic regression model on the poisoned training data (`X_train`, `y_train_poisoned`).
4.  **Evaluation:** Evaluate the performance of the trained model (in this notebook the metrics are computed and reported by the external evaluator service), specifically focusing on:
    *   Overall accuracy.
    *   Accuracy on the `target_class` samples.
5.  **Submission:** Submit the trained model's parameters (weights and intercept) to an external evaluator service to verify the attack's success against predefined criteria (e.g., significantly reduced accuracy on the target class while maintaining a minimum overall accuracy).

## Code Structure

*   `targeted_class_label_flip(y, target_class, new_label, poison_fraction, seed)`: Function to perform the label flipping.
*   Data loading code (the dataset file already contains separate training and test arrays, so no splitting is performed).
*   Model training code using `sklearn.linear_model.LogisticRegression`.
*   Code to extract model parameters.
*   Code to interact with an external evaluator service via HTTP requests.

## Running the Notebook

1.  Ensure the `label_flipping_dataset.npz` file is accessible.
2.  Update the `evaluator_base_url` variable with the correct IP and port of the running evaluator service.
3.  Run the cells sequentially.

The output will show the training process, parameter extraction, health check of the evaluator, and the final evaluation result, including whether the targeted attack was successful based on the evaluator's criteria.

## Requirements

*   Python 3.x
*   numpy
*   scikit-learn
*   requests

(These dependencies are typically available in a Colab environment).