# PePR: Membership Inference Attack (MIA)
Attack a single target model with a single attack configuration.


## Prepare Environment
**Important: Restart the Runtime after this Cell!**
The restart is needed because of `pip install ./ml-pepr`.

In [None]:
!git clone https://github.com/hallojs/ml-pepr.git
%pip install ./ml-pepr
%pip install pylatex

## Imports

In [None]:
from pepr.privacy import mia

import tensorflow as tf
from tensorflow.keras import optimizers

import numpy as np
import logging

## Setup Logging

In [None]:
# Shared record layout for every handler below:
# "<timestamp> - <logger name> - <level> - <message>", timestamp without milliseconds.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s', '%Y-%m-%d %H:%M:%S')

# TensorFlow logger: records of level INFO and above are appended to 'tf.log'.
file_handler_tf = logging.FileHandler('tf.log')
file_handler_tf.setLevel(logging.INFO)
file_handler_tf.setFormatter(formatter)

# Lower the TensorFlow logger itself to INFO so INFO records actually reach the handler.
tf.get_logger().setLevel(logging.INFO)
logger_tf = tf.get_logger()
logger_tf.addHandler(file_handler_tf)

# PePR logger: records are written to 'pepr.privacy.mia.log' and echoed to the
# notebook output through a stream handler; both handlers accept DEBUG and above.
file_handler_pr = logging.FileHandler('pepr.privacy.mia.log')
file_handler_pr.setLevel(logging.DEBUG)
file_handler_pr.setFormatter(formatter)

stream_handler_pr = logging.StreamHandler()
stream_handler_pr.setLevel(logging.DEBUG)
stream_handler_pr.setFormatter(formatter)

# NOTE(review): only the handler levels are set in this cell, not the level of the
# 'pepr.privacy.mia' logger itself -- whether DEBUG/INFO records get through depends
# on the level configured elsewhere (presumably by pepr); confirm.
# NOTE: re-running this cell attaches additional handlers, which duplicates log lines.
logger_pr = logging.getLogger('pepr.privacy.mia')
logger_pr.addHandler(file_handler_pr)
logger_pr.addHandler(stream_handler_pr)

## Functions
Functions for creating models and preparing the dataset.

In [None]:
def get_target_model(input_shape, number_of_labels):
    """Build the uncompiled convolutional classifier used as target/shadow model.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample; handed to the first convolution layer.
    number_of_labels : int
        Number of units of the last dense layer (one per class).

    Returns
    -------
    tensorflow.python.keras.engine.sequential.Sequential
        An uncompiled model whose output is a softmax over `number_of_labels`.
    """
    layers = tf.keras.layers

    # Two conv/pool stages, then a dense head; softmax is a separate final layer.
    stack = [
        layers.Conv2D(32, (3, 3), activation="tanh", padding='same', input_shape=input_shape),
        layers.MaxPool2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="tanh", padding='same'),
        layers.MaxPool2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation="tanh"),
        layers.Dense(number_of_labels),
        layers.Softmax(),
    ]
    return tf.keras.Sequential(stack)

def get_attack_model(number_of_labels):
    """Build the uncompiled binary classifier used as attack model.

    Parameters
    ----------
    number_of_labels : int
        Length of the input vector fed to the first dense layer.

    Returns
    -------
    tensorflow.python.keras.engine.sequential.Sequential
        An uncompiled model with one hidden layer and a single sigmoid output.
    """
    layers = tf.keras.layers

    hidden = layers.Dense(64, activation="relu", input_shape=(number_of_labels,))
    output = layers.Dense(1, activation="sigmoid")
    return tf.keras.Sequential([hidden, output])

def create_compile_shadow_model():
    """Create compiled target/shadow model.

    The model is built by `get_target_model` for inputs of shape (32, 32, 3)
    and 100 classes, then compiled with Adam (learning rate 0.0001),
    sparse categorical cross-entropy loss and the accuracy metric.

    Returns
    -------
    tensorflow.python.keras.engine.sequential.Sequential
        A compiled tensorflow model.
    """
    input_shape = (32, 32, 3)
    number_classes = 100

    model = get_target_model(input_shape, number_classes)

    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer = optimizers.Adam(learning_rate=0.0001)
    # Sparse variant: labels are integer class ids, not one-hot vectors.
    loss = 'sparse_categorical_crossentropy'
    metrics = ["accuracy"]
    model.compile(optimizer, loss=loss, metrics=metrics)

    return model

def create_compile_attack_model():
    """Create compiled attack model.

    The model is built by `get_attack_model` for 100-dimensional inputs and
    compiled with Adam (learning rate 0.0001), binary cross-entropy loss and
    the accuracy metric.

    Returns
    -------
    tensorflow.python.keras.engine.sequential.Sequential
        A compiled tensorflow model.
    """
    number_classes = 100

    model = get_attack_model(number_classes)

    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer = optimizers.Adam(learning_rate=0.0001)
    loss = 'binary_crossentropy'
    metrics = ["accuracy"]
    model.compile(optimizer, loss=loss, metrics=metrics)

    return model

def load_cifar100():
    """Load and preprocess the CIFAR100 dataset as one combined array pair.

    The training and test splits are preprocessed separately and then
    concatenated, training records first, so callers select subsets purely
    by index.

    Returns
    -------
    tuple
        (data, labels): `data` is a float32 array of images scaled to the
        range [0, 1] with shape (n, 32, 32, 3); `labels` is a one-dimensional
        int32 array with one entry per image.
    """
    (train_data, train_labels), (test_data, test_labels) = (
        tf.keras.datasets.cifar100.load_data()
    )

    # Normalize the pixel values to a range between 0 and 1.
    train_data = train_data.astype(np.float32) / 255
    test_data = test_data.astype(np.float32) / 255

    # Make sure the images have the layout (n, 32, 32, 3).
    train_data = train_data.reshape(train_data.shape[0], 32, 32, 3)
    test_data = test_data.reshape(test_data.shape[0], 32, 32, 3)

    # Flatten the labels to one int32 entry per record.
    train_labels = train_labels.astype(np.int32).reshape(train_labels.shape[0])
    test_labels = test_labels.astype(np.int32).reshape(test_labels.shape[0])

    data = np.vstack((train_data, test_data))
    labels = np.hstack((train_labels, test_labels))
    return data, labels

## Init Dataset

In [None]:
# Load the dataset once. load_cifar100 returns the training split followed by the
# test split in a single array pair, so all later cells address records by index.
data, labels = load_cifar100()

## Train Target Model

In [None]:
# The target model is built with the same factory that is later handed to the
# attack for creating shadow models.
target_model = create_compile_shadow_model()
# Train on records 40000..49999 only -- the same range that is listed as
# 'target_indices' in the attack's data configuration.
target_model.fit(data[40000:50000],
                 labels[40000:50000],
                 epochs=100,
                 batch_size=50,
                 verbose=0)
# Persist the trained model under the 'data' directory.
target_model.save('data/target_model')

## Run the Attack

In [None]:
# Attack hyperparameters handed to pepr's Mia. The two factory entries are passed
# as callables (not called here). The shadow epochs and batch size equal the values
# used to fit the target model.
attack_pars = {
    'number_classes': 100,
    'number_shadow_models': 100,
    'shadow_training_set_size': 2500,
    'create_compile_shadow_model': create_compile_shadow_model,
    'create_compile_attack_model': create_compile_attack_model,
    'shadow_epochs': 100,
    'shadow_batch_size': 50,
    'attack_epochs': 50,
    'attack_batch_size': 50,
}

# Data configuration for a single target model (all indices refer to `data`/`labels`):
# - 'shadow_indices': records 0..39999, disjoint from the target's training range.
# - 'target_indices': records 40000..49999, the range the target model was fitted on.
# - 'evaluation_indices': records 40000..59999, i.e. the target's training range plus
#   an equally sized range it was not trained on.
# - 'record_indices_per_target': one row (0..9999) for the one target model;
#   presumably positions within 'target_indices' -- TODO confirm against the PePR docs.
data_conf = {
    'shadow_indices': list(range(40000)),
    'target_indices': list(range(40000, 50000)),
    'evaluation_indices': list(range(40000, 60000)),
    'record_indices_per_target': np.array([np.arange(10000)])
}

# Set up the attack named "MIA" against the single trained target model.
mia_attack = mia.Mia("MIA", attack_pars, data, labels, data_conf, [target_model])

# Run the attack with save_path="data", the directory that is zipped for download afterwards.
mia_attack.run(save_path="data")

# Generate the attack report (presumably written to the `mia_report` directory
# that is zipped afterwards -- confirm).
mia_attack.create_attack_report()

### Save Generated Data and Report
To download the `data` or `mia_report` directory from Google Colab, the directory first needs to be compressed into a .zip file.

In [None]:
!zip -r -q data.zip data

In [None]:
!zip -r -q report.zip mia_report