In [4]:
!pip install tensorflow==2.12 tensorflow-privacy==0.7.3

Collecting tensorflow==2.12
  Downloading tensorflow-2.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting tensorflow-privacy==0.7.3
  Downloading tensorflow_privacy-0.7.3-py3-none-any.whl.metadata (609 bytes)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow==2.12)
  Downloading gast-0.4.0-py3-none-any.whl.metadata (1.1 kB)
Collecting keras<2.13,>=2.12.0 (from tensorflow==2.12)
  Downloading keras-2.12.0-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting numpy<1.24,>=1.22 (from tensorflow==2.12)
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 (from tensorflow==2.12)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting tensorboard<2.13,>=2.12 (from tensorflow==2.12)
  Downloading tensorboard-2.12.3-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow-estim

In [10]:
!pip install numpy==1.23.5 tensorflow==2.12.0 tensorflow-privacy==0.7.3 --quiet

In [11]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np


## Load Dataset

In [2]:
df = pd.read_csv("athletes_v2.csv")

df['gender'] = df['gender'].map({'Male': 0, 'Female': 1})
X = df[['age', 'weight', 'height', 'gender']].values
df['total_lift'] = df[['deadlift', 'candj', 'snatch', 'backsq']].sum(axis=1)
y = df['total_lift'].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## DP Model

In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [6]:
from sklearn.preprocessing import StandardScaler

scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1)).flatten()


In [8]:
batch_size = 32
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train_scaled)) \
    .shuffle(1000) \
    .batch(batch_size, drop_remainder=True)

In [9]:
print(np.any(np.isnan(X_train_scaled)), np.any(np.isnan(y_train_scaled)))


False False


In [13]:
learning_rate = 0.01
noise_multiplier = 1.1
l2_norm_clip = 1.0
batch_size = 32
epochs = 20

model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=1.0,
    noise_multiplier=1.1,
    num_microbatches=batch_size,
    learning_rate=0.01
)

loss_fn = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)

train_dataset = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train_scaled)) \
    .shuffle(1000) \
    .batch(batch_size, drop_remainder=True)

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    for x_batch, y_batch in train_dataset:
        with tf.GradientTape() as tape:
            predictions = model(x_batch, training=True)
            loss = loss_fn(y_batch, predictions)
            loss = tf.reshape(loss, [-1])
            mean_loss = tf.reduce_mean(loss)

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer._was_dp_gradients_called = True  # avoid assertion error
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    print(f"Loss: {mean_loss.numpy():.4f}")



Epoch 1/20
Loss: 0.2453
Epoch 2/20
Loss: 0.2587
Epoch 3/20
Loss: 0.4268
Epoch 4/20
Loss: 0.5557
Epoch 5/20
Loss: 0.3226
Epoch 6/20
Loss: 0.3790
Epoch 7/20
Loss: 0.3216
Epoch 8/20
Loss: 0.3812
Epoch 9/20
Loss: 0.4263
Epoch 10/20
Loss: 0.3078
Epoch 11/20
Loss: 0.3610
Epoch 12/20
Loss: 0.2928
Epoch 13/20
Loss: 0.3415
Epoch 14/20
Loss: 0.3380
Epoch 15/20
Loss: 0.3720
Epoch 16/20
Loss: 0.3028
Epoch 17/20
Loss: 0.4541
Epoch 18/20
Loss: 0.2645
Epoch 19/20
Loss: 0.2460
Epoch 20/20
Loss: 0.2587


## Evaluate

In [20]:
preds = model.predict(X_test_scaled).flatten()
mae = mean_absolute_error(y_test_scaled, preds)
rmse = np.sqrt(mean_squared_error(y_test_scaled, preds))
r2 = r2_score(y_test_scaled, preds)

print(f"DP Model - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}")


DP Model - MAE: 0.46, RMSE: 0.60, R²: 0.64


In [21]:
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import compute_dp_sgd_privacy

# Parameters
num_examples = len(X_train_scaled)
batch_size = 32
noise_multiplier = 1.1
epochs = 20
delta = 1e-5

# Compute ε
epsilon, _ = compute_dp_sgd_privacy(
    n=num_examples,
    batch_size=batch_size,
    noise_multiplier=noise_multiplier,
    epochs=epochs,
    delta=delta
)

print(f"DP-SGD Privacy guarantee: ε = {epsilon:.2f} with δ = {delta}")


DP-SGD with sampling rate = 0.133% and noise_multiplier = 1.1 iterated over 15015 steps satisfies differential privacy with eps = 0.83 and delta = 1e-05.
The optimal RDP order is 15.0.
DP-SGD Privacy guarantee: ε = 0.83 with δ = 1e-05


 ε = 0.83 means the DP training process reveals very little about any individual in the dataset.