In [15]:
!pip install torch torchvision opacus




In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
from opacus import PrivacyEngine


In [None]:
import tensorflow_privacy

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy


In [4]:
# Load v2 data
v2 = pd.read_csv("v2.csv")

# Columns that must be present for a row to be kept. Note that "gender" and
# "howlong" take part in the filter even though they are not used as model
# features further down, so rows missing either of them are dropped too.
required_cols = [
    "weight", "height", "age", "gender", "howlong",
    "deadlift", "candj", "snatch", "backsq"
]

# The four lifts that are summed into the regression target.
lift_cols = ["deadlift", "candj", "snatch", "backsq"]

# Clean and derive the target in a single chain. Using .assign() creates a new
# frame instead of writing a column into the result of dropna(), which can
# trigger pandas' SettingWithCopyWarning, and it keeps the cell idempotent.
v2_model = (
    v2
    .dropna(subset=required_cols)
    .assign(total_lift=lambda df: df[lift_cols].sum(axis=1))
)


In [6]:
# Features: weight, height, age; target: total_lift
X = v2_model[["weight", "height", "age"]].values
y = v2_model["total_lift"].values

# Split BEFORE scaling. Fitting the scalers on the full data would let the
# test rows influence the mean/variance used to transform the training rows
# (data leakage). The same test_size and random_state are used as before.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize: fit on the training split only, then apply to both splits.
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train_raw)
X_test = scaler_X.transform(X_test_raw)

# The target is standardized as well, so downstream losses and metrics are in
# standardized units; use scaler_y.inverse_transform to map back.
scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train_raw.reshape(-1, 1)).ravel()
y_test = scaler_y.transform(y_test_raw.reshape(-1, 1)).ravel()

# Full-data scaled arrays, kept so any later cell that still refers to these
# names keeps working. They now use the train-only statistics.
X_scaled = scaler_X.transform(X)
y_scaled = scaler_y.transform(y.reshape(-1, 1)).ravel()


In [8]:
# Convert the NumPy splits to float32 tensors.
# Feature matrices are converted as-is.
X_train_torch, X_test_torch = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_test)
)

# Targets are reshaped to column vectors so they line up with the model's
# single-output predictions.
y_train_torch, y_test_torch = (
    torch.tensor(targets.reshape(-1, 1), dtype=torch.float32)
    for targets in (y_train, y_test)
)

# Mini-batch loader over the training split: batches of 32, reshuffled on
# every pass.
train_ds = TensorDataset(X_train_torch, y_train_torch)
train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)



In [9]:
# Baseline Model (non-DP)
# Seed torch's global RNG so that the weight initialisation and the
# DataLoader's shuffle order -- and therefore the metrics printed below --
# are reproducible under Restart & Run All.
torch.manual_seed(42)

# Small MLP: input features -> 32 hidden units (ReLU) -> 1 regression output.
model = nn.Sequential(
    nn.Linear(X_train.shape[1], 32),
    nn.ReLU(),
    nn.Linear(32, 1)
)

optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

# Train baseline: 5 passes over the training loader with plain SGD.
model.train()
for epoch in range(5):
    for Xb, yb in train_loader:
        optimizer.zero_grad()
        preds = model(Xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

# Evaluate baseline on the held-out split. no_grad() skips autograd
# bookkeeping; flatten() turns the (n, 1) output into a 1-D array for sklearn.
model.eval()
with torch.no_grad():
    preds = model(X_test_torch).numpy().flatten()

# y_test is the standardized target, so the MSE is in standardized units.
mse_non_dp = mean_squared_error(y_test, preds)
r2_non_dp = r2_score(y_test, preds)

print("Baseline Non-DP Model:")
print(f"MSE: {mse_non_dp:.2f}")
print(f"R2: {r2_non_dp:.2f}")


Baseline Non-DP Model:
MSE: 0.49
R2: 0.51


In [10]:
# DP Model
# Seed torch's global RNG so the weight initialisation and the randomness
# used during DP training are reproducible, and so this model starts from a
# deterministic state when compared against the baseline.
# NOTE(review): a fixed seed is for reproducing this experiment only; do not
# fix the noise seed when training a model for an actual private release.
torch.manual_seed(42)

# Same architecture as the baseline so that only the training procedure differs.
model_dp = nn.Sequential(
    nn.Linear(X_train.shape[1], 32),
    nn.ReLU(),
    nn.Linear(32, 1)
)

optimizer_dp = optim.SGD(model_dp.parameters(), lr=0.01)
privacy_engine = PrivacyEngine()

# make_private returns wrapped versions of the model, optimizer and loader;
# the returned objects (not the originals) must be used for training.
# max_grad_norm is the gradient clipping bound and noise_multiplier scales
# the added noise relative to that bound.
model_dp, optimizer_dp, train_loader_dp = privacy_engine.make_private(
    module=model_dp,
    optimizer=optimizer_dp,
    data_loader=train_loader,
    noise_multiplier=1.1,
    max_grad_norm=1.0,
)

# Train DP: same 5-epoch loop as the baseline, but over the private loader.
# `criterion` is the MSELoss instance created in the baseline cell.
model_dp.train()
for epoch in range(5):
    for Xb, yb in train_loader_dp:
        optimizer_dp.zero_grad()
        preds = model_dp(Xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer_dp.step()




In [14]:
# Evaluate DP
# Score the privately trained model on the held-out split, without autograd.
model_dp.eval()
with torch.no_grad():
    test_output_dp = model_dp(X_test_torch)
    preds_dp = test_output_dp.numpy().flatten()

# Same metrics as the baseline, computed against the same y_test.
mse_dp, r2_dp = (
    mean_squared_error(y_test, preds_dp),
    r2_score(y_test, preds_dp),
)

print(
    "DP Model:",
    f"MSE: {mse_dp:.2f}",
    f"R2: {r2_dp:.2f}",
    sep="\n",
)

# Privacy budget spent during training, queried from the engine's accountant
# at the delta shown in the message.
epsilon = privacy_engine.accountant.get_epsilon(delta=1e-5)
print(f"DP epsilon: {epsilon:.2f}, delta=1e-5")

DP Model:
MSE: 0.52
R2: 0.48
DP epsilon: 0.34, delta=1e-5
