In [None]:
# Execute the population-attack notebook inside this kernel before anything else.
# NOTE(review): names used further down (np, X_train, y_train, X_test, y_test,
# target_model, logits, plot_rfc_auroc, visualize_vals) are not defined in this
# notebook, so they presumably come from this run — confirm.
%run 2-population-attack.ipynb
# Switch for the diagnostic plots in the cells below; they are skipped when this is False.
verbose_population_diffpriv = True

## Protect against leakage

Now that we know how much privacy our model is leaking, we can apply a differential-privacy library
(`Opacus` / `DiffPrivLib`; this notebook uses `DiffPrivLib`) to our model and see how the ROC curve flattens.

In [None]:
from diffprivlib.models import RandomForestClassifier as dp_RFC

def train_model_dp(X_train, y_train, seed=42, epsilon=np.inf):
    """Fit a diffprivlib random forest on the given training data.

    Args:
        X_train: Training features. Per-feature bounds are taken as the min/max
            over axis 0, so samples are expected along the first axis.
        y_train: Training labels; the distinct values are passed as ``classes``.
        seed: Value forwarded as the forest's ``random_state``.
        epsilon: Privacy parameter forwarded to the classifier. Defaults to
            ``np.inf``, the value this notebook originally hard-coded.
            NOTE(review): an infinite epsilon presumably means no effective
            privacy noise; pass a finite value to see the privacy/utility
            trade-off — confirm against the diffprivlib documentation.

    Returns:
        Whatever ``fit`` returns (presumably the fitted classifier; it is used
        as a model by the callers in this notebook).
    """
    # NOTE(review): the bounds are derived from the training data itself. For a
    # real privacy guarantee they should likely be data-independent — confirm
    # against the diffprivlib documentation.
    feature_bounds = (np.min(X_train, axis=0), np.max(X_train, axis=0))
    rfc_dp = dp_RFC(
        n_estimators=100,
        max_depth=12,
        random_state=seed,  # previously hard-coded to 42, which ignored `seed`
        epsilon=epsilon,
        bounds=feature_bounds,
        classes=np.unique(y_train),
    )
    return rfc_dp.fit(X_train, y_train)

target_model_dp = train_model_dp(X_train, y_train)
# Compare the DP classifier's test accuracy against the majority-class baseline.
print(f"Baseline: {max(y_test.mean(), 1 - y_test.mean()):0.2f}")
# Score the model trained just above (target_model_dp). The earlier version scored
# `target_model`, which is not the model produced in this cell.
print(f"Our test-score: {target_model_dp.score(X_test, y_test):0.2f}")

In [None]:
# Build the membership-inference features from the DP model: one set for the
# samples it was trained on, one for the held-out samples.
logits_train_dp = logits(target_model_dp, X_train, y_train)
logits_test_dp = logits(target_model_dp, X_test, y_test)

if verbose_population_diffpriv:
    # ROC of the DP classifier itself, using column 1 of predict_proba as the score.
    plot_rfc_auroc(
        y_test,
        target_model_dp.predict_proba(X=X_test)[:, 1],
        "ROC of classifier using DP",
    )
    # Plot the train features next to the test features. A model whose train
    # data can be told apart from its test data is vulnerable to membership
    # inference.
    visualize_vals(logits_train_dp, logits_test_dp)

In [None]:
if verbose_population_diffpriv:
    # Ground truth for the attack: 1 for every training sample, 0 for every
    # test sample, in the same order as the concatenated scores below.
    membership_labels = np.concatenate(
        [[1] * len(logits_train_dp), [0] * len(logits_test_dp)]
    )
    plot_rfc_auroc(
        membership_labels,
        np.concatenate([logits_train_dp, logits_test_dp]),
        "ROC of Population attack metric",
    )