Copyright 2021 Google LLC

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

# Setup (run first)

In [None]:
!pip install folktables
import numpy as np
import pandas as pd
from sklearn import neural_network, linear_model, cluster
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from collections import defaultdict
from scipy.special import logit, expit
from itertools import product
def clip(x):
  """Clamp `x` element-wise into the open-interval-safe range [1e-7, 1 - 1e-7].

  Useful before taking `logit`/`log` of probabilities so that exact 0 or 1
  values do not produce infinities.

  Args:
    x: scalar or array-like of values.

  Returns:
    `x` with every entry below 1e-7 raised to 1e-7 and every entry above
    1 - 1e-7 lowered to 1 - 1e-7.
  """
  return np.maximum(np.minimum(x, 1-1e-7), 1e-7)

# Uncorrelated synthetic data + standard attribute inference

In [None]:
all_res = defaultdict(list)
model_fns = {'lr': linear_model.LogisticRegression, 'nn': neural_network.MLPClassifier}
# Column index of the +/-1 attribute the attack tries to recover.
target_col = 0


def get_scores_basic(model, x, y):
  """Query `model` with the target attribute forced to each candidate value.

  Args:
    model: fitted classifier exposing `predict_proba`.
    x: 2-D feature array. It is not modified; a private copy is edited.
    y: labels for `x`. Unused; kept so all score functions share a signature.

  Returns:
    Array stacking the two `predict_proba` outputs along a new leading axis:
    index 0 is with column `target_col` set to -1, index 1 with it set to +1.
  """
  x = x.copy()
  preds = []
  for i in range(2):
    x[:, target_col] = -1 + 2*i
    preds.append(model.predict_proba(x))
  return np.array(preds)


@ignore_warnings(category=ConvergenceWarning)
def run_experiment(x, y, model_fn):
  """Train on a random half of the data and run the basic inference attack.

  The attack guesses, for each record, the value of `target_col` under which
  the model assigns the highest probability to the record's true label.

  Args:
    x: 2-D feature array with +/-1 entries.
    y: 0/1 label array aligned with `x`.
    model_fn: zero-argument factory returning an unfitted classifier.

  Returns:
    (train_acc, test_acc): attack accuracy on the training half and on the
    held-out half.
  """
  ss_inds = np.random.choice(x.shape[0], x.shape[0]//2, replace=False)
  # np.bool was removed in NumPy 1.24; the builtin bool is the supported dtype.
  ss_mask = np.zeros(x.shape[0], dtype=bool)
  ss_mask[ss_inds] = True

  model = model_fn().fit(x[ss_mask], y[ss_mask])

  def attack_accuracy(mask):
    # Boolean indexing already returns fresh arrays, so the originals are safe.
    preds = get_scores_basic(model, x[mask], y[mask])
    # Keep only the probability assigned to each record's true label.
    preds = np.multiply(preds, np.eye(2)[y[mask]][None, :, :]).sum(axis=2)
    # argmax over axis 0 picks the attribute value index (0 -> -1, 1 -> +1).
    return ((-1+2*preds.argmax(0))==x[mask, target_col]).mean()

  test_acc = attack_accuracy(~ss_mask)
  train_acc = attack_accuracy(ss_mask)
  return train_acc, test_acc


for (datasize, d, model_name, expid) in product([25, 100, 1000], [10, 100, 1000], model_fns, range(10)):
  if expid==0:
    print(datasize, d, model_name)
  # Each expid draws a fresh dataset; features and labels are independent.
  np.random.seed(expid)
  x = -1 + 2*np.random.randint(0, 2, size=(datasize, d))
  y = np.random.randint(0, 2, size=datasize)
  model_fn = model_fns[model_name]

  # Average over 10 random train/test splits of the same dataset.
  res_li = []
  for i in range(10):
    res_li.append(run_experiment(x, y, model_fn))
  all_res[datasize, d, model_name].append(np.mean(res_li, axis=0))

In [None]:
# Report, per (n, d, model) configuration, the attack accuracy on training
# records averaged over all stored runs (column 0 of each stored pair).
print("n, d, model")
for config, runs in all_res.items():
  per_split_mean = np.mean(runs, axis=0)
  print(config, per_split_mean[0])

# Uncorrelated synthetic data + poisoning attribute inference

In [None]:
all_res = defaultdict(list)
model_fns = {'lr': linear_model.LogisticRegression, 'nn': neural_network.MLPClassifier}
# Column index of the +/-1 attribute the attack tries to recover.
target_col = 0


def make_poison(x, y, repeats=1):
  """Build poisoning records from the targeted records.

  Each poison record is a copy of a target with column `target_col` forced to
  -1 and its 0/1 label flipped.

  Args:
    x: 2-D feature array of the targeted records (not modified).
    y: 0/1 labels of the targeted records (not modified).
    repeats: how many times the whole poison set is stacked.

  Returns:
    (pois_x, pois_y): poison features and labels, `repeats` copies back to back.
  """
  pois_x = x.copy()
  pois_x[:, target_col] = -1
  pois_y = 1 - y
  return np.concatenate([pois_x]*repeats), np.concatenate([pois_y]*repeats)


def get_scores_basic(model, x, y):
  """Query `model` with the target attribute forced to each candidate value.

  Args:
    model: fitted classifier exposing `predict_proba`.
    x: 2-D feature array. It is not modified; a private copy is edited.
    y: labels for `x`. Unused; kept so all score functions share a signature.

  Returns:
    Array stacking the two `predict_proba` outputs along a new leading axis:
    index 0 is with column `target_col` set to -1, index 1 with it set to +1.
  """
  x = x.copy()
  preds = []
  for i in range(2):
    x[:, target_col] = -1 + 2*i
    preds.append(model.predict_proba(x))
  return np.array(preds)


@ignore_warnings(category=ConvergenceWarning)
def run_experiment(x, y, model_fn):
  """Train with poison aimed at 5 training records and score the attack.

  A random half of the data is the training set. The first 5 sampled training
  records are the targets; their poison copies (see `make_poison`, stacked
  twice) are appended to the training set alongside the originals.

  Args:
    x: 2-D feature array with +/-1 entries.
    y: 0/1 label array aligned with `x`.
    model_fn: zero-argument factory returning an unfitted classifier.

  Returns:
    (train_acc, test_acc, pois_scores, pois_targs, train_scores, train_targs):
    basic-attack accuracy on non-targeted training records and on held-out
    records, then per-record attack scores with the true attribute values for
    the targeted records and for the non-targeted training records.
  """
  ss_inds = np.random.choice(x.shape[0], x.shape[0]//2, replace=False)
  pois_inds = ss_inds[:5]
  # np.bool was removed in NumPy 1.24; the builtin bool is the supported dtype.
  ss_mask = np.zeros(x.shape[0], dtype=bool)
  pois_mask = np.zeros(x.shape[0], dtype=bool)
  ss_mask[ss_inds] = True
  pois_mask[pois_inds] = True
  # Training records that were not targeted by poisoning.
  clean_mask = ss_mask & ~pois_mask

  pois_x, pois_y = make_poison(x[pois_mask], y[pois_mask], repeats=2)
  x_trn = np.concatenate([x[ss_mask], pois_x])
  y_trn = np.concatenate([y[ss_mask], pois_y])
  model = model_fn().fit(x_trn, y_trn)

  def label_scores(feats, labels):
    # Probability of `labels` under each candidate attribute value.
    preds = get_scores_basic(model, feats, labels)
    return np.multiply(preds, np.eye(2)[labels][None, :, :]).sum(axis=2)

  test_preds = label_scores(x[~ss_mask], y[~ss_mask])
  test_acc = ((-1+2*test_preds.argmax(0))==x[~ss_mask, target_col]).mean()

  train_preds = label_scores(x[clean_mask], y[clean_mask])
  train_acc = ((-1+2*train_preds.argmax(0))==x[clean_mask, target_col]).mean()

  # Targets scored with their true labels vs. their poison copies scored with
  # the flipped labels.
  poised_preds = label_scores(x[pois_mask], y[pois_mask])
  pois_preds = label_scores(pois_x, pois_y)
  # pois_x holds stacked repeats; keep the first copy so columns line up.
  pois_preds = pois_preds[:, :poised_preds.shape[1]]
  pred_diffs = poised_preds - pois_preds

  train_scores, train_targs = train_preds[1]-train_preds[0], x[clean_mask, target_col]
  pois_scores, pois_targs = pred_diffs[1]-pred_diffs[0], x[pois_mask, target_col]
  return train_acc, test_acc, pois_scores, pois_targs, train_scores, train_targs


for datasize, d, model_name in product([25, 100, 1000], [10, 100, 1000], model_fns):
  print(datasize, d, model_name)
  res_li = []
  for expid in range(20):
    # Each expid draws a fresh dataset; features and labels are independent.
    np.random.seed(expid)
    x = -1 + 2*np.random.randint(0, 2, size=(datasize, d))
    y = np.random.randint(0, 2, size=datasize)
    model_fn = model_fns[model_name]

    for i in range(20):
      # Drop the two accuracies; keep (pois_scores, pois_targs, train_scores, train_targs).
      res_li.append(run_experiment(x, y, model_fn)[2:])

  # Threshold the pooled scores at their median and compare with the true sign.
  trn_scores = np.array([v[2] for v in res_li]).ravel()
  trn_targs = np.array([v[3] for v in res_li]).ravel()
  trn_acc = np.mean((trn_scores>np.median(trn_scores))==(trn_targs>0))

  scores = np.array([v[0] for v in res_li]).ravel()
  targs = np.array([v[1] for v in res_li]).ravel()
  pois_acc = np.mean((scores>np.median(scores))==(targs>0))
  all_res[datasize, d, model_name] = trn_acc, pois_acc
  print(datasize, d, model_name, trn_acc, pois_acc)
for key in all_res:
  print(key, all_res[key])

# Correlated synthetic data + poisoning attribute inference

In [None]:
all_res = defaultdict(list)
model_fns = {'lr': linear_model.LogisticRegression, 'nn': neural_network.MLPClassifier}
# Column index of the +/-1 attribute the attack tries to recover.
target_col = 0


def make_poison(x, y, repeats=1):
  """Build poisoning records from the targeted records.

  Each poison record is a copy of a target with column `target_col` forced to
  -1 and its 0/1 label flipped.

  Args:
    x: 2-D feature array of the targeted records (not modified).
    y: 0/1 labels of the targeted records (not modified).
    repeats: how many times the whole poison set is stacked.

  Returns:
    (pois_x, pois_y): poison features and labels, `repeats` copies back to back.
  """
  pois_x = x.copy()
  pois_x[:, target_col] = -1
  pois_y = 1 - y
  return np.concatenate([pois_x]*repeats), np.concatenate([pois_y]*repeats)


def get_scores_basic(model, x, y):
  """Query `model` with the target attribute forced to each candidate value.

  Args:
    model: fitted classifier exposing `predict_proba`.
    x: 2-D feature array. It is not modified; a private copy is edited.
    y: labels for `x`. Unused; kept so all score functions share a signature.

  Returns:
    Array stacking the two `predict_proba` outputs along a new leading axis:
    index 0 is with column `target_col` set to -1, index 1 with it set to +1.
  """
  x = x.copy()
  preds = []
  for i in range(2):
    x[:, target_col] = -1 + 2*i
    preds.append(model.predict_proba(x))
  return np.array(preds)


@ignore_warnings(category=ConvergenceWarning)
def run_experiment(x, y, model_fn):
  """Train with poison aimed at 5 training records and score the attack.

  A random half of the data is the training set. The first 5 sampled training
  records are the targets; their poison copies (see `make_poison`, stacked
  twice) are appended to the training set alongside the originals.

  Args:
    x: 2-D feature array with +/-1 entries.
    y: 0/1 label array aligned with `x`.
    model_fn: zero-argument factory returning an unfitted classifier.

  Returns:
    (train_acc, test_acc, pois_scores, pois_targs, train_scores, train_targs):
    basic-attack accuracy on non-targeted training records and on held-out
    records, then per-record attack scores with the true attribute values for
    the targeted records and for the non-targeted training records.
  """
  ss_inds = np.random.choice(x.shape[0], x.shape[0]//2, replace=False)
  pois_inds = ss_inds[:5]
  # np.bool was removed in NumPy 1.24; the builtin bool is the supported dtype.
  ss_mask = np.zeros(x.shape[0], dtype=bool)
  pois_mask = np.zeros(x.shape[0], dtype=bool)
  ss_mask[ss_inds] = True
  pois_mask[pois_inds] = True
  # Training records that were not targeted by poisoning.
  clean_mask = ss_mask & ~pois_mask

  pois_x, pois_y = make_poison(x[pois_mask], y[pois_mask], repeats=2)
  x_trn = np.concatenate([x[ss_mask], pois_x])
  y_trn = np.concatenate([y[ss_mask], pois_y])
  model = model_fn().fit(x_trn, y_trn)

  def label_scores(feats, labels):
    # Probability of `labels` under each candidate attribute value.
    preds = get_scores_basic(model, feats, labels)
    return np.multiply(preds, np.eye(2)[labels][None, :, :]).sum(axis=2)

  test_preds = label_scores(x[~ss_mask], y[~ss_mask])
  test_acc = ((-1+2*test_preds.argmax(0))==x[~ss_mask, target_col]).mean()

  train_preds = label_scores(x[clean_mask], y[clean_mask])
  train_acc = ((-1+2*train_preds.argmax(0))==x[clean_mask, target_col]).mean()

  # Targets scored with their true labels vs. their poison copies scored with
  # the flipped labels.
  poised_preds = label_scores(x[pois_mask], y[pois_mask])
  pois_preds = label_scores(pois_x, pois_y)
  # pois_x holds stacked repeats; keep the first copy so columns line up.
  pois_preds = pois_preds[:, :poised_preds.shape[1]]
  pred_diffs = poised_preds - pois_preds

  train_scores, train_targs = train_preds[1]-train_preds[0], x[clean_mask, target_col]
  pois_scores, pois_targs = pred_diffs[1]-pred_diffs[0], x[pois_mask, target_col]
  return train_acc, test_acc, pois_scores, pois_targs, train_scores, train_targs


for datasize, d, model_name in product([25, 100, 1000], [10, 100, 1000], model_fns):
  print(datasize, d, model_name)
  res_li = []
  for expid in range(20):
    np.random.seed(expid)
    x = -1 + 2*np.random.randint(0, 2, size=(datasize, d))
    # add correlation: the label is Bernoulli(sigmoid(sum of the first 5
    # features)), so the target column (index 0) is predictive of the label.
    w = np.zeros(d)
    w[:5] = 1
    preds = np.dot(x, w)
    preds = expit(preds)
    # np.int was removed in NumPy 1.24; the builtin int is the supported dtype.
    y = (np.random.random(size=preds.shape) < preds).astype(int)
    model_fn = model_fns[model_name]

    for i in range(20):
      # Drop the two accuracies; keep (pois_scores, pois_targs, train_scores, train_targs).
      res_li.append(run_experiment(x, y, model_fn)[2:])

  # Threshold the pooled scores at their median and compare with the true sign.
  trn_scores = np.array([v[2] for v in res_li]).ravel()
  trn_targs = np.array([v[3] for v in res_li]).ravel()
  trn_acc = np.mean((trn_scores>np.median(trn_scores))==(trn_targs>0))

  scores = np.array([v[0] for v in res_li]).ravel()
  targs = np.array([v[1] for v in res_li]).ravel()
  pois_acc = np.mean((scores>np.median(scores))==(targs>0))
  all_res[datasize, d, model_name] = trn_acc, pois_acc
  print(datasize, d, model_name, trn_acc, pois_acc)

In [None]:
# Baseline accuracy: how often the label alone agrees with the sign of
# feature 0 on a large sample of the correlated synthetic distribution.
newd = 10
x = 2*np.random.randint(0, 2, size=(10000, newd)) - 1
# Correlation comes from the first five features only.
w = np.zeros(newd)
w[:5] = 1
preds = expit(np.dot(x, w))
y = np.random.random(size=preds.shape) < preds
label_matches_sign = (y == (x[:, 0] > 0))
print(label_matches_sign.mean())

# Adult dataset standard attribute inference

In [None]:
from folktables import ACSDataSource, ACSIncome

# Load the 2018 1-Year ACS person survey for California (downloaded on demand
# because download=True) and convert it to arrays for the ACSIncome task.
data_source = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')
ca_data = data_source.get_data(states=["CA"], download=True)
# The third value returned by df_to_numpy is not used in this notebook.
ca_features, ca_labels, _ = ACSIncome.df_to_numpy(ca_data)
def feature_fn(preproc_fea):
  """Drop unused ACS columns and one-hot encode the categorical ones.

  Input columns are, in order: age, type of job, schooling level, marriage,
  occupation code, birth state, unk, hours worked, sex, race.

  Args:
    preproc_fea: 2-D array whose columns follow the order above.

  Returns:
    (features, cols): `features` has one row per input row; numeric columns
    are passed through and each categorical column becomes one indicator
    column per distinct value (in sorted order). `cols` maps each categorical
    feature name to the half-open (start, end) range of its indicator columns
    in `features`. Numeric features get no entry in `cols`.
  """
  all_feature_names = ['AGEP', 'COW', 'SCHL', 'MAR', 'OCCP', 'POBP', 'RELP', 'WKHP', 'SEX', 'RAC1P']
  to_categorical = ['COW', 'SCHL', 'MAR', 'RAC1P', 'SEX']
  to_remove = ['POBP', 'RELP', 'OCCP']
  cols = {}
  all_features = []
  for i, fea_name in enumerate(all_feature_names):
    if fea_name in to_remove:
      continue
    column = preproc_fea[:, i]
    if fea_name not in to_categorical:
      # Numeric feature: keep as a single column, not recorded in `cols`.
      all_features.append(column)
      continue
    fea_start = len(all_features)
    all_features.extend(column == fea_value for fea_value in np.unique(column))
    cols[fea_name] = (fea_start, len(all_features))
  return np.array(all_features).T, cols

ca_postproc, cols = feature_fn(ca_features)
print(ca_postproc.shape)
print(cols)

# Plug-in your method for tabular datasets
model = linear_model.LogisticRegression()

# Train on CA data
model.fit(ca_postproc, ca_labels)

print(model.score(ca_postproc, ca_labels))

# Attribute the attack tries to infer; `cols` gives its one-hot column range.
target_col = 'SEX'

start_i, end_i = cols[target_col]
target_col_vals = ca_postproc[:, start_i:end_i]
unique_vals, unique_counts = np.unique(target_col_vals, axis=0, return_counts=True)
min_count = unique_counts.min()
print(unique_vals, unique_counts)

# Balance the data: keep `min_count` distinct records for every value of the
# target attribute.
balanced_fea, balanced_lab = [], []
for unique_val in unique_vals:
  # A record matches only when every one-hot column agrees. Comparing
  # element-wise and taking np.where(...)[0] directly would list each matching
  # row once per column, letting the "without replacement" draw below pick the
  # same record more than once.
  match_inds = np.where((target_col_vals==unique_val).all(axis=1))[0]
  print(match_inds.shape)
  sampled_inds = np.random.choice(match_inds, min_count, replace=False)
  print(sampled_inds.shape)
  balanced_fea.append(ca_postproc[sampled_inds])
  balanced_lab.append(ca_labels[sampled_inds])
balanced_fea, balanced_lab = np.concatenate(balanced_fea), np.concatenate(balanced_lab)
print(balanced_fea.shape, balanced_lab.shape)
print(np.unique(balanced_fea[:, start_i:end_i], axis=0, return_counts=True))

def get_scores_basic_adult(model, x, y):
  """Query `model` with the one-hot target attribute forced to each value.

  The target attribute's column range is read from the module-level `cols`
  and `target_col`.

  Args:
    model: fitted classifier exposing `predict_proba`.
    x: 2-D feature array. It is not modified; a private copy is edited.
    y: labels for `x`. Unused; kept so all score functions share a signature.

  Returns:
    Array stacking one `predict_proba` output per possible attribute value
    along a new leading axis; index i corresponds to the i-th one-hot column
    being set.
  """
  start_i, end_i = cols[target_col]
  n_values = end_i - start_i
  one_hot = np.eye(n_values)
  # Work on the copy so the caller's array keeps its true attribute values.
  new_x = x.copy()
  preds = []
  for i in range(n_values):
    new_x[:, start_i:end_i] = one_hot[None, i]
    preds.append(model.predict_proba(new_x))
  return np.array(preds)

In [None]:
all_res = defaultdict(list)
model_fns = {'lr': linear_model.LogisticRegression, 'nn': neural_network.MLPClassifier}
# The balanced data is identical for every run, so bind it once.
x = balanced_fea
# np.int was removed in NumPy 1.24; the builtin int is the supported dtype.
y = balanced_lab.astype(int)
start_i, end_i = cols[target_col]


def make_poison(x, y, repeats=1):
  """Build poisoning records from the targeted records.

  Each poison record is a copy of a target with the one-hot target attribute
  forced to its first value and its 0/1 label flipped.

  Args:
    x: 2-D feature array of the targeted records (not modified).
    y: 0/1 integer labels of the targeted records (not modified).
    repeats: how many times the whole poison set is stacked.

  Returns:
    (pois_x, pois_y): poison features and labels, `repeats` copies back to back.
  """
  pois_x = x.copy()
  start_i, end_i = cols[target_col]
  pois_x[:, start_i:end_i] = np.eye(end_i-start_i)[None, 0]
  pois_y = 1 - y
  return np.concatenate([pois_x]*repeats), np.concatenate([pois_y]*repeats)


@ignore_warnings(category=ConvergenceWarning)
def run_experiment(x, y, model_fn):
  """Train on `n` sampled records plus poison and collect attack scores.

  The training-set size is read from the module-level loop variable `n`. The
  first 5 sampled records are the poisoning targets; one poison copy of each
  (see `make_poison`) is appended to the training set.

  Args:
    x: 2-D feature array (balanced data).
    y: 0/1 integer label array aligned with `x`.
    model_fn: zero-argument factory returning an unfitted classifier.

  Returns:
    (train_acc, test_acc, pois_scores1, pois_scores0, pois_targs,
    train_scores1, train_scores0, train_targs): basic-attack accuracy on
    non-targeted training records and on all unsampled records, followed by
    true-label probabilities under attribute value 1 and value 0 together
    with the true attribute index, first for the targets and then for the
    non-targeted training records.
  """
  ss_inds = np.random.choice(x.shape[0], n, replace=False)
  pois_inds = ss_inds[:5]
  # np.bool was removed in NumPy 1.24; the builtin bool is the supported dtype.
  ss_mask = np.zeros(x.shape[0], dtype=bool)
  pois_mask = np.zeros(x.shape[0], dtype=bool)
  ss_mask[ss_inds] = True
  pois_mask[pois_inds] = True
  # Training records that were not targeted by poisoning.
  clean_mask = ss_mask & ~pois_mask

  pois_x, pois_y = make_poison(x[pois_mask], y[pois_mask], repeats=1)
  x_trn = np.concatenate([x[ss_mask], pois_x])
  y_trn = np.concatenate([y[ss_mask], pois_y])
  model = model_fn().fit(x_trn, y_trn)

  def label_scores(feats, labels):
    # Probability of `labels` under each candidate attribute value. The
    # scorer gets a private copy in case it edits its input in place.
    preds = get_scores_basic_adult(model, feats.copy(), labels)
    return np.multiply(preds, np.eye(2)[labels][None, :, :]).sum(axis=2)

  test_preds = label_scores(x[~ss_mask], y[~ss_mask])
  test_acc = ((test_preds.argmax(0))==x[~ss_mask, start_i:end_i].argmax(1)).mean()

  train_preds = label_scores(x[clean_mask], y[clean_mask])
  train_acc = ((train_preds.argmax(0))==x[clean_mask, start_i:end_i].argmax(1)).mean()

  poised_preds = label_scores(x[pois_mask], y[pois_mask])
  pois_preds = label_scores(pois_x, pois_y)
  # pois_x may hold stacked repeats; keep the first copy so columns line up.
  pois_preds = pois_preds[:, :poised_preds.shape[1]]

  train_scores1, train_scores0, train_targs = train_preds[1], train_preds[0], x[clean_mask, start_i:end_i].argmax(1)
  # NOTE(review): target scores are the raw pois_preds (poison copies scored
  # with their flipped labels), not poised_preds - pois_preds as in the
  # synthetic-data cells -- confirm this is intended.
  pois_scores1, pois_scores0, pois_targs = pois_preds[1], pois_preds[0], x[pois_mask, start_i:end_i].argmax(1)
  return train_acc, test_acc, pois_scores1, pois_scores0, pois_targs, train_scores1, train_scores0, train_targs


for n, model_name in product([25, 100, 1000], model_fns):
  model_fn = model_fns[model_name]
  res_li = []
  for expid in range(400):
    np.random.seed(expid)
    # One repetition per seed; widen the range to average more subsamples.
    for i in range(1):
      # Drop the two accuracies; keep the six score/target arrays.
      res_li.append(run_experiment(x, y, model_fn)[2:])

  # Attack on non-targeted training records: fit a logistic regression on the
  # (score1, score0) pairs from the first half of the pooled records and
  # report its accuracy on the second half.
  trn_scores1 = np.array([v[3] for v in res_li]).ravel()
  trn_scores0 = np.array([v[4] for v in res_li]).ravel()
  trn_targs = np.array([v[5] for v in res_li]).ravel()
  trn_score_mat = np.array([trn_scores1, trn_scores0]).T
  first_half = trn_score_mat.shape[0]//2
  trn_lr = linear_model.LogisticRegression().fit(trn_score_mat[:first_half], trn_targs[:first_half])
  trn_acc = trn_lr.score(trn_score_mat[first_half:], trn_targs[first_half:])

  # Same procedure for the poisoning targets.
  scores1 = np.array([v[0] for v in res_li]).ravel()
  scores0 = np.array([v[1] for v in res_li]).ravel()
  targs = np.array([v[2] for v in res_li]).ravel()
  score_mat = np.array([scores1, scores0]).T
  first_half = score_mat.shape[0]//2
  pois_lr = linear_model.LogisticRegression().fit(score_mat[:first_half], targs[:first_half])
  pois_acc = pois_lr.score(score_mat[first_half:], targs[first_half:])
  all_res[n, model_name] = trn_acc, pois_acc
  print(n, model_name, trn_acc, pois_acc)

# Adult data canaries

In [None]:
from folktables import ACSDataSource, ACSIncome

# Load the 2018 1-Year ACS person survey for California (downloaded on demand
# because download=True) and convert it to arrays for the ACSIncome task.
data_source = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')
ca_data = data_source.get_data(states=["CA"], download=True)
# The third value returned by df_to_numpy is not used in this notebook.
ca_features, ca_labels, _ = ACSIncome.df_to_numpy(ca_data)
def feature_fn(preproc_fea):
  """Drop unused ACS columns and one-hot encode the categorical ones.

  Input columns are, in order: age, type of job, schooling level, marriage,
  occupation code, birth state, unk, hours worked, sex, race.

  Args:
    preproc_fea: 2-D array whose columns follow the order above.

  Returns:
    (features, cols): `features` has one row per input row; numeric columns
    are passed through and each categorical column becomes one indicator
    column per distinct value (in sorted order). `cols` maps each categorical
    feature name to the half-open (start, end) range of its indicator columns
    in `features`. Numeric features get no entry in `cols`.
  """
  all_feature_names = ['AGEP', 'COW', 'SCHL', 'MAR', 'OCCP', 'POBP', 'RELP', 'WKHP', 'SEX', 'RAC1P']
  to_categorical = ['COW', 'SCHL', 'MAR', 'RAC1P', 'SEX']
  to_remove = ['POBP', 'RELP', 'OCCP']
  cols = {}
  all_features = []
  for i, fea_name in enumerate(all_feature_names):
    if fea_name in to_remove:
      continue
    column = preproc_fea[:, i]
    if fea_name not in to_categorical:
      # Numeric feature: keep as a single column, not recorded in `cols`.
      all_features.append(column)
      continue
    fea_start = len(all_features)
    all_features.extend(column == fea_value for fea_value in np.unique(column))
    cols[fea_name] = (fea_start, len(all_features))
  return np.array(all_features).T, cols

ca_postproc, cols = feature_fn(ca_features)
print(ca_postproc.shape)
print(cols)

# Plug-in your method for tabular datasets
model = linear_model.LogisticRegression()

# Train on CA data
model.fit(ca_postproc, ca_labels)

print(model.score(ca_postproc, ca_labels))

# Attribute the attack tries to infer; `cols` gives its one-hot column range.
target_col = 'SEX'

start_i, end_i = cols[target_col]
target_col_vals = ca_postproc[:, start_i:end_i]
unique_vals, unique_counts = np.unique(target_col_vals, axis=0, return_counts=True)
min_count = unique_counts.min()
print(unique_vals, unique_counts)

# Balance the data: keep `min_count` distinct records for every value of the
# target attribute.
balanced_fea, balanced_lab = [], []
for unique_val in unique_vals:
  # A record matches only when every one-hot column agrees. Comparing
  # element-wise and taking np.where(...)[0] directly would list each matching
  # row once per column, letting the "without replacement" draw below pick the
  # same record more than once.
  match_inds = np.where((target_col_vals==unique_val).all(axis=1))[0]
  print(match_inds.shape)
  sampled_inds = np.random.choice(match_inds, min_count, replace=False)
  print(sampled_inds.shape)
  balanced_fea.append(ca_postproc[sampled_inds])
  balanced_lab.append(ca_labels[sampled_inds])
balanced_fea, balanced_lab = np.concatenate(balanced_fea), np.concatenate(balanced_lab)
print(balanced_fea.shape, balanced_lab.shape)
print(np.unique(balanced_fea[:, start_i:end_i], axis=0, return_counts=True))

def get_scores_basic_adult(model, x, y):
  """Query `model` with the one-hot target attribute forced to each value.

  The target attribute's column range is read from the module-level `cols`
  and `target_col`.

  Args:
    model: fitted classifier exposing `predict_proba`.
    x: 2-D feature array. It is not modified; a private copy is edited.
    y: labels for `x`. Unused; kept so all score functions share a signature.

  Returns:
    Array stacking one `predict_proba` output per possible attribute value
    along a new leading axis; index i corresponds to the i-th one-hot column
    being set.
  """
  start_i, end_i = cols[target_col]
  n_values = end_i - start_i
  one_hot = np.eye(n_values)
  # Work on the copy so the caller's array keeps its true attribute values.
  new_x = x.copy()
  preds = []
  for i in range(n_values):
    new_x[:, start_i:end_i] = one_hot[None, i]
    preds.append(model.predict_proba(new_x))
  return np.array(preds)

In [None]:
all_res = defaultdict(list)
model_fns = {'lr': linear_model.LogisticRegression, 'nn': neural_network.MLPClassifier}
# The balanced data is identical for every run, so bind it once.
x = balanced_fea
# np.int was removed in NumPy 1.24; the builtin int is the supported dtype.
y = balanced_lab.astype(int)
start_i, end_i = cols[target_col]


def make_poison(x, y, repeats=1):
  """Build poisoning records from the targeted records.

  Each poison record is a copy of a target with the one-hot target attribute
  forced to its first value and its 0/1 label flipped.

  Args:
    x: 2-D feature array of the targeted records (not modified).
    y: 0/1 integer labels of the targeted records (not modified).
    repeats: how many times the whole poison set is stacked.

  Returns:
    (pois_x, pois_y): poison features and labels, `repeats` copies back to back.
  """
  pois_x = x.copy()
  start_i, end_i = cols[target_col]
  pois_x[:, start_i:end_i] = np.eye(end_i-start_i)[None, 0]
  pois_y = 1 - y
  return np.concatenate([pois_x]*repeats), np.concatenate([pois_y]*repeats)


@ignore_warnings(category=ConvergenceWarning)
def run_experiment(x, y, model_fn):
  """Train on `n` sampled records plus poison and collect attack scores.

  The training-set size is read from the module-level loop variable `n`. The
  first 5 sampled records are the poisoning targets; one poison copy of each
  (see `make_poison`) is appended to the training set.

  Args:
    x: 2-D feature array (balanced data).
    y: 0/1 integer label array aligned with `x`.
    model_fn: zero-argument factory returning an unfitted classifier.

  Returns:
    (train_acc, test_acc, pois_scores1, pois_scores0, pois_targs,
    train_scores1, train_scores0, train_targs): basic-attack accuracy on
    non-targeted training records and on all unsampled records, followed by
    true-label probabilities under attribute value 1 and value 0 together
    with the true attribute index, first for the targets and then for the
    non-targeted training records.
  """
  ss_inds = np.random.choice(x.shape[0], n, replace=False)
  pois_inds = ss_inds[:5]
  # np.bool was removed in NumPy 1.24; the builtin bool is the supported dtype.
  ss_mask = np.zeros(x.shape[0], dtype=bool)
  pois_mask = np.zeros(x.shape[0], dtype=bool)
  ss_mask[ss_inds] = True
  pois_mask[pois_inds] = True
  # Training records that were not targeted by poisoning.
  clean_mask = ss_mask & ~pois_mask

  pois_x, pois_y = make_poison(x[pois_mask], y[pois_mask], repeats=1)
  x_trn = np.concatenate([x[ss_mask], pois_x])
  y_trn = np.concatenate([y[ss_mask], pois_y])
  model = model_fn().fit(x_trn, y_trn)

  def label_scores(feats, labels):
    # Probability of `labels` under each candidate attribute value. The
    # scorer gets a private copy in case it edits its input in place.
    preds = get_scores_basic_adult(model, feats.copy(), labels)
    return np.multiply(preds, np.eye(2)[labels][None, :, :]).sum(axis=2)

  test_preds = label_scores(x[~ss_mask], y[~ss_mask])
  test_acc = ((test_preds.argmax(0))==x[~ss_mask, start_i:end_i].argmax(1)).mean()

  train_preds = label_scores(x[clean_mask], y[clean_mask])
  train_acc = ((train_preds.argmax(0))==x[clean_mask, start_i:end_i].argmax(1)).mean()

  poised_preds = label_scores(x[pois_mask], y[pois_mask])
  pois_preds = label_scores(pois_x, pois_y)
  # pois_x may hold stacked repeats; keep the first copy so columns line up.
  pois_preds = pois_preds[:, :poised_preds.shape[1]]

  train_scores1, train_scores0, train_targs = train_preds[1], train_preds[0], x[clean_mask, start_i:end_i].argmax(1)
  # NOTE(review): target scores are the raw pois_preds (poison copies scored
  # with their flipped labels), not poised_preds - pois_preds as in the
  # synthetic-data cells -- confirm this is intended.
  pois_scores1, pois_scores0, pois_targs = pois_preds[1], pois_preds[0], x[pois_mask, start_i:end_i].argmax(1)
  return train_acc, test_acc, pois_scores1, pois_scores0, pois_targs, train_scores1, train_scores0, train_targs


for n, model_name in product([25, 100, 1000], model_fns):
  model_fn = model_fns[model_name]
  res_li = []
  for expid in range(400):
    np.random.seed(expid)
    # One repetition per seed; widen the range to average more subsamples.
    for i in range(1):
      # Drop the two accuracies; keep the six score/target arrays.
      res_li.append(run_experiment(x, y, model_fn)[2:])

  # Attack on non-targeted training records: fit a logistic regression on the
  # (score1, score0) pairs from the first half of the pooled records and
  # report its accuracy on the second half.
  trn_scores1 = np.array([v[3] for v in res_li]).ravel()
  trn_scores0 = np.array([v[4] for v in res_li]).ravel()
  trn_targs = np.array([v[5] for v in res_li]).ravel()
  trn_score_mat = np.array([trn_scores1, trn_scores0]).T
  first_half = trn_score_mat.shape[0]//2
  trn_lr = linear_model.LogisticRegression().fit(trn_score_mat[:first_half], trn_targs[:first_half])
  trn_acc = trn_lr.score(trn_score_mat[first_half:], trn_targs[first_half:])

  # Same procedure for the poisoning targets.
  scores1 = np.array([v[0] for v in res_li]).ravel()
  scores0 = np.array([v[1] for v in res_li]).ravel()
  targs = np.array([v[2] for v in res_li]).ravel()
  score_mat = np.array([scores1, scores0]).T
  first_half = score_mat.shape[0]//2
  pois_lr = linear_model.LogisticRegression().fit(score_mat[:first_half], targs[:first_half])
  pois_acc = pois_lr.score(score_mat[first_half:], targs[first_half:])
  all_res[n, model_name] = trn_acc, pois_acc
  print(n, model_name, trn_acc, pois_acc)