In [46]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [73]:
#Black-Box Jacobian Augmentation

class SubstituteModel:
    """Local substitute trained to imitate a black-box victim classifier.

    The victim is only ever accessed through its ``predict`` method. The
    substitute starts from a small seed set and grows it by stepping each
    known point along the substitute's own input gradient, then asking the
    victim to label the enlarged set.

    Attributes:
        victim: object exposing ``predict(x)``; treated as a label oracle.
        x: seed samples used as the starting point of every ``train`` call.
        model: substitute estimator exposing ``fit``, ``predict_proba`` and
            ``coef_`` (a scikit-learn style binary linear classifier).
    """

    def __init__(self, victim, x_init, model_init):
        """Store the victim oracle, the seed samples and the unfitted substitute."""
        self.victim = victim
        self.x = x_init
        self.model = model_init

    def getPrediction(self, x):
        """Return the victim's predicted labels for ``x``."""
        return self.victim.predict(x)

    def train(self, iteration, alpha):
        """Fit the substitute, then refine it with gradient-based augmentation.

        Each round appends one synthetic point per existing point, so the
        working set doubles in size every iteration. ``self.x`` itself is not
        modified; the fitted estimator is left in ``self.model``.

        Args:
            iteration: number of augmentation rounds to run.
            alpha: step size applied to the input gradient.
        """
        x = self.x
        # np.asarray keeps `.shape` available even if the victim returns a list.
        y = np.asarray(self.getPrediction(x))
        model = self.model
        model.fit(x, y)

        for augment_step in range(iteration):

            print('iteration: ' + str(augment_step + 1))

            # Probability of column 1; the labels are subtracted from it
            # directly below, so this assumes the victim emits 0/1 labels.
            x_prob = model.predict_proba(x)[:, 1]
            theta = model.coef_

            # Input gradient of the mean log-loss for sample i:
            #   (h(x_i) - y_i) * theta / m
            # The residual is kept per sample (as a column) so that every
            # point receives its own step. Summing the residual over the
            # batch first would shift all points by one identical vector.
            residual = (x_prob - y)[:, np.newaxis]
            gradient_wrt_x = residual * theta / y.shape[0]

            # Create the synthetic samples and let the victim label everything.
            x_adv = x + gradient_wrt_x * alpha
            x = np.concatenate((x, x_adv), axis=0)
            y = np.asarray(self.getPrediction(x))
            model.fit(x, y)

        self.model = model

In [88]:
#Run
# Train a binary victim (setosa vs. the rest) on iris, then fit a substitute
# that only sees the victim's predictions, and report how often the two agree.

iris = datasets.load_iris()
x = iris.data
y = (iris.target != 0) * 1
xtrain, xtest, ytrain, ytest = train_test_split(x,y, test_size=0.2, random_state=0)

victim = LogisticRegression().fit(xtrain, ytrain)

# Seed set for the substitute: three rows drawn from the full data set.
# The upper bound follows the data instead of a hard-coded row count.
np.random.seed(0)
idx = np.random.randint(x.shape[0], size=3)
x_init = x[idx,:]

# The substitute is first fitted on the victim's labels for the seed rows;
# that fit needs both classes, so fail early with a readable message.
assert np.unique(victim.predict(x_init)).size == 2, \
    "seed samples must cover both victim classes; change the seed or the sample size"

model_init = LogisticRegression()

submodel = SubstituteModel(victim, x_init, model_init)
submodel.train(iteration=5, alpha=0.5)

# Agreement between substitute and victim over the whole data set
# (the victim's predictions play the role of the ground truth).
victim_pred = victim.predict(x)
submodel_pred = submodel.model.predict(x)
submodel_accuracy = accuracy_score(victim_pred, submodel_pred)
print(submodel_accuracy)

iteration: 1
iteration: 2
iteration: 3
iteration: 4
iteration: 5
1.0


In [None]:
#Test
# Three-class one-vs-rest logistic regression on iris. `model` and `theta`
# are left in the namespace after this cell runs.
# NOTE(review): `multi_class` is reported as deprecated in recent scikit-learn
# releases -- confirm against the installed version before upgrading.

iris = datasets.load_iris()
x, y = iris.data, iris.target
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=0)

model = LogisticRegression(multi_class="ovr")
model.fit(xtrain, ytrain)
theta = model.coef_

# Accuracy on the training split.
xpred = model.predict(xtrain)
train_accuracy = accuracy_score(xpred, ytrain)
print("model's training accuracy: ", train_accuracy)

# Accuracy on the held-out split.
xpred = model.predict(xtest)
test_accuracy = accuracy_score(xpred, ytest)
print("model's testing accuracy: ", test_accuracy)

In [45]:
 #create adversarial example for one class (k=2)
 # Shifts ten randomly drawn iris rows by one shared vector: the class-2
 # weight row `theta[2]` scaled by the summed residual (h - y) of the batch,
 # then compares the model's predictions before and after the shift.

 np.random.seed(0)
 # Upper bound follows the data instead of a hard-coded row count.
 idx = np.random.randint(x.shape[0], size=10)
 xadv = x[idx,:]
 yadv = y[idx]
 alpha = 1000

 adv_pred = model.predict(xadv)
 adv_prob = model.predict_proba(xadv)

 # One-vs-rest target for class 2: 1 where the true label is 2, else 0.
 ytrue = [int(label == 2) for label in yadv]

 # cal for sum(h-y)
 h_minus_y = adv_prob[:, 2] - ytrue
 sumhy = np.sum(h_minus_y)

 # NOTE(review): the sum above runs over the 10 sampled rows, but it is
 # normalised by the size of the full label array `y`, not by `yadv`.
 # Left unchanged because `alpha` is tuned against this scale -- confirm
 # which normaliser is intended.
 gradient_wrt_x = (theta[2] * sumhy)/y.shape[0]
 xadvs = xadv + gradient_wrt_x * alpha
 adv_preds = model.predict(xadvs)

 print(xadv)
 print(gradient_wrt_x * alpha)
 print(xadvs)
 print(adv_pred)
 print(adv_preds)

[[4.6 3.2 1.4 0.2]
 [7.7 3.8 6.7 2.2]
 [5.8 2.7 4.1 1. ]
 [6.3 2.9 5.6 1.8]
 [4.9 3.1 1.5 0.1]
 [5.1 3.7 1.5 0.4]
 [5.5 3.5 1.3 0.2]
 [6.3 2.3 4.4 1.3]
 [5.9 3.2 4.8 1.8]
 [5.6 3.  4.1 1.3]]
[-0.91087191 -1.77775084 11.92459587  9.26554611]
[[ 3.68912809  1.42224916 13.32459587  9.46554611]
 [ 6.78912809  2.02224916 18.62459587 11.46554611]
 [ 4.88912809  0.92224916 16.02459587 10.26554611]
 [ 5.38912809  1.12224916 17.52459587 11.06554611]
 [ 3.98912809  1.32224916 13.42459587  9.36554611]
 [ 4.18912809  1.92224916 13.42459587  9.66554611]
 [ 4.58912809  1.72224916 13.22459587  9.46554611]
 [ 5.38912809  0.52224916 16.32459587 10.56554611]
 [ 4.98912809  1.42224916 16.72459587 11.06554611]
 [ 4.68912809  1.22224916 16.02459587 10.56554611]]
[0 2 1 2 0 0 0 1 2 1]
[2 2 2 2 2 2 2 2 2 2]
