In [1]:
from SafeTransformer import SafeTransformer
from sklearn.datasets import load_boston
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
import random
import matplotlib.pyplot as plt
from copy import deepcopy

In [2]:
def y_function(value, value2):
    """Return the synthetic binary label (0 or 1) for one observation.

    Parameters
    ----------
    value : number
        First feature; the label is 1 only when it lies strictly inside
        (-2, 2) or strictly below -4.
    value2 : number
        Second feature; any value strictly above 3.5 forces the label to 0
        regardless of ``value``.

    Returns
    -------
    int
        1 for the positive class, 0 otherwise.
    """
    # The second feature acts as a veto: large values are always class 0.
    if value2 > 3.5:
        return 0
    in_central_band = -2 < value < 2
    in_far_left_tail = value < -4
    return 1 if (in_central_band or in_far_left_tail) else 0

In [3]:
# Number of synthetic observations; used as the size of every generated array.
n = 500

In [4]:
# Build the synthetic data set: 'X' is an even grid on [-5, 5] and 'X2' is
# uniform on [0, 5). Labels come from y_function on the *noise-free* grid;
# Gaussian noise (scale 0.4) is added to 'X' only afterwards.
np.random.seed(123)
x_grid = np.linspace(-5, 5, n)
x2_draws = np.random.uniform(low=0, high=5, size=n)
X = pd.DataFrame(data={'X': x_grid, 'X2': x2_draws})
y = pd.Series([y_function(x_val, x2_val) for x_val, x2_val in zip(X['X'], X['X2'])])
# NOTE(review): this seeds only the stdlib `random` module; the noise below is
# drawn from NumPy's global generator, which was seeded at the top of the cell.
random.seed(123)
x_noise = np.random.normal(size=n, scale=0.4)
X['X'] = X['X'] + x_noise
X.head()

Unnamed: 0,X,X2
0,-5.196389,3.482346
1,-5.503626,1.430697
2,-4.963384,1.134257
3,-4.549155,2.756574
4,-5.620268,3.597345


In [5]:
# Split features and labels into train / test parts with a fixed seed so the
# partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=123,
)

In [6]:
# Fit the gradient-boosted surrogate on the training split and report its
# accuracy on the test split (displayed as the cell's output).
# random_state is pinned so this model is configured the same way as the
# XGBClassifier instances created in the penalty sweep further down.
surrogate_model = XGBClassifier(random_state=123).fit(X_train, y_train)
surrogate_model_predictions = surrogate_model.predict(X_test)
accuracy_score(y_test, surrogate_model_predictions)

0.912

In [7]:
# Baseline: plain logistic regression on the raw features, scored on the
# test split for comparison with the surrogate model.
base_model = LogisticRegression(solver='lbfgs')
base_model.fit(X_train, y_train)
base_predictions = base_model.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=base_predictions))

0.744


In [8]:
# Sweep the SafeTransformer penalty over 25 evenly spaced values in [0.01, 10].
# For each penalty, fit a SAFE -> logistic-regression pipeline, score it on the
# test split, and keep copies of the best-scoring transformer and model.
# NOTE(review): `auc` / `aucs` hold accuracy_score values, not AUC; the names
# are kept unchanged because later cells may reference them.
# NOTE(review): the penalty is chosen using X_test / y_test, so `best_score`
# is an optimistic estimate — consider selecting on a separate validation split.
pens = np.linspace(0.01, 10, 25)
aucs = []
best_pen = 0
best_score = float('-Inf')

for pen in pens:
    surrogate_model = XGBClassifier(random_state=123)
    safe_transformer = SafeTransformer(surrogate_model, penalty=pen)
    logistic_model_simple = LogisticRegression(solver='lbfgs')
    pipe = Pipeline(
        steps=[
            ('safe', safe_transformer),
            ('linear', logistic_model_simple),
        ]
    ).fit(X_train, y_train)
    predictions = pipe.predict(X_test)
    auc = accuracy_score(y_test, predictions)
    aucs.append(auc)
    print([pen, auc])
    if not auc > best_score:
        continue
    # Strictly better than anything seen so far: snapshot the objects used in
    # this iteration so later iterations cannot overwrite them.
    best_transformer = deepcopy(safe_transformer)
    best_model = deepcopy(logistic_model_simple)
    best_score = auc
    best_pen = pen

[0.01, 0.88]
[0.42625, 0.912]
[0.8425, 0.92]
[1.25875, 0.912]
[1.675, 0.912]
[2.0912499999999996, 0.912]
[2.5075, 0.912]
[2.92375, 0.912]
[3.34, 0.912]
[3.7562499999999996, 0.912]
[4.172499999999999, 0.912]
[4.58875, 0.912]
[5.005, 0.912]
[5.42125, 0.912]
[5.8375, 0.912]
[6.25375, 0.912]
[6.67, 0.912]
[7.08625, 0.912]
[7.5024999999999995, 0.912]
[7.91875, 0.912]
[8.334999999999999, 0.912]
[8.75125, 0.912]
[9.1675, 0.912]
[9.58375, 0.912]
[10.0, 0.912]
