In [3]:
%load_ext autoreload
%autoreload 2
from sklearn import datasets
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from lark import Lark
import time 
from sklearn.preprocessing import MinMaxScaler

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# prepare dataset

In [4]:
def sklearn_to_df(sklearn_dataset):
    """Build a DataFrame from an sklearn-style dataset object.

    The feature matrix (``.data``) supplies the columns, named after
    ``.feature_names``; the class labels (``.target``) are attached as an
    extra column called "target".
    """
    features = pd.DataFrame(
        data=sklearn_dataset.data,
        columns=sklearn_dataset.feature_names,
    )
    labels = pd.Series(sklearn_dataset.target)
    return features.assign(target=labels)


target = "target"
dataset = datasets.load_iris()
# Turn the task into a binary one: every label equal to 2 is rewritten to 0,
# all other labels are kept as they are.
dataset[target] = np.where(dataset[target]==2, 0, dataset[target])

# get df
data = sklearn_to_df(dataset)

# get X,y
X = data.drop(['target'], axis=1)
y = data['target']

# Min-max scale every feature column in place.
# NOTE(review): the scaler is fitted on all rows *before* the train/test split,
# so test rows influence the scaling. Kept as-is because later cells sample
# random points from the value range of the full, scaled X.
scaler = MinMaxScaler()
X[X.columns] = scaler.fit_transform(X[X.columns])

# Shorter column names (the sklearn originals are replaced positionally).
X.columns = ['sepal length', 'sepal width', 'petal length', 'petal width']


# Split dataset into training set and test set: 90% training and 10% test.
# A fixed random_state makes the split identical on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=0
)


X_train

Unnamed: 0,sepal length,sepal width,petal length,petal width
52,0.722222,0.458333,0.661017,0.583333
48,0.277778,0.708333,0.084746,0.041667
116,0.611111,0.416667,0.762712,0.708333
40,0.194444,0.625000,0.050847,0.083333
79,0.388889,0.250000,0.423729,0.375000
...,...,...,...,...
88,0.361111,0.416667,0.525424,0.500000
122,0.944444,0.333333,0.966102,0.791667
18,0.388889,0.750000,0.118644,0.083333
57,0.166667,0.166667,0.389831,0.375000


# train a black-box

In [5]:
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Two candidate black-box models. Both are seeded so that re-running the
# notebook reproduces the same classifiers.
clf_rf = RandomForestClassifier(n_estimators=100, random_state=1)
clf_mlp = MLPClassifier(random_state=1, max_iter=300)

# Fit each model exactly once (the MLP used to be fitted twice in a row on the
# same data, which only repeated the training work).
clf_rf.fit(X_train, y_train)
clf_mlp.fit(X_train, y_train)

y_pred = clf_mlp.predict(X_test)

# Model Accuracy, how often is the classifier correct?
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# NOTE(review): the models are trained on min-max scaled features, so this
# probe point lies outside the range seen during training - confirm whether
# it should be passed through `scaler` first.
print(clf_rf.predict([[3, 5, 4, 2]]))
print(clf_mlp.predict([[3, 5, 4, 2]]))

Accuracy: 1.0
[0]
[0]


# create a random generator

In [6]:

import random
def random_generator(X):
    """Sample one random point inside the per-column value range of ``X``.

    For every column of the DataFrame ``X``, taken in column order, one value
    is drawn with ``random.uniform`` between that column's minimum and its
    maximum. The drawn values are returned as a list.
    """
    return [
        random.uniform(X[name].min(), X[name].max())
        for name in X.columns
    ]

# Draw five random points from the feature ranges of X and print, for each,
# the point followed by the random-forest and then the MLP prediction.
for trial in range(5):
    x = random_generator(X)
    print(x)
    for _clf in (clf_rf, clf_mlp):
        print(_clf.predict([x]))

[0.9732016535240183, 0.8082277456960519, 0.969532256501753, 0.12025199920529406]
[0]
[1]
[0.9848535135245551, 0.24106511385383222, 0.08084031599239716, 0.3758672091438924]
[0]
[1]
[0.1623359079550395, 0.24381385461971572, 0.9972108407224352, 0.6847181026245915]
[0]
[1]
[0.31899360857929326, 0.7716619341429445, 0.2030555130055769, 0.9973000177236226]
[0]
[0]
[0.6658975698676909, 0.22466363688295432, 0.7689939617338137, 0.23984531292860767]
[0]
[1]


# implementation of learner

In [7]:
from sygus_if import SyGuS_IF
# X = [
#     [1,2,1,1],
#     [-1.1,1,1,42]
# ]
# y = [
#     1,
#     0
# ]

In [8]:
# start_ = time.time()



# y_pred_test = sgf.predict_z3(X_test)
# print("Accuracy:",metrics.accuracy_score(y_test, y_pred_test))
# y_pred_train = sgf.predict_z3(X_train)
# print("Accuracy:",metrics.accuracy_score(y_train, y_pred_train))
# print(y_pred_test)
# print(time.time() - start_)

In [9]:


# start_ = time.time()

# y_pred_test = sgf.predict(X_test, y_test)
# print("Accuracy:",metrics.accuracy_score(y_test, y_pred_test))
# y_pred_train = sgf.predict(X_train, y_train)
# print("Accuracy:",metrics.accuracy_score(y_train, y_pred_train))

# print(time.time() - start_)

# implementation of the query

In [47]:
import query
import numpy as np
def predict_function_query(x, center=(0.5, 0.5, 0.5, 0.5), radius=0.5):
    """Tell whether ``x`` lies strictly inside a ball around ``center``.

    Parameters
    ----------
    x : sequence of numbers
        The point to test; it must have as many entries as ``center``.
    center : sequence of numbers, optional
        Centre of the ball. Defaults to (0.5, 0.5, 0.5, 0.5), the value that
        was previously hard-coded.
    radius : float, optional
        Radius of the ball. Defaults to 0.5, the previously hard-coded value.

    Returns
    -------
    bool
        True when the Euclidean distance between ``x`` and ``center`` is
        strictly smaller than ``radius``; False otherwise (including points
        exactly on the boundary).
    """
    dist = np.linalg.norm(np.asarray(center) - np.asarray(x))
    # bool(...) keeps the return type a plain Python bool rather than a
    # numpy boolean.
    return bool(dist < radius)

# Wrap the distance predicate in a Query; no underlying model is supplied.
q = query.Query(
    model=None,
    prediction_function=predict_function_query,
)
q.classify_example([1, 0.5, 0.5, 0.55])

False

# implementation of black-box

In [48]:
from blackbox import BlackBox
# Wrap the trained MLP and its `predict` method in a BlackBox, then classify
# one probe point.
# NOTE(review): the MLP was trained on min-max scaled features, so this probe
# lies outside the range seen during training - confirm this is intended.
bb = BlackBox(clf_mlp, clf_mlp.predict)
bb.classify_example([3, 5, 4, 2])

False

# implementation of teacher

In [59]:
from teacher import Teacher
from learner import Learner
# The learner wraps a SyGuS_IF instance and starts with no examples.
sgf = SyGuS_IF()
l = Learner(
    model=sgf,
    prediction_function=sgf.predict_z3,
    train_function=sgf.fit,
    X=[],
    y=[],
)

# Run the teaching loop against the black-box and the query; random examples
# come from `random_generator`, which is given X as its argument source.
t = Teacher(max_iterations=3, epsilon=0.03, delta=0.03)
new_l, flag = t.teach(
    blackbox=bb,
    learner=l,
    query=q,
    random_example_generator=random_generator,
    params_generator=X,
)

# Report: completion flag, the synthesized function and the learner's labels.
print()
print("\nLearning complete")
print("Is learning complete?", flag)
print(new_l.model.synthesized_function)
print(new_l.y)

0%|          | 0/3 [00:00<?, ?it/s]True False False 0
100%|██████████| 3/3 [00:00<00:00,  3.80it/s]False True True 1
True False False 0
Total examples checked: 26


Learning complete
Is learning complete? False
(define-fun func ((x_0 Real) (x_1 Real) (x_2 Real) (x_3 Real)) Real (ite (>= x_2 (/ 1 2)) (ite (< x_1 (/ 1 2)) 0 0) 1))
[0, 1, 0]



In [56]:
# Count the rows of X on which the learner's verdict equals the conjunction
# of the black-box verdict and the query verdict, and time the whole pass.
start_ = time.time()

examples = X.values.tolist()
cnt = 0
for example in examples:
    # All three classifiers are called for every example, in this order.
    blackbox_verdict = bb.classify_example(example)
    learner_verdict = new_l.classify_example(example)
    query_verdict = q.classify_example(example)

    expected_verdict = blackbox_verdict and query_verdict
    if not (learner_verdict == expected_verdict):
        continue
    cnt += 1

total = len(y)
print("correct: ", cnt, "out of ", total, "examples. Percentage: ", cnt/total)
print(time.time() - start_)

correct:  119 out of  150 examples. Percentage:  0.7933333333333333
3.652432680130005
