In [1]:
import pandas as pd
import requests
import numpy as np
import spacy
import art.config
import os
art.config.ART_NUMPY_DTYPE = 'str' # override dtype to str instead of float

from art.estimators.classification import BlackBoxClassifier
from art.attacks.extraction import KnockoffNets
from wex_clf import OnewexClassifier
from spacy_clf import SpacyClassifier
from sklearn.metrics import precision_recall_fscore_support

# hide ssl errors
import warnings
warnings.filterwarnings("ignore")

In [2]:
target_class_dict = {
    'fake-news': ['False', 'True'],
    'spam': ['spam', 'ham'],
    'hate-speech': ['Offensive_Language', 'Hate_Speech', 'Neither']    
}

collection_dict = {
    'fake-news': 'e54f9c0d-5b66-4c10-0000-01737d592c53',
    'spam': 'e54f9c0d-5b66-4c10-0000-01738025f3a2',
    'hate-speech': 'e54f9c0d-5b66-4c10-0000-0173802bad98'
}

def eval_clf(art_clf, texts, labels, use_case):
    preds = art_clf.predict(texts)
    if use_case == 'fake-news':
        # boolean indicators
        return precision_recall_fscore_support(labels.astype(int), np.argmax(preds, axis=1), average='macro')
    
    # string indicators
    target_labels = target_class_dict.get(use_case)
    pred_labels = [target_labels[x] for x in np.argmax(preds,axis=1)]
    return precision_recall_fscore_support(labels, pred_labels, average='macro')

session = requests.Session()
session.auth = ('admin', 'admin') # dummy credentials

for setting in ['random', 'adaptive']:
    print("Evaluating setting", setting)
    for use_case in ['fake-news', 'spam', 'hate-speech']:
        print("Evaluating use case", use_case)

        print("Loading data..")
        df = pd.read_csv(f'res/{use_case}/train.csv')
        texts = df['text'].to_numpy()
        labels = df['target'].to_numpy()
        action_ids = np.array([target_class_dict.get(use_case).index(str(x)) for x in labels]) # required for knockoff action sampling

        df_train_eval = df.sample(n=2000, random_state=212132)
        train_eval_texts = df_train_eval['text'].to_numpy()
        train_labels = df_train_eval['target'].to_numpy()

        df_test = pd.read_csv(f'res/{use_case}/test.csv')
        test_eval_texts = df['text'].to_numpy()
        test_eval_labels = df['target'].to_numpy()

        print("Loading Victim model..")
        blackbox_classifier = OnewexClassifier(
            prediction_url=f'https://localhost/api/v1/collections/{collection_dict.get(use_case)}/analyze',
            target_classes=target_class_dict.get(use_case),
            web_session=session
        )

        if os.path.exists(f'eval/{use_case}/wex_{setting}_res.csv'):
            df_stats = pd.read_csv(f'eval/{use_case}/wex_{setting}_res.csv', index_col=0)
            print("Skipping already performed baseline eval")
        else:
            print("Calculating performance baselines with blackbox..")
            p,r,f,_ = eval_clf(blackbox_classifier, train_eval_texts, train_labels, use_case)
            df_stats = pd.DataFrame(data=np.array([p,r,f,'train']).reshape((1,4)), columns=['precision', 'recall', 'fscore', 'set'], index=['baseline_r'])
            print("Train", (p,r,f))
            p,r,f,_ = eval_clf(blackbox_classifier, test_eval_texts, test_eval_labels, use_case)
            df_stats = df_stats.append(pd.DataFrame(data=np.array([p,r,f,'test']).reshape((1,4)), columns=['precision', 'recall', 'fscore', 'set'], index=['baseline_e']))
            print("Test", (p,r,f))


        print("Evaluating random strategy..")
        for nb_stolen in [1,10,100,250,500,1000,2500,5000,10000,25000,50000]:
            if f'q_{nb_stolen}_e' in df_stats.index.tolist():
                print(f"Already evaluated {nb_stolen} queries - skipping")
                if nb_stolen > texts.shape[0]:
                    # fully evaluated
                    break
                continue

            print(f"Training with {nb_stolen} queries to black box")
            knockoff = KnockoffNets(classifier = blackbox_classifier, batch_size_fit=32, batch_size_query=32, nb_stolen=nb_stolen, sampling_strategy=setting)
            np.random.seed(23435432)
            thieved_classifier = SpacyClassifier(model = spacy.load("en_core_web_sm"), target_classes=target_class_dict.get(use_case))
            knockoff.extract(x = texts, y=action_ids, thieved_classifier=thieved_classifier)

            print("Evaluating on train/test set")
            p,r,f,_ = eval_clf(thieved_classifier, train_eval_texts, train_labels, use_case)
            df_stats = df_stats.append(pd.DataFrame(data=np.array([p,r,f,'train']).reshape((1,4)), columns=['precision', 'recall', 'fscore', 'set'], index=[f'q_{nb_stolen}_r']))
            print("Train", (p,r,f))
            p,r,f,_ = eval_clf(thieved_classifier, test_eval_texts, test_eval_labels, use_case)
            df_stats = df_stats.append(pd.DataFrame(data=np.array([p,r,f,'test']).reshape((1,4)), columns=['precision', 'recall', 'fscore', 'set'], index=[f'q_{nb_stolen}_e']))
            print("Test", (p,r,f))

            print("Saving classifier")
            thieved_classifier.save(f'stolen-models-wex/{use_case}/{setting}_{nb_stolen}_queries/')
            df_stats.to_csv(f'eval/{use_case}/wex_{setting}_res.csv')

            if nb_stolen > texts.shape[0]:
                print("Evaluated max query size - Stopping")
                break


Evaluating setting random
Evaluating use case fake-news
Loading data..
Loading Victim model..
Calculating performance baselines with blackbox..
Train (0.9500277759623554, 0.9411477352809277, 0.9425551470588236)
Test (0.9527223973982473, 0.9435230777683176, 0.9453816334906979)
Evaluating random strategy..
Training with 1 queries to black box
Evaluating on train/test set
Train (0.242, 0.5, 0.32614555256064687)
Test (0.23937385103893932, 0.5, 0.3237521190431343)
Saving classifier
Training with 10 queries to black box
Evaluating on train/test set
Train (0.9729604022627278, 0.9707820808507912, 0.9714181917192485)
Test (0.9660830098556379, 0.9632402614229467, 0.9641369804726445)
Saving classifier
Training with 100 queries to black box
Evaluating on train/test set
Train (0.9372881355932203, 0.9235537190082644, 0.9251590875072313)
Test (0.941743059611477, 0.9290450899766804, 0.9311701269753341)
Saving classifier
Training with 250 queries to black box
Evaluating on train/test set
Train (0.96806

Knock-off nets: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.91it/s]


Evaluating on train/test set
Train (0.258, 0.5, 0.3403693931398417)
Test (0.2606261489610607, 0.5, 0.3426468434158489)
Saving classifier
Training with 10 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 11.78it/s]


Evaluating on train/test set
Train (0.9680602531293154, 0.9644876353385867, 0.9653647058823529)
Test (0.9627228015789493, 0.9577193983917589, 0.9590327536141029)
Saving classifier
Training with 100 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:06<00:00, 14.72it/s]


Evaluating on train/test set
Train (0.9899361099101696, 0.9900538151066692, 0.9899909918927035)
Test (0.9888066578859732, 0.988933903198866, 0.9888677754871757)
Saving classifier
Training with 250 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████| 250/250 [00:16<00:00, 15.18it/s]


Evaluating on train/test set
Train (0.9854807399207457, 0.984664296239349, 0.9849734932420791)
Test (0.984122799157995, 0.9831143983555711, 0.9835210926354738)
Saving classifier
Training with 500 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████| 500/500 [00:33<00:00, 14.94it/s]


Evaluating on train/test set
Train (0.9832381520071368, 0.982049618809661, 0.9824636954655375)
Test (0.9762269390558815, 0.9740697609117053, 0.9748100896842968)
Saving classifier
Training with 1000 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████| 1000/1000 [01:07<00:00, 14.81it/s]


Evaluating on train/test set
Train (0.9764249027675551, 0.9743016849253636, 0.9749295771823051)
Test (0.9800262980213148, 0.9783418808437698, 0.9789562141133024)
Saving classifier
Training with 2500 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████| 2500/2500 [02:46<00:00, 15.00it/s]


Evaluating on train/test set
Train (0.9676279606603952, 0.9639711064129668, 0.9648605314493224)
Test (0.9693040166869937, 0.9653523579173184, 0.9664865716156164)
Saving classifier
Training with 5000 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████| 5000/5000 [05:50<00:00, 14.25it/s]


Evaluating on train/test set
Train (0.9631956912028725, 0.9576446280991735, 0.9587803404341737)
Test (0.9611578807242309, 0.954460941394103, 0.9560350662350515)
Saving classifier
Training with 10000 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████| 10000/10000 [12:05<00:00, 13.79it/s]


Evaluating on train/test set
Train (0.968636875431311, 0.9649721314626177, 0.9658645162650559)
Test (0.9694761153325442, 0.9654111629337249, 0.9665677318117866)
Saving classifier
Training with 25000 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████| 25000/25000 [31:05<00:00, 13.40it/s]


Evaluating on train/test set
Train (0.9513882375967934, 0.9432458837849959, 0.9445997330714412)
Test (0.9535450206931626, 0.9446647065118887, 0.9464907885894822)
Saving classifier
Training with 50000 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████| 50000/50000 [5:10:22<00:00,  2.68it/s]


Evaluating on train/test set
Train (0.9525884240279086, 0.9447954705618553, 0.9461243656081111)
Test (0.9572916374855238, 0.9495856713262748, 0.9512838283540197)
Saving classifier
Evaluated max query size - Stopping
Evaluating use case spam
Loading data..
Loading Victim model..
Calculating performance baselines with blackbox..
Train (0.9915033512240972, 0.9931027873067492, 0.9923012244902447)
Test (0.9833797116114344, 0.9867866847826088, 0.9850745442858422)
Evaluating random strategy..
Training with 1 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.43it/s]


Evaluating on train/test set
Train (0.43475, 0.5, 0.46509761968440766)
Test (0.43262730959891843, 0.5, 0.4638801642908916)
Saving classifier
Training with 10 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  7.96it/s]


Evaluating on train/test set
Train (0.5896000675624953, 0.6683003619907508, 0.41610361450448186)
Test (0.5952746057134355, 0.6784672867892977, 0.43344541724155716)
Saving classifier
Training with 100 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:07<00:00, 13.48it/s]


Evaluating on train/test set
Train (0.7312708585263423, 0.8965054563000271, 0.7697070073741079)
Test (0.7393300224592794, 0.9008966520345596, 0.7781977722498394)
Saving classifier
Training with 250 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████| 250/250 [00:17<00:00, 13.97it/s]


Evaluating on train/test set
Train (0.8643521590433417, 0.9391621996170786, 0.8963242975971162)
Test (0.8689038780109506, 0.9353321836677815, 0.897788456447945)
Saving classifier
Training with 500 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████| 500/500 [00:37<00:00, 13.51it/s]


Evaluating on train/test set
Train (0.8732086619860738, 0.9553504348075148, 0.9079367892014802)
Test (0.8726828151416266, 0.9522396704292084, 0.9063708945825)
Saving classifier
Training with 1000 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████| 1000/1000 [01:12<00:00, 13.72it/s]


Evaluating on train/test set
Train (0.8948025439025624, 0.9640697190220302, 0.9251737687545882)
Test (0.8931738456997631, 0.9588598278985507, 0.9220831962653708)
Saving classifier
Training with 2500 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████| 2500/2500 [03:04<00:00, 13.51it/s]


Evaluating on train/test set
Train (0.9324320294249218, 0.9792081149381222, 0.9540437336541916)
Test (0.9210763896013296, 0.9736561106465997, 0.9450193881607771)
Saving classifier
Training with 5000 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████| 5000/5000 [06:29<00:00, 12.84it/s]


Evaluating on train/test set
Train (0.9804772327737754, 0.9913776579220452, 0.9858400062303971)
Test (0.9710091417563662, 0.9855394718506132, 0.9781131894225256)
Saving classifier
Evaluated max query size - Stopping
Evaluating use case hate-speech
Loading data..
Loading Victim model..
Calculating performance baselines with blackbox..
Train (0.9374741762302738, 0.8616457303559427, 0.8960040468886737)
Test (0.923283677313214, 0.8675451469239016, 0.8926359387667132)
Evaluating random strategy..
Training with 1 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.33it/s]


Evaluating on train/test set
Train (0.05616666666666667, 0.3333333333333333, 0.09613464555698188)
Test (0.05668050352671189, 0.3333333333333333, 0.09688631217640865)
Saving classifier
Training with 10 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 11.29it/s]


Evaluating on train/test set
Train (0.3391824827667346, 0.3701050139687017, 0.30586026613287437)
Test (0.34553419548098013, 0.3872117543920257, 0.3181799592333833)
Saving classifier
Training with 100 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:07<00:00, 13.79it/s]


Evaluating on train/test set
Train (0.47733333333333333, 0.4603148574036207, 0.367103419853954)
Test (0.4780643414358435, 0.4633338477959518, 0.36022778029261643)
Saving classifier
Training with 250 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████| 250/250 [00:18<00:00, 13.28it/s]


Evaluating on train/test set
Train (0.5017729601026021, 0.5673053633281943, 0.47420788509902473)
Test (0.5038820858323964, 0.5568934029880938, 0.47198564249322156)
Saving classifier
Training with 500 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████████| 500/500 [00:38<00:00, 12.82it/s]


Evaluating on train/test set
Train (0.4852657411828343, 0.605781853133264, 0.4425900368139291)
Test (0.48309912882461453, 0.5814402775235095, 0.4359509936761207)
Saving classifier
Training with 1000 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████| 1000/1000 [01:20<00:00, 12.41it/s]


Evaluating on train/test set
Train (0.5311686717057401, 0.6320146151810557, 0.5314784118714974)
Test (0.5363532455499446, 0.6303616211061499, 0.5312237542558221)
Saving classifier
Training with 2500 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████| 2500/2500 [03:05<00:00, 13.50it/s]


Evaluating on train/test set
Train (0.5482896827665854, 0.6838280254950379, 0.567092082781461)
Test (0.5652607603515979, 0.7076671134243527, 0.587162527476886)
Saving classifier
Training with 5000 queries to black box


Knock-off nets: 100%|██████████████████████████████████████████████████████████████| 5000/5000 [06:10<00:00, 13.50it/s]


Evaluating on train/test set
Train (0.6253188329334163, 0.768473081416747, 0.6589029759778163)
Test (0.6326185910193859, 0.7776450849021709, 0.6654613584695898)
Saving classifier
Training with 10000 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████| 10000/10000 [12:49<00:00, 12.99it/s]


Evaluating on train/test set
Train (0.656919962074937, 0.8100822432560024, 0.6990411566061638)
Test (0.6557277579227243, 0.8046115473129808, 0.6957909482798755)
Saving classifier
Training with 25000 queries to black box


Knock-off nets: 100%|████████████████████████████████████████████████████████████| 25000/25000 [35:30<00:00, 11.74it/s]


Evaluating on train/test set
Train (0.7663632618341222, 0.8270062647973943, 0.7865072713909923)
Test (0.7872611221538746, 0.8423629908828364, 0.8063913291636542)
Saving classifier
Evaluated max query size - Stopping
