In [5]:
import random
import scipy.io as sio
from scipy import optimize
from sklearn import svm
from google.colab import files
from sklearn.model_selection import GridSearchCV
import numpy as np
# Upload files through the Colab file picker; the result maps each file name
# to its uploaded content.
uploaded = files.upload()
# Report the name and size of every uploaded file.
for filename, content in uploaded.items():
    print(f"Uploaded file '{filename}' with length {len(content)} bytes")
# NOTE(review): this star-import is placed after files.upload(); presumably
# utilities.py is one of the uploaded files and has to exist on disk before it
# can be imported -- confirm. The helpers called later in this notebook without
# any other import (draw_contours, classification_perf, get_lids_random_batch,
# get_cross_lids_random_batch, get_kde, weight_calculation, tanh_func)
# presumably come from this module.
from utilities import *
import warnings
# Ignore RuntimeWarnings whose message begins with "divide by zero".
warnings.filterwarnings("ignore", category=RuntimeWarning, message="divide by zero")

In [7]:
# Seed both the stdlib and the NumPy global random generators with the same value.
seed = 1234
for _seed_fn in (random.seed, np.random.seed):
    _seed_fn(seed)

# Variables
# Candidate values explored by the grid search over the RBF-SVM hyper-parameters.
param_grid = dict(
    C=[0.1, 0.3, 0.5, 1, 5, 10],   # range for C
    gamma=[0.01, 0.1, 0.2, 0.3, 0.5, 1, 2, 3],  # range for GAMMA
)
# LID_GAMMA and BATCH_SIZE are forwarded to the LID helper calls;
# epsilon is added to the denominator of the cross-LID ratio.
LID_GAMMA, BATCH_SIZE, epsilon = 0.5, 80, 1e-6
###########

# Passed as the last argument of weight_calculation.
WEIGHT_LB = 0.3

# load the data
# Name of the distorted data file (with the .mat extension), e.g. dist_0.4_radio_dataset.mat
data1file = 'iris_dataset_0.3.mat'
# Distorted data file, used for training on distorted data.
mat_contents = sio.loadmat(data1file)
# Clean data file, used for training on normal data, e.g. radio_dataset.mat
# NOTE(review): this currently points at the same file as data1file -- confirm
# that its 'Ytr' entry really holds the clean labels.
mat_contents2 = sio.loadmat('iris_dataset_0.3.mat')
# original correct labels
YY_tr = mat_contents2['Ytr']

# Training data/labels followed by testing data/labels, all from the distorted file.
xtr, ytr, xtt, ytt = (mat_contents[field] for field in ('Xtr', 'Ytr', 'Xtt', 'Ytt'))

X, y = xtr, ytr
row_count = X.shape[0]

n_classes = np.unique(ytr).size
print('Number of classes:', n_classes)
###############################################################
# No attack
###############################################################
# Choose C and gamma with a 5-fold cross-validated grid search over param_grid,
# fitted on the labels held in YY_tr.
grid_s = svm.SVC(kernel='rbf')
grid_search = GridSearchCV(estimator=grid_s, param_grid=param_grid, cv=5)
grid_search.fit(X, YY_tr.ravel())
best_params = grid_search.best_params_
C = best_params['C']
GAMMA = best_params['gamma']

# Baseline RBF-SVM using the selected hyper-parameters.
model = svm.SVC(kernel='rbf', gamma=GAMMA, C=C, decision_function_shape='ovr')
model.fit(X, YY_tr.ravel())
y_hat = model.predict(xtt)

if X.shape[1] > 2:
    # More than two features: report the scores instead of drawing contours.
    column_names = ['scenario', 'index', 'accuracy', 'error_rate',
                'precision', 'average_precision',
                'recall', 'average_recall',
                'fscore', 'average_fscore']
    # Call classification_perf method
    scores_df = classification_perf(ytt, y_hat, 'SVM - no attack', 0, column_names)
    # Print the scores, one labelled column at a time
    for caption, column in (
        ("Accuracy:", 'accuracy'),
        ("Precision for all classes:", 'precision'),
        ("Average precision:", 'average_precision'),
        ("Recall for all classes:", 'recall'),
        ("Average recall:", 'average_recall'),
        ("f1score for all classes:", 'fscore'),
        ("Average f1score:", 'average_fscore'),
    ):
        print(caption, scores_df[column])
    print("\n")
else:
    # At most two features: draw the decision contours.
    draw_contours(X, ytr, model, [], 'SVM - no attack', 'no_attack')

###############################################################
# Attack
###############################################################
# Train the same RBF-SVM (C and GAMMA from the grid search) on the flipped
# labels and evaluate it on the test split.

# flipped indices
fi = mat_contents['fi']
# correct matlab 1 indexing
# NOTE(review): the shift is applied only when the data has at most two
# features; confirm that files with more features already store 0-based indices.
if X.shape[1]<=2:
  fi = fi - 1
# flipped labels
fl = mat_contents['fl']
y = fl

model = svm.SVC(kernel='rbf', gamma=GAMMA, C=C, decision_function_shape='ovr')
model.fit(X, y.ravel())
y_hat = model.predict(xtt)

if X.shape[1]<=2:
    draw_contours(X, ytr, model, fi, 'SVM - alfa attack', 'alfa_attack')
else:  # If X.shape[1] > 2
    column_names = ['scenario', 'index', 'accuracy', 'error_rate',
                'precision', 'average_precision',
                'recall', 'average_recall',
                'fscore', 'average_fscore']
    # Call classification_perf method
    scores_df = classification_perf(ytt, y_hat, 'SVM - random label flip attack', 0, column_names)
    # Print the scores
    print("Accuracy:", scores_df['accuracy'])
    print("Precision for all classes:", scores_df['precision'])
    print("Average precision:", scores_df['average_precision'])
    print("Recall for all classes:", scores_df['recall'])
    print("Average recall:", scores_df['average_recall'])
    # Label fixed: it previously read "f1score +for all classes:".
    print("f1score for all classes:", scores_df['fscore'])
    print("Average f1score:", scores_df['average_fscore'])
    print("\n")

###############################################################
# Test defense - LID
###############################################################
# Steps:
#   1. For every class (membership taken from the flipped labels), compute the
#      LID of each sample w.r.t. its own class and w.r.t. all other samples,
#      and store their ratio (cross LID) at the sample's row index.
#   2. Split the cross-LID values into non-flipped ("normal") and flipped rows.
#   3. Per class, fit tanh_func to the weights returned by weight_calculation
#      and evaluate it to obtain one weight per sample (weights stay at 1 for
#      classes with fewer than two flipped samples).
#   4. Scatter the per-class weights back into `weights` (one entry per row).

y = fl
# 1-D copy of the labels so that masks and fancy indexing below are always 1-D,
# regardless of whether the labels were stored as a column or a row vector.
labels_flat = np.ravel(y)

lids = np.zeros(row_count)
lids_cross = np.zeros(len(X))

# Flatten the flipped-index array whatever its orientation and make it usable
# as an integer index.
fi = np.asarray(fi).ravel().astype(np.intp)

# get the indices of rows that are not flipped (sorted, deterministic order)
normal_idx = np.setdiff1d(np.arange(row_count), fi)
flipped_labels = labels_flat[fi]
normal_labels = labels_flat[normal_idx]

class_values = np.unique(ytr)

for i in class_values:
  # Row indices into the FULL training set, so they line up with xtr / lids.
  class_indices = np.where(labels_flat == i)[0]
  neg_indices = np.where(labels_flat != i)[0]
  if class_indices.size == 0:
    # No sample currently carries this label; nothing to estimate.
    continue
  class_data = xtr[class_indices, :]
  neg_data = xtr[neg_indices, :]
  class_lids = get_lids_random_batch(class_data, LID_GAMMA, lid_type='kernel', k=20, batch_size=BATCH_SIZE)
  lids[class_indices] = class_lids
  # LIDs w.r.t to the opposite class
  class_lids_opp = get_cross_lids_random_batch(class_data, neg_data, LID_GAMMA, lid_type='kernel', k=20,
                                           batch_size=BATCH_SIZE)
  # Cross LID values (epsilon keeps the denominator away from zero)
  lids_cross[class_indices] = np.divide(class_lids, class_lids_opp + epsilon)

original_lids = lids.copy()
lids = lids_cross
# Taken only now that the LID arrays are populated.
normal_lid_values = lids[normal_idx]
fl_lid_values = lids[fi]

all_classes_lids = {}
weights_fl = np.zeros((len(fl_lid_values),))
weights_normal = np.zeros((len(normal_lid_values),))

for i in class_values:
  class_name = f"class{i}"
  # Positions inside the normal / flipped subsets (not full-dataset rows).
  class_normal_pos = np.where(normal_labels == i)[0]
  class_fl_pos = np.where(flipped_labels == i)[0]
  class_normal_lids = normal_lid_values[class_normal_pos]
  class_fl_lids = fl_lid_values[class_fl_pos]

  # Default: every sample keeps weight 1.
  initial_weights_normal = np.ones((len(class_normal_lids),))
  initial_weights_fl = np.ones((len(class_fl_lids),))
  weights_class_normal = initial_weights_normal
  weights_class_fl = initial_weights_fl

  # Only fit when more than one label was flipped into this class
  if class_fl_lids.size > 1:
    class_density_normal = get_kde(class_normal_lids, bw=0.2)
    class_density_fl = get_kde(class_fl_lids, bw=0.2)

    lr_class_normal, lr_class_fl = weight_calculation(class_normal_lids, class_fl_lids, class_density_normal,
                                                  class_density_fl,
                                                  WEIGHT_LB)

    tmp_lid_values = np.concatenate((class_normal_lids, class_fl_lids), axis=0)
    tmp_lr = np.concatenate((lr_class_normal, lr_class_fl), axis=0)

    # fit a tanh function
    params, params_covariance = optimize.curve_fit(tanh_func, tmp_lid_values, tmp_lr)

    # obtain the weights from the fitted function
    weights_class_normal = tanh_func(class_normal_lids, params[0], params[1])
    weights_class_fl = tanh_func(class_fl_lids, params[0], params[1])

  # Always store a 3-tuple so that index [2] exists for every class:
  # (normal LIDs, [initial normal weights, initial flipped weights, flipped LIDs],
  #  [final normal weights, final flipped weights])
  all_classes_lids[class_name] = (
      class_normal_lids,
      [initial_weights_normal, initial_weights_fl, class_fl_lids],
      [weights_class_normal, weights_class_fl],
  )
  weights_fl[class_fl_pos] = weights_class_fl
  weights_normal[class_normal_pos] = weights_class_normal

# Scatter the subset weights back to full-dataset row order.
weights = np.zeros((row_count,))
weights[fi] = weights_fl
weights[normal_idx] = weights_normal

# Retrain the RBF-SVM on the flipped labels, this time passing the per-sample
# weights computed above, and evaluate it on the test split.
model = svm.SVC(kernel='rbf', gamma=GAMMA, C=C, decision_function_shape='ovr')
model.fit(X, y.ravel(), sample_weight=weights)
y_hat = model.predict(xtt)

if X.shape[1]<=2:
    draw_contours(X, ytr, model, fi, 'LID-SVM - alfa attack', 'lid_svm')
else:  # If X.shape[1] > 2
    column_names = ['scenario', 'index', 'accuracy', 'error_rate',
                'precision', 'average_precision',
                'recall', 'average_recall',
                'fscore', 'average_fscore']
    # Call classification_perf method
    scores_df = classification_perf(ytt, y_hat, 'LID SVM - random label flip attack', 0, column_names)
    # Print the scores
    print("Accuracy:", scores_df['accuracy'])
    print("Precision for all classes:", scores_df['precision'])
    # Printed on one line, matching the other two reports in this notebook.
    print("Average precision:", scores_df['average_precision'])
    print("Recall for all classes:", scores_df['recall'])
    print("Average recall:", scores_df['average_recall'])
    print("f1score for all classes:", scores_df['fscore'])
    print("Average f1score:", scores_df['average_fscore'])
    print("\n")

Number of classes: 3
SVM - no attack
Accuracy: 0    0.966667
Name: accuracy, dtype: float64
Precision for all classes: 0    [1.0, 1.0, 0.9090909090909091]
Name: precision, dtype: object
Average precision: 0    0.969697
Name: average_precision, dtype: float64
Recall for all classes: 0    [1.0, 0.9, 1.0]
Name: recall, dtype: object
Average recall: 0    0.966667
Name: average_recall, dtype: float64
f1score for all classes: 0    [1.0, 0.9473684210526316, 0.9523809523809523]
Name: fscore, dtype: object
Average f1score: 0    0.966583
Name: average_fscore, dtype: float64


SVM - random label flip attack
Accuracy: 0    0.966667
Name: accuracy, dtype: float64
Precision for all classes: 0    [1.0, 1.0, 0.9090909090909091]
Name: precision, dtype: object
Average precision: 0    0.969697
Name: average_precision, dtype: float64
Recall for all classes: 0    [1.0, 0.9, 1.0]
Name: recall, dtype: object
Average recall: 0    0.966667
Name: average_recall, dtype: float64
f1score +for all classes: 0    [1.

100%|██████████| 1/1 [00:00<00:00, 397.64it/s]
100%|██████████| 1/1 [00:00<00:00, 487.14it/s]
100%|██████████| 1/1 [00:00<00:00, 506.07it/s]
100%|██████████| 1/1 [00:00<00:00, 484.33it/s]
100%|██████████| 1/1 [00:00<00:00, 508.09it/s]
100%|██████████| 1/1 [00:00<00:00, 541.48it/s]

LID SVM - random label flip attack
Accuracy: 0    0.966667
Name: accuracy, dtype: float64
Precision for all classes: 0    [1.0, 1.0, 0.9090909090909091]
Name: precision, dtype: object
Average precision:
0    0.969697
Name: average_precision, dtype: float64
Recall for all classes: 0    [1.0, 0.9, 1.0]
Name: recall, dtype: object
Average recall: 0    0.966667
Name: average_recall, dtype: float64
f1score for all classes: 0    [1.0, 0.9473684210526316, 0.9523809523809523]
Name: fscore, dtype: object
Average f1score: 0    0.966583
Name: average_fscore, dtype: float64





