In [1]:
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import hashlib
import struct
from sklearn.metrics import accuracy_score, f1_score

# Loading and normalizing the dataset

Importing the CIFAR-10 dataset

In [2]:
# load train and test dataset
def load_dataset():
    """Fetch CIFAR-10 and one-hot encode both label arrays.

    Returns:
        A 4-tuple ``(trainX, trainY, testX, testY)``: the image arrays exactly
        as ``cifar10.load_data()`` returns them, and the label arrays after
        being passed through ``to_categorical``.
    """
    train_split, test_split = cifar10.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split
    # Training labels are encoded first, then test labels.
    train_onehot = to_categorical(train_labels)
    test_onehot = to_categorical(test_labels)
    return train_images, train_onehot, test_images, test_onehot

In [3]:
# scale pixels
def prep_pixels(train, test):
    """Convert both image arrays to float32 and divide them by 255.0.

    For 8-bit pixel data this maps the values onto the 0-1 range. The inputs
    are not modified; new arrays are returned.

    Args:
        train: array of training images.
        test: array of test images.

    Returns:
        Tuple ``(train_norm, test_norm)`` of rescaled float32 arrays.
    """
    rescaled = [images.astype('float32') / 255.0 for images in (train, test)]
    return rescaled[0], rescaled[1]

In [4]:
# Load the raw splits, rescale only the image arrays, then display the shape
# of every split as the cell's output.
x_train, y_train, x_test, y_test = load_dataset()
x_train, x_test = prep_pixels(x_train, x_test)
tuple(split.shape for split in (x_train, y_train, x_test, y_test))

((50000, 32, 32, 3), (50000, 10), (10000, 32, 32, 3), (10000, 10))

# Training the model with watermark and signature

The `signcoder` class is used to create the signature that is taught to the model during the training process.


*   We take a string S of arbitrary length as our watermark.
*   We also take a size input that stands for the size of our signature.
*   Hash S using the blake2b hashing algorithm, which returns a 64-byte digest.
*   Read 8 consecutive chunks from the start of the digest. Note that the code reads 4-byte chunks (`'<I'`), so only the first 32 bytes of the digest are used.
*   Each chunk is interpreted as a little-endian unsigned integer and stored in a `uint64` array.
*   The array is then standardized (the mean is subtracted and the result is divided by the standard deviation), which turns it into floats.
*   Finally the array is padded in `wrap` mode so that it matches the desired size of the signature.

Padding is necessary because the hash-derived signature has a fixed size (it comes from a 64-byte digest), while the input of the model can be of any size.

The `sign_error` function returns the mean absolute difference between the signature regenerated from the watermark and a given signature code.

In [5]:
# Switch read by the later `if TRAIN_WITH_WATERMARK:` cells, which build the
# signature samples and append them to x_train / y_train.
TRAIN_WITH_WATERMARK = True

def normalize(data):
    """Min-max scale ``data`` so its smallest value maps to 0 and its largest to 1.

    The scaling uses the global minimum and maximum over all elements, and the
    result has the same shape as the input.

    Args:
        data: numpy array (or array-like) of numeric values.

    Returns:
        Array of the input's shape holding ``(x - min) / (max - min)`` for
        every element. If every element is equal (``max == min``) the result
        is all zeros instead of NaN. An empty input yields an empty float
        array of the same shape.
    """
    data = np.asarray(data)
    # Remember the original shape so the result can be restored to it.
    shape = data.shape
    flat = np.reshape(data, (-1,))
    if flat.size == 0:
        # np.min / np.max raise on empty input; there is nothing to scale.
        return np.reshape(flat.astype(np.float64), shape)

    minimum = np.min(flat)
    span = np.max(flat) - minimum
    if span == 0:
        # Constant input: dividing by 1 (same scalar type) gives zeros and
        # avoids the 0/0 -> NaN of the plain formula.
        span = type(span)(1)
    # Vectorised form of the per-element (x - min) / (max - min).
    return np.reshape((flat - minimum) / span, shape)

class signcoder(object):
    """Derives deterministic numeric signatures from a watermark value.

    All methods are static. The watermark is hashed with BLAKE2b and the
    digest is decoded into unsigned integers that seed the signature.

    NOTE(review): the watermark is hashed as ``str(sign).encode('utf-8')``, so
    a ``bytes`` watermark is hashed through its repr (``"b'...'"``) and gives
    a different signature than the equal-looking ``str``.
    NOTE(review): words are decoded with ``'<I'`` (4 bytes each) although one
    word is produced per 8 digest bytes, so only the first half of the digest
    is consumed. Both behaviours are kept so that generated signatures stay
    unchanged.
    """

    @staticmethod
    def _digest_words(sign, digest_size):
        """Hash ``str(sign)`` and decode one little-endian 32-bit word per 8 digest bytes."""
        digest = hashlib.blake2b(str(sign).encode('utf-8'), digest_size=digest_size).digest()
        count = len(digest) // 8
        # Word i is read from bytes [4*i, 4*i + 4) of the digest.
        return np.array(
            [struct.unpack('<I', digest[4 * i:4 * i + 4])[0] for i in range(count)],
            dtype=np.uint64,
        )

    @staticmethod
    def get_sign(sign, length):
        """Build a signature of exactly ``length`` values for the watermark.

        Eight words from a 64-byte digest are standardized (zero mean, unit
        standard deviation) and then extended periodically to ``length``.

        Args:
            sign: the watermark; hashed via ``str(sign)``.
            length: number of values in the returned signature.

        Returns:
            1-D float array with ``length`` elements. For even ``length >= 8``
            the layout is the symmetric wrap-padding of the 8 base values; odd
            lengths get the extra element on the right, and lengths below 8
            return a slice of the base values using the same offset rule.

        Raises:
            ValueError: if ``length`` is negative.
        """
        length = int(length)
        if length < 0:
            raise ValueError("length must be non-negative")
        words = signcoder._digest_words(sign, 64)
        normalized = (words - np.mean(words)) / np.std(words)
        base_size = normalized.shape[0]
        # Number of wrapped values placed in front of the base values.
        before = length // 2 - base_size // 2
        if before < 0:
            # Shorter than the base signature: crop instead of pad.
            start = -before
            return normalized[start:start + length].copy()
        after = length - base_size - before
        return np.pad(normalized, (before, after), mode='wrap')

    @staticmethod
    def get_sign_one_byte(sign, length):
        """Build a signature that repeats a single hash-derived value.

        One word from an 8-byte digest is framed by four 300s and four
        10000000000s, min-max scaled with the module-level ``normalize``
        function, and the scaled word is repeated ``length`` times.

        Args:
            sign: the watermark; hashed via ``str(sign)``.
            length: number of values in the returned signature.

        Returns:
            1-D float64 array of ``length`` identical values.
        """
        words = signcoder._digest_words(sign, 8)
        framed = np.pad(words, (4, 4), 'constant', constant_values=(300, 10000000000))
        # Index 4 is the hash word itself, sitting between the two constant pads.
        value = normalize(framed)[4]
        return np.full(length, value, dtype=np.float64)

    @staticmethod
    def sign_error(watermark, sign):
        """Mean absolute difference between ``sign`` and the watermark's signature.

        Args:
            watermark: watermark used to regenerate the reference signature.
            sign: array whose last axis holds the signature values; a single
                1-D signature or a batch of signatures are both accepted.

        Returns:
            The mean absolute error rounded to 8 decimals.
        """
        # The last axis is the signature length for 1-D and batched inputs alike.
        padding = sign.shape[-1]
        exact_sign = signcoder.get_sign(watermark, padding)
        return np.round(np.mean(np.abs(exact_sign - sign)), 8)

Generating 5000 signatures with "www.uis.no" as watermark and adding them to the training dataset. The signature samples are concatenated onto both x_train and y_train. For x_train each signature is 3072 elements long (reshaped to 32 × 32 × 3 so that it matches an image), and for y_train it is 10 elements long, so the signature is generated with the corresponding length in each case.

In [6]:
if TRAIN_WITH_WATERMARK:
    # One 3072-value signature, shaped like a single 32x32x3 image.
    # NOTE(review): this cell passes bytes while the label-side cell passes the
    # str "www.uis.no"; get_sign hashes str(sign), so the two differ in seed.
    sign = np.reshape(signcoder.get_sign(b'www.uis.no', 3072), (32, 32, 3))
    # Broadcast the same signature into each of the 5000 rows.
    signdata = np.ones((5000, 32, 32, 3))
    signdata[...] = sign
    print(signdata.shape)

(5000, 32, 32, 3)


In [7]:
if TRAIN_WITH_WATERMARK:
    # Append the signature samples after the existing training images along
    # the sample axis (assumes `signdata` is the image-shaped array built by
    # the preceding cell).
    # NOTE: x_train is rebound to the longer array, so running this cell again
    # without reloading the data appends the signatures a second time.
    x_train = np.concatenate((x_train, signdata), axis=0)
    print(x_train.shape)

(55000, 32, 32, 3)


In [8]:
if TRAIN_WITH_WATERMARK:
    # 10-value signature used as the target vector for every signature sample.
    sign = signcoder.get_sign("www.uis.no", 10)
    # Broadcast the same target into each of the 5000 rows.
    signdata = np.ones((5000, 10))
    signdata[:] = sign
    print(signdata.shape)

(5000, 10)


In [9]:
if TRAIN_WITH_WATERMARK:
    # Append the signature targets after the existing one-hot labels along the
    # sample axis (assumes `signdata` is the (5000, 10) array built by the
    # preceding cell).
    # NOTE: y_train is rebound to the longer array, so running this cell again
    # without reloading the data appends the targets a second time.
    y_train = np.concatenate((y_train, signdata), axis=0)
    print(y_train.shape)

(55000, 10)


In [None]:
from src.model_cifar10 import SimpleNet
from src.signature import Signature

MODEL_NAME = '50epochs_baseline'
# Keyword arguments forwarded unchanged to model.fit for every run.
train_parameters = {
    'steps_per_epoch' : None,
    'epochs' : 50,
    'validation_data' : (x_test,y_test),
    'batch_size' : 100
}

# Switches for the two stages of each run below.
TRAINING = True
SAVING = True

# One fresh model per ratio; the signature length is that fraction of the
# model's weight count.
model_ratio = np.linspace(0.1, 1.0, 10)
for ratio in model_ratio:
    model = SimpleNet()
    signature_length = int(model.n_weights() * ratio)
    sig = Signature(b'Some block information', signature_length)

    if TRAINING:
        history = model.fit(x_train, y_train, **train_parameters, signature=sig)
    if SAVING:
        # Use the same '.1f' format that plot_results reads back
        # ('sig_0.1.csv'). The previous '1f' spec is a width of 1 with the
        # default six decimals ('sig_0.100000.csv'), so the files were never found.
        model.save_training_history(f'training_history/sig_{ratio:.1f}.csv')

Train model with signature of size 261384
Train on 55000 samples, validate on 10000 samples
Epoch 1/50
 9000/55000 [===>..........................] - ETA: 6:40 - loss: 3.8113 - accuracy: 0.1183 - f1_score: 0.1048

  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)


Epoch 2/50
 1600/55000 [..............................] - ETA: 8:05 - loss: 1.3510 - accuracy: 0.1388 - f1_score: 0.1233

  'precision', 'predicted', average, warn_for)


 3600/55000 [>.............................] - ETA: 7:58 - loss: 1.4592 - accuracy: 0.1356 - f1_score: 0.1198

  'precision', 'predicted', average, warn_for)


 6000/55000 [==>...........................] - ETA: 7:31 - loss: 1.4747 - accuracy: 0.1362 - f1_score: 0.1198

  'precision', 'predicted', average, warn_for)


 9800/55000 [====>.........................] - ETA: 6:52 - loss: 1.4708 - accuracy: 0.1407 - f1_score: 0.1252

  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'recall', 'true', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)


Epoch 3/50
 2500/55000 [>.............................] - ETA: 8:25 - loss: 0.5405 - accuracy: 0.1780 - f1_score: 0.1646

  'precision', 'predicted', average, warn_for)


 3800/55000 [=>............................] - ETA: 8:05 - loss: 0.2815 - accuracy: 0.1808 - f1_score: 0.1666

  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)


Epoch 4/50
11500/55000 [=====>........................] - ETA: 6:34 - loss: -1.9019 - accuracy: 0.1786 - f1_score: 0.1658

  'precision', 'predicted', average, warn_for)


11800/55000 [=====>........................] - ETA: 6:31 - loss: -1.8687 - accuracy: 0.1783 - f1_score: 0.1654

  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)




# Plotting results

In [None]:
def _plot_metric(axis, data, column, label, color, scatter, regression, line):
    """Draw one history column on ``axis`` in each of the requested styles."""
    if regression:
        # exp_fit's return value is unpacked as the extra arguments of exp.
        line_fit = exp_fit(data.index, data[column])
        axis.plot(data.index, exp(data.index, *line_fit), label=label, c=color)

    if scatter:
        # A single RGBA row so the colour is not mistaken for per-point values.
        axis.scatter(data.index, data[column], c=color.reshape(1, -1))

    if line:
        axis.plot(data.index, data[column], label=label, c=color)


def plot_results(scatter=False, regression=False, line=False):
    """Plot validation accuracy and F1 score per epoch for every signature ratio.

    Reads ``training_history/sig_<ratio>.csv`` for the ten ratios 0.1 ... 1.0
    and draws the ``val_accuracy`` column on the left panel and the
    ``val_f1_score`` column on the right panel, one colour per ratio.

    NOTE(review): relies on ``plt``, ``cm``, ``pd``, ``exp_fit`` and ``exp``
    being available in the notebook namespace; none of them is imported or
    defined in the cells visible here — confirm.

    Args:
        scatter: draw the raw values as scatter points.
        regression: draw the curve ``exp(epoch, *exp_fit(epoch, values))``.
        line: draw the raw values as a connected line.

    Returns:
        Tuple ``(ax, fig)``: the array of the two axes and the figure.

    Raises:
        ValueError: if none of the three styles is enabled.
    """
    if not (scatter or regression or line):
        raise ValueError("Set [scatter|regression|line] to True")

    fig, ax = plt.subplots(1, 2)
    colormap = cm.cividis_r
    model_ratio = np.linspace(0.1, 1.0, 10)
    # (axis, history column, y label, title) for each panel.
    panels = (
        (ax[0], 'val_accuracy', 'Validation accuracy', 'Accuracy for % of model frozen'),
        (ax[1], 'val_f1_score', 'Validation F1 score', 'F1 score as % of model frozen'),
    )

    for ratio in model_ratio:
        data = pd.read_csv(f'training_history/sig_{ratio:.1f}.csv')
        # Shift the index so epochs are numbered from 1.
        data.index += 1
        color = np.array(colormap(ratio))
        label = f'{ratio:.1f}'
        for axis, column, _ylabel, _title in panels:
            _plot_metric(axis, data, column, label, color, scatter, regression, line)

    # Labels and legend do not depend on the ratio, so set them once at the end
    # instead of rebuilding them on every iteration.
    for axis, _column, ylabel, title in panels:
        axis.set(xlabel='Epochs', ylabel=ylabel, title=title)
        axis.legend()

    return ax, fig

In [None]:
# plot_results returns (ax, fig); only the figure is needed here.
# NOTE(review): assigning to `_` rebinds the name IPython uses for the last
# cell output — harmless here, but a named throwaway would be clearer.
_, fig = plot_results(line=True)

# SAVING is re-assigned in this cell, so the export does not depend on the
# value set in the training cell.
SAVING = True
if SAVING:
    # NOTE(review): presumably the `signature_hit/` directory must already
    # exist for this to succeed — confirm.
    fig.savefig('signature_hit/during_training.svg')