<a href="https://colab.research.google.com/github/bhagirathtallapragada/Secure-AI-project-phase2/blob/main/PATE_main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In this notebook I implement the PATE framework for ensuring model privacy

In [1]:
from google.colab import drive
drive.mount('/content/drive') #, force_remount = True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import keras
from keras.datasets import cifar100
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.regularizers import l2
from keras.callbacks import Callback, LearningRateScheduler, TensorBoard, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
# from keras import utils as np_utils
from keras.utils import np_utils
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

##Scenario with 10 teachers

In [3]:
n_instances = 500 
n_teachers = 10

# load data and transform it
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

x_train = x_train.astype( float ) / 255.
x_test = x_test.astype( float ) / 255.

x_train = x_train.reshape( -1, 32, 32, 3)
x_test = x_test.reshape( -1, 32, 32, 3)

y_train = keras.utils.np_utils.to_categorical( y_train )
y_test = keras.utils.np_utils.to_categorical( y_test )

# shuffle data
idx = np.arange( len( x_train ) )
np.random.shuffle( idx )
x_train = x_train[ idx ]
y_train = y_train[ idx ]

# gather the teacher data
teacher_data_x = [ x_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]
teacher_data_y = [ y_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]

# gather the student data
student_data_x = x_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  
student_data_y = y_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  

In [4]:
## train the teacher models
# train the teacher models according to cifar100 dataset in tensorflow
def get_model():
  # Create the model
  model = Sequential()
  model.add(Conv2D(16, kernel_size=3, activation='relu'))
  model.add(Conv2D(16, kernel_size=3, activation='relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Conv2D(32, kernel_size=3, activation='relu'))
  model.add(Conv2D(32, (3, 3), activation='relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Flatten())
  model.add(Dense(100, activation='softmax'))


  model.compile( loss = 'categorical_crossentropy', optimizer = 'adam', metrics = [ 'accuracy' ] )

  return model
    

# list of teacher models
teacher_models = [ get_model() for i in range( n_teachers ) ]

for i, (model, x, y) in enumerate( zip( teacher_models, teacher_data_x, teacher_data_y ) ):
    print( 'teacher', i )
    model.fit( x, y, batch_size = 32, epochs = 10, verbose = 0 )
    print( 'test accuracy:', model.evaluate( x_test, y_test, verbose = 0 )[ 1 ] )
    model.save( '/content/drive/MyDrive/SPAI_projectphase2/teacher_model_{}.h5'.format( i ) )

teacher 0
test accuracy: 0.05730000138282776
teacher 1
test accuracy: 0.06030000001192093
teacher 2
test accuracy: 0.061400000005960464
teacher 3
test accuracy: 0.0617000013589859
teacher 4
test accuracy: 0.07109999656677246
teacher 5
test accuracy: 0.05339999869465828
teacher 6
test accuracy: 0.0632999986410141
teacher 7
test accuracy: 0.05460000038146973
teacher 8
test accuracy: 0.058800000697374344
teacher 9
test accuracy: 0.06400000303983688


### training the student model

In [5]:
# label the data
labels = [ teacher.predict( student_data_x ) for teacher in teacher_models ]

In [6]:
# preform the voting
votes = np.zeros( ( student_data_x.shape[ 0 ], 100 ), dtype=np.float )
for i in range( len( student_data_x ) ):
  for j in range( n_teachers ):
    label = np.argmax( labels[ j ][ i ] )
    votes[ i, label ] += 1
  # add the noise per class
  for j in range( 10 ):
    votes[ i, j ] += np.random.laplace(loc=0.0, scale=5 )

In [7]:
from keras.utils import np_utils

In [8]:
student_data_x.shape,student_data_y.shape

((500, 32, 32, 3), (500, 100))

In [9]:
student_data_y = keras.utils.np_utils.to_categorical( np.argmax( votes, axis=1 ),num_classes=100 )

# train model
student_model = get_model()
print( 'training student model' )
student_model.fit( x, y, epochs=16, verbose=0 )
print( 'test accuracy:', student_model.evaluate( student_data_x, student_data_y, verbose=0 )[ 1 ] )

training student model
test accuracy: 0.009999999776482582


## Scenario with 20 teachers

In [10]:
n_instances = 500 
n_teachers = 20

# load data and transform it
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

x_train = x_train.astype( float ) / 255.
x_test = x_test.astype( float ) / 255.

x_train = x_train.reshape( -1, 32, 32, 3)
x_test = x_test.reshape( -1, 32, 32, 3)

y_train = keras.utils.np_utils.to_categorical( y_train )
y_test = keras.utils.np_utils.to_categorical( y_test )

# shuffle data
idx = np.arange( len( x_train ) )
np.random.shuffle( idx )
x_train = x_train[ idx ]
y_train = y_train[ idx ]

# gather the teacher data
teacher_data_x = [ x_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]
teacher_data_y = [ y_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]

# gather the student data
student_data_x = x_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  
student_data_y = y_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  

In [11]:
## train the teacher models
# train the teacher models according to cifar100 dataset in tensorflow
def get_model():
  # Create the model
  model = Sequential()
  model.add(Conv2D(16, kernel_size=3, activation='relu'))
  model.add(Conv2D(16, kernel_size=3, activation='relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Conv2D(32, kernel_size=3, activation='relu'))
  model.add(Conv2D(32, (3, 3), activation='relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Flatten())
  model.add(Dense(100, activation='softmax'))


  model.compile( loss = 'categorical_crossentropy', optimizer = 'adam', metrics = [ 'accuracy' ] )

  return model
    

# list of teacher models
teacher_models = [ get_model() for i in range( n_teachers ) ]

for i, (model, x, y) in enumerate( zip( teacher_models, teacher_data_x, teacher_data_y ) ):
    print( 'teacher', i )
    model.fit( x, y, batch_size = 32, epochs = 10, verbose = 0 )
    print( 'test accuracy:', model.evaluate( x_test, y_test, verbose = 0 )[ 1 ] )
    model.save( '/content/drive/MyDrive/SPAI_projectphase2/teacher_model_{}.h5'.format( i ) )

teacher 0
test accuracy: 0.05530000105500221
teacher 1
test accuracy: 0.06430000066757202
teacher 2
test accuracy: 0.06759999692440033
teacher 3
test accuracy: 0.06159999966621399
teacher 4
test accuracy: 0.05719999969005585
teacher 5
test accuracy: 0.05869999900460243
teacher 6
test accuracy: 0.06480000168085098
teacher 7
test accuracy: 0.04910000041127205
teacher 8
test accuracy: 0.05380000174045563
teacher 9
test accuracy: 0.06509999930858612
teacher 10
test accuracy: 0.05040000006556511
teacher 11
test accuracy: 0.053300000727176666
teacher 12
test accuracy: 0.05920000001788139
teacher 13
test accuracy: 0.05559999868273735
teacher 14
test accuracy: 0.05869999900460243
teacher 15
test accuracy: 0.061799999326467514
teacher 16
test accuracy: 0.04969999939203262
teacher 17
test accuracy: 0.053199999034404755
teacher 18
test accuracy: 0.0697999969124794
teacher 19
test accuracy: 0.05469999834895134


### training the student model

In [12]:
# label the data
labels = [ teacher.predict( student_data_x ) for teacher in teacher_models ]

In [13]:
# preform the voting
votes = np.zeros( ( student_data_x.shape[ 0 ], 100 ), dtype=np.float )
for i in range( len( student_data_x ) ):
  for j in range( n_teachers ):
    label = np.argmax( labels[ j ][ i ] )
    votes[ i, label ] += 1
  # add the noise per class
  for j in range( 10 ):
    votes[ i, j ] += np.random.laplace(loc=0.0, scale=5 )

In [14]:
from keras.utils import np_utils

In [15]:
student_data_x.shape,student_data_y.shape

((500, 32, 32, 3), (500, 100))

In [16]:
student_data_y = keras.utils.np_utils.to_categorical( np.argmax( votes, axis=1 ),num_classes=100 )

# train model
student_model = get_model()
print( 'training student model' )
student_model.fit( x, y, epochs=16, verbose=0 )
print( 'test accuracy:', student_model.evaluate( student_data_x, student_data_y, verbose=0 )[ 1 ] )

training student model
test accuracy: 0.02800000086426735


## Scenario with 50 teachers

In [17]:
n_instances = 500 
n_teachers = 50

# load data and transform it
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

x_train = x_train.astype( float ) / 255.
x_test = x_test.astype( float ) / 255.

x_train = x_train.reshape( -1, 32, 32, 3)
x_test = x_test.reshape( -1, 32, 32, 3)

y_train = keras.utils.np_utils.to_categorical( y_train )
y_test = keras.utils.np_utils.to_categorical( y_test )

# shuffle data
idx = np.arange( len( x_train ) )
np.random.shuffle( idx )
x_train = x_train[ idx ]
y_train = y_train[ idx ]

# gather the teacher data
teacher_data_x = [ x_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]
teacher_data_y = [ y_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]

# gather the student data
student_data_x = x_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  
student_data_y = y_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  

In [18]:
## train the teacher models
# train the teacher models according to cifar100 dataset in tensorflow
def get_model():
  # Create the model
  model = Sequential()
  model.add(Conv2D(16, kernel_size=3, activation='relu'))
  model.add(Conv2D(16, kernel_size=3, activation='relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Conv2D(32, kernel_size=3, activation='relu'))
  model.add(Conv2D(32, (3, 3), activation='relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Flatten())
  model.add(Dense(100, activation='softmax'))


  model.compile( loss = 'categorical_crossentropy', optimizer = 'adam', metrics = [ 'accuracy' ] )

  return model
    

# list of teacher models
teacher_models = [ get_model() for i in range( n_teachers ) ]

for i, (model, x, y) in enumerate( zip( teacher_models, teacher_data_x, teacher_data_y ) ):
    print( 'teacher', i )
    model.fit( x, y, batch_size = 32, epochs = 10, verbose = 0 )
    print( 'test accuracy:', model.evaluate( x_test, y_test, verbose = 0 )[ 1 ] )
    model.save( '/content/drive/MyDrive/SPAI_projectphase2/teacher_model_{}.h5'.format( i ) )

teacher 0
test accuracy: 0.05959999933838844
teacher 1
test accuracy: 0.05790000036358833
teacher 2
test accuracy: 0.05979999899864197
teacher 3
test accuracy: 0.06719999760389328
teacher 4
test accuracy: 0.05889999866485596
teacher 5
test accuracy: 0.06379999965429306
teacher 6
test accuracy: 0.045499999076128006
teacher 7
test accuracy: 0.051600001752376556
teacher 8
test accuracy: 0.05820000171661377
teacher 9
test accuracy: 0.055799998342990875
teacher 10
test accuracy: 0.058800000697374344
teacher 11
test accuracy: 0.049400001764297485
teacher 12
test accuracy: 0.06310000270605087
teacher 13
test accuracy: 0.05429999902844429
teacher 14
test accuracy: 0.05689999833703041
teacher 15
test accuracy: 0.05820000171661377
teacher 16
test accuracy: 0.06109999865293503
teacher 17
test accuracy: 0.053199999034404755
teacher 18
test accuracy: 0.05939999967813492
teacher 19
test accuracy: 0.05400000140070915
teacher 20
test accuracy: 0.05510000139474869
teacher 21
test accuracy: 0.0675999969

### training the student model

In [19]:
# label the data
labels = [ teacher.predict( student_data_x ) for teacher in teacher_models ]

In [20]:
# preform the voting
votes = np.zeros( ( student_data_x.shape[ 0 ], 100 ), dtype=np.float )
for i in range( len( student_data_x ) ):
  for j in range( n_teachers ):
    label = np.argmax( labels[ j ][ i ] )
    votes[ i, label ] += 1
  # add the noise per class
  for j in range( 10 ):
    votes[ i, j ] += np.random.laplace(loc=0.0, scale=5 )

In [21]:
from keras.utils import np_utils

In [22]:
student_data_x.shape,student_data_y.shape

((500, 32, 32, 3), (500, 100))

In [23]:
student_data_y = keras.utils.np_utils.to_categorical( np.argmax( votes, axis=1 ),num_classes=100 )

# train model
student_model = get_model()
print( 'training student model' )
student_model.fit( x, y, epochs=16, verbose=0 )
print( 'test accuracy:', student_model.evaluate( student_data_x, student_data_y, verbose=0 )[ 1 ] )

training student model
test accuracy: 0.04399999976158142


## Scenario with 100 teachers

In [30]:
n_instances = 300 
n_teachers = 100

# load data and transform it
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

x_train = x_train.astype( float ) / 255.
x_test = x_test.astype( float ) / 255.

x_train = x_train.reshape( -1, 32, 32, 3)
x_test = x_test.reshape( -1, 32, 32, 3)

y_train = keras.utils.np_utils.to_categorical( y_train )
y_test = keras.utils.np_utils.to_categorical( y_test )

# shuffle data
idx = np.arange( len( x_train ) )
np.random.shuffle( idx )
x_train = x_train[ idx ]
y_train = y_train[ idx ]

# gather the teacher data
teacher_data_x = [ x_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]
teacher_data_y = [ y_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]

# gather the student data
student_data_x = x_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  
student_data_y = y_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  

In [28]:
## train the teacher models
# train the teacher models according to cifar100 dataset in tensorflow
def get_model():
  # Create the model
  model = Sequential()
  model.add(Conv2D(16, kernel_size=3, activation='relu'))
  model.add(Conv2D(16, kernel_size=3, activation='relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Conv2D(32, kernel_size=3, activation='relu'))
  model.add(Conv2D(32, (3, 3), activation='relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Flatten())
  model.add(Dense(100, activation='softmax'))


  model.compile( loss = 'categorical_crossentropy', optimizer = 'adam', metrics = [ 'accuracy' ] )

  return model
    

# list of teacher models
teacher_models = [ get_model() for i in range( n_teachers ) ]

for i, (model, x, y) in enumerate( zip( teacher_models, teacher_data_x, teacher_data_y ) ):
    print( 'teacher', i )
    model.fit( x, y, batch_size = 32, epochs = 10, verbose = 0 )
    print( 'test accuracy:', model.evaluate( x_test, y_test, verbose = 0 )[ 1 ] )
    model.save( '/content/drive/MyDrive/SPAI_projectphase2/teacher_model_{}.h5'.format( i ) )

teacher 0
test accuracy: 0.06480000168085098
teacher 1
test accuracy: 0.05559999868273735
teacher 2
test accuracy: 0.05290000140666962
teacher 3
test accuracy: 0.06549999862909317
teacher 4
test accuracy: 0.05999999865889549
teacher 5
test accuracy: 0.0632999986410141
teacher 6
test accuracy: 0.05770000070333481
teacher 7
test accuracy: 0.05009999871253967
teacher 8
test accuracy: 0.05139999836683273
teacher 9
test accuracy: 0.05530000105500221
teacher 10
test accuracy: 0.05380000174045563
teacher 11
test accuracy: 0.05400000140070915
teacher 12
test accuracy: 0.06069999933242798
teacher 13
test accuracy: 0.05480000004172325
teacher 14
test accuracy: 0.05770000070333481
teacher 15
test accuracy: 0.05649999901652336
teacher 16
test accuracy: 0.0544000007212162
teacher 17
test accuracy: 0.059300001710653305
teacher 18
test accuracy: 0.0575999990105629
teacher 19
test accuracy: 0.05909999832510948
teacher 20
test accuracy: 0.04899999871850014
teacher 21
test accuracy: 0.05640000104904175


### training the student model

In [31]:
# label the data
labels = [ teacher.predict( student_data_x ) for teacher in teacher_models ]

In [32]:
# preform the voting
votes = np.zeros( ( student_data_x.shape[ 0 ], 100 ), dtype=np.float )
for i in range( len( student_data_x ) ):
  for j in range( n_teachers ):
    label = np.argmax( labels[ j ][ i ] )
    votes[ i, label ] += 1
  # add the noise per class
  for j in range( 10 ):
    votes[ i, j ] += np.random.laplace(loc=0.0, scale=5 )

In [33]:
from keras.utils import np_utils

In [34]:
student_data_x.shape,student_data_y.shape

((300, 32, 32, 3), (300, 100))

In [35]:
student_data_y = keras.utils.np_utils.to_categorical( np.argmax( votes, axis=1 ),num_classes=100 )

# train model
student_model = get_model()
print( 'training student model' )
student_model.fit( x, y, epochs=16, verbose=0 )
print( 'test accuracy:', student_model.evaluate( student_data_x, student_data_y, verbose=0 )[ 1 ] )

training student model
test accuracy: 0.05666666850447655
