<a href="https://colab.research.google.com/github/auwal84/semi-supervised-encrypted-traffic-classification-with-DCGAN/blob/master/semi_GAN_in_keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive inside the Colab VM, forcing a fresh mount each time.
from google.colab import drive

drive.mount(
    '/content/gdrive/',
    force_remount=True,
)

Mounted at /content/gdrive/


In [2]:
import tensorflow as tf 
import numpy as np 
import os
from tensorflow import keras
from tensorflow.keras import layers,Model
import matplotlib.pyplot as plt



In [3]:
# Stop right away unless TensorFlow reports the first GPU device.
device_name = tf.test.gpu_device_name()
gpu_found = device_name == '/device:GPU:0'
if not gpu_found:
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [4]:
def load_data(data_dir='.'):
    """Load the train/test arrays and their labels from ``.npy`` files.

    Args:
        data_dir: Directory holding the four ``.npy`` files. Defaults to the
            current working directory, which is where the bare file names
            were resolved before this parameter existed.

    Returns:
        ``((train_data, train_labels), (test_data, test_labels))`` exactly as
        returned by ``np.load``.
    """
    def _load(filename):
        # Resolve every file against the same directory.
        return np.load(os.path.join(data_dir, filename))

    train_data = _load('TrainPIMset20.npy')
    train_labels = _load('TrainPIMlabels20.npy')
    test_data = _load('TestPIMset20.npy')
    # NOTE(review): this file name starts with a lower-case 't', unlike the
    # other three; kept unchanged -- confirm against the file on disk.
    test_labels = _load('testPIMlabels20.npy')
    return (train_data,train_labels),(test_data,test_labels)

In [5]:
# Read both splits, then unpack each into its data / label arrays.
train_split, test_split = load_data()
train_data, train_labels = train_split
test_data, test_labels = test_split


In [6]:
# Display the shape of the training array as loaded, before any reshaping.
train_data.shape

(60000, 20, 3)

In [7]:
# Cast to float32 and add a trailing channel axis so samples are (20, 3, 1).
train_data = tf.reshape(tf.cast(train_data, tf.float32), [-1, 20, 3, 1])

In [8]:
# train_data is already cast to float32 and reshaped to (N, 20, 3, 1) by the
# preceding cell, so the copy-pasted repeat of those two statements was dropped
# and only the labels are prepared here.
# One-hot encode the integer class ids with depth 4.
train_labels = tf.one_hot(tf.cast(train_labels, tf.int32), 4)

In [9]:
# Test split: float32 samples shaped (N, 20, 3, 1) and depth-4 one-hot labels.
test_data = tf.reshape(tf.cast(test_data, tf.float32), [-1, 20, 3, 1])
test_labels = tf.one_hot(tf.cast(test_labels, tf.int32), 4)

In [10]:
# Display the label tensor's shape after one-hot encoding.
train_labels.shape

TensorShape([60000, 4])

In [11]:
# Number of real classes (same value as the one-hot depth used for the labels).
num_classes = 4
# NOTE(review): not referenced anywhere else in the visible notebook.
BUFFER_SIZE = 60000
# Batch size for both datasets, the generator noise batch and the label mask.
BATCH_SIZE = 256
# Length of the noise vector fed to the generator.
latent_dim = 1920
# Shape of a single sample as seen by the discriminator.
INPUT_SHAPE = (20, 3, 1)

In [12]:
# Batched tf.data pipelines. drop_remainder=True discards a final short batch,
# so every batch holds exactly BATCH_SIZE samples.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_data, train_labels))
    .batch(BATCH_SIZE, drop_remainder=True)
)
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices((test_data, test_labels))
    .batch(BATCH_SIZE, drop_remainder=True)
)

In [13]:
def make_generator_model():
    """Build the generator mapping a latent vector to a (20, 3, 1) sample.

    A bias-free Dense projection is reshaped to (20, 3, 16) and passed through
    three stride-1 transposed convolutions that reduce the channels
    16 -> 8 -> 4 -> 1. The last layer uses ``tanh``, so outputs lie in [-1, 1].

    Returns:
        An un-compiled ``tf.keras.Sequential`` taking ``(latent_dim,)`` inputs.
    """
    model = tf.keras.Sequential()
    # 20*3*16 units so the Reshape below can fold them into a (20, 3, 16) map.
    model.add(tf.keras.layers.Dense(20*3*16, use_bias=False, input_shape=(latent_dim,)))
    model.add(tf.keras.layers.BatchNormalization())
    # Fix: this was LeakyReLU(alpha = 1), which is the identity function and left
    # the first block without any non-linearity. Use the default slope, like
    # the other LeakyReLU layers in this generator.
    model.add(tf.keras.layers.LeakyReLU())

    model.add(tf.keras.layers.Reshape((20, 3, 16)))
    assert model.output_shape == (None, 20, 3, 16) # Note: None is the batch size

    model.add(tf.keras.layers.Conv2DTranspose(8, (5, 5), strides=(1, 1), padding='same', use_bias=False))
    assert model.output_shape == (None,20, 3, 8)
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LeakyReLU())

    model.add(tf.keras.layers.Conv2DTranspose(4, (5, 5), strides=(1, 1), padding='same', use_bias=False))
    assert model.output_shape == (None,20, 3, 4)
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LeakyReLU())

    # Single-channel output squashed by tanh.
    model.add(tf.keras.layers.Conv2DTranspose(1, (5, 5), strides=(1, 1), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None,20, 3, 1)

    return model

In [14]:
# Instantiate the generator and print its layer-by-layer summary.
g_model =make_generator_model()
g_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 960)               1843200   
_________________________________________________________________
batch_normalization (BatchNo (None, 960)               3840      
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 960)               0         
_________________________________________________________________
reshape (Reshape)            (None, 20, 3, 16)         0         
_________________________________________________________________
conv2d_transpose (Conv2DTran (None, 20, 3, 8)          3200      
_________________________________________________________________
batch_normalization_1 (Batch (None, 20, 3, 8)          32        
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 20, 3, 8)          0

In [15]:

def make_discriminator_model():
    """Build the discriminator's feature extractor.

    The network takes ``INPUT_SHAPE`` samples, applies three stride-1
    'same'-padded convolutions (32, 64, 128 filters) with dropout, batch
    normalisation and LeakyReLU in between, and ends with global average
    pooling. It returns pooled features rather than class scores; no
    classification layer is part of this model.

    Returns:
        A functional ``Model`` named ``'discriminator'``.
    """
    inputs = keras.Input(shape=(INPUT_SHAPE))

    # Layers are created in the order they are applied.
    stack = [
        keras.layers.Dropout(0.4),
        keras.layers.Conv2D(32, kernel_size=(5, 5), strides=(1, 1), padding='same'),
        keras.layers.LeakyReLU(),
        keras.layers.Dropout(0.4),
        keras.layers.Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding='same'),
        keras.layers.BatchNormalization(),
        keras.layers.LeakyReLU(0.2),
        keras.layers.Conv2D(128, kernel_size=(2, 2), strides=(1, 1), padding='same'),
        keras.layers.LeakyReLU(0.2),
        keras.layers.GlobalAveragePooling2D(),
    ]

    features = inputs
    for layer in stack:
        features = layer(features)

    return Model(inputs, features, name='discriminator')

   


   

In [16]:
# Instantiate the discriminator feature extractor and print its summary.
d_model = make_discriminator_model()
d_model.summary()

Model: "discriminator"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 20, 3, 1)]        0         
_________________________________________________________________
dropout (Dropout)            (None, 20, 3, 1)          0         
_________________________________________________________________
conv2d (Conv2D)              (None, 20, 3, 32)         832       
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 20, 3, 32)         0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 20, 3, 32)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 3, 64)         18496     
_________________________________________________________________
batch_normalization_3 (Batch (None, 20, 3, 64)       

In [17]:

# Classification head applied to discriminator features: 5 logits, where the
# last column is treated as the "fake" class by the loss code.
dense = keras.layers.Dense(units=5)

# Module-level metric objects, updated from inside the loss function.
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')
precision = tf.keras.metrics.Precision()
recall = tf.keras.metrics.Recall()


In [18]:
# discriminator and generator losses, plus running classification metrics
def loss_values(d_real_features,fake_features,labels,label_rate):
    """Compute the semi-supervised GAN losses and update the global metrics.

    Both feature batches go through the module-level ``dense`` head; the last
    logit column is treated as the "fake" class.

    Args:
        d_real_features: Discriminator features of the real batch.
        fake_features: Discriminator features of the generated batch.
        labels: One-hot labels that already include the extra "fake" column.
        label_rate: Fraction of each batch whose labels feed the supervised
            loss; the first ``int(BATCH_SIZE * label_rate)`` rows are used.

    Returns:
        ``(d_loss, g_loss, accuracy, precision, recall)``. The three metric
        values are the current results of the module-level metric objects.
    """
    epsilon = 1e-8
    real_logits = dense(d_real_features)
    real_prob = tf.nn.softmax(real_logits)
    fake_logits = dense(fake_features)
    fake_prob = tf.nn.softmax(fake_logits)

    def d_loss_fn():
        # Supervised part: cross-entropy over the labelled rows only.
        per_example_ce = tf.nn.softmax_cross_entropy_with_logits(logits=real_logits,
                                                                 labels=labels)
        # Fix: np.int was removed from NumPy (it was an alias of the builtin int).
        # The count is clamped so an out-of-range label_rate cannot mis-index.
        labeled_count = max(0, min(BATCH_SIZE, int(BATCH_SIZE * label_rate)))
        labeled_mask = np.zeros([BATCH_SIZE], dtype=np.float32)
        labeled_mask[:labeled_count] = 1.0
        # Fix: with no labelled rows the old sum(mask) denominator was 0, giving
        # NaN. Dividing by max(count, 1) is identical whenever count >= 1.
        D_L_supervised = tf.reduce_sum(labeled_mask * per_example_ce) / float(max(labeled_count, 1))

        # Unsupervised part 1: real samples should NOT land in the fake column.
        prob_real_be_real = 1 - real_prob[:, -1] + epsilon
        D_L_unsupervised1 = -1 * tf.reduce_mean(tf.math.log(prob_real_be_real))

        # Unsupervised part 2: generated samples SHOULD land in the fake column.
        prob_fake_be_fake = fake_prob[:, -1] + epsilon
        D_L_unsupervised2 = -1 * tf.reduce_mean(tf.math.log(prob_fake_be_fake))

        return D_L_supervised + D_L_unsupervised1 + D_L_unsupervised2

    def g_loss_fn():
        # Feature matching: mean absolute gap between the batch-mean features
        # of real and generated samples.
        real_moments = tf.reduce_mean(d_real_features, axis=0)
        generated_moments = tf.reduce_mean(fake_features, axis=0)
        return tf.reduce_mean(tf.abs(real_moments - generated_moments))

    # Metrics are updated on every real row, whether labelled or not.
    train_accuracy.update_state(labels, real_prob)
    precision.update_state(labels, real_prob)
    recall.update_state(labels, real_prob)
    d_loss = d_loss_fn()
    g_loss = g_loss_fn()
    return d_loss,g_loss,train_accuracy.result(),precision.result(),recall.result()




In [19]:
class semi_gan(Model):
  """Keras model wrapper that trains a semi-supervised GAN.

  Args:
    discriminator: Model mapping samples to feature vectors.
    generator: Model mapping latent vectors to samples.
    latent_dim: Length of the latent noise vector.
    label_rate: Fraction of each batch treated as labelled; passed to ``loss_fn``.
  """
  def __init__(self,discriminator,generator, latent_dim,label_rate):
    super(semi_gan,self).__init__()
    self.discriminator = discriminator
    self.generator = generator
    self.latent_dim = latent_dim
    self.label_rate = label_rate

  def compile(self,d_optimizer,g_optimizer,loss_fn):
    """Store the two optimizers and the loss callable.

    ``loss_fn(real_features, fake_features, labels, label_rate)`` must return
    ``(d_loss, g_loss, accuracy, precision, recall)``.
    """
    super(semi_gan,self).compile()
    self.d_optimizer = d_optimizer
    self.g_optimizer = g_optimizer
    self.loss_fn = loss_fn

  def extended_labels(self,labels):
    """Append an all-zero column (the "fake" class) to one-hot ``labels``."""
    extended_label = tf.concat([labels, tf.zeros([tf.shape(labels)[0], 1])], axis = 1)

    return extended_label

  def _sample_latent(self,features):
    """Draw one latent vector per sample in ``features``."""
    # Size the noise batch from the data rather than a global constant.
    batch_size = tf.shape(features)[0]
    return tf.random.normal(shape =(batch_size, self.latent_dim))

  def train_step(self,dataset):
    """Run one discriminator update and one generator update on a batch."""
    features = dataset[0]
    labels = dataset[1]
    latent_vector = self._sample_latent(features)
    with tf.GradientTape() as d_tape, tf.GradientTape() as g_tape:
      generated_images = self.generator(latent_vector,training = True)
      real_features = self.discriminator(features,training=True)
      fake_features = self.discriminator(generated_images, training = True)
      labels = self.extended_labels(labels)
      d_loss,g_loss,train_acc,prec,rec = self.loss_fn(real_features,fake_features,labels,self.label_rate)

    g_vars = self.generator.trainable_variables
    # Fix: the discriminator step used only self.discriminator's variables, so
    # trainable variables created inside loss_fn (such as a classification head
    # on top of the features) got no gradient updates. Add every other
    # variable the tape watched, except the generator's.
    d_vars = list(self.discriminator.trainable_variables)
    known_ids = {id(v) for v in d_vars} | {id(v) for v in g_vars}
    for var in d_tape.watched_variables():
      if id(var) not in known_ids:
        d_vars.append(var)
        known_ids.add(id(var))

    d_grad = d_tape.gradient(d_loss,d_vars)
    g_grad = g_tape.gradient(g_loss,g_vars)
    self.d_optimizer.apply_gradients(zip(d_grad,d_vars))
    self.g_optimizer.apply_gradients(zip(g_grad,g_vars))

    return {"d_loss": d_loss, "g_loss": g_loss,"train_accuracy":train_acc,"precision":prec,"recall":rec}

  #evaluate step
  def test_step(self,dataset):
    """Compute losses and metrics for one batch without updating weights."""
    features = dataset[0]
    labels = dataset[1]
    latent_vector = self._sample_latent(features)

    generated_images = self.generator(latent_vector,training = False)
    real_features = self.discriminator(features,training=False)
    fake_features = self.discriminator(generated_images, training = False)
    labels = self.extended_labels(labels)
    # NOTE(review): the metric values come from whatever loss_fn tracks; this
    # class does not reset them, so confirm training and validation results
    # are not accumulated into the same state.
    d_loss,g_loss,acc,prec,rec = self.loss_fn(real_features,fake_features,labels,self.label_rate)
    return {"d_loss": d_loss, "g_loss": g_loss,"accuracy":acc,"precision":prec,"recall":rec}

      



In [20]:
# Separate Adam optimizers for the two networks, both with learning rate 1e-4.
disc_optimizer = keras.optimizers.Adam(learning_rate=1e-4)
gen_optimizer = keras.optimizers.Adam(learning_rate=1e-4)


In [21]:
# Combine both networks; 80% of each batch is treated as labelled.
gan = semi_gan(
    discriminator=d_model,
    generator=g_model,
    latent_dim=latent_dim,
    label_rate=0.8,
)

In [22]:
# Attach the two optimizers and the combined loss/metric function.
gan.compile(
    d_optimizer=disc_optimizer,
    g_optimizer=gen_optimizer,
    loss_fn=loss_values,
)

In [23]:
#create a callback
class GAN_monitor(keras.callbacks.Callback):
  """Print validation accuracy, precision and recall after every epoch.

  Args:
    validation_data: Either a single ``(features, labels)`` tuple/list or an
      iterable of such batches (for example a batched ``tf.data.Dataset``).
      Labels are one-hot without the extra "fake" column.
  """
  def __init__(self,validation_data):
    super(GAN_monitor,self).__init__()
    self.dataset = validation_data

  def _validation_batches(self):
    """Return an iterable of ``(features, labels)`` batches."""
    if isinstance(self.dataset,(tuple,list)):
      return [(self.dataset[0],self.dataset[1])]
    return self.dataset

  def on_epoch_end(self,epoch, logs = None):
    """Score the discriminator plus the ``dense`` head on the validation data."""
    # Fresh metric objects per epoch: the old code referenced an undefined
    # `accuracy` and called the global training metrics, altering their state.
    val_accuracy = tf.keras.metrics.CategoricalAccuracy()
    val_precision = tf.keras.metrics.Precision()
    val_recall = tf.keras.metrics.Recall()
    for features, labels in self._validation_batches():
      # Add the all-zero "fake" column so labels match the head's output width.
      labels = tf.concat([labels, tf.zeros([tf.shape(labels)[0], 1])], axis = 1)
      t_real_features = self.model.discriminator(features, training=False)
      t_real_prob = tf.nn.softmax(dense(t_real_features))
      for metric in (val_accuracy, val_precision, val_recall):
        metric.update_state(labels, t_real_prob)
    # Fix: `.numpy` lacked its call parentheses, which broke the %f formatting.
    print('epoch: %d, | validation_acc: %f,precision: %f,recall:%f' %(epoch,val_accuracy.result().numpy(),val_precision.result().numpy(),val_recall.result().numpy()))

  # Fix: Keras calls `on_epoch_end`; the old name `on_each_end` was never
  # invoked. It is kept as an alias for any code calling it directly.
  on_each_end = on_epoch_end
    


In [24]:
# Number of passes over the training dataset.
epochs = 10
# The monitoring callback is currently disabled:
#cbk = GAN_monitor(test_dataset)

In [25]:

# Train on the batched training set; the test set is passed as validation data.
history = gan.fit(train_dataset,epochs=epochs,validation_data = test_dataset)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [26]:
# Evaluate the trained model on the batched test set.
gan.evaluate(test_dataset)



[]