In [34]:
import os
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
import tensorflow_model_optimization as tfmot

In [35]:
data = pd.read_csv('./data/crime_data_main.csv')

In [36]:
X_train, X_test, y_train, y_test = train_test_split(data['Preprocessed'], data['class'], test_size=0.20, random_state=1, stratify=data['class'])

In [37]:
import pickle
with open('./models/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

In [38]:
maxlen=50
def get_sequences(tokenizer, X_train):
    sequences = tokenizer.texts_to_sequences(X_train)
    padded = tf.keras.preprocessing.sequence.pad_sequences(sequences, truncating = 'post', padding='post', maxlen=maxlen)
    return padded

In [39]:
padded_train_sequences = get_sequences(tokenizer, X_train)

In [40]:
model = tf.keras.models.load_model('./models/tf_crime_model_m1.h5')
model.compile( 
        optimizer=tf.keras.optimizers.Adam(0.0001), 
        loss =tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), 
        metrics = ["accuracy"]
    )
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 50, 16)            160000    
                                                                 
 bidirectional_8 (Bidirectio  (None, 50, 40)           5920      
 nal)                                                            
                                                                 
 bidirectional_9 (Bidirectio  (None, 40)               9760      
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 64)                2624      
                                                                 
 dense_5 (Dense)             (None, 3)                 195       
                                                                 
Total params: 178,499
Trainable params: 178,499
Non-tr

In [41]:
model.fit(
    padded_train_sequences, y_train,
    validation_data=(padded_train_sequences, y_train),
    epochs=5,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)]
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2cc472ca0>

In [42]:
loss,acc = model.evaluate(get_sequences(tokenizer, X_test), y_test)
print("Restored model, accuracy: {:5.2f}%".format(100*acc))

Restored model, accuracy: 88.64%


In [43]:
# Dense layers train with pruning.
def apply_pruning_to_dense(layer):
  if isinstance(layer, tf.keras.layers.Dense):
    return tfmot.sparsity.keras.prune_low_magnitude(layer)
  return layer
# Using `tf.keras.models.clone_model` to apply `apply_pruning_to_dense`  to the layers of the model.
model_for_pruning = tf.keras.models.clone_model(model, clone_function = apply_pruning_to_dense)


In [44]:
model_for_pruning.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 50, 16)            160000    
                                                                 
 bidirectional_8 (Bidirectio  (None, 50, 40)           5920      
 nal)                                                            
                                                                 
 bidirectional_9 (Bidirectio  (None, 40)               9760      
 nal)                                                            
                                                                 
 prune_low_magnitude_dense_4  (None, 64)               5186      
  (PruneLowMagnitude)                                            
                                                                 
 prune_low_magnitude_dense_5  (None, 3)                389       
  (PruneLowMagnitude)                                 

In [45]:
# Compiling model for pruning.
model_for_pruning.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss =tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), metrics = ["accuracy"])


In [46]:
# Defining the Callbacks and assigning the log directory.
logdir = 'content/logs'
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]

In [47]:
# Fine tuning the model.
model_for_pruning.fit(
    padded_train_sequences, y_train,
    validation_data=(padded_train_sequences, y_train),
    epochs=5,
    callbacks=[tfmot.sparsity.keras.UpdatePruningStep(), tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),]
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2cc76ee80>

In [48]:
loss,acc = model_for_pruning.evaluate(get_sequences(tokenizer, X_test), y_test)
print("Optimized model, accuracy: {:5.2f}%".format(100*acc))

Optimized model, accuracy: 87.96%


In [49]:
%tensorboard --logdir={logdir}

UsageError: Line magic function `%tensorboard` not found.


In [50]:
model_for_pruning.save('models/pruned_keras_model.h5')

In [51]:
cluster_weights = tfmot.clustering.keras.cluster_weights
CentroidInitialization =tfmot.clustering.keras.CentroidInitialization

In [52]:
clustering_params = { 'number_of_clusters': 16,  'cluster_centroids_init': CentroidInitialization.LINEAR}

In [53]:
clustered_model = cluster_weights(model, **clustering_params)

In [54]:
clustered_model.compile(optimizer=tf.keras.optimizers.Adam(0.0001), loss =tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), metrics = ["accuracy"])

In [55]:
clustered_model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 cluster_embedding_4 (Cluste  (None, 50, 16)           320016    
 rWeights)                                                       
                                                                 
 cluster_bidirectional_8 (Cl  (None, 50, 40)           11744     
 usterWeightsRNN)                                                
                                                                 
 cluster_bidirectional_9 (Cl  (None, 40)               19424     
 usterWeightsRNN)                                                
                                                                 
 cluster_dense_4 (ClusterWei  (None, 64)               5200      
 ghts)                                                           
                                                                 
 cluster_dense_5 (ClusterWei  (None, 3)               

In [56]:
clustered_model.fit(
    padded_train_sequences, y_train,
    validation_data=(padded_train_sequences, y_train),
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2dc387a60>

In [57]:
loss,acc = clustered_model.evaluate(get_sequences(tokenizer, X_test), y_test)
print("Clustered model, accuracy: {:5.2f}%".format(100*acc))

Clustered model, accuracy: 87.90%


In [58]:
final_model = tfmot.clustering.keras.strip_clustering(clustered_model)
clustered_keras_file = 'models/weight_clustered_keras_model.h5'
tf.keras.models.save_model(final_model, clustered_keras_file, include_optimizer=False)



In [59]:
clustered_model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 cluster_embedding_4 (Cluste  (None, 50, 16)           320016    
 rWeights)                                                       
                                                                 
 cluster_bidirectional_8 (Cl  (None, 50, 40)           11744     
 usterWeightsRNN)                                                
                                                                 
 cluster_bidirectional_9 (Cl  (None, 40)               19424     
 usterWeightsRNN)                                                
                                                                 
 cluster_dense_4 (ClusterWei  (None, 64)               5200      
 ghts)                                                           
                                                                 
 cluster_dense_5 (ClusterWei  (None, 3)               