<a href="https://colab.research.google.com/github/bklooste/tensorflowcollab/blob/master/destructivemnistwithretrain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, Model
from tensorflow.keras.utils import to_categorical
import numpy as np

In [0]:
def margin_loss(y_true, y_pred):
    """
    Margin loss for Eq.(4). When y_true[i, :] contains not just one `1`, this loss should work too. Not test it.
    :param y_true: [None, n_classes]
    :param y_pred: [None, num_capsule]
    :return: a scalar loss value.
    """
    # return tf.reduce_mean(tf.square(y_pred))
    L = y_true * tf.square(tf.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1 - y_true) * tf.square(tf.maximum(0., y_pred - 0.1))

    return tf.reduce_mean(tf.reduce_sum(L, 1))


In [4]:
from google.colab import drive
#drive.mount('/content/gdrive')

model_save_name = 'mnistconv.h5'


!ls 'drive/My Drive/saved_models'
path = F"drive/My Drive/saved_models/{model_save_name}" 


base_model = tf.keras.models.load_model(path, custom_objects={'loss': margin_loss}, compile=False)

# Check its architecture
base_model.compile( loss = margin_loss, optimizer = tf.keras.optimizers.Adam())
base_model.trainable = False

# Let's take a look at the base model architecture
base_model.summary()



mnistconv.h5  mnistconv.pt  mnist.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1_input (InputLayer)     [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 20, 20, 256)       20992     
Total params: 20,992
Trainable params: 0
Non-trainable params: 20,992
_________________________________________________________________


In [5]:
# fixed
# cause i screwed up and had extra layers in the base
#layer_name = 'conv1'
#intermediate_base_model = Model(inputs=base_model.input,
#                                  outputs=base_model.get_layer(layer_name).output)

model = models.Sequential()
model.add(base_model)
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))
model.summary()

# model = tf.keras.Sequential([
#   base_model,
#   global_average_layer,
#   prediction_layer
# ])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model (Model)                (None, 20, 20, 256)       20992     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 10, 10, 256)       0         
_________________________________________________________________
flatten (Flatten)            (None, 25600)             0         
_________________________________________________________________
dense (Dense)                (None, 64)                1638464   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                650       
Total params: 1,660,106
Trainable params: 1,639,114
Non-trainable params: 20,992
_________________________________________________________________


In [6]:
mnist = tf.keras.datasets.mnist

(xtrain, ytrain), (xtest, ytest) = mnist.load_data()
print('Xtrain shape: ',xtrain.shape)
print('Xtest shape: ',np.shape(xtest ))
print('ytrain shape: ',xtrain.shape)
print('ytest shape: ',np.shape(ytest ))

x_train = xtrain.reshape(-1, 28, 28, 1).astype('float32') / 255.
x_test = xtest.reshape(-1, 28, 28, 1).astype('float32') / 255.
y_train = to_categorical(ytrain.astype('float32'))
y_test = to_categorical(ytest.astype('float32'))
print('X_train shape: ',x_train.shape)
print('X_test shape: ',np.shape(x_test ))
print('y_train shape: ',y_train.shape)
print('y_test shape: ',np.shape(y_test ))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Xtrain shape:  (60000, 28, 28)
Xtest shape:  (10000, 28, 28)
ytrain shape:  (60000, 28, 28)
ytest shape:  (10000,)
X_train shape:  (60000, 28, 28, 1)
X_test shape:  (10000, 28, 28, 1)
y_train shape:  (60000, 10)
y_test shape:  (10000, 10)


In [7]:
train_filter = np.where(ytrain != 0 ) 
test_filter = np.where(ytest != 0) 

y_train_without0 = ytrain[train_filter]
y_test_without0 =ytest[test_filter]

x_train_without0 = xtrain[train_filter].reshape(-1, 28, 28, 1).astype('float32') / 255.
x_test_without0 = xtest[test_filter].reshape(-1, 28, 28, 1).astype('float32') / 255.
y_train_without0 = to_categorical(y_train_without0.astype('float32'))
y_test_without0 = to_categorical(y_test_without0.astype('float32'))

print('X_train_without shape: ',x_train_without0.shape)
print('X_test_without shape: ',np.shape(x_test_without0 ))
print('y_train_without shape: ',y_train_without0.shape)
print('y_test_without shape: ',np.shape(y_test_without0 ))

print(y_train_without0[:5])

X_train_without shape:  (54077, 28, 28, 1)
X_test_without shape:  (9020, 28, 28, 1)
y_train_without shape:  (54077, 10)
y_test_without shape:  (9020, 10)
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]


In [0]:
am_recon=0.392

model.compile(optimizer='adam',
              loss=[margin_loss, 'mse'],
              loss_weights=[1., am_recon],
              metrics=['accuracy'])

In [9]:
#model.fit(x_train, y_train , epochs=5)
model.fit(x_train_without0, y_train_without0 , epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fc327604898>

In [0]:
# need to try both  pooling and capsule trained networks 

#  So what we do is take the conv network , train with 0 to 8 ,
# than add 9 see the reusult.
# add some more neurons try again 
#suspect both will have issues then try shake and bake

# use different notebooks

#then try the same for capsule networks 
#measure diffierent ways of training .. 

#we dont have original data 
# train a new model and then merge them .. with simple shake and bake
#ensure pass through .. 
# try a few  training cycles.,

# just measure it  , create 2 new metrics 
# 1  smartness (S)  how well it learns with limited data 
# 2  learning hardness (LH)  how well it retains data   

In [16]:
train_filter = np.where(ytrain == 0 ) 
test_filter = np.where(ytest == 0) 

y_train_with0 = ytrain[train_filter]
y_test_with0 =ytest[test_filter]

x_train_with0 = xtrain[train_filter].reshape(-1, 28, 28, 1).astype('float32') / 255.
x_test_with0 = xtest[test_filter].reshape(-1, 28, 28, 1).astype('float32') / 255.
y_train_with0 = to_categorical(y_train_with0.astype('float32'))
y_test_with0 = to_categorical(y_test_with0.astype('float32'))

print('X_train_with shape: ',x_train_with0.shape)
print('X_test_with shape: ',np.shape(x_test_with0 ))
print('y_train_with shape: ',y_train_with0.shape)
print('y_test_with shape: ',np.shape(y_test_with0 ))


X_train_with shape:  (5923, 28, 28, 1)
X_test_with shape:  (980, 28, 28, 1)
y_train_with shape:  (5923, 1)
y_test_with shape:  (980, 1)


In [13]:
model.evaluate(x_test_without0,  y_test_without0, verbose=2)

282/282 - 1s - loss: 0.0193 - accuracy: 0.9901


[0.01934094727039337, 0.9901330471038818]

In [18]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 1s - loss: 0.7554 - accuracy: 0.8931


[0.7553733587265015, 0.8931000232696533]

In [20]:
model.fit(x_train_with0, y_train_with0 , epochs=1)



<tensorflow.python.keras.callbacks.History at 0x7fc28b9af438>

In [21]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 1s - loss: 21.2520 - accuracy: 0.4799


[21.252016067504883, 0.4799000024795532]

In [0]:
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])
probability_model.summary()

In [0]:
probability_model(x_test[:32])