<a href="https://colab.research.google.com/github/albim72/ALGORYTMY_AI_06/blob/main/szkolenie_rozproszone_full.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import os

import tensorflow_datasets as tfds

In [2]:
# Load MNIST as (image, label) pairs together with the dataset metadata.
datasets, info = tfds.load(
    name='mnist',
    as_supervised=True,
    with_info=True,
)
# Pull the two splits out of the returned mapping.
mnist_train = datasets['train']
mnist_test = datasets['test']

Downloading and preparing dataset 11.06 MiB (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to /root/tensorflow_datasets/mnist/3.0.1...


Dl Completed...:   0%|          | 0/5 [00:00<?, ? file/s]

Dataset mnist downloaded and prepared to /root/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.


In [3]:
# Create the distribution strategy; the model is later built inside its scope.
strategy = tf.distribute.MirroredStrategy()



In [4]:
# Report how many replicas the strategy keeps in sync.
print(f'Liczba platform: {strategy.num_replicas_in_sync}')

Liczba platform: 1


In [5]:
# Input pipeline configuration.
# Number of examples in each split, as reported by the dataset metadata.
num_train_examples = info.splits['train'].num_examples
num_test_examples = info.splits['test'].num_examples

# Buffer size handed to shuffle() when the training pipeline is built.
BUFFER_SIZE = 10000
# Batch size for a single replica.
BATCH_SIZE_PER_REPLICA = 64
# Global batch size: the per-replica batch scaled by the number of replicas in sync.
BATCH_SIZE = BATCH_SIZE_PER_REPLICA*strategy.num_replicas_in_sync


In [6]:
def scale(image,label):
  """Rescale pixel values from the [0, 255] range to [0, 1].

  Args:
    image: Image tensor; it is cast to float32 before being divided by 255.
    label: Label paired with the image; returned unchanged.

  Returns:
    A tuple (scaled_image, label).
  """
  normalized = tf.cast(image,tf.float32) / 255
  return normalized, label

In [7]:
# Training pipeline: normalize, cache, shuffle, then batch.
train_dataset = (
    mnist_train
    .map(scale)
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
# Evaluation pipeline: normalize and batch only.
eval_dataset = (
    mnist_test
    .map(scale)
    .batch(BATCH_SIZE)
)

In [8]:
# Build and compile the model inside the strategy scope.
with strategy.scope():
  model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(
          filters=32,
          kernel_size=3,
          activation='relu',
          input_shape=(28,28,1),
      ),
      tf.keras.layers.MaxPooling2D(),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(units=64, activation='relu'),
      # No activation on the last layer: the loss below is set up with from_logits=True.
      tf.keras.layers.Dense(units=10),
  ])

  model.compile(
      optimizer=tf.keras.optimizers.Adam(),
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=['accuracy'],
  )

In [9]:
# Callback setup: directory that receives the training checkpoints.
checkpoint_dir = './training_checkpoints'
# Checkpoint file prefix; the "{epoch}" placeholder is kept literal here and is
# passed as-is to the checkpoint callback's filepath.
checkpoint_prefix = os.path.join(checkpoint_dir,"ckpt_{epoch}")

In [10]:
def decay(epoch):
  """Return the learning rate to use for the given epoch index.

  Schedule:
    epoch < 3       -> 1e-3
    3 <= epoch < 7  -> 1e-4
    otherwise       -> 1e-5
  """
  # Thresholds are checked in ascending order; the first one the epoch falls
  # below determines the rate.
  for upper_bound, rate in ((3, 1e-3), (7, 1e-4)):
    if epoch < upper_bound:
      return rate
  return 1e-5


In [11]:
class PrintLR(tf.keras.callbacks.Callback):
  """Callback that prints the optimizer's learning rate after every epoch."""

  def on_epoch_end(self,epoch,logs=None):
    """Print the current learning rate.

    Args:
      epoch: Epoch index passed in by Keras; printed as epoch + 1.
      logs: Metrics for the finished epoch; unused here.
    """
    # Read the rate from the model this callback is attached to (self.model)
    # instead of a notebook-global `model`, so the callback reports the right
    # optimizer no matter which model it is passed to. `learning_rate` is used
    # rather than the `lr` alias, which newer Keras releases no longer expose.
    current_lr = self.model.optimizer.learning_rate.numpy()
    print(f'\nLearning Rate dla epoki {epoch +1} wynosi {current_lr}')

In [12]:
# Callbacks handed to model.fit.
callbacks = [
    # Write TensorBoard logs under ./logs.
    tf.keras.callbacks.TensorBoard(log_dir='./logs'),
    # Save weights only, using the checkpoint prefix as the file path.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_prefix,
        save_weights_only=True,
    ),
    # Apply the epoch-based learning-rate schedule returned by decay().
    tf.keras.callbacks.LearningRateScheduler(decay),
    # Print the learning rate at the end of each epoch.
    PrintLR(),
]

In [13]:
# Number of epochs to train for.
EPOCHS = 12
# Train on the batched training dataset with the configured callbacks.
model.fit(train_dataset, epochs=EPOCHS, callbacks = callbacks)

Epoch 1/12
Learning Rate dla epoki 1 wynosi 0.0010000000474974513
Epoch 2/12
Learning Rate dla epoki 2 wynosi 0.0010000000474974513
Epoch 3/12
Learning Rate dla epoki 3 wynosi 0.0010000000474974513
Epoch 4/12
Learning Rate dla epoki 4 wynosi 9.999999747378752e-05
Epoch 5/12
Learning Rate dla epoki 5 wynosi 9.999999747378752e-05
Epoch 6/12
Learning Rate dla epoki 6 wynosi 9.999999747378752e-05
Epoch 7/12
Learning Rate dla epoki 7 wynosi 9.999999747378752e-05
Epoch 8/12
Learning Rate dla epoki 8 wynosi 9.999999747378752e-06
Epoch 9/12
Learning Rate dla epoki 9 wynosi 9.999999747378752e-06
Epoch 10/12
Learning Rate dla epoki 10 wynosi 9.999999747378752e-06
Epoch 11/12
Learning Rate dla epoki 11 wynosi 9.999999747378752e-06
Epoch 12/12
Learning Rate dla epoki 12 wynosi 9.999999747378752e-06


<keras.callbacks.History at 0x7f5099ef87c0>

In [14]:
# List the checkpoint directory (IPython shell magic; {checkpoint_dir} is expanded from the Python variable).
ls {checkpoint_dir}

checkpoint                   ckpt_4.data-00000-of-00001
ckpt_10.data-00000-of-00001  ckpt_4.index
ckpt_10.index                ckpt_5.data-00000-of-00001
ckpt_11.data-00000-of-00001  ckpt_5.index
ckpt_11.index                ckpt_6.data-00000-of-00001
ckpt_12.data-00000-of-00001  ckpt_6.index
ckpt_12.index                ckpt_7.data-00000-of-00001
ckpt_1.data-00000-of-00001   ckpt_7.index
ckpt_1.index                 ckpt_8.data-00000-of-00001
ckpt_2.data-00000-of-00001   ckpt_8.index
ckpt_2.index                 ckpt_9.data-00000-of-00001
ckpt_3.data-00000-of-00001   ckpt_9.index
ckpt_3.index


In [15]:
# Restore the weights from the latest checkpoint found in checkpoint_dir.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
# Evaluate on the evaluation dataset; the result unpacks into loss and accuracy.
eval_loss, eval_acc = model.evaluate(eval_dataset)
print(f'Eval loss: {eval_loss}, eval acc: {eval_acc}')

Eval loss: 0.03753258287906647, eval acc: 0.9865000247955322


In [16]:
# Directory the trained model is exported to.
path = 'saved_model/'

In [17]:
# Save the model to `path` using save_format='tf'.
model.save(path,save_format='tf')



In [18]:
# Reload the exported model outside any strategy scope.
unreplicated_model = tf.keras.models.load_model(path)

# Compile with the same loss, optimizer type and metric as the trained model.
unreplicated_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Score the reloaded model on the evaluation dataset.
eval_loss, eval_acc = unreplicated_model.evaluate(eval_dataset)
print(f'Eval loss: {eval_loss}, eval acc: {eval_acc}')

Eval loss: 0.03753258287906647, eval acc: 0.9865000247955322


In [20]:
# Reload the exported model inside the strategy scope and evaluate it there.
with strategy.scope():
  replicated_model = tf.keras.models.load_model(path)
  replicated_model.compile(
      loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      optimizer = tf.keras.optimizers.Adam(),
      metrics=['accuracy']
  )
  # Evaluate the model loaded under the strategy (replicated_model), not the
  # unreplicated one, so this cell actually exercises the replicated load path.
  eval_loss, eval_acc = replicated_model.evaluate(eval_dataset)
  print(f'Eval loss: {eval_loss}, eval acc: {eval_acc}')

Eval loss: 0.03753258287906647, eval acc: 0.9865000247955322
