# SageMaker con TensorFlow: Fashion MNIST

<center>
<img src="./imgs/tensor.png"  alt="drawing" width="1300"/>
</center>

In [None]:
pip install -q sagemaker-experiments

In [None]:
import sagemaker

# Resolve the execution role and the SageMaker session this notebook runs under.
role = sagemaker.get_execution_role()
sess = sagemaker.Session()

# Region and default bucket are both derived from the session.
region = sess.boto_region_name
bucket = sess.default_bucket()

# Key prefix defined for this module's artifacts.
prefix = 'module_4/part_7'

# Echo the resolved configuration, one value per line.
print(role, sess, region, bucket, prefix, sep='\n')

In [None]:
from sagemaker.tensorflow import TensorFlow

In [None]:
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from botocore.exceptions import ClientError
from time import gmtime, strftime
import time

experiment_name = 'fashion-mnist-tensorflow-new'

# Create the experiment, or reuse it when it already exists.
try:
    experiment = Experiment.create(
        experiment_name=experiment_name,
        description='Training a fashion mnist model',
    )
except ClientError as create_error:
    # A ClientError is not necessarily a name clash (it could be permissions,
    # throttling, ...). Loading the experiment confirms that it really exists
    # and keeps `experiment` bound for later cells; if loading fails too, the
    # original creation error is the one worth surfacing.
    try:
        experiment = Experiment.load(experiment_name=experiment_name)
    except ClientError:
        raise create_error
    print(f'{experiment_name} experiment already exists! Reusing the existing experiment.')


In [None]:
# Fixed hyperparameters handed to the training script.
hyperparameters = {"epochs": 20, "batch-size": 64}

estimator = TensorFlow(
    # Training code: script located inside the local `tf_code` directory.
    source_dir="tf_code",
    entry_point="fashion_mnist_tensoflow.py",
    hyperparameters=hyperparameters,
    # Runtime environment.
    framework_version="2.1",
    py_version="py3",
    # Infrastructure.
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    input_mode='File',
    # Metrics and model output handling.
    enable_sagemaker_metrics=True,
    # NOTE(review): per the SageMaker SDK docs, model_dir=False stops the SDK
    # from passing a model_dir argument to the script - confirm for the SDK
    # version in use.
    model_dir=False,
)

In [None]:
# Register a fresh trial under the experiment; the UTC timestamp keeps the
# trial name (also reused as the training job name) distinct per run.
exp_datetime = strftime('%Y-%m-%d-%H-%M-%S', gmtime())
jobname = f'fashion-tf-{exp_datetime}'

exp_trial = Trial.create(trial_name=jobname, experiment_name=experiment_name)

# Links the upcoming training job to the experiment and trial created above.
experiment_config = dict(
    ExperimentName=experiment_name,
    TrialName=exp_trial.trial_name,
    TrialComponentDisplayName='Training',
)

In [None]:
# Start the training job under the trial's name, attached to the experiment,
# with log output enabled.
estimator.fit(
    logs=True,
    experiment_config=experiment_config,
    job_name=jobname,
)

### Descargando y usando el modelo localmente

In [None]:
# Display where the trained model artifact is stored; the next cell copies it
# from this location with `aws s3 cp`.
estimator.model_data

In [None]:
!mkdir ./fashion_mnist -p
!aws s3 cp {estimator.model_data} ./fashion_mnist/model.tar.gz
!tar -xzf fashion_mnist/model.tar.gz -C ./fashion_mnist/

In [None]:
import tensorflow as tf
# Load the Keras model from the directory extracted out of model.tar.gz.
# NOTE(review): the '00000000' sub-directory name is presumably chosen by the
# training script - confirm against tf_code.
my_model = tf.keras.models.load_model('./fashion_mnist/00000000')

In [None]:
# Print the summary of the locally loaded Keras model.
my_model.summary()

## Optimización de parámetros / arquitectura

- Usaremos el script de entrenamiento `fashion_mnist_tensoflow_opt.py`.
- Podemos probarlo con:
```bash
python fashion_mnist_tensoflow_opt.py --model_dir ./test_tf --output_dir ./test_tf --epochs 10 --layers 5
```
- Donde tenemos un parámetro `layers` que nos permite cambiar la arquitectura del modelo.

In [None]:
# Hyperparameter tuning job names must be unique per account/region and at
# most 32 characters long. Deriving the name from the current UTC time lets
# the notebook be re-run without hand-bumping a numeric suffix.
jobname = f"fashion-tf-opt-{strftime('%m%d-%H%M%S', gmtime())}"

In [None]:
# Fixed hyperparameters for every tuning trial.
hyperparameters = {"epochs": 20, "batch-size": 64}

# Estimator for the tunable training script; same setup as the first
# estimator apart from the entry point.
estimator = TensorFlow(
    source_dir="tf_code",
    entry_point="fashion_mnist_tensoflow_opt.py",
    hyperparameters=hyperparameters,
    framework_version="2.1",
    py_version="py3",
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    input_mode='File',
    enable_sagemaker_metrics=True,
    model_dir=False,
)

In [None]:
# https://sagemaker.readthedocs.io/en/stable/api/training/parameter.html#sagemaker.parameter.ParameterRange
# Search space for the tuner: both parameters are declared as categorical
# values.
hyperparameter_ranges = dict(
    layers=sagemaker.parameter.CategoricalParameter([1, 2, 3, 4, 5]),
    hidden=sagemaker.parameter.CategoricalParameter([128, 512, 1024]),
)

In [None]:
# https://sagemaker.readthedocs.io/en/stable/api/training/tuner.html

# Regexes used to extract metrics from the training job's log output.
# Example of a per-epoch log line tail:
#   sample - loss: 0.4801 - accuracy: 0.8300 - val_loss: 0.4152 - val_accuracy: 0.8527
# All backslashes are doubled so the patterns contain no invalid string escape
# sequences (a bare "\d" in a normal string literal triggers a warning on
# recent Python versions).
metric_definitions = [
    {
        "Name": "train:loss",
        "Regex": ".*loss: ([0-9\\.]+) - accuracy: [0-9\\.]+.*"
    },
    {
        "Name": "train:accuracy",
        "Regex": ".*loss: [0-9\\.]+ - accuracy: ([0-9\\.]+).*"
    },
    {
        "Name": "validation:accuracy",
        "Regex": ".*sample - loss: [0-9\\.]+ - accuracy: [0-9\\.]+ - val_loss: [0-9\\.]+ - val_accuracy: ([0-9\\.]+).*",
    },
    {
        "Name": "validation:loss",
        "Regex": ".*sample - loss: [0-9\\.]+ - accuracy: [0-9\\.]+ - val_loss: ([0-9\\.]+) - val_accuracy: [0-9\\.]+.*",
    },
    {
        # Captures the number in front of "ms/sample" or "us/sample".
        "Name": "sec/sample",
        "Regex": ".* - \\d+s (\\d+)[mu]s/sample - loss: [0-9\\.]+ - accuracy: [0-9\\.]+ - val_loss: [0-9\\.]+ - val_accuracy: [0-9\\.]+",
    },

    # Originally noted evaluation output:
    #   test loss, test accuracy: [0.3486855386197567, 0.8909]
    # NOTE(review): the two regexes below expect a line shaped like
    # "final test loss: <x> - test accuracy: <y>" - confirm the training
    # script prints that format.
    {
        "Name": "test:loss",
        "Regex": ".*final test loss: ([0-9\\.]+) - test accuracy: [0-9\\.]+.*"
    },
    {
        "Name": "test:accuracy",
        "Regex": ".*final test loss: [0-9\\.]+ - test accuracy: ([0-9\\.]+).*"
    },
]

# Grid-strategy tuner that maximizes the "test:accuracy" metric.
# max_jobs=15 equals the number of combinations in the search space
# (5 `layers` values x 3 `hidden` values).
tuner = sagemaker.tuner.HyperparameterTuner(
    estimator=estimator,
    objective_metric_name="test:accuracy",
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    strategy="Grid",
    objective_type='Maximize',
    max_jobs=15,
    max_parallel_jobs=10,
)

In [None]:
# Launch the hyperparameter tuning job under the name chosen above.
tuner.fit(job_name=jobname)

- Podemos ver los resultados con HyperparameterTuningJobAnalytics.
- También podemos verlo en la pantalla de experimentos.

In [None]:
# Pull the results of the latest tuning job into a DataFrame.
tuning_job_name = tuner.latest_tuning_job.job_name
df = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name).dataframe()
df

In [None]:
# Rows ordered by objective value, highest first.
df.sort_values('FinalObjectiveValue', ascending=False)

In [None]:
# Plot the final objective value against the `layers` hyperparameter.
objective_by_layers = df[['FinalObjectiveValue', 'layers']].set_index('layers')
objective_by_layers.sort_index().plot()