## Env data

In [None]:
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

import matplotlib.pyplot as plt

# Report library versions and execution mode for this runtime.
for caption, detail in (
    ("Version: ", tf.__version__),
    ("Eager mode: ", tf.executing_eagerly()),
    ("Hub version: ", hub.__version__),
):
    print(caption, detail)

# An empty device list is falsy, so this reports whether any GPU is visible.
if tf.config.list_physical_devices('GPU'):
    gpu_status = "available"
else:
    gpu_status = "NOT AVAILABLE"
print("GPU is", gpu_status)


# When debugging, use the following so that `tf.function` code runs eagerly.
tf.config.run_functions_eagerly(True)

## TensorFlow Hub

In [None]:
# https://www.tensorflow.org/hub/overview?hl=en

!pip install --upgrade tensorflow-hub

### Download embedding model

In [None]:
# Wrap a TF Hub SavedModel as a Keras layer:
# https://www.tensorflow.org/hub/tf2_saved_model#using_savedmodels_from_tf_hub
# NOTE: `model` holds the TF Hub handle (a URL string), not a model object.
model = "https://tfhub.dev/google/nnlm-en-dim50/2"
hub_layer = hub.KerasLayer(
    handle=model,
    trainable=True,    # marks the layer's weights as trainable
    dtype=tf.string,   # the layer is fed string tensors ...
    input_shape=[],    # ... with an empty per-example shape
)

## TensorFlow Datasets

In [None]:
!pip install tensorflow-datasets

In [3]:
# https://www.tensorflow.org/datasets/overview
import tensorflow as tf
import tensorflow_datasets as tfds


# Load the first 1% of the IMDB reviews train and test splits, keeping the
# downloaded/prepared data under data/imdb/. `with_info=True` additionally
# returns the dataset metadata as `info`.
(train_ds, test_ds), info = tfds.load(
    name="imdb_reviews",
    split=['train[:1%]', 'test[:1%]'],
    data_dir="data/imdb/",
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)

# Sanity check: the training split came back as a tf.data pipeline.
assert isinstance(train_ds, tf.data.Dataset)

2023-05-24 17:04:59.950084: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm
2023-05-24 17:05:26.491857: W tensorflow/tsl/platform/cloud/google_auth_provider.cc:184] All attempts to get a Google authentication bearer token failed, returning an empty token. Retrieving token from files failed with "NOT_FOUND: Could not locate the credentials file.". Retrieving token from GCE failed with "FAILED_PRECONDITION: Error executing an HTTP request: libcurl code 6 meaning 'Couldn't resolve host name', error details: Could not resolve host: metadata.google.internal".


[1mDownloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to data/imdb/imdb_reviews/plain_text/1.0.0...[0m


Dl Size...: 100%|██████████| 80/80 [00:14<00:00,  5.34 MiB/s]rl]
Dl Completed...: 100%|██████████| 1/1 [00:14<00:00, 14.97s/ url]
                                                                        

[1mDataset imdb_reviews downloaded and prepared to data/imdb/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.[0m


### Optimization 

In [None]:
# Batch size shared by the train and test pipelines.
BATCH_SIZE = 32

# Cache the examples, batch them, and prefetch so that preparing the next
# batch can overlap with consuming the current one.
# Any `.map(...)` preprocessing should be inserted before `.cache()` so that
# its result is cached as well.
# NOTE: this cell rebinds `train_ds`/`test_ds`. Re-run the loading cell before
# re-running this one, otherwise the data is batched a second time.
train_ds = (train_ds
            .cache()
            .batch(BATCH_SIZE)
            .prefetch(tf.data.AUTOTUNE))

test_ds = (test_ds
           .cache()
           .batch(BATCH_SIZE)
           .prefetch(tf.data.AUTOTUNE))

# Number of batches in each pipeline.
print(train_ds.cardinality(), test_ds.cardinality())

# Label metadata: class count, class names, and the id <-> name mapping.
print(info.features['label'].num_classes)
print(info.features["label"].names)
print(info.features['label'].int2str(0), info.features['label'].str2int('pos'))

In [None]:
# https://www.tensorflow.org/datasets/performances

def normalize_img(image, label):
  """Cast `image` to float32 and divide it by 255; pass `label` through.

  Args:
    image: Image tensor. Assumed to hold `uint8` pixel values, in which case
      the result lies in [0, 1].
    label: Label associated with the image; returned unchanged.

  Returns:
    A `(scaled_image, label)` tuple.
  """
  scaled_image = tf.cast(image, tf.float32) / 255.
  return scaled_image, label


# Load the MNIST training split together with its metadata (`ds_info`).
ds, ds_info = tfds.load(
    'mnist',
    split='train',
    as_supervised=True,  # returns `(img, label)` instead of dict(image=, ...)
    with_info=True,
)

# Pipeline order matters: normalize -> cache -> shuffle -> batch -> prefetch.
ds = (
  ds
  .map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)  # Normalize before `ds.cache()` so the cached data is already normalized.
  .cache()
  .shuffle(ds_info.splits['train'].num_examples)  # For true randomness, we set the shuffle buffer to the full dataset size.
  .batch(128)  # Batch after shuffling to get unique batches at each epoch.
  .prefetch(tf.data.AUTOTUNE)  # Same constant as in `.map(...)`; `tf.data.experimental.AUTOTUNE` is the older alias.
)

### Load the entire dataset into memory

In [4]:
# https://www.tensorflow.org/datasets/performances
# Load both MNIST splits fully into memory and convert them to NumPy.
(img_train, label_train), (img_test, label_test) = tfds.as_numpy(
    tfds.load(
        name='mnist',
        split=['train', 'test'],
        as_supervised=True,
        batch_size=-1,  # -1 => each split is returned as one full batch
    )
)

[1mDownloading and preparing dataset 11.06 MiB (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to /Users/agustinsarasua/tensorflow_datasets/mnist/3.0.1...[0m


Dl Completed...: 100%|██████████| 5/5 [00:04<00:00,  1.03 file/s]


[1mDataset mnist downloaded and prepared to /Users/agustinsarasua/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.[0m


### Load dataset from CSV

In [5]:
# https://www.tensorflow.org/api_docs/python/tf/data/experimental/CsvDataset

# Parse data/example.csv, reading only columns 1, 2 and 3 (column 0 is
# skipped). `record_defaults` gives one entry per selected column: a bare
# dtype marks a required field, a tensor supplies the default for an
# optional field.
dataset = tf.data.experimental.CsvDataset(
    filenames="data/example.csv",
    record_defaults=[
        tf.float32,                            # required field
        tf.constant([0.0], dtype=tf.float32),  # optional field, defaults to 0.0
        tf.int32,                              # required field
    ],
    select_cols=[1, 2, 3],
)

# Print every parsed row.
for row in dataset.as_numpy_iterator():
    print(row)

(42800000000.0, 5550000.0, 12)
(-530000000000000.0, 0.0, 2)


### From pandas DataFrame to tf.data.Dataset

In [None]:
import numpy as np
import pandas as pd

# Toy frame: three random float feature columns and a random integer target
# drawn from {0, 1, 2}, ten rows each.
training_df: pd.DataFrame = pd.DataFrame(
    data={
        'feature1': np.random.rand(10),
        'feature2': np.random.rand(10),
        'feature3': np.random.rand(10),
        'target': np.random.randint(0, 3, 10),
    }
)

print(training_df)

# Convert the feature block and the target column to tensors with explicit
# dtypes, then slice them row by row into (features, target) pairs.
feature_values = tf.cast(
    training_df[['feature1', 'feature2', 'feature3']].values, tf.float32
)
target_values = tf.cast(training_df['target'].values, tf.int32)
training_dataset = tf.data.Dataset.from_tensor_slices((feature_values, target_values))

assert isinstance(training_dataset, tf.data.Dataset)

# Show each (features, target) pair along with the feature tensor's type and dtype.
for features_tensor, target_tensor in training_dataset:
    print(
        f'features:{features_tensor} target:{target_tensor}',
        type(features_tensor),
        features_tensor.dtype,
    )

### Dataset from text files

In [26]:
# Read the two text files line by line and print each element with its type.
dataset = tf.data.TextLineDataset(
    filenames=["data/files/file_1.txt", "data/files/file_2.txt"]
)

for line_tensor in dataset:
    print(line_tensor, type(line_tensor))

print("\n ------------------ \n")


# https://www.tensorflow.org/api_docs/python/tf/keras/utils/text_dataset_from_directory
# Example: https://www.tensorflow.org/tutorials/load_data/text

# Build a labeled text dataset from a directory, with labels inferred and
# encoded as integers. NOTE: this rebinds `dataset`.
dataset = tf.keras.utils.text_dataset_from_directory(
    directory="data/files/from_dir/",
    labels='inferred',
    label_mode='int',
)

# Inspect only the first element yielded by the dataset.
for X, y in dataset.take(1):
    print("Sentence: ", X.numpy())
    print("Label:", y.numpy())

tf.Tensor(b'afbdab fdsb sdb dsf bdsf b sdfb dsfb sfd', shape=(), dtype=string) <class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(b'afbdab fdsb sdb dsf bdsf b sdfb dsfb sfd', shape=(), dtype=string) <class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(b'afbdab fdsb sdb dsf bdsf b sdfb ', shape=(), dtype=string) <class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(b'afbdab fdsb sdb dsf bdsf b sdfb sad', shape=(), dtype=string) <class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(b'afbdab fdsb sdb dsf bdsf b sdfb sad', shape=(), dtype=string) <class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(b'vsad  asdas', shape=(), dtype=string) <class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(b'afbdab fdsb sdb dsf bdsf b sdfb dsfb sfd', shape=(), dtype=string) <class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(b'afbdab fdsb sdb dsf bdsf b sdfb dsfb sfd', shape=(), dtype=string) <class 'tensorflow.python.framework.o

2023-05-24 18:14:45.931801: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2]
	 [[{{node Placeholder/_0}}]]
2023-05-24 18:14:45.983539: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2]
	 [[{{node Placeholder/_0}}]]
2023-05-24 18:14:45.983981: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2]
	 [[{{n

# Preprocessing

In [None]:
!pip install "tensorflow-text"

In [25]:
from tensorflow.keras.layers import TextVectorization

import tensorflow_datasets as tfds
import tensorflow_text as tf_text



## Saved Model

In [None]:
import os

# Export `model` to models/some_model/1/ with `tf.saved_model.save`.
# NOTE(review): in the visible cells `model` is only ever bound to the TF Hub
# URL string (see "Download embedding model") — confirm that a trained model
# object is assigned to `model` before this cell runs.
model_path = os.path.join("models", "some_model/1/")
tf.saved_model.save(obj=model, export_dir=model_path)

## Serving a model with TensorFlow Serving

In [None]:
%%bash
# Serving with docker
# https://www.tensorflow.org/tfx/serving/docker
#
# `%%bash` runs this cell as a shell script; without it the notebook tries to
# execute these commands as Python.
docker pull tensorflow/serving

# Mount the host directory that contains the numbered version folder
# (models/some_model/1/, as written by the "Saved Model" cell) at
# /models/some_model inside the container, matching MODEL_NAME=some_model.
# The path is relative to the notebook's working directory rather than an
# absolute, user-specific path. `-d` detaches the container so the cell can
# continue.
docker run -d --rm -p 8501:8501 \
    -v "$(pwd)/models/some_model:/models/some_model" \
    -e MODEL_NAME=some_model \
    tensorflow/serving

# Poll the model status endpoint (at most 30 attempts, one second apart) so the
# prediction request below is not sent before the server answers.
for attempt in $(seq 1 30); do
  curl -sf http://localhost:8501/v1/models/some_model > /dev/null && break
  sleep 1
done

curl -d '{"instances": ["hola", "This was an absolutely terrible movie. Don'\''t be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie'\''s ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor'\''s like Christopher Walken'\''s good name. I could barely sit through it"]}' \
  -H "Content-Type: application/json" \
  -X POST http://localhost:8501/v1/models/some_model:predict

## TensorBoard


https://www.tensorflow.org/tensorboard/get_started

In [None]:
# FIXME(review): this cell held the unfinished statement
# `from tensorflow.python.de import`, which is a SyntaxError and stops
# "Restart & Run All". Restore the intended import here (the target module and
# name cannot be determined from the notebook).

In [None]:
%load_ext tensorboard

# Profiling

!pip install -U tensorboard_plugin_profile

In [28]:
rm -rf ./logs/