# Load NumPy Data

This notebook provides an example of loading data from NumPy arrays into a `tf.data.Dataset`.

In [1]:
import numpy as np 
import tensorflow as tf 

### Load from .npz file 

In [2]:
# Location of the MNIST archive (.npz) to download.
DATA_URL = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'

# Fetch the archive from DATA_URL under the file name 'mnist.npz'; `path` is the value
# returned by get_file and is what the loading cell passes to np.load.
path = tf.keras.utils.get_file(fname='mnist.npz', origin=DATA_URL)

In [3]:
# Open the .npz archive and pull out the four arrays it stores; the context
# manager closes the archive once the arrays have been read.
with np.load(path) as data:
    train_examples, train_labels = data['x_train'], data['y_train']
    test_examples, test_labels = data['x_test'], data['y_test']

In [4]:
# Display the dimensions of the training examples array as a sanity check on the load.
train_examples.shape

(60000, 28, 28)

### Load NumPy arrays with `tf.data.Dataset`

Assuming you have an array of examples and a corresponding array of labels, pass the two arrays as a tuple into `tf.data.Dataset.from_tensor_slices` to create a `tf.data.Dataset`.

In [5]:
# Training split: slice the (examples, labels) pair along the first axis so each
# dataset element is one (example, label) pair, then assert every example is 28x28.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_examples, train_labels)
).map(lambda image, label: (tf.ensure_shape(image, [28, 28]), label))

# Test split: built the same way from the test arrays.
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_examples, test_labels)
).map(lambda image, label: (tf.ensure_shape(image, [28, 28]), label))

2024-12-28 23:23:44.014327: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-12-28 23:23:44.014363: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-12-28 23:23:44.014366: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-12-28 23:23:44.014386: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-28 23:23:44.014396: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


## Use Datasets 
### Shuffle and Batch the Datasets 

In [6]:
# Number of examples grouped into each batch (passed to Dataset.batch).
BATCH_SIZE = 64
# Size of the buffer used when shuffling (passed to Dataset.shuffle).
SHUFFLE_BUFFER_SIZE = 100

In [7]:
# Shuffle only the training data: the order of examples influences gradient
# updates during fitting, so it is randomized before batching.
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
# Evaluation metrics are aggregated over the whole test set and do not depend on
# element order, so the test data is batched without the extra shuffle step.
test_dataset = test_dataset.batch(BATCH_SIZE)

### Normalize the Data

In [8]:
# Cast each batch of examples to float32 and divide by 255.0; labels pass through unchanged.
train_dataset = train_dataset.map(
    lambda images, labels: (tf.cast(images, tf.float32) / 255.0, labels)
)
test_dataset = test_dataset.map(
    lambda images, labels: (tf.cast(images, tf.float32) / 255.0, labels)
)

### Build and Train a Model 

In [9]:
# Declare the input shape with an explicit Input object as the first entry instead
# of passing `input_shape` to the first layer; Keras 3 emits a UserWarning for the
# latter form in Sequential models.
model = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(28, 28)),
        # Flatten each 28x28 example into a 784-element vector for the dense layers.
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        # No activation on the output layer: it emits 10 raw logits, matching the
        # from_logits=True loss used when the model is compiled.
        tf.keras.layers.Dense(10),
    ]
)

  super().__init__(**kwargs)


In [10]:
# Configure training: Adam optimizer, sparse categorical cross-entropy computed
# directly on the model's logits, and sparse categorical accuracy as the metric.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['sparse_categorical_accuracy'],
)

In [11]:
# Print a summary of the model's layers.
model.summary()

In [12]:
# Train for 10 epochs on the batched, normalized training dataset.
model.fit(train_dataset, epochs=10)

Epoch 1/10


2024-12-28 23:23:44.483711: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.5650 - sparse_categorical_accuracy: 0.8402
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.3125 - sparse_categorical_accuracy: 0.9123
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.3077 - sparse_categorical_accuracy: 0.9145
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.3104 - sparse_categorical_accuracy: 0.9137
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.3156 - sparse_categorical_accuracy: 0.9146
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.3197 - sparse_categorical_accuracy: 0.9143
Epoch 7/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.3297 - sparse_categorical_accuracy: 0.9122
Epoch 8/10
[1m938/938[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x15d8b4fd0>

In [13]:
# Evaluate on the test dataset; the result lists the loss followed by the compiled metric.
model.evaluate(test_dataset)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.7538 - sparse_categorical_accuracy: 0.8235   


[0.6413867473602295, 0.8532999753952026]