In [13]:
import tensorflow as tf
import pandas as pd
import numpy as np
import time
from tqdm import tqdm

In [2]:
# Reading the MNIST dataset
def read_MNIST(path):
    data = pd.read_csv(path, skiprows=1)
    X = data.iloc[:, 1:].values.astype('float32') / 255.0
    Y = data.iloc[:, 0].values
    Y = tf.keras.utils.to_categorical(Y, 10)
    return X, Y

In [3]:
# Splitting data to test and train
def train_test_split(X, Y, split):
    limit = int(np.floor(split * len(X)))
    x_train, y_train = X[:limit], Y[:limit]
    x_test, y_test = X[limit:], Y[limit:]
    return x_train, y_train, x_test, y_test

# Creating TensorFlow datasets
def create_dataset(X, Y, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((X, Y))
    return dataset.shuffle(buffer_size=1024).batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [4]:
# Defining the model
def create_model(input_shape, hidden_layers, output_shape):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=input_shape))
    for units in hidden_layers:
        model.add(tf.keras.layers.Dense(units, activation='relu'))
    model.add(tf.keras.layers.Dense(output_shape, activation='softmax'))
    return model

In [5]:
# Training function
@tf.function
def train_step(model, x, y, loss_fn, optimizer):
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        loss = loss_fn(y, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

In [18]:
# Compute accuracy of the network
@tf.function
def compute_accuracy(model, X, Y):
    predictions = model(X, training=False)
    correct_pred = tf.equal(tf.argmax(predictions, 1), tf.argmax(Y, 1))
    return tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [19]:
# Training loop
def train(model, train_dataset, test_dataset, epochs, learning_rate):
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    loss_fn = tf.keras.losses.CategoricalCrossentropy()

    for epoch in range(epochs):
        t0 = time.time()
        total_loss = 0
        num_batches = 0

        for x_batch, y_batch in train_dataset:
            loss = train_step(model, x_batch, y_batch, loss_fn, optimizer)
            total_loss += loss
            num_batches += 1

        avg_loss = total_loss / num_batches
        train_accuracy = tf.reduce_mean([compute_accuracy(model, x, y) for x, y in train_dataset])
        
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, "
              f"Accuracy: {train_accuracy:.4f}, Time: {time.time()-t0:.2f}s")
        
    # Test accuracy
    test_accuracy = tf.reduce_mean([compute_accuracy(model, x, y) for x, y in test_dataset])
    print(f"Test Data Accuracy: {test_accuracy:.4f}")

In [15]:
def run(hidden_layers, learning_rate, epochs, split, batch_size):
    print(f"Epochs: {epochs}, LR: {learning_rate}, Hidden Layers: {hidden_layers}, "
          f"Split: {split}, Batch Size: {batch_size}")
    
    # Check for GPU
    physical_devices = tf.config.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        print(f"GPU is available: {physical_devices[0]}")
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    else:
        print("No GPU available, using CPU")

    X, Y = read_MNIST("D:/Github/MNIST_CSV/mnist_train.csv")
    x_train, y_train, x_test, y_test = train_test_split(X, Y, split)

    train_dataset = create_dataset(x_train, y_train, batch_size)
    test_dataset = create_dataset(x_test, y_test, batch_size)

    input_shape = X.shape[1]
    output_shape = Y.shape[1]
    model = create_model((input_shape,), hidden_layers, output_shape)

    train(model, train_dataset, test_dataset, epochs, learning_rate)

In [16]:
# Run the training
run(hidden_layers=[50], learning_rate=0.01, epochs=10, split=0.90, batch_size=32)

Epochs: 10, LR: 0.01, Hidden Layers: [50], Split: 0.9, Batch Size: 32
GPU is available: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')




ValueError: in user code:

    File "C:\Users\abhag\AppData\Local\Temp\ipykernel_6532\2677684926.py", line 8, in train_step  *
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    File "d:\Anaconda\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 695, in apply_gradients  **
        self._create_all_weights(var_list)
    File "d:\Anaconda\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 957, in _create_all_weights
        _ = self.iterations
    File "d:\Anaconda\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 965, in __getattribute__
        return super().__getattribute__(name)
    File "d:\Anaconda\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 1140, in iterations
        self._iterations = self.add_weight(
    File "d:\Anaconda\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 1352, in add_weight
        variable = self._add_variable_with_custom_getter(
    File "d:\Anaconda\lib\site-packages\keras\engine\base_layer_utils.py", line 134, in make_variable
        return tf1.Variable(

    ValueError: tf.function only supports singleton tf.Variables created on the first call. Make sure the tf.Variable is only created once or created outside tf.function. See https://www.tensorflow.org/guide/function#creating_tfvariables for more information.
