<a href="https://colab.research.google.com/github/abhi-1907/TrainingSessionsForStudents/blob/main/DNNnew.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Activation

In [None]:
# 1. Define the model architecture
model = keras.Sequential([
    keras.Input(shape=(784,)),        # Input spec: 784 features per sample (e.g., a flattened 28x28 image)
    Dense(128, activation='relu'),    # Hidden layer: 128 fully connected neurons with ReLU activation
    Dense(10, activation='softmax'),  # Output layer: 10 neurons (for 10 classes), softmax gives a probability distribution
])


# Explanation of the architecture:

# *   keras.Sequential(): This creates a linear stack of layers, meaning data flows sequentially through them.

# *   keras.Input(shape=(784,)): Declares the shape of a single input sample: a 1D array of 784 features.
# This would be the case if you have, for example, flattened a 28x28 pixel image.
# Declaring the input with an explicit Input object (instead of passing input_shape= to the first Dense layer)
# is the form recommended for Sequential models and avoids the UserWarning that recent Keras versions emit.

# *   Dense(128, activation='relu'): This is the first (hidden) layer.
#     *   Dense:  A fully connected layer. Every neuron in this layer is connected to every neuron in the previous layer.
#     *   128: The number of neurons in this layer.  More neurons can capture more complex patterns, but too many can lead to overfitting.
#     *   activation='relu': The Rectified Linear Unit activation function.  It introduces non-linearity, which is crucial for
#  deep learning. ReLU is a popular choice because it's computationally efficient and often performs well.  Other options include 'sigmoid', 'tanh'.

# *   Dense(10, activation='softmax'): This is the output layer.
#     *   10: The number of neurons, corresponding to the number of classes you're trying to predict.
# For example, if you're classifying digits (0-9), you'd have 10 classes.
#     *   activation='softmax':  Softmax converts the output of the neurons into a probability distribution.
# Each neuron's output will be a value between 0 and 1, and they will all sum up to 1.  This makes it suitable for multi-class classification.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# 2. Compile the model
# Compilation attaches the training configuration to the model:
# what to minimize, how to minimize it, and what to report along the way.
model.compile(
    # Loss function: how prediction errors are measured.
    loss='categorical_crossentropy',
    # Optimization algorithm: how the weights are updated to reduce the loss.
    optimizer='adam',
    # Extra quantities reported during training and evaluation.
    metrics=['accuracy'],
)


# Notes on the chosen settings:

# *   loss='categorical_crossentropy': intended for multi-class classification with one-hot encoded labels
#     (e.g., digit 3 is represented as [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]).
#     If the labels are plain integers (e.g., 3), use 'sparse_categorical_crossentropy' instead.

# *   optimizer='adam': Adam is a popular and generally effective optimizer; it adjusts the model's weights
#     to minimize the loss function. Alternatives include 'sgd' and 'rmsprop'.

# *   metrics=['accuracy']: accuracy is tracked during both training and evaluation.

In [None]:
# 3. Prepare the data (example using MNIST)
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()  # Load MNIST dataset

# Preprocess the data (important!)
# Flatten every image into a 784-element vector and normalize pixel values by dividing by 255.0.
# reshape(-1, 784) lets NumPy infer the number of samples, so the cell keeps working
# if the dataset is subsampled or swapped for one with a different number of images.
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0

# One-hot encode the integer labels, as required by the 'categorical_crossentropy' loss.
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# 4. Train the model
# Keep the returned History object: it records the per-epoch loss/metric values
# (history.history) for later inspection or plotting, and binding it to a name
# stops its repr from being echoed as the cell's output.
history = model.fit(x_train, y_train, epochs=5, batch_size=32)


# Explanation of training:

# *   x_train, y_train: The training data and corresponding labels.
# *   epochs=5:  The number of times the model will go through the entire training dataset.
# *   batch_size=32: The number of samples processed in each iteration of training.
# *   history: The History object returned by fit(), holding the metrics logged for each epoch.

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.8786 - loss: 0.4301
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9646 - loss: 0.1191
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9770 - loss: 0.0752
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9834 - loss: 0.0541
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.9862 - loss: 0.0446


<keras.src.callbacks.history.History at 0x7c2e52e64910>

In [None]:
# 5. Evaluate the model
# Score the trained model on the held-out test set; verbose=0 silences the progress bar.
# The result unpacks into the loss followed by the compiled 'accuracy' metric.
evaluation = model.evaluate(x=x_test, y=y_test, verbose=0)
loss, accuracy = evaluation

# Report both values.
print('Test loss:', loss)
print('Test accuracy:', accuracy)



Test loss: 0.0674959197640419
Test accuracy: 0.9781000018119812


In [None]:
# 6. Make predictions (optional)
# predict() returns, for every test sample, the probabilities of each class.
predictions = model.predict(x_test)

# The predicted class label is the index of the largest probability in each row.
predicted_labels = tf.math.argmax(predictions, axis=1)

# Show the labels predicted for the first 10 test samples.
print(predicted_labels[:10])

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
tf.Tensor([7 2 1 0 4 1 4 9 5 9], shape=(10,), dtype=int64)
