In [12]:
import numpy as np
from tensorflow.keras.datasets import mnist

In [101]:
class Network:
    def __init__(self,sizes):
        self.num_layers=len(sizes)
        self.sizes=sizes
        self.biases=[np.random.randn(y,1) for y in sizes[1:]]
        self.weights=[np.random.randn(y,x) for x,y in zip(sizes[:-1],sizes[1:])]
    
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))
    
    def feedforward(self,a):
        for b,w in zip(self.biases,self.weights):
            a=self.sigmoid(np.dot(w,a)+b)
        return a
    
    def SGD(self,training_data,epochs,mini_batch_size,eta,test_data=None):
        if test_data:
            n_test=len(test_data[0])
        n=len(training_data)
        X=training_data[0]
        Y=training_data[1]
        for j in range(epochs):
            mini_batches=[(X[k:k+mini_batch_size],Y[k:k+mini_batch_size]) for k in range(0,n,mini_batch_size)]
            for mini_batch in mini_batches:
                
                self.update_mini_batch(mini_batch,eta)
            if test_data:
                print(f"Epoch {self.evaluate(test_data)} {j} / {n_test}")
            else:
                print(f"Epoch {j} complete")
    
    def update_mini_batch(self,mini_batch,eta):
        nabla_b=[np.zeros_like(b) for b in self.biases]
        nabla_w=[np.zeros_like(w) for w in self.weights]
        x,y=mini_batch
        x=x.reshape(10,784,1)
        for x,y in zip(x,y):
            delta_nabla_b,delta_nabla_w=self.backprop(x,y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw 
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb 
                       for b, nb in zip(self.biases, nabla_b)]

    
    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b=[np.zeros_like(b) for b in self.biases]
        nabla_w=[np.zeros_like(w) for w in self.weights]
        # feedforward
        activation = x
        activations = [x] # list to store all the activations, layer by layer
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * self.sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book.  Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on.  It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        x,y=test_data
        x=x.reshape(x.shape[0],784,1)
        test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in zip(x,y)]
        # print(test_results)
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)

    #### Miscellaneous functions

    def sigmoid_prime(self,z):
        """Derivative of the sigmoid function."""
        return self.sigmoid(z)*(1-self.sigmoid(z))
    
nn=Network([2,3,1])

In [104]:
net=Network([784,30,10])
net.SGD(training_data, 30, 10, 3, test_data=test_data)

  return 1/(1+np.exp(-z))


Epoch 851 0 / 10000
Epoch 851 1 / 10000
Epoch 852 2 / 10000
Epoch 852 3 / 10000
Epoch 852 4 / 10000
Epoch 852 5 / 10000
Epoch 852 6 / 10000
Epoch 851 7 / 10000
Epoch 851 8 / 10000
Epoch 851 9 / 10000
Epoch 851 10 / 10000
Epoch 851 11 / 10000
Epoch 851 12 / 10000
Epoch 851 13 / 10000
Epoch 851 14 / 10000
Epoch 851 15 / 10000
Epoch 851 16 / 10000
Epoch 851 17 / 10000
Epoch 851 18 / 10000
Epoch 851 19 / 10000
Epoch 851 20 / 10000
Epoch 851 21 / 10000
Epoch 851 22 / 10000
Epoch 851 23 / 10000
Epoch 851 24 / 10000
Epoch 851 25 / 10000
Epoch 851 26 / 10000
Epoch 851 27 / 10000
Epoch 851 28 / 10000
Epoch 851 29 / 10000


In [78]:
test_data

(array([[[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         ...,
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0]],
 
        [[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         ...,
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0]],
 
        [[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         ...,
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0]],
 
        ...,
 
        [[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         ...,
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0]],
 
        [[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],

In [15]:
training_data,test_data=mnist.load_data()

In [24]:
training_data[1]

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [119]:
a=np.array([1,2])
b=np.array([2,3])
a*b

array([2, 6])

In [120]:
np.dot(a,b)

8

In [121]:
b.shape

(2,)

In [122]:
a

array([1, 2])

In [1]:
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten
from keras.layers.convolutional import Convolution2D,MaxPooling2D
from keras.utils import np_utils
from keras.losses import *

2023-07-31 22:32:34.513875: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-31 22:32:34.761330: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-31 22:32:34.763566: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
seed=7
np.random.seed(7)
(X_train,y_train),(X_test,y_test)=mnist.load_data()

In [3]:
count,column,rows=X_train.shape
num_pixels=column*rows

In [4]:
X_train=X_train.reshape(count,num_pixels).astype("float32")
X_test=X_test.reshape(X_test.shape[0],num_pixels).astype("float32")

In [5]:
X_train.shape

(60000, 784)

In [6]:
X_train=X_train/255
X_test=X_test/255
y_train=np_utils.to_categorical(y_train)
y_test=np_utils.to_categorical(y_test)
num_classes=y_test.shape[1]

In [48]:
num_classes

10

In [7]:
def baseline_model():
    model=Sequential()
    model.add(Dense(num_pixels,activation="relu"))
    model.add(Dense(num_classes,activation="softmax"))
    model.compile(loss=categorical_crossentropy,metrics=["accuracy"],optimizer="adam")
    return model

In [8]:
model=baseline_model()
model.fit(X_train,y_train,epochs=10,batch_size=150,verbose=2)

2023-07-31 22:33:07.953117: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


Epoch 1/10
400/400 - 7s - loss: 0.2565 - accuracy: 0.9254 - 7s/epoch - 17ms/step
Epoch 2/10
400/400 - 6s - loss: 0.0984 - accuracy: 0.9708 - 6s/epoch - 15ms/step
Epoch 3/10
400/400 - 6s - loss: 0.0640 - accuracy: 0.9810 - 6s/epoch - 15ms/step
Epoch 4/10
400/400 - 6s - loss: 0.0445 - accuracy: 0.9872 - 6s/epoch - 15ms/step
Epoch 5/10
400/400 - 6s - loss: 0.0313 - accuracy: 0.9911 - 6s/epoch - 14ms/step
Epoch 6/10
400/400 - 7s - loss: 0.0221 - accuracy: 0.9939 - 7s/epoch - 17ms/step
Epoch 7/10
400/400 - 7s - loss: 0.0176 - accuracy: 0.9951 - 7s/epoch - 18ms/step
Epoch 8/10
400/400 - 6s - loss: 0.0134 - accuracy: 0.9965 - 6s/epoch - 16ms/step
Epoch 9/10
400/400 - 7s - loss: 0.0107 - accuracy: 0.9971 - 7s/epoch - 18ms/step
Epoch 10/10
400/400 - 6s - loss: 0.0084 - accuracy: 0.9978 - 6s/epoch - 16ms/step


<keras.callbacks.History at 0x7f0bcbfcbeb0>

In [41]:
model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_22 (Dense)            (200, 784)                615440    
                                                                 
 dense_23 (Dense)            (200, 10)                 7850      
                                                                 
Total params: 623,290
Trainable params: 623,290
Non-trainable params: 0
_________________________________________________________________


In [23]:
num_pixels

784

In [159]:
(X_train,y_train),(X_test,y_test)=mnist.load_data()
X_train=X_train.reshape(X_train.shape[0],28,28,1).astype("float32")
X_test=X_test.reshape(X_test.shape[0],28,28,1).astype("float32")
X_train=X_train/255
X_test=X_test/255
y_train=np_utils.to_categorical(y_train)
y_test=np_utils.to_categorical(y_test)

In [160]:
num_classes

10

In [161]:
def baseline_model():
    model=Sequential()
    model.add(Convolution2D(32,(5,5),activation="relu"))
    model.add(MaxPooling2D(strides=(1,1)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128,activation="relu"))
    model.add(Dense(num_classes,activation="softmax"))
    model.compile(loss=softmax_cross_entropy_with_logits ,metrics=["accuracy"],optimizer="adam")
    return model

In [162]:
model=baseline_model()
model.fit(X_train,y_train,epochs=10,batch_size=200,verbose=2)

Epoch 1/10
300/300 - 37s - loss: 0.1631 - accuracy: 0.9505 - 37s/epoch - 125ms/step
Epoch 2/10
300/300 - 34s - loss: 0.0529 - accuracy: 0.9839 - 34s/epoch - 113ms/step
Epoch 3/10
300/300 - 35s - loss: 0.0357 - accuracy: 0.9889 - 35s/epoch - 118ms/step
Epoch 4/10
300/300 - 34s - loss: 0.0261 - accuracy: 0.9917 - 34s/epoch - 113ms/step
Epoch 5/10
300/300 - 34s - loss: 0.0198 - accuracy: 0.9938 - 34s/epoch - 114ms/step
Epoch 6/10
300/300 - 34s - loss: 0.0153 - accuracy: 0.9953 - 34s/epoch - 112ms/step
Epoch 7/10
300/300 - 40s - loss: 0.0127 - accuracy: 0.9959 - 40s/epoch - 134ms/step
Epoch 8/10
300/300 - 39s - loss: 0.0093 - accuracy: 0.9971 - 39s/epoch - 129ms/step
Epoch 9/10
300/300 - 39s - loss: 0.0077 - accuracy: 0.9974 - 39s/epoch - 129ms/step
Epoch 10/10
300/300 - 38s - loss: 0.0066 - accuracy: 0.9978 - 38s/epoch - 128ms/step


<keras.callbacks.History at 0x7faa2f606fb0>

In [139]:
model.summary()

Model: "sequential_33"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_18 (Conv2D)          (200, 24, 24, 32)         832       
                                                                 
 max_pooling2d_17 (MaxPoolin  (200, 24, 24, 32)        0         
 g2D)                                                            
                                                                 
 dropout_17 (Dropout)        (200, 24, 24, 32)         0         
                                                                 
 flatten_16 (Flatten)        (200, 18432)              0         
                                                                 
 dense_69 (Dense)            (200, 128)                2359424   
                                                                 
 dense_70 (Dense)            (200, 10)                 1290      
                                                     

In [115]:
print(Convolution2D.__doc__)

2D convolution layer (e.g. spatial convolution over images).

    This layer creates a convolution kernel that is convolved
    with the layer input to produce a tensor of
    outputs. If `use_bias` is True,
    a bias vector is created and added to the outputs. Finally, if
    `activation` is not `None`, it is applied to the outputs as well.

    When using this layer as the first layer in a model,
    provide the keyword argument `input_shape`
    (tuple of integers or `None`, does not include the sample axis),
    e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures
    in `data_format="channels_last"`. You can use `None` when
    a dimension has variable size.

    Examples:

    >>> # The inputs are 28x28 RGB images with `channels_last` and the batch
    >>> # size is 4.
    >>> input_shape = (4, 28, 28, 3)
    >>> x = tf.random.normal(input_shape)
    >>> y = tf.keras.layers.Conv2D(
    ... 2, 3, activation='relu', input_shape=input_shape[1:])(x)
    >>> print(y.shape)
    (

In [152]:
X_train[0]

array([[[0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00]],

       [[0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0.00000000e+00],
        [0