In [None]:
import pandas as pd
import numpy as np
import pickle
import csv
from keras.datasets import mnist
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Load the input image (replace with the path to your own image)
image = Image.open("Inputs/5.jpg")

# Resize the image to 28x28 pixels
image = image.resize((28, 28))

# Convert to single-channel 8-bit grayscale (if necessary)
image = image.convert("L")

# Scale pixel values from [0, 255] down to [0, 1]
image = np.array(image) / 255.0

# Flatten into a single sample with 784 features (one row)
image = image.reshape(1, 784)

# Column-vector form (784, 1): forward_propagation computes W1.dot(X), so each
# sample must be a column, like the transposed MNIST matrices built below.
# (The previous bare `image.reshape()` raised TypeError: no shape was given.)
# NOTE(review): MNIST digits are light strokes on a dark background; a photo
# of dark ink on paper may need inverting (1 - X_image) — confirm with data.
X_image = image.reshape(784, 1)


In [291]:
# Fetch the MNIST train/test split through Keras
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# VERY IMPORTANT: pixels are divided by this, otherwise exp overflows
SCALE_FACTOR = 255

# Image dimensions are axes 1 and 2 of the raw training array
WIDTH, HEIGHT = X_train.shape[1:3]
WIDTH, HEIGHT

(28, 28)

In [292]:
# Flatten every image to WIDTH*HEIGHT pixels, transpose so that each COLUMN is
# one sample, then divide by SCALE_FACTOR.
# NOTE: this cell rebinds X_train / X_test, so it is not safe to run twice
# without re-running the loading cell first.
X_train = np.transpose(X_train.reshape(len(X_train), WIDTH * HEIGHT)) / SCALE_FACTOR
X_test = np.transpose(X_test.reshape(len(X_test), WIDTH * HEIGHT)) / SCALE_FACTOR

In [294]:
# Sanity check: after the flatten + transpose both matrices are (pixels, samples)
X_train.shape, X_test.shape

((784, 60000), (784, 10000))

In [None]:
    # Rows of X_train are features (pixels), columns are samples
    size , m = X_train.shape
    # Width used for the parameter matrices created in the next cell
    nodes = 10
    # NOTE(review): not referenced by any other visible cell, and MNIST has 10
    # digit classes — confirm whether 32 is intended.
    num_classes = 32

In [None]:
# Uniform [0, 1) draws scaled by sqrt(1/size) or sqrt(1/nodes).
# Draw order (W1, b1, W2, b2) is kept so a seeded RNG yields the same values.
W1 = np.sqrt(1. / size) * np.random.rand(nodes, size)
b1 = np.sqrt(1. / nodes) * np.random.rand(nodes, 1)
W2 = np.sqrt(1. / nodes) * np.random.rand(nodes, nodes)
b2 = np.sqrt(1. / size) * np.random.rand(nodes, 1)

# Display the shape of every parameter
tuple(param.shape for param in (W1, b1, W2, b2))

In [None]:
# Define batch size
batch_size = 128

# X_train is laid out as (features, samples) after the transpose above, so the
# sample count is the number of COLUMNS; len(X_train) would be the number of
# pixels (784) and row-slicing would cut features instead of samples.
num_samples = X_train.shape[1]

# Number of full batches (a shorter final batch may follow)
num_batches = num_samples // batch_size

# Create mini-batches by slicing samples (columns of X, entries of Y).
# Slices past the end are clipped, so the last batch automatically holds the
# remaining samples when num_samples is not a multiple of batch_size.
mini_batches = [
    (X_train[:, start:start + batch_size], Y_train[start:start + batch_size])
    for start in range(0, num_samples, batch_size)
]

In [None]:
def ReLU(Z):
    """Rectified linear unit: element-wise maximum of ``Z`` and 0."""
    rectified = np.maximum(Z, 0)
    return rectified

def derivative_ReLU(Z):
    """Return the ReLU derivative as a boolean mask: True where ``Z`` > 0."""
    is_active = Z > 0
    return is_active

def softmax(Z):
    """Compute the softmax of each set of scores in ``Z``.

    Scores are normalised along axis 0, so for a 2-D ``Z`` every column is one
    set of scores and every column of the result sums to 1.
    """
    # Subtract each column's own maximum before exponentiating. This prevents
    # overflow in exp and, unlike subtracting the global maximum, guarantees
    # every column keeps one exp(0) = 1 term, so the denominator can never
    # underflow to 0 and turn the whole column into NaN.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=0, keepdims=True)

def init_params(nodes, size):
    """Draw the initial weights and biases of the two-layer network.

    Every array is filled with uniform [0, 1) samples from ``np.random.rand``
    and multiplied by sqrt(1/size) or sqrt(1/nodes).

    Returns:
        (W1, b1, W2, b2) with shapes (nodes, size), (nodes, 1),
        (nodes, nodes) and (nodes, 1).
    """
    scale_by_size = np.sqrt(1./(size))
    scale_by_nodes = np.sqrt(1./nodes)

    # Draw order W1, b1, W2, b2 matters for reproducibility with a seeded RNG.
    W1 = np.random.rand(nodes, size) * scale_by_size
    b1 = np.random.rand(nodes, 1) * scale_by_nodes
    W2 = np.random.rand(nodes, nodes) * scale_by_nodes
    b2 = np.random.rand(nodes, 1) * scale_by_size
    return W1, b1, W2, b2

def forward_propagation(X, W1, b1, W2, b2):
    """Run one forward pass through the network.

    Hidden layer: ``Z1 = W1.X + b1`` followed by ReLU.
    Output layer: ``Z2 = W2.A1 + b2`` followed by softmax.

    Returns:
        (Z1, A1, Z2, A2): pre-activation and activation of each layer, with
        one column per column of ``X``.
    """
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = ReLU(hidden_pre)
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_act

def one_hot(Y, nodes):
    """Return a (nodes, Y.size) matrix of zeros whose column ``j`` has a
    single 1, placed in the row given by ``Y[j]``."""
    n_samples = Y.size
    column_idx = np.arange(n_samples)
    encoded = np.zeros((nodes, n_samples))
    # Fancy indexing pairs row Y[j] with column j
    encoded[Y, column_idx] = 1
    return encoded

def backward_propagation(nodes, X, Y, A1, A2, W2, Z1, m):
    """Compute the gradients of both layers for one batch.

    Args:
        nodes: number of rows of the one-hot target matrix.
        X: inputs, one sample per column.
        Y: integer labels, one per column of ``X``.
        A1, A2, Z1: values returned by ``forward_propagation`` for ``X``.
        W2: output-layer weights.
        m: divisor used to average the gradients.

    Returns:
        (dW1, db1, dW2, db2)
    """
    averaging = 1/m
    targets = one_hot(Y, nodes)

    # Output layer.
    # NOTE(review): the factor 2 only rescales the step (softmax with
    # cross-entropy would give A2 - targets) — confirm the intended loss.
    dZ2 = 2*(A2 - targets)
    db2 = averaging * dZ2.sum(axis=1, keepdims=True)
    dW2 = averaging * np.dot(dZ2, A1.T)

    # Hidden layer: push the error back through W2, gated by the ReLU mask
    dZ1 = np.dot(W2.T, dZ2) * derivative_ReLU(Z1)
    db1 = averaging * dZ1.sum(axis=1, keepdims=True)
    dW1 = averaging * np.dot(dZ1, X.T)

    return dW1, db1, dW2, db2

def update_params(nodes, alpha, W1, b1, W2, b2, dW1, db1, dW2, db2):
    """Apply one gradient-descent step of size ``alpha``.

    The parameter arrays are modified in place (``-=``) and the same objects
    are returned. Bias gradients are reshaped to (nodes, 1) first.
    """
    bias_shape = (nodes, 1)

    # Weights
    W1 -= alpha * dW1
    W2 -= alpha * dW2

    # Biases
    b1 -= alpha * np.reshape(db1, bias_shape)
    b2 -= alpha * np.reshape(db2, bias_shape)

    return W1, b1, W2, b2

def get_predictions(A2):
    """Return, for each column of ``A2``, the row index of its largest value."""
    predicted_rows = np.argmax(A2, axis=0)
    return predicted_rows

def get_accuracy(predictions, Y):
    """Return the fraction of entries where ``predictions`` equals ``Y``."""
    matches = predictions == Y
    n_correct = np.sum(matches)
    return n_correct / Y.size

def gradient_descent(X, Y, alpha, iterations, batch_size, nodes=512):
    """Train the two-layer network with mini-batch gradient descent.

    Args:
        X: training inputs, shape (features, samples) — one sample per column.
        Y: integer labels, one per column of ``X``.
        alpha: learning rate.
        iterations: number of full passes over the data.
        batch_size: number of samples per parameter update.
        nodes: width of both layers (W2 is nodes x nodes). It is also the
            one-hot height, so it must exceed the largest label in ``Y``.

    Returns:
        (W1, b1, W2, b2): the trained parameters.
    """
    size, m = X.shape

    W1, b1, W2, b2 = init_params(nodes, size)

    # Report about ten times per run; max(..., 1) avoids a modulo-by-zero
    # when fewer than 10 iterations are requested.
    report_every = max(iterations // 10, 1)

    for i in range(iterations):
        # Mini-batch training: one forward/backward/update step per batch.
        # (Previously the batch was sliced but never used, and a single step
        # on the full dataset ran once per iteration.)
        for batch_start in range(0, m, batch_size):
            batch_end = min(batch_start + batch_size, m)
            X_batch = X[:, batch_start:batch_end]
            Y_batch = Y[batch_start:batch_end]
            # The last batch may be shorter, so average over its real size
            m_batch = batch_end - batch_start

            Z1, A1, Z2, A2 = forward_propagation(X_batch, W1, b1, W2, b2)
            dW1, db1, dW2, db2 = backward_propagation(
                nodes, X_batch, Y_batch, A1, A2, W2, Z1, m_batch
            )
            W1, b1, W2, b2 = update_params(
                nodes, alpha, W1, b1, W2, b2, dW1, db1, dW2, db2
            )

        if (i+1) % report_every == 0:
            print(f"Iteration: {i+1} / {iterations}")
            # Accuracy over the whole training set with the current weights
            _, _, _, A2_full = forward_propagation(X, W1, b1, W2, b2)
            prediction = get_predictions(A2_full)
            print(f'{get_accuracy(prediction, Y):.3%}')
    return W1, b1, W2, b2

def make_predictions(X, W1, b1, W2, b2):
    """Run a forward pass on ``X`` and return the predicted row index for
    each column."""
    output_activations = forward_propagation(X, W1, b1, W2, b2)[-1]
    return get_predictions(output_activations)

def show_prediction(index, X, Y, W1, b1, W2, b2):
    """Print the predicted and true label of sample ``index``, then plot it."""
    # Indexing with np.newaxis (None) adds a length-1 axis, turning the 1-D
    # slice X[:, index] into a single-column matrix. That is the layout
    # make_predictions expects: a matrix whose columns hold the pixels of one
    # image each — here just one column.
    column = X[:, index][:, np.newaxis]
    predicted = make_predictions(column, W1, b1, W2, b2)
    true_label = Y[index]
    print("Prediction: ", predicted)
    print("Label: ", true_label)

    # Undo the normalisation and restore the 2-D image layout for display
    pixels = column.reshape((WIDTH, HEIGHT)) * SCALE_FACTOR

    plt.gray()
    plt.imshow(pixels, interpolation='nearest')
    plt.show()

In [None]:
# Train the network, then persist the learned parameters to disk
W1, b1, W2, b2 = gradient_descent(
    X_train, Y_train, alpha=0.15, iterations=200, batch_size=batch_size
)
with open("trained_params.pkl", "wb") as dump_file:
    pickle.dump((W1, b1, W2, b2), dump_file)

In [None]:
# Reload the parameters written by the training cell.
# NOTE: pickle.load can execute arbitrary code — only open files you created.
with open("trained_params.pkl", "rb") as dump_file:
    W1, b1, W2, b2 = pickle.load(dump_file)

# Show a handful of test digits next to the network's prediction
for sample_index in (0, 1, 2, 100, 200):
    show_prediction(sample_index, X_test, Y_test, W1, b1, W2, b2)

In [None]:
# Predict every column of the test matrix with the loaded parameters
dev_predictions = make_predictions(X_test, W1, b1, W2, b2)
# Fraction of test samples whose prediction equals the label
accuracy = get_accuracy(dev_predictions, Y_test)
print(f"Accuracy on the test dataset: {accuracy:.3%}")