In [None]:
'''
(c) 2023 Charles Ide
This is a project designed to explore optical character recognition with TensorFlow. This is the main file used to execute the program - all other functionality
should be refactored away to modules during development.
'''

import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models

# Fetch MNIST; Keras hands back a (base, test) pair of (images, labels) tuples
(x_base, y_base), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Hold out 20% of the base set for validation; the fixed seed keeps the split repeatable
x_train, x_val, y_train, y_val = train_test_split(
    x_base,
    y_base,
    test_size=0.2,
    random_state=42,
)


def _as_model_input(images):
    """Scale pixel values by 1/255 as float32 and reshape to (-1, 28, 28, 1).

    The trailing axis of size 1 is the single grayscale channel expected by
    the model's input layer.
    """
    return (images.astype('float32') / 255.0).reshape((-1, 28, 28, 1))


# Apply the same scaling + reshaping to every image split
x_train, x_val, x_test = (
    _as_model_input(images) for images in (x_train, x_val, x_test)
)

In [None]:
# One-hot encode the integer digit labels, as required by the
# 'categorical_crossentropy' loss the model is compiled with.
NUM_CLASSES = 10


def _one_hot(labels):
    """Return `labels` one-hot encoded over NUM_CLASSES classes.

    Labels that already have more than one dimension are returned unchanged.
    This keeps the cell idempotent: without the guard, running the cell a
    second time would feed the one-hot matrix back into `to_categorical` and
    silently produce a wrongly shaped (N, 10, 10) array.
    """
    if labels.ndim > 1:
        return labels
    return tf.keras.utils.to_categorical(labels, num_classes=NUM_CLASSES)


y_train = _one_hot(y_train)
y_val = _one_hot(y_val)
y_test = _one_hot(y_test)


In [None]:
# Sanity check: print the shape of each image array, then each label array
# (train / validation / test order in both groups)
for split in (x_train, x_val, x_test, y_train, y_val, y_test):
    print(split.shape)

In [None]:
# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling are repeatable from run to run (the data split above is already
# seeded). NOTE: some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

# Build our model: one convolution + pooling stage feeding a small dense
# classifier that outputs a probability for each of the 10 digit classes.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Compile the model; categorical cross-entropy expects one-hot encoded labels
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model, reporting validation metrics after every epoch.
# Keeping the returned History makes the per-epoch metrics available later
# and stops its repr from being echoed as the cell's output.
history = model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_val, y_val))


In [31]:
# Score the trained model on the held-out test set.
# evaluate() yields the loss followed by the compiled metric (accuracy).
evaluation = model.evaluate(x_test, y_test)
test_loss, test_accuracy = evaluation
print(f'Test accuracy: {test_accuracy}')


Test accuracy: 0.9865999817848206


In [40]:
# Use the model to predict an actual picture

def preprocess_image(image_path, invert=False):
    """Load an image file and convert it into a single-sample model input.

    The image is converted to 8-bit grayscale, resized to 28x28 pixels,
    scaled to the range [0, 1] and reshaped to (1, 28, 28, 1).

    Args:
        image_path: Path (or file object) of the image to load.
        invert: When True, flip the intensities (``1.0 - pixels``). MNIST
            digits are light strokes on a dark background, so a photo of
            dark ink on light paper usually needs ``invert=True`` to look
            like the training data. Defaults to False, which preserves the
            previous behaviour.

    Returns:
        A float32 NumPy array of shape (1, 28, 28, 1).
    """
    # The context manager guarantees the underlying file handle is released.
    with Image.open(image_path) as source:
        # Convert to grayscale *before* resizing: palette-mode images would
        # otherwise be resized with nearest-neighbour sampling only.
        grayscale = source.convert('L')
        resized = grayscale.resize((28, 28))

    # float32 matches the dtype the training images were cast to.
    pixels = np.asarray(resized, dtype='float32') / 255.0
    if invert:
        pixels = 1.0 - pixels

    # Add the batch axis in front and the channel axis at the end.
    return pixels.reshape((1, 28, 28, 1))

# Photo of a handwritten digit to classify
digit_photo_path = '/Users/charleside/Downloads/Test_Digit2.jpg'
input_image = preprocess_image(digit_photo_path)

# Ask the trained network for its class probabilities
predictions = model.predict(input_image)

# The index of the largest probability is the digit the model favours
predicted_digit = predictions.argmax()

# Report the winning digit, followed by the full probability vector
print(f'Predicted Digit: {predicted_digit}')
print(predictions)


Predicted Digit: 6
[[0.09293669 0.00579865 0.04019827 0.03549934 0.00910028 0.10255522
  0.42194396 0.00869054 0.24806945 0.03520753]]
