In [1]:
#GETTING IMAGE DATA
import os
import numpy as np
from PIL import Image

# Load every hand-drawn image in `path` as a 28x28 greyscale array.
empty_matrix = []
path = r'F:\Handwritten Calculator Project\1. Creating Dataset\Hand Drawn Images'

# os.listdir returns entries in arbitrary order, but the labels for these
# images are assigned purely by position, so iterate in a deterministic
# (sorted) order. File names must therefore sort in class order.
for file in sorted(os.listdir(path)):
    file_path = os.path.join(path, file)
    # The context manager closes the underlying file handle; convert() reads
    # the pixel data and returns an independent image.
    with Image.open(file_path) as image:
        greyscale_image = image.convert('L')
    pixel_array = np.asarray(greyscale_image, dtype=np.uint8)
    if pixel_array.shape != (28, 28):
        raise ValueError(
            f"{file_path}: expected a 28x28 image, got {pixel_array.shape}"
        )
    empty_matrix.append(pixel_array)

# Stack into a single (n_images, 28, 28) uint8 array.
Matrix_0 = np.array(empty_matrix, dtype=np.uint8)

In [2]:
# Sanity check: (number of loaded images, 28, 28).
Matrix_0.shape

(300, 28, 28)

In [3]:
#LABEL CREATOR
# Class codes 10..15, each repeated for 50 consecutive samples, in the same
# order as the loaded images.
Labels = [class_code for class_code in range(10, 16) for _ in range(50)]

Labels_1 = np.asarray(Labels, dtype=np.uint8)

In [4]:
# Sanity check: one label per loaded image.
Labels_1.shape

(300,)

In [5]:
#TRANSFORMATION - PIXEL INVERSION

# Bitwise NOT of every pixel; for uint8 data this maps intensity v to 255 - v.
Matrix_1 = ~Matrix_0

In [6]:
#TRANSFORMATION - MIRROR IMAGES

def flip_image(image):
    """Flip an image upside down and return it as a one-image batch.

    Args:
        image: array holding 784 values; it is viewed as a 28x28 grid.

    Returns:
        Array of shape (1, 28, 28) whose rows are in reverse order.
    """
    grid = image.reshape((28, 28))
    upside_down = grid[::-1, ...]
    # Leading axis lets the result be concatenated onto an (n, 28, 28) stack.
    return upside_down[np.newaxis, :]

def mirror_image(image):
    """Mirror an image left-to-right.

    Args:
        image: array holding 784 values; it is viewed as a 28x28 grid.

    Returns:
        Array of shape (28, 28) whose columns are in reverse order. Unlike
        flip_image, no leading batch axis is added.
    """
    grid = image.reshape((28, 28))
    return grid[:, ::-1]

# Build one flipped copy of every image. Copies are collected in a list and
# joined once at the end; concatenating inside the loop would recopy the whole
# growing array on every iteration.
flipped_images = []
flipped_labels = []

for idx, (image, label) in enumerate(zip(Matrix_1, Labels_1)):
    if idx < 50:
        # The first 50 samples are mirrored left-right before the vertical
        # flip, which together amounts to a 180-degree rotation.
        flipped_images.append(flip_image(mirror_image(image)))
    else:
        flipped_images.append(flip_image(image))
    flipped_labels.append(label)

# Originals first, then the flipped copies in the same order.
Matrix_temp_1 = np.concatenate([Matrix_1, *flipped_images])
Labels_temp_1 = list(Labels_1) + flipped_labels

Matrix_2 = Matrix_temp_1
Labels_2 = np.array(Labels_temp_1)

In [7]:
# Each sample should still be a single 28x28 image.
print(Matrix_2[0].shape)

(28, 28)


In [8]:
# Originals plus one flipped copy each: images and labels must stay aligned.
print(Matrix_2.shape)
print(Labels_2.shape)

(600, 28, 28)
(600,)


In [9]:
#TRANSFORMATION - PIXEL SHIFT
# `shift` lives in the public scipy.ndimage namespace; the
# scipy.ndimage.interpolation submodule path is deprecated.
from scipy.ndimage import shift

def shift_image(image, dx, dy):
    """Translate an image by whole pixels, filling exposed pixels with 0.

    Args:
        image: array holding 784 values; it is viewed as a 28x28 grid.
        dx: shift along the column axis (positive moves content right).
        dy: shift along the row axis (positive moves content down).

    Returns:
        Array of shape (1, 28, 28) containing the shifted image.
    """
    image = image.reshape((28, 28))
    # scipy expects the shift per axis in (row, column) order.
    shifted_image = shift(image, [dy, dx], cval=0, mode="constant")
    # Leading axis lets the result be concatenated onto an (n, 28, 28) stack.
    return shifted_image[np.newaxis, :]

# Add four one-pixel translations (right, left, down, up) of every image.
# Copies are collected in a list and joined once at the end; concatenating
# inside the loop would recopy the whole growing array on every iteration.
shifted_images = []
shifted_labels = []

for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):
    for image, label in zip(Matrix_2, Labels_2):
        shifted_images.append(shift_image(image, dx, dy))  # shape (1, 28, 28)
        shifted_labels.append(label)

# Originals first, then one full pass of shifted copies per (dx, dy) pair.
Matrix_temp_2 = np.concatenate([Matrix_2, *shifted_images])
Labels_temp_2 = list(Labels_2) + shifted_labels

Matrix_3 = Matrix_temp_2
Labels_3 = np.array(Labels_temp_2)

In [10]:
# Originals plus four shifted copies each: expect 5x the previous sample count.
print(Matrix_3.shape)
print(Labels_3.shape)

(3000, 28, 28)
(3000,)


In [11]:
#TRANSFORMATION - ROTATION

def rotate_image(array, angle):
    """Rotate a 28x28 greyscale image about its centre.

    Args:
        array: array holding 784 pixel intensities in the range 0..255.
        angle: rotation angle in degrees, passed straight to PIL's
            Image.rotate.

    Returns:
        uint8 array of shape (1, 28, 28) containing the rotated image.
    """
    # Coerce to uint8 so PIL reads the buffer as 8-bit greyscale; mode 'L' is
    # what PIL infers for a 2-D uint8 array.
    array = np.ascontiguousarray(array, dtype=np.uint8).reshape((28, 28))
    image = Image.fromarray(array)
    image = image.rotate(angle)
    # Convert back as uint8. Going through a plain integer array here would
    # upcast every array this result is later concatenated with.
    pixel_array = np.array(image, dtype=np.uint8)
    # Leading axis lets the result be concatenated onto an (n, 28, 28) stack.
    return pixel_array[np.newaxis, :]

# Add two rotated copies (10 and 350 degrees) of every image.
# Copies are collected in a list and joined once at the end; concatenating
# inside the loop would recopy the whole growing array on every iteration.
rotated_images = []
rotated_labels = []

for angle in (10, 350):
    for image, label in zip(Matrix_3, Labels_3):
        rotated_images.append(rotate_image(image, angle))  # shape (1, 28, 28)
        rotated_labels.append(label)

# Originals first, then one full pass of rotated copies per angle.
Matrix_temp_3 = np.concatenate([Matrix_3, *rotated_images])
Labels_temp_3 = list(Labels_3) + rotated_labels

Matrix_4 = Matrix_temp_3
Labels_4 = np.array(Labels_temp_3)

In [12]:
# Originals plus two rotated copies each: expect 3x the previous sample count.
print(Matrix_4.shape)
print(Labels_4.shape)

(9000, 28, 28)
(9000,)


In [13]:
#REPEATING DATASET 4 TIMES TO GET TOTAL SIZE OF 36,000 (6000 EXAMPLES PER CLASS)

# Stack four identical copies of the augmented set end to end.
# NOTE(review): these are exact duplicates, and the data is shuffled and split
# into train/test afterwards, so identical samples can land in both sets.
repeats = 4
Matrix_5 = np.concatenate([Matrix_4] * repeats)
Labels_5 = np.concatenate([Labels_4] * repeats)
for repeated in (Matrix_5, Labels_5):
    print(repeated.shape)

(36000, 28, 28)
(36000,)


In [14]:
#SHUFFLING DATASET
# Use a seeded generator so the shuffle is reproducible across kernel
# restarts; the train/test split that follows takes rows by position, so it
# depends directly on this order.
shuffle_rng = np.random.default_rng(42)
shuffle_idx = shuffle_rng.permutation(len(Matrix_5))
# Apply the same permutation to images and labels to keep them aligned.
Matrix_5 = Matrix_5[shuffle_idx]
Labels_5 = Labels_5[shuffle_idx]

In [15]:
#SPLITTING DATASETS
from sklearn.model_selection import train_test_split
# Split images and labels in a single call so both are cut at the same row and
# scikit-learn verifies that they have the same number of samples.
# shuffle=False keeps the existing order: the last 6000 rows become the test set.
(Matrix_train_set, Matrix_test_set,
 Labels_train_set, Labels_test_set) = train_test_split(
    Matrix_5, Labels_5, test_size=6000, shuffle=False
)
print(Matrix_train_set.shape)
print(Matrix_test_set.shape)
print(Labels_train_set.shape)
print(Labels_test_set.shape)

(30000, 28, 28)
(6000, 28, 28)
(30000,)
(6000,)


In [19]:
#IMPORTING MNIST
import tensorflow as tf
from tensorflow import keras

# Load the MNIST train and test splits through Keras.
mnist = keras.datasets.mnist
mnist_train, mnist_test = mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

In [21]:
#JOINING MNIST WITH OUR DATASETS
# Append the hand-drawn samples after the MNIST samples, separately for the
# train and test splits, keeping images and labels in the same order.
X_train_full = np.concatenate([X_train, Matrix_train_set], axis=0)
y_train_full = np.concatenate([y_train, Labels_train_set], axis=0)
X_test_full = np.concatenate([X_test, Matrix_test_set], axis=0)
y_test_full = np.concatenate([y_test, Labels_test_set], axis=0)
for joined in (X_train_full, y_train_full, X_test_full, y_test_full):
    print(joined.shape)

(90000, 28, 28)
(90000,)
(16000, 28, 28)
(16000,)


In [22]:
#SHUFFLING DATASETS
# Use a seeded generator so the final sample order is reproducible across
# kernel restarts.
final_shuffle_rng = np.random.default_rng(42)
shuffle_idx_1 = final_shuffle_rng.permutation(len(X_train_full))
shuffle_idx_2 = final_shuffle_rng.permutation(len(X_test_full))

# Apply each permutation to both images and labels to keep them aligned.
X_train_full = X_train_full[shuffle_idx_1]
y_train_full = y_train_full[shuffle_idx_1]
X_test_full = X_test_full[shuffle_idx_2]
y_test_full = y_test_full[shuffle_idx_2]
print(X_train_full.shape)
print(y_train_full.shape)
print(X_test_full.shape)
print(y_test_full.shape)

(90000, 28, 28)
(90000,)
(16000, 28, 28)
(16000,)


In [23]:
#EXPORTING DATA (NEED TO COPY FILE TO SECOND FOLDER)
# import h5py
# with h5py.File('Final Dataset.h5', 'w') as hdf:
#     hdf.create_dataset('X_train_full', data=X_train_full)
#     hdf.create_dataset('y_train_full', data=y_train_full)
#     hdf.create_dataset('X_test_full', data=X_test_full)
#     hdf.create_dataset('y_test_full', data=y_test_full)