In [1]:
import json
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer

## If running from Colab: download and extract the dataset

In [None]:
# Download the train / test / extra archives of the house-numbers dataset
# into the current working directory (shell escapes, run by the notebook).
!wget http://ufldl.stanford.edu/housenumbers/train.tar.gz
!wget http://ufldl.stanford.edu/housenumbers/test.tar.gz 
!wget http://ufldl.stanford.edu/housenumbers/extra.tar.gz

In [None]:
!tar -xvzf train.tar.gz
!tar -xvzf test.tar.gz 
!tar -xvzf extra.tar.gz 

## Configs

In [9]:
# Config
# Annotation entries whose number of boxes differs from this value are skipped
digits_to_predict = 1 # Use only images with this number of digits

# Input
img_size = (180, 180) # Passed as target_size when each image is loaded/resized
img_channels = 3 # 3 selects the 'rgb' colour mode, any other value 'grayscale'

# paths
TRAIN_IMGS_PATH = 'train' # Directory with the training original images
TEST_IMGS_PATH = 'test' # Directory with the test original images
EXTRA_TRAIN_IMGS_PATH = 'extra' # Directory with the extra training original images

# Pre-processing the data

In [14]:
# Read the annotation files for the train, extra and test splits
def _load_annotations(json_path):
    """Parse one annotations JSON file and return its decoded contents."""
    with open(json_path) as handle:
        return json.load(handle)


dataTrain = _load_annotations('annotations/trainDigitStruct.json')
extraDataTrain = _load_annotations('annotations/extraDigitStruct.json')
dataTest = _load_annotations('annotations/testDigitStruct.json')

In [14]:
# Fresh, independent accumulators that pre_process_images appends to:
# image arrays, per-image labels, per-image box coordinates, source paths
Xtrain, labels, bboxes, imagePaths = [], [], [], []

In [7]:
def pre_process_images(annotations_file, imgs_folder):
    """Load, resize and annotate every image of one split.

    Only entries with exactly ``digits_to_predict`` boxes are kept. For each
    kept entry the function appends to the module-level lists:

    * ``Xtrain``     - the image resized to ``img_size`` as an array,
    * ``labels``     - a list with one label per digit (label 10 is mapped to 0),
    * ``bboxes``     - a flat list ``[startX, startY, endX, endY, ...]`` with the
                       box corners divided by the original image width/height,
    * ``imagePaths`` - the path the image was read from.

    Args:
        annotations_file: iterable of dicts, each providing a 'filename' and a
            'boxes' list whose items have 'left', 'top', 'width', 'height'
            and 'label' keys.
        imgs_folder: directory that contains the files named by 'filename'.

    Returns:
        None. Results are accumulated in the module-level lists listed above.
    """
    # Does not depend on the item, so compute it once.
    color_mode = 'rgb' if img_channels == 3 else 'grayscale'

    for item in annotations_file:
        boxes = item['boxes']

        # Filter before touching the disk: skipped images are never opened.
        if len(boxes) != digits_to_predict:
            continue

        imagePath = os.path.sep.join([imgs_folder, item['filename']])

        # The original (un-resized) dimensions are needed to scale the
        # bounding box coordinates relative to the input image.
        (w, h) = load_img(imagePath).size

        image_boxes = []
        img_labels = []
        for i in range(digits_to_predict):
            try:
                box = boxes[i]
                left, top = box['left'], box['top']
                coords = [
                    left / w,
                    top / h,
                    (left + box['width']) / w,
                    (top + box['height']) / h,
                ]
                # Fix to change the 10.0 label for a 0
                label = 0.0 if box['label'] == 10.0 else box['label']
            except (IndexError, KeyError):
                # Missing digit or incomplete box record: use an empty box and
                # the placeholder label 11. Only lookup errors are caught so
                # that interrupts and genuine bugs are not silently swallowed.
                coords = [0, 0, 0, 0]
                label = 11

            # Append both values together, after the lookups succeeded, so the
            # boxes and labels always stay aligned (exactly one of each per digit).
            image_boxes.extend(coords)
            img_labels.append(label)

        # load the image at the configured size and convert it to an array
        image = load_img(imagePath, target_size=img_size, color_mode=color_mode)
        image = img_to_array(image)

        Xtrain.append(image)
        labels.append(img_labels)
        bboxes.append(image_boxes)
        imagePaths.append(imagePath)

In [16]:
# Fill the accumulator lists from the training annotations and images
pre_process_images(annotations_file=dataTrain, imgs_folder=TRAIN_IMGS_PATH)

In [17]:
# Uncomment if you want to include the extra dataset
# pre_process_images(extraDataTrain, EXTRA_TRAIN_IMGS_PATH)

In [18]:
# Turn the accumulated Python lists (image paths, bounding boxes, class
# labels and image data) into NumPy arrays. The pixel intensities are
# rescaled from the range [0, 255] to [0, 1].
imagePaths = np.array(imagePaths)
bboxes = np.array(bboxes)
labels = np.array(labels)
Xtrain = np.array(Xtrain)
Xtrain = Xtrain / 255.0

In [19]:
# Show the shape of every array, one per line
for array in (Xtrain, labels, bboxes, imagePaths):
    print(array.shape)

(14522, 180, 180, 3)
(14522, 1)
(14522, 4)
(14522,)


In [20]:
# one-hot encode the labels: fit the binarizer on them, then transform them
lb = LabelBinarizer().fit(labels)
labels = lb.transform(labels)

In [21]:
# Sanity check: after one-hot encoding there is one column per class
labels.shape

(14522, 10)

In [22]:
# np.save does not create missing directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs('preprocessed_train', exist_ok=True)

# Persist the pre-processed training arrays
np.save('preprocessed_train/Xtrain.npy', Xtrain)
np.save('preprocessed_train/labels.npy', labels)
np.save('preprocessed_train/bboxes.npy', bboxes)
np.save('preprocessed_train/imagePaths.npy', imagePaths)

# If you used the extra dataset save the files with the following names
# np.save('preprocessed_train/Xtrain2.npy', Xtrain)
# np.save('preprocessed_train/labels2.npy', labels)
# np.save('preprocessed_train/bboxes2.npy', bboxes)
# np.save('preprocessed_train/imagePaths2.npy', imagePaths)

# Test dataset

In [15]:
# Start again from four separate empty lists before processing the test
# split (the names are reused for the test data)
Xtrain, labels, bboxes, imagePaths = ([] for _ in range(4))

In [16]:
# Fill the accumulator lists from the test annotations and images
pre_process_images(annotations_file=dataTest, imgs_folder=TEST_IMGS_PATH)

In [17]:
# Turn the accumulated lists of the test split into NumPy arrays; the image
# data is additionally rescaled from the range [0, 255] to [0, 1]
Xtrain = np.true_divide(np.array(Xtrain), 255.0)
labels, bboxes, imagePaths = np.array(labels), np.array(bboxes), np.array(imagePaths)

In [18]:
# one-hot encode the test labels with a binarizer fitted on these labels
# NOTE(review): this is a new binarizer fitted on the test split only; its
# columns line up with the training encoding only if both splits contain
# the same set of labels - confirm.
lb = LabelBinarizer()
lb.fit(labels)
labels = lb.transform(labels)

In [19]:
# Show the shape of every test array, one per line
print(*(array.shape for array in (Xtrain, labels, bboxes, imagePaths)), sep='\n')

(2483, 180, 180, 3)
(2483, 10)
(2483, 4)
(2483,)


In [20]:
# np.save does not create missing directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs('preprocessed_test', exist_ok=True)

# Persist the pre-processed test arrays (the image data is stored as Xdata.npy)
np.save('preprocessed_test/Xdata.npy', Xtrain)
np.save('preprocessed_test/labels.npy', labels)
np.save('preprocessed_test/bboxes.npy', bboxes)
np.save('preprocessed_test/imagePaths.npy', imagePaths)