# Acquiring Data
### Download data and pre-process

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist

## Helper functions

In [3]:
### Normalize features ###
# Expects numpy arrays whose values lie in the 0-255 range
def scaleFeatures(X_train, X_test):
    """Rescale both feature arrays by dividing every value by 255.

    Parameters
    ----------
    X_train, X_test : numpy arrays holding values between 0 and 255.

    Returns
    -------
    (X_train_scaled, X_test_scaled) : the two divided arrays, in that order.
    The division builds new arrays; the arguments are left untouched.
    """
    max_intensity = 255
    return X_train / max_intensity, X_test / max_intensity


### One-hot encode integer labels ###
# Assumes that each integer label corresponds to the vector
# element index in the one-hot-encoded representation
def _asLabelIndices(labels, name):
    """Validate `labels` and return them as a 1-D array of column indices."""
    label_arr = np.asarray(labels)
    if label_arr.size == 0:
        # np.asarray([]) defaults to float64, which is not a valid index dtype
        return np.zeros(0, dtype=np.intp)
    if label_arr.ndim != 1:
        raise ValueError("%s must be one-dimensional, got shape %s"
                         % (name, label_arr.shape))
    if not np.issubdtype(label_arr.dtype, np.integer):
        raise TypeError("%s must contain integer labels, got dtype %s"
                        % (name, label_arr.dtype))
    if label_arr.min() < 0:
        # A negative index would silently wrap around to the last columns
        raise ValueError("%s must not contain negative labels" % name)
    return label_arr


def oneHotEncode(y_train_int, y_test_int):
    """One-hot encode the training and testing integer labels.

    Each label is used directly as the column index that is set to 1.
    The number of columns is the largest label found in either split
    plus one, so a class that is absent from the training labels (or that
    appears only in the testing labels) no longer causes an IndexError.
    For labels covering 0..K-1 in the training set the output is the same
    K-column matrix as before.

    Parameters
    ----------
    y_train_int, y_test_int : 1-D sequences / arrays of non-negative ints.

    Returns
    -------
    (y_train, y_test) : float arrays of shape (n_samples, num_classes)
    with exactly one 1 per row.

    Raises
    ------
    TypeError  : if a non-empty label array is not of an integer dtype.
    ValueError : if a label array is not 1-D or contains negative labels.
    """
    train_idx = _asLabelIndices(y_train_int, "y_train_int")
    test_idx = _asLabelIndices(y_test_int, "y_test_int")

    # Width of the encoding: cover every label index seen in either split
    largest_label = max(
        (int(idx.max()) for idx in (train_idx, test_idx) if idx.size),
        default=-1,
    )
    num_classes = largest_label + 1

    # Initialize output matrices
    y_train = np.zeros((len(train_idx), num_classes))
    y_test = np.zeros((len(test_idx), num_classes))

    # Set one entry per row: row i gets a 1 in column label[i]
    y_train[np.arange(len(train_idx)), train_idx] = 1
    y_test[np.arange(len(test_idx)), test_idx] = 1

    return y_train, y_test

## Download mnist dataset

In [4]:
# Load the MNIST train/test splits through keras
(x_train, y_train_int), (x_test, y_test_int) = mnist.load_data()

# Print the shape of each loaded array, one per line
print(*map(np.shape, (x_train, y_train_int, x_test, y_test_int)), sep="\n")

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


#### Scale the features

In [5]:
# Scale the features, then print each split's min and max
# to check the resulting value range
X_train, X_test = scaleFeatures(x_train, x_test)

print(X_train.min(), X_train.max())
print(X_test.min(), X_test.max())

0.0 1.0
0.0 1.0


#### One-hot encode the labels

In [6]:
# Convert the integer labels to one-hot rows and show the resulting shapes
y_train, y_test = oneHotEncode(y_train_int, y_test_int)
print(y_train.shape)
print(y_test.shape)

(60000, 10)
(10000, 10)


In [7]:
# Set to True to write the pre-processed MNIST arrays to disk
SAVE_DATA = False
# Destination .npz archive; change this to wherever you want to save the data
SAVE_PATH = "/Users/anthony/Documents/Comp551_Projects/CapNET/data/mnistData.npz"

if SAVE_DATA:
    # Each keyword becomes the name of one array inside the archive
    np.savez(
        SAVE_PATH,
        **{
            "X_train": X_train,
            "y_train": y_train,
            "X_test": X_test,
            "y_test": y_test,
        }
    )
    print("Saved to: %s" % SAVE_PATH)

# Drop the cell-local settings so they do not linger in the namespace
del SAVE_DATA, SAVE_PATH

Saved to: /Users/anthony/Documents/Comp551_Projects/CapNET/data/mnistData.npz


In [8]:
# Remove every MNIST array (raw and pre-processed) from the namespace
# before the next dataset reuses the same variable names
del (x_train, y_train_int, x_test, y_test_int,
     X_train, X_test, y_train, y_test)

## Download fashion-mnist dataset

In [10]:
from keras.datasets import fashion_mnist

# Load the Fashion-MNIST train/test splits through keras
(x_train, y_train_int), (x_test, y_test_int) = fashion_mnist.load_data()

# Print the shape of each loaded array, one per line
print(*map(np.shape, (x_train, y_train_int, x_test, y_test_int)), sep="\n")

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


#### Scale the features

In [11]:
# Scale the features, then print each split's min and max
# to check the resulting value range
X_train, X_test = scaleFeatures(x_train, x_test)

print(X_train.min(), X_train.max())
print(X_test.min(), X_test.max())

0.0 1.0
0.0 1.0


#### One-hot encode the labels

In [12]:
# Convert the integer labels to one-hot rows and show the resulting shapes
y_train, y_test = oneHotEncode(y_train_int, y_test_int)
print(y_train.shape)
print(y_test.shape)

(60000, 10)
(10000, 10)


In [13]:
# Set to True to write the pre-processed Fashion-MNIST arrays to disk
SAVE_DATA = False
# Destination .npz archive; change this to wherever you want to save the data
SAVE_PATH = "/Users/anthony/Documents/Comp551_Projects/CapNET/data/fashion_mnist_Data.npz"

if SAVE_DATA:
    # Save the *scaled* features (X_train / X_test), not the raw
    # x_train / x_test arrays returned by load_data(); the names differ
    # only by case, and the raw ones still hold the unscaled values.
    np.savez(SAVE_PATH,
             X_train = X_train, y_train = y_train,
             X_test = X_test, y_test = y_test
            )
    print("Saved to: %s" % SAVE_PATH)

# Drop the cell-local settings so they do not linger in the namespace
del SAVE_DATA, SAVE_PATH

Saved to: /Users/anthony/Documents/Comp551_Projects/CapNET/data/fashion_mnist_Data.npz
