In [11]:
import hashlib
import os
import pickle
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import resample
from tqdm import tqdm
from zipfile import ZipFile
import tensorflow as tf
import math
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def uncompress_files(file):
    """
    Read every image stored in a zip archive into flat float32 vectors.

    :param file: Archive location handed straight to ``ZipFile``.
    :return: Tuple ``(features, labels)`` of numpy arrays. Each feature row is
        one flattened image; each label is the first character of the
        image's base file name.
    """
    pixel_rows, letters = [], []
    with ZipFile(file) as archive:
        # tqdm only wraps the name list so a progress bar is drawn.
        for entry in tqdm(archive.namelist(), unit='files'):
            # Names ending in '/' are directory entries, not images.
            if entry.endswith('/'):
                continue
            with archive.open(entry) as handle:
                picture = Image.open(handle)
                # Read the pixel data while the archive member is still open.
                picture.load()
                pixels = np.array(picture, dtype=np.float32).flatten()
            # The class letter is the leading character of the base name.
            letters.append(os.path.basename(entry)[0])
            pixel_rows.append(pixels)
    return np.array(pixel_rows), np.array(letters)
train_features, train_labels = uncompress_files('notMNIST_train.zip')
test_features, test_labels = uncompress_files('notMNIST_test.zip')

# Subsample the training set to at most 150000 examples.
# - replace=False draws distinct examples; the default (with replacement)
#   duplicates images, and those duplicates can later land on both sides of
#   the train/validation split.
# - random_state pins the draw so Restart & Run All reproduces the same subset.
# - min(...) keeps the call valid when the archive holds fewer examples than
#   requested (sampling without replacement cannot exceed the population).
train_features, train_labels = resample(
    train_features,
    train_labels,
    n_samples=min(150000, len(train_features)),
    replace=False,
    random_state=0)
print('All the data is loaded')

100%|█████████████████████████████████████████████████████████████████████| 210001/210001 [00:32<00:00, 6433.85files/s]
100%|███████████████████████████████████████████████████████████████████████| 10001/10001 [00:01<00:00, 6688.77files/s]


All the data is loaded


In [3]:
def normalize_grayscale(image_data, a=0.1, b=0.9, grayscale_min=0, grayscale_max=255):
    """
    Linearly rescale grayscale pixel values from
    [grayscale_min, grayscale_max] into [a, b] (min-max scaling).

    :param image_data: Array (or scalar) of pixel intensities.
    :param a: Lower bound of the output range (default 0.1).
    :param b: Upper bound of the output range (default 0.9).
    :param grayscale_min: Smallest possible input intensity (default 0).
    :param grayscale_max: Largest possible input intensity (default 255).
    :return: ``image_data`` mapped so grayscale_min -> a and grayscale_max -> b.
    :raises ValueError: If grayscale_max equals grayscale_min, which would
        make the scaling divide by zero.
    """
    if grayscale_max == grayscale_min:
        raise ValueError('grayscale_max must differ from grayscale_min')
    return a + (((image_data - grayscale_min)*(b - a))/(grayscale_max - grayscale_min))

# Apply the same rescaling to the training and the test pixels.
train_features, test_features = map(
    normalize_grayscale, (train_features, test_features))
print('Features are normalized')

Features are normalized


In [4]:
# Fit the binarizer on the training labels only, then encode both label sets
# with that one fitted encoder so their columns line up.
encoder = LabelBinarizer().fit(train_labels)

# Encode and cast to float32 in one step for each label set.
train_labels = encoder.transform(train_labels).astype('float32')
test_labels = encoder.transform(test_labels).astype('float32')
print('Labels are One-Hot Encoded')

Labels are One-Hot Encoded


In [5]:
# Hold out 5% of the training examples for validation; the fixed random_state
# keeps the split itself repeatable between runs.
(train_features, valid_features,
 train_labels, valid_labels) = train_test_split(
    train_features,
    train_labels,
    test_size=0.05,
    random_state=0,
)

In [6]:
# Saving the data for easy access
pickle_file = 'notMNIST.pickle'
if not os.path.isfile(pickle_file):
    print('Saving data to pickle file ...')
    # Dump into a temporary sibling file and rename it into place only after
    # the dump succeeded. Otherwise a failed dump would leave a truncated
    # cache file that the isfile() guard above would trust on later runs.
    partial_file = pickle_file + '.tmp'
    try:
        with open(partial_file, 'wb') as pfile:
            pickle.dump({
                'train_features': train_features,
                'train_labels': train_labels,
                'valid_features': valid_features,
                'valid_labels': valid_labels,
                'test_features': test_features,
                'test_labels': test_labels

            }, pfile, pickle.HIGHEST_PROTOCOL)
        os.replace(partial_file, pickle_file)
    except Exception as e:
        print('Unable to save data to', pickle_file, ':', e)
        # Best-effort cleanup of the partial dump; the original error is
        # what gets re-raised below.
        try:
            if os.path.exists(partial_file):
                os.remove(partial_file)
        except OSError:
            pass
        raise
print('Data cached in pickle file.')

Saving data to pickle file ...
Data cached in pickle file.


In [9]:
# Reloading the data
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# a cache file this notebook wrote itself.
pickle_file = 'notMNIST.pickle'
with open(pickle_file, 'rb') as f:
    pickle_data = pickle.load(f)

# Pull the six arrays back out of the cached dictionary in one unpacking.
(train_features, train_labels,
 valid_features, valid_labels,
 test_features, test_labels) = (
    pickle_data[key] for key in (
        'train_features', 'train_labels',
        'valid_features', 'valid_labels',
        'test_features', 'test_labels',
    )
)
# Drop the dictionary itself now that the arrays have their own names.
del pickle_data
print('Data successfully loaded')

Data successfully loaded


In [15]:
# Single-layer softmax classifier: one weight column per label.
features_count = 784
labels_count = 10
features = tf.placeholder(tf.float32)
labels = tf.placeholder(tf.float32)
weights = tf.Variable(tf.truncated_normal((features_count, labels_count)))
biases = tf.Variable(tf.zeros(labels_count))

# Feed dictionaries binding each data split to the two placeholders.
train_feed_dict = {features: train_features, labels: train_labels}
valid_feed_dict = {features: valid_features, labels: valid_labels}
test_feed_dict = {features: test_features, labels: test_labels}

# Linear function XW + b
logits = tf.add(tf.matmul(features, weights), biases)

# Class probabilities (not used by the loss below).
predictions = tf.nn.softmax(logits)

# Cross entropy, one value per example.
# Computed from the raw logits by the fused op instead of
# -sum(labels * log(softmax(logits))): when a softmax output underflows to 0
# the hand-written form evaluates 0 * log(0) = NaN and poisons the loss.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)

# Training loss
loss = tf.reduce_mean(cross_entropy)

init = tf.global_variables_initializer()

# Test Case: the loss must evaluate on every split, and the biases must
# still hold their zero initial value.
with tf.Session() as sess:
    sess.run(init)
    sess.run(loss, feed_dict=train_feed_dict)
    sess.run(loss, feed_dict=valid_feed_dict)
    sess.run(loss, feed_dict=test_feed_dict)
    biases_data = sess.run(biases)

assert not np.count_nonzero(biases_data), 'biases must be zero'
print('Test Case Passed')

Test Case Passed


In [17]:
# An example counts as correct when the index of the largest predicted value
# matches the index of the largest label value along axis 1.
correct_prediction = tf.equal(
    tf.argmax(predictions, 1),
    tf.argmax(labels, 1),
)
# Casting the boolean matches to float32 and averaging gives the fraction
# of correct predictions.
accuracy = tf.reduce_mean(
    tf.cast(correct_prediction, tf.float32)
)
print('Accurcy function created')

Accurcy function created
