In [2]:
from __future__ import print_function

import os
import sys

import numpy as np
import pandas as pd
import tensorflow as tf
from scipy import ndimage
from six.moves import cPickle as pickle

%matplotlib inline

<h1>Convert data to .pickle format </h1>

In [None]:
# One folder per class. The order matters: merge_datasets() enumerates the
# pickles built from these folders, so list position becomes the integer label.
train_folders = [
    './train/shirt',
    './train/not_a_shirt',
]
test_folders = [
    './test/shirt',
    './test/not_a_shirt',
]

In [None]:
image_size = 64  # Pixel width and height.
pixel_depth = 255.0  # Number of levels per pixel.


def load_letter(folder):
    """Load every image of one class folder into a normalized float32 array.

    Each file is read as an image and rescaled with
    ``(pixel - pixel_depth / 2) / pixel_depth`` (about [-0.5, 0.5] for 8-bit
    pixel values), then stacked along axis 0.

    Args:
        folder: Directory holding the image files of a single class.

    Returns:
        np.ndarray of shape (n_loaded, image_size, image_size), dtype float32,
        containing only the images that could be read.

    Raises:
        Exception: if a readable image is not exactly image_size x image_size
            (e.g. wrong resolution or more than one channel).
    """
    # Sorted so the sample order does not depend on the filesystem.
    image_files = sorted(os.listdir(folder))
    dataset = np.ndarray(shape=(len(image_files), image_size, image_size),
                         dtype=np.float32)
    print(folder)

    cloth_images = 0
    for image in image_files:
        image_file = os.path.join(folder, image)
        try:
            # NOTE(review): ndimage.imread was removed in SciPy 1.2; on newer
            # SciPy this call needs to be swapped for another image reader.
            image_data = (ndimage.imread(image_file).astype(float) -
                          pixel_depth / 2) / pixel_depth
            if image_data.shape != (image_size, image_size):
                raise Exception('Unexpected image shape: %s' % str(image_data.shape))
            dataset[cloth_images, :, :] = image_data
            cloth_images = cloth_images + 1
        except IOError as e:
            # Unreadable entries (corrupt files, sub-directories, ...) are skipped.
            print('Could not read:', image_file, ':', e)

    # Drop the unused tail left behind by skipped files.
    dataset = dataset[0:cloth_images, :, :]

    print('Full dataset tensor:', dataset.shape)
    if cloth_images:
        print('Mean:', np.mean(dataset))
        print('Standard deviation:', np.std(dataset))
    else:
        # Statistics of an empty array only yield nan plus a RuntimeWarning.
        print('No images could be loaded from', folder)
    return dataset
        
def try_pickle(data_folders):
    """Pickle the image array of each class folder next to that folder.

    For every folder ``<path>`` the images are loaded with load_letter() and
    written to ``<path>.pickle``.

    Args:
        data_folders: Iterable of class-folder paths.

    Returns:
        List of pickle file paths, in the same order as `data_folders`.
        A path is listed even when writing it failed; the failure is only
        printed, so a later read of that path will raise.
    """
    dataset_names = []
    for folder in data_folders:
        set_filename = folder + '.pickle'
        dataset_names.append(set_filename)
        print('Pickling %s.' % set_filename)
        dataset = load_letter(folder)

        try:
            with open(set_filename, 'wb') as f:
                pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
        except Exception as e:
            print('Unable to save data to', set_filename, ':', e)

    # Must be indented at function-body level (the original 2-space indent
    # was an IndentationError).
    return dataset_names

# Pickle every class folder once; the returned path lists are used for merging below.
train_datasets = try_pickle(train_folders) # train_folders is ['./train/shirt','./train/not_a_shirt']
test_datasets = try_pickle(test_folders) # test_folders is ['./test/shirt','./test/not_a_shirt']

In [None]:
def make_arrays(nb_rows, img_size):
    """Allocate uninitialized image and label buffers for `nb_rows` samples.

    Returns a ``(dataset, labels)`` pair: a float32 array of shape
    (nb_rows, img_size, img_size) and an int32 array of shape (nb_rows,).
    When `nb_rows` is falsy (e.g. 0) the pair is ``(None, None)``.
    """
    if not nb_rows:
        return None, None
    # Contents are arbitrary until the caller fills them in.
    images = np.empty((nb_rows, img_size, img_size), dtype=np.float32)
    targets = np.empty(nb_rows, dtype=np.int32)
    return images, targets


def merge_datasets(pickle_files, train_size, valid_size=0):
    """Merge per-class pickles into labelled training and validation arrays.

    Each pickle is loaded, shuffled along its first axis, and split: the first
    ``valid_size // num_classes`` samples go to the validation set, the next
    ``train_size // num_classes`` to the training set. The label of a class is
    its index in `pickle_files`.

    Args:
        pickle_files: Paths of the per-class pickles (one image array each).
        train_size: Total number of training samples across all classes.
        valid_size: Total number of validation samples; 0 disables the
            validation split.

    Returns:
        (valid_dataset, valid_labels, train_dataset, train_labels). The two
        validation entries are None when `valid_size` is 0. Sizes should be
        multiples of the class count: the buffers come from make_arrays()
        uninitialized, so any remainder rows are never written.

    Raises:
        ValueError: if `pickle_files` is empty.
        Exception: whatever loading or slicing a pickle raised, re-raised
            after being printed (e.g. a class with too few samples).
    """
    num_classes = len(pickle_files)
    if num_classes == 0:
        raise ValueError('merge_datasets needs at least one pickle file')
    valid_dataset, valid_labels = make_arrays(valid_size, image_size)
    train_dataset, train_labels = make_arrays(train_size, image_size)

    vsize_per_class = valid_size // num_classes
    tsize_per_class = train_size // num_classes

    start_v, start_t = 0, 0
    end_v, end_t = vsize_per_class, tsize_per_class
    end_l = vsize_per_class + tsize_per_class

    for label, pickle_file in enumerate(pickle_files):
        try:
            # NOTE: pickle.load can run arbitrary code; these files are
            # expected to be the ones written locally by try_pickle().
            with open(pickle_file, 'rb') as f:
                shirt_set = pickle.load(f)
            # Shuffle the samples to get a random validation/training split.
            np.random.shuffle(shirt_set)
            if valid_dataset is not None:
                valid_shirts = shirt_set[:vsize_per_class, :, :]
                valid_dataset[start_v:end_v, :, :] = valid_shirts
                valid_labels[start_v:end_v] = label
                start_v += vsize_per_class
                end_v += vsize_per_class

            train_shirts = shirt_set[vsize_per_class:end_l, :, :]
            train_dataset[start_t:end_t, :, :] = train_shirts
            train_labels[start_t:end_t] = label
            start_t += tsize_per_class
            end_t += tsize_per_class
        except Exception as e:
            print('Unable to process data from', pickle_file, ':', e)
            # Re-raise inside the handler; a bare raise outside of it fails
            # with "No active exception to reraise".
            raise

    return valid_dataset, valid_labels, train_dataset, train_labels
            
            
def _min_images_per_class(pickle_files):
    """Return the smallest number of images stored in any of the class pickles."""
    counts = []
    for path in pickle_files:
        with open(path, 'rb') as f:
            counts.append(len(pickle.load(f)))
    return min(counts)


# NOTE(review): the original cell left these three sizes blank (a SyntaxError),
# with only the hints "90%", "10%" and "100%". They are derived here from the
# smallest class so that every class can supply an equal share -- confirm, or
# replace with fixed numbers.
per_class_available = _min_images_per_class(train_datasets)
per_class_valid = per_class_available // 10  # 10% held out for validation
train_size = (per_class_available - per_class_valid) * len(train_datasets)  # remaining ~90%
valid_size = per_class_valid * len(train_datasets)  # 10%
test_size = _min_images_per_class(test_datasets) * len(test_datasets)  # 100%

valid_dataset, valid_labels, train_dataset, train_labels = merge_datasets(train_datasets, train_size, valid_size)
_, _, test_dataset, test_labels = merge_datasets(test_datasets, test_size)

print('Training:', train_dataset.shape, train_labels.shape)
print('Validation:', valid_dataset.shape, valid_labels.shape)
print('Testing:', test_dataset.shape, test_labels.shape)

In [None]:
#Randomize the dataset

def randomize(dataset, labels):
    """Shuffle samples and labels with one shared random permutation.

    A single permutation of the first axis is drawn with
    np.random.permutation and applied to both arrays, so every image keeps
    its label. Returns ``(shuffled_dataset, shuffled_labels)`` as new arrays;
    the inputs are left untouched.
    """
    order = np.random.permutation(labels.shape[0])
    return dataset[order, :, :], labels[order]


# merge_datasets() fills each split class by class, so shuffle every split to
# mix the classes. Each line rebinds its own inputs: re-running this cell
# simply reshuffles the already shuffled arrays.
train_dataset, train_labels = randomize(train_dataset, train_labels)
test_dataset, test_labels = randomize(test_dataset, test_labels)
valid_dataset, valid_labels = randomize(valid_dataset, valid_labels)

In [None]:
# Dump all splits into one shared .pickle file so they can be reused many times.
data_root = '.'
pickle_file = os.path.join(data_root, 'IMAGE_data.pickle')

# Build the payload before opening the file: a missing variable then fails
# without leaving an empty or truncated pickle behind.
save = {
    'train_dataset': train_dataset,
    'train_labels': train_labels,
    'valid_dataset': valid_dataset,
    'valid_labels': valid_labels,
    'test_dataset': test_dataset,
    'test_labels': test_labels,
}

with open(pickle_file, 'wb') as f:
    # The with-block closes the file on exit; no explicit f.close() is needed.
    pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)

In [None]:
#create dataset from pickle file

# Read the combined pickle back and expose each split under its own name.
# NOTE: pickle.load can execute arbitrary code -- only open files you created.
pickle_file = "IMAGE_data.pickle"
with open(pickle_file, 'rb') as f:
    full_data = pickle.load(f)

train_dataset = full_data['train_dataset']
train_labels = full_data['train_labels']

valid_dataset = full_data['valid_dataset']
valid_labels = full_data['valid_labels']

test_dataset = full_data['test_dataset']
test_labels = full_data['test_labels']

# Drop the dict itself; the arrays stay reachable through the names above.
del full_data

print("Train:", train_dataset.shape, train_labels.shape)
print("Validation:", valid_dataset.shape, valid_labels.shape)
print("Test:", test_dataset.shape, test_labels.shape)

In [None]:
#reformat the dataset

# Must match the 64x64 images pickled earlier in this notebook (load_letter
# rejects any other shape); the previous value of 28 cannot reshape them.
image_size = 64
cloth_labels = 2
cloth_channels = 1  # greyscale


def reformat(dataset, labels):
    """Reshape images to 4-D float32 and one-hot encode integer labels.

    Args:
        dataset: Array whose elements can be viewed as
            (n, image_size, image_size) images.
        labels: 1-D array of integer class ids in [0, cloth_labels).

    Returns:
        (dataset, labels): float32 images of shape
        (n, image_size, image_size, cloth_channels) and float32 one-hot labels
        of shape (n, cloth_labels).
    """
    # Reshape first, then cast: the original called .astype on the shape tuple.
    dataset = dataset.reshape(
        (-1, image_size, image_size, cloth_channels)).astype(np.float32)
    # Broadcasting (cloth_labels,) against (n, 1) yields the one-hot matrix.
    labels = (np.arange(cloth_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels

In [None]:
# Call reformat() on every split and print the reformatted shapes.
# Note: this cell rebinds its own inputs, so running it twice would feed the
# already one-hot labels back into reformat(); re-run the loading cell first.

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

print('Train:', train_dataset.shape, train_labels.shape)
print('Validation:', valid_dataset.shape, valid_labels.shape)
print('Test:', test_dataset.shape, test_labels.shape)

In [None]:
#Accuracy

def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class equals the label.

    Both arguments are expected to hold one row per sample; the class of a row
    is the index of its largest entry (argmax over axis 1).
    """
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(labels, axis=1)
    n_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * n_correct / predictions.shape[0]

In [None]:
#initialise Graph 