## Prepare Image Data

In [3]:
# load data from mat files

import scipy.io as sio
import os
import numpy as np
import matplotlib.pyplot as plt
from six.moves import cPickle as pickle
import h5py
# NOTE(review): this handle is not read by any cell visible in this notebook, and
# the name `f` is rebound when the pickle file is written further down. No mode
# is passed, so the h5py default applies (presumably 'a' on h5py < 3.0 and 'r'
# on >= 3.0 — confirm), and the handle is never explicitly closed.
f = h5py.File('crypto_expanded.mat')

%matplotlib inline

In [35]:
def _load_image_stacks(mat_path, prefix):
    """Load the two augmented image stacks of one class from an HDF5-based .mat file.

    Parameters
    ----------
    mat_path : str
        Path of the .mat file to read with h5py.
    prefix : str
        Class name used in the dataset keys; the datasets read are
        ``<prefix>_rotate`` and ``<prefix>_flip_rotate``.

    Returns
    -------
    tuple of np.ndarray
        ``(rotate, flip_rotate)``, each with its last axis moved to the front
        (the image index, judging by the (N, 200, 200) shapes shown later on).

    Raises
    ------
    OSError
        If the file cannot be opened for reading.
    KeyError
        If one of the expected datasets is missing.
    """
    # Open read-only and let the context manager close the handle: np.array()
    # copies the dataset into memory, so nothing needs the file afterwards.
    with h5py.File(mat_path, 'r') as mat_file:
        stacks = []
        for suffix in ('rotate', 'flip_rotate'):
            stack = np.array(mat_file[prefix + '_' + suffix])
            stacks.append(np.transpose(stack, (2, 0, 1)))
    return tuple(stacks)


giardia_rotate, giardia_flip_rotate = _load_image_stacks('giardia_expanded.mat', 'giardia')
defects_rotate, defects_flip_rotate = _load_image_stacks('defects_expanded.mat', 'defects')
crypto_rotate, crypto_flip_rotate = _load_image_stacks('crypto_expanded.mat', 'crypto')

In [48]:
# Merge the rotate and flip_rotate stacks of each class and build matching
# one-hot labels. Label column order: [giardia, defects, crypto].
def _one_hot_labels(n_samples, class_index, num_classes=3):
    """Return an (n_samples, num_classes) integer array with a 1 in column class_index.

    np.tile is used instead of multiplying a nested Python list so that an
    empty class still produces a 2-D (0, num_classes) integer array, which can
    be concatenated with the other classes' labels along axis 0.
    """
    row = np.zeros(num_classes, dtype=int)
    row[class_index] = 1
    return np.tile(row, (n_samples, 1))


giardia_data = np.concatenate((giardia_rotate, giardia_flip_rotate), axis=0)
giardia_label = _one_hot_labels(len(giardia_data), 0)

defects_data = np.concatenate((defects_rotate, defects_flip_rotate), axis=0)
defects_label = _one_hot_labels(len(defects_data), 1)

crypto_data = np.concatenate((crypto_rotate, crypto_flip_rotate), axis=0)
crypto_label = _one_hot_labels(len(crypto_data), 2)

In [49]:
# The per-augmentation arrays have been merged into the *_data arrays above,
# so drop these names to let the kernel reclaim the memory.
del giardia_rotate, giardia_flip_rotate
del defects_rotate, defects_flip_rotate
del crypto_rotate, crypto_flip_rotate

In [77]:
# Stack the three classes along the first axis into one image array and one
# label array; shuffling and the train/test/validation split come in later cells.
data_combined = np.concatenate([giardia_data, defects_data, crypto_data])
label_combined = np.concatenate([giardia_label, defects_label, crypto_label])

In [51]:
# Sanity check: shape of the stacked image array before shuffling.
data_combined.shape

(29664, 200, 200)

In [52]:
# Sanity check: the label array should have one row per image.
label_combined.shape

(29664, 3)

In [79]:
from sklearn.utils import shuffle
# Permute images and labels together so each row keeps its label;
# random_state=4 pins the permutation.
# NOTE(review): per the scikit-learn docs `shuffle` returns shuffled copies, so
# two full image arrays may coexist briefly — confirm memory headroom.
# train_test_split also shuffles by default, so this step may be redundant.
data_combined, label_combined = shuffle(data_combined, label_combined, random_state=4)

In [80]:
# Sanity check: shuffling must leave the image array's shape unchanged.
data_combined.shape

(29664, 200, 200)

In [81]:
# Sanity check: shuffling must leave the label array's shape unchanged.
label_combined.shape

(29664, 3)

In [84]:
# `sklearn.cross_validation` was removed in scikit-learn 0.20; import from
# `sklearn.model_selection` and fall back only on installs that predate it.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out 30% of the samples, then halve that hold-out into test and
# validation sets (70% / 15% / 15% overall). A fixed random_state (the same
# seed used for the shuffle) makes the partition reproducible across runs.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    data_combined, label_combined, test_size=0.3, random_state=4)
x_test, x_val, y_test, y_val = train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=4)

# The intermediate hold-out is no longer needed; drop it so it does not linger
# in kernel memory.
del x_holdout, y_holdout



In [88]:
# Sanity check: size of the training split (70% of the samples with test_size=0.3).
x_train.shape

(20764, 200, 200)

In [89]:
# Sanity check: size of the test split (half of the 30% hold-out).
x_test.shape

(4450, 200, 200)

In [90]:
# The splits are separate arrays now, so the combined arrays can be released.
del data_combined, label_combined

In [91]:
pickle_file = 'Image_data.pickle'

try:
    # Build the payload before opening the file so that a missing variable does
    # not truncate an existing pickle. All six splits go into one dict so they
    # can be restored with a single load.
    save = {
        'train_dataset': x_train,
        'train_labels': y_train,
        'valid_dataset': x_val,
        'valid_labels': y_val,
        'test_dataset': x_test,
        'test_labels': y_test,
    }
    # `with` closes the handle even if pickle.dump raises part-way through.
    with open(pickle_file, 'wb') as f:
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    # Report which file failed, then re-raise so the cell still errors visibly.
    print('Unable to save data to', pickle_file, ':', e)
    raise

# NOTE: nothing in this cell compresses the data; the number printed is the
# on-disk size in bytes of the pickle as written.
statinfo = os.stat(pickle_file)
print('Compressed pickle size:', statinfo.st_size)

Compressed pickle size: 9492836486
