# Matrix Serializer

Serialize a matrix using Protocol Buffers (Protobuf), NumPy, Pickle, and HDF5.

## License

MIT License

Copyright (c) 2018 PT Bukalapak.com

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

## Software Version

In [1]:
import sys
print("Python %s" % sys.version)
import pickle
import h5py

Python 3.6.3 |Anaconda, Inc.| (default, Nov  9 2017, 00:19:18) 
[GCC 7.2.0]


In [2]:
import numpy as np
print("NumPy %s" % np.__version__)

NumPy 1.13.3


In [3]:
import tensorflow as tf
print("TensorFlow %s" % tf.__version__)

TensorFlow 1.11.0


In [4]:
import platform
print("platform %s" % platform.__version__)

platform 1.0.8


In [5]:
print("OS", platform.platform())

OS Linux-4.15.0-38-generic-x86_64-with-debian-buster-sid


In [6]:
%%bash
cat /etc/os-release

NAME="Ubuntu"
VERSION="18.04 LTS (Bionic Beaver)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 18.04 LTS"
VERSION_ID="18.04"
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
VERSION_CODENAME=bionic
UBUNTU_CODENAME=bionic


## Functions

Load a NumPy array from a pickle file.

In [7]:
def load_pickle_file_to_numpy(pickle_path_filename):
    """Unpickle and return the object stored in ``pickle_path_filename``.

    The file is opened in binary mode and closed again before returning.
    Whatever object the pickle contains is returned unchanged; in this
    notebook that is a NumPy array.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so only use
    this on files from a trusted source.
    """
    with open(pickle_path_filename, mode="rb") as pickle_fh:
        return pickle.load(pickle_fh)

Convert numpy to serialized protobuf.

In [8]:
def convert_numpy_to_protobuf_str(matrix_np):
    """Serialize ``matrix_np`` to protobuf bytes via a TensorFlow constant.

    A constant op holding ``matrix_np`` is created in a throwaway graph, and
    the ``'value'`` attribute of its node definition is serialized with
    ``SerializeToString()``.

    Returns the serialized attribute as ``bytes``.
    """
    scratch_graph = tf.Graph()
    # Build the constant in its own graph so the default graph stays untouched.
    with scratch_graph.as_default():
        const_tensor = tf.constant(matrix_np)
        value_attr = const_tensor.op.node_def.attr['value']
        serialized = value_attr.SerializeToString()
    return serialized

Convert serialized protobuf to numpy.

In [9]:
def convert_protobuf_str_to_numpy(matrix_pb):
    """Deserialize protobuf bytes made by ``convert_numpy_to_protobuf_str``.

    Args:
        matrix_pb: serialized bytes of a constant op's ``'value'`` attribute.

    Returns:
        The result of evaluating a constant built from the parsed message's
        ``tensor`` field.

    The session is opened in a ``with`` block so it is closed even if
    evaluation fails; the previous version created a session inline and
    never closed it.
    """
    with tf.Graph().as_default():
        # A dummy constant is created only to reach the message type of its
        # 'value' attribute; FromString parses matrix_pb into a new message.
        value_attr = tf.constant(1)._op.node_def.attr['value'].FromString(matrix_pb)
        with tf.Session() as session:
            matrix_np = session.run(tf.constant(value_attr.tensor))
    return matrix_np

Write a NumPy array to an HDF5 file.

In [10]:
def convert_numpy_to_hdf5_file(path_filename, matrix_np=None):
    """Write a matrix to ``path_filename`` as HDF5 dataset ``'image_wo_norm'``.

    Args:
        path_filename: destination HDF5 file; opened in ``'w'`` mode.
        matrix_np: data to store. When omitted, the notebook-level variable
            ``images_np_pickle`` is used, which is what this function always
            wrote before the parameter was added.

    The file is opened in a ``with`` block so the handle is closed even if
    creating the dataset raises.
    """
    if matrix_np is None:
        # Backward-compatible fallback for callers that pass only the path.
        matrix_np = images_np_pickle
    with h5py.File(path_filename, 'w') as hdf5_fh:
        hdf5_fh.create_dataset('image_wo_norm', data=matrix_np)

Read a NumPy array back from an HDF5 file.

In [11]:
def convert_hdf5_file_to_numpy(path_filename):
    """Read the ``'image_wo_norm'`` dataset from an HDF5 file.

    Args:
        path_filename: HDF5 file to read; opened in ``'r'`` mode.

    Returns:
        The whole dataset as a single array. The previous version returned a
        Python ``list`` of sub-arrays (one per entry along the first axis),
        which did not match the function name.

    The file is opened in a ``with`` block so the handle is closed even if
    the read raises.
    """
    with h5py.File(path_filename, 'r') as hdf5_fh:
        # Slice with [...] to copy the data into memory before the file closes.
        images_np = hdf5_fh['image_wo_norm'][...]
    return images_np

## Load the Matrix

In [12]:
# Path of the source pickle that holds the matrix to serialize.
pickle_path_filename = (
    '/home/jovyan/work/'
    'image/preprocessed_wo_norm_jacket_python_source.pkl'
)

In [13]:
images_np_pickle = load_pickle_file_to_numpy(pickle_path_filename)

In [14]:
type(images_np_pickle)

numpy.ndarray

In [15]:
images_np_pickle.shape

(1, 224, 224, 3)

In [16]:
images_np_pickle

array([[[[ 32,  14,  77],
         [255, 255, 255],
         [255, 255, 255],
         ..., 
         [121,  82, 224],
         [121,  82, 224],
         [200, 195, 211]],

        [[ 31,  14,  75],
         [255, 255, 255],
         [255, 255, 255],
         ..., 
         [121,  82, 224],
         [255, 255, 255],
         [107,  76, 189]],

        [[ 31,  14,  75],
         [255, 255, 255],
         [255, 255, 255],
         ..., 
         [121,  82, 224],
         [121,  82, 224],
         [ 66,  22, 180]],

        ..., 
        [[ 86,  86,  86],
         [254, 254, 254],
         [254, 254, 254],
         ..., 
         [254, 254, 254],
         [254, 254, 254],
         [ 94,  94,  94]],

        [[ 86,  86,  86],
         [254, 254, 254],
         [254, 254, 254],
         ..., 
         [254, 254, 254],
         [254, 254, 254],
         [ 94,  94,  94]],

        [[ 85,  85,  85],
         [219, 219, 219],
         [219, 219, 219],
         ..., 
         [219, 219, 219],
  

## Serialize Protobuf (pb)

Serialize matrix.

In [17]:
images_pb = convert_numpy_to_protobuf_str(images_np_pickle)

In [18]:
type(images_pb)

bytes

In [19]:
len(images_pb)

150558

In [20]:
images_pb[:20]

b'B\x9a\x98\t\x08\x04\x12\x12\x12\x02\x08\x01\x12\x03\x08\xe0\x01\x12\x03\x08'

In [21]:
images_pb[-20:]

b'\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb]]]'

Convert back to matrix for testing.

In [22]:
images_np_new = convert_protobuf_str_to_numpy(images_pb)

The following has to be zero!

In [23]:
np.sum(images_np_pickle != images_np_new)

0

Save to file.

In [24]:
# Output path for the protobuf-serialized matrix.
protobuf_path_filename = (
    '/home/jovyan/work/'
    'image/preprocessed_wo_norm_jacket_python.pb'
)

In [25]:
with open(protobuf_path_filename, "wb") as f:
    f.write(images_pb)

## Serialize Numpy (npy)

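Serialize matrix. Note that `ndarray.dumps()` returns a pickle byte string (the output below starts with the pickle protocol 2 header `\x80\x02`), not the `.npy` binary format written by `np.save`.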

In [26]:
images_npy = images_np_pickle.dumps()

In [27]:
len(images_npy)

269061

In [28]:
images_npy[:50]

b'\x80\x02cnumpy.core.multiarray\n_reconstruct\nq\x00cnumpy\nnda'

In [29]:
images_npy[-20:]

b'\xc3\x9b]]]q\x11h\x05\x86q\x12Rq\x13tq\x14b.'

Convert back to matrix for testing.

In [30]:
# np.loads is a deprecated thin wrapper around pickle.loads, so call pickle
# directly; images_npy is a pickle byte string produced by ndarray.dumps().
images_np_new = pickle.loads(images_npy)

The following has to be zero!

In [31]:
np.sum(images_np_pickle != images_np_new)

0

Save to file.

In [32]:
# Output path for the bytes produced by ndarray.dumps().
numpy_path_filename = (
    '/home/jovyan/work/'
    'image/preprocessed_wo_norm_jacket_python.npy'
)

In [33]:
with open(numpy_path_filename, "wb") as f:
    f.write(images_npy)

## Serialize Pickle (pkl)

Serialize matrix.

In [34]:
images_pkl = pickle.dumps(images_np_pickle)

In [35]:
len(images_pkl)

150693

In [36]:
images_pkl[:50]

b'\x80\x03cnumpy.core.multiarray\n_reconstruct\nq\x00cnumpy\nnda'

In [37]:
images_pkl[-20:]

b'\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb\xdb]]]q\rtq\x0eb.'

Convert back to matrix for testing.

In [38]:
images_np_new = pickle.loads(images_pkl)

The following has to be zero!

In [39]:
np.sum(images_np_pickle != images_np_new)

0

Save to file.

In [40]:
# Output path for the re-pickled matrix.
# NOTE(review): this rebinds pickle_path_filename, which earlier pointed at
# the source pickle; re-running the load cell after this one reads this file.
pickle_path_filename = (
    '/home/jovyan/work/'
    'image/preprocessed_wo_norm_jacket_python.pkl'
)

In [41]:
with open(pickle_path_filename, "wb") as f:
    f.write(images_pkl)

## Serialize HDF5 (hdf5)

Serialize matrix.

In [42]:
# Output path for the HDF5-serialized matrix.
hdf5_path_filename = (
    '/home/jovyan/work/'
    'image/preprocessed_wo_norm_jacket_python.hdf5'
)

In [43]:
convert_numpy_to_hdf5_file(hdf5_path_filename)

Convert back to matrix for testing.

In [44]:
images_np_new = convert_hdf5_file_to_numpy(hdf5_path_filename)

The following has to be zero!

In [45]:
np.sum(images_np_pickle != images_np_new)

0