In [1]:
import numpy as np
import lmdb
import caffe
import pandas as pd

## 1. Load data

In [2]:
# Each row of the X CSV is reshaped into one 28x28x4 uint8 array;
# the y CSV is loaded as-is (one row of label columns per sample).
X_data = (
    pd.read_csv('X_test_sat6.csv', header=None)
    .values
    .reshape(-1, 28, 28, 4)
    .astype('uint8')
)
y_data = (
    pd.read_csv('y_test_sat6.csv', header=None)
    .values
    .astype('uint8')
)

In [3]:
# Sanity check: display the shapes of the loaded image and label arrays.
(X_data.shape, y_data.shape)

((81000L, 28L, 28L, 4L), (81000L, 6L))

## 2. Split train and test data

In [4]:
# Fixed seed so the train/test split is reproducible across runs.
np.random.seed(123)

# Shuffle the sample indices in place, then cut them 80% / 20%.
shuffle_ids = np.arange(len(y_data))
np.random.shuffle(shuffle_ids)

# Compute the split point once so both slices are guaranteed to agree.
n_train = int(len(y_data) * 0.8)
train_ids = shuffle_ids[:n_train]
test_ids = shuffle_ids[n_train:]

In [5]:
# Select the rows of each split using the shuffled index arrays.
X_train = X_data[train_ids]
y_train = y_data[train_ids]
X_test = X_data[test_ids]
y_test = y_data[test_ids]

## 3. Training data
<font color='red'>We have to set a large enough <font color='blue'>**map_size**</font> (in bytes) for the LMDB database; here it is set to twice the size of the input array.</font>

In [6]:
# LMDB size limit in bytes: twice the raw size of the training array.
map_size_train = 2 * X_train.nbytes

In [7]:
# Write the training split to the 'sat6_train' LMDB, one serialized Caffe
# Datum per sample, keyed by the sample's zero-padded position.
with lmdb.open('sat6_train', map_size=map_size_train) as env:
    # txn is a Transaction object; all records go in through this one
    # write transaction.
    with env.begin(write=True) as txn:
        # The per-sample dimensions are the same for every record.
        height, width, channels = X_train.shape[1:4]

        for i in range(X_train.shape[0]):
            datum = caffe.proto.caffe_pb2.Datum()
            datum.height = height
            datum.width = width
            datum.channels = channels
            # NOTE(review): the bytes are written in the array's own
            # (height, width, channels) order, while Caffe normally reads
            # Datum.data as channels x height x width. Confirm the intended
            # layout; if it is changed, the train and test databases must be
            # changed together so they stay consistent.
            datum.data = X_train[i].tobytes()  # or .tostring() if numpy < 1.9
            # Class index = position of the largest entry in the label row;
            # cast to a plain int before assigning to the protobuf field.
            datum.label = int(np.argmax(y_train[i]))

            # Keys are encoded to bytes: required on Python 3 and harmless on
            # Python 2, so the cell runs unchanged on both.
            key = '{:08}'.format(i).encode('ascii')
            txn.put(key, datum.SerializeToString())

## 4. Test data

In [8]:
# LMDB size limit in bytes: twice the raw size of the test array.
map_size_test = 2 * X_test.nbytes

In [9]:
# Write the test split to the 'sat6_test' LMDB, one serialized Caffe
# Datum per sample, keyed by the sample's zero-padded position.
with lmdb.open('sat6_test', map_size=map_size_test) as env:
    # txn is a Transaction object; all records go in through this one
    # write transaction.
    with env.begin(write=True) as txn:
        # The per-sample dimensions are the same for every record.
        height, width, channels = X_test.shape[1:4]

        for i in range(X_test.shape[0]):
            datum = caffe.proto.caffe_pb2.Datum()
            datum.height = height
            datum.width = width
            datum.channels = channels
            # NOTE(review): the bytes are written in the array's own
            # (height, width, channels) order, while Caffe normally reads
            # Datum.data as channels x height x width. Confirm the intended
            # layout; if it is changed, the train and test databases must be
            # changed together so they stay consistent.
            datum.data = X_test[i].tobytes()  # or .tostring() if numpy < 1.9
            # Class index = position of the largest entry in the label row;
            # cast to a plain int before assigning to the protobuf field.
            datum.label = int(np.argmax(y_test[i]))

            # Keys are encoded to bytes: required on Python 3 and harmless on
            # Python 2, so the cell runs unchanged on both.
            key = '{:08}'.format(i).encode('ascii')
            txn.put(key, datum.SerializeToString())