In [1]:
import numpy as np
import os
import scipy.io
import spectral.io.envi as envi

In [3]:
DATASET_NAME = 'leaf'
NEW_DATA_PATH = os.path.join(os.getcwd(), DATASET_NAME)  # 存放数据路径 patch是文件夹名称


In [20]:
def loadData(flieName, dataIndex, temp_split=2):
    
    print("------------  loadData  ", dataIndex)
    # 原始数据路径
    DATA_PATH = os.path.join(os.getcwd(), flieName)

    index = str(dataIndex)
    data = envi.open( os.path.join(DATA_PATH, "{}.hdr".format(index)) ,os.path.join(DATA_PATH, "{}.dat".format(index)))
    mask_data = envi.open( os.path.join(DATA_PATH, "mask_{}.hdr".format(index)) ,os.path.join(DATA_PATH, "mask_{}.tiff".format(index)))

    HEIGHT = data.shape[0] //temp_split
    WIDTH = data.shape[1] //temp_split
    BAND = data.shape[2] 
#     BAND = BAND_SIZE
    new_shape=(BAND,HEIGHT,WIDTH)
    new_data = np.zeros(new_shape, dtype = float)
    label = np.zeros((HEIGHT, WIDTH), dtype = int)
    

    sample_count = 0
    for h in range(HEIGHT): 
        for w in range(WIDTH):
            x = h*temp_split
            y = w*temp_split
            for b in range(BAND):
                new_data[b][h][w] = data[x,y][b]

            if(sum(mask_data[x, y])  > 0.01 ):
                label[h][w] = dataIndex 
                sample_count += 1
            else:
                label[h][w] = 0
    
    
    new_data = np.transpose(new_data, (1, 2, 0))  # 将通道数提前，便于数组处理操作
    print("sample_count = {} ".format(sample_count))
    print("data shape : ", new_data.shape)
    print("label shape : ", label.shape)
    return new_data, label

In [21]:
%%time
data1, label1 = loadData("dataset", 1)
data2, label2 = loadData("dataset", 2)
data3, label3 = loadData("dataset", 3)
data4, label4 = loadData("dataset", 4)

------------  loadData   1
sample_count = 21324 
data shape :  (256, 256, 204)
label shape :  (256, 256)
------------  loadData   2
sample_count = 18157 
data shape :  (256, 256, 204)
label shape :  (256, 256)
------------  loadData   3
sample_count = 20040 
data shape :  (256, 256, 204)
label shape :  (256, 256)
------------  loadData   4
sample_count = 20350 
data shape :  (256, 256, 204)
label shape :  (256, 256)
CPU times: user 9min 35s, sys: 760 ms, total: 9min 36s
Wall time: 9min 35s


In [22]:
X1 = np.hstack((data1, data2))
X2 = np.hstack((data3, data4))

gt1 = np.hstack((label1, label2))
gt2 = np.hstack((label3, label4))

X = np.vstack((X1, X2))

gt = np.vstack((gt1, gt2))

In [23]:

if not os.path.exists(NEW_DATA_PATH):
    os.mkdir(NEW_DATA_PATH)

In [24]:
train_dict, test_dict = {}, {}
train_dict[DATASET_NAME] = X
file_name = "{}.mat".format(DATASET_NAME) 
scipy.io.savemat(os.path.join(NEW_DATA_PATH, file_name), train_dict)
test_dict["{}_gt".format(DATASET_NAME)] = gt
file_name = "{}_gt.mat".format(DATASET_NAME)
scipy.io.savemat(os.path.join(NEW_DATA_PATH, file_name), test_dict)
print("Save target data\n")

Save target data



In [25]:
data_path = os.path.join(os.getcwd(), DATASET_NAME)
data = scipy.io.loadmat(os.path.join(data_path, '{}.mat'.format(DATASET_NAME)))[DATASET_NAME]
labels = scipy.io.loadmat(os.path.join(data_path, '{}_gt.mat'.format(DATASET_NAME)))['{}_gt'.format(DATASET_NAME)]

In [26]:
data.shape

(512, 512, 204)

In [27]:
labels.shape

(512, 512)