In [1]:
import h5py
import json
import keras
import numpy as np
import pandas as pd
from keras.preprocessing import image
from sklearn.model_selection import train_test_split

## Exemplo 

In [None]:
def load_dataset(train_path='dados/train_catvnoncat.h5', test_path='dados/test_catvnoncat.h5'):
    """Load the cat / non-cat train and test sets from their HDF5 files.

    Parameters
    ----------
    train_path : str
        HDF5 file holding ``train_set_x``, ``train_set_y`` and ``list_classes``.
    test_path : str
        HDF5 file holding ``test_set_x`` and ``test_set_y``.

    Returns
    -------
    tuple
        ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)``. The two label arrays are reshaped from
        ``(m,)`` to ``(1, m)``; everything else is returned as stored.
    """
    # np.array(...[:]) copies the data into memory, so the files can be closed
    # as soon as the reads are done.
    with h5py.File(train_path, "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels
        # The train file is read for the class list because it is the file
        # this notebook shows to contain the 'list_classes' key.
        classes = np.array(train_dataset["list_classes"][:])  # the list of classes

    # The test file must be loaded as well: the reshape and the return value
    # below both need the test arrays.
    with h5py.File(test_path, "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels

    # Turn the flat label vectors into single-row matrices.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [2]:
# Open the training HDF5 file read-only and display the handle.
# NOTE(review): the handle is not closed in any cell shown here.
train_dataset = h5py.File('dados/train_catvnoncat.h5', "r")
train_dataset

<HDF5 file "train_catvnoncat.h5" (mode r)>

In [3]:
# List the names of the top-level entries stored in the file
train_dataset.keys()

<KeysViewHDF5 ['list_classes', 'train_set_x', 'train_set_y']>

In [4]:
# Print the shape of each stored dataset: features, labels and class names
print(train_dataset['train_set_x'].shape)
print(train_dataset['train_set_y'].shape)
print(train_dataset['list_classes'].shape)

(209, 64, 64, 3)
(209,)
(2,)


In [5]:
# Check which type slicing the dataset with [:] returns
type(train_dataset["train_set_x"][:])

numpy.ndarray

In [6]:
# Display the feature array read from the file (the repr is long)
train_dataset["train_set_x"][:]

array([[[[ 17,  31,  56],
         [ 22,  33,  59],
         [ 25,  35,  62],
         ...,
         [  1,  28,  57],
         [  1,  26,  56],
         [  1,  22,  51]],

        [[ 25,  36,  62],
         [ 28,  38,  64],
         [ 30,  40,  67],
         ...,
         [  1,  27,  56],
         [  1,  25,  55],
         [  2,  21,  51]],

        [[ 32,  40,  67],
         [ 34,  42,  69],
         [ 35,  42,  70],
         ...,
         [  1,  25,  55],
         [  0,  24,  54],
         [  1,  21,  51]],

        ...,

        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0]],

        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0]],

        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   0],
        

In [7]:
# Display the label vector read from the file
train_dataset["train_set_y"][:]

array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])

In [8]:
# Display the class names stored in the file
train_dataset["list_classes"][:]

array([b'non-cat', b'cat'], dtype='|S7')

In [9]:
# Read the features into memory and wrap them with np.array
train_set_x_orig = np.array(train_dataset["train_set_x"][:])
# Alternative without the np.array wrapper:
# train_set_x_orig = train_dataset["train_set_x"][:]
# Show the resulting type and shape
type(train_set_x_orig), train_set_x_orig.shape

(numpy.ndarray, (209, 64, 64, 3))

In [10]:
# Display the in-memory feature array (the repr is long)
train_set_x_orig

array([[[[ 17,  31,  56],
         [ 22,  33,  59],
         [ 25,  35,  62],
         ...,
         [  1,  28,  57],
         [  1,  26,  56],
         [  1,  22,  51]],

        [[ 25,  36,  62],
         [ 28,  38,  64],
         [ 30,  40,  67],
         ...,
         [  1,  27,  56],
         [  1,  25,  55],
         [  2,  21,  51]],

        [[ 32,  40,  67],
         [ 34,  42,  69],
         [ 35,  42,  70],
         ...,
         [  1,  25,  55],
         [  0,  24,  54],
         [  1,  21,  51]],

        ...,

        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0]],

        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0]],

        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   0],
        

In [11]:
# Read the labels into memory and show the resulting type and shape
train_set_y_orig = np.array(train_dataset["train_set_y"][:])
type(train_set_y_orig), train_set_y_orig.shape

(numpy.ndarray, (209,))

In [12]:
# Display the in-memory label vector
train_set_y_orig

array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])

In [13]:
 # Reshape the labels into a single row. Passing -1 lets numpy infer the column
 # count from the data, which keeps this cell idempotent: building the shape from
 # shape[0] would fail on a second run, because shape[0] is 1 after the first one.
 train_set_y_orig = train_set_y_orig.reshape((1, -1))
 train_set_y_orig

array([[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
        0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
        0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
        1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
        1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0,
        0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
        0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1,
        0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])

In [14]:
# Confirm the labels now form a single-row matrix
train_set_y_orig.shape

(1, 209)

In [None]:
# Scale the image arrays by 1/255.
# NOTE(review): train_x_orig, test_x_orig, train_y and test_y are not assigned in
# any cell shown in this notebook (load_dataset is defined but never called) —
# confirm where they come from before running on a fresh kernel.
# NOTE(review): each line reassigns its own input, so running this cell twice
# divides by 255 twice.
train_x_orig = train_x_orig / 255.
test_x_orig = test_x_orig / 255.

# One-hot encode the labels into 2 classes. The trailing [0] keeps the first entry
# along the leading axis — presumably the labels are shaped (1, m); TODO confirm.
train_y = keras.utils.to_categorical(train_y, 2)[0]
test_y = keras.utils.to_categorical(test_y, 2)[0]

## Usando h5py

In [None]:
# Load the dataset CSV
data = pd.read_csv('dados/train.csv', sep = ',')

# Read the JSON label map and key it by integer class id
with open('dados/label_num_to_disease_map.json') as f:
    mapping = {int(k): v for k, v in json.load(f).items()}

# Add the readable label_name column and fix the column order
data['label_name'] = data['label'].map(mapping)
data = data[['image_id', 'label_name', 'label']]

# Stratified train/test split, holding out 10% for test
train, test = train_test_split(
    data,
    test_size = 0.10,
    shuffle = True,
    random_state = 0,
    stratify = data['label_name'],
)

In [None]:
# Directory the images are read from; both splits are loaded from this folder
TRAIN_PATH = 'dados/train_images/'

# Size every image is resized to when it is loaded
IMAGE_SIZE = (456, 456)


def _load_image_array(file_name):
    """Load TRAIN_PATH + file_name, resized to IMAGE_SIZE, and return it as an array."""
    img = image.load_img(TRAIN_PATH + file_name, target_size = IMAGE_SIZE)
    return image.img_to_array(img)


# Accumulator for the train images and the matching labels.
# NOTE(review): no cell shown here calls image_mixup_train / image_mixup_test, so
# the accumulators stay empty unless the functions are applied to the image ids
# elsewhere — confirm.
mixup_x_train = []
mixup_y_train = train.label.values

def image_mixup_train (x):
    """Load the image named ``x`` and append its array to ``mixup_x_train``."""
    mixup_x_train.append(_load_image_array(x))


# Accumulator for the test images and the matching labels
mixup_x_test = []
mixup_y_test = test.label.values

def image_mixup_test (x):
    """Load the image named ``x`` and append its array to ``mixup_x_test``."""
    mixup_x_test.append(_load_image_array(x))

In [None]:
# Convert the collected test images into a single array and show its shape.
# NOTE(review): this rebinds mixup_x_test from a list to an ndarray, so a later
# call to image_mixup_test (which uses .append) would fail — confirm cell order.
mixup_x_test = np.asarray(mixup_x_test)
mixup_x_test.shape

In [None]:
# Write the test images to data.h5 as uint8. The context manager closes the file
# even if create_dataset raises, so a failed write cannot leave an open handle.
with h5py.File('data.h5', 'w') as h5f:
    h5f.create_dataset('dataset_1', dtype = 'uint8', data = mixup_x_test)

In [None]:
# Rewrite data.h5 with both the images and their labels (mode 'w' replaces any
# existing file). The context manager closes the file even if a write raises.
# NOTE(review): 'fature' looks like a typo for 'feature'; it is kept unchanged
# because any reader has to use the same key.
with h5py.File('data.h5', 'w') as h5f:
    h5f.create_dataset('fature', dtype = 'uint8', data = mixup_x_test)
    h5f.create_dataset('label', dtype = 'uint8', data = mixup_y_test)

In [None]:
# Reopen data.h5 read-only.
# NOTE(review): this handle is not closed in any cell shown here; an open handle
# may prevent the file from being rewritten in 'w' mode — confirm and close it.
h5f = h5py.File('data.h5', 'r')

In [None]:
# Store the test images and labels in teste2.h5 as uint8 datasets;
# the file is closed automatically when the block exits
with h5py.File('teste2.h5', mode = 'w') as hf:
    hf.create_dataset(name = 'fature', data = mixup_x_test, dtype = 'uint8')
    hf.create_dataset(name = 'label', data = mixup_y_test, dtype = 'uint8')

In [None]:
# Read the 'label' dataset back from teste2.h5 into memory; slicing with [:]
# happens inside the with-block, while the file is still open
with h5py.File('teste2.h5', 'r') as hf:
    data2 = hf['label'][:]