In [1]:
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange


import numpy as np
import os

In [2]:
# Shape (10 x 10) of every fake image array saved by fake_data_generator.
image_shape = [10, 10]


def fake_data_generator(num=4):
    """Write ``num`` placeholder samples into ``images/`` and ``text/``.

    For every index ``i`` in ``xrange(num)`` this creates, relative to the
    current working directory:

    * ``images/<i>.npy`` -- an all-zero float32 array of shape ``image_shape``
    * ``text/<i>.txt``   -- a text file whose content is ``str(i)``

    Either directory is created when it does not exist yet.

    Args:
        num: Number of image/text pairs to write.
    """

    def ensure_dir(name):
        # Create the directory only when nothing exists at that path yet.
        if not os.path.exists(name):
            os.mkdir(name)

    # generate images
    ensure_dir('images')
    for i in xrange(num):
        # NOTE(review): the previous version multiplied this zero array by
        # `num`, which cannot change its values. If per-sample distinct
        # images were intended, that needs a different fill -- confirm.
        new_array = np.zeros(shape=image_shape, dtype=np.float32)
        np.save(file='images/%s.npy' % i, arr=new_array)

    # generate text
    ensure_dir('text')
    for i in xrange(num):
        with open('text/%s.txt' % i, mode='w') as text_buffer:
            text_buffer.write(str(i))


# Populate ./images and ./text with the default number of samples (num=4).
fake_data_generator()

在 Keras 中，当内存或显存不足以一次性加载全部数据、但磁盘存储空间充足时，可以采用生成器的方式按批次从磁盘读取数据（如下）。

In [3]:
class Generator(object):
    """Batch loader that pairs ``.npy`` arrays with text files on disk.

    Entries of ``img_dir`` and ``text_dir`` are matched by position after each
    directory listing is sorted, so both directories must hold the same number
    of entries, named so that they sort in the same order.
    """

    def __init__(self, img_dir, text_dir, batch_size=2):
        """Index both directories and remember the batch size.

        Args:
            img_dir: Directory whose entries are loaded with ``np.load``.
            text_dir: Directory whose entries are read as text.
            batch_size: Number of samples in each yielded batch.

        Raises:
            AssertionError: If the two directories contain a different number
                of entries.
        """
        self.img_dir = img_dir
        self.text_dir = text_dir

        # sorted() orders names lexicographically ('10.npy' comes before
        # '2.npy'); the same rule is applied to both listings.
        self.images = [os.path.join(self.img_dir, fl) for fl in sorted(os.listdir(self.img_dir))]
        self.texts = [os.path.join(self.text_dir, fl) for fl in sorted(os.listdir(self.text_dir))]

        self.batch_size = batch_size
        # Compare images against texts (the check used to compare text_dir
        # with itself and therefore could never fail).
        assert len(self.images) == len(self.texts), (
            'img_dir has %d entries but text_dir has %d'
            % (len(self.images), len(self.texts)))
        self.NUM = len(self.texts)

    def next(self):
        """Yield ``len(self)`` batches as ``(arrays, strings)`` tuples.

        Each batch holds ``batch_size`` consecutive samples. Samples left over
        after the last full batch are not yielded.
        """
        batch_index = 0
        for _ in xrange(len(self)):
            batch_img = self.images[batch_index:batch_index + self.batch_size]
            batch_text = self.texts[batch_index:batch_index + self.batch_size]
            # Advance to the next batch, wrapping to 0 once the end is reached.
            batch_index = (batch_index + self.batch_size) % self.NUM
            yield (self.load_npy(batch_img), self.load_txt(batch_text))

    def load_npy(self, files):
        """Return a list with the ``np.load`` result of every path in ``files``."""
        return [np.load(fl) for fl in files]

    def load_txt(self, files):
        """Return a list with the full text content of every path in ``files``."""
        txt = []
        for fl in files:
            with open(fl, 'r') as stream:
                txt.append(stream.read())
        return txt

    def __len__(self):
        """Number of full batches available (``NUM // batch_size``)."""
        return self.NUM // self.batch_size


# Build the batch loader over the 'images' and 'text' directories (default batch_size=2).
gen = Generator(img_dir='images', text_dir='text')

# One pass over the data: every iteration yields (list of arrays, list of strings).
for batch_img, batch_text in gen.next():
    print(batch_img, batch_text)

[array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32), array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], d