# Working with Generators and Batches
Load the IMDB review dataset, pad it to a fixed length, and use it to try out a simple shuffled batch generator.

In [2]:
from keras.datasets import imdb
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import keras as keras
import os
import pandas as pd
import numpy as np

Using TensorFlow backend.


In [7]:
# Load the IMDB text dataset as (data, labels) pairs for train and test.
# top_words is the vocabulary size: how many words of the corpus to use.
top_words = 5000
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=top_words
)

In [10]:
# Truncate and pad the input sequences so every review has exactly
# max_review_length entries and the data forms a rectangular 2-D array.
max_review_length = 500
train_data = keras.preprocessing.sequence.pad_sequences(train_data, maxlen=max_review_length)
test_data = keras.preprocessing.sequence.pad_sequences(test_data, maxlen=max_review_length)

# Turn the training labels into a column vector of shape (n_samples, 1).
# np.reshape is used rather than np.resize: resize silently repeats or
# truncates values when the element count does not match the target shape,
# whereas reshape raises, so a shape mistake cannot corrupt the labels.
train_labels = np.reshape(train_labels, (len(train_labels), 1))

In [37]:
# Define the batch size: the number of samples in each batch produced by
# the generator. It is passed to next_batch() when the generator is created.
batchsize = 5000

### Now it is time to make a batch generator

In [13]:
# Sanity check: after padding, the training data should be a 2-D array of
# shape (number of reviews, max_review_length).
print('Input data shape:', train_data.shape)

Input data shape: (25000, 500)


In [42]:
def next_batch(data, batchsize, drop_last=True, seed=None):
    """
    Yield randomly shuffled mini-batches of ``data``.

    The sample order is shuffled once per call. Each yielded batch is
    ``data`` indexed by a consecutive slice of that permutation, so no
    sample is repeated within a single pass.

    :param data: collection supporting ``len()`` and indexing with an
        integer index array (e.g. a NumPy array)
    :param batchsize: number of samples per batch; must be positive
    :param drop_last: if True (the default, matching the original behaviour)
        a trailing batch with fewer than ``batchsize`` samples is skipped;
        if False it is yielded as a final, smaller batch
    :param seed: optional seed for a private ``np.random.RandomState`` so the
        shuffle is reproducible; if None (default) the global NumPy random
        state is used
    :returns: generator yielding ``data`` batches of ``batchsize`` samples
    :raises ValueError: when iteration starts, if ``batchsize`` is not positive
    """
    # A non-positive batch size would otherwise produce an empty (or invalid)
    # range and the generator would silently yield nothing.
    if batchsize <= 0:
        raise ValueError('batchsize must be a positive integer, got %r' % (batchsize,))

    num_samples = len(data)

    # Use the global random state unless the caller asked for a reproducible
    # shuffle via an explicit seed.
    if seed is None:
        idx = np.random.permutation(num_samples)
    else:
        idx = np.random.RandomState(seed).permutation(num_samples)

    # With drop_last the range stops early enough that every start index
    # leaves room for a full batch; otherwise every start index is visited
    # and the last slice may be shorter than batchsize.
    stop = num_samples - batchsize + 1 if drop_last else num_samples
    for start in range(0, stop, batchsize):
        yield data[idx[start:start + batchsize]]

In [43]:
# Create the batch generator. next_batch() is a generator function, so no
# work happens here; batches are produced lazily as the generator is
# iterated, and it can be iterated only once.
batch_gen = next_batch(train_data, batchsize)

In [45]:
# Consume the generator: for each batch, report its index and shape and
# preview only the first five rows rather than dumping the whole batch.
# After this loop the generator is exhausted; create a new one to iterate again.
for i,data in enumerate(batch_gen):
    print('Batch # ', i, ':',data.shape)
    print(data[:5])

Batch #  0 : (5000, 500)
[[   0    0    0 ...   94 1215 2577]
 [   0    0    0 ...    2 2299  489]
 [   0    0    0 ...   33  263  902]
 [   0    0    0 ...   38    2   40]
 [   0    0    0 ...   20  520   72]]
Batch #  1 : (5000, 500)
[[  0   0   0 ...  10 342 158]
 [  0   0   0 ... 528  30 685]
 [  0   0   0 ... 587   4 277]
 [742   6 185 ... 513  15   9]
 [  0   0   0 ...  19  49   2]]
Batch #  2 : (5000, 500)
[[   0    0    0 ...  158  158  381]
 [   0    0    0 ...  378    7 3596]
 [   0    0    0 ...   19  129  223]
 [   0    0    0 ...  118    6 1542]
 [   0    0    0 ...   46    7  158]]
Batch #  3 : (5000, 500)
[[   0    0    0 ...  157    7 1342]
 [   0    0    0 ...   58   11    2]
 [   0    0    0 ...   78  116  151]
 [   0    0    0 ...  796 1150 1479]
 [   0    0    0 ...    2 1830  750]]
Batch #  4 : (5000, 500)
[[  0   0   0 ... 207 126 110]
 [  0   0   0 ...   4 226  20]
 [  0   0   0 ...   2   2   2]
 [  0   0   0 ...  33  14  31]
 [  0   0   0 ...  30  66   2]]
