# Prepare the data

In [16]:
from os import listdir;
import numpy as np;
from keras.utils import np_utils; 

# Sliding-window length: every training sample is seqLength consecutive
# characters and its label is the single character that follows them.
seqLength = 100
path = "Stories/"
# Every directory entry is treated as a story, except names containing "DS_Store".
files = [f for f in listdir(path) if ("DS_Store" not in f)]

storyTexts = []
startingData = []  # first seqLength characters of each story; used later as generation seeds
for f in files:
    # The context manager closes each handle right away instead of leaking
    # one open file per story until garbage collection.
    with open(path + f) as storyFile:
        text = storyFile.read().lower()  # lower-case once, reuse for corpus and seed
    storyTexts.append(text)
    startingData.append(text[0:seqLength])

# All stories concatenated, in listing order, into one lower-cased corpus.
rawData = "".join(storyTexts)

chars = sorted(set(rawData))
numChars = len(chars)
numData = len(rawData)

print("%d stories has been loaded"%len(files))
print("Number of different characters: %d"%numChars)
print("Number of characters in the text: %d"%numData)

# Two-way lookup between characters and their index in the sorted vocabulary.
charInt = {c: i for i, c in enumerate(chars)}
intChar = {i: c for i, c in enumerate(chars)}

print("Converting input data to sequences...")
# Encode the corpus once and slice the integer list, rather than repeating
# the dictionary lookups for each of the overlapping windows.
encoded = [charInt[c] for c in rawData]
dataX = [encoded[i:i + seqLength] for i in range(numData - seqLength)]
dataY = encoded[seqLength:]  # label i is the character right after window i

numPatterns = len(dataX)
print("All data has been converted...")
print("Number of patterns: %d"%numPatterns)
print("Reshaping the data")
# (samples, time steps, features) with one feature per step, scaled into [0, 1).
X = np.reshape(dataX, (numPatterns, seqLength, 1))
X = X/float(numChars)
# Pass the vocabulary size explicitly (positionally, so the keyword name does
# not matter) so the one-hot width always equals numChars even if the
# highest-indexed character never occurs as a label.
Y = np_utils.to_categorical(dataY, numChars)

135 stories has been loaded
Number of different characters: 58
Number of characters in the text: 3043260
Converting input data to sequences...
All data has been converted...
Number of patterns: 3043160
Reshaping the data


# Constructing Keras Model

In [7]:
from keras.models import Sequential;
from keras.layers import Dense, LSTM, Dropout;

# Character-level network: one 256-unit LSTM reads a window shaped like a
# single sample of X, 20% dropout is applied to its output, and a softmax
# layer with one unit per column of Y produces the next-character distribution.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:]),
    Dropout(0.2),
    Dense(Y.shape[1], activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy")

# Training Keras Model

In [9]:
from keras.callbacks import ModelCheckpoint

# Define the checkpoint: after each epoch, write the weights to disk whenever
# the training loss reaches a new minimum (save_best_only with mode='min').
import os

# Create the checkpoint directory up front so a missing folder cannot make
# the first save fail after a very long epoch.
weightsDir = "AlphabetWeights"
if not os.path.isdir(weightsDir):
    os.makedirs(weightsDir)

filepath = "AlphabetWeights/weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacksList = [checkpoint]
# NOTE(review): `nb_epoch` is the legacy spelling of `epochs`; kept as-is to
# stay compatible with the Keras version this notebook was written against.
# The trailing semicolon suppresses the repr of the returned value in the cell output.
model.fit(X, Y, nb_epoch=2, batch_size=128, callbacks=callbacksList);

Epoch 1/20
   7296/3043160 [..............................] - ETA: 37330s - loss: 3.1959

KeyboardInterrupt: 

# Generating Story

In [22]:
# Minimum number of characters to generate; after that, generation continues
# until the text ends with a '.'.
numOfCharacters = 1000
# Hard ceiling on generated characters. Greedy (argmax) decoding can settle
# into a cycle that never emits '.', which would otherwise loop forever.
maxCharacters = 2 * numOfCharacters

# Seed the generator with the opening characters of a randomly chosen story.
startText = startingData[np.random.randint(0, len(startingData))]
pattern = [charInt[c] for c in startText]
result = startText + ""
print("Starting Text: \"" + startText + "\"")

i = 0
while (i < numOfCharacters or result[-1] != '.') and i < maxCharacters:
    xTest = np.reshape(pattern, (1, seqLength, 1))
    # Scale exactly as the training input was scaled. The result must be
    # assigned back to xTest: it was previously stored under a misspelt name,
    # so the model was fed the raw, unscaled character codes.
    xTest = xTest/float(numChars)
    predicted = model.predict(xTest)
    predictedIndex = int(np.argmax(predicted))  # greedy: most probable next character
    result += intChar[predictedIndex]
    # Slide the window: append the prediction, drop the oldest character.
    pattern.append(predictedIndex)
    pattern = pattern[1:seqLength+1]
    i += 1

print("The generated Story:\n")
print(result)
print("The end\n")

Starting Text: "the forest of the island is much thicker than the ones i am used to, the ferns and trees around
us g"


KeyboardInterrupt: 