### Imports and version checks

In [17]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

In [16]:
print("You are on TF{}.".format(tf.__version__))

gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) == 0:
    print("You are not GPU accelerated.")
else:
    for gpu in gpus:
        print("Name:", gpu.name, "  Type:", gpu.device_type)

You are on TF2.3.1.
You are not GPU accelerated.


### Load and preprocess data, create examples

In [10]:
path = "infinite_jest_text.txt"

with open(path, "r") as f:
    text = f.read()
    
text = text.lower().replace("\n", " ")

unique_chars = sorted(list(set(text)))

idx_to_char = dict((i,c) for (i,c) in enumerate(unique_chars))
char_to_idx = dict((c, i) for (i, c) in enumerate(unique_chars))

Now onto creating training examples out of this input data.

For this particular task, we don't need to worry about validation and test sets. We always predict the next character for a given sentence.

In [4]:
maxlen = 40
stride = 3
sentences = []
next_chars = []

for i in range(len(text)-maxlen):
    sentences.append(text[i:i+maxlen])
    next_chars.append(text[i+maxlen])

Let's take a look at a pair of a sentence + its next character.

In [9]:
print("Sentence: {}\nNext character: {}".format(sentences[25], next_chars[25]))

Sentence: ster wallace year of glad year of the de
Next character: p


We have sentences and the character that follows them. Now, we need to encode these into labelled training examples.

My thinking on the shape of `x` is:
- we take each sentence,
- we take each character in the sentence (40),
- we encode this character in a one-hot vector whose size is equal to however many unique characters we have.   

My thinking on the shape of `y` is:
- we take each sentence,
- we encode the character that follows it in a one-hot vector as above.

In [None]:
    
x = np.zeros(((len(sentences), maxlen, len(unique_chars))))

y = np.zeros((len(sentences), len(unique_chars)))

shape_of_examples = None # placeholder—I need to find out what my inputs look like

### Create a model, compile it

In [None]:
batch_size = 128

# Creating the model is the simplest part of this notebook.
model = keras.Sequential(
[
    # FIXME: what's the dimension of this input supposed to be?
    keras.layers.Input(shape_of_examples, batch_size), 
    keras.layers.LSTM(128),
    keras.layers.Dense(len(unique_chars), activation="softmax")
])

In [None]:
optimizer = keras.optimizers.Adam()

# what should the loss be? what is each loss good for?
model.compile(loss="")

### Create a function for sampling/generating sequences from a seed sequence using a (partially) trained model