In [1]:
#Do all necessary imports

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
import os

  from ._conv import register_converters as _register_converters


In [15]:
# Import Data

DIR_NAMES =['train-easy/', 'train-medium/', 'train-hard']
FILE_NAMES = ['algebra__linear_1d.txt']

BUFFER_SIZE = 50000
BATCH_SIZE = 64
TAKE_SIZE = 5000


current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
dataset_dir = parent_dir + '/Dataset'

# Based on https://www.tensorflow.org/tutorials/load_data/text

# Read in all files which are in FILE_NAMES 

labeled_data_sets = []


for file_name in FILE_NAMES:
    for dir_name in DIR_NAMES:
        concat_dir = os.path.join(dir_name,file_name)
        lines_dataset = tf.data.TextLineDataset(os.path.join(dataset_dir, concat_dir))
        labeled_data_sets.append(lines_dataset)

# Concatenate all File Data to one Big File Data

all_labeled_data = labeled_data_sets[0]
for labeled_dataset in labeled_data_sets[1:]:
    all_labeled_data = all_labeled_data.concatenate(labeled_dataset)
    
# Group Data as batches of two (input_sentence, answer)

all_labeled_data = all_labeled_data.batch(2)

# Make two independent Tensors as Tuple (Not needed)

# all_labeled_data = all_labeled_data.map(lambda x: (x[0], x[1]))

# Shuffle the Data

all_labeled_data = all_labeled_data.shuffle(BUFFER_SIZE, reshuffle_each_iteration=False)


# One Hot Encode the Characters
# https://stackoverflow.com/questions/49370940/one-hot-encoding-characters

alphabet = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789*+-.=/()?,'>:<!{}"
number_of_characters = len(alphabet)


def convert_to_onehot_tensor(data):
    
    data = data.numpy().decode("utf-8")
    
    #Creates a dict, that maps to every char of alphabet an unique int based on position
    char_to_int = dict((c,i) for i,c in enumerate(alphabet))
    encoded_data = []
    #Replaces every char in data with the mapped int
    
    encoded_data.append([char_to_int[char] for char in data])
    
    encoded_data = encoded_data[0]
    #This part now replaces the int by an one-hot array with size alphabet
    one_hot = []
    for value in encoded_data:
        #At first, the whole array is initialized with 0
        letter = [0 for _ in range(len(alphabet))]
        #Only at the number of the int, 1 is written
        letter[value] = 1
        one_hot.append(letter)
        
    return tf.transpose(tf.convert_to_tensor(one_hot))

# Map to Python Function from Tensorflow

def one_hot_encode_map(x):
  return (tf.py_function(convert_to_onehot_tensor, inp=[x[0]], Tout=(tf.int32)), tf.py_function(convert_to_onehot_tensor, inp=[x[1]], Tout=(tf.int32)))
        
#Map all Datapoints to One Hot Labeled Datapoints Character Wise
    
all_labeled_data = all_labeled_data.map(one_hot_encode_map)

print(next(iter(all_labeled_data)))


# Form Batches for train and test data and padd Questions and Answers with Zeros to equal Shapes

#all_labeled_data = all_labeled_data.padded_batch(BATCH_SIZE,padded_shapes = ([160,number_of_characters],[30,number_of_characters]))


train_data = all_labeled_data.skip(TAKE_SIZE).take(TAKE_SIZE).shuffle(BUFFER_SIZE)
train_data = train_data.padded_batch(BATCH_SIZE, padded_shapes=([number_of_characters, 160],[number_of_characters, 160]))

test_data = all_labeled_data.take(TAKE_SIZE)
test_data = test_data.padded_batch(BATCH_SIZE, padded_shapes=([number_of_characters,160],[number_of_characters,160]))


# Print out one Sample Inputs to see Format
#print(next(iter(test_data)))


model = tf.keras.Sequential()
model.add(layers.Input(shape=(80, 160)))
model.add(layers.LSTM(160, return_sequences=True))
print(model.summary())

optimizer = Adam(
    lr=6e-4,
    beta_1=0.9,
    beta_2=0.995,
    epsilon=1e-9,
    decay=0.0,
    amsgrad=False,
    clipnorm=0.1,
)

model.compile(optimizer=optimizer, loss="categorical_crossentropy")
    
model.fit(train_data,
          validation_data=test_data,
          epochs=3,
          verbose=1)



(<tf.Tensor: id=93004, shape=(80, 33), dtype=int32, numpy=
array([[0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>, <tf.Tensor: id=93005, shape=(80, 2), dtype=int32, numpy=
array([[0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
    

Exception ignored in: <bound method IteratorResourceDeleter.__del__ of <tensorflow.python.data.ops.iterator_ops.IteratorResourceDeleter object at 0x7f9c6a76fd68>>
Traceback (most recent call last):
  File "/home/bernhard/anaconda3/lib/python3.6/site-packages/tensorflow_core/python/data/ops/iterator_ops.py", line 541, in __del__
    handle=self._handle, deleter=self._deleter)
  File "/home/bernhard/anaconda3/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_dataset_ops.py", line 1157, in delete_iterator
    "DeleteIterator", handle=handle, deleter=deleter, name=name)
  File "/home/bernhard/anaconda3/lib/python3.6/site-packages/tensorflow_core/python/framework/op_def_library.py", line 793, in _apply_op_helper
    op_def=op_def)
  File "/home/bernhard/anaconda3/lib/python3.6/site-packages/tensorflow_core/python/framework/func_graph.py", line 544, in create_op
    inp = self.capture(inp)
  File "/home/bernhard/anaconda3/lib/python3.6/site-packages/tensorflow_core/python/ops/while_

Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f9c688ee048>

In [20]:
x ,y = next(iter(train_data))
print(x)
print(y)
y_pred = model.predict(x)
print(y_pred)

tf.Tensor(
[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]], shape=(64, 80, 160), dtype=int32)
tf.Tensor(
[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
 

TypeError: Input 'b' of 'MatMul' Op has type float32 that does not match type int32 of argument 'a'.