In [1]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, SimpleRNN, RepeatVector, TimeDistributed
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored

In [2]:
vocabulary ='0123456789+-*/.'
print('Vocabulary: ', vocabulary)
num_features = len(vocabulary)   
print('Number of features:', num_features)

Vocabulary:  0123456789+-*/.
Number of features: 15


In [3]:
char_to_index = dict((c, i) for i, c in enumerate(vocabulary))
index_to_char = dict((i, c) for i, c in enumerate(vocabulary))
print('char_to_index : ', char_to_index)
print('index_to_char : ', index_to_char)

char_to_index :  {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '+': 10, '-': 11, '*': 12, '/': 13, '.': 14}
index_to_char :  {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: '+', 11: '-', 12: '*', 13: '/', 14: '.'}


In [4]:

def weird_division(n, d):
    return n / d if d else 0

def generate_data():
    first_num = np.random.randint(low=0,high=100)
    second_num = np.random.randint(low=0,high=100)
    add = np.random.randint(low=0, high=100) 
    if add <25:
        expression = str(first_num) + '+' + str(second_num)
        result = str(first_num+second_num)
    elif add in range(26,50):
        expression = str(first_num) + '-' + str(second_num)
        result = str(first_num-second_num)
    elif add in range(50,75):
        expression = str(first_num) + '*' + str(second_num)
        result = str(first_num*second_num)
    else:
        expression = str(first_num) + '/' + str(second_num)
        res = weird_division(first_num, second_num)
        result = str(round(res,2))


    return expression, result

generate_data()

('83+51', '134')

In [5]:
hidden_units = 128
max_time_steps = 5    # maximum length of input sequence
def rnn_model():
  rnn_model = Sequential()
  rnn_model.add(SimpleRNN(hidden_units, input_shape=(None, num_features)))
  rnn_model.add(RepeatVector(max_time_steps))
  rnn_model.add(SimpleRNN(hidden_units, return_sequences=True))
  rnn_model.add(TimeDistributed(Dense(num_features, activation='softmax')))
  
  return rnn_model

model = rnn_model()
model.summary()
 
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 128)               18432     
                                                                 
 repeat_vector (RepeatVector  (None, 5, 128)           0         
 )                                                               
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 5, 128)            32896     
                                                                 
 time_distributed (TimeDistr  (None, 5, 15)            1935      
 ibuted)                                                         
                                                                 
Total params: 53,263
Trainable params: 53,263
Non-trainable params: 0
_________________________________________________________________


In [6]:
def vectorize_example(expression, result): 
    
    x = np.zeros((max_time_steps, num_features))                   
    y = np.zeros((max_time_steps, num_features))
    
    diff_x = max_time_steps - len(expression)                         
    diff_y = max_time_steps - len(result)                           
    
    for i, c in enumerate(expression):           
        x[diff_x+i, char_to_index[c]] = 1     
    for i in range(diff_x):                   
        x[i, char_to_index['0']] = 1         
   
    for i, c in enumerate(result):
        y[diff_y+i, char_to_index[c]] = 1
    for i in range(diff_y):
        y[i, char_to_index['0']] = 1
        
    return x, y

In [7]:

def devectorize_example(example):
    result = [index_to_char[np.argmax(vec)] for i, vec in enumerate(example)]
    return ''.join(result)

In [8]:
def strip_padding(example):
    encountered_non_zero = False
    output = ''
    for c in example:
        if not encountered_non_zero and c == '0':
            continue
        if c == '+' or c == '-' or c=='*' or c=='/' or c=='.':
            encountered_non_zero = False
        else:
            encountered_non_zero = True
        output += c
    return output

In [9]:

def create_dataset(num_examples):

    x_train = np.zeros((num_examples, max_time_steps, num_features))            
    y_train = np.zeros((num_examples, max_time_steps, num_features))            

    for i in range(num_examples):                                               
        e, l = generate_data()                                                  
        x, y = vectorize_example(e, l)                                          
        x_train[i] = x                                                          
        y_train[i] = y                                                          
    
    return x_train, y_train

In [10]:
x_train, y_train = create_dataset(50000)
simple_logger = LambdaCallback(
    on_epoch_end=lambda e, l: print('{:.2f}'.format(l['val_accuracy']), end=' _ ')
)
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

model.fit(x_train, y_train, epochs=100, validation_split=0.2, verbose=0, 
           callbacks=[simple_logger, early_stopping])

2023-04-29 22:08:42.462790: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


0.63 _ 0.67 _ 0.70 _ 0.70 _ 0.73 _ 0.76 _ 0.77 _ 0.79 _ 0.80 _ 0.81 _ 0.81 _ 0.82 _ 0.81 _ 0.81 _ 0.83 _ 0.83 _ 0.82 _ 0.83 _ 0.82 _ 0.84 _ 0.84 _ 0.83 _ 0.84 _ 0.84 _ 0.84 _ 0.84 _ 0.85 _ 0.85 _ 0.85 _ 0.85 _ 0.85 _ 0.85 _ 0.85 _ 0.85 _ 0.85 _ 0.85 _ 0.86 _ 0.85 _ 0.85 _ 0.86 _ 0.86 _ 0.85 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.85 _ 0.86 _ 0.87 _ 0.87 _ 0.86 _ 0.87 _ 0.87 _ 0.87 _ 0.86 _ 0.86 _ 0.87 _ 0.87 _ 0.86 _ 0.87 _ 0.87 _ 0.86 _ 0.87 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.87 _ 

<keras.callbacks.History at 0x106495450>

In [11]:
x_test, y_test = create_dataset(num_examples=20)
preds = model.predict(x_test)
full_seq_acc = 0

for i, pred in enumerate(preds):
    pred_str = strip_padding(devectorize_example(pred))
    y_test_str = strip_padding(devectorize_example(y_test[i]))
    x_test_str = strip_padding(devectorize_example(x_test[i]))
    col = 'green' if pred_str == y_test_str else 'red'
    full_seq_acc += 1/len(preds) * int(pred_str == y_test_str)
    outstring = 'Input: {}, Output: {}, Prediction: {}'.format(x_test_str, y_test_str, pred_str)
    print(colored(outstring, col))
print('\nFull sequence accuracy: {:.3f} %'.format(100 * full_seq_acc))


Input: 77/47, Output: 1.64, Prediction: 1.74
Input: 23+94, Output: 117, Prediction: 107
Input: 51-98, Output: -47, Prediction: -47
Input: 68*41, Output: 2788, Prediction: 2998
Input: 63*45, Output: 2835, Prediction: 2845
Input: 70-43, Output: 27, Prediction: 27
Input: 29/11, Output: 2.64, Prediction: 2.24
Input: 8+85, Output: 93, Prediction: 93
Input: 2-35, Output: -33, Prediction: -33
Input: 60+93, Output: 153, Prediction: 153
Input: 19*89, Output: 1691, Prediction: 1691
Input: 80/95, Output: .84, Prediction: .84
Input: 78+22, Output: 100, Prediction: 100
Input: 90*30, Output: 2700, Prediction: 2700
Input: 32+97, Output: 129, Prediction: 139
Input: 62/68, Output: .91, Prediction: .89
Input: 26+24, Output: 50, Prediction: 50
Input: 43+15, Output: 58, Prediction: 58
Input: 71-71, Output: , Prediction: 
Input: 63/56, Output: 1.12, Prediction: 1.11

Full sequence accuracy: 60.000 %
