In [22]:
from Coder import Coder

In [23]:
import keras
from keras import layers
import numpy as np

In [24]:
TRAINING_SIZE = 50000
DIGITS = 3
BASE_NUMS = "0123456789"
REVERSE = True

MAXLEN = DIGITS + 1 + DIGITS # question max length (num_op_num)
MAXLEN_ANSWER = DIGITS + 1

In [25]:
def generate_question():
    return int("".join(
        np.random.choice(list(BASE_NUMS)) 
        for _ in range(np.random.randint(1, DIGITS + 1))))

In [26]:
chars = "0123456789+ "
questions = []
answers = []
seen = set()
ct =0
while len(questions) < TRAINING_SIZE and ct < TRAINING_SIZE:
    ct+=1
    a, b = generate_question(), generate_question()

    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)

    question = f"{a}+{b}"
    question += ' '*(MAXLEN - len(question))

    answer = str(a + b)
    answer += ' '*(MAXLEN_ANSWER-len(answer))

    if REVERSE:
        question = question[::-1]
    
    questions.append(question)
    answers.append(answer)

print(f"{TRAINING_SIZE} questions and answers generated.")

50000 questions and answers generated.


In [27]:
questions[0], answers[0]

(' 299+86', '1060')

In [28]:
coder = Coder(chars)

X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), MAXLEN_ANSWER, len(chars)), dtype=bool)

for i, (q, a) in enumerate(zip(questions, answers)):
    X[i] = coder.encode(q, MAXLEN)
    y[i] = coder.encode(a, MAXLEN_ANSWER)

# random shuffle
indices = np.arange(len(y))
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

split_at = len(X) - len(X)//10
(X_train, X_test) = X[:split_at], X[split_at:]
(y_train, y_test) = y[:split_at], y[split_at:]

print("Training Data:")
print(X_train.shape)
print(y_train.shape)

print("Testing Data:")
print(X_test.shape)
print(y_test.shape)

Training Data:
(21789, 7, 12)
(21789, 4, 12)
Testing Data:
(2421, 7, 12)
(2421, 4, 12)


In [29]:
num_layers = 1

model = keras.Sequential()

model.add(layers.Input((MAXLEN, len(chars))))
model.add(layers.LSTM(128))

model.add(layers.RepeatVector(MAXLEN_ANSWER))

for _ in range(num_layers):
    model.add(layers.LSTM(128, return_sequences=True))

model.add(layers.Dense(len(chars), activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 128)               72192     
                                                                 
 repeat_vector_2 (RepeatVec  (None, 4, 128)            0         
 tor)                                                            
                                                                 
 lstm_5 (LSTM)               (None, 4, 128)            131584    
                                                                 
 dense_2 (Dense)             (None, 4, 12)             1548      
                                                                 
Total params: 205324 (802.05 KB)
Trainable params: 205324 (802.05 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [30]:
epochs = 30
batch_size = 32

for i in range(1, epochs):
    print()
    print(f"Iteration {i}")
    model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=1,
        validation_data=(X_test, y_test)
    )

    for i in range(10):
        ind = np.random.randint(0, len(X_test))
        rowx, rowy = X_test[np.array([ind])], y_test[np.array([ind])]

        preds = np.argmax(model.predict(rowx, verbose=0), axis=-1)

        question = coder.decode(rowx[0])
        correct = coder.decode(rowy[0])
        guess = coder.decode(preds[0], calc_argmax=False)
        print("Q", question[::-1] if REVERSE else question, end=' ')
        print("T", correct, end=' ')
        if correct == guess:
            print(f'CORRECT : {guess}')
        else:
            print(f'FALSE : {guess}')



Iteration 1


Q 149+861 T 1010 FALSE : 111 
Q 513+687 T 1200 FALSE : 111 
Q 26+30   T 56   FALSE : 32  
Q 34+213  T 247  FALSE : 344 
Q 816+49  T 865  FALSE : 101 
Q 923+8   T 931  FALSE : 125 
Q 74+516  T 590  FALSE : 445 
Q 5+101   T 106  FALSE : 111 
Q 137+16  T 153  FALSE : 322 
Q 3+963   T 966  FALSE : 130 

Iteration 2
Q 79+1    T 80   FALSE : 17  
Q 32+89   T 121  FALSE : 138 
Q 199+377 T 576  FALSE : 718 
Q 8+188   T 196  FALSE : 888 
Q 563+73  T 636  FALSE : 601 
Q 364+356 T 720  FALSE : 119 
Q 280+91  T 371  FALSE : 298 
Q 34+57   T 91   FALSE : 118 
Q 6+701   T 707  FALSE : 777 
Q 66+83   T 149  FALSE : 117 

Iteration 3
Q 9+634   T 643  FALSE : 644 
Q 371+8   T 379  FALSE : 381 
Q 251+7   T 258  FALSE : 265 
Q 284+6   T 290  FALSE : 299 
Q 86+927  T 1013 FALSE : 101 
Q 2+651   T 653  FALSE : 665 
Q 73+582  T 655  FALSE : 605 
Q 218+17  T 235  FALSE : 255 
Q 236+578 T 814  FALSE : 805 
Q 95+626  T 721  FALSE : 705 

Iteration 4
Q 9+496   T 505  FALSE : 494 
Q 88+24   T 112  FALSE : 122 
Q

KeyboardInterrupt: 

# TESTING

In [None]:
demo_q = '27+273'
demo_q += ' '*(MAXLEN - len(question))
# demo_q = demo_q[::-1]
demo_q_en = np.array([coder.encode(demo_q, MAXLEN)])

demo_pred = model.predict(demo_q_en, verbose=0)
demo_pred = np.argmax(demo_pred, axis=-1)

coder.decode(demo_pred[0], calc_argmax=False)

IndexError: index 5 is out of bounds for axis 0 with size 5