Learns to solve simple math problems involving two ten-digit numbers.
My goal at the beginning was to have it do multiplication.
I had problems early on where it refused to converge at all, so I worked my way up, starting with really easy problems.
It had no trouble converging when the problem was, "Take the first number and send it back out". That confirmed that I at
least had written the learning and question-generation correctly. I moved up to "Take the first number and add three".
That worked too. Then I had it add a larger number (which would involve more carrying). I realized that the problem might
be that I hadn't given it enough nodes to properly implement the logic. Once I upgraded the network from two 200-node
hidden layers to 2000 and 500, it was able to solve the problem of adding the two numbers together with good accuracy. I
still haven't gotten it to do multiplication.

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import matplotlib.pyplot as plt
import random
import time

In [None]:
# Define custom layers

# This is like the Softmax function, but it does it independently on each digit in the result
# So each group of ten nodes should sum up to one
class TenSoftmax(keras.layers.Layer):
    """Apply softmax separately to each of ten equal slices of the input.

    The input is split into ten equally sized groups along axis 1, a
    softmax is applied to each group on its own, and the groups are
    concatenated back together in their original order.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.softmax = keras.layers.Softmax()

    def call(self, tensor, training=True):
        """Return ``tensor`` with every digit group normalised independently.

        Args:
            tensor: Input whose axis 1 is divisible into ten equal groups.
            training: Accepted for Keras compatibility; not used here.
        """
        digit_groups = tf.split(tensor, num_or_size_splits=10, axis=1)
        # Each group must be normalised from its OWN values (the previous
        # version indexed group 1 for every position).
        normalised = [self.softmax(group) for group in digit_groups]
        return tf.concat(normalised, axis=1)

In [None]:
# Define the model
def math_model():
    """Build the fully connected network used to learn digit-wise arithmetic.

    The network takes 200 inputs and stacks Dense layers of 2000, 500 and
    100 units, each followed by a LeakyReLU activation. A summary of the
    model is printed as a side effect.

    Returns:
        The (uncompiled) ``keras.Sequential`` model.
    """
    # They happen in a linear order
    model = keras.Sequential()

    # Input layer / First Hidden Layer
    model.add(keras.layers.Dense(2000, input_shape=(200,)))
    model.add(keras.layers.LeakyReLU())

    # Second Hidden Layer
    model.add(keras.layers.Dense(500))
    model.add(keras.layers.LeakyReLU())

    # Output Layer
    model.add(keras.layers.Dense(100))
    model.add(keras.layers.LeakyReLU())
    # model.add(TenSoftmax())

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line.
    model.summary()

    return model

# Build the network once at module level; evaluate() and train_step() use this global
model = math_model()

In [None]:
# Loss helper: mean squared error (the cross-entropy alternative is kept below, disabled)
loss_function = tf.keras.losses.MeanSquaredError()    
# loss_function = tf.keras.losses.CategoricalCrossentropy()

# Declare optimizer (Use Adam optimizer w/ learning rate of 1e-4)
optimizer = tf.keras.optimizers.Adam(1e-4)

In [None]:
def evaluate(count):
    """Score the global ``model`` on ``count`` freshly generated problems.

    A problem counts as correct only when, for every one of the ten output
    digit groups, the position of the largest predicted value matches the
    position of the largest target value. The first target/prediction pair
    is printed as a quick visual check.

    Args:
        count: Number of problems to generate and evaluate.

    Returns:
        A tuple ``(correct_answers, wrongness)``: the number of fully correct
        answers (int) and the value of ``loss_function`` on the batch.
    """
    input_data, output_data = generate_data(count)
    # This is inference, so run the model in non-training mode.
    output = model(input_data, training=False)

    print(output_data[0])
    print(output[0])

    # View each answer as 10 digit groups of 10 scores and decode every group
    # to the index of its largest value.
    predicted_digits = np.argmax(
        np.reshape(np.asarray(output), (count, 10, 10)), axis=-1)
    expected_digits = np.argmax(
        np.reshape(np.asarray(output_data), (count, 10, 10)), axis=-1)

    # An answer is right only if all ten digits agree.
    all_digits_match = np.all(predicted_digits == expected_digits, axis=-1)
    correct_answers = int(np.sum(all_digits_match))

    # Keras losses take (y_true, y_pred) in that order.
    wrongness = loss_function(output_data, output)

    return correct_answers, wrongness

def get_digit(number, digit):
    """Return the decimal digit of ``number`` at position ``digit``.

    Position 0 is the ones digit, 1 the tens digit, and so on. Positions
    beyond the length of the number yield 0.

    Args:
        number: The value to inspect.
        digit: Zero-based position counted from the least significant digit.

    Returns:
        The digit at that position as an ``int`` in the range 0-9.
    """
    # Drop everything above the requested position...
    remainder = number % (10 ** (digit + 1))
    # ...then floor-divide away the lower positions. Floor division keeps
    # integer inputs exact, whereas true division goes through a float and
    # can round large values up to the next digit.
    return int(remainder // (10 ** digit))

def digit_vector(num):
    """Return a length-10 one-hot NumPy vector with a 1 at index ``num``."""
    one_hot = np.full(10, 0.0)
    one_hot[num] = 1.0
    return one_hot

def generate_data(data_points):
    """Generate random addition problems encoded as one-hot digit vectors.

    Each problem draws two random integers below 10**10 and adds them.
    Every number is written as ten decimal digits, most significant first,
    and each digit becomes a one-hot vector of length 10. Only the lowest
    ten digits of the sum are encoded, so a carry into an eleventh digit
    is dropped.

    Args:
        data_points: Number of problems to generate.

    Returns:
        A tuple ``(inputs, outputs)`` of float64 ``tf.Variable`` objects.
        Each input row holds 200 values (both operands) and each output
        row holds 100 values (the sum).
    """
    entries_input = []
    entries_output = []
    for _ in range(data_points):
        # Generate numbers
        num1 = int(random.random() * 10000000000)
        num2 = int(random.random() * 10000000000)
        answer = num1 + num2

        # Convert to vectors: first operand's digits, then the second's
        entry_input = []
        for operand in (num1, num2):
            for place in range(9, -1, -1):
                entry_input.extend(digit_vector(get_digit(operand, place)))
        entry_output = []
        for place in range(9, -1, -1):
            entry_output.extend(digit_vector(get_digit(answer, place)))

        # Append
        entries_input.append(entry_input)
        entries_output.append(entry_output)

    # The second positional parameter of tf.Variable is `trainable`, not the
    # dtype, so both are passed by keyword. This is data, not a model weight.
    ret_input = tf.Variable(entries_input, trainable=False, dtype=tf.float64)
    ret_output = tf.Variable(entries_output, trainable=False, dtype=tf.float64)

    return ret_input, ret_output

# generate_data(1)

In [None]:
# tf.function annotation causes the function
# to be "compiled" as part of the training
@tf.function
def train_step(input_data, output_data):
    """Run one optimisation step of the global ``model`` on a minibatch.

    Args:
        input_data: Batch of encoded problems fed to the model.
        output_data: Batch of encoded target answers.

    Returns:
        The loss for this batch, computed before the weights are updated.
    """
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        output = model(input_data, training=True)
        loss = loss_function(output_data, output)

    # Differentiate and update outside the tape context so these operations
    # are not themselves recorded.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return loss
    
    # Evaluate
    #input_data_test, output_data_test = generate_data(100)
    #output = model(input_data_test, training=False)

In [None]:
def train(epochs, steps_per_epoch=200, batch_size=100, eval_problems=100):
    """Train the global model, reporting time and accuracy after each epoch.

    Every epoch runs ``steps_per_epoch`` training steps, each on a freshly
    generated minibatch, then evaluates on newly generated problems and
    prints the score and loss.

    Args:
        epochs: Number of epochs to run.
        steps_per_epoch: Minibatches processed per epoch.
        batch_size: Number of problems in each minibatch.
        eval_problems: Number of problems used for the end-of-epoch check.
    """
    # For each epoch...
    for epoch in range(epochs):
        start = time.time()

        # For each minibatch...
        for _ in range(steps_per_epoch):
            input_data, output_data = generate_data(batch_size)
            train_step(input_data, output_data)

        # Print output (the timing covers training only, not evaluation)
        print(f"Epoch {epoch + 1} took {time.time() - start} seconds")
        s, w = evaluate(eval_problems)
        print(f"Score: {s}/{eval_problems}")
        print(f"Wrongness: {w}")
        print()

  
# Kick off training for up to 1,000,000 epochs
train(1000000)