<a href="https://colab.research.google.com/github/domschl/ALU_Net/blob/main/ALU_Net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A neural net that tries to become an ALU (arithmetic logic unit)

This notebook can run

- on local jupyter instances with a local graphics card
- on Mac M1 with local jupyter instance and [Apple's tensorflow-plugin](https://developer.apple.com/metal/tensorflow-plugin/)
- on Google Colab instances with either a GPU or a TPU runtime. The Colab version uses Google Drive to cache data and model state in the directory `My Drive/Colab Notebooks/ALU_Net`.

## 1. Configuration and setup

In [None]:
import os
import copy
import json
try:
    %tensorflow_version 2.x
except:
    pass
import tensorflow as tf
import numpy as np

# Selects which Keras distribution supplies the namespaces used in this notebook:
# False -> the Keras bundled with TensorFlow, True -> the standalone `keras` package.
use_keras_project_versions = False

if use_keras_project_versions:
    # Standalone Keras project.
    import keras
    from keras import layers, regularizers, callbacks, metrics, optimizers
else:
    # TensorFlow-bundled Keras (according to the original note, TPU use does not work otherwise).
    from tensorflow import keras
    from tensorflow.keras import layers, regularizers, callbacks, metrics, optimizers

# Detect Google Colab by probing for its `google.colab` package.
# Google Drive is used in Colab instances to save trained nets and tensorboard logs.
try:
    from google.colab import drive
except ImportError:
    # Package not installed: not a Colab runtime. Any other error is a real
    # problem and is allowed to surface instead of being silently swallowed.
    is_colab_init = False
else:
    is_colab_init = True

if is_colab_init is True:
    # On Colab the project's helper modules (ml_env_tools.py, ml_tuner.py,
    # ALU_Dataset.py) are downloaded from github before they are imported.
    def import_from_github(fn, repo_link, force_github_update=False):
        """Download `repo_link` with wget unless the file `fn` already exists.

        When `force_github_update` is True an existing `fn` is removed first and
        the file is downloaded again.
        """
        if os.path.exists(fn) is False or force_github_update is True:
            print(f"Loading {fn} module from github...")
            if os.path.exists(fn) is True:
                # IPython shell escape: delete the existing copy before re-downloading.
                !rm -v {fn}
            # IPython shell escape: download the file.
            !wget {repo_link}
    force_github_update = True  # Note: Even if set to True, you still need to restart the runtime to get an updated version.
    import_from_github('ml_env_tools.py','https://raw.githubusercontent.com/domschl/ALU_Net/main/ml_env_tools.py',force_github_update)
    import_from_github('ml_tuner.py','https://raw.githubusercontent.com/domschl/ALU_Net/main/ml_tuner.py',force_github_update)
    import_from_github('ALU_Dataset.py','https://raw.githubusercontent.com/domschl/ALU_Net/main/ALU_Dataset.py',force_github_update)

from ml_env_tools import MLEnv
from ml_tuner import MLTuner
from ALU_Dataset import ALU_Dataset

In [None]:
def model_large(inputs, params):
    """Build the convolutional model variant and return its output tensor.

    The 36-element input is reshaped to (36, 1), passed through
    params["conv_layers"] stacked Conv1D layers, flattened, fed through one
    ReLU dense layer and finally mapped to 32 sigmoid outputs.

    params keys read: "conv_layers", "filters", "kernel_size", "strides",
    "padding", "regu1" (L2 factor for the convolutions), "neurons" and
    "regu2" (L2 factor for the dense layer).
    """
    # Conv1D needs an explicit channel axis.
    tensor = layers.Reshape(target_shape=(36, 1,), input_shape=(36,))(inputs)

    for _ in range(params["conv_layers"]):
        conv = layers.Conv1D(
            filters=params["filters"],
            kernel_size=params["kernel_size"],
            strides=params["strides"],
            padding=params["padding"],
            kernel_regularizer=regularizers.l2(params["regu1"]),
            activation="relu",
        )
        tensor = conv(tensor)

    flat = layers.Flatten()(tensor)
    hidden = layers.Dense(
        params["neurons"],
        kernel_regularizer=regularizers.l2(params["regu2"]),
        activation="relu",
    )(flat)

    # Use sigmoid to map to bits 0..1
    return layers.Dense(32, activation="sigmoid")(hidden)

In [None]:
def model_medium(inputs, params):
    """Build the LSTM model variant and return its output tensor.

    The 36-element input is reshaped to (36, 1) and run through
    params["lstm_layers"] stacked LSTM layers of params["lstm_neurons"] units.
    Every LSTM except the last is created with return_sequences=True so that
    the following LSTM receives a sequence. The result goes through one ReLU
    dense layer (params["neurons"] units, L2 factor params["regu1"]) and a
    final 32-unit sigmoid layer.
    """
    depth = params["lstm_layers"]
    tensor = layers.Reshape(target_shape=(36, 1,), input_shape=(36,))(inputs)

    for index in range(depth):
        if index == depth - 1:
            lstm = layers.LSTM(params["lstm_neurons"])
        else:
            lstm = layers.LSTM(params["lstm_neurons"], return_sequences=True)
        tensor = lstm(tensor)

    hidden = layers.Dense(
        params["neurons"],
        kernel_regularizer=regularizers.l2(params["regu1"]),
        activation="relu",
    )(tensor)

    return layers.Dense(32, activation="sigmoid")(hidden)

In [None]:
def model_minimal_prm(inputs, params):
    """Build the dense model variant with concatenating skip connections.

    The first stage is a ReLU dense layer on the inputs. Each further stage
    (params["layer_cnt"] stages in total) applies a new ReLU dense layer to the
    previous stage and concatenates the result with that previous stage. The
    last stage feeds a 32-unit sigmoid output layer.

    params keys read: "layer_cnt", "neurons", "regu1" (L2 factor).
    """
    def new_dense():
        # All hidden layers share the same configuration.
        return layers.Dense(
            params["neurons"],
            kernel_regularizer=regularizers.l2(params["regu1"]),
            activation="relu",
        )

    stages = [new_dense()(inputs)]
    for _ in range(1, params["layer_cnt"]):
        previous = stages[-1]
        transformed = new_dense()(previous)
        stages.append(layers.Concatenate()([transformed, previous]))

    print(f"len xf: {len(stages)}, {params['layer_cnt']}")
    output_layer = layers.Dense(32, activation="sigmoid")
    return output_layer(stages[params["layer_cnt"] - 1])

In [None]:
def create_load_model(model_variant, params, save_path=None, import_weights=True):
    """Create and compile a new model, or load a previously saved one.

    A new model is built when `save_path` is None, does not exist on disk, or
    `import_weights` is False; otherwise the model stored at `save_path` is
    loaded with `tf.keras.models.load_model`.

    Args:
        model_variant: key into the module-level `model_variants` registry.
        params: hyper-parameter dict handed to the model builder; its
            "learning_rate" entry configures the Adam optimizer.
        save_path: location of a saved model, or None.
        import_weights: when False, a saved model is never loaded.

    Returns:
        The Keras model, or None if `model_variant` is not registered.
    """
    create_new = save_path is None or not os.path.exists(save_path) or import_weights is False #or is_tpu is True:
    if create_new:
        print("Initializing new model...")
        inputs = keras.Input(shape=(36,))  # depends on encoding of op-code!
        if model_variant not in model_variants:
            print('Unkown model type')
            return None
        outputs = model_variants[model_variant](inputs, params)
        model = keras.Model(inputs=inputs, outputs=outputs, name="maths_"+model_variant)
        print(f"Compiling new model of type {model_variant}")
        if use_keras_project_versions is False:
            opti = keras.optimizers.Adam(learning_rate=params["learning_rate"])
        else:
            opti = optimizers.Adam(learning_rate=params["learning_rate"])
        model.compile(loss="mean_squared_error", optimizer=opti, metrics=[metrics.MeanSquaredError(), 'accuracy'])
    else:
        # Report the path that is actually loaded (the `save_path` argument),
        # not an unrelated module-level variable.
        print(f"Loading standard-format model of type {model_variant} from {save_path}")
        model = tf.keras.models.load_model(save_path)
        print("Continuing training from existing model")
    model.summary()
    return model

In [None]:
def get_model(ml_env, model_variant, params, save_path=None, on_tpu=False, import_weights=False):
    """Create (or load) a model, optionally inside a TPU strategy scope.

    Args:
        ml_env: environment object; `tpu_is_init`, `tpu_address` and
            `weights_file` are read, `tpu_is_init` and `tpu_strategy` are set.
        model_variant, params, save_path, import_weights: forwarded to
            `create_load_model`.
        on_tpu: build the model under a `tf.distribute.TPUStrategy` scope.

    If `import_weights` is True and `ml_env.weights_file` exists, its weights
    are loaded into the model. On the non-TPU path the weights file is then
    renamed with an '-imported' suffix so it is not imported again.

    Returns:
        The model returned by `create_load_model`.
    """
    if on_tpu is True:
        # The strategy is cached on ml_env so that later calls, when the TPU is
        # already initialised, can still open a strategy scope. Previously the
        # strategy was only bound on the very first call.
        # NOTE(review): assumes MLEnv accepts/uses the attribute name
        # `tpu_strategy` for a tf.distribute strategy -- confirm against MLEnv.
        tpu_strategy = getattr(ml_env, "tpu_strategy", None)
        if ml_env.tpu_is_init is False or tpu_strategy is None:
            cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=ml_env.tpu_address)
            if ml_env.tpu_is_init is False:
                tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
                ml_env.tpu_is_init = True
            tpu_strategy = tf.distribute.TPUStrategy(cluster_resolver)
            ml_env.tpu_strategy = tpu_strategy
        with tpu_strategy.scope():
            print("Creating TPU-scope model")
            model = create_load_model(model_variant, params, save_path=save_path, import_weights=import_weights)
        if import_weights is True and ml_env.weights_file is not None and os.path.exists(ml_env.weights_file):
            # Weights are loaded into a temporary model created outside the
            # strategy scope and then copied over to the TPU model.
            print("Injecting saved weights into TPU model, loading...")
            temp_model = create_load_model(model_variant, params, save_path=save_path, import_weights=import_weights)
            temp_model.load_weights(ml_env.weights_file)
            print("Injecting...")
            model.set_weights(temp_model.get_weights())
            print("Updated TPU weights from saved model")
        return model
    else:
        print("Creating standard-scope model")
        model = create_load_model(model_variant, params, save_path=save_path, import_weights=import_weights)
        if import_weights is True and ml_env.weights_file is not None and os.path.exists(ml_env.weights_file):
            print("Injecting saved weights into model, loading...")
            model.load_weights(ml_env.weights_file)
            imported_weights_file = ml_env.weights_file+'-imported'
            os.rename(ml_env.weights_file, imported_weights_file)
            print(f"Renamed weights file {ml_env.weights_file} to {imported_weights_file} to prevent further imports!")
        return model

In [None]:
def math_train(mlenv:MLEnv, model, dataset, validation, batch_size=8192, epochs=5000, steps_per_epoch=2000, log_path="./logs"):
    """Run `model.fit` once and report how it ended.

    Args:
        mlenv: environment object; `is_tpu` selects the fit variant and
            `epoch_time_func` is installed as an on-epoch-end callback when
            not on TPU.
        model: compiled Keras model.
        dataset: training data passed to `model.fit`.
        validation: validation data passed to `model.fit`.
        batch_size: passed to `model.fit` on the non-TPU path only.
        epochs: number of epochs to train.
        steps_per_epoch: passed to `model.fit` on the TPU path only.
        log_path: TensorBoard log directory.

    Returns:
        (interrupted, hist): `interrupted` is 0 after a complete run, 1 when
        the user interrupted training (KeyboardInterrupt) and 2 when training
        raised an exception; `hist` is the History object from `model.fit`,
        or None if fit did not finish.
    """
    interrupted = 2
    hist = None
    tensorboard_callback = callbacks.TensorBoard(
        log_dir=log_path
        # histogram_freq=1
        # update_freq='batch'
        )
    # Only the `mlenv` argument is consulted here (not a module-level
    # environment object), so the function honours what the caller passes in.
    if mlenv.is_tpu is False: # TPUs update Tensorboard too asynchronously, data is corrupted by updates during mirroring.
        lambda_callback = tf.keras.callbacks.LambdaCallback(
            on_epoch_end = mlenv.epoch_time_func
        )
    try:
        if mlenv.is_tpu:
            hist = model.fit(dataset, validation_data=validation, epochs=epochs, steps_per_epoch=steps_per_epoch, verbose=1, callbacks=[tensorboard_callback])
            interrupted=0
        else:
            hist = model.fit(dataset, validation_data=validation, epochs=epochs, batch_size=batch_size, verbose=1, callbacks=[tensorboard_callback, lambda_callback])
            interrupted=0
    except KeyboardInterrupt:
        print("")
        print("")
        print("---------INTERRUPT----------")
        print("")
        print("Training interrupted")
        interrupted = 1 # user stopped runtime
    except Exception as e:
        interrupted = 2  # Bad: something crashed.
        print(f"INTERNAL ERROR")
        print(f"Exception {e}")
    # Returning after the try statement (instead of from a `finally` clause)
    # keeps exceptions that are not handled above from being discarded.
    return interrupted, hist

In [None]:
def instantiate_models(ml_env:MLEnv, model_variant, params, save_path=None, import_weights=True):
    """Create the model used for training and, on TPU, a second model for testing.

    Args:
        ml_env: environment object; `is_tpu` decides whether two models are built.
        model_variant, params, save_path, import_weights: forwarded to `get_model`.

    Returns:
        (math_model, test_model): `test_model` is None when not running on TPU.
    """
    if ml_env.is_tpu:
        # Build the TPU model first. The non-TPU path of get_model renames the
        # weights file after importing it, so creating the CPU model first
        # would leave no weights file for the TPU model to import.
        math_model = get_model(ml_env, model_variant, params, save_path=save_path, on_tpu=True, import_weights=import_weights)
        # Generate a second CPU model for testing:
        test_model = get_model(ml_env, model_variant, params, save_path=save_path, on_tpu=False, import_weights=import_weights)
    else:
        test_model = None
        math_model = get_model(ml_env, model_variant, params, save_path=save_path, on_tpu=False, import_weights=import_weights)
    return math_model, test_model

In [None]:
def do_training(mlenv:MLEnv, math_model, training_dataset, validation_dataset, math_data, epochs_per_cycle, model_path=None,
                weights_file=None, test_model=None, cycles=100, steps_per_epoch=1000, reweight_size=1000, valid_ops=None, regenerate_data_after_cycles=0, data_func=None,
                log_path='./logs'):
    """Train in up to `cycles` cycles ("meta-epochs"), saving and checking after each.

    Each cycle calls `math_train` for `epochs_per_cycle` epochs. After a cycle
    that did not crash, the model (non-TPU: `math_model.save(model_path)`) or
    the weights (TPU: copied into `test_model`, then saved to `weights_file`)
    are stored and `math_data.check_results` is run on `reweight_size` samples.
    The loop stops as soon as a cycle was interrupted or crashed.

    Args:
        mlenv: environment object; `is_tpu` and `gdrive_log_mirror()` are used.
        math_model: the model being trained.
        training_dataset, validation_dataset: data passed to `math_train`.
        math_data: object providing `check_results`.
        epochs_per_cycle: epochs per call to `math_train`.
        model_path: where to save the model on the non-TPU path (None: don't save).
        weights_file: where to save weights on the TPU path (None: don't save).
        test_model: model used for checking on the TPU path (required there).
        cycles: maximum number of cycles.
        steps_per_epoch: forwarded to `math_train`.
        reweight_size: number of samples for the accuracy check.
        valid_ops: forwarded to `check_results`.
        regenerate_data_after_cycles: if non-zero and `data_func` is given,
            `data_func()` replaces both datasets whenever the 1-based cycle
            number is a multiple of this value (never on the first cycle).
        data_func: callable returning (training_dataset, validation_dataset).
        log_path: TensorBoard log directory forwarded to `math_train`.

    Returns:
        False if running on TPU without a `test_model`; otherwise None.
    """
    # Training
    for mep in range(0, cycles):
        print()
        print()
        print(f"------ Meta-Epoch {mep+1}/{cycles} ------")
        print()
        if regenerate_data_after_cycles!=0 and data_func is not None:
            if mep>0 and (mep+1)%regenerate_data_after_cycles==0:
                training_dataset, validation_dataset = data_func()
        # The `mlenv` argument is used throughout, so the function does not
        # depend on a module-level environment object being defined.
        if mep==0 and mlenv.is_tpu is True:
            print("There will be some warnings by Tensorflow, documenting some state of internal decoherence, currently they can be ignored.")
        interrupted, hist = math_train(mlenv, math_model, training_dataset, validation=validation_dataset, epochs=epochs_per_cycle, steps_per_epoch=steps_per_epoch, log_path=log_path)
        if interrupted <2:
            if mlenv.is_tpu:
                mlenv.gdrive_log_mirror()  # TPUs can only safely mirror Tensorboard data once training is finished for a meta-epoch.
                if test_model is None:
                    print("Fatal: tpu-mode needs test_model on CPU")
                    return False
                print("Injecting weights into test_model:")
                test_model.set_weights(math_model.get_weights())
                if weights_file is not None:
                    print(f"Saving test-model weights to {weights_file}")
                    test_model.save_weights(weights_file)
                    print("Done")
                print(f"Checking {reweight_size} datapoints for accuracy...")
                math_data.check_results(test_model, samples=reweight_size, short_math=False, valid_ops=valid_ops, verbose=False)
            else:
                if model_path is not None:
                    print("Saving math-model")
                    math_model.save(model_path)
                    print("Done")
                print(f"Checking {reweight_size} datapoints for accuracy...")
                math_data.check_results(math_model, samples=reweight_size, short_math=False, valid_ops=valid_ops, verbose=False)
        if interrupted>0:
            break

In [None]:
# Registry of model builder functions, keyed by the names valid for `model_variant`.
model_variants = {
    "large": model_large,
    "medium": model_medium,
    "minimal_prm": model_minimal_prm,
}

# --- Training run configuration ---
model_variant = "medium"  # must be a key of model_variants
epochs_per_cycle = 250
cycles = 10  # number of training cycles; each cycle trains for epochs_per_cycle epochs
# If != 0, the training data is created anew every `regenerate_data_after_cycles`
# cycles. Disadvantage: when training on TPU, Google might use the time it takes
# to regenerate the training data to terminate your session :-/
regenerate_data_after_cycles = 3
samples = 2_000_000  # number of training data examples
batch_size = 20_000
learning_rate = 0.001
import_weights = True
# Default: None (all ops), or a list of ops, e.g. ['*', '/'] trains only
# multiplication and division.
valid_ops = None
steps_per_epoch = samples // batch_size  # TPU stuff

# --- Hyper-parameter sets, one per model variant ---
params_large = {
    "conv_layers": 4,
    "filters": 64,
    "kernel_size": 3,
    "strides": 1,
    "padding": "same",
    "neurons": 128,
    "learning_rate": 0.001,
    "regu1": 1e-8,
    "regu2": 1e-8,
}

# Earlier setting, with full history:
# params_medium = {'lstm_layers': 4, 'lstm_neurons': 704, 'neurons': 368, 'learning_rate': 0.0014, 'regu1': 2e-07}
# Current setting (with final layer not giving full history):
params_medium = {
    "learning_rate": 0.0012,
    "lstm_layers": 5,
    "lstm_neurons": 640,
    "neurons": 368,
    "regu1": 8e-07,
}

params_minimal_prm = {
    "layer_cnt": 8,
    "neurons": 512,
    "learning_rate": 0.001,
    "regu1": 1e-8,
}

# Hyper-parameters used for this run.
params = params_medium

In [None]:
# Environment helper and the dataset generator bound to it.
ml_env = MLEnv()
math_data = ALU_Dataset(ml_env)

# Paths for this project / model variant, as provided by the environment helper.
(root_path, project_path, model_path,
 weights_file, cache_path, log_path) = ml_env.init_paths(
    "ALU_Net",
    "math_model",
    model_variant=model_variant,
    log_to_gdrive=False,
)

In [None]:
# Set to True to run the hyper-parameter search in the following cell before training.
apply_model_tuner = False

In [None]:
if apply_model_tuner is True:
    # Smaller datasets are used for the hyper-parameter search.
    as_train, as_val = math_data.get_datasets(samples=100000, validation_samples=10000, cache_path=cache_path)

    def tuner_eval(ml_env:MLEnv, model_variant, params, batch_size, epochs):
        """Train a fresh model with `params` and return a score (higher is better).

        The score is 1/val_loss + 50*val_accuracy, both taken from the last epoch.
        """
        math_model, _ = instantiate_models(ml_env, model_variant, params, save_path=None, import_weights=False)
        interrupted, hist = math_train(ml_env, math_model, as_train, as_val, batch_size=batch_size, epochs=epochs)
        print(params, end=" [ ")
        history = hist.history
        return 1/history['val_loss'][-1]+history['val_accuracy'][-1]*50

    def tuner_eval_func(params):
        """Single-argument evaluation callback handed to the tuner."""
        return tuner_eval(ml_env, model_variant, params, batch_size=1024, epochs=15)

    # ev=41.651991886632274: {'lstm_layers': 5, 'lstm_neurons': 640, 'neurons': 368, 'learning_rate': 0.0014, 'regu1': 8e-07}
    param_space = {
        "lstm_layers": [3, 4, 5],
        "lstm_neurons": [640, 704, 768],
        "neurons": [368, 432, 512],
        "learning_rate": [0.0014, 0.0012, 0.0016],
        "regu1": [8e-7, 2e-7, 1e-7, 8e-8],
    }

    ml_tuner = MLTuner(ml_env, model_variant)
    best_params = ml_tuner.tune(param_space, tuner_eval_func)

    # Train with the tuned parameters, starting from a fresh model.
    params = best_params
    import_weights = False

In [None]:
# Display the hyper-parameters currently selected.
params

In [None]:
def create_train_val_data(regen):
    """Return (training, validation) datasets; `regen` is passed as regenerate_cached_data."""
    return math_data.get_datasets(
        pre_weight=True,
        samples=samples,
        validation_samples=50000,
        batch_size=batch_size,
        short_math=False,
        valid_ops=valid_ops,
        cache_path=cache_path,
        use_cache=True,
        regenerate_cached_data=regen,
    )


def create_train_val_data_regen():
    """Zero-argument variant that always requests regeneration of cached data."""
    return create_train_val_data(True)


train, val = create_train_val_data(False)

In [None]:
# Build the training model (and, when ml_env.is_tpu is set, a separate test model),
# using model_path and import_weights to decide whether saved state is loaded.
math_model, test_model = instantiate_models(ml_env, model_variant, params, save_path=model_path, import_weights=import_weights)
# Alternative: always start from a newly initialised model.
# math_model, test_model = instantiate_models(ml_env, model_variant, params, save_path=None, import_weights=False)

In [None]:
# Best effort: show TensorBoard inline when running inside IPython/Jupyter.
try:
    # use the python variable log_path:
    get_ipython().run_line_magic('tensorboard', '--logdir "{log_path}"')
except Exception as e:
    # E.g. not running under IPython, or the tensorboard magic is not loaded.
    # Training does not depend on it, so only report the problem.
    print(f"TensorBoard could not be started inline: {e}")

In [None]:
# Run the training cycles with the configuration defined above; the training data is
# regenerated via create_train_val_data_regen according to regenerate_data_after_cycles.
do_training(ml_env, math_model, train, val, math_data, epochs_per_cycle, model_path=model_path, 
            weights_file=weights_file, test_model=test_model, cycles=cycles, steps_per_epoch=steps_per_epoch, valid_ops=valid_ops, 
            regenerate_data_after_cycles=regenerate_data_after_cycles, data_func=create_train_val_data_regen, log_path=log_path)

# Testing and applying the trained model

In [None]:
# When not on TPU, use the trained model itself as the test model.
if ml_env.is_tpu is False:
    test_model = math_model
# Check 100 samples with verbose output.
math_data.check_results(test_model, samples=100, short_math=False, verbose=True)

In [None]:
# Create a single data point for 22 + 33.
# presumably dx is the encoded model input and dy the expected output -- confirm against ALU_Dataset
dx,dy,_,_,_=math_data.create_data_point(22,33,'+')

In [None]:
# Run the model on a batch containing only dx and decode its prediction.
math_data.decode_results(test_model.predict(np.array([dx])))

In [None]:
def calc(inp):
    """Let the model evaluate '<int> <operator> <int>' and compare with Python's result.

    Args:
        inp: string with three whitespace-separated tokens, e.g. '3 + 4' or
            '4 XOR 5'. The operator must be listed in `math_data.model_ops`.

    Returns:
        (model_answer, reference_answer) on success, or False if the input is
        malformed (a message is printed in that case).
    """
    args = inp.split()
    if len(args)!=3:
        print("need three space separated tokens: <int> <operator> <int>, e.g. '3 + 4' or '4 XOR 5'")
        return False
    op_name = args[1]
    if op_name not in math_data.model_ops:
        print(f"{op_name} is not a known operator.")
        return False
    try:
        op1=int(args[0])
        op2=int(args[2])
    except ValueError:
        print("need three space separated tokens: <int> <operator> <int>, e.g. '3 + 4' or '4 XOR 5'")
        return False
    dx,dy,_,_,_=math_data.create_data_point(op1, op2, op_name)
    ans=math_data.decode_results(test_model.predict(np.array([dx])))
    print(f"{op1} {op_name} {op2} = {ans[0]}")
    # Translate only the operator token to Python syntax. Rewriting the whole
    # expression text would also mangle operators that merely contain '='
    # (e.g. '!=' would become '!==').
    python_ops = {'AND': '&', 'XOR': '^', 'OR': '|', '=': '=='}
    op=f"{op1} {python_ops.get(op_name, op_name)} {op2}"
    # NOTE(review): eval() is applied to a string built from two ints and an
    # operator that passed the model_ops check above; keep it that way and
    # never feed unvalidated text into this expression.
    an2=eval(op)
    if ans[0]!=an2:
        print("Error")
        print(bin(ans[0]))
        print(bin(an2))
    return ans[0],an2

In [None]:
# Example: the '=' operator applied to two different operands.
calc("222 = 223")

In [None]:
# Example: the '=' operator applied to two identical operands.
calc("8812 = 8812")

In [None]:
# Example: multiplication.
calc("3 * 4")

In [None]:
# Example: the 'AND' operator.
calc ("1 AND 3")