<a href="https://colab.research.google.com/github/domschl/ALU_Net/blob/main/ALU_Net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Simulating an ALU (arithmetic logic unit) with a neural network

The neural network is trained to perform the operations `+`, `-`, `*`, `/`, `%`, `AND`, `OR`, `XOR`, `>`, `<`, `=`, `!=` on two unsigned integers and return the result.

## This notebook can run

- on local Jupyter instances with a local graphics card
- on a Mac M1 with a local Jupyter instance and [Apple's tensorflow-plugin](https://developer.apple.com/metal/tensorflow-plugin/)
- on Google Colab instances with either a GPU or a TPU runtime. The Colab version uses a Google Drive account to cache data and model state in the Google Drive directory `My Drive/Colab Notebooks/ALU_Net`.

## 1. Configuration and setup

In [None]:
# Install (or upgrade to the latest) ml-indie-tools; the import cell of this
# notebook takes MLEnv, ALU_Dataset and the custom Keras layers from it.
!pip install -U ml-indie-tools

In [None]:
import os
import copy
import json
try:
    %tensorflow_version 2.x
except:
    pass
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers, regularizers, callbacks, metrics, optimizers
import numpy as np

from ml_indie_tools.env_tools import MLEnv
from ml_indie_tools.ALU_Dataset import ALU_Dataset
from ml_indie_tools.keras_custom_layers import ResidualBlock, ResidualDense, ResidualDenseStack, ParallelResidualDenseStacks, SelfAttention, MultiHeadSelfAttention    

In [None]:
def model_res_mod(inputs, params):
    """Build the forward graph of the 'res_mod' model variant.

    The input is passed through a stack of multi-head self-attention layers,
    flattened, optionally projected by a linear dense layer and refined by a
    residual dense stack, and finally mapped to the output by a
    sigmoid-activated dense layer.
    See keras_custom_layers.py for the definition of the custom layers.

    Args:
        inputs: Keras input tensor of the model.
        params: dict of hyper-parameters. Keys read here:
            'self_attention_layers', 'self_attention_heads', 'norm',
            'layers', 'output_size' and, only if 'layers' > 0,
            'units' and 'regularizer'.
            'self_attention_units' is currently not used by this builder.

    Returns:
        The output tensor of the final dense layer.
    """
    x = inputs
    print(f"input-shape: {x.shape}")
    # Each attention layer is created and applied in turn; the creation order
    # is the same as when all layers are instantiated up front.
    for _ in range(params['self_attention_layers']):
        attention = MultiHeadSelfAttention(params['self_attention_heads'], norm=params['norm'])
        x = attention(x)
    x = layers.Flatten()(x)
    if params["layers"] > 0:
        # Linear projection to the width expected by the residual stack.
        x = layers.Dense(params['units'], activation=None)(x)
        x = ResidualDenseStack(params["units"], params["layers"], regularizer=params["regularizer"])(x)
    outputs = layers.Dense(params['output_size'], activation="sigmoid")(x)
    return outputs

In [None]:
def create_load_model(ml_env:MLEnv, model_variant, params, save_path=None, weights_file=None):
    """Create and compile a new model, or load a previously saved one.

    A new model is built unless `save_path` names an existing path AND
    `weights_file` is given; only in that case the model stored at
    `save_path` is loaded instead.

    Args:
        ml_env: environment descriptor; kept for interface compatibility,
            the compile settings are the same on every platform.
        model_variant: key into the module-level `model_variants` registry.
        params: dict of hyper-parameters; 'input_size' and 'learning_rate'
            are read here, the rest is forwarded to the variant's builder.
        save_path: location of a previously saved model, or None.
        weights_file: weights file name; only checked for None here.

    Returns:
        The Keras model, or None if `model_variant` is not registered.
    """
    if save_path is None or not os.path.exists(save_path) or weights_file is None:
        print("Initializing new model...")
        if model_variant not in model_variants:
            print('Unknown model type')
            return None
        inputs = keras.Input(shape=params['input_size'])  # depends on encoding of op-code!
        outputs = model_variants[model_variant](inputs, params)
        model = keras.Model(inputs=inputs, outputs=outputs, name="maths_"+model_variant)
        print(f"Compiling new model of type {model_variant}")
        opti = keras.optimizers.Adam(learning_rate=params["learning_rate"])
        # TPU and non-TPU runs are compiled identically; `steps_per_execution`
        # could be added here for TPUs but is deliberately not used.
        model.compile(loss="mean_squared_error", optimizer=opti, metrics=[metrics.MeanSquaredError(), 'accuracy'])
    else:
        # Report the path that is actually loaded (save_path), not a global.
        print(f"Loading standard-format model of type {model_variant} from {save_path}")
        model = tf.keras.models.load_model(save_path)
        print("Continuing training from existing model")
    model.summary()
    return model

In [None]:
def get_model(ml_env, model_variant, params, save_path=None, on_tpu=False, weights_file=None):
    """Return a model for the requested scope, with saved weights injected if present.

    With `ml_env.is_tpu` and `on_tpu` both True the model is created inside
    the TPU strategy scope and receives its weights via a temporary second
    model. Otherwise a standard-scope model is created, the weights file
    is loaded directly and then renamed to '<weights_file>-imported' so it
    is not imported again.
    """
    def _build():
        return create_load_model(ml_env, model_variant, params, save_path=save_path, weights_file=weights_file)

    def _weights_available():
        return weights_file is not None and os.path.exists(weights_file)

    if not (ml_env.is_tpu is True and on_tpu is True):
        print("Creating standard-scope model")
        net = _build()
        if _weights_available():
            print("Injecting saved weights into model, loading...")
            net.load_weights(weights_file)
            renamed_to = weights_file+'-imported'
            os.rename(weights_file, renamed_to)
            print(f"Renamed weights file {weights_file} to {renamed_to} to prevent further imports!")
        return net

    strategy = ml_env.tpu_strategy
    with strategy.scope():
        print("Creating TPU-scope model")
        net = _build()
    if _weights_available():
        print("Injecting saved weights into TPU model, loading...")
        # Load the file into a throw-away model first, then copy its weights.
        donor = _build()
        donor.load_weights(weights_file)
        print("Injecting...")
        net.set_weights(donor.get_weights())
        print("Updated TPU weights from saved model")
    return net

In [None]:
def math_train(mlenv:MLEnv, model, dataset, validation, batch_size=8192, epochs=5000, steps_per_epoch=2000, log_path="./logs"):
    """Run one `model.fit` call with TensorBoard logging and report how it ended.

    Args:
        mlenv: environment descriptor; `mlenv.is_tpu` selects the fit variant.
        model: compiled Keras model to train.
        dataset: training data passed to `model.fit`.
        validation: validation data passed to `model.fit`.
        batch_size: forwarded to `model.fit` on non-TPU platforms only.
        epochs: number of epochs for this call.
        steps_per_epoch: forwarded to `model.fit` on TPU only.
        log_path: directory for the TensorBoard callback.

    Returns:
        Tuple `(interrupted, hist)`. `interrupted` is 0 if training ran to
        completion, 1 if the user interrupted it (KeyboardInterrupt) and
        2 if training raised an exception. `hist` is the value returned by
        `model.fit`, or None if training did not complete.

    Note:
        On TPU the module-level settings `use_validation_with_tpu` and
        `validation_steps` are read.
    """
    interrupted = 2
    hist = None
    tensorboard_callback = callbacks.TensorBoard(log_dir=log_path)
    try:
        if mlenv.is_tpu:
            if use_validation_with_tpu is True:
                hist = model.fit(dataset, validation_data=validation, epochs=epochs, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps, verbose=1, callbacks=[tensorboard_callback])
            else:
                hist = model.fit(dataset, epochs=epochs, steps_per_epoch=steps_per_epoch, verbose=1, callbacks=[tensorboard_callback])
        else:
            hist = model.fit(dataset, validation_data=validation, epochs=epochs, batch_size=batch_size, verbose=1, callbacks=[tensorboard_callback])
        interrupted = 0
    except KeyboardInterrupt:
        print("")
        print("")
        print("---------INTERRUPT----------")
        print("")
        print("Training interrupted")
        interrupted = 1  # user stopped runtime
    except Exception as e:
        interrupted = 2  # Bad: something crashed.
        print("INTERNAL ERROR")
        print(f"Exception {e}")
    # Returning after (not inside a `finally` of) the try block lets
    # exceptions that are not handled above, e.g. SystemExit, propagate.
    return interrupted, hist

In [None]:
def instantiate_models(ml_env:MLEnv, model_variant, params, save_path=None, weights_file=None):
    """Create the training model and, on TPU, a second model for testing.

    Returns:
        Tuple `(math_model, test_model)`. On TPU `math_model` is requested
        with `on_tpu=True` and `test_model` with `on_tpu=False`; on all
        other platforms `test_model` is None.
    """
    storage = dict(save_path=save_path, weights_file=weights_file)
    if not ml_env.is_tpu:
        return get_model(ml_env, model_variant, params, on_tpu=False, **storage), None
    # The TPU model is created first, then the second model used for testing.
    trainer = get_model(ml_env, model_variant, params, on_tpu=True, **storage)
    tester = get_model(ml_env, model_variant, params, on_tpu=False, **storage)
    return trainer, tester

In [None]:
def do_training(mlenv:MLEnv, math_model, training_dataset, validation_dataset, math_data, epochs_per_cycle, model_path=None, 
                weights_file=None, test_model=None, cycles=100, steps_per_epoch=1000, reweight_size=1000, valid_ops=None, regenerate_data_after_cycles=0, data_func=None,
                log_path='./logs'):
    """Train `math_model` in repeated cycles ("meta-epochs"), saving and checking after each.

    Every cycle calls `math_train` for `epochs_per_cycle` epochs. Unless the
    cycle crashed, the model state is saved and `math_data.check_results`
    is run on `reweight_size` samples. The loop stops after `cycles`
    cycles or as soon as a cycle was interrupted or crashed.

    Args:
        mlenv: environment descriptor (`is_tpu`, `gdrive_log_mirror`).
        math_model: the model that is trained.
        training_dataset, validation_dataset: data passed to `math_train`.
        math_data: dataset object providing `check_results`.
        epochs_per_cycle: epochs trained per cycle.
        model_path: if not None, `math_model` is saved here (non-TPU only).
        weights_file: if not None, `test_model` weights are saved here (TPU only).
        test_model: second model receiving the trained weights; required on TPU.
        cycles: maximum number of cycles.
        steps_per_epoch: forwarded to `math_train`.
        reweight_size: number of samples used by the accuracy check.
        valid_ops: forwarded to `check_results`.
        regenerate_data_after_cycles: if != 0 and `data_func` is given, the
            datasets are replaced by `data_func()` whenever
            `(cycle_index + 1) % regenerate_data_after_cycles == 0`
            (never in the first cycle).
        data_func: callable returning `(training_dataset, validation_dataset)`.
        log_path: TensorBoard log directory, forwarded to `math_train`.

    Returns:
        False if running on TPU without a `test_model`, otherwise None.

    Note:
        The module-level settings `vector` and `positional_encoding` are
        read for the accuracy check. No batch size is forwarded, so
        `math_train` uses its own default.
    """
    for mep in range(0, cycles):
        print()
        print()
        print(f"------ Meta-Epoch {mep+1}/{cycles} ------")
        print()
        if regenerate_data_after_cycles!=0 and data_func is not None:
            if mep>0 and (mep+1)%regenerate_data_after_cycles==0:
                training_dataset, validation_dataset = data_func()
        if mep==0 and mlenv.is_tpu is True:
            print("There will be some warnings by Tensorflow, documenting some state of internal decoherence, currently they can be ignored.")
        interrupted, _ = math_train(mlenv, math_model, training_dataset, validation=validation_dataset, epochs=epochs_per_cycle, steps_per_epoch=steps_per_epoch, log_path=log_path)
        if interrupted <2:
            if mlenv.is_tpu:
                mlenv.gdrive_log_mirror()  # TPUs can only safely mirror Tensorboard data once training is finished for a meta-epoch.
                if test_model is None:
                    print("Fatal: tpu-mode needs test_model on CPU")
                    return False
                print("Injecting weights into test_model:")
                test_model.set_weights(math_model.get_weights())
                if weights_file is not None:
                    print(f"Saving test-model weights to {weights_file}")
                    test_model.save_weights(weights_file)
                    print("Done")
                print(f"Checking {reweight_size} datapoints for accuracy...")
                math_data.check_results(test_model, samples=reweight_size, vector=vector, positional_encoding=positional_encoding, valid_ops=valid_ops, verbose=False)
            else:
                if model_path is not None:
                    print("Saving math-model")
                    math_model.save(model_path)
                    print("Done")
                print(f"Checking {reweight_size} datapoints for accuracy...")
                math_data.check_results(math_model, samples=reweight_size, vector=vector, positional_encoding=positional_encoding, valid_ops=valid_ops, verbose=False)
        # Stop on user interrupt (1) as well as on a crash (2).
        if interrupted>0:
            break

In [None]:
# Create the environment descriptor for TensorFlow, requesting the 'fastest'
# accelerator, and print its description.
ml_env=MLEnv(platform='tf', accelerator='fastest')
desc=ml_env.describe()
print(desc)

# Passed as `bit_count` to the dataset generator; presumably the operand
# width in bits -- confirm against ALU_Dataset.
alu_bit_count = 15
math_data=ALU_Dataset(bit_count = alu_bit_count)

In [None]:
# Registry of model builders, keyed by variant name.
model_variants = {
    "res_mod": model_res_mod,
}
model_variant = 'res_mod'  # must be a key of model_variants

# Training schedule: perform `cycles` (meta-)cycles, each cycle trains with
# `epochs_per_cycle` epochs.
epochs_per_cycle = 100
cycles = 100
# If != 0, the training data will be created anew after each number of
# regenerate_data_after_cycles cycles. Disadvantage: when training on TPU,
# Google might use the time it takes to regenerate the training data to
# terminate your session :-/
regenerate_data_after_cycles = 0

# Number of training / validation examples and the batch size.
# WARNING: TPU simply crashes, if 2GB limit for entire set is reached.
# Possible solutions: https://www.tensorflow.org/api_docs/python/tf/data/experimental/service#running_the_tfdata_service,
# https://www.tensorflow.org/api_docs/python/tf/data/experimental/service , https://github.com/tensorflow/models/blob/master/official/recommendation/ncf_input_pipeline.py#L33
low_resource = False
if low_resource is True:
    samples, validation_samples, batch_size = 100000, 10000, 2000
else:
    samples, validation_samples, batch_size = 4000000, 100000, 20000

import_weights = True
if import_weights is False:
    print("WARNING: import weights is set to False!")

# Default: None (all ops), or list of ops, e.g. ['*', '/'] trains only
# multiplication and division.
valid_ops = None
# valid_ops = ['*','/','+','-']
# valid_ops = ['*']

# TPU-only settings.
steps_per_epoch = 2  # ?! # samples // batch_size
validation_steps = validation_samples // batch_size
use_validation_with_tpu = False  # Is somehow really, really slow

params_res_mod = dict(
    self_attention_layers=4,
    self_attention_heads=16,
    self_attention_units=None,  # Uses the dimension of the embedding (16/19)
    norm="layernorm",           # "layernorm" or "softmax" (default), used in self-attention layer.
    layers=2,                   # Residual fully connected layers
    units=128,                  # Residual units
    learning_rate=0.001,
    regularizer=1e-9,
)

# `params` is the same dict object as `params_res_mod`; the sizes below are
# added to it in place.
params = params_res_mod
vector = True
positional_encoding = True

if vector is not True:
    params['input_size'] = math_data.input_size
elif positional_encoding is True:
    params['input_size'] = [3, math_data.embedding_size+3]
else:
    params['input_size'] = [3, math_data.embedding_size]
params['output_size'] = math_data.output_size

In [None]:
# Let the environment set up the project's paths; the model, data and log
# paths returned here are used by the cells below for saving the model,
# caching the datasets and TensorBoard logging.
root_path, project_path, model_path, data_path, log_path = ml_env.init_paths(project_name="ALU_Net", model_name="math_model")

In [None]:
# Switch for the hyper-parameter tuning cell that follows (marked there as
# not yet supported); leave it False for a normal training run.
apply_model_tuner = False   # Use GPU (not TPU!) for model_tuner.

In [None]:
# not YET supported:
# NOTE(review): MLTuner is not imported anywhere in this notebook, and the
# keys of the search space below ('dense_layers', 'dense_neurons', 'regu1')
# are not the keys read by model_res_mod ('layers', 'units', 'regularizer').
# Both need to be resolved before this cell can be enabled.
if apply_model_tuner is True:
    # Smaller datasets for tuning, cached in the same data directory and
    # using the same input encoding as the regular training data.
    as_train, as_val = math_data.get_datasets(samples=500000, validation_samples=50000, vector=vector, positional_encoding=positional_encoding, cache_path=data_path)

    def tuner_eval(ml_env:MLEnv, model_variant, params, batch_size, epochs):
        """Train a fresh model with `params` and return a score (higher is better).

        The score combines the inverse of the final validation loss, the
        final validation accuracy and, if positive, the result of
        `math_data.check_results`.
        """
        # Always start from a new, untrained model: no saved model, no weights file.
        math_model, _ = instantiate_models(ml_env, model_variant, params, save_path=None, weights_file=None)
        interrupted, hist = math_train(ml_env, math_model, as_train, as_val, batch_size=batch_size, epochs=epochs)
        print(params, end=" [ ")
        res = math_data.check_results(math_model, samples=100, vector=vector, positional_encoding=positional_encoding, valid_ops=valid_ops, verbose=False)
        ev = 1/hist.history['val_loss'][-1]+hist.history['val_accuracy'][-1]*20
        if res>0:
            print(f"Success-rate: {res}")
            ev += res*5000
        return ev

    def tuner_eval_func(params):
        """Evaluate one parameter set with the notebook's batch size and 20 epochs."""
        return tuner_eval(ml_env, model_variant, params, batch_size=batch_size, epochs=20)

    ml_tuner = MLTuner(ml_env, model_variant)

    param_space_minimal_prm={
    "dense_layers": [4,8,12],
    "dense_neurons":[256,512,768], 
    "learning_rate": [0.001,0.002],
    "regu1": [1e-8,1e-7]
    }

    best_params = ml_tuner.tune(param_space_minimal_prm, tuner_eval_func)
    params = best_params
    import_weights=False

In [None]:
# Display the parameter set that will be used to build the model.
params

In [None]:
def create_train_val_data(regen):
    """Return the (training, validation) datasets for the current settings.

    The datasets are cached below `data_path`; `regen` is passed on as
    `regenerate_cached_data`.
    """
    return math_data.get_datasets(
        pre_weight=False,
        samples=samples,
        validation_samples=validation_samples,
        batch_size=batch_size,
        vector=vector,
        positional_encoding=positional_encoding,
        valid_ops=valid_ops,
        cache_path=data_path,
        use_cache=True,
        regenerate_cached_data=regen,
    )


def create_train_val_data_regen():
    """Argument-free variant that requests regeneration of the cached data."""
    return create_train_val_data(True)


train, val = create_train_val_data(False)

In [None]:
# !pip install tensorflow_datasets
# import tensorflow_datasets as tdfs
# sa=SelfAttention()
# nval=tdfs.as_numpy(val)
# for n in nval:
#     print(n[0].shape)   
#     print(sa(n[0]).shape)
#     break

In [None]:
# The weights file has the same location on every platform; on TPU it also
# serves as the save path, otherwise the save path is a separate entry named
# after the model variant.
weights_file = os.path.join(model_path, f"{model_variant}.h5")
save_path = weights_file if ml_env.is_tpu is True else os.path.join(model_path, model_variant)

In [None]:
# Build (or load) the training model; test_model is None unless running on TPU.
math_model, test_model = instantiate_models(ml_env, model_variant, params, save_path=save_path, weights_file=weights_file)
# NOTE(review): the alternative below passes `import_weights`, which is not a
# parameter of instantiate_models; use `weights_file=None` to start fresh.
# math_model, test_model = instantiate_models(ml_env, model_variant, params, save_path=None, import_weights=False)

In [None]:
# Start TensorBoard inline if the notebook front-end supports it.
try:
    # use the python variable log_path:
    get_ipython().run_line_magic('tensorboard', '--logdir "{log_path}"')
except Exception as e:
    # Best effort only: training works without the inline TensorBoard view
    # (e.g. outside IPython or when the tensorboard magic is not available).
    print(f"TensorBoard could not be started inline: {e}")

In [None]:
# Run the training cycles. The model is saved below model_path (non-TPU) or
# its weights to weights_file (TPU) after each cycle that did not crash.
do_training(ml_env, math_model, train, val, math_data, epochs_per_cycle, model_path=model_path, 
            weights_file=weights_file, test_model=test_model, cycles=cycles, steps_per_epoch=steps_per_epoch, valid_ops=valid_ops, 
            regenerate_data_after_cycles=regenerate_data_after_cycles, data_func=create_train_val_data_regen, log_path=log_path)

# Testing and applying the trained model

In [None]:
# Off TPU there is no separate test model, so the trained model itself is
# used for all checks and predictions from here on.
if ml_env.is_tpu is False:
    test_model = math_model
# Verbose check of 100 samples with the same encoding settings as in training.
math_data.check_results(test_model, samples=100, vector=vector, positional_encoding=positional_encoding, valid_ops=valid_ops, verbose=True)

In [None]:
# Encode the sample task 22 * 33 with the same encoding settings the model
# input was sized for, and print the expected result for comparison.
dx,dy,_,_,_=math_data.create_data_point(22,33,'*', vector=vector, positional_encoding=positional_encoding)
print(22*33)

In [None]:
# Predict on a batch containing only the sample data point, show the raw
# model output and let the dataset object decode it.
r=test_model.predict(np.array([dx]))
print(r)
math_data.decode_results(r)

In [None]:
def calc(inp):
    """Let the trained model evaluate '<int> <operator> <int>' and compare with Python.

    Args:
        inp: expression such as '3 + 4' or '4 XOR 5'; the three tokens are
            separated by whitespace.

    Returns:
        False if the input cannot be parsed or the operator is unknown.
        Otherwise a tuple `(model_answer, reference)`, where `reference` is
        the value computed by Python, or None if no reference value can be
        computed (e.g. division or modulo by zero).
    """
    import operator

    # Python counterparts of the model's operators. A lookup table avoids
    # eval() and handles '!=' correctly.
    # NOTE(review): '/' is mapped to integer division, assuming the dataset
    # defines division on unsigned integers that way -- confirm against
    # ALU_Dataset.
    reference_ops = {
        '+': operator.add,
        '-': operator.sub,
        '*': operator.mul,
        '/': operator.floordiv,
        '%': operator.mod,
        'AND': operator.and_,
        'OR': operator.or_,
        'XOR': operator.xor,
        '>': operator.gt,
        '<': operator.lt,
        '=': operator.eq,
        '!=': operator.ne,
    }

    args = inp.split()  # tolerant of repeated or surrounding whitespace
    if len(args) != 3:
        print("need three space separated tokens: <int> <operator> <int>, e.g. '3 + 4' or '4 XOR 5'")
        return False
    op_name = args[1]
    if op_name not in math_data.model_ops:
        print(f"{op_name} is not a known operator.")
        return False
    try:
        op1 = int(args[0])
        op2 = int(args[2])
    except ValueError:
        print(f"Operands must be integers, got '{args[0]}' and '{args[2]}'.")
        return False

    dx, dy, _, _, _ = math_data.create_data_point(op1, op2, op_name, vector=vector, positional_encoding=positional_encoding)
    ans = math_data.decode_results(test_model.predict(np.array([dx])))
    print(f"{op1} {op_name} {op2} = {ans[0]}")

    reference = reference_ops.get(op_name)
    if reference is None:
        print(f"No Python reference available for operator {op_name}.")
        return ans[0], None
    try:
        an2 = reference(op1, op2)
    except ZeroDivisionError:
        print("No Python reference available: division by zero.")
        return ans[0], None
    if ans[0] != an2:
        print("Error")
        print(bin(ans[0]))
        print(bin(an2))
    return ans[0], an2

In [None]:
# Multiplication example: model answer versus Python reference.
calc("22 * 33")

In [None]:
# Comparison example using the model's equality operator '='.
calc("1 = 1")

In [None]:
# Multiplication example with small operands.
calc("3 * 4")

In [None]:
# Bitwise example using the model's 'AND' operator.
calc ("1 AND 3")