In [1]:
import json
import time

import numpy as np
import pandas as pd
import tensorflow as tf

from dataset import build_dataset
from integerembeddings.integer_embedding import IntegerSequenceEmbeddingPreprocessor
from mets.function_definitions import get_function_names
from model import create_token_guesser_model
from vocab import Vocab

In [2]:
# Define vocabulary and mapping
# The vocabulary is built from the names returned by get_function_names().
# Later cells pass `vocabulary.size` to the model factory as `vocabulary_size`
# and use `vocabulary[id_]` to turn a token id into a column label.
vocabulary = Vocab(get_function_names())

In [3]:
# Instantiate preprocessor once due to internal model loading
# The same instance is reused by every build_dataset(...) call and by the
# demonstration cells further down.
# NOTE(review): `load_model=False` is passed here, which seems at odds with the
# "internal model loading" remark above — confirm which behaviour is intended.
preprocessor = IntegerSequenceEmbeddingPreprocessor(load_model=False)

In [4]:
# Build model
# Every hyperparameter is fixed here; only the vocabulary size is derived from data.
model = create_token_guesser_model(vocabulary_size=vocabulary.size,
                                   num_embedding_seq_layers=2, embedding_seq_dim=32,
                                   num_embedding_feed_forward_layers=2, embedding_feed_forward_dim=32,
                                   num_sequence_encoding_seq_layers=2, sequence_encoding_seq_dim=32,
                                   encoder_output_dim=128,
                                   num_decoding_feed_forward_layers=2, decoding_feed_forward_dim=32)
# NOTE(review): the summary below lists an InputLayer, which suggests the model may
# already be built by the factory — confirm whether this explicit build() is needed.
model.build(input_shape=(None,))
model.summary()

# Compile model
# Binary cross-entropy treats each output as its own 0/1 target; the "True Values"
# rows printed during training are 0/1 per vocabulary token.
model.compile(optimizer="adam", loss="binary_crossentropy")

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_inputs (InputLayer)    [(None, None, None)  0           []                               
                                ]                                                                 
                                                                                                  
 tf.expand_dims (TFOpLambda)    (None, None, None,   0           ['encoder_inputs[0][0]']         
                                1)                                                                
                                                                                                  
 time_distributed (TimeDistribu  (None, None, None,   0          ['tf.expand_dims[0][0]']         
 ted)                           1)                                                            

In [5]:
# Training Parameters
# Number of complexity levels; level k (1-based) has complexity k / max_curriculums.
max_curriculums = 10
# Desired sample count per level in the initial round of each curriculum; in the
# follow-up rounds it is divided evenly (integer division) across the active levels.
training_dataset_size = 1000
# Number of consecutive non-improving follow-up rounds allowed before advancing.
advancement_attempts = 5
# Epochs per model.fit call.
epochs = 10

In [6]:
# Setup
# Evenly spaced complexity levels: 1/max_curriculums, 2/max_curriculums, ..., 1.0
curriculums = [level / max_curriculums for level in range(1, max_curriculums + 1)]
# One independent, initially empty record list per complexity level
recorded_training_data = dict((level, []) for level in curriculums)
# Running index of follow-up training rounds, shared across all curriculums
iteration = 0

In [7]:
# Curriculum Training
#
# For each complexity level (`curriculum`, with 1-based position `i`):
#   1. Run one initial round of training on data requested from *all* complexity levels.
#   2. Checkpoint that baseline model, so `models/best_model_curriculum_{i}.h5` always
#      exists and always comes from this run, even if no later round beats the baseline.
#   3. Keep training on levels 1..i. Every round whose mean epoch loss beats the best so
#      far re-saves the checkpoint and resets the patience counter `count`; the level is
#      left once `advancement_attempts` consecutive rounds fail to improve.
# Per-round loss statistics are appended to `recorded_training_data`, and the whole
# record is rewritten to training_data.json after every round.
for i, curriculum in enumerate(curriculums, start=1):
    time.sleep(0.2)  # helps everything chill for some reason

    # Perform an initial large dose of training

    # Build Training Dataset
    # NOTE(review): this initial round requests every complexity level, not only the
    # levels up to the current one — confirm that is intended for a curriculum schedule.
    desired_sizes = [training_dataset_size] * max_curriculums
    complexities = curriculums
    print(">> Building initial dataset...", end=" ")
    X_train, y_train, X_train_mask = build_dataset(desired_sizes, complexities, vocabulary, preprocessor)
    print("done.")

    # Fit model
    print(">> Initial training...", end=" ")
    history = model.fit(x=(X_train, X_train_mask), y=y_train, epochs=epochs, batch_size=256, verbose=0)
    print("done.")

    # Baseline to beat: mean training loss over the epochs of the initial round
    best_loss = np.mean(history.history["loss"])

    # Checkpoint the baseline. Without this, a level whose follow-up rounds never beat
    # the baseline would leave no checkpoint (or a stale one from an earlier run), and
    # the demonstration cell that loads the final level's file would fail or silently
    # load the wrong model.
    model_path = f"models/best_model_curriculum_{i}.h5"
    model.save(model_path)
    print(f">> Saved baseline model to {model_path}")

    # Allow the opportunity to further improve before advancing
    count = advancement_attempts  # remaining non-improving rounds before advancing
    while count > 0:
        # Build Training Dataset
        # Roughly `training_dataset_size` samples in total, split evenly over levels 1..i
        desired_sizes = [training_dataset_size // i] * i
        complexities = curriculums[:i]
        print(">> Building dataset...", end=" ")
        X_train, y_train, X_train_mask = build_dataset(desired_sizes, complexities, vocabulary, preprocessor)
        print("done.")

        # Fit model
        print(">> Training...", end=" ")
        history = model.fit(x=(X_train, X_train_mask), y=y_train, epochs=epochs, batch_size=256, verbose=0)
        print("done.")

        # Quick Prediction
        # A desired size of 1 at the current complexity: a sanity check, not an evaluation
        test_complexities = [curriculum]
        X_test, y_test, X_test_mask = build_dataset([1] * len(test_complexities), test_complexities, vocabulary,
                                                    preprocessor)
        y_pred = model.predict(x=(X_test, X_test_mask), verbose=0, batch_size=256)

        print(f">> Predictions Sample @ Complexity(s)={test_complexities}")
        for row_true, row_pred in zip(y_test, y_pred):
            # token id -> target value / predicted score for this sample
            y_true_tokens = {}
            y_pred_tokens = {}
            for token_id, (count_true, count_pred) in enumerate(zip(row_true, row_pred)):
                count_true = int(count_true)
                count_pred = float(count_pred)
                y_true_tokens[token_id] = count_true
                y_pred_tokens[token_id] = count_pred

            # Label the columns with token names instead of raw ids
            column_names = [vocabulary[id_] for id_ in y_true_tokens.keys()]
            df = pd.DataFrame([y_true_tokens, y_pred_tokens], index=["True Values", "Predicted Values"])
            df.columns = column_names
            print(df)

        # Record data
        loss = history.history["loss"]
        min_loss, mean_loss, max_loss = min(loss), np.mean(loss), max(loss)
        recorded_training_data[curriculum].append(
            (iteration, {
                "loss": (min_loss, mean_loss, max_loss),
            })
        )

        # Advancement logic
        if mean_loss < best_loss:
            # Improvement: new best, reset patience and overwrite this level's checkpoint
            best_loss = mean_loss
            count = advancement_attempts
            # Save model
            model.save(model_path)
            print(f">> Saved improved model to {model_path}")
        else:
            count -= 1

        iteration += 1

        # Rewrite the full record every round so an interrupted run still leaves data
        with open("training_data.json", "w") as json_file:
            json.dump(recorded_training_data, json_file)

        print(f">> ({count} remaining) Mean Loss: {mean_loss:.3f}, Best Loss: {best_loss:.3f}")

    print(f">> Advanced Past Curriculum: {curriculum}")

>> Building initial dataset... done.
>> Initial training... done.
>> Building dataset... done.
>> Training... done.
>> Predictions Sample @ Complexity(s)=[0.1]
                       ADD       SUB      MULT  FLOOR_DIV
True Values       0.000000  1.000000  0.000000   0.000000
Predicted Values  0.009402  0.999964  0.001818   0.016339
>> Saved improved model to models/best_model_curriculum_1.h5
>> (5 remaining) Mean Loss: 0.388, Best Loss: 0.388
>> Building dataset... done.
>> Training... done.
>> Predictions Sample @ Complexity(s)=[0.1]
                       ADD       SUB      MULT  FLOOR_DIV
True Values       0.000000  0.000000  0.000000   1.000000
Predicted Values  0.446812  0.062177  0.104443   0.865737
>> Saved improved model to models/best_model_curriculum_1.h5
>> (5 remaining) Mean Loss: 0.129, Best Loss: 0.129
>> Building dataset... done.
>> Training... done.
>> Predictions Sample @ Complexity(s)=[0.1]
                       ADD       SUB      MULT  FLOOR_DIV
True Values       0.

# Popular Integer Equations

In [16]:
# Reference formulas as token lists in prefix notation (operator first, then its operands)
eq1 = ["FLOOR_DIV", "MULT", "N", "ADD", "N", "1", "2"]  # n * (n + 1) // 2 ; triangular numbers
eq2 = ["ADD", "MULT", "2", "N", "1"]  # 2n + 1 ; odd numbers
eq3 = ["MULT", "N", "N"]  # n * n ; square numbers

In [17]:
# Load the checkpoint the training loop wrote for curriculum index 10
# (the last level when max_curriculums = 10).
model_demonstration = tf.keras.models.load_model("models/best_model_curriculum_10.h5")

In [18]:
# Short prefixes of three well-known sequences, in the same order as eq1, eq2, eq3
X_demonstration_1 = [[0, 1, 3, 6, 10, 15, 21, 28],  # triangular numbers, 8 terms
                     [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23],  # odd numbers, 12 terms
                     [0, 1, 4, 9, 16, 25, 36]]  # square numbers, 7 terms

In [19]:
# Convert the raw, unequal-length sequences into the two model inputs.
# NOTE(review): the effect of dynamic_training=True is not visible in this notebook —
# confirm it is the right mode for inference-time preprocessing.
X_demonstration_1_padded, X_demonstration_1_mask = preprocessor.preprocess(X_demonstration_1, dynamic_training=True)

In [20]:
# One output row per input sequence. Columns presumably follow the vocabulary order
# shown in the training log (ADD, SUB, MULT, FLOOR_DIV) — verify against `vocabulary`.
model_demonstration.predict([X_demonstration_1_padded, X_demonstration_1_mask])



array([[0.24458337, 0.24582638, 0.9978314 , 0.15597743],
       [0.98345053, 0.24479996, 0.13787568, 0.11025475],
       [0.3476571 , 0.25062287, 0.9999585 , 0.1379164 ]], dtype=float32)

In [21]:
# Same three sequences, but the triangular-number row is extended from 8 to 13 terms
X_demonstration_2 = [[0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78],  # triangular numbers, 13 terms
                     [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23],  # odd numbers, 12 terms (unchanged)
                     [0, 1, 4, 9, 16, 25, 36]]  # square numbers, 7 terms (unchanged)

In [22]:
# Convert the raw, unequal-length sequences into the two model inputs.
# NOTE(review): the effect of dynamic_training=True is not visible in this notebook —
# confirm it is the right mode for inference-time preprocessing.
X_demonstration_2_padded, X_demonstration_2_mask = preprocessor.preprocess(X_demonstration_2, dynamic_training=True)

In [23]:
# One output row per input sequence. Columns presumably follow the vocabulary order
# shown in the training log (ADD, SUB, MULT, FLOOR_DIV) — verify against `vocabulary`.
model_demonstration.predict([X_demonstration_2_padded, X_demonstration_2_mask])



array([[0.7515641 , 0.53577894, 0.99555355, 0.9884969 ],
       [0.98345053, 0.24479996, 0.13787568, 0.11025475],
       [0.3476571 , 0.25062287, 0.9999585 , 0.1379164 ]], dtype=float32)

### Analysis of Formula Predictions for Short Input Sequences

**Correct Predictions:**
- **Triangular Numbers**: Identifies multiplication
- **Odd Numbers**: Identifies addition
- **Square Numbers**: Identifies multiplication

**Failed Predictions:**
- **Triangular Numbers**: 
  - **Missed**: Addition (score only ≈ 0.24)
  - **Missed**: Floor division
- **Odd Numbers**:
  - **Missed**: Multiplication is necessary for the computation

### Expanded Data Predictions

With a longer input sequence (13 terms instead of 8), the prediction for **Triangular Numbers** improves:
- **Now Predicts**: Multiplication, addition, and floor division
- **Incorrectly Predicts**: Subtraction (score ≈ 0.54), which does not appear in the formula

### Observations on Odd Numbers Formula

The model's strong prediction of addition for **Odd Numbers** might reflect the model treating the $2n$ term as $n + n$ rather than $2 \times n$. This suggests the model could be interpreting multiplication by two as repeated addition.
