This notebook provides the terminal commands needed to train and load the models described in Section 4, as well as the encoder-decoder model for predicting $a_2$ described in Section 7.3. The trained models are available at ['Trained transformer models for predicting traces of Frobenius of elliptic curves with conductor up to 10^6'](https://zenodo.org/records/15849326). The commands should be run from the Int2Int directory.

In [2]:
# Training command
#
# Builds `train_cmd`, the shell command that launches train.py for the chosen
# prime p. The command is only assembled as a string here; it is not executed.

# The following are specific to the trained models linked above, keyed by p.
sizes = {2: 7 * 10**5, 3: 12 * 10**5, 97: 35 * 10**5}  # passed as --epoch_size
ranges = {2: 5, 3: 7, 97: 39}  # interpolated into --data_types int[24]:range(...)
basep = {2: 20, 3: 20, 97: 39}  # passed as --base


p = 97  # Choose a prime number. The trained models contain p = 2, 3 and 97
epoch_size = sizes[p]
rangep = ranges[p]
base = basep[p]

exp_name = f'ap_to_a{p}'  # Define experiment name

# More general parameters
python_name = 'python'  # Use your system's Python command (e.g., 'python', 'python3', or 'py')

# Any value other than 'encoder_only' selects the branch that passes --n_dec_layers.
# NOTE(review): Int2Int appears to spell the encoder-decoder option with an
# underscore ('encoder_decoder'); confirm against `python train.py --help`.
architecture = 'encoder_only'
n_enc_layers = 4
n_dec_layers = 1

exp_id_train = 1  # Set your experiment ID for training

num_workers = 1  # Number of CPU workers

data_folder = 'data/'  # Folder containing the training and test data

# The two architectures differ only in which layer-count option is passed.
# The flag needs its leading '--', otherwise argparse does not treat it as an option.
if architecture == 'encoder_only':
    train_layers_flag = f'--n_enc_layers {n_enc_layers}'
else:
    train_layers_flag = f'--n_dec_layers {n_dec_layers}'

train_cmd = ' '.join([
    f'{python_name} train.py',
    f'--num_workers {num_workers}',
    '--dump_path scratch',
    f'--exp_name {exp_name}',
    f'--exp_id {exp_id_train}',
    f'--train_data {data_folder}/{exp_name}_train.txt',
    f'--eval_data {data_folder}/{exp_name}_test.txt',
    '--operation data',
    f'--data_types int[24]:range({rangep})',
    '--env_base_seed 42',
    f'--architecture {architecture}',
    train_layers_flag,
    f'--epoch_size {epoch_size}',
    '--optimizer adam,lr=0.00003',
    '--eval_size 10000',
    '--batch_size_eval 10000',
    # Use the scalar selected for p, not the whole `basep` lookup table.
    f'--base {base}',
    '--max_epoch 201',
])

In [None]:
# Load and evaluate the models
#
# Builds `load_cmd`, the shell command that reloads a saved checkpoint and runs
# train.py in evaluation-only mode. It relies on the variables set in the
# training cell (python_name, num_workers, exp_name, exp_id_train, data_folder,
# rangep, architecture, n_enc_layers, n_dec_layers, epoch_size, base).
# The command is only assembled as a string here; it is not executed.

exp_id_load = "good_red"  # Set your experiment ID for loading. For the models linked above, the IDs are good_red for the encoder-only models, and good_red_1declay for the encoder-decoder model.

checkpoint_name = "periodic-200"  # Name of the checkpoint. Usually "checkpoint", but if the periodic save was used in training, it can be e.g. "periodic-100"

# NOTE(review): confirm that train.py's --eval_verbose accepts True/False; the
# value is interpolated verbatim into the command.
eval_verbose = True  # Pick True to export detailed results of evaluation to scratch/{exp_name}/{exp_id_load}, and False otherwise.

# NOTE(review): the checkpoint is read from the *training* experiment folder
# (exp_id_train), while exp_id_load only names the evaluation run. When using the
# downloaded models, make sure the .pth file is located under this path.
checkpoint_path = f'scratch/{exp_name}/{exp_id_train}/{checkpoint_name}.pth'

# The two architectures differ only in which layer-count option is passed.
# The flag needs its leading '--', otherwise argparse does not treat it as an option.
if architecture == 'encoder_only':
    load_layers_flag = f'--n_enc_layers {n_enc_layers}'
else:
    load_layers_flag = f'--n_dec_layers {n_dec_layers}'

load_cmd = ' '.join([
    f'{python_name} train.py',
    f'--num_workers {num_workers}',
    '--dump_path scratch',
    f'--exp_name {exp_name}',
    f'--exp_id {exp_id_load}',
    f'--reload_model {checkpoint_path}',
    f'--reload_checkpoint {checkpoint_path}',
    '--eval_only True',
    f'--eval_verbose {eval_verbose}',
    f'--eval_data {data_folder}/{exp_name}_test.txt',
    '--operation data',
    f'--data_types int[24]:range({rangep})',
    '--env_base_seed 42',
    f'--architecture {architecture}',
    load_layers_flag,
    f'--epoch_size {epoch_size}',
    '--optimizer adam,lr=0.00003',
    '--eval_size 10000',
    '--batch_size_eval 10000',
    # Use the scalar selected for p, not the whole `basep` lookup table.
    f'--base {base}',
    '--max_epoch 201',
])