In [2]:
import tensorflow_decision_forests as tfdf
import tensorflow as tf
import numpy as np
import pandas as pd
from datetime import datetime

import re

tf.experimental.numpy.experimental_enable_numpy_behavior()

2024-03-03 20:14:54.431820: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-03 20:14:54.432027: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-03 20:14:54.460810: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-03 20:14:54.539670: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
data = pd.read_csv(
    "./1-99 plus a and b.csv",
    names=["Code", "Assembly"])

start_char = "Ø"
end_char = "⁂"
numerical_char = "✦"

# Constrain data to constants or functions on a single variable, using the variable once
data["Code"] = data["Code"].apply(lambda x: x.replace("int func(int a, int b)","int func()"))
# Fix function headers
data["Code"] = data["Code"].apply(lambda x: x.replace("int func()","int func()") if re.search(r' a |a;',x) else x)
# Add spaces around punctuation
data["Code"] = data["Code"].apply(lambda x: re.sub(r'([\{\};\(\)\,])', r' \1 ', x))
# normalize variable name to "variable"
data["Code"] = data["Code"].apply(lambda x: x.replace(" a "," variable_a "))
data = data[~data["Code"].str.contains(" b ")]
#data["Code"] = data["Code"].apply(lambda x: x.replace(" b "," variable_b "))
# Whitelist certain operators from the training set
data = data[data["Code"].str.contains(r' \+ | \- | \* ', regex=True)]
# pull digits for training
data["Code Digits"] = data["Code"].apply(lambda x: re.findall(r'\d+', x))
# Remove features present in every program. There is not enough data for the model to understand what these features should mean
data["Code"] = data["Code"].apply(lambda x: re.sub(r' func| \{| \}| \(| \)|int| ;| return',"",x))

# Uses heuristics to create the operator lookup table
data["Operator"] = data["Code"].apply(lambda x: re.findall(r' [\+\-%*\/] ',x)[0])
# Creates the lookup table from the Code templates and the processed assembly
operator_lookup = data["Operator"].drop_duplicates().values.tolist()
# Gets the operator index for each code sample
data["Mapped Operator"] = data["Operator"].apply(lambda x: operator_lookup.index(x))

# Strip the excess
data["Code"] = data["Code"].apply(lambda x: x.replace("variable variable","variable"))
# Adds positional data to the encodings
def add_positioning_to_tokens(code):
    tokens = code.split()
    for i in range(len(tokens)):
        tokens[i] = f"{i}_{tokens[i]}"
    return " ".join(tokens)
data["Code"] = data["Code"].apply(lambda x: add_positioning_to_tokens(x))

# Convert assembly to "templates" which don't contain constant numbers derived from the code.
# This vastly reduces the number of possible outputs for a given code line.
# The model will manually fill in the template using data from the code after it has compiled it
r_assembly_digit = r'(?<= )[\-]?\d+'
data["Assembly Digits"] = data["Assembly"].apply(lambda x: re.findall(r_assembly_digit, x))
data["Assembly Templates"] = data["Assembly"].apply(lambda x: re.sub(r_assembly_digit, numerical_char, x)) + f"\n{end_char}"

data

Unnamed: 0,Code,Assembly,Code Digits,Operator,Mapped Operator,Assembly Digits,Assembly Templates
0,0_1 1_+ 2_1,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D...","[1, 1]",+,0,[2],"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
1,0_1 1_- 2_1,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D...","[1, 1]",-,1,[0],"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
2,0_1 1_* 2_1,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D...","[1, 1]",*,2,[1],"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
5,0_1 1_+ 2_2,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D...","[1, 2]",+,0,[3],"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
6,0_1 1_- 2_2,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D...","[1, 2]",-,1,[-1],"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
...,...,...,...,...,...,...,...
50484,0_variable_a 1_- 2_99,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D...",[99],-,1,[99],"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
50485,0_variable_a 1_* 2_99,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D...",[99],*,2,[99],"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
50488,0_variable_a 1_+ 2_variable_a,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D...",[],+,0,[],"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
50489,0_variable_a 1_- 2_variable_a,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D...",[],-,1,[0],"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."


In [4]:
gen_data = pd.concat([data["Code"],data["Assembly Templates"]],axis=1)
gen_data = gen_data.drop_duplicates()
gen_data = gen_data.reset_index(drop=True)

gen_data

Unnamed: 0,Code,Assembly Templates
0,0_1 1_+ 2_1,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
1,0_1 1_- 2_1,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
2,0_1 1_* 2_1,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
3,0_1 1_+ 2_2,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
4,0_1 1_- 2_2,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
...,...,...
29995,0_variable_a 1_- 2_99,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
29996,0_variable_a 1_* 2_99,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
29997,0_variable_a 1_+ 2_variable_a,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."
29998,0_variable_a 1_- 2_variable_a,"func(int, int):\npush rbp\nmov rbp, rsp\nmov D..."


In [5]:
set_length = 0
for a in gen_data["Assembly Templates"]:
    set_length += len(a.split("\n"))

code_context = np.empty(shape=(set_length),dtype=object)
gen_context = np.empty(shape=(set_length),dtype=object)
labels = np.empty(shape=(set_length),dtype=int)

assembly_lookup = []

data_i = 0
for ri, row in gen_data.iterrows():
    assembly = row["Assembly Templates"]
    code = row["Code"]

    tokenized_code = code.split()
    tokenized = assembly.split("\n")

    for ti in range(len(tokenized)):
        if (tokenized[ti] != end_char):
            tokenized[ti] = f"{ti}_{tokenized[ti]}"

        t = tokenized[ti]

        if (not (t in assembly_lookup)):
            assembly_lookup += [t]

        code_context[data_i] = code
        gen_context[data_i] = "\n".join(tokenized[:ti])
        labels[data_i] = assembly_lookup.index(tokenized[ti])
        data_i += 1

assembly_dict = {}
for i in range(len(gen_context)):
    assembly = gen_context[i]
    for assembly_line in assembly.split("\n"):
        if assembly_line in assembly_dict:
            assembly_dict[assembly_line]["count"] += 1
        else:
            assembly_dict[assembly_line] = {"count": 1, "index": i}

print(assembly_dict)

for l in assembly_dict.keys():
    item = assembly_dict[l]
    c = item["count"]
    if c <= 5:
        code_context = np.append(code_context, np.array(code_context[item["index"]] * 10))
        gen_context = np.append(gen_context, np.array(gen_context[item["index"]] * 10))
        labels = np.append(labels, np.array(labels[item["index"]] * 10))
        print("Warning: " + l + " only has " + str(assembly_dict[l]["count"]) + " instances; duplicating so it is not considered OoV")

gen_dataset = tf.data.Dataset.from_tensor_slices(({"code": code_context, "assembly": gen_context},labels)).batch(1000)

{'': {'count': 30000, 'index': 0}, '0_func(int, int):': {'count': 240786, 'index': 1}, '1_push rbp': {'count': 210786, 'index': 2}, '2_mov rbp, rsp': {'count': 180786, 'index': 3}, '3_mov DWORD PTR [rbp-4], edi': {'count': 150786, 'index': 4}, '4_mov DWORD PTR [rbp-8], esi': {'count': 120786, 'index': 5}, '5_mov eax, ✦': {'count': 88608, 'index': 6}, '6_pop rbp': {'count': 58812, 'index': 7}, '7_ret': {'count': 29406, 'index': 8}, '5_mov eax, DWORD PTR [rbp-4]': {'count': 1730, 'index': 2679}, '6_add eax, ✦': {'count': 594, 'index': 2680}, '7_pop rbp': {'count': 1060, 'index': 2681}, '8_ret': {'count': 530, 'index': 2682}, '6_sub eax, DWORD PTR [rbp-4]': {'count': 297, 'index': 2690}, '6_add eax, eax': {'count': 9, 'index': 5402}, '5_mov edx, DWORD PTR [rbp-4]': {'count': 448, 'index': 8104}, '6_mov eax, edx': {'count': 384, 'index': 8105}, '7_add eax, eax': {'count': 82, 'index': 8106}, '8_add eax, edx': {'count': 232, 'index': 8107}, '9_pop rbp': {'count': 40, 'index': 8108}, '10_ret

2024-03-03 20:15:18.925853: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:3b:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-03 20:15:19.326342: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:3b:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-03 20:15:19.326409: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:3b:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-03 20:15:19.330168: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:3b:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-03 20:15:19.330296: I external/local_xla/xla/stream_executor

In [6]:
def prepare_dataset(features, labels):
  features = {"code": tf.strings.split(features["code"]),"assembly": tf.strings.split(features["assembly"],sep="\n")}
  return features, labels

gen_dataset = gen_dataset.map(prepare_dataset)

In [7]:
#tuner = tfdf.tuner.RandomSearch(num_trials=50, use_predefined_hps=True)
# We want to overfit, since this is a compilation problem and we are training on all the inputs.
gen_model = tfdf.keras.GradientBoostedTreesModel(validation_ratio=0.0)
gen_model.fit(gen_dataset, verbose=2)

Use /tmp/tmp8dtpdiqs as temporary training directory
Reading training dataset...




Training tensor examples:
Features: {'code': tf.RaggedTensor(values=Tensor("data:0", shape=(None,), dtype=string), row_splits=Tensor("data_1:0", shape=(None,), dtype=int64)), 'assembly': tf.RaggedTensor(values=Tensor("data_2:0", shape=(None,), dtype=string), row_splits=Tensor("data_3:0", shape=(None,), dtype=int64))}
Label: Tensor("data_4:0", shape=(None,), dtype=int64)
Weights: None
Normalized tensor features:
 {'code': SemanticTensor(semantic=<Semantic.CATEGORICAL_SET: 4>, tensor=tf.RaggedTensor(values=Tensor("data:0", shape=(None,), dtype=string), row_splits=Tensor("data_1:0", shape=(None,), dtype=int64))), 'assembly': SemanticTensor(semantic=<Semantic.CATEGORICAL_SET: 4>, tensor=tf.RaggedTensor(values=Tensor("data_2:0", shape=(None,), dtype=string), row_splits=Tensor("data_3:0", shape=(None,), dtype=int64)))}
Training dataset read in 0:00:05.323976. Found 270787 examples.
Training model...
Standard output detected as not visible to the user e.g. running in a notebook. Creating a tr

[INFO 24-03-03 20:15:33.8643 EST kernel.cc:771] Start Yggdrasil model training
[INFO 24-03-03 20:15:33.8643 EST kernel.cc:772] Collect training examples
[INFO 24-03-03 20:15:33.8644 EST kernel.cc:785] Dataspec guide:
column_guides {
  column_name_pattern: "^__LABEL$"
  type: CATEGORICAL
  categorial {
    min_vocab_frequency: 0
    max_vocab_count: -1
  }
}
default_column_guide {
  categorial {
    max_vocab_count: 2000
  }
  discretized_numerical {
    maximum_num_bins: 255
  }
}
ignore_columns_without_guides: false
detect_numerical_as_discretized_numerical: false

[INFO 24-03-03 20:15:33.8664 EST kernel.cc:391] Number of batches: 271
[INFO 24-03-03 20:15:33.8664 EST kernel.cc:392] Number of examples: 270787
[INFO 24-03-03 20:15:33.9994 EST data_spec_inference.cc:305] 1 item(s) have been pruned (i.e. they are considered out of dictionary) for the column assembly (35 item(s) left) because min_value_count=5 and max_number_of_unique_values=2000
[INFO 24-03-03 20:15:34.1539 EST kernel.cc:

Model trained in 3:02:09.775624
Compiling model...
Model compiled.


<keras.src.callbacks.History at 0x7f37415cd900>

In [8]:
def generate_line(code, context = ""):
    return np.argmax(gen_model({"code":tf.strings.split([code]), "assembly":tf.strings.split([context],sep="\n")})[0])

def generate_template(code, sanity=50):
    code = re.sub(r'([\{\};\(\)\,])', r' \1 ', code)
    interim = ""
    while not interim.endswith(end_char + "\n") and sanity > 0:
        interim += assembly_lookup[generate_line(code,interim)] + "\n"
        sanity -= 1
    return interim

In [9]:
pf = pd.concat([data["Code"],data["Mapped Operator"]],axis=1)
pf = pf.rename(columns={"Code": "features", "Mapped Operator": "labels"})
tf_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(pf, label="labels")

def prepare_dataset(features, labels):
  features = {"features": tf.strings.split(features["features"])}
  return features, labels

tf_dataset = tf_dataset.map(prepare_dataset)

op_model = tfdf.keras.RandomForestModel(num_trees=50,verbose=2)
op_history = op_model.fit(tf_dataset)

Use 8 thread(s) for training
Use /tmp/tmpux8ytm50 as temporary training directory
Reading training dataset...
Training tensor examples:
Features: {'features': tf.RaggedTensor(values=Tensor("data:0", shape=(None,), dtype=string), row_splits=Tensor("data_1:0", shape=(None,), dtype=int64))}
Label: Tensor("data_2:0", shape=(None,), dtype=int64)
Weights: None
Normalized tensor features:
 {'features': SemanticTensor(semantic=<Semantic.CATEGORICAL_SET: 4>, tensor=tf.RaggedTensor(values=Tensor("data:0", shape=(None,), dtype=string), row_splits=Tensor("data_1:0", shape=(None,), dtype=int64)))}
Training dataset read in 0:00:00.296164. Found 30000 examples.
Training model...


[INFO 24-03-03 23:18:26.0009 EST kernel.cc:771] Start Yggdrasil model training
[INFO 24-03-03 23:18:26.0009 EST kernel.cc:772] Collect training examples
[INFO 24-03-03 23:18:26.0011 EST kernel.cc:785] Dataspec guide:
column_guides {
  column_name_pattern: "^__LABEL$"
  type: CATEGORICAL
  categorial {
    min_vocab_frequency: 0
    max_vocab_count: -1
  }
}
default_column_guide {
  categorial {
    max_vocab_count: 2000
  }
  discretized_numerical {
    maximum_num_bins: 255
  }
}
ignore_columns_without_guides: false
detect_numerical_as_discretized_numerical: false

[INFO 24-03-03 23:18:26.0035 EST kernel.cc:391] Number of batches: 30
[INFO 24-03-03 23:18:26.0035 EST kernel.cc:392] Number of examples: 30000
[INFO 24-03-03 23:18:26.0125 EST kernel.cc:792] Training dataset:
Number of records: 30000
Number of columns: 2

Number of columns by type:
	CATEGORICAL_SET: 1 (50%)
	CATEGORICAL: 1 (50%)

Columns:

CATEGORICAL_SET: 1 (50%)
	1: "features" CATEGORICAL_SET has-dict vocab-size:204 zero

Model trained in 0:00:01.238422
Compiling model...
Model compiled.


In [10]:
def classify_operator(code):
    return np.argmax(op_model.call({"features": tf.strings.split([code])})[0])

In [11]:
### OPTIMIZATION ###

### Here, it generates a model per operator. These train off the data as well
### This number is data dependent

operator_models = [None] * len(operator_lookup)

early_stopping = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

for ri in range(len(operator_lookup)):

    operator_models[ri] = tf.keras.Sequential([
        tf.keras.layers.Dense(256),
        tf.keras.layers.Dense(256),
        tf.keras.layers.Dense(256),
        tf.keras.layers.Dense(3)
    ])
    operator_models[ri].compile(loss="mse",optimizer="adam")

    relevant_data = data[data["Mapped Operator"] == ri]

    relevant_data["Code Digits"] = relevant_data["Code Digits"].apply(lambda x: x + ([0] * (3-len(x))))
    relevant_data["Assembly Digits"] = relevant_data["Assembly Digits"].apply(lambda x: x + ([0] * (3-len(x))))

    inputs = np.stack(relevant_data["Code Digits"].to_numpy()).astype(int)
    outputs = np.stack(relevant_data["Assembly Digits"].to_numpy()).astype(int)

    operator_models[ri].fit(x=inputs, y=outputs, validation_split=0.1, epochs=100, callbacks=[early_stopping])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_data["Code Digits"] = relevant_data["Code Digits"].apply(lambda x: x + ([0] * (3-len(x))))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_data["Assembly Digits"] = relevant_data["Assembly Digits"].apply(lambda x: x + ([0] * (3-len(x))))


Epoch 1/100


2024-03-03 23:18:41.083661: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f37484cb7d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-03-03 23:18:41.084070: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1050, Compute Capability 6.1
2024-03-03 23:18:41.231428: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-03-03 23:18:41.430309: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
I0000 00:00:1709525921.698469  247903 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 1/100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_data["Code Digits"] = relevant_data["Code Digits"].apply(lambda x: x + ([0] * (3-len(x))))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_data["Assembly Digits"] = relevant_data["Assembly Digits"].apply(lambda x: x + ([0] * (3-len(x))))


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 1/100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_data["Code Digits"] = relevant_data["Code Digits"].apply(lambda x: x + ([0] * (3-len(x))))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_data["Assembly Digits"] = relevant_data["Assembly Digits"].apply(lambda x: x + ([0] * (3-len(x))))


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


In [12]:
def compile_numbers(code_numbers, operator):
    output = operator_models[operator](np.array([code_numbers]).astype(int))[0][0]
    return round(output)

In [13]:
def splice_numbers_into_assembly(template,digit):
    return template.replace(numerical_char,str(digit.numpy().astype(int)))

In [14]:
def remove_prefixes(template):
    lines = template.split('\n')
    lines = [(line.split('_')[1] if ("_" in line) else line) for line in lines]
    return "\n".join(lines)

In [15]:
def compile_code(code):
    digits = re.findall(r'\d+', code)
    digits = digits + ([0] * (3 - len(digits)))

    tokens = code.split()
    for i in range(len(tokens)):
        tokens[i] = f"{i}_{tokens[i]}"
    code = " ".join(tokens)

    operator_n = classify_operator(code)
    final_digit = compile_numbers(digits,operator_n)
    assembly_template = generate_template(code)
    spliced_assembly = splice_numbers_into_assembly(assembly_template, final_digit)
    return remove_prefixes(spliced_assembly)

In [56]:
print(compile_code("variable_a * 1"))

push rbp
mov rbp, rsp
mov DWORD PTR [rbp-4], edi
mov DWORD PTR [rbp-8], esi
mov eax, DWORD PTR [rbp-4]
pop rbp
ret
⁂

