In [3]:
# Encoding the FlexiBERT structure to integer vectors

import numpy as np
import sys

def encode_json_structure(json_structure, fixed_length=10):
    """Encode a FlexiBERT architecture description as a fixed-length integer vector.

    Layout of the returned vector::

        [num_layers, hidden_size_id,
         op_type_id, num_heads_id, ff_dim_id, num_ff_id, op_param_id,   # layer 0
         ...                                                            # 5 values per layer
         -1, -1, ...]                                                   # padding

    Every categorical value is replaced by its index in the lookup tables
    below. A value that has no entry in its table is encoded as -1, which is
    also the padding value.

    Args:
        json_structure: dict with an optional "hidden_size" entry and a
            required ``["nas_config"]["encoder_layers"]`` sequence. Each layer
            is a dict with the keys "operation_type", "num_operation_heads",
            "feed_forward_dimension" and "num_feed_forward", plus
            "operation_parameter" when the operation type is SA, LT or DSC.
        fixed_length: length of the returned vector.

    Returns:
        A 1-D NumPy integer array with exactly ``fixed_length`` elements.

    Raises:
        KeyError: if "nas_config", "encoder_layers" or a required layer key
            is missing.
        ValueError: if the architecture needs more than ``fixed_length``
            slots. Nothing is ever truncated.
    """
    # Lookup tables mapping each categorical value to its integer ID.
    hidden_size_mapping = {128: 0, 256: 1}
    operation_type_mapping = {"SA": 0, "LT": 1, "DSC": 2}
    num_operation_heads_mapping = {2: 0, 4: 1}
    feed_forward_dimension_mapping = {512: 0, 1024: 1}
    num_feed_forward_mapping = {1: 0, 3: 1}
    # The meaning of "operation_parameter" depends on the operation type, so
    # each type gets its own table.
    operation_parameter_mappings = {
        "SA": {"SDP": 0, "WMA": 1},
        "LT": {"DFT": 0, "DCT": 1},
        "DSC": {5: 0, 9: 1},
    }

    hidden_size_id = hidden_size_mapping.get(json_structure.get("hidden_size"), -1)
    layers = list(json_structure["nas_config"]["encoder_layers"])

    # Header: layer count first, then the hidden size.
    encoded_representation = [len(layers), hidden_size_id]

    for layer in layers:
        operation_type = layer["operation_type"]
        encoded_representation.extend([
            operation_type_mapping.get(operation_type, -1),
            num_operation_heads_mapping.get(layer["num_operation_heads"], -1),
            feed_forward_dimension_mapping.get(layer["feed_forward_dimension"], -1),
            num_feed_forward_mapping.get(layer["num_feed_forward"], -1),
        ])

        # Always emit a fifth value for the layer, even when the operation
        # type is unknown; otherwise every later layer would be shifted by one
        # position and the columns would no longer line up across vectors.
        parameter_mapping = operation_parameter_mappings.get(operation_type)
        if parameter_mapping is None:
            encoded_representation.append(-1)
        else:
            encoded_representation.append(
                parameter_mapping.get(layer["operation_parameter"], -1)
            )

    # A vector that exactly fills fixed_length is valid; only a strictly
    # longer one is an error. Raise instead of calling sys.exit() so the
    # caller (or the notebook) gets a normal, catchable exception.
    if len(encoded_representation) > fixed_length:
        raise ValueError(
            f"Encoded architecture needs {len(encoded_representation)} values "
            f"but fixed_length is {fixed_length}"
        )

    # Pad with -1 up to the fixed length.
    encoded_representation.extend([-1] * (fixed_length - len(encoded_representation)))

    return np.array(encoded_representation)

# Hand-written sample architectures used to try out the encoder:
# json_structure1 has four encoder layers, json_structure2 has three.
json_structure1 = dict(
    hidden_size=256,
    nas_config=dict(
        encoder_layers=[
            dict(operation_type="LT", operation_parameter="DFT", num_operation_heads=4,
                 feed_forward_dimension=1024, num_feed_forward=3),
            dict(operation_type="SA", operation_parameter="SDP", num_operation_heads=2,
                 feed_forward_dimension=1024, num_feed_forward=1),
            dict(operation_type="DSC", operation_parameter=9, num_operation_heads=4,
                 feed_forward_dimension=512, num_feed_forward=3),
            dict(operation_type="DSC", operation_parameter=5, num_operation_heads=2,
                 feed_forward_dimension=512, num_feed_forward=1),
        ],
    ),
)

json_structure2 = dict(
    hidden_size=128,
    nas_config=dict(
        encoder_layers=[
            dict(operation_type="SA", operation_parameter="SDP", num_operation_heads=4,
                 feed_forward_dimension=1024, num_feed_forward=1),
            dict(operation_type="LT", operation_parameter="DCT", num_operation_heads=2,
                 feed_forward_dimension=1024, num_feed_forward=1),
            dict(operation_type="DSC", operation_parameter=9, num_operation_heads=4,
                 feed_forward_dimension=512, num_feed_forward=3),
        ],
    ),
)


In [4]:
# Encode the three-layer example into a 23-slot vector; unused slots are -1.
encode_json_structure(json_structure2, fixed_length=23)

array([ 3,  0,  0,  1,  1,  0,  0,  1,  0,  1,  0,  1,  2,  1,  0,  1,  1,
       -1, -1, -1, -1, -1, -1])

In [9]:
import json
import pandas as pd


# Read the FlexiBERT architecture benchmark and export one encoded row per
# architecture to a CSV file.
BENCHMARK_PATH = "BERT_benchmark.json"
OUTPUT_CSV_PATH = "output.csv"
VECTOR_LENGTH = 23   # number of vector_* columns; must match the encoder's fixed_length
MAX_CONFIGS = 500    # export at most the first 500 benchmark entries

with open(BENCHMARK_PATH, 'r') as f:
    configs = json.load(f)

data_rows = []
# Slicing (rather than range(500) + indexing) keeps the 500-entry cap but does
# not raise IndexError when the benchmark holds fewer entries.
for config in configs[:MAX_CONFIGS]:
    json_structure = config["hparams"]["model_hparam_overrides"]
    glue = config["scores"]["glue"]
    # NOTE(review): 'time_to_train' is stored in the "latency" column after
    # dividing by 1000 — presumably a unit conversion; confirm.
    latency = config['time_to_train']

    # Encode the JSON structure into a fixed-size vector representation.
    fixed_size_vector = encode_json_structure(json_structure, fixed_length=VECTOR_LENGTH)

    # One column per vector element, followed by the two target columns.
    row = {f"vector_{j}": value for j, value in enumerate(fixed_size_vector)}
    row["accuracy"] = glue
    row["latency"] = latency / 1000.0

    data_rows.append(row)

# Create a DataFrame from the collected rows and save it as a CSV file.
df = pd.DataFrame(data_rows)
df.to_csv(OUTPUT_CSV_PATH, index=False)

print(f"CSV file saved as {OUTPUT_CSV_PATH}")

CSV file saved as output.csv


In [13]:
import pandas as pd

# File paths – update these to the correct paths on your system.
# NOTE(review): the export cell above writes "output.csv", not
# "new_flexibert_data.csv" — confirm that file is the same data renamed.
old_csv_path = "old_flexibert_data.csv"
new_csv_path = "new_flexibert_data.csv"
updated_csv_path = "modified_flexibert_data.csv"

# Load the old CSV, which supplies the 'latency' and 'mem' columns.
old_df = pd.read_csv(old_csv_path)
print("Old CSV loaded. Number of rows:", len(old_df))

# Load the new CSV, which receives those columns.
new_df = pd.read_csv(new_csv_path)
print("New CSV loaded. Number of rows:", len(new_df))

# Extract the columns to carry over from the old CSV.
old_latency = old_df['latency']
old_memory = old_df['mem']

if len(old_latency) != len(new_df):
    # Nothing is merged, so nothing is written: saving here would produce a
    # "modified" file that is really just an unmodified copy of the new CSV.
    print("Warning: Number of rows differ between the old and new CSVs!")
else:
    # .values copies by row position and ignores the index, so this assumes
    # both files list the architectures in the same order — TODO confirm.
    new_df['latency'] = old_latency.values
    new_df['memory'] = old_memory.values   # old 'mem' column is stored as 'memory'
    print("Replaced latency column in the new CSV.")

    # Save the merged DataFrame only after a successful merge.
    new_df.to_csv(updated_csv_path, index=False)
    print("Updated new CSV saved to:", updated_csv_path)

Old CSV loaded. Number of rows: 500
New CSV loaded. Number of rows: 500
Replaced latency column in the new CSV.
Updated new CSV saved to: modified_flexibert_data.csv
