Note

- The saved model does not store the training hyperparameters (e.g. epochs, batch_size, verbose).
- The following values are currently hard-coded:
    - created_at
    - created_for_project
    - hyperparameters


In [3]:
import json
import os
from pprint import pprint
import jsonschema as jsc
import tensorflow as tf

In [5]:
def read_json(file_path):
    """Parse the JSON document stored at ``file_path`` and return the result."""
    with open(file_path, "r") as handle:
        return json.load(handle)

def write_json(data, file_path):
    """Serialise ``data`` as 4-space indented JSON into ``file_path``.

    A confirmation line naming the written file is printed afterwards.
    """
    with open(file_path, "w") as handle:
        json.dump(data, handle, indent=4)
    print(f"{file_path} has been created" )

In [19]:
def get_manual_input(*inputs):
    """Bundle the positional arguments under the manually supplied metadata keys.

    All positional arguments are collected into one tuple, and that same
    tuple is stored under both ``created_for_project`` and ``created_at``.
    """
    return dict.fromkeys(("created_for_project", "created_at"), inputs)


def load_model(model_path):
    """Return the model that ``tf.keras.models.load_model`` reads from ``model_path``."""
    return tf.keras.models.load_model(model_path)


# Layer config keys that are not reported as plain configuration settings.
_SKIPPED_LAYER_CONFIG_KEYS = ("kernel_initializer", "bias_initializer", "name")


def _as_dimension_list(value):
    """Wrap a scalar in a list; convert an existing list/tuple to a list."""
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _layer_dimensions(layer):
    """Return the characteristic dimensions of one serialized layer, or None."""
    layer_type = layer['class_name']
    layer_config = layer['config']

    if layer_type in {'Conv2D', 'Conv3D', 'Conv1D'}:
        return [layer_config['filters']] + list(layer_config['kernel_size'])
    if layer_type == 'Dense':
        return [layer_config['units']]
    if layer_type == 'LSTM':
        # ``units`` is a scalar here; calling list() on it would raise TypeError.
        return _as_dimension_list(layer_config['units'])
    if layer_type == 'Embedding':
        # Same as above: ``input_dim`` is a scalar, not an iterable.
        return _as_dimension_list(layer_config['input_dim'])
    if layer_type == 'Flatten':
        # Reported as the scalar 1 (not a list), as in the original mapping.
        return 1
    if layer_type in {'MaxPooling2D', 'MaxPooling3D',
                      'AveragePooling2D', 'AveragePooling3D'}:
        return list(layer_config['pool_size'])
    if 'rnn' in layer_type.lower():
        # Best effort for wrapper-style recurrent layers with nested layers.
        try:
            return layer['layers'][0]['units']
        except (KeyError, IndexError, TypeError):
            return None
    # Unsupported layer type.
    return None


def get_parameters_layer(model):
    """Describe every layer of ``model`` except the first configured one.

    The first entry of ``model.get_config()["layers"]`` is skipped (the
    caller reads the model input requirements from it separately).

    Args:
        model: Object exposing ``get_config()`` (returning a dict with a
            ``"layers"`` list of ``{"class_name", "config"}`` dicts) and
            ``layers`` (objects exposing ``input_shape`` and, ideally,
            ``name``).

    Returns:
        A list with one dict per described layer::

            {
                "parameter_layer": {
                    "layer_configuration": [...],   # configuration settings
                    "layer_dimension": ...,         # list, 1, or None
                    "layer_input_dimension": [...],
                },
                "layer_type": "<class name>",
            }

    Raises:
        KeyError: If a layer config lacks ``name`` or a key required by its
            layer type (e.g. ``units`` for ``Dense``).
    """
    # Slice instead of ``del`` so the dict returned by get_config() is not mutated.
    layer_configs = model.get_config()["layers"][1:]
    live_layers = list(model.layers)
    # Pair config entries with live layers by name: positional pairing is
    # wrong whenever ``model.layers`` still contains the skipped first layer.
    layers_by_name = {
        getattr(live_layer, "name", None): live_layer for live_layer in live_layers
    }

    parameter_layers_list = []
    for index, layer in enumerate(layer_configs):
        layer_config = layer['config']
        layer_config_setting = [
            {
                "configuration_setting": {
                    "configuration_setting_label": key,
                    "configuration_setting_value": value,
                }
            }
            for key, value in layer_config.items()
            if key not in _SKIPPED_LAYER_CONFIG_KEYS
        ]

        live_layer = layers_by_name.get(layer_config['name'])
        if live_layer is None:
            # Fall back to the original positional pairing.
            live_layer = live_layers[index]

        parameter_layers_list.append(
            {
                "parameter_layer": {
                    "layer_configuration": layer_config_setting,
                    "layer_dimension": _layer_dimensions(layer),
                    "layer_input_dimension": list(live_layer.input_shape),
                },
                "layer_type": f"{layer['class_name']}",
            }
        )

    return parameter_layers_list
    

def get_training_config(model, hyperparameters=None):
    """Build the training-configuration section for ``model``.

    Args:
        model: Object whose ``optimizer.get_config()`` returns a dict that
            contains a ``"name"`` entry plus the optimizer settings.
        hyperparameters: Optional mapping of hyperparameter name to value.
            The hyperparameters are not read from ``model``; when omitted,
            the previously hard-coded defaults
            ``{"epoch": 10, "batch_size": 128, "verbose": 1}`` are used.

    Returns:
        A ``(hyperparameters, optimizer)`` tuple: a list of
        configuration-setting dicts, and a dict holding the optimizer name
        and its configuration-setting list.

    Raises:
        KeyError: If the optimizer config has no ``"name"`` entry.
    """
    if hyperparameters is None:
        hyperparameters = {"epoch": 10, "batch_size": 128, "verbose": 1}

    # Work on a copy so the dict handed out by get_config() stays untouched.
    optimizer_config = dict(model.optimizer.get_config())
    optimizer_name = optimizer_config.pop('name')

    hyperparameters_configuration_list = [
        {
            "configuration_setting": {
                "configuration_setting_label": label,
                "configuration_setting_value": value,
            }
        }
        for label, value in hyperparameters.items()
    ]
    optimizers_configuration_list = [
        {
            "configuration_setting": {
                "configuration_setting_label": label,
                "configuration_setting_value": value,
            }
        }
        for label, value in optimizer_config.items()
    ]

    optimizer = {
        "optimizer_name": optimizer_name,
        # NOTE(review): the key spelling "opimizer_configuration" is kept
        # as-is because it is part of the emitted document; confirm against
        # the target schema before renaming it.
        "opimizer_configuration": optimizers_configuration_list,
    }

    return hyperparameters_configuration_list, optimizer


def get_initializer(model):
    """Collect the kernel and bias initializers of every configured layer.

    Each layer in ``model.get_config()["layers"]`` is inspected for a
    ``kernel_initializer`` and a ``bias_initializer`` entry. The two are
    handled independently, so a layer that only has one of them (or has one
    set to ``None``) still reports the other.

    Args:
        model: Object whose ``get_config()`` returns a dict with a
            ``"layers"`` list of layer dicts.

    Returns:
        A list of dicts, one per initializer found, with the keys
        ``initializer_name``, ``configuration_setting``, ``whole_model``
        (always ``False``) and ``initializes_layer_index``.
    """
    model_initializers_list = []

    # NOTE(review): the index counts every entry of the config's layer list,
    # including the first one — confirm this is the indexing consumers expect.
    for layer_index, layer in enumerate(model.get_config()["layers"]):
        layer_config = layer.get("config")
        if not isinstance(layer_config, dict):
            continue

        for initializer_key in ("kernel_initializer", "bias_initializer"):
            initializer = layer_config.get(initializer_key)
            # Skip layers without this initializer instead of swallowing
            # every exception with a bare ``except``.
            if not isinstance(initializer, dict):
                continue
            initializer_config = initializer.get("config")
            if "class_name" not in initializer or not isinstance(initializer_config, dict):
                continue

            init_config_setting = [
                {
                    "configuration_setting": {
                        "configuration_setting_label": label,
                        "configuration_setting_value": value,
                    }
                }
                for label, value in initializer_config.items()
            ]
            model_initializers_list.append(
                {
                    "initializer_name": f"{initializer['class_name']}",
                    "configuration_setting": init_config_setting,
                    "whole_model": False,
                    "initializes_layer_index": layer_index,
                }
            )

    return model_initializers_list


def get_meta_data(model_path, framework, *inputs):
    """Load a saved model and map its metadata to a JSON-serialisable dict.

    Args:
        model_path: Path of the saved model to load.
        framework: Name of the framework the model was built with. Only
            ``"tensorflow"`` is supported.
        *inputs: Accepted for backward compatibility; currently not used
            (``created_at`` and ``created_for_project`` are hard-coded).

    Returns:
        A dict with the model name, type, framework, location, input
        requirements, architecture, training configuration and initializers.

    Raises:
        ValueError: If ``framework`` is not ``"tensorflow"``.
    """
    # Validate first: previously an unsupported framework loaded the model
    # and then failed with UnboundLocalError on the return statement.
    if framework != "tensorflow":
        raise ValueError(
            f"Unsupported framework {framework!r}; expected 'tensorflow'"
        )

    model = load_model(model_path)

    # normpath drops a trailing separator, which would make basename return "".
    normalized_path = os.path.normpath(model_path)
    model_name = os.path.basename(normalized_path)
    # Report the directory that contains the model rather than the process
    # working directory; both agree for a relative path inside the cwd.
    model_location = os.path.dirname(os.path.abspath(normalized_path))

    hyperparameter_config, optimizer_config = get_training_config(model)
    parameters_layers = get_parameters_layer(model)
    # The first configured layer supplies the model input requirements.
    input_parameters_layers = model.get_config()["layers"][0]["config"]
    initializer = get_initializer(model)

    # Fall back to the running TensorFlow version when the loaded model
    # does not expose ``tensorflow_version``.
    framework_version = getattr(model, "tensorflow_version", tf.__version__)

    mllo = {
        "model_name": f"{model_name}",
        "model_type": f"{model.name}",
        "model_framework": {
            "framework_name": "tensorflow",
            "framework_version": f"{framework_version}",
        },
        "model_location": f"{model_location}",
        # TODO: hard-coded placeholders; not yet taken from ``inputs``.
        "created_at": "2024-1-1",
        "created_for_project": "MLLOS",
        "model_input_requirements": {
            "input_dimension": input_parameters_layers["batch_input_shape"],
            "input_datatype": input_parameters_layers["dtype"],
        },
        "model_architecture": {"parameters": parameters_layers},
        "training_configuration": {
            "hyperparameters": hyperparameter_config,
            "optimizer": optimizer_config,
        },
        "model_initializers": initializer,
    }
    return mllo

In [20]:
from datetime import datetime

# Build a timestamped output file name for this run.
str_time = datetime.now()
d = str_time.strftime("%Y-%m-%dT%H_%M_%S%f")
filepath = "toy_model"
filename = f"mllo_mapped_{d}.json"

# Map the saved model to its metadata dict and write it out as JSON.
model_dict = get_meta_data(filepath, "tensorflow")
write_json(model_dict, filename)

mllo_mapped_2024-01-12T13_34_32404128.json has been created


In [15]:
# validate
# Load the JSON schema; the context manager closes the file even when
# parsing fails.
with open("ml3.json") as f:
    jsonsch = json.load(f)

# Load the document written by the previous cell.
with open(filename) as f:
    print(f"load {filename}")
    loaded_json = json.load(f)

# jsonschema raises an exception when the document does not match the schema,
# so 'pass' is only printed for a valid document.
jsc.validate(loaded_json, jsonsch)
print('pass')

load mllo_mapped_2024-01-12T13_22_47944391.json
pass
