# JSON Evaluation Pipeline

In [1]:
# !pip install transformers
# !pip install torch
# !pip install pandas
# !pip3 install torch torchvision torchaudio
# !pip install ipywidgets
# !pip install bitsandbytes
# !pip install accelerate

In [2]:
import transformers
import torch
import tqdm
import json

## Model Import

In [4]:
## define run name
run_name = "finalTraining_v1"
# run_name = "MLPC-2048-StarCoderBase7B"

# define model for tokenizer
model_name = "codellama/CodeLlama-7b-hf"
# model_name = "bigcode/starcoderbase-7b"

# dataset import folder
export_folder = "./dataset/" + run_name + "/"

# model save path
model_save_path = "./models/" + run_name + "/"

# model checkpoint path
model_checkpoint_path = "./checkpoints/" + run_name + "/"

In [5]:
## Test loading model and inference with that model

# load quantization config for 4bit quantization -> must be same as training
quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16)

# load model from model_save_path with quantization config
model = transformers.AutoModelForCausalLM.from_pretrained(model_save_path, quantization_config=quantization_config, low_cpu_mem_usage=True)

# optional: load model untrained
# model = transformers.AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config, low_cpu_mem_usage=True)

# optional: load model unquantized and untrained
# model = transformers.AutoModelForCausalLM.from_pretrained(model_name, low_cpu_mem_usage=True)

# optional: load model from checkpoint
# model = transformers.AutoModelForCausalLM.from_pretrained("./output/bigRun/checkpoint-1000", quantization_config=quantization_config, low_cpu_mem_usage=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 22.00 MiB. GPU 0 has a total capacity of 10.58 GiB of which 16.44 MiB is free. Process 1665360 has 8.02 GiB memory in use. Including non-PyTorch memory, this process has 2.53 GiB memory in use. Of the allocated memory 2.21 GiB is allocated by PyTorch, and 154.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# load tokenizer
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

# add pad token
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

## Generator

In [6]:
def generate_nitrox_json(model, tokenizer, prompt, use_custom_eos=False, custom_eos_token="}", max_length=3000, confidence_threshold=0.01):
    """ Generate code completions for a given prompt using the model and tokenizer.
    """


    # Encode the prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    # Initialize the output as the input
    output = input_ids
    
    # Loop until the end token is generated or counter is at max_length
    for i in tqdm.tqdm(range(max_length)):
        # Predict the probabilities of the next token
        with torch.no_grad():
            outputs = model(output)
        predictions = outputs.logits[:, -1, :]
        probabilities = torch.nn.functional.softmax(predictions, dim=-1)

        # Get the token with the highest probability
        max_prob, max_token_id = torch.max(probabilities, dim=-1)

        # Check if the confidence is over the threshold
        if max_prob.item() < confidence_threshold:
            break

        # Append the token to the output
        output = torch.cat([output, max_token_id.unsqueeze(0)], dim=-1)

        if len(output[0]) > 3 + len(custom_eos_token):
            evtl_end = tokenizer.decode(output[0][-3:], skip_special_tokens=True)
            if use_custom_eos:
                if custom_eos_token in evtl_end:
                    break
            # check for <EOS> in evtl_end
            if "<EOS>" in evtl_end:
                break
        
        # decode every 1000 iterations and print output
        if len(output[0]) % 50 == 0:
            # print(tokenizer.decode(output[0], skip_special_tokens=True))
            print("Length of output: ", len(output[0]))
            print("Max prob: ", max_prob.item())
            print("Max token: ", max_token_id.item())
            print("Counter: ", i)
            print("")

        
    # Decode the output
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # delete <START> from generated text
    generated_text = generated_text.replace("<START>", "")

    print(generated_text)

    # cleanup
    del output
    del input_ids
    torch.cuda.empty_cache()

    return generated_text

In [14]:
def generate_nitrox_json_fast_hope(model, tokenizer, prompt, seed=42, custom_eos_token="<END>", max_length=400):
    # set a seed for generation
    torch.manual_seed(seed)

    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    input_ids = input_ids.to('cuda')
    end_token_id = tokenizer.encode(custom_eos_token, add_special_tokens=False)[0]

    output = model.generate(input_ids, eos_token_id=end_token_id, temperature=0.1, max_length=max_length, do_sample=True, num_return_sequences=1)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    
    # delete <START> from generated text
    generated_text = generated_text.replace("<START>", "")
    print(generated_text)
    
    # cleanup
    del output
    del input_ids
    torch.cuda.empty_cache()

    return generated_text

In [16]:
# test generate_nitrox_json_fast_hope
prompt_test = '<START> {'
test_output_0 = generate_nitrox_json_fast_hope(model, tokenizer, prompt_test, seed=0)

print(test_output_0)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "typeName": "VAR_typeName", "domainType": "NON_DOMAIN", "assertions": [], "hasOptionalContainer": false, "hasCollectionContainer": false, "hasListContainer": false, "hasSetContainer": false, "hasStreamContainer": false, "containerTypeName": "VAR_containerTypeName", "containerAssertions": []}}], "returnType": {"@class": "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_typeName", "domainType": "NON_DOMAIN", "assertions": [], "hasOptionalContainer": false, "hasCollectionContainer": false, "hasListContainer": false, "hasSetContainer": false, "hasStreamContainer": false, "containerTypeName": "VAR_containerTypeName", "containerAssertions": []}, "publishedEventTypeNames": "VAR_publishedEventTypeNames", "listenedEventTypeName": null, "getter": false, "setter": false}, {"@class": "nitrox.dlc.mirror.model.MethodModel", "name": "VAR_name", "declaredByTypeName": "VAR_declaredByTypeName", "accessLevel": "PUBLIC", "parameters": [], "returnType": {"@class": "nitrox.dlc.mirr

In [9]:
def generate_multi_prompts(prompts, model, tokenizer, use_custom_eos=True, custom_eos_token='"valueObject" : true}', 
max_length=6000, confidence_threshold=0.01, export_path="gen_json"):
    counter = 0
    for prompt in tqdm.tqdm(prompts):
        # generated_text = generate_nitrox_json(model, tokenizer, prompt, use_custom_eos=use_custom_eos, custom_eos_token=custom_eos_token, max_length=max_length, confidence_threshold=confidence_threshold)
        generated_text = generate_nitrox_json_fast_hope(model, tokenizer, prompt, custom_eos_token=custom_eos_token, max_length=max_length)

        # save generated text as file named after the model Type
        with open(export_path + "/" + str(counter) + "_" + prompt.split("nitrox.dlc.mirror.model.")[1].split(",")[0].replace("\"","") + ".json", "w") as f:
            f.write(generated_text)
        counter += 1

## Generate from Prompts

Generate JSON to max token length.

In [10]:
prompts = [
    '<START> { "@class" : "nitrox.dlc.mirror.model.EntityModel"',
    '<START> { "@class" : "nitrox.dlc.mirror.model.ValueObjectModel"',
    '<START> { "@class" : "nitrox.dlc.mirror.model.AggregateRootModel"',
    '<START> { "@class" : "nitrox.dlc.mirror.model.IdentityModel"',
    '<START> { "@class" : "nitrox.dlc.mirror.model.EnumModel"',
    '<START> { "@class" : "nitrox.dlc.mirror.model.DomainServiceModel"',
    '<START> { "@class" : "nitrox.dlc.mirror.model.RepositoryModel"',
    '<START> { "@class" : "nitrox.dlc.mirror.model.ApplicationServiceModel"',
    '<START> { "@class" : "nitrox.dlc.mirror.model.DomainEventModel"',
    '<START> { "@class" : "nitrox.dlc.mirror.model.DomainCommandModel"',
]

In [11]:
# multiply prompts for more prompts -> prompt = 10 * prompt
prompts = 1 * prompts

len(prompts)

10

In [13]:
# generate generate_multi_prompts
generate_multi_prompts(prompts, model, tokenizer, use_custom_eos=False, custom_eos_token="<END>", max_length=300, confidence_threshold=0.01, export_path="gen_json_test")

  0%|          | 0/10 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.
 10%|█         | 1/10 [00:17<02:40, 17.81s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "@class" : "nitrox.dlc.mirror.model.EntityModel" , "typeName" : "VAR_typeName" , "abstract" : false , "allFields" : [{"@class" : "nitrox.dlc.mirror.model.ValueReferenceModel", "name" : "VAR_name", "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName" : "VAR_typeName", "domainType" : "IDENTITY", "assertions" : [{"@class" : "nitrox.dlc.mirror.model.AssertionModel", "assertionType" : "isNotNull", "param1" : null, "param2" : null, "messageCode" : "VAR_messageCode"}, {"@class" : "nitrox.dlc.mirror.model.AssertionModel", "assertionType" : "isPositive", "param1" : null, "param2" : null, "messageCode" : "VAR_messageCode"}], "hasOptionalContainer" : false, "hasCollectionContainer" : false, "hasListContainer" : false, "hasSetContainer" : false, "hasStreamContainer" : false, "containerTypeName" :


 20%|██        | 2/10 [00:35<02:21, 17.73s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "@class" : "nitrox.dlc.mirror.model.ValueObjectModel" , "typeName" : "VAR_typeName" , "abstract" : false , "allFields" : [{"@class" : "nitrox.dlc.mirror.model.ValueReferenceModel", "name" : "VAR_name", "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName" : "VAR_typeName", "domainType" : "IDENTITY", "assertions" : [{"@class" : "nitrox.dlc.mirror.model.AssertionModel", "assertionType" : "isNotNull", "param1" : null, "param2" : null, "message" : "{jakarta.validation.constraints.NotNull.message}"}], "hasOptionalContainer" : false, "hasCollectionContainer" : false, "hasListContainer" : false, "hasSetContainer" : false, "hasStreamContainer" : false, "containerTypeName" : "VAR_containerTypeName", "containerAssertions" : []}, "accessLevel" : "PRIVATE", "declaredByTypeName" : "VAR_declaredByTypeName", "modifiable" :


 30%|███       | 3/10 [00:52<02:03, 17.63s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "@class" : "nitrox.dlc.mirror.model.AggregateRootModel" , "typeName" : "VAR_typeName" , "abstract" : false , "allFields" : [{"@class" : "nitrox.dlc.mirror.model.ValueReferenceModel", "name" : "VAR_name", "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName" : "VAR_typeName", "domainType" : "IDENTITY", "assertions" : [{"@class" : "nitrox.dlc.mirror.model.AssertionModel", "assertionType" : "isNotNull", "param1" : null, "param2" : null, "message" : "{jakarta.validation.constraints.NotNull.message}"}], "hasOptionalContainer" : false, "hasCollectionContainer" : false, "hasListContainer" : false, "hasSetContainer" : false, "hasStreamContainer" : false, "containerTypeName" : "VAR_containerTypeName", "containerAssertions" : []}, "accessLevel" : "PRIVATE", "declaredByTypeName" : "VAR_declaredByTypeName", "modifiable


 40%|████      | 4/10 [01:10<01:45, 17.66s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "@class" : "nitrox.dlc.mirror.model.IdentityModel" , "typeName" : "VAR_typeName", "abstract": false, "allFields": [{"@class": "nitrox.dlc.mirror.model.FieldModel", "name": "VAR_name", "type": {"@class": "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_typeName", "domainType": "NON_DOMAIN", "assertions": [], "hasOptionalContainer": false, "hasCollectionContainer": false, "hasListContainer": false, "hasSetContainer": false, "hasStreamContainer": false, "containerTypeName": "VAR_containerTypeName", "containerAssertions": []}, "accessLevel": "PRIVATE", "declaredByTypeName": "VAR_declaredByTypeName", "modifiable": false, "publicReadable": true, "publicWriteable": false, "static": false}], "methods": [{"@class": "nitrox.dlc.mirror.model.MethodModel", "name": "VAR_name", "declaredByTypeName": "VAR_declaredByTypeName", "accessLevel": "PUBLIC", "parameters": [], "


 50%|█████     | 5/10 [01:28<01:28, 17.68s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "@class" : "nitrox.dlc.mirror.model.EnumModel" , "typeName" : "VAR_typeName" , "abstract" : false , "allFields" : [{"@class" : "nitrox.dlc.mirror.model.FieldModel" , "name" : "VAR_name" , "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_typeName", "domainType": "NON_DOMAIN", "assertions": [], "hasOptionalContainer": false, "hasCollectionContainer": false, "hasListContainer": false, "hasSetContainer": false, "hasStreamContainer": false, "containerTypeName": "VAR_containerTypeName", "containerAssertions": []}, "accessLevel": "PRIVATE", "declaredByTypeName": "VAR_declaredByTypeName", "modifiable": false, "publicReadable": true, "publicWriteable": false, "static": false}, {"@class" : "nitrox.dlc.mirror.model.FieldModel" , "name" : "VAR_name" , "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContain


 60%|██████    | 6/10 [01:45<01:10, 17.64s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "@class" : "nitrox.dlc.mirror.model.DomainServiceModel" , "typeName" : "VAR_typeName" , "abstract": false, "allFields": [{"@class": "nitrox.dlc.mirror.model.FieldModel", "name": "VAR_name", "type": {"@class": "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_typeName", "domainType": "REPOSITORY", "assertions": [], "hasOptionalContainer": false, "hasCollectionContainer": false, "hasListContainer": false, "hasSetContainer": false, "hasStreamContainer": false, "containerTypeName": "VAR_containerTypeName", "containerAssertions": []}, "accessLevel": "PRIVATE", "declaredByTypeName": "VAR_declaredByTypeName", "modifiable": false, "publicReadable": false, "publicWriteable": false, "static": false}, {"@class": "nitrox.dlc.mirror.model.FieldModel", "name": "VAR_name", "type": {"@class": "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_type


 70%|███████   | 7/10 [02:03<00:52, 17.64s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "@class" : "nitrox.dlc.mirror.model.RepositoryModel" , "typeName" : "VAR_typeName" , "abstract" : false , "allFields" : [{"@class" : "nitrox.dlc.mirror.model.FieldModel", "name" : "VAR_name", "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_typeName", "domainType": "NON_DOMAIN", "assertions": [], "hasOptionalContainer": false, "hasCollectionContainer": false, "hasListContainer": false, "hasSetContainer": false, "hasStreamContainer": false, "containerTypeName": "VAR_containerTypeName", "containerAssertions": []}, "accessLevel": "PRIVATE", "declaredByTypeName": "VAR_declaredByTypeName", "modifiable": false, "publicReadable": false, "publicWriteable": false, "static": false}, {"@class" : "nitrox.dlc.mirror.model.FieldModel", "name": "VAR_name", "type": {"@class": "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "type


 80%|████████  | 8/10 [02:21<00:35, 17.63s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "@class" : "nitrox.dlc.mirror.model.ApplicationServiceModel" , "typeName" : "VAR_typeName" , "abstract" : false , "allFields" : [{"@class" : "nitrox.dlc.mirror.model.FieldModel", "name" : "VAR_name", "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_typeName", "domainType": "DOMAIN_SERVICE", "assertions": [], "hasOptionalContainer": false, "hasCollectionContainer": false, "hasListContainer": false, "hasSetContainer": false, "hasStreamContainer": false, "containerTypeName": "VAR_containerTypeName", "containerAssertions": []}, "accessLevel": "PRIVATE", "declaredByTypeName": "VAR_declaredByTypeName", "modifiable": false, "publicReadable": false, "publicWriteable": false, "static": false}, {"@class" : "nitrox.dlc.mirror.model.FieldModel", "name"": "VAR_name", "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContainableTypeModel


 90%|█████████ | 9/10 [02:38<00:17, 17.63s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:529 for open-end generation.


 { "@class" : "nitrox.dlc.mirror.model.DomainEventModel" , "typeName" : "VAR_typeName" , "abstract": false, "allFields": [{"@class": "nitrox.dlc.mirror.model.ValueReferenceModel", "name": "VAR_name", "type": {"@class": "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_typeName", "domainType": "IDENTITY", "assertions": [], "hasOptionalContainer": false, "hasCollectionContainer": false, "hasListContainer": false, "hasSetContainer": false, "hasStreamContainer": false, "containerTypeName": "VAR_containerTypeName", "containerAssertions": []}, "accessLevel": "PRIVATE", "declaredByTypeName": "VAR_declaredByTypeName", "modifiable": false, "publicReadable": true, "publicWriteable": false, "static": false}, {"@class": "nitrox.dlc.mirror.model.ValueReferenceModel", "name": "VAR_name", "type": {"@class": "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_


100%|██████████| 10/10 [02:56<00:00, 17.65s/it]

 { "@class" : "nitrox.dlc.mirror.model.DomainCommandModel" , "typeName" : "VAR_typeName" , "abstract" : false , "allFields" : [{"@class" : "nitrox.dlc.mirror.model.ValueReferenceModel", "name" : "VAR_name", "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContainableTypeModel", "typeName": "VAR_typeName", "domainType": "IDENTITY", "assertions": [], "hasOptionalContainer": false, "hasCollectionContainer": false, "hasListContainer": false, "hasSetContainer": false, "hasStreamContainer": false, "containerTypeName": "VAR_containerTypeName", "containerAssertions": []}, "accessLevel": "PRIVATE", "declaredByTypeName": "VAR_declaredByTypeName", "modifiable": false, "publicReadable": true, "publicWriteable": false, "static": false}, {"@class" : "nitrox.dlc.mirror.model.ValueReferenceModel", "name" : "VAR_name", "type": {"@class" : "nitrox.dlc.mirror.model.AssertedContainableTypeModel





## Complete JSONs

In [35]:
def close_json(json_str):
    stack = []

    for char in json_str:
        if char in '{[':
            stack.append(char)
        elif char in '}]':
            if stack:
                stack.pop()

    while stack:
        char = stack.pop()
        if char == '{':
            json_str += '}'
        elif char == '[':
            json_str += ']'

    return json_str

In [36]:
def replace_double_quotes(json_str):
    return json_str.replace('""', '"')

In [37]:
def complete_json(json_file):
    """
    clean and close the json 
    """

    # 1. check the end of the JSON and delete the last uncomplete key/value pair. (Delete till the first komma ",")
    json_file = json_file[:json_file.rfind(",")]

    # 2. create a array with the unclosed brackets in the json -> move sequential through the json and add when brackets are open and delete when they are closed. ((, {, [) -> (,), }, ])
    open_brackets = []
    closed_brackets = []
    for char in json_file:
        if char in ["(", "{", "["]:
            open_brackets.append(char)
        if char in [")", "}", "]"]:
            closed_brackets.append(char)
    


    json_file = close_json(json_file)
    # find the missing brackets by comparing the two arrays
    # brackets = []
    # for bracket in open_brackets:
    #     if bracket == "(":
    #         if ")" not in closed_brackets:
    #             brackets.append(bracket)
    #         else:
    #             closed_brackets.remove(")")
    #     elif bracket == "{":
    #         if "}" not in closed_brackets:
    #             brackets.append(bracket)
    #         else:
    #             closed_brackets.remove("}")
    #     elif bracket == "[":
    #         if "]" not in closed_brackets:
    #             brackets.append(bracket)
    #         else:
    #             closed_brackets.remove("]")
    
    # print(brackets)
    # # 3. add the missing brackets from the array to the end of the json
    # for bracket in brackets:
    #     if bracket == "(":
    #         json_file += ")"
    #     elif bracket == "{":
    #         json_file += "}"
    #     elif bracket == "[":
    #         json_file += "]"
    
    # 4. replace double quotes
    json_file = replace_double_quotes(json_file)

    return json_file

In [38]:
# define folder
folder = "gen_json"

In [39]:
# read all json_files in gen_json folder
import os

json_files = []
for file in os.listdir(folder):
    if file.endswith(".json"):
        with open(folder + "/" + file, "r") as f:
            json_files.append(f.read())

In [40]:
# complete all jsons
completed_jsons = []
for json_file in json_files:
    completed_jsons.append(complete_json(json_file))

In [41]:
# save all jsons to gen_json/completed folder
for i in range(len(completed_jsons)):
    with open(folder + "/completed/" + str(i) + ".json", "w") as f:
        f.write(complete_json(completed_jsons[i]))

## Parse completed JSONs

In [42]:
import json
import os

In [43]:
# test test_json_comp with json.loads and look for errors
def test_json_function(test_json):
    try:
        json.loads(test_json)
        return "No error"
    except Exception as e:
        return e
    

In [44]:
test_json = ' { "@class" : "nitrox.dlc.mirror.model.ApplicationServiceModel" , "typeName" : "VAR_typeName" }'

result = test_json_function(test_json)
result

'No error'

In [45]:
# read all jsons in gen_json/completed folder

jsons_to_parse = []
for file in os.listdir(folder + "/completed"):
    if file.endswith(".json"):
        with open(folder +"/completed/" + file, "r") as f:
            jsons_to_parse.append(f.read())

In [46]:
# test all jsons
results = {}
for number, json_file in enumerate(jsons_to_parse):
    # append results to results dict
    results[number] = test_json_function(json_file)

# save results to results.txt
with open(folder + "/completed/results.txt", "w") as f:
    for key in results:
        f.write(str(key) + ": " + str(results[key]) + "\n")