# CELLO Mk 3.0, EST. 2023-3-3


## COMPONENT #3:
## UCF Modifier and Formatter

Efficiently modify UCF files without ever worrying about syntax errors.

In [None]:
# Relative locations the notebook reads from (in_path) and writes to (out_path).
in_path = '../../IO/inputs/'
out_path = '../../IO/temp_folder'

In [None]:
import json
import os
import glob
import copy

# Remember where the notebook started so the working directory can be
# restored later.
original_directory = os.getcwd()

# Work from inside the input folder so bare file names resolve against it.
os.chdir(in_path)

# Every JSON file in the input folder, in alphabetical order.
extension = '.json'
all_files = glob.glob(f'*{extension}')
all_files.sort()
print('listing all JSON files in input path: \n')
for file in all_files:
    print(file)

In [None]:
# Narrow the listing to files whose names end in 'UCF.json'.
UCF_extension = 'UCF.json'

UCF_files = glob.glob(f'*{UCF_extension}')
UCF_files.sort()
print('ALL UCF files in input path: \n')
for file in UCF_files:
    print(file)

# The working directory is intentionally left at the input path for now.
# Switching back to the local repository is deferred, probably to the end:
# os.chdir(original_directory)

In [None]:
# CHOOSE your UCF file: the file name (inside the input path) to open next.
UCF = 'Eco2c1G5T1.UCF.json'

In [None]:
# Open the chosen UCF file for reading. JSON text is UTF-8 by specification,
# so the encoding is pinned rather than left to the platform default.
# The handle is consumed by the json.load(f) call in the next cell.
f = open(UCF, 'r', encoding='utf-8')

In [None]:
# Parse the UCF, then release the file handle whether or not parsing
# succeeded, so the open file is not leaked for the rest of the session.
try:
    ucf = json.load(f)
finally:
    f.close()

# Independent deep copy of the parsed data: mutating `ucf` leaves it untouched.
ucf_orig = copy.deepcopy(ucf)

In [None]:
# Quick look at the parsed UCF: its container type, then its entry count.
print(type(ucf), len(ucf), sep='\n')

In [None]:
# 'collection' value of every UCF entry, in file order (duplicates kept).
collections = [entry['collection'] for entry in ucf]

# Names of all entries that belong to the 'parts' collection.
ucf_parts = [
    entry['name'] for entry in ucf if entry['collection'] == 'parts'
]

# top layer keys: the distinct collection names
collection_names = set(collections)

In [None]:
# Report the part names and the distinct collection names, one section each,
# separated by a blank line.
print(
    'Parts in UCF: \n',
    ucf_parts,
    '',
    'Collection Names: \n',
    collection_names,
    sep='\n',
)

In [None]:
def list_collection_prarmeters(ucf: list, cName: str):
    """Return the distinct key layouts used by the entries of one collection.

    Args:
        ucf: Parsed UCF content, a list of dict entries.
        cName: Value of the ``'collection'`` field selecting the entries.

    Returns:
        A list of key lists, one per distinct key sequence found among the
        matching entries, in order of first appearance. Empty when no entry
        matches. Entries without a ``'collection'`` key are skipped.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is the same on every run (a set would not guarantee that).
    unique_layouts = dict.fromkeys(
        tuple(entry)
        for entry in ucf
        if 'collection' in entry and entry['collection'] == cName
    )
    return [list(layout) for layout in unique_layouts]

# Map every collection name to the longest key list any of its entries uses.
collection_and_keys = {}

for n in collection_names:
    params = list_collection_prarmeters(ucf, n)
    # Select by number of keys. A bare max() compares the lists
    # lexicographically and would not necessarily return the longest one.
    # Each name is expected to come from `ucf` itself, so `params` should be
    # non-empty here.
    longest_keys = max(params, key=len)
    collection_and_keys[n] = longest_keys

print(json.dumps(collection_and_keys, indent=4))

In [None]:
class UCF:
    """Parsed contents of one UCF file set: main UCF, input and output JSON."""

    def __init__(self, filepath, name):
        """Load the three JSON files of the set called *name* from *filepath*.

        Args:
            filepath: Directory containing the files.
            name: Common file-name prefix; the files read are
                ``<name>.UCF.json``, ``<name>.input.json`` and
                ``<name>.output.json``.

        Raises:
            OSError: If one of the files cannot be opened.
            json.JSONDecodeError: If one of the files is not valid JSON.
        """
        (U, I, O) = self.__parse_helper(filepath, name)
        self.UCFmain = U  # parsed content of <name>.UCF.json
        self.UCFin = I    # parsed content of <name>.input.json
        self.UCFout = O   # parsed content of <name>.output.json

    def __parse_helper(self, filepath, name):
        """Read and parse the three files; return them as (main, input, output)."""
        suffixes = ('.UCF.json', '.input.json', '.output.json')
        parsed = []
        for suffix in suffixes:
            path = os.path.join(filepath, name + suffix)
            # JSON text is UTF-8 by specification; do not depend on the
            # platform default encoding.
            with open(path, 'r', encoding='utf-8') as handle:
                parsed.append(json.load(handle))
        return tuple(parsed)

    def __str__(self):
        """Return the first entry of each file, pretty-printed as JSON.

        Raises IndexError if any of the three parsed files is empty.
        """
        # The separator reproduces the layout this method has always emitted.
        separator = ' \n \n' + ' ' * 19
        first_entries = (self.UCFmain[0], self.UCFin[0], self.UCFout[0])
        return separator.join(
            json.dumps(entry, indent=4) for entry in first_entries
        )

    def list_collection_prarmeters(self, cName):
        """Return the distinct key layouts used by one collection of the main UCF.

        Args:
            cName: Value of the ``'collection'`` field selecting the entries.

        Returns:
            A list of key lists, one per distinct key sequence, in order of
            first appearance. Empty when no entry matches.
        """
        # Read this instance's own data, not whatever global named `ucf`
        # happens to exist in the notebook session.
        unique_layouts = dict.fromkeys(
            tuple(entry)
            for entry in self.UCFmain
            if entry['collection'] == cName
        )
        return [list(layout) for layout in unique_layouts]

    def query_top_level_collection(self, ucf, cName):
        """Return the first entry of *ucf* whose collection is *cName*.

        Args:
            ucf: List of dict entries to search.
            cName: Value of the ``'collection'`` field to look for.

        Returns:
            The first matching entry, or ``None`` when there is none.
        """
        for c in ucf:
            if c['collection'] == cName:
                return c
        return None
        
        

In [None]:
# Load the Eco1c1G1T1 file set through the UCF class and display its
# string form.
testUCF = UCF(filepath="../../IO/inputs", name="Eco1c1G1T1")

print(str(testUCF))

In [None]:
# Pretty-print the first entry of the parsed UCF.
print(json.dumps(obj=ucf[0], indent=2))

In [None]:
# Testing that all the important parts are in the UCF

# Split the first entry's description on spaces and keep, for each token,
# only the text before its first comma.
ucf_desc = [word.split(',')[0] for word in ucf[0]['description'].split(' ')]

# Position of the token right after the first 'homologs:', or -1 if absent.
try:
    marker = ucf_desc.index('homologs:') + 1
except ValueError:
    marker = -1

# Everything after 'homologs:' is taken as the list of parts to check.
check_parts = []
if -1 < marker < len(ucf_desc):
    check_parts = ucf_desc[marker:]
    print('parts from description: \n')
    print(check_parts)
    print(len(check_parts))

In [None]:
# 'parts' entries whose name appears in the list taken from the description.
important_parts = [
    c
    for c in ucf
    if c['collection'] == 'parts' and c['name'] in check_parts
]

print(len(important_parts))

In [None]:
# All entries of the 'functions' collection, and the key list of each one.
ucf_functions = [c for c in ucf if c['collection'] == 'functions']
function_keys = [list(c.keys()) for c in ucf_functions]

# Collapse identical key lists so each distinct layout is shown once.
function_keys_set = {tuple(keys) for keys in function_keys}
unique_function_keys = [list(keys) for keys in function_keys_set]
print(json.dumps(unique_function_keys, indent=2))

# Two independent counts of the 'functions' entries, printed one after another.
print(collections.count('functions'))
print(len(ucf_functions))

def get_values_by_name(listofdicts: list, keys: list):
    """Collect the values stored under *keys* from every dict in a list.

    Args:
        listofdicts: Dicts to read from.
        keys: Keys to look up in each dict, in output order.

    Returns:
        One list of values per dict, in input order. A key missing from a
        dict prints ``error`` and is left out of that dict's row; dicts that
        yield no values at all produce no row.
    """
    rows = []
    for record in listofdicts:
        row = []
        for key in keys:
            if key not in record:
                print('error')
                continue
            row.append(record[key])
        if len(row) > 0:
            rows.append(row)
    return rows
                
# One single-element row ([name]) per entry of the 'functions' collection.
function_names = get_values_by_name(listofdicts=ucf_functions, keys=['name'])
print(function_names)

In [None]:
# Function name -> table, for every function whose 'table' is a dict.
toxicity_graphs = {}

for f in ucf_functions:
    print(f['name'])
    print(f.keys())
    if 'table' in f:
        # isinstance is the idiomatic type check and also accepts dict
        # subclasses.
        if isinstance(f['table'], dict):
            print('table keys: ' + str(f['table'].keys()))
            toxicity_graphs[f['name']] = f['table']
        else:
            # Any other table shape is only reported, not collected.
            print('long table')
    print()

# Conclusion: only toxicity tables have direct X & Y mapping

In [None]:
import matplotlib.pyplot as plt

# Show the last collected (name, table) pair as a quick sanity check.
print([*toxicity_graphs.items()][-1])

x, y = [], []

# Draw one curve per collected table, all on the same axes:
# the table's 'x' values against its 'output' values.
for fname, ftable in toxicity_graphs.items():
    x, y = ftable['x'], ftable['output']
    plt.plot(x, y)

plt.title('toxicity graphs')

# Render the figure.
plt.show()
