In [1]:
%load_ext autoreload
%autoreload 2

import sys
# Append the relative directory '../src' to the module search path so that
# modules located there can be imported by later cells.
sys.path.append('../src')

# UCF JSON

In [28]:
import json

def describe_library(lib):
    """Build a structural overview of a parsed library (a sequence of dict entries).

    Every entry is classified by the keys it contains, tested in this order:

    * ``outputs`` + ``inputs`` + ``netlist`` -> ``circuit_components``
      (only the first entry for each distinct key ordering is kept as an example)
    * ``regulator``      -> ``gate_definitions`` (``type == 'gate_definition'``)
    * ``equation``       -> ``gate_definitions`` (``type == 'gate_model'``)
    * ``dnasequence``    -> ``dna_parts``
    * ``cytometry_data`` -> ``experimental_data``
    * anything else      -> ``miscellaneous`` (index and key list)

    The first entry is additionally treated as the library header: all of its
    fields except ``collection`` are copied into ``metadata``. If it matches
    none of the specific categories above it is *not* repeated under
    ``miscellaneous``, since it is already reported as metadata.

    Args:
        lib: Sequence of dicts, e.g. the list produced by ``json.load`` on a
            UCF file. May be empty.

    Returns:
        dict with two keys:
            ``'summary'``: entry counts plus the list of metadata field names.
            ``'details'``: the per-category descriptions collected above.
    """
    description = {
        'metadata': {},
        'circuit_components': [],
        'gate_definitions': [],
        'dna_parts': [],
        'experimental_data': [],
        'miscellaneous': []
    }

    # Key orderings already recorded for circuit components; a set lookup
    # replaces rescanning the whole list for every matching entry.
    seen_structures = set()

    for i, item in enumerate(lib):
        # Track collection metadata (taken positionally from the first entry).
        if i == 0:
            description['metadata'] = {k: v for k, v in item.items() if k != 'collection'}

        # Circuit components (common pattern)
        if all(k in item for k in ('outputs', 'inputs', 'netlist')):
            structure = list(item.keys())
            signature = tuple(structure)
            if signature not in seen_structures:
                seen_structures.add(signature)
                description['circuit_components'].append({
                    'type': 'circuit_component',
                    'structure': structure,
                    'example_inputs': item.get('inputs', []),
                    'example_outputs': item.get('outputs', [])
                })

        # Gate definitions (different types)
        elif 'regulator' in item:
            description['gate_definitions'].append({
                'type': 'gate_definition',
                'properties': list(item.keys()),
                'example_regulator': item.get('regulator'),
                'example_gate_type': item.get('gate_type')
            })

        # Mathematical models (counted together with the gate definitions)
        elif 'equation' in item:
            description['gate_definitions'].append({
                'type': 'gate_model',
                'equation_variables': item.get('variables', []),
                'parameters': item.get('parameters', [])
            })

        # DNA parts
        elif 'dnasequence' in item:
            description['dna_parts'].append({
                'type': 'dna_part',
                'name': item.get('name'),
                'sequence_length': len(item.get('dnasequence', ''))
            })

        # Experimental data
        elif 'cytometry_data' in item:
            description['experimental_data'].append({
                'gate': item.get('gate_name'),
                'data_type': 'cytometry'
            })

        # Other categories. Entry 0 is skipped here because it has already
        # been captured as metadata and would otherwise be reported twice.
        elif i != 0:
            description['miscellaneous'].append({
                'index': i,
                'keys': list(item.keys())
            })

    # Create summary stats
    summary = {
        'total_entries': len(lib),
        'metadata_fields': list(description['metadata'].keys()),
        'num_circuit_components': len(description['circuit_components']),
        'num_gate_definitions': len(description['gate_definitions']),
        'num_dna_parts': len(description['dna_parts']),
        'num_experimental_datasets': len(description['experimental_data']),
        # One recorded component per distinct key ordering, so this equals the
        # number of distinct structures seen.
        'unique_component_types': len(seen_structures)
    }

    return {'summary': summary, 'details': description}
    

# Usage:
# Read the UCF file inside a context manager so the handle is closed as soon as
# parsing finishes. The encoding is explicit because the file contains
# non-ASCII text (e.g. '∆', 'φ' in the header) and the platform default may differ.
with open('../libs/cello-ucf/Eco1C1G1T0.UCF.json', encoding='utf-8') as ucf_file:
    lib = json.load(ucf_file)
report = describe_library(lib)

# Print condensed version for LLM input
print("=== Library Summary ===")
print(json.dumps(report['summary'], indent=2))
print("\n=== Example Entries ===")
print("Metadata values:", {k: str(v)[:100] + "..." for k,v in lib[0].items()})
# NOTE: the positional indices below (3, 257, 299, 398) are tied to the entry
# order of this particular file and will need adjusting for other libraries.
print("Circuit Component:", {k: lib[3][k] for k in ['outputs', 'inputs']})
print("Gate Definition:", lib[257]['regulator'])
print("DNA Part:", lib[299]['name'][:100] + "...")
print("Experimental Data:", lib[398]['gate_name'])

=== Library Summary ===
{
  "total_entries": 414,
  "metadata_fields": [
    "description",
    "version",
    "date",
    "author",
    "organism",
    "genome",
    "media",
    "temperature",
    "growth"
  ],
  "num_circuit_components": 1,
  "num_gate_definitions": 28,
  "num_dna_parts": 85,
  "num_experimental_datasets": 14,
  "unique_component_types": 1
}

=== Example Entries ===
Metadata values: {'collection': 'header...', 'description': 'TetR homologs: PhlF, SrpR, BM3R1, HlyIIR, BetI, AmtR, AmeR...', 'version': 'Eco1C1G1T0...', 'date': 'Thu Dec 24 00:54:52 EST 2015...', 'author': "['Bryan Der']...", 'organism': 'Escherichia coli NEB 10-beta...', 'genome': 'NEB 10 ∆(ara-leu) 7697 araD139 fhuA ∆lacX74 galK16 galE15 e14- φ80dlacZ∆M15  recA1 relA1 endA1 nupG ...', 'media': 'M9 minimal media composed of M9 media salts (6.78 g/L Na2HPO4, 3 g/L KH2PO4, 1 g/L NH4Cl, 0.5 g/L Na...', 'temperature': '37...', 'growth': 'Inoculation: Individual colonies into M9 media, 16 hours overnight in 

In [None]:
# Load the library with a context manager so the file handle is closed after
# parsing; UTF-8 is stated explicitly rather than relying on the platform default.
with open('../libs/cello-ucf/Eco1C1G1T0.UCF.json', encoding='utf-8') as ucf_file:
    lib = json.load(ucf_file)
describe_library(lib)

In [None]:
# Load the library with a context manager so the file handle is closed after
# parsing; UTF-8 is stated explicitly rather than relying on the platform default.
with open('../libs/cello-ucf/Eco1C1G1T1.pAN1201.UCF.json', encoding='utf-8') as ucf_file:
    lib = json.load(ucf_file)
describe_library(lib)

# SBOL XML

In [72]:
import sbol2

# Create an empty SBOL document, populate it from the collection XML file,
# and print the document (the per-type object counts shown in the output).
sbol_path = '../libs/synbiohub/Eco1C1G1T1_collection.xml'
doc = sbol2.Document()
doc.read(sbol_path)
print(doc)

Design........................0
Build.........................0
Test..........................0
Analysis......................0
ComponentDefinition...........160
ModuleDefinition..............70
Model.........................0
Sequence......................129
Collection....................1
Activity......................1
Plan..........................0
Agent.........................1
Attachment....................35
CombinatorialDerivation.......0
Implementation................0
SampleRoster..................0
Experiment....................0
ExperimentalData..............0
Annotation Objects............0
---
Total: .........................397

