In [26]:
from collections import defaultdict

In [1]:
from erd import *

In [77]:
from erd import generate_erd_for_top_nodes

In [48]:
from mcli.lib.auth.mcli_manager import MCLIManager, MCLIInstance, MCLIConnectionParams, create_mcli_instance

In [57]:
from mcli.app.readiness.readiness import _do_generate_type_graph
from mcli.app.readiness.generate_graph import (
  load_graph_data, 
  find_top_level_nodes,
  build_hierarchical_graph,
  count_descendants,
  transform_graph
)

In [3]:
from mcli.lib.logger.logger import get_logger

In [4]:
# Shared logger used by the helper functions defined in later cells.
logger = get_logger()

In [15]:
# Target environment for this session.
# NOTE(review): this URL is hard-coded here and again inside _do_generate_type_graph;
# consider a single configuration constant.
env_url = "https://stgawsrso.mcli.ai/lefvdev/panda009"
# Manager bound to that environment, then a client obtained via mcli_as_basic_user().
mcli_mngr = MCLIManager(env_url=env_url)
mcli = mcli_mngr.mcli_as_basic_user()

In [18]:
def generate_type_graph_command(output="realGraph.json", analyze=True, generate_erds=True, depth=2, top=5, js_file=None):
    """Generate a type graph from a MCLI cluster and optionally build ERDs from it.

    The graph is produced by ``_do_generate_type_graph`` (called with
    ``output_file=output``). When ``generate_erds`` is true,
    ``generate_erd_for_top_nodes`` is then run on the returned graph file.

    Args:
        output: Path of the JSON file handed to ``_do_generate_type_graph``.
        analyze: Forwarded unchanged to ``_do_generate_type_graph``.
        generate_erds: Whether to generate ERDs once the graph has been saved.
        depth: Passed to ``generate_erd_for_top_nodes`` as ``max_depth``.
        top: Passed to ``generate_erd_for_top_nodes`` as ``top_n``.
        js_file: Optional path to a file whose contents are forwarded as ``js_code``.

    Returns:
        None. Progress and failures are reported through ``click.echo`` and the logger.
        A failure to read ``js_file``, a falsy graph file, or an error during ERD
        generation is reported and does not raise.

    Raises:
        Exception: Any other error is logged together with its traceback and re-raised.
    """
    # Function-scope imports so the cell does not rely on names leaking in from
    # a star-import elsewhere in the notebook.
    import traceback
    from pathlib import Path

    logger.info(f"Starting generate-type-graph with output={output}, analyze={analyze}, generate_erds={generate_erds}, depth={depth}, top={top}")

    try:
        # Create the ERD output directory (relative to the working directory) if needed.
        if generate_erds:
            project_root = Path(".")
            output_dir = project_root / "output"
            if not output_dir.exists():
                output_dir.mkdir(exist_ok=True)
                logger.info(f"Created output directory: {output_dir}")

        # Only a warning: the graph is (re)generated below regardless.
        if (analyze or generate_erds) and output == "realGraph.json" and not Path(output).exists():
            click.echo(f"Warning: {output} does not exist. You need to generate the graph first.")

        # Load custom JavaScript if specified.
        js_code = None
        if js_file:
            try:
                with open(js_file, 'r') as f:
                    js_code = f.read()
                click.echo(f"Loaded JavaScript code from {js_file}")
                logger.info(f"Loaded JavaScript code from {js_file}")
            except Exception as e:
                error_msg = f"Error loading JavaScript file: {e}"
                click.echo(error_msg)
                logger.error(error_msg)
                return

        # Generate the graph.
        logger.info(f"Generating type graph to {output}")
        graph_file = _do_generate_type_graph(output_file=output, analyze=analyze, js_code=js_code)

        if not graph_file:
            error_msg = "Failed to save graph file."
            click.echo(error_msg)
            logger.error(error_msg)
            return

        success_msg = f"Graph saved to {graph_file}"
        click.echo(success_msg)
        logger.info(success_msg)

        # The original had a bare `return` here, which made the ERD step below
        # unreachable even when generate_erds=True.
        if not generate_erds:
            return

        info_msg = f"Generating ERDs for top {top} nodes with depth {depth}..."
        click.echo(info_msg)
        logger.info(info_msg)

        try:
            # Ensure the graph file exists before handing it to the ERD generator.
            if not Path(graph_file).exists():
                error_msg = f"Graph file {graph_file} does not exist. Cannot generate ERDs."
                click.echo(error_msg)
                logger.error(error_msg)
                return

            files = generate_erd_for_top_nodes(graph_file, max_depth=depth, top_n=top)

            if files:
                click.echo("Generated the following ERD files:")
                # Each entry is unpacked as (dot_file, png_file, count); only the
                # PNG path and the count are reported.
                for dot_file, png_file, count in files:
                    click.echo(f"{png_file} - {count} descendants")
                    logger.info(f"Generated ERD: {png_file} with {count} descendants")
            else:
                error_msg = "No ERD files were generated. Check if the graph contains valid entity relationships."
                click.echo(error_msg)
                logger.warning(error_msg)
        except Exception as e:
            # ERD failures are reported but deliberately not re-raised: the graph
            # file itself was produced successfully.
            error_msg = f"Error generating ERDs: {str(e)}"
            click.echo(error_msg)
            logger.error(error_msg)

    except Exception as e:
        error_msg = f"Error generating type graph: {str(e)}"
        click.echo(error_msg)
        logger.error(error_msg)
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise

In [27]:
# Run the command with all of its default arguments.
# NOTE(review): the recorded output below is a NameError, i.e. the cell defining
# generate_type_graph_command had not been executed in that kernel session.
generate_type_graph_command()

NameError: name 'generate_type_graph_command' is not defined

In [31]:
def build_adjacency_list(graph_data):
    """Turn raw graph data into a node lookup and a directed adjacency list.

    Reads ``graph_data["graph"]["m_vertices"]`` and ``graph_data["graph"]["m_edges"]``.

    Returns:
        A ``(node_map, adj_list)`` tuple: ``node_map`` maps each vertex ``"id"`` to
        its vertex record, and ``adj_list`` is a ``defaultdict(list)`` mapping each
        edge's ``from`` id to the list of ``to`` ids, in edge order.

    Raises:
        ValueError: If an expected key is missing or a value has the wrong type.
    """
    logger.info("build_adjacency_list")
    try:
        graph_section = graph_data["graph"]
        vertex_records = graph_section["m_vertices"]
        edge_records = graph_section["m_edges"]
        logger.info(f"Found the following vertices: {len(vertex_records)}")
        logger.info(f"Found the following edges: {len(edge_records)}")

        # Index every vertex record by its id.
        nodes_by_id = {}
        for vertex in vertex_records:
            nodes_by_id[vertex["id"]] = vertex

        # Directed edges: source id -> targets, duplicates preserved.
        successors = defaultdict(list)
        for link in edge_records:
            origin = link["from"]["id"]
            destination = link["to"]["id"]
            successors[origin].append(destination)
    except KeyError as missing:
        raise ValueError(f"Invalid graph data structure: missing key {missing}. Check the format of your graph data file.")
    except TypeError as problem:
        raise ValueError(f"Invalid graph data structure: {problem}. Check the format of your graph data file.")
    return nodes_by_id, successors

In [32]:
# Load the graph JSON from the working directory.
# NOTE(review): presumably the file written by the graph-generation step — confirm.
graph_data = load_graph_data("./realGraph.json")

In [33]:
# Display the (node_map, adj_list) tuple for a quick look at its structure.
# NOTE(review): this dumps the whole structure; the next cell repeats the call and keeps the result.
build_adjacency_list(graph_data=graph_data)

({'CaseCommentAttachment': {'id': 'CaseCommentAttachment',
   'category': 'Entity',
   'data': {'name': 'CaseCommentAttachment',
    'categoryMetadataIdentifier': 'CaseCommentAttachment',
    'package': 'reliabilityAssetCase'}},
  'FailureRecommendationRelation': {'id': 'FailureRecommendationRelation',
   'category': 'Entity',
   'data': {'name': 'FailureRecommendationRelation',
    'categoryMetadataIdentifier': 'FailureRecommendationRelation',
    'package': 'failureMechanismRecommendation'}},
  'DataValidationRuleset': {'id': 'DataValidationRuleset',
   'category': 'Entity',
   'data': {'name': 'DataValidationRuleset',
    'categoryMetadataIdentifier': 'DataValidationRuleset',
    'package': 'dataValidation'}},
  'Tas.Response': {'id': 'Tas.Response',
   'category': 'Entity',
   'data': {'name': 'Tas.Response',
    'categoryMetadataIdentifier': 'Tas.Response',
    'package': 'platform'}},
  'MasterSensor': {'id': 'MasterSensor',
   'category': 'Entity',
   'data': {'name': 'MasterSen

In [34]:
# Keep both results: node_map (id -> vertex record) and adj_list (source id -> list of target ids).
node_map, adj_list = build_adjacency_list(graph_data)

In [35]:
adj_list

defaultdict(list,
            {'AircraftAssetMaintenanceRecommendation': ['PandaUser',
              'ForecastedAircraftPart',
              'AircraftModel',
              'Aircraft',
              'NationalStockNumber',
              'ReliabilityCase',
              'UserAircraftAssetMaintenanceRecommendationRelation'],
             'StagingRemisOffEquipmentWorkOrderTable': ['StagingRemisOnEquipmentWorkOrderTable',
              'StagingRemisEndItemTable'],
             'AdjudicatedMaintenanceAction': ['PandaMlMetadata',
              'ReliabilityCase',
              'Position',
              'AdjudicatedMaintenanceEvent',
              'PandaInferenceJob',
              'MaintenanceAction',
              'ActionTakenCode',
              'MaintenanceEvent'],
             'EnvReport': ['EnvLog'],
             'ExpectedSensor': ['SensorBounds',
              'ReliabilityAssetSensorRelation',
              'FailureModeLineItem',
              'Unit',
              'SensorValueMapping',
 

In [45]:
# NOTE(review): 50 is presumably the number of top-level nodes to return — confirm
# against find_top_level_nodes and consider a named constant.
top_nodes = find_top_level_nodes(node_map, adj_list, 50)

In [46]:
top_nodes

['SandboxMdmDatasetToExcisionRuleMapping',
 'SandboxMdmDataset',
 'SandboxMdmDatasetToTrdMapping',
 'WucToMdmWucCorrectionRulesMapping',
 'MdmTrdExcRuleJob',
 'MdmGetTrdsAndExcRulesJob',
 'MdmWucCorrectionRule',
 'RawRemisData',
 'MdmExcisionRule',
 'MdmDataRuleVersion',
 'TransformedRemisData',
 'MdmDatasetToExcisionRuleMapping',
 'MdmDataset',
 'SandboxTransformedRemisDataToMdmDataRuleRelation',
 'MdmDataRule',
 'SandboxRawRemisData',
 'TransformedRemisDataToMdmDataRuleRelation',
 'MdmDatasetToTrdMapping',
 'SandboxTransformedRemisData',
 'SandboxSampleFile',
 'ProtectionLoop',
 'OperationFileTracker',
 'ProtectionLoopSensorRelation',
 'ProtectionLoopAlert',
 'FailureRecommendationRelation',
 'CommentBoard',
 'PandaMail',
 'CommentBoardReply',
 'ReadinessRiskInferenceLog',
 'ReliabilityAssetClassDefinitionParserJobBatchResult',
 'ReliabilityAssetClassDefinitionParserJob',
 'FailureModeRelation',
 'MasterSensor',
 'MaintenanceDataSourceSpec',
 'FacilityReliabilityAssetRelation',
 'Typ

In [51]:
# NOTE(review): 3 is presumably the maximum depth of the hierarchy — confirm
# against build_hierarchical_graph and consider a named constant.
hierarchy = build_hierarchical_graph(top_nodes, node_map, adj_list, 3)

In [52]:
hierarchy

{'SandboxMdmDatasetToExcisionRuleMapping': {'SandboxMdmDatasetToExcisionRuleMapping': {'node_info': {'id': 'SandboxMdmDatasetToExcisionRuleMapping',
    'category': 'Entity',
    'data': {'name': 'SandboxMdmDatasetToExcisionRuleMapping',
     'categoryMetadataIdentifier': 'SandboxMdmDatasetToExcisionRuleMapping',
     'package': 'panda'}},
   'children': {'SandboxMdmDataset': {'node_info': {'id': 'SandboxMdmDataset',
      'category': 'Entity',
      'data': {'name': 'SandboxMdmDataset',
       'categoryMetadataIdentifier': 'SandboxMdmDataset',
       'package': 'panda'}},
     'children': {'SandboxSampleFile': {'node_info': {'id': 'SandboxSampleFile',
        'category': 'Entity',
        'data': {'name': 'SandboxSampleFile',
         'categoryMetadataIdentifier': 'SandboxSampleFile',
         'package': 'panda'}},
       'children': {'SourceFile': {'node_info': {'id': 'SourceFile',
          'category': 'Entity',
          'data': {'name': 'SourceFile',
           'categoryMetadataId

In [55]:
# Pair every entry of top_nodes with the value count_descendants returns for it.
top_nodes_with_counts = [(node, count_descendants(node, adj_list)) for node in top_nodes]

In [56]:
top_nodes_with_counts

[('SandboxMdmDatasetToExcisionRuleMapping', 267),
 ('SandboxMdmDataset', 267),
 ('SandboxMdmDatasetToTrdMapping', 267),
 ('WucToMdmWucCorrectionRulesMapping', 265),
 ('MdmTrdExcRuleJob', 264),
 ('MdmGetTrdsAndExcRulesJob', 264),
 ('MdmWucCorrectionRule', 264),
 ('RawRemisData', 263),
 ('MdmExcisionRule', 263),
 ('MdmDataRuleVersion', 263),
 ('TransformedRemisData', 263),
 ('MdmDatasetToExcisionRuleMapping', 263),
 ('MdmDataset', 263),
 ('SandboxTransformedRemisDataToMdmDataRuleRelation', 263),
 ('MdmDataRule', 263),
 ('SandboxRawRemisData', 263),
 ('TransformedRemisDataToMdmDataRuleRelation', 263),
 ('MdmDatasetToTrdMapping', 263),
 ('SandboxTransformedRemisData', 263),
 ('SandboxSampleFile', 251),
 ('ProtectionLoop', 250),
 ('OperationFileTracker', 250),
 ('ProtectionLoopSensorRelation', 250),
 ('ProtectionLoopAlert', 250),
 ('FailureRecommendationRelation', 249),
 ('CommentBoard', 249),
 ('PandaMail', 249),
 ('CommentBoardReply', 249),
 ('ReadinessRiskInferenceLog', 249),
 ('Reliabil

In [58]:
# NOTE(review): in the recorded run this call raised
# "NameError: name 'max_depth' is not defined" — presumably from inside
# transform_graph; confirm and fix it there.
hierarchy, top_nodes = transform_graph(graph_data, 3, 50)

NameError: name 'max_depth' is not defined

In [65]:
from datetime import datetime
# Plain literal: the string has no placeholders, so no f-prefix is needed.
name = "realGraph_.json"

In [74]:
# Time-of-day stamp used in the output file name. strftime gives a fixed-width
# value (str(datetime.now()) drops the fractional part when microsecond == 0)
# and avoids ':' characters, which are not allowed in Windows file names.
time_name = datetime.now().strftime("%H-%M-%S.%f")

In [75]:
# Output path for this run: "realGraph_" + time stamp + ".json".
output_file = "realGraph_{}.json".format(time_name)

In [59]:
def _do_generate_type_graph(output_file=output_file, analyze=False, js_code=None):
    """Generate a type graph from MCLI and optionally save/analyze it.

    Note: this definition shadows the ``_do_generate_type_graph`` imported from
    ``mcli.app.readiness.readiness`` earlier in the notebook, and its default
    ``output_file`` is whatever the module-level ``output_file`` held when this
    cell was executed.

    Args:
        output_file: File to save the graph to. A falsy value skips saving.
        analyze: Whether to analyze the graph after saving it (only honoured
            when ``output_file`` is set).
        js_code: Optional JavaScript code to execute instead of the default
            graph-generation snippet.

    Returns:
        ``output_file`` when the graph was written to disk; otherwise the graph
        object returned by ``mcli.Js.exec``.

    Raises:
        ValueError: If the JavaScript execution returns no graph data.
        Exception: Any other error is logged with its traceback and re-raised.
    """
    # Function-scope imports so the cell does not rely on names leaking in from
    # a star-import elsewhere in the notebook.
    import json
    import traceback
    from pathlib import Path

    try:
        # NOTE(review): the environment URL is hard-coded here as well as in the
        # module-level env_url; consider passing it in.
        mcli_mngr = MCLIManager(env_url="https://stgawsrso.mcli.ai/lefvdev/panda009")
        mcli = mcli_mngr.mcli_as_basic_user()

        # Default JavaScript code to execute if none is provided
        if js_code is None:
            js_code = """
            var realGraph = GlobalCanvasCommonGraphUtil.mergeGraphs([
              {
                action: {
                  actionName: 'relationshipGraph',
                  typeName: 'DataModelGraphManager',
                },
                id: 'DataModelGraphRegistration',
                type: 'GlobalCanvasGraphRegistration',
              },
            ], [{
                id: 'Entity',
              }]);
            realGraph;  // Return the result
            """
            logger.info("Using default JavaScript code to generate graph")
        else:
            logger.info("Using custom JavaScript code to generate graph")

        logger.info("Executing JavaScript to generate graph...")
        merged_graph = mcli.Js.exec(js_code)
        # Guard against a None result as well, so the caller sees the intended
        # ValueError rather than an AttributeError on `.graph`.
        if merged_graph is None or not merged_graph.graph:
            logger.error("JavaScript execution returned no data")
            raise ValueError("JavaScript execution returned no data")

        # Get basic statistics about the graph
        num_vertices = len(merged_graph.graph.m_vertices)
        num_edges = len(merged_graph.graph.m_edges)
        logger.info(f"Merged graph contains {num_vertices} vertices and {num_edges} edges")

        if output_file:
            # Make sure the parent directory exists
            output_path = Path(output_file)
            if not output_path.parent.exists():
                output_path.parent.mkdir(parents=True, exist_ok=True)
                logger.info(f"Created parent directory for output file: {output_path.parent}")

            # Save the graph to a file
            try:
                with open(output_file, 'w') as f:
                    json.dump(merged_graph.toJson(), f, indent=2)
                logger.info(f"Saved graph to {output_file}")

                if analyze:
                    # NOTE(review): analyze_graph_for_hierarchical_exports is not
                    # imported anywhere in the visible cells — confirm it is in scope.
                    logger.info("Analyzing graph for hierarchical exports")
                    results = analyze_graph_for_hierarchical_exports(output_file)

                    if results:
                        # NOTE(review): the message says "Top 5" but every entry
                        # of `results` is logged.
                        logger.info("Top 5 nodes by descendant count:")
                        for node in results:
                            logger.info(f"{node['id']} - {node['descendant_count']} descendants")
                    else:
                        logger.warning("No top nodes found in the graph.")

                return output_file
            except Exception as e:
                logger.error(f"Error saving graph: {e}")
                raise

        return merged_graph
    except Exception as e:
        logger.error(f"Error in _do_generate_type_graph: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise

In [79]:
# Run ERD generation on realGraph.json with max_depth=3 and top_n=50.
# NOTE(review): the recorded result shown below is an empty list, i.e. no files
# were produced in that run — investigate generate_erd_for_top_nodes.
files = generate_erd_for_top_nodes("realGraph.json", max_depth=3, top_n=50)

In [80]:
files

[]

In [81]:
  # Rebuild node_map / adj_list from graph_data (same call as in an earlier cell).
  node_map, adj_list = build_adjacency_list(graph_data)

In [82]:
# Map every node id in node_map to its descendant count
descendant_counts = {
    node_id: count_descendants(node_id, adj_list)
    for node_id in node_map
}

In [83]:
descendant_counts

{'CaseCommentAttachment': 1,
 'FailureRecommendationRelation': 249,
 'DataValidationRuleset': 3,
 'Tas.Response': 2,
 'MasterSensor': 248,
 'MlProject': 19,
 'Tas.KnownIssue': 1,
 'VectorStore.Collection': 1,
 'DataValidationRun': 7,
 'SystemUpdate': 1,
 'MlModel.Monitor.Result': 16,
 'PrivilegeBase': 1,
 'UiSdlPageGroup': 1,
 'ControlValvesModelableBase': 1,
 'SensorBounds': 247,
 'FinancialAsset': 1,
 'TestUser': 1,
 'ReliabilityCaseSegment': 247,
 'TypeId': 1,
 'MetricProfilerSpec': 1,
 'ReliabilitySyntheticData.ReliabilityRisk.DataGenerationJob': 1,
 'BaseCodeAnalysis.Metric.HardcodedCredentials.RegexRule': 1,
 'SbaDartIngestSensorBatchJob': 1,
 'MlWorkflowCoordinatableResult': 2,
 'GanttChartDemoResourceToCalendar': 8,
 'Exchange': 1,
 'Employee': 20,
 'RateData': 1,
 'SourceTarget': 2,
 'Action.Spy.Claim': 1,
 'PendingRefreshUniqueIndexesReq': 1,
 'ReliabilityRiskMl.Reconstruction.LightGBMPipe': 1,
 'ModelRegistry.ModelDependency': 1,
 'SqlCalcFieldsQueueEntry': 1,
 'PhysicalAsse

In [84]:
# Order the (node_id, count) pairs from most to fewest descendants
sorted_nodes = sorted(
    descendant_counts.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

In [86]:
# Keep the first 50 (node_id, count) pairs of the ranking
top_nodes = sorted_nodes[:50]

In [87]:
# Same selection as the previous cell: first 50 (node_id, count) pairs
top_nodes = list(sorted_nodes[:50])

In [88]:
top_nodes

[('SandboxMdmDatasetToExcisionRuleMapping', 267),
 ('SandboxMdmDataset', 267),
 ('SandboxMdmDatasetToTrdMapping', 267),
 ('WucToMdmWucCorrectionRulesMapping', 265),
 ('MdmTrdExcRuleJob', 264),
 ('MdmGetTrdsAndExcRulesJob', 264),
 ('MdmWucCorrectionRule', 264),
 ('RawRemisData', 263),
 ('MdmExcisionRule', 263),
 ('MdmDataRuleVersion', 263),
 ('TransformedRemisData', 263),
 ('MdmDatasetToExcisionRuleMapping', 263),
 ('MdmDataset', 263),
 ('SandboxTransformedRemisDataToMdmDataRuleRelation', 263),
 ('MdmDataRule', 263),
 ('SandboxRawRemisData', 263),
 ('TransformedRemisDataToMdmDataRuleRelation', 263),
 ('MdmDatasetToTrdMapping', 263),
 ('SandboxTransformedRemisData', 263),
 ('SandboxSampleFile', 251),
 ('ProtectionLoop', 250),
 ('OperationFileTracker', 250),
 ('ProtectionLoopSensorRelation', 250),
 ('ProtectionLoopAlert', 250),
 ('FailureRecommendationRelation', 249),
 ('CommentBoard', 249),
 ('PandaMail', 249),
 ('CommentBoardReply', 249),
 ('ReadinessRiskInferenceLog', 249),
 ('Reliabil