# Semantic Knowledge Graph Exploration

This notebook explores and analyzes the trained semantic knowledge graph stored in `nx_semantic_final.graphml`. During hyper-training, the graph was enriched with semantic features, centrality measures, and relationship confidence scores.

## Key Features to Explore:
- Graph structure and statistics
- Semantic relationships and patterns
- Centrality analysis and influential concepts
- Relationship types and their distributions
- Concept clustering and communities
- Advanced semantic queries and path finding

## 1. Setup and Data Loading

In [None]:
import heapq
import json
import warnings
from collections import Counter, defaultdict
from itertools import islice

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
import seaborn as sns
from plotly.subplots import make_subplots
from tqdm import tqdm
# Silence every warning category so library warnings do not clutter the
# notebook output.
# NOTE(review): this also hides deprecation and numerical warnings.
warnings.filterwarnings('ignore')

# Set up plotting
plt.style.use('default')
sns.set_palette("husl")
# Initialise Plotly's offline notebook mode before any figure is shown.
pyo.init_notebook_mode(connected=True)

print("Libraries loaded successfully!")

In [None]:
# Load the trained semantic graph
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
graph_path = r"c:\Users\erich\OneDrive\Documents\Python Projects\Semantica-Full-Reasoning-Chatbot\Data\Output\nx_semantic_final.graphml"

print("Loading semantic knowledge graph...")
# Any failure inside the try block (reading the file or printing the summary)
# is reported and leaves G as None; the analysis functions below check for
# G being None before touching the graph.
try:
    G = nx.read_graphml(graph_path)
    print(f"✅ Graph loaded successfully!")
    print(f"📊 Nodes: {G.number_of_nodes():,}")
    print(f"📊 Edges: {G.number_of_edges():,}")
    print(f"📊 Graph type: {type(G).__name__}")
    print(f"📊 Is directed: {G.is_directed()}")
except Exception as e:
    print(f"❌ Error loading graph: {e}")
    G = None

## 2. Graph Overview and Basic Statistics

In [None]:
def analyze_graph_structure(G):
    """Print a structural overview of the graph.

    Reports node and edge counts, density, connectivity, degree statistics
    and the attribute names found on one sample node and one sample edge.

    Args:
        G: A NetworkX graph, or ``None`` when loading failed.

    Returns:
        None. All results are printed.
    """
    if G is None:
        print("Graph not loaded!")
        return

    node_count = G.number_of_nodes()
    edge_count = G.number_of_edges()

    print("=== GRAPH STRUCTURE ANALYSIS ===")
    print(f"Nodes: {node_count:,}")
    print(f"Edges: {edge_count:,}")

    # Connectivity checks and min/max degree are undefined for a graph
    # without nodes (the underlying calls raise), so stop early.
    if node_count == 0:
        print("Graph is empty - nothing further to analyze.")
        return

    print(f"Density: {nx.density(G):.6f}")
    print(f"Is Connected: {nx.is_connected(G) if not G.is_directed() else 'N/A (directed)'}")

    if G.is_directed():
        print(f"Is Weakly Connected: {nx.is_weakly_connected(G)}")
        print(f"Number of SCCs: {nx.number_strongly_connected_components(G)}")
        print(f"Number of WCCs: {nx.number_weakly_connected_components(G)}")

    # Degree statistics
    degrees = [degree for _, degree in G.degree()]
    print(f"\n=== DEGREE STATISTICS ===")
    print(f"Average degree: {np.mean(degrees):.2f}")
    print(f"Median degree: {np.median(degrees):.2f}")
    print(f"Max degree: {np.max(degrees)}")
    print(f"Min degree: {np.min(degrees)}")

    # Node and edge attributes: take the first item straight from the
    # iterator instead of copying every node/edge into a list first.
    sample_node = next(iter(G.nodes()))
    print(f"\n=== NODE ATTRIBUTES ===")
    print(f"Sample node: {sample_node}")
    print(f"Node attributes: {list(G.nodes[sample_node].keys())}")

    if edge_count:
        # Iterating with data=True yields the attribute dict directly, which
        # avoids a second (u, v) lookup that is ambiguous for multigraphs.
        u, v, edge_attrs = next(iter(G.edges(data=True)))
        sample_edge = (u, v)
        print(f"\n=== EDGE ATTRIBUTES ===")
        print(f"Sample edge: {sample_edge}")
        print(f"Edge attributes: {list(edge_attrs.keys())}")

# Print the structural overview for the loaded graph (handles G being None).
analyze_graph_structure(G)

In [None]:
# Visualize degree distribution
def plot_degree_distribution(G):
    """Plot the degree distribution on linear and log-log axes.

    Also prints the ten nodes with the highest degree.

    Args:
        G: A NetworkX graph, or ``None`` when loading failed.

    Returns:
        None. Figures are shown and the ranking is printed.
    """
    if G is None:
        return

    degrees = [degree for _, degree in G.degree()]
    if not degrees:
        # Without nodes there is nothing to bin or rank.
        print("Graph has no nodes - nothing to plot.")
        return

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Linear scale
    ax1.hist(degrees, bins=50, alpha=0.7, color='skyblue', edgecolor='black')
    ax1.set_xlabel('Degree')
    ax1.set_ylabel('Frequency')
    ax1.set_title('Degree Distribution (Linear Scale)')
    ax1.grid(True, alpha=0.3)

    # Log scale: degree 0 has no position on a logarithmic axis, so isolated
    # nodes are left out of this panel only.
    degree_counts = Counter(degree for degree in degrees if degree > 0)
    if degree_counts:
        degrees_vals, counts = zip(*sorted(degree_counts.items()))
        ax2.loglog(degrees_vals, counts, 'o-', alpha=0.7, color='coral')
    ax2.set_xlabel('Degree (log scale)')
    ax2.set_ylabel('Frequency (log scale)')
    ax2.set_title('Degree Distribution (Log-Log Scale)')
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Print top nodes by degree; nlargest gives the same result as a full
    # descending sort followed by [:10] without sorting every node.
    top_nodes = heapq.nlargest(10, G.degree(), key=lambda item: item[1])
    print("\n=== TOP 10 NODES BY DEGREE ===")
    for node, degree in top_nodes:
        print(f"{node}: {degree}")

# Show the degree histograms and the highest-degree nodes for the graph.
plot_degree_distribution(G)

## 3. Semantic Relationship Analysis

In [None]:
def analyze_relationship_types(G):
    """Tally how often each relation label occurs on the graph's edges.

    An edge's label is taken from its ``relation`` attribute, or from ``rel``
    when ``relation`` is absent; edges carrying neither are ignored.

    Args:
        G: A graph exposing ``edges(data=True)``, or ``None``.

    Returns:
        A ``Counter`` mapping label to occurrence count, or ``None`` when the
        graph is missing or no edge carries a label.
    """
    if G is None:
        return

    # Collect one label per edge, preferring 'relation' over 'rel'.
    labels = []
    for _, _, attrs in G.edges(data=True):
        for key in ('relation', 'rel'):
            if key in attrs:
                labels.append(attrs[key])
                break

    total = len(labels)
    if total == 0:
        print("No relation attributes found in edges.")
        return

    tally = Counter(labels)

    print(f"=== RELATIONSHIP TYPE ANALYSIS ===")
    print(f"Total unique relation types: {len(tally)}")
    print(f"Total relationships: {total:,}")

    # The twenty most frequent labels with their share of all labelled edges.
    print("\n=== TOP 20 RELATIONSHIP TYPES ===")
    for label, occurrences in tally.most_common(20):
        share = (occurrences / total) * 100
        print(f"{label}: {occurrences:,} ({share:.2f}%)")

    return tally

# Keep the relation tally for the plotting, export and summary cells below;
# it is None when the graph is missing or has no relation attributes.
relation_counts = analyze_relationship_types(G)

In [None]:
# Visualize relationship distribution
def plot_relationship_distribution(relation_counts, top_n=15):
    """Show a bar chart and a pie chart of the most common relation types.

    Args:
        relation_counts: ``Counter`` mapping relation label to count, or
            ``None``.
        top_n: Maximum number of relation types to display.

    Returns:
        None. Two Plotly figures are shown; nothing is drawn when there is
        no data to display.
    """
    # Covers None as well as an empty Counter.
    if not relation_counts:
        return

    # Get top N relationships
    top_relations = relation_counts.most_common(top_n)
    if not top_relations:
        # top_n <= 0 selects nothing, and zip(*[]) below would fail.
        return

    relations, counts = (list(column) for column in zip(*top_relations))
    # Label with the number actually shown, which can be below top_n.
    shown = len(relations)

    # Create interactive plot with Plotly
    fig = px.bar(
        x=relations,
        y=counts,
        title=f'Top {shown} Relationship Types Distribution',
        labels={'x': 'Relationship Type', 'y': 'Count'},
        color=counts,
        color_continuous_scale='Viridis'
    )

    fig.update_layout(
        xaxis_tickangle=-45,
        height=600,
        showlegend=False
    )

    fig.show()

    # Also create a pie chart for proportions
    fig_pie = px.pie(
        values=counts,
        names=relations,
        title=f'Top {shown} Relationship Types Distribution (Proportions)'
    )

    fig_pie.update_traces(textposition='inside', textinfo='percent+label')
    fig_pie.update_layout(height=700)
    fig_pie.show()

# Skip plotting when no relation tally was produced (None or empty).
if relation_counts:
    plot_relationship_distribution(relation_counts)

## 4. Centrality Analysis and Important Concepts

In [None]:
def analyze_centrality_measures(G, sample_size=1000, seed=None):
    """Compute centrality measures and print the top concepts for each.

    Degree centrality is computed on the full graph. Betweenness, closeness
    and eigenvector centrality are computed on the subgraph induced by a
    random node sample when the graph has more than ``sample_size`` nodes,
    so those scores only cover the sampled nodes.

    Args:
        G: A NetworkX graph, or ``None`` when loading failed.
        sample_size: Number of nodes to sample for the expensive measures.
        seed: Optional seed for a reproducible sample. With ``None`` the
            global NumPy random state is used, as before.

    Returns:
        Dict mapping measure name to a ``{node: score}`` dict, or ``None``
        when ``G`` is ``None``. The ``'eigenvector'`` entry is absent when
        that computation fails.
    """
    if G is None:
        return

    print("=== CENTRALITY ANALYSIS ===")

    # For large graphs, sample nodes for expensive centrality measures
    if G.number_of_nodes() > sample_size:
        print(f"Sampling {sample_size} nodes for centrality analysis...")
        rng = np.random if seed is None else np.random.RandomState(seed)
        all_nodes = list(G.nodes())
        # Sample positions rather than the labels themselves so the original
        # node objects are passed to subgraph(), not NumPy-converted copies.
        picked = rng.choice(len(all_nodes), sample_size, replace=False)
        G_sample = G.subgraph([all_nodes[index] for index in picked])
    else:
        G_sample = G

    centrality_measures = {}

    # Degree centrality (fast)
    print("Computing degree centrality...")
    centrality_measures['degree'] = nx.degree_centrality(G)

    # Betweenness centrality (expensive, use sample)
    print("Computing betweenness centrality...")
    centrality_measures['betweenness'] = nx.betweenness_centrality(G_sample)

    # Closeness centrality (expensive, use sample)
    print("Computing closeness centrality...")
    centrality_measures['closeness'] = nx.closeness_centrality(G_sample)

    # Eigenvector centrality (can be expensive). Only NetworkX's own errors
    # are treated as a soft failure so that an interrupt still propagates.
    try:
        print("Computing eigenvector centrality...")
        centrality_measures['eigenvector'] = nx.eigenvector_centrality(G_sample, max_iter=100)
    except nx.NetworkXException as exc:
        print(f"Eigenvector centrality failed - graph may not be connected ({exc})")

    # Print top concepts for each centrality measure
    for measure, values in centrality_measures.items():
        print(f"\n=== TOP 10 CONCEPTS BY {measure.upper()} CENTRALITY ===")
        top_concepts = heapq.nlargest(10, values.items(), key=lambda item: item[1])
        for concept, score in top_concepts:
            print(f"{concept}: {score:.6f}")

    return centrality_measures

# Keep the centrality scores for the plotting, export and summary cells
# below; the result is None when the graph is not loaded.
centrality_measures = analyze_centrality_measures(G)

In [None]:
# Visualize centrality distributions
def plot_centrality_distributions(centrality_measures):
    """Draw one histogram per centrality measure, side by side.

    Each panel marks the mean and median score with dashed vertical lines.

    Args:
        centrality_measures: Dict mapping measure name to a
            ``{node: score}`` dict; ``None`` or empty draws nothing.

    Returns:
        None. The figure is shown.
    """
    if not centrality_measures:
        return

    panel_count = len(centrality_measures)
    # squeeze=False always yields a 2-D axes array, so a single measure needs
    # no special handling; row 0 holds every panel.
    _, axes_grid = plt.subplots(1, panel_count, figsize=(5*panel_count, 5), squeeze=False)

    for panel, (measure, values) in zip(axes_grid[0], centrality_measures.items()):
        scores = list(values.values())
        pretty_name = measure.title()

        panel.hist(scores, bins=50, alpha=0.7, edgecolor='black')
        panel.set_xlabel(f'{pretty_name} Centrality')
        panel.set_ylabel('Frequency')
        panel.set_title(f'{pretty_name} Centrality Distribution')
        panel.grid(True, alpha=0.3)

        # Reference lines for the two summary statistics.
        mean_val = np.mean(scores)
        median_val = np.median(scores)
        panel.axvline(mean_val, color='red', linestyle='--', label=f'Mean: {mean_val:.4f}')
        panel.axvline(median_val, color='orange', linestyle='--', label=f'Median: {median_val:.4f}')
        panel.legend()

    plt.tight_layout()
    plt.show()

# Plot the score histograms (a no-op when no centrality results exist).
plot_centrality_distributions(centrality_measures)

## 5. Semantic Path Finding and Concept Exploration

In [None]:
def find_semantic_paths(G, source, target, max_paths=5):
    """Find and print shortest paths between two concepts.

    Args:
        G: A NetworkX graph, or ``None`` when loading failed.
        source: Identifier of the start node.
        target: Identifier of the end node.
        max_paths: Maximum number of shortest paths to collect and print.

    Returns:
        A list of paths (each a list of node identifiers). The list is empty
        when the graph is missing, a node is unknown, no path exists or an
        error occurs.
    """
    if G is None:
        return []

    if source not in G.nodes() or target not in G.nodes():
        print(f"Error: '{source}' or '{target}' not found in graph")
        return []

    try:
        # Pull only as many paths as will be reported: the number of
        # shortest paths can be very large, and all_shortest_paths is a
        # generator, so there is no need to enumerate them all first.
        paths = list(islice(nx.all_shortest_paths(G, source, target), max_paths))

        print(f"=== SEMANTIC PATHS FROM '{source}' TO '{target}' ===")
        print(f"Found {len(paths)} shortest path(s) of length {len(paths[0])-1 if paths else 0}")

        for i, path in enumerate(paths, 1):
            print(f"\nPath {i}:")
            # Walk consecutive node pairs and describe the connecting edge.
            for u, v in zip(path, path[1:]):
                edge_data = G.edges[u, v] if G.has_edge(u, v) else {}
                relation = edge_data.get('relation', edge_data.get('rel', 'unknown'))
                weight = edge_data.get('weight', 1.0)
                print(f"  {u} --[{relation}|{weight:.3f}]--> {v}")

        return paths

    except nx.NetworkXNoPath:
        print(f"No path found between '{source}' and '{target}'")
        return []
    except Exception as e:
        # Best-effort helper for interactive use: report and carry on.
        print(f"Error finding paths: {e}")
        return []

# Example: Find paths between related concepts
# Each pair is (source, target); identifiers use the "/c/en/<term>" form.
example_paths = [
    ("/c/en/dog", "/c/en/animal"),
    ("/c/en/happy", "/c/en/sad"),
    ("/c/en/car", "/c/en/transportation"),
    ("/c/en/book", "/c/en/knowledge")
]

# Print the paths for every pair, followed by a separator line.
for source, target in example_paths:
    paths = find_semantic_paths(G, source, target)
    print("\n" + "="*80 + "\n")

In [None]:
def explore_concept_neighborhood(G, concept, max_neighbors=20):
    """Print a concept's neighbors grouped by relation type.

    Args:
        G: A NetworkX graph, or ``None`` when loading failed.
        concept: Identifier of the node to explore.
        max_neighbors: Maximum number of neighbors printed per relation
            type; the remainder is summarised as a count.

    Returns:
        A ``defaultdict`` mapping relation label to a list of
        ``(neighbor, weight)`` tuples sorted by descending weight, or
        ``None`` when the graph is missing or the concept is unknown.
    """
    if G is None or concept not in G.nodes():
        print(f"Concept '{concept}' not found in graph")
        return

    print(f"=== EXPLORING CONCEPT: '{concept}' ===")

    # Get neighbors
    neighbors = list(G.neighbors(concept))
    print(f"Total neighbors: {len(neighbors)}")

    # Analyze relationships by type
    relationship_groups = defaultdict(list)

    for neighbor in neighbors:
        # Fall back to the reversed pair when the edge is not stored in the
        # concept -> neighbor direction.
        edge_data = G.edges[concept, neighbor] if G.has_edge(concept, neighbor) else G.edges[neighbor, concept]
        relation = edge_data.get('relation', edge_data.get('rel', 'unknown'))
        weight = edge_data.get('weight', 1.0)
        relationship_groups[relation].append((neighbor, weight))

    # Display relationships grouped by type
    for relation, neighbors_list in sorted(relationship_groups.items()):
        print(f"\n--- {relation.upper()} ({len(neighbors_list)} connections) ---")
        # Sort by weight (confidence) in place so the returned lists are
        # ordered too, then honour the caller's display limit.
        neighbors_list.sort(key=lambda pair: pair[1], reverse=True)
        for neighbor, weight in neighbors_list[:max_neighbors]:
            print(f"  {neighbor} (weight: {weight:.3f})")
        hidden = len(neighbors_list) - max_neighbors
        if hidden > 0:
            print(f"  ... and {hidden} more")

    return relationship_groups

# Explore some interesting concepts
interesting_concepts = ["/c/en/love", "/c/en/intelligence", "/c/en/technology", "/c/en/nature"]

for concept in interesting_concepts:
    # The `G and ...` test also covers G being None after a failed load.
    if G and concept in G.nodes():
        explore_concept_neighborhood(G, concept)
        print("\n" + "="*80 + "\n")
    else:
        print(f"Concept '{concept}' not found in graph")

## 6. Graph Quality Assessment

In [None]:
def assess_graph_quality(G):
    """Summarise the enrichment attributes and weights found on the edges.

    Counts edges whose ``high_confidence``, ``transitivity_inferred`` and
    ``centrality_boosted`` attributes are truthy, counts edges that carry
    any of those attributes, prints weight statistics and plots the weight
    distribution.

    Args:
        G: A NetworkX graph, or ``None`` when loading failed.

    Returns:
        Dict with the keys ``total_edges``, ``enriched_edges``,
        ``high_confidence_edges``, ``transitivity_edges``,
        ``centrality_enriched`` and ``weight_stats`` (mean/median/std, each
        ``None`` when no edge has a weight), or ``None`` when ``G`` is
        ``None``.
    """
    if G is None:
        return

    print("=== GRAPH QUALITY ASSESSMENT ===")

    enrichment_flags = ('high_confidence', 'transitivity_inferred', 'centrality_boosted')

    # Check for enriched attributes
    enriched_edges = 0
    high_confidence_edges = 0
    transitivity_edges = 0
    centrality_enriched = 0
    weight_values = []

    print("Analyzing edge attributes...")
    for _, _, data in tqdm(G.edges(data=True), desc="Processing edges"):
        if 'weight' in data:
            weight_values.append(data['weight'])

        if data.get('high_confidence'):
            high_confidence_edges += 1

        if data.get('transitivity_inferred'):
            transitivity_edges += 1

        if data.get('centrality_boosted'):
            centrality_enriched += 1

        # An edge counts as enriched when it carries at least one flag.
        # NOTE(review): this tests key presence, not truthiness, unlike the
        # three counters above - confirm that is intended.
        if any(flag in data for flag in enrichment_flags):
            enriched_edges += 1

    total_edges = G.number_of_edges()

    def pct(count):
        # An edgeless graph would otherwise raise ZeroDivisionError.
        return count / total_edges * 100 if total_edges else 0.0

    print(f"\n=== ENRICHMENT STATISTICS ===")
    print(f"Total edges: {total_edges:,}")
    print(f"Enriched edges: {enriched_edges:,} ({pct(enriched_edges):.2f}%)")
    print(f"High confidence edges: {high_confidence_edges:,} ({pct(high_confidence_edges):.2f}%)")
    print(f"Transitivity inferred: {transitivity_edges:,} ({pct(transitivity_edges):.2f}%)")
    print(f"Centrality boosted: {centrality_enriched:,} ({pct(centrality_enriched):.2f}%)")

    weight_stats = {'mean': None, 'median': None, 'std': None}

    if weight_values:
        # Compute each statistic once and reuse it for printing and return.
        weight_stats = {
            'mean': np.mean(weight_values),
            'median': np.median(weight_values),
            'std': np.std(weight_values)
        }

        print(f"\n=== WEIGHT STATISTICS ===")
        print(f"Mean weight: {weight_stats['mean']:.4f}")
        print(f"Median weight: {weight_stats['median']:.4f}")
        print(f"Weight std: {weight_stats['std']:.4f}")
        print(f"Min weight: {np.min(weight_values):.4f}")
        print(f"Max weight: {np.max(weight_values):.4f}")

        # Plot weight distribution
        plt.figure(figsize=(12, 4))

        plt.subplot(1, 2, 1)
        plt.hist(weight_values, bins=50, alpha=0.7, edgecolor='black')
        plt.xlabel('Edge Weight')
        plt.ylabel('Frequency')
        plt.title('Edge Weight Distribution')
        plt.grid(True, alpha=0.3)

        plt.subplot(1, 2, 2)
        plt.boxplot(weight_values)
        plt.ylabel('Edge Weight')
        plt.title('Edge Weight Box Plot')
        plt.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    return {
        'total_edges': total_edges,
        'enriched_edges': enriched_edges,
        'high_confidence_edges': high_confidence_edges,
        'transitivity_edges': transitivity_edges,
        'centrality_enriched': centrality_enriched,
        'weight_stats': weight_stats
    }

# Keep the enrichment statistics for the export and summary cells below;
# the result is None when the graph is not loaded.
quality_stats = assess_graph_quality(G)

## 7. Advanced Semantic Queries

In [None]:
def semantic_query_builder(G):
    """Build a set of query helpers bound to the graph ``G``.

    Returns:
        Dict mapping the names ``find_concepts_by_relation``,
        ``find_bridge_concepts`` and ``find_concept_clusters`` to callables
        that query ``G``.
    """

    def _relation_matches(attrs, relation_type):
        # Case-insensitive substring match against the edge's relation label
        # ('relation' preferred, then 'rel', else the empty string).
        label = attrs.get('relation', attrs.get('rel', ''))
        return relation_type.lower() in label.lower()

    def find_concepts_by_relation(relation_type, top_n=20):
        """Rank concepts by how many matching edges touch them."""
        touches = defaultdict(int)

        for head, tail, attrs in G.edges(data=True):
            if _relation_matches(attrs, relation_type):
                touches[head] += 1
                touches[tail] += 1

        ranked = sorted(touches.items(), key=lambda entry: entry[1], reverse=True)
        return ranked[:top_n]

    def find_bridge_concepts(concept1, concept2, max_hops=3):
        """Count intermediate concepts on simple paths between two concepts."""
        if not (concept1 in G.nodes() and concept2 in G.nodes()):
            return []

        bridge_counts = Counter()

        # Enumerate simple paths up to max_hops edges and tally their
        # interior nodes (paths without interior nodes add nothing).
        try:
            for path in nx.all_simple_paths(G, concept1, concept2, cutoff=max_hops):
                bridge_counts.update(path[1:-1])
        except nx.NetworkXNoPath:
            pass

        return bridge_counts.most_common(10)

    def find_concept_clusters(relation_type, min_cluster_size=5):
        """Return connected groups of concepts linked by matching edges."""
        matching_edges = [
            (head, tail)
            for head, tail, attrs in G.edges(data=True)
            if _relation_matches(attrs, relation_type)
        ]

        if not matching_edges:
            return []

        subgraph = G.edge_subgraph(matching_edges)

        # Directed graphs are grouped by weak connectivity.
        component_finder = (
            nx.weakly_connected_components if G.is_directed() else nx.connected_components
        )

        # Keep only components that reach the minimum size, largest first.
        clusters = [
            component
            for component in component_finder(subgraph)
            if len(component) >= min_cluster_size
        ]
        clusters.sort(key=len, reverse=True)
        return clusters

    return {
        'find_concepts_by_relation': find_concepts_by_relation,
        'find_bridge_concepts': find_bridge_concepts,
        'find_concept_clusters': find_concept_clusters
    }

# Build the query helpers only when a graph is available.
# NOTE(review): `if G` relies on the graph's truthiness, which for NetworkX
# graphs presumably reflects the node count, so an empty graph would take the
# "not loaded" branch - confirm this is intended.
if G:
    query_functions = semantic_query_builder(G)
    print("Semantic query functions created successfully!")
    print("Available functions:")
    for func_name in query_functions.keys():
        print(f"  - {func_name}")
else:
    print("Graph not loaded - cannot create query functions")

In [None]:
# Example semantic queries
# `query_functions` only exists when the cell that builds it ran with a
# loaded graph, hence the locals() check.
if G and 'query_functions' in locals():
    # Query 1: Find concepts most associated with "IsA" relations
    # (the relation name is matched case-insensitively as a substring).
    print("=== CONCEPTS MOST CONNECTED BY 'IsA' RELATIONS ===")
    isa_concepts = query_functions['find_concepts_by_relation']('isa', top_n=15)
    for concept, count in isa_concepts:
        print(f"{concept}: {count} connections")
    
    print("\n" + "="*80 + "\n")
    
    # Query 2: Find bridge concepts between related concepts
    print("=== BRIDGE CONCEPTS BETWEEN 'HAPPINESS' AND 'SUCCESS' ===")
    bridges = query_functions['find_bridge_concepts']('/c/en/happiness', '/c/en/success')
    for bridge, count in bridges:
        print(f"{bridge}: appears in {count} bridging paths")
    
    print("\n" + "="*80 + "\n")
    
    # Query 3: Find concept clusters
    print("=== CONCEPT CLUSTERS CONNECTED BY 'RelatedTo' RELATIONS ===")
    clusters = query_functions['find_concept_clusters']('relatedto', min_cluster_size=10)
    for i, cluster in enumerate(clusters[:5], 1):  # Show top 5 clusters
        print(f"\nCluster {i} ({len(cluster)} concepts):")
        sample_concepts = list(cluster)[:10]  # Show first 10 concepts
        print(f"  {', '.join(sample_concepts)}")
        if len(cluster) > 10:
            print(f"  ... and {len(cluster) - 10} more concepts")
else:
    print("Cannot run semantic queries - graph not loaded or query functions not available")

## 8. Interactive Concept Explorer

In [None]:
def interactive_concept_explorer(G):
    """Run a small text-driven loop for exploring concepts in the graph.

    Supported input: a concept name (its neighborhood is shown),
    ``search <term>``, ``path <concept1> <concept2>``, ``stats``, ``help``
    and ``quit``. Names that do not start with ``/c/`` are prefixed with
    ``/c/en/``.

    Args:
        G: A NetworkX graph, or ``None`` when loading failed.

    Returns:
        None. The loop ends on ``quit``, end of input or an interrupt.
    """
    if G is None:
        print("Graph not loaded!")
        return

    def normalize(name):
        # One rule for every command: keep identifiers that already look
        # like "/c/..." and prefix bare terms with "/c/en/".
        return name if name.startswith('/c/') else f'/c/en/{name}'

    print("🔍 INTERACTIVE CONCEPT EXPLORER")
    print("Enter a concept to explore (e.g., '/c/en/love' or just 'love')")
    print("Type 'quit' to exit, 'help' for commands")

    while True:
        try:
            user_input = input("\nEnter concept: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or an interrupt ends the session cleanly
            # instead of surfacing a traceback.
            print("\nExiting concept explorer.")
            break

        if not user_input:
            # Nothing typed: prompt again rather than looking up "/c/en/".
            continue

        command = user_input.lower()

        if command == 'quit':
            break
        elif command == 'help':
            print("\nAvailable commands:")
            print("  - Enter concept name to explore its neighborhood")
            print("  - 'search <term>' to find concepts containing term")
            print("  - 'path <concept1> <concept2>' to find paths between concepts")
            print("  - 'stats' to show graph statistics")
            print("  - 'quit' to exit")
            continue
        elif command.startswith('search '):
            search_term = user_input[7:].strip()
            needle = search_term.lower()
            matching_concepts = [node for node in G.nodes() if needle in node.lower()]
            print(f"\nFound {len(matching_concepts)} concepts containing '{search_term}':")
            for concept in matching_concepts[:20]:  # Show first 20
                print(f"  {concept}")
            if len(matching_concepts) > 20:
                print(f"  ... and {len(matching_concepts) - 20} more")
            continue
        elif command.startswith('path '):
            parts = user_input[5:].strip().split()
            if len(parts) >= 2:
                concept1, concept2 = normalize(parts[0]), normalize(parts[1])
                find_semantic_paths(G, concept1, concept2, max_paths=3)
            else:
                print("Please provide two concepts: path <concept1> <concept2>")
            continue
        elif command == 'stats':
            print(f"\nGraph Statistics:")
            print(f"  Nodes: {G.number_of_nodes():,}")
            print(f"  Edges: {G.number_of_edges():,}")
            print(f"  Density: {nx.density(G):.6f}")
            continue

        # Regular concept exploration
        concept = normalize(user_input)

        if concept in G.nodes():
            explore_concept_neighborhood(G, concept, max_neighbors=15)
        else:
            print(f"\nConcept '{concept}' not found in graph.")
            # Suggest similar concepts
            similar = [node for node in G.nodes() if command in node.lower()]
            if similar:
                print("Did you mean one of these?")
                for suggestion in similar[:10]:
                    print(f"  {suggestion}")

# Note: This function requires user input, so it's better suited for interactive use
# The explorer is deliberately not started here; only usage hints are printed.
print("Interactive concept explorer function defined.")
print("Call interactive_concept_explorer(G) to start exploring!")
print("\nExample concepts to try:")
print("  - love, happiness, intelligence, technology")
print("  - dog, animal, car, book, music")

## 9. Export and Summary

In [None]:
def export_analysis_results(G, centrality_measures, quality_stats, relation_counts, output_dir=None):
    """Export analysis results to a JSON summary and two CSV files.

    Files written into ``output_dir``:
    ``semantic_graph_analysis_summary.json``,
    ``concept_centrality_scores.csv`` (only with centrality results) and
    ``relationship_distribution.csv`` (only with relation counts).
    Write errors are reported and do not stop the remaining exports.

    Args:
        G: A NetworkX graph, or a falsy value when unavailable.
        centrality_measures: Dict of measure name to ``{node: score}``.
        quality_stats: Dict produced by the quality assessment, or ``None``.
        relation_counts: ``Counter`` of relation labels, or ``None``.
        output_dir: Target directory. Defaults to the project's output
            folder used by the rest of this notebook.

    Returns:
        None.
    """
    if output_dir is None:
        output_dir = r"c:\Users\erich\OneDrive\Documents\Python Projects\Semantica-Full-Reasoning-Chatbot\Data\Output"

    # Create analysis summary
    analysis_summary = {
        'graph_stats': {
            'nodes': G.number_of_nodes() if G else 0,
            'edges': G.number_of_edges() if G else 0,
            'density': nx.density(G) if G else 0,
            'is_directed': G.is_directed() if G else False
        },
        'quality_stats': quality_stats,
        'relation_distribution': dict(relation_counts.most_common(50)) if relation_counts else {},
        'top_concepts_by_centrality': {}
    }

    # Add top concepts for each centrality measure
    if centrality_measures:
        for measure, values in centrality_measures.items():
            top_concepts = sorted(values.items(), key=lambda x: x[1], reverse=True)[:20]
            analysis_summary['top_concepts_by_centrality'][measure] = dict(top_concepts)

    # Save analysis summary
    summary_file = f"{output_dir}/semantic_graph_analysis_summary.json"
    try:
        with open(summary_file, 'w', encoding='utf-8') as f:
            json.dump(analysis_summary, f, indent=2, ensure_ascii=False)
        print(f"✅ Analysis summary saved to: {summary_file}")
    except Exception as e:
        print(f"❌ Error saving analysis summary: {e}")

    # Export all centrality scores as CSV, one row per (concept, measure)
    if centrality_measures:
        centrality_df = pd.DataFrame([
            {'concept': concept, 'centrality_measure': measure, 'score': score}
            for measure, values in centrality_measures.items()
            for concept, score in values.items()
        ])
        centrality_file = f"{output_dir}/concept_centrality_scores.csv"
        try:
            centrality_df.to_csv(centrality_file, index=False, encoding='utf-8')
            print(f"✅ Centrality scores saved to: {centrality_file}")
        except Exception as e:
            print(f"❌ Error saving centrality scores: {e}")

    # Export relationship distribution as CSV
    if relation_counts:
        # Total is loop-invariant; compute it once instead of once per row.
        total_relations = sum(relation_counts.values())
        relation_df = pd.DataFrame([
            {'relation': rel, 'count': count, 'percentage': count/total_relations*100}
            for rel, count in relation_counts.most_common()
        ])
        relation_file = f"{output_dir}/relationship_distribution.csv"
        try:
            relation_df.to_csv(relation_file, index=False, encoding='utf-8')
            print(f"✅ Relationship distribution saved to: {relation_file}")
        except Exception as e:
            print(f"❌ Error saving relationship distribution: {e}")

    print("\n📊 Export completed!")

# Export results
# Uses the results computed by the earlier analysis cells.
if G:
    export_analysis_results(G, centrality_measures, quality_stats, relation_counts)
else:
    print("Cannot export - graph not loaded")

In [None]:
def generate_final_summary(G, centrality_measures, quality_stats, relation_counts):
    """Print a final summary of the exploration results.

    Args:
        G: A NetworkX graph, or ``None`` when loading failed.
        centrality_measures: Dict of measure name to ``{node: score}``.
        quality_stats: Dict produced by the quality assessment, or ``None``.
        relation_counts: ``Counter`` of relation labels, or ``None``.

    Returns:
        None. Sections whose input is missing or empty are skipped.
    """
    print("🎯 SEMANTIC KNOWLEDGE GRAPH EXPLORATION SUMMARY")
    print("=" * 60)

    if G is None:
        print("❌ Graph could not be loaded")
        return

    print(f"📊 GRAPH OVERVIEW:")
    print(f"   • Total concepts (nodes): {G.number_of_nodes():,}")
    print(f"   • Total relationships (edges): {G.number_of_edges():,}")
    print(f"   • Graph density: {nx.density(G):.6f}")
    print(f"   • Graph type: {'Directed' if G.is_directed() else 'Undirected'}")

    if quality_stats:
        print(f"\n🔧 SEMANTIC ENRICHMENT QUALITY:")
        total_edges = quality_stats['total_edges']

        def pct(count):
            # An edgeless graph would otherwise raise ZeroDivisionError.
            return count / total_edges * 100 if total_edges else 0.0

        print(f"   • Enriched edges: {quality_stats['enriched_edges']:,} ({pct(quality_stats['enriched_edges']):.1f}%)")
        print(f"   • High confidence: {quality_stats['high_confidence_edges']:,} ({pct(quality_stats['high_confidence_edges']):.1f}%)")
        print(f"   • Transitivity inferred: {quality_stats['transitivity_edges']:,} ({pct(quality_stats['transitivity_edges']):.1f}%)")
        print(f"   • Centrality boosted: {quality_stats['centrality_enriched']:,} ({pct(quality_stats['centrality_enriched']):.1f}%)")

        # Compare against None so a legitimate mean of 0.0 is still shown.
        mean_weight = quality_stats['weight_stats']['mean']
        if mean_weight is not None:
            print(f"   • Average edge weight: {mean_weight:.4f}")

    if relation_counts:
        print(f"\n🔗 RELATIONSHIP TYPES:")
        print(f"   • Unique relation types: {len(relation_counts)}")
        print(f"   • Top 5 most common relations:")
        total_relations = sum(relation_counts.values())
        for rel, count in relation_counts.most_common(5):
            percentage = count / total_relations * 100
            print(f"     - {rel}: {count:,} ({percentage:.1f}%)")

    if centrality_measures:
        print(f"\n⭐ MOST INFLUENTIAL CONCEPTS:")
        for measure, values in centrality_measures.items():
            if not values:
                # max() raises on an empty mapping; nothing to report.
                continue
            top_concept = max(values.items(), key=lambda x: x[1])
            print(f"   • By {measure}: {top_concept[0]} (score: {top_concept[1]:.4f})")

    print(f"\n🎉 EXPLORATION CAPABILITIES AVAILABLE:")
    print(f"   • Semantic path finding between concepts")
    print(f"   • Concept neighborhood exploration")
    print(f"   • Advanced semantic queries and clustering")
    print(f"   • Centrality analysis for concept importance")
    print(f"   • Interactive concept exploration tools")

    print(f"\n💾 EXPORTED FILES:")
    print(f"   • semantic_graph_analysis_summary.json")
    print(f"   • concept_centrality_scores.csv")
    print(f"   • relationship_distribution.csv")

    print("\n" + "=" * 60)
    print("🚀 Semantic knowledge graph exploration complete!")
    print("   Your graph is ready for advanced semantic reasoning tasks.")

# Generate final summary
# Runs even when loading failed; the function reports that case itself.
generate_final_summary(G, centrality_measures, quality_stats, relation_counts)