# Semantica Relation Graph Visualized

# Phase 1

In [12]:
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
from matplotlib.patches import FancyArrowPatch
import random
import io
import pandas as pd
from PIL import Image, ImageDraw
import os
import time
from tqdm import tqdm
import uuid
import warnings
import traceback
import json  # Added missing import
from scipy.spatial.distance import cosine  # Added missing import
import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend for better memory handling

class ConceptNetProcessor:
    """
    Process ConceptNet data for semantic visualization
    """
    def __init__(self, english_data=None, german_data=None):
        """Store the raw assertion tables and create empty derived state.

        Args:
            english_data: English assertions, or None. ``build_semantic_graph``
                reads the 'start', 'end', 'rel' and 'weight' fields of each row.
            german_data: German assertions in the same layout, or None.
        """
        # Derived state, filled in later by the build/generate methods.
        self.relation_types = set()
        self.concept_vectors = dict()
        self.semantic_graph = nx.DiGraph()

        # Raw inputs are kept exactly as passed in.
        self.english_data, self.german_data = english_data, german_data

        print("ConceptNetProcessor initialized")
    
    def clean_concept_name(self, concept_str):
        """Extract clean concept name from ConceptNet format"""
        if not isinstance(concept_str, str):
            return "unknown"
            
        # Extract the concept name from the ConceptNet URI format
        parts = concept_str.split('/')
        if len(parts) >= 4:
            # Format is typically /c/LANG/CONCEPT
            concept = parts[-1]
            # Remove part-of-speech tags if present
            if '/' in concept:
                concept = concept.split('/')[0]
            return concept
        return concept_str
    
    def extract_relation_type(self, relation_str):
        """Extract relation type from ConceptNet format"""
        if not isinstance(relation_str, str):
            return "unknown"
            
        parts = relation_str.split('/')
        if len(parts) >= 3:
            # Format is typically /r/RELATION_TYPE
            return parts[-1]
        return relation_str
    
    def extract_language(self, concept_str):
        """Extract language from ConceptNet concept URI"""
        if not isinstance(concept_str, str):
            return "unknown"
            
        parts = concept_str.split('/')
        if len(parts) >= 4:
            # Format is typically /c/LANG/CONCEPT
            return parts[2]
        return "unknown"
    
    def parse_weight(self, weight_str):
        """Parse weight JSON string to extract numeric weight"""
        if not isinstance(weight_str, str):
            return 1.0
            
        try:
            weight_data = json.loads(weight_str)
            # ConceptNet weights are typically in 'weight' field
            return float(weight_data.get('weight', 1.0))
        except:
            return 1.0
    
    def build_semantic_graph(self, max_concepts=200, min_weight=1.0, sample_size=5000):
        """Build semantic graph from ConceptNet data"""
        print("Building semantic graph from ConceptNet data...")
        
        if self.english_data is None and self.german_data is None:
            print("No ConceptNet data provided.")
            return
        
        # Combine datasets
        all_data = []
        if self.english_data is not None:
            print(f"Processing {len(self.english_data)} English ConceptNet assertions...")
            all_data.append(('en', self.english_data))
        
        if self.german_data is not None:
            print(f"Processing {len(self.german_data)} German ConceptNet assertions...")
            all_data.append(('de', self.german_data))
        
        # Track concepts and their occurrence count
        concept_counts = {}
        
        # Process each language dataset
        for lang, data in all_data:
            # Sample to ensure manageable size if needed
            if len(data) > sample_size:
                data_sample = data.sample(sample_size, random_state=42)
                print(f"Sampled {sample_size} assertions from {len(data)} {lang} assertions")
            else:
                data_sample = data
            
            # Process assertions
            for _, row in tqdm(data_sample.iterrows(), desc=f"Processing {lang} assertions", total=len(data_sample)):
                try:
                    # Extract source and target concepts
                    source_concept = self.clean_concept_name(row['start'])
                    target_concept = self.clean_concept_name(row['end'])
                    
                    # Extract relation type
                    relation_type = self.extract_relation_type(row['rel'])
                    self.relation_types.add(relation_type)
                    
                    # Extract languages
                    source_lang = self.extract_language(row['start'])
                    target_lang = self.extract_language(row['end'])
                    
                    # Parse weight
                    weight = self.parse_weight(row['weight'])
                    
                    # Skip low-weight relationships
                    if weight < min_weight:
                        continue
                    
                    # Track concept occurrences
                    concept_counts[source_concept] = concept_counts.get(source_concept, 0) + 1
                    concept_counts[target_concept] = concept_counts.get(target_concept, 0) + 1
                    
                    # Add to graph
                    self.semantic_graph.add_node(
                        source_concept,
                        lang=source_lang,
                        count=concept_counts[source_concept]
                    )
                    
                    self.semantic_graph.add_node(
                        target_concept,
                        lang=target_lang,
                        count=concept_counts[target_concept]
                    )
                    
                    # Add edge with relation data
                    self.semantic_graph.add_edge(
                        source_concept,
                        target_concept,
                        relation=relation_type,
                        weight=weight
                    )
                    
                except Exception as e:
                    warnings.warn(f"Error processing assertion: {e}")
        
        # Limit to top concepts if needed
        if len(concept_counts) > max_concepts:
            print(f"Limiting graph to top {max_concepts} concepts...")
            top_concepts = sorted(concept_counts.items(), key=lambda x: x[1], reverse=True)[:max_concepts]
            top_concept_names = {c[0] for c in top_concepts}
            
            # Create subgraph with only top concepts
            subgraph = nx.DiGraph()
            
            for node in top_concept_names:
                if self.semantic_graph.has_node(node):
                    subgraph.add_node(
                        node,
                        **self.semantic_graph.nodes[node]
                    )
            
            for source, target, data in self.semantic_graph.edges(data=True):
                if source in top_concept_names and target in top_concept_names:
                    subgraph.add_edge(
                        source,
                        target,
                        **data
                    )
            
            self.semantic_graph = subgraph
        
        print(f"Semantic graph built with {self.semantic_graph.number_of_nodes()} nodes and {self.semantic_graph.number_of_edges()} edges")
        
        # Infer semantic categories
        self.infer_semantic_categories()
        
        return self.semantic_graph
    
    def infer_semantic_categories(self):
        """Tag every node of the semantic graph with a ``category`` attribute.

        A node first receives the first category whose keyword list contains
        a substring of the lower-cased node name, or ``'generic'`` when none
        matches. If the node has an incoming ``IsA`` or ``PartOf`` edge whose
        source already carries a non-generic category, that category replaces
        the keyword result. Nodes are handled in a single pass, so a source
        that has not been visited yet contributes nothing. The per-category
        totals are printed at the end.
        """
        print("Inferring semantic categories...")

        graph = self.semantic_graph

        # English and German keywords, matched as substrings of the node name.
        keyword_table = {
            'fruit': ['apple', 'banana', 'orange', 'fruit', 'berry', 'apfel'],
            'color': ['red', 'blue', 'green', 'yellow', 'color', 'rot', 'blau', 'grün', 'gelb', 'farbe'],
            'taste': ['sweet', 'sour', 'bitter', 'taste', 'süß', 'sauer', 'geschmack'],
            'vehicle': ['car', 'bus', 'train', 'vehicle', 'auto', 'fahrzeug'],
            'temperature': ['hot', 'cold', 'warm', 'cool', 'heiß', 'kalt', 'temperatur'],
            'size': ['big', 'small', 'large', 'tiny', 'groß', 'klein', 'größe'],
            'time': ['minute', 'hour', 'day', 'week', 'month', 'year', 'zeit', 'tag', 'woche'],
            'animal': ['dog', 'cat', 'bird', 'animal', 'hund', 'katze', 'vogel', 'tier'],
            'person': ['man', 'woman', 'child', 'person', 'mann', 'frau', 'kind'],
            'place': ['city', 'country', 'house', 'room', 'stadt', 'land', 'haus', 'zimmer']
        }

        # Relations along which the source's category is copied to the target.
        inheriting_relations = ('IsA', 'PartOf')

        for node in graph.nodes():
            node_attrs = graph.nodes[node]
            lowered = str(node).lower()

            # First keyword hit wins; table order decides between categories.
            node_attrs['category'] = next(
                (
                    name
                    for name, keywords in keyword_table.items()
                    if any(keyword in lowered for keyword in keywords)
                ),
                'generic',
            )

            # The first usable incoming edge overrides the keyword result.
            for source, _, edge_attrs in graph.in_edges(node, data=True):
                if edge_attrs.get('relation', '') not in inheriting_relations:
                    continue
                inherited = graph.nodes[source].get('category', 'generic')
                if inherited != 'generic':
                    node_attrs['category'] = inherited
                    break

        # Tally how many nodes ended up in each category.
        tally = {}
        for _, attrs in graph.nodes(data=True):
            label = attrs.get('category', 'generic')
            tally[label] = tally.get(label, 0) + 1

        print("Inferred categories:", tally)
    
    def generate_concept_vectors(self, dimensions=5):
        """Generate semantic vectors for concepts in the graph"""
        print(f"Generating {dimensions}-dimensional concept vectors...")
        
        if not self.semantic_graph:
            print("No semantic graph available.")
            return {}
        
        # Group concepts by category
        concepts_by_category = {}
        for node, data in self.semantic_graph.nodes(data=True):
            category = data.get('category', 'generic')
            if category not in concepts_by_category:
                concepts_by_category[category] = []
            concepts_by_category[category].append(node)
        
        # Generate base vectors for each category
        category_base_vectors = {}
        for category in concepts_by_category.keys():
            # Use deterministic seed for consistency, but ensure it's within valid range
            seed_value = abs(hash(category)) % (2**32 - 1)  # Safe seed range for NumPy
            np.random.seed(seed_value)
            category_base_vectors[category] = np.random.randn(dimensions)
            # Normalize to unit length
            category_base_vectors[category] = category_base_vectors[category] / np.linalg.norm(category_base_vectors[category])
        
        # Generate vectors for concepts within each category
        for category, concepts in concepts_by_category.items():
            base_vector = category_base_vectors[category]
            
            for concept in concepts:
                # Get node data
                node_data = self.semantic_graph.nodes[concept]
                count = node_data.get('count', 1)
                lang = node_data.get('lang', 'unknown')
                
                # Create language-specific seed value
                lang_seed = 0
                if lang == 'en':
                    lang_seed = 1
                elif lang == 'de':
                    lang_seed = 2
                
                # Scale by frequency/importance
                importance_factor = np.log1p(count) / 10
                
                # Add controlled variation with proper seed range
                # Fix: Ensure seed value is within valid range for NumPy
                concept_hash = abs(hash(concept)) % (2**32 - 1)  # Get positive hash in valid range
                seed_value = (concept_hash + lang_seed) % (2**32 - 1)  # Combine with language seed safely
                np.random.seed(seed_value)
                variation = np.random.randn(dimensions) * 0.2
                
                # Calculate final vector
                concept_vector = base_vector * (1.0 + importance_factor) + variation
                # Normalize for consistent visualization
                concept_vector = concept_vector / np.linalg.norm(concept_vector) * 1.5
                
                # Store vector with metadata
                self.concept_vectors[concept] = {
                    'vector': concept_vector,
                    'category': category,
                    'lang': lang,
                    'count': count,
                    'potential': min(1.5, 0.5 + importance_factor)  # For visualization sizing
                }
        
        # Process translation equivalents
        self.process_translations()
        
        # Adjust vectors based on graph connections
        self.adjust_vectors_by_relationships()
        
        print(f"Generated vectors for {len(self.concept_vectors)} concepts")
        return self.concept_vectors
    
    def process_translations(self):
        """Pull translation/synonym partners towards each other.

        For every ``TranslationOf`` or ``Synonym`` edge whose endpoints both
        have a vector, each endpoint is blended 70/30 with the midpoint of
        the pair and the two concepts are recorded as each other's
        ``translation_pair``. A ``TranslationOf`` edge is only used when the
        two nodes have different ``lang`` attributes; ``Synonym`` edges are
        always used.
        """
        vectors = self.concept_vectors
        node_attrs = self.semantic_graph.nodes
        linking_relations = ('TranslationOf', 'Synonym')

        for first, second, edge_attrs in self.semantic_graph.edges(data=True):
            relation = edge_attrs.get('relation', '')
            if relation not in linking_relations:
                continue
            if first not in vectors or second not in vectors:
                continue

            # Same-language pairs only qualify when they are synonyms.
            same_language = (
                node_attrs[first].get('lang', 'unknown')
                == node_attrs[second].get('lang', 'unknown')
            )
            if same_language and relation != 'Synonym':
                continue

            first_vec = vectors[first]['vector']
            second_vec = vectors[second]['vector']
            midpoint = (first_vec + second_vec) / 2

            # Move both vectors part of the way to their midpoint.
            vectors[first]['vector'] = 0.7 * first_vec + 0.3 * midpoint
            vectors[second]['vector'] = 0.7 * second_vec + 0.3 * midpoint

            # Remember the partner on both sides.
            vectors[first]['translation_pair'] = second
            vectors[second]['translation_pair'] = first
    
    def adjust_vectors_by_relationships(self):
        """Nudge concept vectors along the graph's edges.

        Runs three passes over all edges whose endpoints both have a vector.
        On each edge the source moves along the source-to-target direction by
        ``weight * pull``, where ``pull`` depends on the relation type (a
        negative pull pushes the source away). For a positive pull the target
        moves half that step towards the source. Both vectors are then
        rescaled to length 1.5.
        """
        # Pull strength per relation; relations not listed use default_pull.
        default_pull = 0.1
        pull_by_relation = {
            # Hierarchical relations: stronger pull
            'IsA': 0.2,
            'PartOf': 0.2,
            'MadeOf': 0.2,
            # Property-like relations: medium pull
            'HasProperty': 0.15,
            'HasA': 0.15,
            'CapableOf': 0.15,
            # Opposites: push apart
            'Antonym': -0.1,
            'DistinctFrom': -0.1,
        }
        radius = 1.5
        vectors = self.concept_vectors

        for _pass in range(3):
            for source, target, edge_attrs in self.semantic_graph.edges(data=True):
                if source not in vectors or target not in vectors:
                    continue

                src_entry = vectors[source]
                dst_entry = vectors[target]

                weight = edge_attrs.get('weight', 1.0)
                pull = pull_by_relation.get(edge_attrs.get('relation', ''), default_pull)

                # NOTE(review): weight * pull is not clamped, so a large edge
                # weight moves the source past the target - confirm intended.
                step = (dst_entry['vector'] - src_entry['vector']) * weight * pull

                # In-place updates of the stored arrays.
                src_entry['vector'] += step
                if pull > 0:
                    # Only move the target for positive relationships
                    dst_entry['vector'] -= step * 0.5

                # Rescale source first, then target, back to the common radius.
                for entry in (src_entry, dst_entry):
                    current = entry['vector']
                    entry['vector'] = current / np.linalg.norm(current) * radius
    
    def compute_important_relationships(self, threshold=0.5, max_relationships=30):
        """Collect the heaviest edges between concepts that have vectors.

        Args:
            threshold: Minimum edge weight for a relationship to be included.
                Edges without a ``weight`` attribute count as 0.0.
            max_relationships: Maximum number of relationships returned.

        Returns:
            list[dict]: Up to ``max_relationships`` entries sorted by
            descending weight, each with the keys ``source``, ``target``,
            ``weight``, ``relation`` and ``distance`` (cosine distance between
            the two concept vectors, or 0.5 when it cannot be computed).
        """
        relationships = []

        for source, target, data in self.semantic_graph.edges(data=True):
            if source not in self.concept_vectors or target not in self.concept_vectors:
                continue

            weight = data.get('weight', 0.0)
            relation = data.get('relation', 'related')

            # Only include relationships above threshold
            if weight >= threshold:
                source_vec = self.concept_vectors[source]['vector']
                target_vec = self.concept_vectors[target]['vector']

                # Calculate actual vector distance
                try:
                    distance = cosine(source_vec, target_vec)
                except (ValueError, TypeError, ArithmeticError):
                    # Mismatched or malformed vectors: keep the relationship
                    # with a neutral distance. Unlike a bare ``except`` this
                    # no longer swallows KeyboardInterrupt/SystemExit.
                    distance = 0.5

                relationships.append({
                    'source': source,
                    'target': target,
                    'weight': weight,
                    'relation': relation,
                    'distance': distance
                })

        # Sort by weight (stable, so ties keep edge order) and keep the top ones
        relationships.sort(key=lambda rel: rel['weight'], reverse=True)
        return relationships[:max_relationships]


class SemanticVisualizer:
    """
    Enhanced semantic field visualizer using ConceptNet data
    """
    def __init__(self, concept_processor=None):
        self.concept_processor = concept_processor
        self.fig = None
        self.ax = None
    
    def setup_visualization(self):
        """Create the dark-themed 3D figure and axes and style them.

        Returns:
            tuple: ``(fig, ax)``; the same objects are stored on ``self.fig``
            and ``self.ax``.
        """
        background = '#0A0A1E'
        foreground = 'white'

        # Dark theme with a matching figure and axes background.
        plt.style.use('dark_background')
        self.fig = plt.figure(figsize=(16, 12), facecolor=background)
        self.ax = self.fig.add_subplot(111, projection='3d', facecolor=background)

        # Unfilled panes with faint white edges to reduce visual clutter.
        for axis in (self.ax.xaxis, self.ax.yaxis, self.ax.zaxis):
            pane = axis.pane
            pane.fill = False
            pane.set_edgecolor('w')
            pane.set_alpha(0.1)

        # Figure title.
        self.fig.suptitle(
            'Semantica: Semantic Convergence',
            color=foreground, fontsize=24, fontweight='light', y=0.98,
        )

        # Axis labels share one style.
        label_style = dict(color=foreground, fontsize=12, labelpad=10)
        self.ax.set_xlabel('Semantic Dimension 1', **label_style)
        self.ax.set_ylabel('Semantic Dimension 2', **label_style)
        self.ax.set_zlabel('Semantic Depth', **label_style)

        # Tick colours and sizes for all three axes.
        for axis_name in ('x', 'y', 'z'):
            self.ax.tick_params(axis=axis_name, colors=foreground, labelsize=9)

        # Subtle dotted grid.
        self.ax.grid(True, linestyle=':', alpha=0.2, color=foreground)

        return self.fig, self.ax
    
    def visualize_concepts_improved(self, output_dir, num_frames=30):
        """
        Create a more readable and interpretable visualization
        with slower rotation and better labeling
        """
        print("Creating improved readable visualization...")
        
        # Set up visualization with larger figure size
        plt.close('all')  # Close all existing figures first
        fig = plt.figure(figsize=(20, 14), facecolor='black')  # Increased from (14, 10)
        ax = fig.add_subplot(111, projection='3d', facecolor='black')
        
        # Adjust subplot parameters to give more room for the graph
        plt.subplots_adjust(left=0.02, right=0.85)  # This reserves space on the right for the legend
        
        # Enhanced color scheme for categories - more distinct and contrasting colors
        category_colors = {
            'fruit': '#FF5722',      # Deep Orange
            'color': '#2196F3',      # Blue
            'taste': '#FFC107',      # Amber
            'vehicle': '#4CAF50',    # Green
            'temperature': '#9C27B0', # Purple
            'size': '#00BCD4',       # Cyan
            'time': '#FF9800',       # Orange
            'animal': '#8BC34A',     # Light Green
            'person': '#E91E63',     # Pink
            'place': '#3F51B5',      # Indigo
            'generic': '#BDBDBD'     # Lighter Gray for better visibility
        }
        
        # Category names with better labels
        category_labels = {
            'fruit': 'Fruit Concepts',
            'color': 'Color Concepts',
            'taste': 'Taste Concepts',
            'vehicle': 'Vehicle Concepts',
            'temperature': 'Temperature Concepts',
            'size': 'Size Concepts',
            'time': 'Time Concepts',
            'animal': 'Animal Concepts',
            'person': 'Person Concepts',
            'place': 'Place Concepts',
            'generic': 'General Concepts'
        }
        
        # Enhanced size settings for better visibility
        base_point_size = 100
        highlight_size_factor = 1.5
        
        # Create a function to update the plot with better readability
        def update(frame):
            ax.clear()
            
            # Set up a clean visualization space with depth
            ax.set_facecolor('black')
            ax.xaxis.pane.fill = False
            ax.yaxis.pane.fill = False
            ax.zaxis.pane.fill = False
            ax.grid(True, linestyle=':', alpha=0.3, color='white')
            
            # Set consistent limits and view
            ax.set_xlim([-2.5, 2.5])
            ax.set_ylim([-2.5, 2.5])
            ax.set_zlim([-2.5, 2.5])
            
            # Extract points with minimal animation
            points = {}
            plot_points = []
            plot_colors = []
            plot_sizes = []
            plot_labels = []
            plot_categories = []
            
            # Create a smoother, slower oscillation factor
            time_factor = frame * 0.1  # Reduced speed
            
            for concept, data in concept_vectors.items():
                vector = data['vector'][:3]  # First 3 dimensions
                
                # Add subtle animation with very slow movement
                concept_phase = (hash(concept) % 100) / 100.0
                
                oscillation = np.array([
                    0.05 * np.sin(time_factor + concept_phase * 6.28),
                    0.05 * np.cos(time_factor * 0.7 + concept_phase * 6.28),
                    0.05 * np.sin(time_factor * 0.5 + concept_phase * 6.28)
                ])
                
                # Apply minimal oscillation
                oscillation *= 0.5 * data.get('potential', 1.0)
                
                position = vector + oscillation
                points[concept] = position
                
                # Get color based on category
                category = data.get('category', 'generic')
                color = category_colors.get(category, '#BDBDBD')
                
                # Size based on importance with better scaling
                size = base_point_size * data.get('potential', 1.0)
                
                # Collect plot data
                plot_points.append(position)
                plot_colors.append(color)
                plot_sizes.append(size)
                plot_labels.append(concept)
                plot_categories.append(category)
            
            # Convert to numpy array
            if plot_points:
                plot_points = np.array(plot_points)
            else:
                # Fallback if no points
                plot_points = np.array([[0, 0, 0]])
                plot_colors = ['white']
                plot_sizes = [base_point_size]
                plot_labels = ['default']
                plot_categories = ['generic']
            
            # Plot points with better aesthetics and visibility
            scatter = ax.scatter(
                plot_points[:, 0], plot_points[:, 1], plot_points[:, 2], 
                c=plot_colors, s=plot_sizes, alpha=0.9, 
                edgecolor='white', linewidth=1.0,  # Thicker outline
                depthshade=True
            )
            
            # Draw relationships with clear styling
            for i, rel in enumerate(important_relationships[:5]):  # Show only top 5 for clarity
                source = rel['source']
                target = rel['target']
                relation_type = rel['relation']
                
                if source in points and target in points:
                    # Draw line with clear styling based on relation type
                    if relation_type in ['IsA', 'PartOf', 'MadeOf']:
                        line_color = '#00FFFF'  # Bright cyan for hierarchy
                        line_style = '-'
                        line_width = 2.5
                    elif relation_type in ['HasProperty', 'HasA', 'CapableOf']:
                        line_color = '#00FF00'  # Bright green for properties
                        line_style = '-'
                        line_width = 2.5
                    elif relation_type in ['Antonym', 'DistinctFrom']:
                        line_color = '#FF0000'  # Bright red for opposites
                        line_style = '--'
                        line_width = 2.5
                    else:
                        line_color = '#FFFFFF'  # White for others
                        line_style = '-'
                        line_width = 2.0
                    
                    # Draw connection with consistent visibility
                    ax.plot(
                        [points[source][0], points[target][0]], 
                        [points[source][1], points[target][1]], 
                        [points[source][2], points[target][2]], 
                        color=line_color, alpha=0.9,
                        linewidth=line_width, linestyle=line_style
                    )
                    
                    # Add relationship label at midpoint for important relationships
                    if i < 3:  # Only label top 3 relationships to avoid clutter
                        midpoint = [(points[source][i] + points[target][i])/2 for i in range(3)]
                        ax.text(
                            midpoint[0], midpoint[1], midpoint[2],
                            relation_type,
                            color='white',
                            fontsize=10,
                            fontweight='bold',
                            bbox=dict(
                                facecolor=(0, 0, 0, 0.7),
                                edgecolor='white',
                                boxstyle='round',
                                alpha=0.9
                            ),
                            ha='center',
                            va='center'
                        )
            
            # Determine important concepts to label
            # Focus on important central concepts
            concepts_to_label = []
            
            # Add relationship participants
            for rel in important_relationships[:3]:
                concepts_to_label.append(rel['source'])
                concepts_to_label.append(rel['target'])
            
            # Add examples of each category
            for category in set(plot_categories):
                category_concepts = [c for i, c in enumerate(plot_labels) if plot_categories[i] == category]
                if category_concepts:
                    concepts_to_label.append(category_concepts[0])
            
            # Add labels with consistent styling and positioning
            for concept in list(set(concepts_to_label))[:12]:  # Convert set to list before slicing
                if concept in points:
                    pos = points[concept]
                    
                    # Create consistent offset for label
                    offset = np.array([0.15, 0.15, 0.15])
                    if concept in plot_labels:
                        idx = plot_labels.index(concept)
                        # Get category information for styling
                        cat = plot_categories[idx]
                        
                    # Add label with high-contrast styling
                    ax.text(
                        pos[0] + offset[0], 
                        pos[1] + offset[1], 
                        pos[2] + offset[2],
                        concept, 
                        color='white', 
                        fontsize=12,  # Larger font
                        fontweight='bold',
                        bbox=dict(
                            facecolor=(0, 0, 0, 0.8),  # Darker background
                            edgecolor='white',         # White border
                            boxstyle='round,pad=0.3',  # More padding
                            alpha=0.9                  # More opaque
                        ),
                        ha='left', 
                        va='bottom',
                        zorder=100  # Keep labels on top
                    )
            
            # Create clear legend for categories
            # Use a consistent, static legend
            legend_elements = []
            
            # Only show categories that actually appear in the data
            visible_categories = set(plot_categories)
            
            for category in visible_categories:
                color = category_colors.get(category, '#BDBDBD')
                # Use more descriptive labels
                label = category_labels.get(category, category.capitalize())
                legend_elements.append(plt.Line2D(
                    [0], [0], 
                    marker='o', 
                    color='w', 
                    label=label, 
                    markerfacecolor=color,
                    markersize=10  # Larger marker
                ))
            
            # Add legend with new position
            legend = ax.legend(
                handles=legend_elements, 
                loc='center left',  # Changed from 'upper right' to 'center left'
                bbox_to_anchor=(1.15, 0.5),  # Places legend outside the plot
                fontsize=11,
                framealpha=0.9,
                edgecolor='white',
                facecolor=(0, 0, 0, 0.8)
            )
            
            # Set legend text color to white with bold font
            for text in legend.get_texts():
                text.set_color('white')
                text.set_fontweight('bold')
            
            # Add informative frame counter with better visibility
            ax.text2D(
                0.02, 0.98, 
                f"Frame: {frame+1}/{num_frames}", 
                transform=ax.transAxes, 
                color='white', 
                fontsize=12,  # Larger font
                fontweight='bold',
                bbox=dict(
                    facecolor=(0, 0, 0, 0.8),
                    edgecolor='white',
                    boxstyle='round,pad=0.3',
                    alpha=0.9
                )
            )
            
            # Add an explanatory panel describing what's being shown
            explanation_text = (
                "Semantica Visualization:\n"
                "• Points represent concepts in semantic space\n"
                "• Colors indicate concept categories\n"
                "• Connected concepts have semantic relationships\n"
                "• Spatial proximity indicates semantic similarity"
            )
            
            ax.text2D(
                0.02, 0.12,  # Position near bottom left
                explanation_text,
                transform=ax.transAxes,
                color='white',
                fontsize=10,
                fontweight='bold',
                bbox=dict(
                    facecolor=(0, 0, 0, 0.8),
                    edgecolor='white',
                    boxstyle='round,pad=0.5',
                    alpha=0.9
                ),
                linespacing=1.5  # More space between lines
            )
            
            # MUCH slower camera movement for better readability
            # Just a gentle rotation to show 3D structure
            # Using modulo to create a smooth oscillation between specific angles
            
            # Full rotation every 120 frames (4x slower than before)
            rotation_angle = (frame * 3) % 360  # Very slow rotation
            
            # Gentle camera bobbing between 20 and 30 degrees elevation
            elevation = 20 + 5 * np.sin(frame * 0.1)  # Subtle elevation change
            
            ax.view_init(elev=elevation, azim=rotation_angle)
            
            # Add static title with no animation
            plt.title(
                'Semantica: Relational Semantic Convergence', 
                color='white',
                fontsize=16,  # Larger font
                fontweight='bold',
                pad=20
            )
            
            # Print progress occasionally
            if frame % max(1, num_frames // 10) == 0:
                print(f"Rendering frame {frame+1}/{num_frames}")
            
            return ax
        
        # Create output directory if needed
        os.makedirs(output_dir, exist_ok=True)
        
        # Create a static preview image
        print("Creating static preview...")
        update(0)  # Use first frame for preview
        
        static_path = os.path.join(output_dir, f'semantica_readable_preview_{uuid.uuid4()}.png')
        plt.savefig(static_path, dpi=120, bbox_inches='tight')
        print(f"Static preview saved to {static_path}")
        
        # Create animation with optimized settings for readability
        try:
            print(f"Creating improved readable animation with {num_frames} frames...")
            
            # Memory management: Clear the figure and recreate
            plt.close('all')
            fig = plt.figure(figsize=(14, 10), facecolor='black')
            ax = fig.add_subplot(111, projection='3d', facecolor='black')
            
            # Create animation with more frames for smoother, slower movement
            anim = animation.FuncAnimation(
                fig, update, frames=num_frames, 
                interval=200,  # Slower frame rate (200ms vs 100ms)
                blit=False
            )
            
            # Use PillowWriter with optimized settings for readability
            output_path = os.path.join(output_dir, f'semantica_readable_{uuid.uuid4()}.gif')
            print(f"Saving readable animation to {output_path}...")
            
            # Use optimized writer settings for readability
            writer = animation.PillowWriter(
                fps=8,        # Much slower frame rate (was 15)
                bitrate=2000, # Higher quality
                metadata=dict(artist='Semantica')
            )
            
            # Save with progress updates
            anim.save(
                output_path, 
                writer=writer,
                dpi=120,      # Higher resolution
                savefig_kwargs={'facecolor': 'black'}
            )
            
            # Verify animation
            if os.path.exists(output_path):
                file_size = os.path.getsize(output_path) / (1024 * 1024)
                print(f"Readable animation saved successfully! File size: {file_size:.2f} MB")
                
                # Verify it's a valid GIF
                try:
                    from PIL import Image
                    with Image.open(output_path) as img:
                        frames = 0
                        try:
                            while True:
                                frames += 1
                                img.seek(img.tell() + 1)
                        except EOFError:
                            pass
                        print(f"Animation contains {frames} frames")
                except Exception as e:
                    print(f"GIF validation error: {e}")
            
            plt.close(fig)
            return output_path
        
        except Exception as e:
            print(f"Animation creation failed: {e}")
            print(traceback.format_exc())
            plt.close('all')
            return static_path

# Update the process_conceptnet_data function to include the new visualization option
def process_conceptnet_data(english_data, german_data, output_dir, visualization_type='readable'):
    """
    Process ConceptNet data and create visualization
    
    Runs the pipeline in this order: build the semantic graph, generate
    concept vectors, compute important relationships, then render every
    visualization selected by ``visualization_type``.
    
    Parameters:
    -----------
    english_data : DataFrame
        English ConceptNet data
    german_data : DataFrame
        German ConceptNet data
    output_dir : str
        Directory to save visualizations
    visualization_type : str
        Type of visualization to create:
        - 'basic': Simple 3D visualization
        - 'advanced': Enhanced 3D visualization with more visual elements
        - 'network': 2D network visualization
        - 'readable': New improved readable visualization with slower rotation
        - 'all': Create all visualization types
        Any other value renders nothing; a warning is printed and the
        processor is still returned with an empty path list.
    
    Returns:
    --------
    concept_processor : ConceptNetProcessor
        Processor object with semantic graph and vectors
    output_paths : list
        Paths to created visualizations (renderers that return None are
        skipped, so the list only contains real paths)
    """
    print("Starting ConceptNet processing pipeline...")
    
    # Initialize ConceptNet processor
    concept_processor = ConceptNetProcessor(english_data, german_data)
    
    # Build semantic graph from ConceptNet data
    concept_processor.build_semantic_graph(max_concepts=150, sample_size=5000)
    
    # The return values of the next two steps are not needed in this function.
    # The calls are kept as pipeline steps; presumably they populate state on
    # the processor that the visualizer reads later -- confirm against
    # ConceptNetProcessor before removing either of them.
    concept_processor.generate_concept_vectors(dimensions=5)
    concept_processor.compute_important_relationships(threshold=0.5, max_relationships=30)
    
    # Initialize visualizer
    visualizer = SemanticVisualizer(concept_processor)
    
    # (type key, progress message, renderer), listed in rendering order.
    # Lambdas defer the attribute lookup, so a renderer that is not requested
    # is never touched.
    renderers = (
        (
            'readable',
            "Creating readable 3D visualization...",
            # More frames for smoother, slower movement
            lambda: visualizer.visualize_concepts_improved(output_dir, num_frames=40),
        ),
        (
            'basic',
            "Creating basic 3D visualization...",
            lambda: visualizer.visualize_concepts_enhanced(output_dir, num_frames=20),
        ),
        (
            'advanced',
            "Creating advanced 3D visualization...",
            lambda: visualizer.visualize_concepts_interactive(output_dir, num_frames=30),
        ),
        (
            'network',
            "Creating 2D network visualization...",
            lambda: visualizer.visualize_2d_network(output_dir),
        ),
    )
    
    output_paths = []
    matched_any = False
    
    # Create visualizations based on requested type
    for type_key, progress_message, render in renderers:
        if visualization_type not in (type_key, 'all'):
            continue
        matched_any = True
        print(progress_message)
        output_path = render()
        if output_path is None:
            # Keep None out of the result so the summary and callers only see real paths
            print(f"Warning: '{type_key}' visualization did not return an output path")
            continue
        output_paths.append(output_path)
    
    if output_paths:
        # str() so that non-string path objects cannot break the summary line
        print(f"Visualization(s) created at: {', '.join(str(path) for path in output_paths)}")
    elif not matched_any:
        valid_types = [type_key for type_key, _, _ in renderers] + ['all']
        print(
            f"Warning: unknown visualization_type {visualization_type!r}; "
            f"no visualization created. Expected one of: {', '.join(valid_types)}"
        )
    else:
        print("Warning: no visualization files were produced")
    
    return concept_processor, output_paths

# Example usage
if __name__ == "__main__":
    # Locations of the ConceptNet TSV exports and of the rendered results
    input_data_path = '../Data/Input'
    output_dir = '../Data/Output'
    
    # Both language exports are read with the same options: tab-separated,
    # no header row, four named columns.
    tsv_read_options = dict(
        sep='\t',
        header=None,
        names=['start', 'rel', 'end', 'weight'],
    )
    english_tsv = os.path.join(input_data_path, 'conceptnet-assertions-5.7.0.en.tsv')
    german_tsv = os.path.join(input_data_path, 'conceptnet-assertions-5.7.0.de.tsv')
    
    # Load ConceptNet data (English first, then German)
    print("Loading ConceptNet data...")
    english_conceptnet = pd.read_csv(english_tsv, **tsv_read_options)
    german_conceptnet = pd.read_csv(german_tsv, **tsv_read_options)
    
    # Report how many assertions each export contains
    print(f"English ConceptNet loaded with {len(english_conceptnet)} assertions.")
    print(f"German ConceptNet loaded with {len(german_conceptnet)} assertions.")
    
    # Run the full pipeline and render the 'readable' visualization
    processor, viz_paths = process_conceptnet_data(
        english_conceptnet,
        german_conceptnet,
        output_dir,
        visualization_type='readable',
    )
    
    print("Process complete!")

Loading ConceptNet data...
English ConceptNet loaded with 3423004 assertions.
German ConceptNet loaded with 1078946 assertions.
Starting ConceptNet processing pipeline...
ConceptNetProcessor initialized
Building semantic graph from ConceptNet data...
Processing 3423004 English ConceptNet assertions...
Processing 1078946 German ConceptNet assertions...
Sampled 5000 assertions from 3423004 en assertions
English ConceptNet loaded with 3423004 assertions.
German ConceptNet loaded with 1078946 assertions.
Starting ConceptNet processing pipeline...
ConceptNetProcessor initialized
Building semantic graph from ConceptNet data...
Processing 3423004 English ConceptNet assertions...
Processing 1078946 German ConceptNet assertions...
Sampled 5000 assertions from 3423004 en assertions


Processing en assertions: 100%|██████████| 5000/5000 [00:00<00:00, 31645.62it/s]



Sampled 5000 assertions from 1078946 de assertions


Processing de assertions: 100%|██████████| 5000/5000 [00:00<00:00, 33433.43it/s]



Limiting graph to top 150 concepts...
Semantic graph built with 150 nodes and 234 edges
Inferring semantic categories...
Inferred categories: {'generic': 139, 'animal': 4, 'place': 5, 'person': 2}
Generating 5-dimensional concept vectors...
Generated vectors for 150 concepts
Creating readable 3D visualization...
Creating improved readable visualization...
Creating static preview...


NameError: name 'concept_vectors' is not defined