# Semantica relation graph visualized

In [2]:
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
from matplotlib.patches import FancyArrowPatch
import random
from IPython.display import HTML
import io
import base64
from PIL import Image, ImageDraw
import os
import time
from tqdm import tqdm

# Set up the output directory structure: "<cwd>/../Data/Output".
# The parent-directory step is passed as its own path component (os.pardir)
# so the platform's separator is used; a literal r"\.." suffix only means
# "parent directory" on Windows and becomes part of the folder name elsewhere.
curr_dir = os.getcwd()
output_data_path = os.path.join(curr_dir, os.pardir, "Data", "Output")

# Create the directory if it doesn't exist
os.makedirs(output_data_path, exist_ok=True)

# Translation pairs as (english, german) tuples, built from two parallel
# word lists so the i-th English word lines up with the i-th German word.
_ENGLISH_WORDS = ('apple', 'dog', 'cat', 'tree', 'house')
_GERMAN_WORDS = ('Apfel', 'Hund', 'Katze', 'Baum', 'Haus')
word_pairs = list(zip(_ENGLISH_WORDS, _GERMAN_WORDS))

# Sentence relations, grouped by meaning: each group holds the English and the
# German rendering of one sentence as (language, sentence, [(word, role)]).
_SENTENCE_GROUPS = (
    # Apple sentences
    (
        ('english', "The apple is red", [('apple', 'subject'), ('red', 'property')]),
        ('german', "Der Apfel ist rot", [('Apfel', 'subject'), ('rot', 'property')]),
    ),
    # Animal sentences
    (
        ('english', "The dog chases the cat", [('dog', 'agent'), ('cat', 'patient')]),
        ('german', "Der Hund jagt die Katze", [('Hund', 'agent'), ('Katze', 'patient')]),
    ),
    # Location sentences
    (
        ('english', "The dog sleeps under the tree", [('dog', 'agent'), ('tree', 'location')]),
        ('german', "Der Hund schläft unter dem Baum", [('Hund', 'agent'), ('Baum', 'location')]),
    ),
    # Property sentences
    (
        ('english', "The house is big", [('house', 'subject'), ('big', 'property')]),
        ('german', "Das Haus ist groß", [('Haus', 'subject'), ('groß', 'property')]),
    ),
)

# Flattened form: (sentence_id, language, sentence, [(word, role)]).
# Both renderings in a group share one sentence_id, numbered from 1.
sentences = [
    (sent_id, lang, text, roles)
    for sent_id, renderings in enumerate(_SENTENCE_GROUPS, start=1)
    for lang, text, roles in renderings
]

# Words that occur only inside sentences (they are not in word_pairs)
additional_words = {
    word: {'type': 'property', 'emoji': emoji}
    for word, emoji in (
        ('red', '🔴'),
        ('rot', '🔴'),
        ('big', '📏'),
        ('groß', '📏'),
    )
}

# Word -> semantic type (the type selects the marker shape of the node)
word_types = {
    word: type_name
    for type_name, members in (
        ('fruit', ('apple', 'Apfel')),
        ('animal', ('dog', 'Hund', 'cat', 'Katze')),
        ('plant', ('tree', 'Baum')),
        ('object', ('house', 'Haus')),
        ('property', ('red', 'rot', 'big', 'groß')),
    )
    for word in members
}

# English word -> emoji (only English words are keys)
emoji_map = dict(
    apple='🍎',
    dog='🐕',
    cat='🐈',
    tree='🌳',
    house='🏠',
    red='🔴',
    big='📏',
)

# Node colour per language
language_colors = dict(
    english='#3498db',  # Blue
    german='#e74c3c',   # Red
)

# Colour per word type
type_colors = dict(
    fruit='#2ecc71',     # Green
    animal='#9b59b6',    # Purple
    plant='#27ae60',     # Dark Green
    object='#f39c12',    # Orange
    property='#34495e',  # Dark Gray
    sentence='#7f8c8d',  # Light Gray
)

# Edge colour per role a word plays in a sentence
role_colors = dict(
    subject='#1abc9c',   # Teal
    property='#9b59b6',  # Purple
    agent='#e67e22',     # Orange
    patient='#3498db',   # Blue
    location='#2ecc71',  # Green
)

# Shape based on type
def get_node_shape(node_type):
    """Return the marker code used to draw nodes of ``node_type``.

    Unknown types (anything other than the six listed below) fall back to
    the circle marker ``'o'``.
    """
    marker_by_type = dict(
        fruit='o',      # Circle
        animal='s',     # Square
        plant='^',      # Triangle
        object='d',     # Diamond
        property='p',   # Pentagon
        sentence='h',   # Hexagon
    )
    try:
        return marker_by_type[node_type]
    except KeyError:
        return 'o'

# Create initial graph
def create_initial_graph():
    """Build the starting graph: one node per word and one node per sentence.

    English words are placed around x = -1.5 and German words around x = +1.5,
    each at a random y in [-0.8, 0.8]. Sentence nodes are placed at x = -1.0
    (English) or x = +1.0 (German); both renderings of a sentence share a row.
    Rows are centred on y = 0 and at most 0.5 apart, so every sentence stays
    inside the y-limits of +/-1.5 that draw_graph applies (placing them at
    ``0.5 * sent_id`` pushed the higher-numbered sentences off the canvas).

    No edges are created here.

    Returns:
        tuple: ``(G, sentence_positions)``. ``G`` is the ``nx.Graph``;
        ``sentence_positions`` maps ``(sentence_id, language)`` to the
        initial ``np.array([x, y])`` of that sentence node.
    """
    print("Creating initial semantic graph structure...")
    G = nx.Graph()

    # Add nodes for the translation pairs (English first, then German)
    for en, de in word_pairs:
        for word, lang, x_center in ((en, 'english', -1.5), (de, 'german', 1.5)):
            G.add_node(word,
                       lang=lang,
                       type=word_types[word],
                       # emoji_map is keyed by English words, so the German
                       # node looks up its emoji through its English partner
                       emoji=emoji_map[en],
                       color=language_colors[lang],
                       position=np.array([x_center + random.uniform(-0.1, 0.1),
                                          random.uniform(-0.8, 0.8)]),
                       merged=False,
                       size=300)

    # Add the words that only appear inside sentences
    english_extras = ('red', 'big')
    for word, info in additional_words.items():
        if word in G:
            continue
        lang = 'english' if word in english_extras else 'german'
        G.add_node(word,
                   lang=lang,
                   type=info['type'],
                   emoji=info['emoji'],
                   color=language_colors[lang],
                   position=np.array([-1.5 if lang == 'english' else 1.5,
                                      random.uniform(-0.8, 0.8)]),
                   merged=False,
                   size=300)

    # Lay the sentences out in rows centred on y = 0. A sentence's row is the
    # rank of its id among all ids, so gaps in the numbering leave no holes.
    sentence_ids = sorted({sent_id for sent_id, _lang, _text, _roles in sentences})
    row_of = {sent_id: row for row, sent_id in enumerate(sentence_ids)}
    middle_row = (len(sentence_ids) - 1) / 2
    # Keep the whole stack within a total height of 2.0
    row_spacing = 0.5 if len(sentence_ids) < 2 else min(0.5, 2.0 / (len(sentence_ids) - 1))

    # Add sentence nodes
    sentence_positions = {}
    for sent_id, lang, text, roles in sentences:
        node_id = f"sentence_{sent_id}_{lang}"
        x_pos = -1.0 if lang == 'english' else 1.0
        y_pos = row_spacing * (row_of[sent_id] - middle_row)

        sentence_positions[(sent_id, lang)] = np.array([x_pos, y_pos])

        G.add_node(node_id,
                   lang=lang,
                   type='sentence',
                   text=text,
                   color=type_colors['sentence'],
                   position=np.array([x_pos, y_pos]),
                   roles=roles,
                   merged=False,
                   size=400)

    print(f"Added {len(G.nodes)} nodes to the graph")
    return G, sentence_positions

# Add edges gradually in each frame

# Order in which sentence roles are revealed in phase 1; a role's index
# delays the moment its edges first appear. Unknown roles use index 0.
_ROLE_REVEAL_ORDER = ('subject', 'property', 'agent', 'patient', 'location')

# Translation pairs for the words that only occur inside sentences
_EXTRA_WORD_PAIRS = [('red', 'rot'), ('big', 'groß')]


def _either_merged(G, first, second):
    """Return a truthy value if either node carries a truthy ``merged`` flag."""
    return G.nodes[first].get('merged') or G.nodes[second].get('merged')


def _edge_weight(G, u, v):
    """Return the ``weight`` of edge u-v, or 0 when there is no such edge."""
    data = G.get_edge_data(u, v)
    return 0 if data is None else data['weight']


def _reveal_sentence_edges(G, phase_progress):
    """Phase 1: add sentence-word edges role by role, strengthening existing ones."""
    for sent_id, lang, _text, roles in sentences:
        node_id = f"sentence_{sent_id}_{lang}"

        for word, role in roles:
            if G.has_edge(node_id, word):
                # Edge already shown: grow it by a fixed step per frame
                edge = G[node_id][word]
                edge['weight'] = min(2.0, edge['weight'] + 0.05)
                edge['alpha'] = min(1.0, edge['alpha'] + 0.02)
                continue
            if word not in G.nodes:
                continue

            role_idx = _ROLE_REVEAL_ORDER.index(role) if role in _ROLE_REVEAL_ORDER else 0
            threshold = 0.1 + (role_idx * 0.05)

            if phase_progress > threshold:
                # Initial strength grows with how far past the threshold we are
                edge_strength = min(1.0, (phase_progress - threshold) * 5)
                G.add_edge(node_id, word,
                           role=role,
                           weight=edge_strength,
                           alpha=edge_strength,
                           color=role_colors.get(role, 'gray'))


def _link_pair(G, en, de, phase_progress, threshold):
    """Add the en-de translation edge once past ``threshold``; later calls strengthen it."""
    if phase_progress > threshold:
        if not G.has_edge(en, de):
            edge_strength = min(1.0, (phase_progress - threshold) * 5)
            G.add_edge(en, de, weight=edge_strength, alpha=edge_strength, color='purple')
        else:
            edge = G[en][de]
            edge['weight'] = min(3.0, edge['weight'] + 0.1)
            edge['alpha'] = min(1.0, edge['alpha'] + 0.05)


def _link_translations(G, phase_progress):
    """Phase 2: connect each word to its translation, one pair after another."""
    # Main pairs: thresholds spread over the first 80% of the phase
    for i, (en, de) in enumerate(word_pairs):
        if _either_merged(G, en, de):
            continue
        _link_pair(G, en, de, phase_progress, i / len(word_pairs) * 0.8)

    # Sentence-only pairs: thresholds start at 40% of the phase
    for i, (en, de) in enumerate(_EXTRA_WORD_PAIRS):
        if en in G.nodes and de in G.nodes:
            if _either_merged(G, en, de):
                continue
            _link_pair(G, en, de, phase_progress,
                       0.4 + (i / len(_EXTRA_WORD_PAIRS) * 0.4))


def _merge_pair(G, en, de, midpoint):
    """Hide ``en`` and ``de`` and add a ``merged_<en>_<de>`` node that inherits their edges."""
    # Flag the originals as merged and make them invisible
    for word in (en, de):
        G.nodes[word]['merged'] = True
        G.nodes[word]['size'] = 0
        G.nodes[word]['alpha'] = 0

    merged_id = f"merged_{en}_{de}"
    G.add_node(merged_id,
               type='merged',
               emoji=emoji_map.get(en, '❓'),
               position=midpoint,
               size=500,
               alpha=1.0,
               color='#1abc9c')  # Teal color for merged nodes

    # Copy every connection of either original onto the merged node
    for neighbor in set(G.neighbors(en)).union(set(G.neighbors(de))):
        if neighbor in (en, de):
            continue

        # When both originals touch the neighbor, keep the heavier edge
        max_weight = max(_edge_weight(G, en, neighbor), _edge_weight(G, de, neighbor))

        if neighbor.startswith('sentence_'):
            # Carry the role over, preferring the English word's edge
            role = None
            if G.has_edge(en, neighbor):
                role = G.get_edge_data(en, neighbor).get('role', 'default')
            elif G.has_edge(de, neighbor):
                role = G.get_edge_data(de, neighbor).get('role', 'default')

            G.add_edge(merged_id, neighbor,
                       weight=max_weight,
                       alpha=1.0,
                       role=role,
                       color=role_colors.get(role, 'gray'))
        else:
            G.add_edge(merged_id, neighbor,
                       weight=max_weight,
                       alpha=1.0,
                       color='gray')


def _converge_pairs(G, phase_progress):
    """Phase 3a: pull linked translations toward their midpoint and merge close ones."""
    all_pairs = list(word_pairs) + list(_EXTRA_WORD_PAIRS)
    for i, (en, de) in enumerate(all_pairs):
        if en not in G.nodes or de not in G.nodes:
            continue
        if _either_merged(G, en, de):
            continue

        # Later pairs start moving later in the phase
        threshold = i / len(all_pairs) * 0.7
        if not (phase_progress > threshold and G.has_edge(en, de)):
            continue

        en_pos = G.nodes[en]['position']
        de_pos = G.nodes[de]['position']
        midpoint = (en_pos + de_pos) / 2

        # Fraction of the remaining distance covered this frame; it ramps up
        # after the threshold and is capped at 0.1
        move_factor = min(0.1, (phase_progress - threshold) * 0.3)

        G.nodes[en]['position'] = en_pos + (midpoint - en_pos) * move_factor
        G.nodes[de]['position'] = de_pos + (midpoint - de_pos) * move_factor

        # Merge only near the end of the phase and once the nodes are close
        gap = np.linalg.norm(G.nodes[en]['position'] - G.nodes[de]['position'])
        if gap < 0.2 and phase_progress > 0.9:
            _merge_pair(G, en, de, midpoint)


def _drift_sentences(G, sentence_positions, phase_progress):
    """Phase 3b: slide English sentences toward x = -2 and German ones toward x = +2."""
    move_factor = phase_progress * 0.1
    for sent_id, lang, _text, _roles in sentences:
        node_id = f"sentence_{sent_id}_{lang}"
        if node_id not in G.nodes:
            continue

        # Only x changes: the target keeps the sentence's initial y
        x_target = -2.0 if lang == 'english' else 2.0
        y_target = sentence_positions[(sent_id, lang)][1]

        current_pos = G.nodes[node_id]['position']
        target_pos = np.array([x_target, y_target])
        G.nodes[node_id]['position'] = current_pos + (target_pos - current_pos) * move_factor


def _pulse_merged_nodes(G, phase_progress):
    """Phase 4: oscillate the ``size`` of every merged node around 500."""
    pulse = 1.0 + 0.2 * np.sin(phase_progress * 10 * np.pi)
    for node, attrs in G.nodes(data=True):
        if node.startswith('merged_'):
            attrs['size'] = 500 * pulse


def update_graph(G, sentence_positions, frame, total_frames):
    """Advance the graph by one animation frame, mutating ``G`` in place.

    ``progress = frame / total_frames`` selects exactly one phase:

    * ``progress <= 0.3``: sentence-word edges appear and strengthen.
    * ``0.3 < progress <= 0.6``: translation edges appear and strengthen.
    * ``0.6 < progress <= 0.9``: linked translations move together and may
      be replaced by a merged node; sentence nodes drift outward.
    * ``progress > 0.9``: merged nodes pulse in size.

    Args:
        G: Graph as produced by create_initial_graph (nodes carry
            ``position`` and ``merged`` attributes).
        sentence_positions: Mapping ``(sentence_id, language)`` to the
            sentence node's initial position.
        frame: Index of the frame being produced.
        total_frames: Frame index that corresponds to 100% progress; must be
            non-zero because it is used as a divisor.

    Returns:
        The same graph object ``G``.
    """
    progress = frame / total_frames

    # Phase 1: Add word-sentence relations (0-30% of frames)
    if progress <= 0.3:
        _reveal_sentence_edges(G, progress / 0.3)

    # Phase 2: Add cross-language word relations (30-60% of frames)
    if 0.3 < progress <= 0.6:
        _link_translations(G, (progress - 0.3) / 0.3)

    # Phase 3: Move nodes toward their equivalents (60-90% of frames)
    if 0.6 < progress <= 0.9:
        phase_progress = (progress - 0.6) / 0.3
        _converge_pairs(G, phase_progress)
        _drift_sentences(G, sentence_positions, phase_progress)

    # Phase 4: Final stabilization (90-100% of frames)
    if progress > 0.9:
        _pulse_merged_nodes(G, (progress - 0.9) / 0.1)

    return G

# Draw the graph for animation
def draw_graph(G, ax, frame, total_frames):
    """Render the current state of ``G`` onto ``ax`` as one animation frame.

    The axes are cleared and redrawn from scratch: title with the progress
    percentage, three legends, edges, sentence boxes, then word and merged
    nodes. Original words flagged ``merged`` (and their edges) are not drawn;
    the corresponding ``merged_*`` node is drawn as an emoji instead. The
    merged node's ``size`` attribute scales its emoji and highlight circle,
    with 500 as the reference size.

    Args:
        G: Graph whose nodes carry a ``position`` attribute (x, y).
        ax: Matplotlib axes to draw on.
        frame: Index of the frame being drawn (used for the title only).
        total_frames: Frame index that corresponds to 100%; must be non-zero.
    """
    def contrast_color(fill):
        # White text on dark fills (mean RGB below 0.5), black text otherwise
        return 'white' if np.mean(plt.matplotlib.colors.to_rgb(fill)) < 0.5 else 'black'

    def is_hidden_original(name):
        # An original word flagged as merged; its "merged_*" stand-in is shown instead
        return G.nodes[name].get('merged', False) and not name.startswith('merged_')

    ax.clear()

    # Set up plot
    ax.set_xlim(-2.5, 2.5)
    ax.set_ylim(-1.5, 1.5)
    ax.set_aspect('equal')
    ax.axis('off')

    # Add title
    percentage = int((frame / total_frames) * 100)
    ax.set_title(f"Semantica: Cross-Lingual Relational Convergence - {percentage}%", fontsize=14)

    # Language legend
    ax.text(-2.4, 1.3, "Languages:", fontsize=10, fontweight='bold')
    ax.plot(-2.3, 1.2, 'o', color=language_colors['english'], markersize=8)
    ax.text(-2.2, 1.2, "English", fontsize=9)
    ax.plot(-2.3, 1.1, 'o', color=language_colors['german'], markersize=8)
    ax.text(-2.2, 1.1, "German", fontsize=9)

    # Word type legend; markers come from get_node_shape so the legend
    # cannot drift from the markers used for the nodes themselves
    ax.text(-2.4, 0.9, "Word Types:", fontsize=10, fontweight='bold')
    y_pos = 0.8
    for type_name in ('fruit', 'animal', 'plant', 'object', 'property', 'sentence'):
        ax.plot(-2.3, y_pos, get_node_shape(type_name), color=type_colors[type_name], markersize=8)
        ax.text(-2.2, y_pos, type_name, fontsize=9)
        y_pos -= 0.1

    # Relationship role legend
    ax.text(1.8, 1.3, "Relation Roles:", fontsize=10, fontweight='bold')
    y_pos = 1.2
    for role, color in role_colors.items():
        ax.plot([1.9, 2.1], [y_pos, y_pos], '-', color=color, linewidth=2)
        ax.text(2.2, y_pos, role, fontsize=9)
        y_pos -= 0.1

    # Draw edges
    for u, v, data in G.edges(data=True):
        # Edges of hidden originals are represented by the merged node's edges
        if is_hidden_original(u) or is_hidden_original(v):
            continue

        pos1 = G.nodes[u]['position']
        pos2 = G.nodes[v]['position']

        # Edge attributes control thickness, transparency and colour
        weight = data.get('weight', 1.0)
        alpha = data.get('alpha', 0.5)
        color = data.get('color', 'gray')

        arrow = FancyArrowPatch(
            posA=pos1, posB=pos2,
            arrowstyle='-',
            color=color,
            linewidth=weight,
            alpha=alpha,
            mutation_scale=10
        )
        ax.add_patch(arrow)

        # Label sentence-word edges with their role, but only strong ones
        touches_sentence = u.startswith('sentence_') or v.startswith('sentence_')
        if touches_sentence and 'role' in data and alpha > 0.7:
            # Midpoint of the edge, nudged slightly to avoid sitting on the line
            offset = 0.05
            label_x = (pos1[0] + pos2[0]) / 2 + offset
            label_y = (pos1[1] + pos2[1]) / 2 + offset
            ax.text(label_x, label_y, data['role'], fontsize=7,
                    ha='center', va='center', color='black',
                    bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=1))

    # Draw sentence nodes first (so they are in back)
    for node, data in G.nodes(data=True):
        if not node.startswith('sentence_') or data.get('merged', False):
            continue
        pos = data['position']

        # Sentence box: a 0.6 x 0.2 rectangle centred on the node position
        rect = plt.Rectangle(
            (pos[0] - 0.3, pos[1] - 0.1),
            0.6, 0.2,
            facecolor=data['color'],
            alpha=0.7,
            edgecolor='black',
            linewidth=1,
            zorder=1
        )
        ax.add_patch(rect)

        # Sentence text on top of the box (its own bbox is fully transparent)
        ax.text(pos[0], pos[1], data['text'], fontsize=7, ha='center', va='center',
                color=contrast_color(data['color']), zorder=2,
                bbox=dict(facecolor=data['color'], alpha=0.0, pad=1))

    # Draw word nodes
    for node, data in G.nodes(data=True):
        # Skip rendering merged original nodes
        if data.get('merged', False) and not node.startswith('merged_'):
            continue

        # Skip sentence nodes (already drawn)
        if node.startswith('sentence_'):
            continue

        pos = data['position']

        # For merged nodes, draw the emoji
        if node.startswith('merged_'):
            # "size" is an area-like value (500 = reference), so linear
            # dimensions scale with its square root; this makes the size
            # pulse applied in the final phase visible
            scale = np.sqrt(max(data.get('size', 500), 0) / 500)
            ax.text(pos[0], pos[1], data['emoji'], fontsize=24 * scale, ha='center', va='center')

            # Draw a subtle highlight circle
            circle = plt.Circle(
                pos, 0.15 * scale,
                facecolor=data['color'],
                alpha=0.3,
                edgecolor=None,
                zorder=3
            )
            ax.add_patch(circle)
            continue

        # Regular word nodes
        shape = get_node_shape(data['type'])
        color = data['color']
        alpha = data.get('alpha', 1.0)
        size = data.get('size', 300)

        if size > 0:
            ax.scatter(pos[0], pos[1], c=color, marker=shape, s=size, alpha=alpha,
                       edgecolors='black', linewidths=1, zorder=4)

            # Label the node with the word itself
            ax.text(pos[0], pos[1], node, fontsize=9, ha='center', va='center',
                    color=contrast_color(color), zorder=5)

# Create the animation function
def create_semantica_animation():
    """Render the full animation and save it as a timestamped GIF.

    Builds the initial graph, then for each of the 121 frames (indices 0 to
    120) updates the graph, draws it, and captures the figure as a PNG held
    in memory. The captured frames are written to
    ``semantica_animation_<timestamp>.gif`` inside ``output_data_path`` at
    100 ms per frame, looping forever.

    Returns:
        str: Path of the GIF file that was written.
    """
    # Set up the output path
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    output_file = os.path.join(output_data_path, f"semantica_animation_{timestamp}.gif")

    print(f"Animation will be saved to: {output_file}")
    print("Initializing animation...")

    # Create figure and axes
    fig, ax = plt.subplots(figsize=(12, 10))

    try:
        # Create initial graph
        G, sentence_positions = create_initial_graph()

        # Set up animation
        total_frames = 120

        # Captured frames for the GIF
        frames = []

        # Use tqdm for progress bar
        for frame in tqdm(range(total_frames + 1), desc="Generating frames"):
            # Update graph based on frame
            G = update_graph(G, sentence_positions, frame, total_frames)

            # Draw the current state
            draw_graph(G, ax, frame, total_frames)

            # Capture this figure explicitly rather than whichever figure
            # pyplot currently considers active. The buffer is left open
            # because PIL reads the image data from it lazily.
            buf = io.BytesIO()
            fig.savefig(buf, format='png', dpi=100)
            buf.seek(0)
            frames.append(Image.open(buf))

            # tqdm.write prints the message without breaking up the
            # progress bar, which a bare print() does
            if frame % 30 == 0 and frame > 0:
                tqdm.write(f"Processed {frame}/{total_frames} frames ({frame/total_frames*100:.1f}%)")
    finally:
        # Release the figure even if a frame failed to render
        plt.close(fig)

    # Save the gif
    print(f"Saving animation to {output_file}...")
    frames[0].save(
        output_file,
        format='GIF',
        append_images=frames[1:],
        save_all=True,
        duration=100,  # milliseconds per frame
        loop=0  # loop forever
    )
    print("Animation saved!")

    return output_file

# Entry point: build the animation and report where the GIF was written
if __name__ == "__main__":
    print("Starting Semantica visualization with sentence relations")
    gif_path = create_semantica_animation()
    # Both closing messages go out in one call, one per line
    print(f"Animation created at: {gif_path}", "Process complete!", sep="\n")

Starting Semantica visualization with sentence relations
Animation will be saved to: c:\Users\erich\OneDrive\Documents\Python Projects\Semantica\Semantica\Notebooks\..\Data\Output\semantica_animation_20250430-211824.gif
Initializing animation...
Creating initial semantic graph structure...
Added 22 nodes to the graph


Generating frames:  26%|██▋       | 32/121 [00:04<00:11,  7.63it/s]

Processed 30/120 frames (25.0%)


Generating frames:  51%|█████     | 62/121 [00:08<00:08,  6.82it/s]

Processed 60/120 frames (50.0%)


Generating frames:  76%|███████▌  | 92/121 [00:12<00:04,  7.16it/s]

Processed 90/120 frames (75.0%)


  plt.savefig(buf, format='png', dpi=100)
  plt.savefig(buf, format='png', dpi=100)
Generating frames: 100%|██████████| 121/121 [00:16<00:00,  7.36it/s]


Processed 120/120 frames (100.0%)
Saving animation to c:\Users\erich\OneDrive\Documents\Python Projects\Semantica\Semantica\Notebooks\..\Data\Output\semantica_animation_20250430-211824.gif...
Animation saved!
Animation created at: c:\Users\erich\OneDrive\Documents\Python Projects\Semantica\Semantica\Notebooks\..\Data\Output\semantica_animation_20250430-211824.gif
Process complete!
