    "Controls:",
    "B: Balanced Precision",
    "S: Standard Precision",
    "Space: Pause/Resume",
    "R: Reset",
    "T: Toggle Trails",
    "Up/Down: Adjust Speed",
    "Mouse: Rotate View",
    "Scroll: Zoom In/Out"
    "ESC": exit

In [2]:
import numpy as np
import cupy as cp
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import time
import pygame
from pygame.locals import DOUBLEBUF, OPENGL
from OpenGL.GL import *
from OpenGL.GLU import *
import math
from tensor_matrix_ops import TensorMatrixOps  # Tensor core wrapper

class TensorSolarSystem:
    """N-body simulation of solar system with tensor core acceleration.

    The simulation state lives in parallel CuPy arrays with one row per entry
    of ``bodies``: ``positions`` and ``velocities`` (shape ``(n, 3)``),
    ``masses`` and ``is_static`` (shape ``(n,)``). Bodies flagged static (the
    Sun) exert gravity but are never moved. ``trails`` holds, per body, the
    host-side history of visited positions.
    """

    # Longest position history kept per body; older points are dropped first.
    MAX_TRAIL_LENGTH = 5000

    def __init__(self, use_tensor_cores=True):
        """Initialize the simulator with optional tensor core usage.

        Args:
            use_tensor_cores: When true, ``calculate_forces`` dispatches to
                ``calculate_forces_tensor``; otherwise to
                ``calculate_forces_cupy``.
        """
        # Initialize tensor core operations
        self.tensor_ops = TensorMatrixOps()
        self.use_tensor_cores = use_tensor_cores

        # Constants
        self.G = 6.674e-11  # Gravitational constant
        self.AU = 149.6e9   # 1 AU in meters
        self.TIME_STEP = 24*3600  # 1 day in seconds
        self.SOFTENING = 1e-6  # Softening parameter to prevent division by zero

        # Celestial bodies
        self.init_celestial_bodies()

        # Performance metrics (seconds; converted to ms in get_performance_stats)
        self.force_calc_time = 0
        self.total_calc_time = 0
        self.elapsed_simulation_time = 0

    def init_celestial_bodies(self):
        """Initialize the solar system bodies.

        Rebuilds ``bodies``, the state arrays derived from it, and empty
        ``trails``. Every planet starts on the +x axis at its orbital distance
        with its velocity along +y.
        """
        sun = {
            "name": "Sun",
            "mass": 1.989e30,  # kg
            "radius": 695700 * 1000,  # m
            "position": cp.array([0, 0, 0], dtype=cp.float64),  # m
            "velocity": cp.array([0, 0, 0], dtype=cp.float64),  # m/s
            "color": "yellow",
            "size": 20,
            "static": True,
        }

        # (name, mass [kg], radius [km], distance [AU], speed [m/s], color, size)
        planet_table = (
            ("Mercury", 3.3e23, 2440, 0.387, 47870, "gray", 5),
            ("Venus", 4.8675e24, 6052, 0.723, 35020, "wheat", 8),
            ("Earth", 5.972e24, 6371, 1.0, 29783, "blue", 8),
            ("Mars", 6.39e23, 3389, 1.524, 24130, "red", 7),
            ("Jupiter", 1.898e27, 69911, 5.203, 13070, "orange", 15),
            ("Saturn", 5.683e26, 58232, 9.537, 9690, "goldenrod", 14),
            ("Uranus", 8.681e25, 25362, 19.191, 6810, "lightblue", 10),
            ("Neptune", 1.024e26, 24622, 30.069, 5430, "blue", 10),
        )

        self.bodies = [sun]
        for name, mass, radius_km, distance_au, speed, color, size in planet_table:
            # Planets deliberately carry no "static" key; it defaults to False below.
            self.bodies.append({
                "name": name,
                "mass": mass,
                "radius": radius_km * 1000,
                "position": cp.array([distance_au * self.AU, 0, 0], dtype=cp.float64),
                "velocity": cp.array([0, speed, 0], dtype=cp.float64),
                "color": color,
                "size": size,
            })

        # Setup arrays for tensor core calculation (copies of the per-body data)
        self.positions = cp.stack([body["position"] for body in self.bodies])
        self.velocities = cp.stack([body["velocity"] for body in self.bodies])
        self.masses = cp.array([body["mass"] for body in self.bodies], dtype=cp.float64)
        self.is_static = cp.array(
            [body.get("static", False) for body in self.bodies], dtype=bool
        )

        # Orbit trails
        self.trails = [[] for _ in self.bodies]

    def calculate_forces_cupy(self):
        """Calculate forces using standard CuPy operations.

        Pairwise reference implementation: for every non-static body ``i`` it
        sums ``G * m_i * m_j / r^2`` along ``r_j - r_i`` over all ``j != i``.

        Returns:
            ``(n, 3)`` float64 array of net forces; rows of static bodies stay
            zero.
        """
        num_bodies = len(self.bodies)
        forces = cp.zeros((num_bodies, 3), dtype=cp.float64)

        # Read the flags once on the host; testing a device scalar inside the
        # loop would force a blocking device-to-host read per body.
        static_flags = self.is_static.tolist()
        softening_sq = self.SOFTENING**2

        for i in range(num_bodies):
            if static_flags[i]:
                continue

            for j in range(num_bodies):
                if i == j:
                    continue

                # Displacement vector (r_j - r_i)
                r_vec = self.positions[j] - self.positions[i]

                # Softened squared distance and its magnitude
                r_squared = cp.sum(r_vec**2) + softening_sq
                r_mag = cp.sqrt(r_squared)

                # Force magnitude (F = G * m_i * m_j / r^2)
                force_mag = self.G * self.masses[i] * self.masses[j] / r_squared

                # Accumulate the force vector (F * r_vec / r) on body i
                forces[i] += force_mag * r_vec / r_mag

        return forces

    def calculate_forces_tensor(self):
        """Calculate forces using tensor core acceleration.

        For each non-static body the displacements to all bodies are formed at
        once. With ``use_tensor_cores`` set, the squared distances are read
        from the diagonal of ``tensor_ops.matmul(r, r.T)``; otherwise they are
        summed directly with CuPy.

        Returns:
            ``(n, 3)`` float64 array of net forces; rows of static bodies stay
            zero. Also stores the elapsed wall-clock time in
            ``force_calc_time``.
        """
        start_time = time.time()

        num_bodies = len(self.bodies)
        forces = cp.zeros((num_bodies, 3), dtype=cp.float64)
        softening_sq = self.SOFTENING**2

        # Host-side indices of the bodies that actually feel forces
        active_indices = cp.where(~self.is_static)[0].tolist()

        for i in active_indices:
            # Displacements (r_j - r_i) to every body j, shape (num_bodies, 3)
            r_vecs = self.positions - self.positions[i].reshape(1, 3)

            if self.use_tensor_cores:
                # The Gram matrix r . r^T carries |r_j|^2 on its diagonal.
                # NOTE(review): positions are ~1e11 m, so these values reach
                # ~1e22; if TensorMatrixOps.matmul computes in reduced
                # precision this may overflow or lose accuracy - confirm
                # against tensor_matrix_ops.
                gram = self.tensor_ops.matmul(r_vecs, r_vecs.T)
                # astype() yields a writable float64 copy of the diagonal view.
                r_squared = cp.diagonal(cp.asarray(gram)).astype(cp.float64)
            else:
                # Compute squared magnitudes directly
                r_squared = cp.sum(r_vecs**2, axis=1)

            # Add softening
            r_squared += softening_sq

            # Infinite self-distance yields zero self-force
            r_squared[i] = float('inf')

            # F_mag = G * m_i * m_j / r^2
            force_mags = self.G * self.masses[i] * self.masses / r_squared

            # Distances as a column vector, clamped so that the self entry
            # (infinite) and any degenerate zero cannot produce NaN below.
            r_mags = cp.sqrt(r_squared).reshape(-1, 1)
            r_mags = cp.where(r_mags > 1e20, 1e20, r_mags)
            r_mags = cp.maximum(r_mags, 1e-20)

            # Unit directions scaled by the force magnitudes, summed over j
            force_vecs = (r_vecs / r_mags) * force_mags.reshape(-1, 1)
            forces[i] = cp.sum(force_vecs, axis=0)

        self.force_calc_time = time.time() - start_time
        return forces

    def calculate_forces(self):
        """Calculate forces using selected method.

        Returns:
            ``(n, 3)`` array of net forces from the tensor or the plain CuPy
            path, depending on ``use_tensor_cores``. The wall-clock duration
            of the call is stored in ``force_calc_time``.
        """
        start_time = time.time()

        if self.use_tensor_cores:
            forces = self.calculate_forces_tensor()
        else:
            forces = self.calculate_forces_cupy()

        self.force_calc_time = time.time() - start_time
        return forces

    def update(self, num_steps=1):
        """Update the simulation for the given number of steps.

        Uses velocity Verlet integration (half kick, drift, half kick), which
        is more accurate than simple Euler integration. Static bodies are
        never moved. Each step appends the new position of every moving body
        to its trail and advances ``elapsed_simulation_time`` by ``TIME_STEP``.

        Args:
            num_steps: Number of integration steps to perform.
        """
        total_start = time.time()

        if num_steps > 0:
            movable = ~self.is_static
            movable_ids = cp.where(movable)[0].tolist()
            movable_masses = self.masses[movable].reshape(-1, 1)

            # The forces computed at the end of one step are exactly the
            # forces needed at the start of the next, so they are carried
            # over instead of being evaluated twice per step.
            forces = self.calculate_forces()

            for _ in range(num_steps):
                dt = self.TIME_STEP

                # First half of velocity update (v += 0.5*a*dt, a = F/m)
                self.velocities[movable] += 0.5 * forces[movable] / movable_masses * dt

                # Position update (x += v*dt)
                self.positions[movable] += self.velocities[movable] * dt

                # One device-to-host copy per step feeds every trail
                snapshot = self.positions.get()
                for i in movable_ids:
                    trail = self.trails[i]
                    trail.append(snapshot[i].copy())

                    # Keep trail length reasonable
                    if len(trail) > self.MAX_TRAIL_LENGTH:
                        trail.pop(0)

                # Recalculate forces with updated positions
                forces = self.calculate_forces()

                # Second half of velocity update (v += 0.5*a*dt)
                self.velocities[movable] += 0.5 * forces[movable] / movable_masses * dt

                # Update elapsed time
                self.elapsed_simulation_time += dt

            # Publish the new state through the body dictionaries
            for i in movable_ids:
                self.bodies[i]["position"] = self.positions[i]
                self.bodies[i]["velocity"] = self.velocities[i]

        self.total_calc_time = time.time() - total_start

    def get_positions(self):
        """Get current positions of all bodies in AU.

        Returns:
            List with one host-side array per body, in ``bodies`` order.
        """
        return [body["position"].get() / self.AU for body in self.bodies]

    def get_trails(self):
        """Get all orbit trails in AU.

        Returns:
            List (one entry per body) of lists of position arrays.
        """
        return [[np.array(pos) / self.AU for pos in trail] for trail in self.trails]

    def reset(self):
        """Reset the simulation to initial state.

        Rebuilds bodies, state arrays and trails and zeroes the elapsed
        simulation time. ``TIME_STEP`` and the timing metrics are left as-is.
        """
        self.init_celestial_bodies()
        self.elapsed_simulation_time = 0

    def get_performance_stats(self):
        """Get performance statistics.

        Returns:
            Dict with the last force and update timings in milliseconds, the
            elapsed simulation time in days and in years of 365.25 days, and
            the ``use_tensor_cores`` flag.
        """
        return {
            "force_calc_time": self.force_calc_time * 1000,  # ms
            "total_calc_time": self.total_calc_time * 1000,  # ms
            "elapsed_days": self.elapsed_simulation_time / (24*3600),
            "elapsed_years": self.elapsed_simulation_time / (365.25*24*3600),
            "use_tensor_cores": self.use_tensor_cores
        }

def _draw_overlay_surface(surface):
    """Draw a pygame RGBA surface on top of the current OpenGL frame."""
    width, height = surface.get_size()
    # Flip vertically: pygame rows start at the top, glDrawPixels fills upward
    # from the raster position.
    pixels = pygame.image.tostring(surface, "RGBA", True)

    glDisable(GL_LIGHTING)
    glDisable(GL_DEPTH_TEST)  # The overlay must never be hidden by the scene
    glEnable(GL_BLEND)
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
    glWindowPos2i(0, 0)
    glDrawPixels(width, height, GL_RGBA, GL_UNSIGNED_BYTE, pixels)
    glDisable(GL_BLEND)
    glEnable(GL_DEPTH_TEST)


def run_solar_system_simulation():
    """Run the solar system simulation with visualization.

    Opens a fullscreen OpenGL window, steps the active simulator every frame
    and draws bodies, orbit trails and a text overlay until the window is
    closed or ESC is pressed. Keys 1/2 switch between the tensor core and the
    plain CuPy simulator; see the on-screen list for the other controls.
    pygame is shut down even if the loop raises.
    """
    pygame.init()
    try:
        display = (1600, 1000)

        # Use HWSURFACE, OPENGL, and enable vsync
        pygame.display.gl_set_attribute(pygame.GL_DOUBLEBUFFER, 1)
        pygame.display.gl_set_attribute(pygame.GL_SWAP_CONTROL, 1)  # Enable vsync
        pygame.display.set_mode(display, DOUBLEBUF | OPENGL | pygame.HWSURFACE | pygame.FULLSCREEN)
        pygame.display.set_caption("Solar System N-Body Simulation - Tensor Core Edition")

        # OpenGL setup
        glClearColor(0.0, 0.0, 0.1, 1.0)
        glEnable(GL_DEPTH_TEST)
        glShadeModel(GL_SMOOTH)
        glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST)

        # Basic lighting; the light position is set per frame (see main loop)
        glEnable(GL_LIGHTING)
        glEnable(GL_LIGHT0)
        glLightfv(GL_LIGHT0, GL_DIFFUSE, (1, 1, 0.9, 1))
        glLightfv(GL_LIGHT0, GL_AMBIENT, (0.2, 0.2, 0.2, 1))

        # Projection setup
        glMatrixMode(GL_PROJECTION)
        gluPerspective(45, (display[0]/display[1]), 0.1, 100.0)
        glMatrixMode(GL_MODELVIEW)

        # One quadric serves every sphere; allocating one per body per frame
        # is needless churn.
        sphere_quadric = gluNewQuadric()
        gluQuadricNormals(sphere_quadric, GLU_SMOOTH)
        gluQuadricTexture(sphere_quadric, GL_TRUE)

        # Camera position
        camera_dist = 20.0
        camera_pitch = 30.0
        camera_yaw = 0.0

        # Create two simulators for comparison
        tensor_sim = TensorSolarSystem(use_tensor_cores=True)
        cupy_sim = TensorSolarSystem(use_tensor_cores=False)

        # Currently active simulator
        active_sim = tensor_sim

        # Simulation parameters
        paused = False
        time_scale = 5.0  # days per frame
        show_trails = True

        # Colors for bodies
        colors = {
            "Sun": (1.0, 1.0, 0.0),
            "Mercury": (0.7, 0.7, 0.7),
            "Venus": (0.8, 0.7, 0.5),
            "Earth": (0.0, 0.6, 1.0),
            "Mars": (1.0, 0.4, 0.3),
            "Jupiter": (0.9, 0.6, 0.3),
            "Saturn": (0.9, 0.8, 0.5),
            "Uranus": (0.6, 0.8, 0.9),
            "Neptune": (0.2, 0.2, 0.8)
        }

        # Font for text rendering
        pygame.font.init()
        font = pygame.font.SysFont('Arial', 18)

        # Off-screen surface the text overlay is composed on
        text_surface = pygame.Surface(display, pygame.SRCALPHA)

        # Track FPS
        fps_history = []
        fps_update_time = time.time()

        # Main loop
        steps_per_frame = 5
        clock = pygame.time.Clock()

        running = True
        while running:
            frame_start = time.time()

            # Handle events
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False

                elif event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_ESCAPE:
                        running = False
                    elif event.key == pygame.K_SPACE:
                        paused = not paused
                    elif event.key == pygame.K_r:
                        tensor_sim.reset()
                        cupy_sim.reset()
                    elif event.key == pygame.K_t:
                        show_trails = not show_trails
                    elif event.key == pygame.K_UP:
                        time_scale = min(time_scale * 1.5, 100)
                    elif event.key == pygame.K_DOWN:
                        time_scale = max(time_scale / 1.5, 0.1)
                    elif event.key == pygame.K_1:
                        active_sim = tensor_sim
                    elif event.key == pygame.K_2:
                        active_sim = cupy_sim

                elif event.type == pygame.MOUSEMOTION:
                    if pygame.mouse.get_pressed()[0]:
                        dx, dy = event.rel
                        camera_yaw += dx * 0.5
                        camera_pitch = max(-89, min(89, camera_pitch - dy * 0.5))

                elif event.type == pygame.MOUSEBUTTONDOWN:
                    if event.button == 4:  # Scroll up
                        camera_dist = max(5, camera_dist - 1)
                    elif event.button == 5:  # Scroll down
                        camera_dist = min(50, camera_dist + 1)

            # Clear the color and depth buffers
            glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)

            # Update simulation
            if not paused:
                active_sim.TIME_STEP = 24*3600 * time_scale / steps_per_frame
                active_sim.update(steps_per_frame)

            # Reset model view matrix
            glLoadIdentity()

            # Orbit the camera around the origin
            yaw = math.radians(camera_yaw)
            pitch = math.radians(camera_pitch)
            gluLookAt(
                camera_dist * math.cos(yaw) * math.cos(pitch),
                camera_dist * math.sin(pitch),
                camera_dist * math.sin(yaw) * math.cos(pitch),
                0, 0, 0,
                0, 1, 0
            )

            # A light position is transformed by the modelview matrix current
            # at the time of the call, so it has to be specified after the
            # camera transform to sit at the Sun rather than at the eye.
            glLightfv(GL_LIGHT0, GL_POSITION, (0, 0, 0, 1))

            # Draw celestial bodies
            for body in active_sim.bodies:
                glPushMatrix()

                pos = body["position"].get() / active_sim.AU
                glTranslatef(pos[0], pos[1], pos[2])

                # Set color
                color = colors.get(body["name"], (1, 1, 1))
                glColor3f(*color)

                # Set material properties
                glMaterialfv(GL_FRONT, GL_AMBIENT_AND_DIFFUSE, (*color, 1.0))
                glMaterialfv(GL_FRONT, GL_SPECULAR, (1.0, 1.0, 1.0, 1.0))
                glMaterialf(GL_FRONT, GL_SHININESS, 50.0)

                # The Sun is drawn unlit so it shows its full color
                if body["name"] == "Sun":
                    glDisable(GL_LIGHTING)
                else:
                    glEnable(GL_LIGHTING)

                size = body["size"] / 300.0  # Scale down for visualization
                gluSphere(sphere_quadric, size, 30, 30)

                glPopMatrix()

            # Draw orbit trails
            glDisable(GL_LIGHTING)
            if show_trails:
                for i, trail in enumerate(active_sim.get_trails()):
                    if i == 0:  # Skip Sun
                        continue

                    color = colors.get(active_sim.bodies[i]["name"], (1, 1, 1))
                    glColor3f(*color)

                    # A line strip joins consecutive points into one
                    # continuous curve; GL_LINES over disjoint pairs would
                    # drop every other segment.
                    glBegin(GL_LINE_STRIP)
                    for point in trail:
                        glVertex3f(point[0], point[1], point[2])
                    glEnd()

            # Render text overlay
            text_surface.fill((0, 0, 0, 0))  # Clear with transparent background

            # Frame rate estimate from this frame's work time so far
            current_time = time.time()
            frame_time = current_time - frame_start

            # Sample the estimate every 0.5 seconds, keeping the last 10 samples
            if current_time - fps_update_time > 0.5:
                fps_history.append(1.0 / max(frame_time, 0.0001))
                if len(fps_history) > 10:
                    fps_history.pop(0)
                fps_update_time = current_time

            # Calculate average FPS
            avg_fps = sum(fps_history) / max(len(fps_history), 1)

            # Render stats
            stats = active_sim.get_performance_stats()
            lines = [
                f"Mode: {'TENSOR CORES' if stats['use_tensor_cores'] else 'STANDARD CUPY'}",
                f"FPS: {avg_fps:.1f}",
                f"Simulation: {stats['elapsed_years']:.2f} Earth years",
                f"Time Scale: {time_scale:.1f} days/frame",
                f"Force Calc: {stats['force_calc_time']:.2f} ms",
                "",
                "Controls:",
                "1: Tensor Core Mode",
                "2: CuPy Mode",
                "Space: Pause/Resume",
                "R: Reset",
                "T: Toggle Trails",
                "Up/Down: Adjust Speed",
                "Mouse: Rotate View",
                "Scroll: Zoom In/Out"
            ]

            # Render text to overlay surface
            y = 20
            for line in lines:
                text_render = font.render(line, True, (255, 255, 255))
                text_surface.blit(text_render, (10, y))
                y += 20

            # Mode indicator
            mode_text = "TENSOR CORE MODE" if active_sim.use_tensor_cores else "STANDARD CUPY MODE"
            mode_color = (0, 255, 0) if active_sim.use_tensor_cores else (255, 255, 0)
            mode_render = font.render(mode_text, True, mode_color)
            text_surface.blit(mode_render, (display[0] - mode_render.get_width() - 10, 20))

            # Blitting onto the OpenGL display surface never reaches the
            # screen, so the overlay is pushed through OpenGL instead.
            _draw_overlay_surface(text_surface)

            # Swap buffers
            pygame.display.flip()

            # Control frame rate
            clock.tick(60)  # Cap at 60 FPS

        gluDeleteQuadric(sphere_quadric)
    finally:
        # Always leave fullscreen and release the window, even on errors
        pygame.quit()

# Start the interactive viewer only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    run_solar_system_simulation()

Initializing CUDA...
CUDA initialization complete
Function signatures configured
Initializing CUDA...
CUDA initialization complete
Function signatures configured
