# System Diagnostics and Resource Monitoring

This notebook demonstrates how to use the utility modules to monitor system resources, detect GPU capabilities, and optimize memory usage in the research environment.

## 1. Import Required Modules

Let's start by importing the necessary utilities from our research environment.

In [None]:
# Import system utilities
import sys
import os
import psutil
import platform

# Import custom utilities
sys.path.append('..')
from utils.system_utils import system_manager
from utils.gpu_utils import gpu_manager
from utils.memory_optimizer import get_memory_optimization_settings

# Report the interpreter version and the OS platform string, in that order.
# Each probe is called right before its line is printed.
for label, probe in (
    ("Python version", platform.python_version),
    ("System platform", platform.platform),
):
    print(f"{label}: {probe()}")

## 2. System Resource Overview

Let's check the current system resources using our utility functions.

In [None]:
# Take one snapshot of the system metrics from the project's system manager
system_summary = system_manager.get_system_summary()

# Percentage gauges, printed with one decimal place
for label, key in (
    ("CPU Usage", "cpu_percent"),
    ("Memory Usage", "memory_percent"),
    ("Disk Usage", "disk_percent"),
):
    print(f"{label}: {system_summary[key]:.1f}%")

# Network figures, scaled by 1024 before printing.
# NOTE(review): the labels say KB/s but the keys are named *_bytes; confirm
# that get_system_summary() returns per-second rates, not cumulative totals.
for label, key in (
    ("Network In", "network_recv_bytes"),
    ("Network Out", "network_sent_bytes"),
):
    print(f"{label}: {system_summary[key]/1024:.1f} KB/s")

## 3. GPU Detection and Information

Check if GPUs are available and get their details.

In [None]:
# Ask the GPU manager whether any GPU is usable, and report the answer
gpu_available = gpu_manager.check_gpu_availability()
print(f"GPU available: {gpu_available}")

if not gpu_available:
    print("No GPU detected. Running in CPU-only mode.")
else:
    # One entry per GPU; each entry is read with .get() so missing fields
    # fall back to a placeholder instead of raising.
    gpu_info = gpu_manager.get_gpu_info()
    print(f"\nGPU Count: {len(gpu_info)}")

    for idx, gpu in enumerate(gpu_info):
        print(f"\nGPU {idx}:")
        print(f"  Name: {gpu.get('name', 'Unknown')}")
        # Numeric fields share one layout: label, value (default 0), unit
        for label, field, unit in (
            ("Memory Total", "memory_total_mb", " MB"),
            ("Memory Used", "memory_used_mb", " MB"),
            ("Utilization", "utilization_percent", "%"),
            ("Temperature", "temperature_c", "°C"),
        ):
            print(f"  {label}: {gpu.get(field, 0)}{unit}")

    # Report whether Docker can expose the GPU(s), as seen by the GPU manager
    docker_gpu_support = gpu_manager.check_docker_gpu_support()
    print(f"\nNVIDIA Docker support: {docker_gpu_support}")

## 4. Memory Optimization Settings

Get recommended memory optimization settings for ML frameworks.

In [None]:
# Fetch the recommended settings; the result is iterated as name/value pairs
memory_settings = get_memory_optimization_settings()

# Print a header followed by one indented "name: value" line per setting
print("Recommended Memory Optimization Settings:")
for setting_name, setting_value in memory_settings.items():
    print(f"  {setting_name}: {setting_value}")

## 5. Real-time Resource Monitoring

Set up a simple monitoring function to track resources over time.

In [None]:
import time
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# Shared sample buffers. They live at module level so the collected data
# stays available to later notebook cells after monitor_resources() returns.
timestamps = []
cpu_data = []
memory_data = []


def monitor_resources(duration=30, interval=1):
    """Sample CPU and memory usage and redraw a line plot after each sample.

    Every call empties the module-level ``timestamps``, ``cpu_data`` and
    ``memory_data`` lists and refills them, so they always hold the data of
    the most recent run only.

    Args:
        duration: Total monitoring time in seconds. Sampling stops once this
            much time has elapsed since the start of the call.
        interval: Seconds to wait between two consecutive samples.

    Returns:
        None. Results are exposed through the three module-level lists and
        the plot.
    """
    plt.figure(figsize=(12, 6))

    # Start every run from empty buffers
    timestamps.clear()
    cpu_data.clear()
    memory_data.clear()

    # Use the monotonic clock for elapsed time so a wall-clock adjustment
    # during the run cannot shorten, lengthen or reorder the samples.
    start_time = time.monotonic()
    elapsed = 0.0

    while elapsed < duration:
        # psutil.cpu_percent() without an interval reports usage since the
        # previous call; psutil documents that the very first such call in a
        # process returns a meaningless 0.0, so the first point may be 0.
        cpu_percent = psutil.cpu_percent()
        memory_percent = psutil.virtual_memory().percent

        timestamps.append(elapsed)
        cpu_data.append(cpu_percent)
        memory_data.append(memory_percent)

        # Clear and redraw the whole plot with the data collected so far
        plt.clf()
        plt.plot(timestamps, cpu_data, label='CPU %')
        plt.plot(timestamps, memory_data, label='Memory %')
        plt.xlabel('Time (seconds)')
        plt.ylabel('Usage %')
        plt.title('System Resource Usage')
        plt.legend()
        plt.grid(True)
        plt.ylim(0, 100)
        plt.xlim(0, duration)

        # plt.pause() already waits `interval` seconds. The previous version
        # followed it with time.sleep(interval), which doubled the sampling
        # period and roughly halved the number of points per run.
        plt.pause(interval)

        elapsed = time.monotonic() - start_time

    plt.show()

# Uncomment to run monitoring for 30 seconds
# monitor_resources(duration=30, interval=1)

## 6. Summary and Recommendations

Based on the system diagnostics, here are some recommendations for optimizing your research environment.

In [None]:
def get_recommendations():
    """Build a list of tuning suggestions from the current system metrics.

    Reads a fresh summary from ``system_manager`` and, when a GPU is
    available, per-GPU details from ``gpu_manager``. A suggestion is added
    for each metric above 80%: CPU usage, system memory usage, and the
    memory usage of each individual GPU. When no GPU is available a
    suggestion to use a GPU-enabled system is added instead.

    Returns:
        list[str]: The suggestions, in the order CPU, memory, GPU. If no
        check produced a suggestion, the list holds a single message saying
        the system looks good, so the result is never empty.
    """
    recommendations = []
    system_summary = system_manager.get_system_summary()

    # CPU recommendations
    if system_summary['cpu_percent'] > 80:
        recommendations.append("CPU usage is high. Consider reducing the number of parallel processes.")

    # Memory recommendations
    if system_summary['memory_percent'] > 80:
        recommendations.append("Memory usage is high. Consider using smaller batch sizes or enabling memory optimization.")

    # GPU recommendations
    if gpu_manager.check_gpu_availability():
        for idx, gpu in enumerate(gpu_manager.get_gpu_info()):
            # `or 0` also covers a key that is present but None
            memory_total_mb = gpu.get('memory_total_mb') or 0
            memory_used_mb = gpu.get('memory_used_mb') or 0

            # Without a positive total no meaningful percentage exists.
            # Skip this GPU rather than divide by zero (total reported as 0)
            # or report an absurd percentage (total missing).
            if memory_total_mb <= 0:
                continue

            memory_used_percent = memory_used_mb / memory_total_mb * 100
            if memory_used_percent > 80:
                recommendations.append(f"GPU {idx} memory usage is high ({memory_used_percent:.1f}%). Consider enabling mixed precision training.")
    else:
        recommendations.append("No GPU detected. For faster training, consider running on a GPU-enabled system.")

    # If no specific recommendations
    if not recommendations:
        recommendations.append("System resources look good. No specific optimizations needed at this time.")

    return recommendations

# Collect the current suggestions, then print them as a list numbered from 1
recommendations = get_recommendations()
print("System Recommendations:")
for position, suggestion in enumerate(recommendations, start=1):
    print(f"{position}. {suggestion}")