# Test and Demonstrate zero_gpu_vram_usage Analysis

This notebook tests and demonstrates the main functions in `src/analysis/zero_gpu_vram_usage.py` for analyzing hybrid workload efficiency and CPU-GPU balance.

In [2]:
# Import required modules
import sys
from pathlib import Path
import pandas as pd

# Make the project's `src` directory importable. The location is derived from
# the current working directory, so this only resolves correctly when the
# kernel's working directory sits directly below the directory containing `src`.
SRC_DIR = str(Path.cwd().parent / 'src')

# Insert at the front, and only once:
# - the membership guard keeps re-running this cell from piling duplicate
#   entries onto sys.path;
# - position 0 lets packages under `src` take precedence over any same-named
#   package found elsewhere on the path.
# NOTE(review): the ModuleNotFoundError previously recorded for
# 'database.DatabaseConnection' may have been caused by a different `database`
# package being found first -- confirm after re-running on a fresh kernel.
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

from analysis import zero_gpu_vram_usage

ModuleNotFoundError: No module named 'database.DatabaseConnection'

In [None]:
# Pull the jobs table out of DuckDB via the analysis module's loader,
# then show only the leading rows rather than the whole frame.
df = zero_gpu_vram_usage.load_jobs_dataframe_from_duckdb()
jobs_preview = df.head()
display(jobs_preview)

In [None]:
# Feed the loaded jobs through analyze_hybrid_workload_efficiency and keep the
# result as `df_hybrid` for the cells that follow.
df_hybrid = zero_gpu_vram_usage.analyze_hybrid_workload_efficiency(df)

# Preview the leading rows, then report how many rows came back.
hybrid_preview = df_hybrid.head()
display(hybrid_preview)
print(f"Hybrid jobs found: {len(df_hybrid)}")

In [None]:
# Evaluate CPU-GPU balance on the hybrid jobs; the result is indexed by key below.
analysis_results = zero_gpu_vram_usage.evaluate_cpu_gpu_balance(df_hybrid)

# Headline totals, printed exactly as returned.
for label, key in (
    ("Total jobs:", 'total_jobs'),
    ("Total GPU hours:", 'total_gpu_hours'),
):
    print(label, analysis_results[key])

# Efficiency figures, rendered as percentages with two decimal places.
for label, key in (
    ("Average VRAM efficiency:", 'avg_efficiency'),
    ("Median VRAM efficiency:", 'median_efficiency'),
):
    print(label, format(analysis_results[key], ".2%"))

# One bullet per recommendation, under a heading preceded by a blank line.
print("\nRecommendations:")
for recommendation in analysis_results['recommendations']:
    print("-", recommendation)

# Leave the efficiency patterns table as the cell's final expression so the
# notebook renders it with its rich display.
analysis_results['efficiency_patterns']

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram with a KDE overlay of the `vram_efficiency` column of df_hybrid;
# missing values are dropped before plotting.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df_hybrid['vram_efficiency'].dropna(), bins=30, kde=True, ax=ax)
ax.set_xlabel('VRAM Efficiency')
ax.set_ylabel('Number of Jobs')
ax.set_title('Distribution of VRAM Efficiency for Hybrid Jobs')
plt.show()

# Horizontal bar chart of GPU hours for the first ten rows of the
# 'top_inefficient_users' table; skipped when the results lack that key.
if 'top_inefficient_users' in analysis_results:
    top_users = analysis_results['top_inefficient_users'].head(10)
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(y=top_users.index, x=top_users['GPU_Hours'], orient='h', ax=ax)
    ax.set_xlabel('GPU Hours')
    ax.set_ylabel('User')
    ax.set_title('Top 10 Users by GPU Hours (Hybrid Jobs)')
    plt.show()