# Test and Demonstrate zero_gpu_vram_usage Analysis

This notebook tests and demonstrates the main functions in `src/analysis/zero_gpu_vram_usage.py` for analyzing hybrid workload efficiency and CPU-GPU balance.

In [None]:
# Import required modules
import sys
from pathlib import Path
import pandas as pd

The Jupyter server should be started from the directory that contains this notebook, so that the following cell (which takes the parent of the current working directory) prints the project root:

In [None]:
# The project root is taken to be the parent of the (resolved) current working
# directory; it is kept as a plain string because it is later compared against
# and inserted into sys.path.
working_dir = Path.cwd().resolve()
project_root = str(working_dir.parent)
print(f"Project root: {project_root}")

In [None]:
# Add project root to sys.path for module imports
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from src.analysis import zero_gpu_vram_usage

# Automatically reload modules before executing code
# This is useful for development to see changes without restarting the kernel.
%load_ext autoreload
# Reload all modules imported with %aimport every time before executing the Python code typed.
%autoreload 1
%aimport src.analysis.zero_gpu_vram_usage

In [None]:
# Read the jobs table out of the DuckDB file into a DataFrame, then show a
# short preview followed by the total row count.
df = zero_gpu_vram_usage.load_jobs_dataframe_from_duckdb(
    db_path="../data/slurm_data.db",
)
display(df.head())
print(len(df))

In [None]:
# Run analyze_hybrid_workload_efficiency with modular filters.
# As the argument names suggest (confirm against the function's docstring), this
# example selects jobs that requested 0 GB of VRAM, were allocated more than
# 0 GB, used exactly 0 GPU memory, and had at least 1 GPU. No minimum duration
# is applied (elapsed_seconds_min=0); raise it, e.g. to 600, to skip short jobs.
# You can change the arguments to filter for different criteria.

df_hybrid = zero_gpu_vram_usage.analyze_hybrid_workload_efficiency(
    df,
    requested_vram_filter=0,  # or a function, e.g., lambda x: x <= 2
    allocated_vram_greater_than=0,
    gpu_mem_usage_min=None,
    gpu_mem_usage_max=None,
    gpu_mem_usage_exact=0,
    gpus_min=1,
    elapsed_seconds_min=0,  # all jobs
)

# Show every column for this preview only. option_context restores whatever
# value 'display.max_columns' had before, even if display() raises, instead of
# forcing the option back to the pandas default.
with pd.option_context("display.max_columns", None):
    display(df_hybrid.head())
print(f"Hybrid jobs found: {len(df_hybrid)}")

In [None]:
# Compute the CPU-GPU balance analysis for the filtered hybrid jobs.
analysis_results = zero_gpu_vram_usage.evaluate_cpu_gpu_balance(df_hybrid)

# Headline statistics: (label, result key, optional format spec). Entries are
# looked up and printed one at a time, in this order.
summary_fields = (
    ("Total jobs:", "total_jobs", None),
    ("Total GPU hours:", "total_gpu_hours", None),
    ("Average VRAM efficiency:", "avg_efficiency", ".2%"),
    ("Median VRAM efficiency:", "median_efficiency", ".2%"),
)
for label, key, spec in summary_fields:
    value = analysis_results[key]
    print(label, value if spec is None else format(value, spec))

# List the recommendations produced by the analysis, one bullet per line.
print("\nRecommendations:")
for recommendation in analysis_results["recommendations"]:
    print("-", recommendation)

# Leave the efficiency patterns table as the cell's last expression so it is
# rendered as the cell output.
analysis_results["efficiency_patterns"]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram (with KDE overlay) of the non-null VRAM efficiency values of the
# hybrid jobs, with the x-axis fixed to the [0, 1] range.
fig, ax = plt.subplots(figsize=(8, 5))
efficiency_values = df_hybrid["vram_efficiency"].dropna()
sns.histplot(efficiency_values, bins=30, kde=True, ax=ax)
ax.set_xlabel("VRAM Efficiency")
ax.set_ylabel("Number of Jobs")
ax.set_title("Distribution of VRAM Efficiency for Hybrid Jobs")
ax.set_xlim(0, 1.0)
plt.show()

# Horizontal bar chart of GPU hours for the first 10 rows of the
# "top_inefficient_users" table, drawn only when the analysis returned one.
if "top_inefficient_users" in analysis_results:
    top_users = analysis_results["top_inefficient_users"].head(10)
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(x=top_users["GPU_Hours"], y=top_users.index, orient="h", ax=ax)
    ax.set_xlabel("GPU Hours")
    ax.set_ylabel("User")
    ax.set_title("Top 10 Inefficient Users by GPU Hours (Hybrid Jobs)")
    plt.show()

In [None]:
# Select the jobs that requested 0GB of VRAM yet were allocated a GPU, using
# the module's dedicated filter function. Adjust requested_vram and gpus_min
# to explore other combinations.
zero_vram_jobs = zero_gpu_vram_usage.filter_zero_vram_requested_with_gpu_allocated(
    df,
    requested_vram=0,
    gpus_min=1,
)
display(zero_vram_jobs.head(10))
print(f"Found {len(zero_vram_jobs)} jobs where 0GB VRAM was requested but a GPU was allocated.")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Summary plots for the jobs where 0GB VRAM was requested but a GPU was
# allocated. With nothing to plot, only a message is printed.
if zero_vram_jobs.empty:
    print("No jobs found where 0GB VRAM was requested but a GPU was allocated.")
else:
    # 1) Histogram (with KDE overlay) of the non-null GPU memory usage values.
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(zero_vram_jobs["GPUMemUsage"].dropna(), bins=30, kde=True, ax=ax)
    ax.set_xlabel("GPU Memory Usage (bytes)")
    ax.set_ylabel("Number of Jobs")
    ax.set_title("Distribution of GPU Memory Usage (0GB VRAM Requested)")
    plt.show()

    # 2) Job durations, drawn only when the column is present. The histogram
    #    counts are plotted as a line through the bin centers, with durations
    #    converted from seconds to hours.
    if "Elapsed_seconds" in zero_vram_jobs.columns:
        fig, ax = plt.subplots(figsize=(8, 5))
        duration_hours = zero_vram_jobs["Elapsed_seconds"].dropna() / 3600
        counts, edges = np.histogram(duration_hours, bins=30)
        centers = 0.5 * (edges[1:] + edges[:-1])
        ax.plot(centers, counts, marker="o", linestyle="-")
        ax.set_xlabel("Job Duration (hours)")
        ax.set_ylabel("Number of Jobs")
        ax.set_title("Distribution of Job Durations (0GB VRAM Requested)")
        plt.show()

    # 3) Horizontal bar chart of job counts for the 20 most frequent users.
    fig, ax = plt.subplots(figsize=(10, 5))
    jobs_per_user = zero_vram_jobs["User"].value_counts().head(20)
    sns.barplot(x=jobs_per_user.values, y=jobs_per_user.index, orient="h", ax=ax)
    ax.set_xlabel("Number of Jobs")
    ax.set_ylabel("User")
    ax.set_title("Top 20 Users: Jobs with 0GB VRAM Requested but GPU Allocated")
    fig.tight_layout()
    plt.show()