# Efficiency Analysis
This notebook tests and demonstrates the use of the `EfficiencyAnalysis` class in `src/analysis/vram_usage.py` for analyzing job efficiency.

In [None]:
# Import required modules
import sys
from pathlib import Path
import pandas as pd

The Jupyter server should be started from the notebook directory, so that the following cell prints the project root:

In [None]:
# The project root is the directory one level above the current working directory.
project_root = str(Path.cwd().resolve().parents[0])
print(f"Project root: {project_root}")

In [None]:
# Put the project root at the front of sys.path (once) so the `src` package
# can be imported from this notebook.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from src.analysis import vram_usage

# Enable IPython's autoreload extension so that edits to the source modules
# are picked up during development without restarting the kernel.
%load_ext autoreload
# Mode 1: before running each cell, reload only the modules registered with %aimport.
%autoreload 1
%aimport src.analysis.vram_usage, src.preprocess.preprocess

In [None]:
# Build the analysis object from the small Slurm database; the sample size and
# random state are fixed, which keeps the sampled jobs the same between runs.
efficiency_analysis = vram_usage.EfficiencyAnalysis(
    db_path='../data/slurm_data_small.db',
    sample_size=5000,
    random_state=42,
)

# Preview the first ten rows of the loaded jobs DataFrame
jobs_preview = efficiency_analysis.jobs_df.head(10)
display(jobs_preview)

## Example: Analyze workload efficiency of GPU users who set no VRAM constraints and used 0 GB of VRAM


In [None]:
# Analyze workload efficiency of GPU users with no VRAM constraints who used 0 GB of VRAM
filtered_jobs = efficiency_analysis.filter_jobs_for_analysis(
    vram_constraint_filter=pd.NA,  # No VRAM constraints
    gpu_mem_usage_filter=0,  # Used 0 GB of VRAM
)

# Compute job-level efficiency metrics for the filtered jobs
jobs_with_metrics = efficiency_analysis.calculate_job_efficiency_metrics(filtered_jobs)

# Show all columns for this preview only. option_context restores whatever
# value `display.max_columns` had before (rather than forcing the pandas
# default) and does so even if rendering raises.
with pd.option_context('display.max_columns', None):
    display(jobs_with_metrics.head(10))
print(f"Jobs found: {len(jobs_with_metrics)}")

In [None]:
# Compute user-level efficiency metrics and show the resulting object.
# NOTE(review): the method is called without arguments, so it presumably relies on
# state already held by `efficiency_analysis` (e.g. from the job-level metrics
# cell) — confirm against the class before running this cell out of order.
users_with_metrics = efficiency_analysis.calculate_user_efficiency_metrics()
users_with_metrics

In [None]:
# Run evaluate_cpu_gpu_usage and keep its result mapping for the summary below
analysis_results = efficiency_analysis.evaluate_cpu_gpu_usage()
print("Efficiency summary stats:")

# Pull out the headline numbers, then print them
total_jobs = analysis_results["total_jobs"]
total_gpu_hours = analysis_results["total_gpu_hours"]
avg_efficiency = analysis_results["avg_efficiency"]
median_efficiency = analysis_results["median_efficiency"]

print("Total jobs:", total_jobs)
print("Total GPU hours:", total_gpu_hours)
print("Average VRAM efficiency:", f"{avg_efficiency:.2%}")
print("Median VRAM efficiency:", f"{median_efficiency:.2%}")

# Print each report entry on its own bulleted line
print("\nReport:")
for report_line in analysis_results["report"]:
    print("-", report_line)

# Leave the efficiency patterns table as the cell's output
analysis_results["efficiency_patterns"]

### Find Inefficient Users

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

inefficient_users = efficiency_analysis.find_inefficient_users_by_alloc_vram_efficiency(
    min_jobs=5,  # Minimum number of jobs to consider a user
    efficiency_threshold=0.3,  # Efficiency threshold for identifying inefficient users
)

# Show the first ten rows of the result
print("\nTop inefficient users by job count:")
display(inefficient_users.head(10))

# Horizontal bar chart of job hours for the same ten users,
# with each bar labelled by the user's efficiency value
top_users = inefficient_users.head(10)

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    y=top_users["User"],
    x=top_users["user_job_hours"],
    orient="h",
    ax=ax,
)
ax.set_xlabel("Job Hours")
ax.set_ylabel("User")
ax.set_title("Top 10 Inefficient Users by Allocated VRAM Efficiency Contribution")

# Extend the x-axis 20% past the longest bar so the labels fit inside the plot
longest_bar = top_users["user_job_hours"].max()
xlim = longest_bar * 1.20 if longest_bar > 0 else 1
ax.set_xlim(0, xlim)

# Each label starts 2% of the axis width past its bar's end, but never
# further right than 96% of the axis width.
label_gap = xlim * 0.02
label_cap = xlim * 0.96
hours_and_efficiency = zip(
    top_users["user_job_hours"],
    top_users["expected_value_alloc_vram_efficiency"],
    strict=True,
)
for row, (hours, eff) in enumerate(hours_and_efficiency):
    ax.text(
        min(hours + label_gap, label_cap),
        row,
        f"Eff: {eff:.2f}",
        va="center",
        ha="left",
        fontsize=10,
        color="black",
        clip_on=True,
    )

fig.tight_layout()
plt.show()

In [None]:
inefficient_users_vram_hours = efficiency_analysis.find_inefficient_users_by_vram_hours(
    min_jobs=5,  # Minimum number of jobs to consider a user
    vram_hours_threshold=200  # VRAM-hours threshold for identifying inefficient users
)

# Use the same ten rows for both the table and the chart so the plot matches
# its "Top 10" title instead of drawing every user in the result.
# NOTE(review): assumes the method returns rows already ordered by VRAM-hours — confirm.
top_users_vram_hours = inefficient_users_vram_hours.head(10)

# Display top inefficient users by VRAM-hours
print("\nTop inefficient users by VRAM-hours:")
display(top_users_vram_hours)

# Plot top inefficient users by VRAM-hours, labelled with VRAM-hours and job hours
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    y=top_users_vram_hours["User"],
    x=top_users_vram_hours["vram_hours"],
    orient="h",
    ax=ax,
)
ax.set_xlabel("VRAM-Hours")
ax.set_ylabel("User")
ax.set_title("Top 10 Inefficient Users by VRAM-Hours")

# Add headroom for the two-line annotations (60% extra beyond the longest bar)
xmax = top_users_vram_hours["vram_hours"].max()
xlim = xmax * 1.6 if xmax > 0 else 1
ax.set_xlim(0, xlim)

for i, (vram_hours, user_job_hours) in enumerate(
    zip(
        top_users_vram_hours["vram_hours"],
        top_users_vram_hours["user_job_hours"],
        strict=True,
    )
):
    # Place annotation just past the bar end, capped at 98% of the axis width
    xpos = min(vram_hours + xlim * 0.02, xlim * 0.98)
    ax.text(
        xpos,
        i,
        f"VRAM-Hours: {vram_hours:.2f}\n Job Hours: {user_job_hours:.2f}",
        va="center",
        ha="left",
        fontsize=10,
        color="black",
        clip_on=True
    )
fig.tight_layout()
plt.show()

### Find Inefficient PIs

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

inefficient_pis = efficiency_analysis.find_inefficient_pis_weighted_by_hours(
    min_jobs=5,  # Minimum job count passed to the PI-level search
    efficiency_threshold=0.3,  # Efficiency threshold passed to the PI-level search
)

# Chart the first ten PI groups of the result by total GPU hours
top_pis = inefficient_pis.head(10)
accounts_shown = top_pis["Account"].tolist()

fig, ax = plt.subplots(figsize=(7, 5))
sns.barplot(
    y=top_pis["Account"],
    x=top_pis["Total_GPU_Hours"],
    order=accounts_shown,  # Only show present values
    orient="h",
    ax=ax,
)
ax.set_xlabel("GPU Hours")
ax.set_ylabel("PI Group")
ax.set_title("Top 10 Inefficient PI Groups by Weighted VRAM Efficiency Contribution")
plt.show()


In [None]:
# Select jobs that requested 0GB of VRAM yet were allocated a GPU, using the
# module-level helper; adjust requested_vram and gpus_min to explore other cases.
zero_vram_jobs = vram_usage.filter_zero_vram_requested_with_gpu_allocated(
    efficiency_analysis.jobs_df,
    requested_vram=0,
    gpus_min=1,
)

# Preview the first ten matches and report how many were found
zero_vram_preview = zero_vram_jobs.head(10)
display(zero_vram_preview)
print(f"Found {len(zero_vram_jobs)} jobs where 0GB VRAM was requested but a GPU was allocated.")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Summary plots for jobs where 0GB VRAM was requested but a GPU was allocated
if zero_vram_jobs.empty:
    print("No jobs found where 0GB VRAM was requested but a GPU was allocated.")
else:
    # Histogram (with KDE overlay) of the non-null GPUMemUsage values
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(zero_vram_jobs["GPUMemUsage"].dropna(), bins=30, kde=True, ax=ax)
    ax.set_xlabel("GPU Memory Usage (bytes)")
    ax.set_ylabel("Number of Jobs")
    ax.set_title("Distribution of GPU Memory Usage (0GB VRAM Requested)")
    plt.show()

    # Job durations, drawn only when the Elapsed_seconds column is present
    if "Elapsed_seconds" in zero_vram_jobs.columns:
        fig, ax = plt.subplots(figsize=(8, 5))
        # Bin the durations (converted from seconds to hours) and draw the
        # per-bin job counts as a line through the bin midpoints
        elapsed_hours = zero_vram_jobs["Elapsed_seconds"].dropna() / 3600
        job_counts, bin_edges = np.histogram(elapsed_hours, bins=30)
        bin_midpoints = (bin_edges[:-1] + bin_edges[1:]) / 2
        ax.plot(bin_midpoints, job_counts, marker='o', linestyle='-')
        ax.set_xlabel("Job Duration (hours)")
        ax.set_ylabel("Number of Jobs")
        ax.set_title("Distribution of Job Durations (0GB VRAM Requested)")
        plt.show()

    # Job counts for the twenty users with the most matching jobs
    fig, ax = plt.subplots(figsize=(10, 5))
    user_counts = zero_vram_jobs["User"].value_counts().head(20)
    sns.barplot(x=user_counts.values, y=user_counts.index, orient="h", ax=ax)
    ax.set_xlabel("Number of Jobs")
    ax.set_ylabel("User")
    ax.set_title("Top 20 Users: Jobs with 0GB VRAM Requested but GPU Allocated")
    fig.tight_layout()
    plt.show()