# VRAM Efficiency Analysis Demo

This notebook demonstrates the use of the `plot_vram_efficiency` method in the `EfficiencyAnalysis` class. The method allows for customizable VRAM efficiency plotting for selected users over a specified date range.

In [None]:
# Import required modules
import sys
from pathlib import Path

In [None]:
project_root = str(Path.cwd().resolve().parent)
print(f"Project root: {project_root}")

# Add project root to sys.path for module imports
if project_root not in sys.path:
    sys.path.insert(0, project_root)


# Automatically reload modules before executing code
# This is useful for development to see changes without restarting the kernel.
%load_ext autoreload
# Reload all modules imported with %aimport every time before executing the Python code typed.
%autoreload 1
%aimport src.analysis.vram_usage, src.preprocess.preprocess

In [None]:
# Import Required Libraries and Modules
from src.analysis.vram_usage import EfficiencyAnalysis
from src.preprocess.preprocess import preprocess_data
from src.database.database_connection import DatabaseConnection

In [None]:
# Load and preprocess GPU job data
# Open the job database; the path is relative to the notebook's working directory.
db = DatabaseConnection(db_url="../slurm_data.db")

# Fetch every row of the Jobs table with GPUs > 0 and convert the result to a DataFrame.
gpu_jobs_query = "SELECT * FROM Jobs WHERE GPUs > 0"
gpu_df = db.connection.query(gpu_jobs_query).to_df()

# Run the project's preprocessing step with the options used throughout this demo.
preprocessed_df = preprocess_data(
    gpu_df,
    min_elapsed_seconds=0,
    include_failed_cancelled_jobs=False,
    include_cpu_only_jobs=True,
)

# Build the analyzer object that the remaining cells operate on.
efficiency_analyzer = EfficiencyAnalysis(df=preprocessed_df, table_name="Jobs")

In [None]:
import numpy as np

# Select the jobs to analyse: GPU count of 1, no VRAM-constraint filter, and
# open-ended ranges on allocated VRAM (> 0) and GPU memory usage (> 0.1).
# NOTE(review): "inclusive": False presumably makes the bounds exclusive — confirm
# against EfficiencyAnalysis.filter_jobs_for_analysis.
jobs = efficiency_analyzer.filter_jobs_for_analysis(
    gpu_count_filter=1,
    vram_constraint_filter=None,
    allocated_vram_filter={"min": 0, "max": np.inf, "inclusive": False},
    gpu_mem_usage_filter={"min": 0.1, "max": np.inf, "inclusive": False},
)

# Compute per-user efficiency metrics. The call takes no arguments, so it
# presumably works from state held by the analyzer after the filter step — verify.
users = efficiency_analyzer.calculate_user_efficiency_metrics()

# Find users below the allocated-VRAM efficiency threshold with at least `min_jobs` jobs,
# then drop rows whose expected allocated-VRAM efficiency is not strictly positive.
inefficient_users = efficiency_analyzer.find_inefficient_users_by_alloc_vram_efficiency(
    efficiency_threshold=0.3, min_jobs=5
)
inefficient_users = inefficient_users[inefficient_users["expected_value_alloc_vram_efficiency"] > 0]

# Select the top 5 problematic users (the first five rows in the returned order).
top_problematic_users = inefficient_users.head(5)["User"].tolist()

# Plot VRAM efficiency for a single selected user. The user is taken from the computed
# list instead of a hard-coded account name, so the notebook contains no personal
# identifiers and still runs on datasets where a specific account does not exist.
if top_problematic_users:
    efficiency_analyzer.plot_vram_efficiency(users=top_problematic_users[:1])
else:
    print("No inefficient users matched the filters; nothing to plot.")
inefficient_users.head(5)

In [None]:
# Plot VRAM efficiency for all of the top problematic users.
# `top_problematic_users` and `inefficient_users` were computed in the previous cell
# with exactly the filters and thresholds this cell used to repeat, so they are reused
# here rather than re-running the identical filter/metrics pipeline a second time.
efficiency_analyzer.plot_vram_efficiency(users=top_problematic_users)
inefficient_users.head(5)