# VRAM Efficiency Analysis Demo

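This notebook demonstrates the VRAM plotting methods of the `EfficiencyAnalysis` class (`plot_vram_efficiency`, `plot_vram_hours`, and `plot_vram_hours_interactive`) together with `TimeSeriesVisualizer.plot_vram_efficiency_interactive`. These methods provide customizable VRAM efficiency plotting for selected users over a specified date range.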

In [None]:
# Import required modules
import sys
from pathlib import Path

In [None]:
# Treat the parent of the current working directory as the project root.
# NOTE(review): this assumes the notebook is launched from a directory directly
# under the project root — confirm if the notebook is ever moved.
project_root = str(Path.cwd().resolve().parent)
print(f"Project root: {project_root}")

# Add project root to sys.path for module imports
# (the `src.*` modules referenced below are resolved through this entry).
if project_root not in sys.path:
    sys.path.insert(0, project_root)


# Automatically reload modules before executing code
# This is useful for development to see changes without restarting the kernel.
%load_ext autoreload
# Reload all modules imported with %aimport every time before executing the Python code typed.
%autoreload 1
# Register the project modules that autoreload should track.
%aimport src.analysis.efficiency_analysis
%aimport src.preprocess.preprocess

In [None]:
# Import Required Libraries and Modules
from src.analysis.efficiency_analysis import EfficiencyAnalysis
from src.preprocess.preprocess import preprocess_data
from src.database.database_connection import DatabaseConnection
from src.analysis.frequency_analysis import FrequencyAnalysis

In [None]:
# Load GPU job records from the `slurm_data_new.db` database.
# The database is addressed with a relative path that points one directory up.
db = DatabaseConnection(
    db_url="../slurm_data_new.db",
)

# Fetch every column of the Jobs table, keeping only rows where GPUs > 0
gpu_df = db.fetch_query(
    "SELECT * FROM Jobs WHERE GPUs > 0",
)

In [None]:
# Run the project's preprocessing step on the queried GPU jobs
preprocessed_df = preprocess_data(
    gpu_df,
    min_elapsed_seconds=0,
    include_failed_cancelled_jobs=False,
    include_cpu_only_jobs=True,
)

# Efficiency analysis helper, built on the preprocessed jobs
efficiency_analyzer = EfficiencyAnalysis(
    jobs_df=preprocessed_df,
)

# FrequencyAnalysis helper, built on the same preprocessed DataFrame
frequency_analyzer = FrequencyAnalysis(
    preprocessed_df,
)

In [None]:
import numpy as np

# Select the jobs to analyze. Both range filters use exclusive bounds,
# i.e. allocated VRAM > 0 and GPU memory usage > 0 GB.
jobs = efficiency_analyzer.filter_jobs_for_analysis(
    gpu_count_filter=1,
    vram_constraint_filter=None,
    allocated_vram_filter={"min": 0, "max": np.inf, "inclusive": False},
    gpu_mem_usage_filter={"min": 0, "max": np.inf, "inclusive": False},
)

# Job-level efficiency metrics for the filtered jobs
job_metrics = efficiency_analyzer.calculate_job_efficiency_metrics(
    filtered_jobs=jobs,
)

# User-level efficiency metrics
users = efficiency_analyzer.calculate_user_efficiency_metrics()

# Users with at least 5 jobs whose allocated-VRAM efficiency lies in the (0, 0.3) filter range
inefficient_users = efficiency_analyzer.find_inefficient_users_by_alloc_vram_efficiency(
    alloc_vram_efficiency_filter={"min": 0, "max": 0.3, "inclusive": False},
    min_jobs=5,
)
display(inefficient_users)

# Keep only the rows whose expected allocated-VRAM efficiency is strictly positive
inefficient_users = inefficient_users.loc[
    inefficient_users["expected_value_alloc_vram_efficiency"] > 0
]

# User names from the first five remaining rows
top_problematic_users = inefficient_users["User"].head(5).tolist()

# The demonstration plots use at most the first four of them
users_to_analyze = top_problematic_users[:4]

In [None]:
# Initialize the TimeSeriesVisualizer with the users
# NOTE(review): `users_to_analyze` is a list of user names taken from the "User"
# column; confirm that the TimeSeriesVisualizer constructor expects this input
# rather than a jobs DataFrame.
from src.visualization.time_series import TimeSeriesVisualizer

time_series_visualizer = TimeSeriesVisualizer(users_to_analyze)

In [None]:
# VRAM efficiency for the selected users, grouped by month over the last 365 days,
# with job counts drawn on a secondary y-axis (dual-metric visualization)
print("=== Secondary Y-Axis for Job Counts ===")

# NOTE(review): despite its name, `hours_df` holds the return value of the
# efficiency plot, not of the VRAM-hours plot.
hours_df = efficiency_analyzer.plot_vram_efficiency(
    users=users_to_analyze,
    days_back=365,
    time_unit="Months",
    remove_zero_values=True,
    show_secondary_y=True,  # job counts go on the secondary y-axis
    annotation_style="none",  # no text annotations, keeps the plot clean
)

In [None]:
# VRAM hours for the same users, grouped by month over the last 365 days
print("=== Plotting VRAM Hours ===")
vram_df = efficiency_analyzer.plot_vram_hours(
    users=users_to_analyze,
    days_back=365,
    time_unit="Months",
    remove_zero_values=True,
    annotation_style="none",  # no text annotations, keeps the plot clean
)
# Show the value returned by the plotting call as the cell output
vram_df

In [None]:
# Interactive counterpart of the VRAM hours plot, for the same users and period
print("=== Interactive VRAM Hours Plot ===")
fig = efficiency_analyzer.plot_vram_hours_interactive(
    users=users_to_analyze,
    days_back=365,
    time_unit="Months",
    exclude_fields=["Time"],
)

## Customization Options

The efficiency analysis tools provide extensive customization options:

1. **Time Units**: Choose from "Days", "Weeks", or "Months"
2. **Date Range**: Specify exact `start_date` and `end_date` or use `days_back`
3. **Annotation Styles**: "hover", "combined", "table", "none"
4. **Visualization Options**: Show secondary y-axis, filter zero values
5. **Data Processing**: Control max points, exclude specific fields from annotations

These options help tailor the analysis to specific needs and visualization preferences.

In [None]:
from src.config.enum_constants import TimeUnitEnum


# Focused analysis window: 1 January 2025 through 31 March 2025
start_date = "2025-01-01"
end_date = "2025-03-31"

print("=== Comparing Users with Custom Parameters (Jan-March 2025) ===")

print(f"Date range: {start_date} to {end_date}")


# Weekly view across the whole January-March window
fig = time_series_visualizer.plot_vram_efficiency_interactive(
    users=users_to_analyze,
    start_date=start_date,
    end_date=end_date,
    time_unit=TimeUnitEnum.WEEKS.value,
    exclude_fields=["Time"],
    remove_zero_values=True,
)

# Daily view over a shorter window that ends on 15 January 2025.
# `fig` is reassigned here, so it refers to this second call's result afterwards.
fig = time_series_visualizer.plot_vram_efficiency_interactive(
    users=users_to_analyze,
    start_date=start_date,
    end_date="2025-01-15",
    time_unit="Days",
    exclude_fields=["Time"],
    remove_zero_values=True,
)