**Table of contents**<a id='toc0_'></a>    
- [Analyze High CPU Memory and Core Usage Across Jobs Resulting in Hoarding of Resources:](#toc1_1_)    
      - [Find most inefficient jobs hoarding node RAM based on `ram_hoarding_fraction_diff`](#toc1_1_1_1_)    
      - [Find most inefficient jobs hoarding CPU cores based on `core_hoarding_fraction_diff`](#toc1_1_1_2_)    
  - [Analyze High CPU Memory and Core Usage Resulting in Hoarding of Resources Across Users:](#toc1_2_)    
      - [Find most inefficient users hoarding CPU cores based on `expected_value_core_hoarding_fraction_diff`](#toc1_2_1_1_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

In [None]:
# Import required modules
import sys
from pathlib import Path
import pandas as pd

# import matplotlib.pyplot as plt
# import seaborn as sns
import os

The Jupyter server should be started from the notebook directory, so that the following cell prints the project root (the parent of the notebook directory):

In [None]:
# The project root is taken to be the directory directly above the kernel's
# current working directory (resolved to an absolute path).
project_root = os.fspath(Path.cwd().resolve().parent)

# Set OUTPUT_MODE to an empty string for this kernel session.
# NOTE(review): presumably read by the project modules imported later; confirm
# what an empty value selects.
os.environ["OUTPUT_MODE"] = ""

print(f"Project root: {project_root}")

In [None]:
# Automatically reload modules before executing code (set this up BEFORE imports)
%load_ext autoreload
%autoreload 2

# Add project root to sys.path for module imports
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from src.analysis import ResourceHoarding as ResourceHoarding
from src.analysis import efficiency_analysis as ea
from src.visualization import JobsWithMetricsVisualizer, UsersWithMetricsVisualizer
from src.config.enum_constants import ResourceHoardingDataFrameNameEnum

In [None]:
# Load the jobs DataFrame from DuckDB
# NOTE(review): db_path is a relative path, presumably resolved against the kernel's
# working directory; this matches the expectation that Jupyter is started from the
# notebook directory. Confirm how the loader resolves it.
preprocessed_jobs_df = ea.load_preprocessed_jobs_dataframe_from_duckdb(
    db_path="../data/slurm_data_small.db",
    table_name="Jobs",
)
# Quick sanity check: preview the first 10 rows and print the (rows, columns) shape
display(preprocessed_jobs_df.head(10))
print(preprocessed_jobs_df.shape)

## <a id='toc1_1_'></a>[Analyze High CPU Memory and Core Usage Across Jobs Resulting in Hoarding of Resources:](#toc0_)


In [None]:
# Create the analysis object from the preprocessed jobs; every later cell in this
# notebook calls methods on this instance.
hoarding_analysis = ResourceHoarding(jobs_df=preprocessed_jobs_df)

In [None]:
# Select the jobs to analyze. The method is called without arguments, so whatever
# default filter settings it defines are the ones applied (TODO confirm the criteria).
filtered_jobs = hoarding_analysis.filter_jobs_for_analysis()
# Bare last expression: rendered as this cell's output
filtered_jobs

Generate all hoarding analysis metrics:

In [None]:
# Compute the node resource hoarding metrics for the filtered jobs.
memory_hoarding_jobs = hoarding_analysis.calculate_node_resource_hoarding_for_jobs(filtered_jobs)

# Show all columns for this preview only. `option_context` restores the previous
# "display.max_columns" value when the block exits, even if `display` raises, and it
# puts back a value set earlier in the session rather than resetting to the pandas default.
with pd.option_context("display.max_columns", None):
    display(memory_hoarding_jobs.head(10))

print(f"Jobs found: {len(memory_hoarding_jobs)}")

#### <a id='toc1_1_1_1_'></a>[Find most inefficient jobs hoarding node RAM based on `ram_hoarding_fraction_diff`](#toc0_)

In [None]:
# Rank the job-level hoarding metrics by `ram_hoarding_fraction_diff`, largest first.
# The result is stored under a RAM-specific name so it is not confused with the CPU-core
# ranking computed in the next section (which uses `inefficient_jobs_hoarding_cpu_cores`).
# NOTE(review): the filter presumably keeps rows with ram_hoarding_fraction_diff >= 0;
# confirm the semantics of "min"/"inclusive" in sort_and_filter_records_with_metrics.
inefficient_jobs_hoarding_ram = hoarding_analysis.sort_and_filter_records_with_metrics(
    metrics_df_name_enum=ResourceHoardingDataFrameNameEnum.JOBS_WITH_RESOURCE_HOARDING_METRICS,
    sorting_key="ram_hoarding_fraction_diff",
    ascending=False,  # Sort in descending order
    filter_criteria={"ram_hoarding_fraction_diff": {"min": 0, "inclusive": True}},
)
# Display top inefficient jobs by RAM hoarding fraction
print("\nTop inefficient Jobs by RAM hoarding fraction:")
display(inefficient_jobs_hoarding_ram.head(10))

# Plot the top 20 jobs by RAM hoarding fraction, labelling each bar with the RAM hoarding
# fraction plus the CPU memory efficiency and allocated VRAM efficiency columns
jobs_with_metrics_visualizer = JobsWithMetricsVisualizer(inefficient_jobs_hoarding_ram.head(20))
jobs_with_metrics_visualizer.visualize(
    column="ram_hoarding_fraction_diff",
    bar_label_columns=["ram_hoarding_fraction_diff", "cpu_mem_efficiency", "alloc_vram_efficiency"],
    figsize=(12, 12),
)

#### <a id='toc1_1_1_2_'></a>[Find most inefficient jobs hoarding CPU cores based on `core_hoarding_fraction_diff`](#toc0_)

In [None]:
# Rank the job-level hoarding metrics by `core_hoarding_fraction_diff`, largest first.
# NOTE(review): the filter presumably keeps rows with core_hoarding_fraction_diff >= 0;
# confirm the semantics of "min"/"inclusive" in sort_and_filter_records_with_metrics.
inefficient_jobs_hoarding_cpu_cores = hoarding_analysis.sort_and_filter_records_with_metrics(
    metrics_df_name_enum=ResourceHoardingDataFrameNameEnum.JOBS_WITH_RESOURCE_HOARDING_METRICS,
    sorting_key="core_hoarding_fraction_diff",
    ascending=False,  # Sort in descending order
    filter_criteria={"core_hoarding_fraction_diff": {"min": 0, "inclusive": True}},
)
# Display top inefficient jobs by CPU core hoarding fraction
print("\nTop inefficient Jobs by CPU core hoarding fraction:")
display(inefficient_jobs_hoarding_cpu_cores.head(10))

# Plot the top 20 jobs by CPU core hoarding fraction, labelling each bar with the core
# hoarding fraction, the RAM hoarding fraction and the allocated VRAM efficiency
jobs_with_metrics_visualizer = JobsWithMetricsVisualizer(inefficient_jobs_hoarding_cpu_cores.head(20))
jobs_with_metrics_visualizer.visualize(
    column="core_hoarding_fraction_diff",
    bar_label_columns=["core_hoarding_fraction_diff", "ram_hoarding_fraction_diff", "alloc_vram_efficiency"],
    figsize=(12, 12),
)

## <a id='toc1_2_'></a>[Analyze High CPU Memory and Core Usage Resulting in Hoarding of Resources Across Users:](#toc0_)


In [None]:
# Compute the user-level node resource hoarding metrics from the same filtered jobs
# used for the job-level analysis, then render the resulting frame.
memory_hoarding_users = hoarding_analysis.calculate_node_resource_hoarding_for_users(filtered_jobs)
display(memory_hoarding_users)

#### <a id='toc1_2_1_1_'></a>[Find most inefficient users hoarding CPU cores based on `expected_value_core_hoarding_fraction_diff`](#toc0_)

In [None]:
# Rank the user-level hoarding metrics by `expected_value_core_hoarding_fraction_diff`,
# largest first.
# NOTE(review): the filter presumably keeps rows with a value >= 0; confirm the semantics
# of "min"/"inclusive" in sort_and_filter_records_with_metrics.
inefficient_users_hoarding_cpu_cores = hoarding_analysis.sort_and_filter_records_with_metrics(
    metrics_df_name_enum=ResourceHoardingDataFrameNameEnum.USERS_WITH_RESOURCE_HOARDING_METRICS,
    sorting_key="expected_value_core_hoarding_fraction_diff",
    ascending=False,  # Sort in descending order
    filter_criteria={"expected_value_core_hoarding_fraction_diff": {"min": 0, "inclusive": True}},
)
# Display the top 10 users by CPU core hoarding fraction

print("\nTop inefficient Users by CPU core hoarding fraction:")
display(inefficient_users_hoarding_cpu_cores.head(10))

# Plot the top 20 users by CPU core hoarding fraction, labelling each bar with the
# expected-value core hoarding, RAM hoarding and allocated VRAM efficiency columns
users_with_metrics_visualizer = UsersWithMetricsVisualizer(inefficient_users_hoarding_cpu_cores.head(20))
users_with_metrics_visualizer.visualize(
    column="expected_value_core_hoarding_fraction_diff",
    bar_label_columns=["expected_value_core_hoarding_fraction_diff",
                        "expected_value_ram_hoarding_fraction_diff",
                        "expected_value_alloc_vram_efficiency"],
    figsize=(14, 12),
)