# Plots

This notebook shows how to generate interesting plots using the FABRIC reports database.


In [None]:
import json
import os
from datetime import datetime, timezone

import matplotlib.pyplot as plt
import pandas as pd
from dateutil.relativedelta import relativedelta
from fabric_reports_client.reports_api import ReportsApi

# Location of the identity token used to authenticate against the reports service.
# Set the FABRIC_TOKEN_LOCATION environment variable to point at your own token;
# the fallback is the path this notebook was originally written with.
token_file = os.environ.get("FABRIC_TOKEN_LOCATION", "/Users/kthare10/work/id_token_prod.json")

api = ReportsApi(base_url="https://reports.fabric-testbed.net/reports", token_file=token_file)

# No explicit start of the query window.
query_start = None

# End of the query window: the current UTC time as an ISO-8601 string with
# millisecond precision.
query_end = datetime.now(timezone.utc).isoformat(timespec='milliseconds')

# Project IDs passed as `exclude_project_id` to every query in this notebook.
# NOTE(review): presumably FABRIC-internal projects that should not count as
# user activity - confirm.
fabric_projects = [
    '2dd1ffb8-1aff-45cc-a70d-eb93b65cc26b',
    '4604cab7-41ff-4c1a-a935-0ca6f20cceeb',
    '6b76128d-c73f-431f-a245-0397586a7d40',
    '32e7160e-0318-43f5-a4e3-80209f880833',
    '75835e68-f91f-474d-8d54-27a576cc252f',
    '990d8a8b-7e50-4d13-a3be-0f133ffa8653',
    '04b14c17-e66a-4405-98fc-d737717e2160',
    '1630021f-0a0c-4792-a241-997f410d36e1',
    '7a5adb91-c4c0-4a1c-8021-7b6c56af196f',
    '06e8d02a-b27f-4437-829e-8378d20e5a08',
    '7f33ecf0-5dd7-4fd5-b1b7-061367f8bca6',
]

## Query all Slices

In [None]:
# Retrieve every slice from the reports service, leaving out the projects
# listed in fabric_projects.
response = api.query_slices(
    exclude_project_id=fabric_projects,
    per_page=1000,
    fetch_all=True,
)
slices = response["data"]
print(f"Total slices: {response.get('total')}")

## Slices per Month

In [None]:
# Records from the most recent query_slices response.
slices = response["data"]

# Build the frame and parse lease_start as timezone-aware (UTC) timestamps.
# format="mixed" lets each value be parsed on its own, so timestamps with and
# without fractional seconds can coexist.
df = pd.DataFrame(slices)
df['lease_start'] = pd.to_datetime(df['lease_start'], format="mixed", utc=True)
print(f"Total slices: {len(df)}")
print(f"Slices with missing lease_start: {df['lease_start'].isna().sum()}")

# Bucket by calendar month (e.g. '2025-04'). Periods carry no timezone, so the
# UTC timezone is removed explicitly first; calling to_period on a tz-aware
# column makes pandas drop it implicitly and emit a warning.
df['month'] = df['lease_start'].dt.tz_convert(None).dt.to_period('M')

# Rows without a lease_start have no month and are left out by groupby.
monthly_counts = df.groupby('month').size().sort_index()
for month, count in monthly_counts.items():
    print(f"{month}: {count}")

# Plot the number of slices whose lease started in each month.
fig, ax = plt.subplots(figsize=(12, 6))
monthly_counts.plot(kind='bar', ax=ax)
ax.set_title('Number of Slices Created Per Month')
ax.set_xlabel('Month')
ax.set_ylabel('Number of Slices')
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
fig.tight_layout()
plt.show()


## Active Slices per Month

In [None]:
# Build the frame with timezone-aware (UTC) lease boundaries.
# format="mixed" lets each value be parsed on its own, so timestamps with and
# without fractional seconds can coexist.
df = pd.DataFrame(slices)
df['lease_start'] = pd.to_datetime(df['lease_start'], format="mixed", utc=True)
df['lease_end'] = pd.to_datetime(df['lease_end'], format="mixed", utc=True)

# First-of-month timestamps from the earliest lease start to the latest lease
# end. normalize()/replace(day=1) keep the UTC timezone, so the range can be
# compared with the lease columns directly.
start_month = df['lease_start'].min().normalize().replace(day=1)
end_month = df['lease_end'].max().normalize().replace(day=1)
month_range = pd.date_range(start=start_month, end=end_month, freq='MS')

# A slice is active in a month when its lease overlaps
# [month_start, next_month_start). The half-open upper bound also catches
# leases that begin within the final second of the month.
# Rows with a missing lease_start or lease_end never satisfy the comparison
# and are therefore not counted.
active_counts = []
for month_start in month_range:
    next_month_start = month_start + relativedelta(months=1)
    is_active = (df['lease_start'] < next_month_start) & (df['lease_end'] >= month_start)
    active_counts.append(int(is_active.sum()))

for month, count in zip(month_range, active_counts):
    print(f"{month.strftime('%Y-%m')}: {count}")

# Plot
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(month_range.strftime('%Y-%m'), active_counts)
ax.tick_params(axis='x', labelrotation=45)
ax.set_title("Active Slices Per Month")
ax.set_xlabel("Month")
ax.set_ylabel("Active Slice Count")
ax.grid(axis='y')
fig.tight_layout()
plt.show()


## Active Slices with SmartNICs

In [None]:
# Step 1: Fetch all slices that used SmartNICs
response = api.query_slices(component_type=["SmartNIC"], fetch_all=True, per_page=1000, exclude_project_id=fabric_projects)
print(f"Total slices: {response.get('total')}")
slices = response["data"]
print(f"Total slices: {len(slices)}")

# Step 2: Create the DataFrame and show the raw records
df = pd.DataFrame(slices)
display(df)

# Step 3: Convert lease_start and lease_end to timezone-aware (UTC) datetimes.
# format='mixed' lets each value be parsed on its own, so timestamps with and
# without fractional seconds can coexist.
df['lease_start'] = pd.to_datetime(df['lease_start'], utc=True, format='mixed')
df['lease_end'] = pd.to_datetime(df['lease_end'], utc=True, format='mixed')

# Step 4: Define the month range for plotting: first-of-month timestamps from
# the earliest lease start to the latest lease end. normalize()/replace(day=1)
# keep the UTC timezone, so the range can be compared with the lease columns.
start_month = df['lease_start'].min().normalize().replace(day=1)
end_month = df['lease_end'].max().normalize().replace(day=1)
month_range = pd.date_range(start=start_month, end=end_month, freq='MS')

# Step 5: Count SmartNIC slices active in each month. A slice is active when
# its lease overlaps [month_start, next_month_start); the half-open upper bound
# also catches leases that begin within the final second of the month.
# Rows with a missing lease_start or lease_end never satisfy the comparison
# and are therefore not counted.
smartnic_counts = []
for month_start in month_range:
    next_month_start = month_start + relativedelta(months=1)
    is_active = (df['lease_start'] < next_month_start) & (df['lease_end'] >= month_start)
    smartnic_counts.append(int(is_active.sum()))

for month, count in zip(month_range, smartnic_counts):
    print(f"{month.strftime('%Y-%m')}: {count}")

# Step 6: Plot the results
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(month_range.strftime('%Y-%m'), smartnic_counts)
ax.tick_params(axis='x', labelrotation=45)
ax.set_title("SmartNIC Slice Usage Per Month")
ax.set_xlabel("Month")
ax.set_ylabel("Number of Active SmartNIC Slices")
ax.grid(axis='y')
fig.tight_layout()
plt.show()


## Query current Active Slices

In [None]:
# Query every slice whose state is neither "Closing" nor "Dead", leaving out
# the projects listed in fabric_projects.
response = api.query_slices(
    exclude_slice_state=["Closing", "Dead"],
    exclude_project_id=fabric_projects,
    per_page=1000,
    fetch_all=True,
)
slices = response["data"]

print(f"Total slices: {len(slices)}")
display(pd.DataFrame(slices))


## Top 10 users for SmartNIC in a time range

In [None]:
import pandas as pd
from collections import defaultdict
from dateutil import parser

# End of the query window: the current UTC time as an ISO-8601 string with
# millisecond precision.
query_end = datetime.now(timezone.utc).isoformat(timespec='milliseconds')

# Component type to rank users by. Possible component types:
# component_types = ["SmartNIC", "GPU", "FPGA", "Storage", "SharedNIC"]
component_type = "SmartNIC"

# NOTE: no start time is passed here. According to the original author's note,
# the query then defaults to 30 days before the end time.

# Load slivers that carry the selected component type.
slivers = api.query_slivers(
    end_time=query_end,
    component_type=component_type,
    exclude_project_id=fabric_projects,
    per_page=1000,
    fetch_all=True,
)["data"]

In [None]:
# Aggregate component-hours per user.
# user_usage maps user_id -> {"smartnic_usage_hours": float, "user_email": str,
#                             "project_id": str, "project_name": str}
# The key is called "smartnic_usage_hours" whatever component_type is set to.
# NOTE(review): the project fields are taken from the first sliver seen for a
# user, so a user active in several projects is reported under only one.
user_usage = {}
wanted_type = component_type.lower()
skipped_slivers = 0

for s in slivers:
    user_id = s.get("user_id")
    lease_start = s.get("lease_start")
    lease_end = s.get("lease_end")

    # Records without a user or a complete lease window cannot be attributed.
    if not user_id or not lease_start or not lease_end:
        continue

    try:
        start = pd.to_datetime(lease_start, utc=True)
        end = pd.to_datetime(lease_end, utc=True)
    except (ValueError, TypeError, OverflowError):
        # Unparseable timestamp: skip the record but keep count so the
        # omission is visible below.
        skipped_slivers += 1
        continue

    # A value that parses to NaT would turn the user's running total into NaN.
    if pd.isna(start) or pd.isna(end):
        skipped_slivers += 1
        continue

    # Negative durations (lease_end before lease_start) count as zero hours.
    hours = max((end - start).total_seconds() / 3600.0, 0)

    # "components" may be absent or null; the type comparison ignores case on
    # both sides.
    components = (s.get("components") or {}).get("data") or []
    component_count = sum(
        1 for c in components
        if str(c.get("type") or "").lower() == wanted_type
    )

    if user_id not in user_usage:
        user_usage[user_id] = {
            "smartnic_usage_hours": 0,
            "user_email": s.get("user_email"),
            "project_id": s.get("project_id"),
            "project_name": s.get("project_name"),
        }

    user_usage[user_id]["smartnic_usage_hours"] += component_count * hours

if skipped_slivers:
    print(f"Skipped {skipped_slivers} slivers with unparseable lease timestamps")

# Convert to DataFrame. The explicit column list keeps the frame well-formed
# (and sortable) even when no usage was collected.
usage_df = pd.DataFrame(
    [{"user_id": uid, **info} for uid, info in user_usage.items()],
    columns=["user_id", "user_email", "project_id", "project_name", "smartnic_usage_hours"],
)

# Sort and display top 10
top_users = usage_df.sort_values("smartnic_usage_hours", ascending=False).head(10)
display(top_users)


## Slices by User Email

In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

# Step 1: Fetch all slices
def fetch_all_slices():
    """Return the "data" entry of a full `query_slices` call.

    Uses the notebook-level `api` client and leaves out the projects listed in
    `fabric_projects`.
    """
    return api.query_slices(
        exclude_project_id=fabric_projects,
        per_page=1000,
        fetch_all=True,
    )["data"]

# Step 2: Load into DataFrame
def load_slices_dataframe(slices_json):
    """Build a DataFrame from slice records and parse its lease timestamps.

    Args:
        slices_json: Records accepted by `pd.DataFrame` (e.g. a list of dicts).

    Returns:
        A DataFrame in which `lease_start` and `lease_end`, when present, are
        converted to timezone-aware UTC datetimes. Other columns are untouched.
    """
    frame = pd.DataFrame(slices_json)
    lease_columns = [name for name in ('lease_start', 'lease_end') if name in frame.columns]
    for name in lease_columns:
        # format='mixed' parses each value on its own, so differing timestamp
        # layouts within one column are accepted.
        frame[name] = pd.to_datetime(frame[name], utc=True, format='mixed')
    return frame

# Step 3: Interactive selection and filtering
def create_user_selector(df):
    """Show a dropdown of user emails and a table of the selected user's slices.

    Args:
        df: DataFrame with at least the columns `user_email`, `user_id`,
            `slice_name`, `slice_id`, `lease_start` and `lease_end`.

    The table is rendered into an `Output` widget placed under the dropdown.
    Each selection replaces the previous table instead of appending to it, and
    the user selected initially is shown straight away. Displaying from inside
    the callback without an `Output` widget is not reliably shown in the cell
    across Jupyter front ends.
    """
    user_emails = sorted(df['user_email'].dropna().unique())
    dropdown = widgets.Dropdown(options=user_emails, description="User Email:")
    output = widgets.Output()
    shown_columns = ['user_email', 'user_id', 'slice_name', 'slice_id', 'lease_start', 'lease_end']

    def show_user_slices(selected_user):
        # Render the selected user's slices, oldest lease first.
        user_slices = df[df['user_email'] == selected_user]
        output.clear_output()
        with output:
            display(user_slices[shown_columns].sort_values(by='lease_start'))

    def on_select_user(change):
        show_user_slices(change['new'])

    dropdown.observe(on_select_user, names='value')
    display(dropdown, output)

    # observe() only fires on changes, so render the initial selection by hand.
    if dropdown.value is not None:
        show_user_slices(dropdown.value)

# Fetch the slices, convert them to a DataFrame with parsed lease timestamps,
# and show the interactive per-user selector.
slices = fetch_all_slices()
df_slices = load_slices_dataframe(slices_json=slices)
create_user_selector(df=df_slices)


## Aggregate VMs

In [None]:
import pandas as pd
from datetime import datetime, timedelta

# Load the full slices JSON.
# NOTE(review): all_slices.json is not written by any cell of this notebook;
# it has to exist in the working directory already.
with open("all_slices.json") as f:
    all_slices = pd.read_json(f)

# Flatten slivers across all slices, tagging each sliver with its slice_id.
all_slivers = []
for slice_record in all_slices.itertuples():
    slice_id = getattr(slice_record, "slice_id")
    slice_slivers = getattr(slice_record, "slivers", [])
    # A slice with no sliver list shows up as None/NaN, which cannot be iterated.
    if slice_slivers is None or isinstance(slice_slivers, float):
        continue
    for sliver in slice_slivers:
        # Copy so the records held by all_slices are not modified.
        all_slivers.append({**sliver, "slice_id": slice_id})

# Convert to DataFrame
df_slivers = pd.DataFrame(all_slivers)

# Parse timestamps the same way as the rest of the notebook: timezone-aware UTC
# with format="mixed". Without format="mixed", values that do not match the
# format inferred from the first row are coerced to NaT and silently dropped.
for col in ("lease_start", "lease_end"):
    df_slivers[col] = pd.to_datetime(df_slivers[col], utc=True, format="mixed", errors="coerce")

# Filter only VM slivers
vm_slivers = df_slivers[df_slivers["sliver_type"].str.lower() == "vm"]

# Define the time range for aggregation (interpreted as UTC)
start_time = datetime(2025, 1, 1)
end_time = datetime(2025, 2, 1)
interval = timedelta(hours=1)

# Create a time series index. It is timezone-aware (UTC) so it can be compared
# with the UTC lease timestamps; comparing naive and aware values raises.
time_index = pd.date_range(start=start_time, end=end_time, freq=interval, tz="UTC")
vm_usage = pd.Series(0, index=time_index)

# Count how many VMs are active at each point of the index. Slivers with a
# missing lease boundary are ignored.
leases = vm_slivers[["lease_start", "lease_end"]].dropna()
for lease_start, lease_end in zip(leases["lease_start"], leases["lease_end"]):
    active_range = (time_index >= lease_start) & (time_index <= lease_end)
    vm_usage.loc[active_range] += 1

# Plot the VM usage over time
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(15, 5))
vm_usage.plot(ax=ax)
ax.set_title("Number of VMs in Use Over Time")
ax.set_xlabel("Time")
ax.set_ylabel("Active VM Count")
ax.grid(True)
fig.tight_layout()
plt.show()
