In [None]:
import os
from dotenv import load_dotenv
import hashlib
from datetime import datetime, timedelta, timezone

import time # for some performance testing
from tqdm.autonotebook import tqdm

from oauthlib.oauth2 import BackendApplicationClient
from requests_oauthlib import OAuth2Session

import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

In [None]:
# CloudIQ REST API endpoints
# CIQ_TOKEN_URL is passed to fetch_token() to obtain the OAuth2 token.
CIQ_TOKEN_URL = 'https://cloudiq.apis.dell.com/auth/oauth/v2/token'
# CIQ_BASE_API_URL ends with a slash: resource paths such as 'volumes' or
# 'metrics/query' are appended to it by plain string concatenation.
CIQ_BASE_API_URL = 'https://cloudiq.apis.dell.com/cloudiq/rest/v1/'

In [None]:
# You don't want your API credentials to leak, do you?
# Credentials are read from the environment (CLIENT_ID / CLIENT_SECRET) instead of
# being written in the notebook; load_dotenv() is called first so they can be supplied that way.
load_dotenv()

client_id = os.getenv('CLIENT_ID')
client_secret = os.getenv('CLIENT_SECRET')

# Fail fast with an explicit message: os.getenv() returns None for an unset variable,
# which would otherwise only surface later as an obscure error during the token request.
_missing_credentials = [
    name
    for name, value in (('CLIENT_ID', client_id), ('CLIENT_SECRET', client_secret))
    if not value
]
if _missing_credentials:
    raise RuntimeError(
        'Missing CloudIQ API credentials in environment: ' + ', '.join(_missing_credentials)
    )

# Only the client id is echoed - never print the secret.
print('OAuth2 Client Credentials ID used:', client_id)

In [None]:
# Authentication to CloudIQ REST API
# `oauth` is the authenticated session reused by every later request in this notebook.
client = BackendApplicationClient(client_id=client_id)
oauth = OAuth2Session(client=client)
token = oauth.fetch_token(
    token_url=CIQ_TOKEN_URL,
    client_id=client_id,
    client_secret=client_secret,
)

# Show only non-sensitive token metadata, not the token itself
token_summary = ('OAuth2 Token type:', token['token_type'], '- Expires in:', token['expires_in'])
print(*token_summary)

In [None]:
# Fetching all volumes / LUNs
# 'select' restricts the attributes returned for each volume; the commented-out
# 'filter' entry shows how the query could be narrowed to one system type.
params = {
#     'filter': "system_type eq 'POWERSTORE'",
    'select': 'object_name,system,system_type,allocated_size,total_size,tags,system_tags'
}

r = oauth.get(CIQ_BASE_API_URL + 'volumes', params=params)
print('Status Code:', r.status_code)

# Stop here on an HTTP error instead of failing later with a confusing KeyError
# when the following cells read r.json()['paging'] / ['results'].
r.raise_for_status()

In [None]:
# How many instances?
# The count is read from the 'paging' section of the response body
total_instances = r.json()['paging']['total_instances']
print('Number of instances:', total_instances)
# r.json()

In [None]:
# Creating DataFrame
# Every entry of the response's 'results' list becomes one row
volumes_payload = r.json()
df_object = pd.json_normalize(volumes_payload, record_path=['results'])

print('Dataframe size:', df_object.shape)
# df_object

In [None]:
# Transforming the data
# Discard, in place, every row that has a missing value in any column
# (a narrower alternative would be subset=['allocated_size'])
df_object.dropna(axis=0, how='any', inplace=True)
print('Dataframe size:', df_object.shape)

# Protecting the innocent
def md5hash(s):
    """Return the hexadecimal MD5 digest of ``str(s)`` encoded as UTF-8.

    Used to obfuscate object names for display, not for any security purpose.
    """
    digest = hashlib.md5()
    digest.update(str(s).encode('utf-8'))
    return digest.hexdigest()

# Divisor used for the *_gb columns (decimal gigabytes).
# NOTE(review): assumes allocated_size / total_size are reported in bytes - confirm against the API.
BYTES_PER_GB = 1000**3

df_object['object_name_hash'] = df_object['object_name'].apply(md5hash)
# An obfuscated object name, in case real names should not be displayed:
# first 4 + last 4 hex characters of the hash
df_object['o_object_name'] = df_object['object_name_hash'].apply(lambda digest: digest[:4] + digest[-4:])

# Column arithmetic is vectorised (whole-Series operations) rather than applied element by element
df_object['used_pct'] = (df_object['allocated_size'] / df_object['total_size'] * 100).round(2)
df_object['total_size_gb'] = (df_object['total_size'] / BYTES_PER_GB).round(2)
df_object['allocated_size_gb'] = (df_object['allocated_size'] / BYTES_PER_GB).round(2)

print('Dataframe size:', df_object.shape)
# df_object

In [None]:
# Aggregate per BU and per project - those are CloudIQ custom tags defined at the volume level
# Compute KPIs
# The same three capacity KPIs are computed for both groupings, so the spec is defined once
capacity_kpis = {
    'total_size': pd.NamedAgg(column='total_size_gb', aggfunc='sum'),
    'used_size': pd.NamedAgg(column='allocated_size_gb', aggfunc='sum'),
    'count_object': pd.NamedAgg(column='object_name', aggfunc='count'),
}

by_bu = df_object.groupby('tags.business_unit')
by_project = df_object.groupby('tags.project')

# reset_index() turns the grouping key back into a regular column
df_aggregates_by_bu = by_bu.agg(**capacity_kpis).reset_index()
df_aggregates_by_project = by_project.agg(**capacity_kpis).reset_index()

print('BU Dataframe size:', df_aggregates_by_bu.shape)
print('Project Dataframe size:', df_aggregates_by_project.shape)
print(df_aggregates_by_bu)
print(df_aggregates_by_project)


In [None]:
# Fetching some metrics - functions
MAX_ID = 5 # Number of resource ids sent per request: ids are queried in chunks, not all at once

def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``n``;
    an empty ``lst`` yields nothing.
    """
    yield from (lst[start:start + n] for start in range(0, len(lst), n))
        
# Transforming the metrics data into a DataFrame
def flatten(s):
    """Turn one metrics-query response body into a flat DataFrame.

    ``s`` must provide ``s['metrics']`` (the metric names) and ``s['results']``
    (one entry per object, each with an ``'id'`` and a list of ``'timestamps'``
    whose items carry ``'timestamp'`` and ``'values'``).

    Returns a DataFrame with one row per object and timestamp: columns
    ``object``, ``datetime`` (converted with ``pd.to_datetime``) and one column
    per metric. An empty DataFrame is returned when there are no records.
    """
    metric_names = s['metrics']
    per_object = s['results']

    # One record per (object, timestamp); values are matched to metric names by position
    records = []
    for series in per_object:
        for sample in series['timestamps']:
            record = {'object': series['id'], 'datetime': sample['timestamp']}
            record.update(zip(metric_names, sample['values']))
            records.append(record)

    df = pd.DataFrame.from_records(records)
    if df.empty:
        return df

    df['datetime'] = pd.to_datetime(df['datetime'])
    return df

# Crafting the POST query
def post_metric_json(from_dt, to_dt, ids, resource, metrics, interval):
    """Build the body of a metrics query as a plain dict.

    ``from_dt`` and ``to_dt`` are serialised with their ``isoformat()`` method;
    the remaining arguments are passed through unchanged under the keys
    ``resource_type``, ``ids``, ``interval`` and ``metrics``.
    """
    time_window = {'from': from_dt.isoformat(), 'to': to_dt.isoformat()}
    selection = {
        'resource_type': resource,
        'ids': ids,
        'interval': interval,
        'metrics': metrics,
    }
    return {**time_window, **selection}
    
def get_metrics_df(from_dt, to_dt, ids, resource, metrics, interval):
    """Query 'metrics/query' for all ``ids`` and return every sample in one DataFrame.

    The ids are sent in chunks of ``MAX_ID``. For each chunk the request is
    repeated while the API answers HTTP 206 (partial content), restarting from
    the most recent timestamp received, until a 200 arrives, a response carries
    no data, or a response makes no forward progress.

    Args:
        from_dt, to_dt: start / end of the time window (objects with ``isoformat()``).
        ids: list of resource ids to query.
        resource: resource type sent as ``resource_type``.
        metrics: list of metric names.
        interval: sampling interval string.

    Returns:
        DataFrame as produced by ``flatten`` (columns ``object``, ``datetime``
        and one per metric), with exact duplicate rows removed. Empty when
        nothing was returned.

    Raises:
        The HTTP error raised by ``Response.raise_for_status()`` for 4xx/5xx answers.
        RuntimeError: for any other status code that is neither 200 nor 206.
    """
    frames = []  # one DataFrame per response; concatenated once at the end
    nb_rows = 0

    with tqdm(total=len(ids)) as pbar:
        # Split the request into sub-requests (resource ids)
        for ids_subset in chunks(ids, MAX_ID):
            from_dt_post = from_dt
            to_dt_post = to_dt
            last_seen = None  # newest timestamp of the previous partial response

            # Send requests until we have all content - i.e. until no more HTTP 206 / Partial Content
            while True:
                json_to_post = post_metric_json(from_dt_post, to_dt_post, ids_subset, resource, metrics, interval)
                r = oauth.post(CIQ_BASE_API_URL + 'metrics/query', json=json_to_post)

                if r.status_code not in (200, 206):
                    # Without this the loop would re-send the same failing request forever
                    r.raise_for_status()
                    raise RuntimeError(f'Unexpected HTTP status {r.status_code} from metrics/query')

                new_data = flatten(r.json())
                if not new_data.empty:
                    frames.append(new_data)
                    nb_rows += len(new_data)

                if r.status_code == 200 or new_data.empty:
                    break

                # HTTP 206: continue from the newest timestamp received so far
                newest = new_data['datetime'].max()
                if last_seen is not None and newest <= last_seen:
                    # No forward progress: asking again would loop forever
                    break
                last_seen = newest
                from_dt_post = newest

            pbar.set_postfix(rows=nb_rows)
            pbar.update(len(ids_subset))

    if not frames:
        return pd.DataFrame()

    # A continuation request restarts at the last timestamp already received, so the
    # boundary sample may come back twice; drop rows that are exact duplicates.
    return pd.concat(frames).drop_duplicates()

In [None]:
# Fetching some metrics
# Time window: the last 90 days, ending now (UTC, truncated to whole seconds)
TO = datetime.now(timezone.utc).replace(microsecond=0)
FROM = TO - timedelta(days=90)
print('From:', FROM)
print('To:', TO)

# get_metrics_df() reads the module-level MAX_ID when it runs, so this
# assignment controls how many ids are sent per request.
MAX_ID = 5

RESOURCE = 'volume'

METRICS = [
        'used_size',
        'free_size',
        'total_size',
        'iops',
        'bandwidth'
]

# Sampling interval; presumably an ISO 8601 duration (one hour) - confirm against the API docs
INTERVAL = 'PT1H'

OBJECTS = df_object['id'].tolist()

# perf_counter() is a monotonic clock, so the elapsed time cannot be skewed
# by system clock adjustments the way time.time() can.
start = time.perf_counter()
df_all_metrics_data = get_metrics_df(FROM, TO, OBJECTS, RESOURCE, METRICS, INTERVAL)
end = time.perf_counter()

print('Dataframe size:', df_all_metrics_data.shape)
print('Time (ms):', (end-start)*1000)
# df_all_metrics_data

In [None]:
# Transforming in right scale
# df_all_metrics_data['total_size_gb'] = df_all_metrics_data['total_size'].apply(lambda x: x/(1000**3))
# df_all_metrics_data['total_size_gb'] = df_all_metrics_data['total_size_gb'].round(2)
# df_all_metrics_data['used_size_gb'] = df_all_metrics_data['used_size'].apply(lambda x: x/(1000**3))
# df_all_metrics_data['used_size_gb'] = df_all_metrics_data['used_size_gb'].round(2)

# Merge with metadata
metadata_columns = ['tags.business_unit', 'tags.project', 'id', 'object_name', 'o_object_name']

# The result is stored back into df_all_metrics_data, so running this cell twice
# would merge the metadata a second time and produce '_x' / '_y' suffixed columns.
# Dropping metadata columns left by a previous run keeps the cell idempotent.
df_all_metrics_data = pd.merge(
    df_all_metrics_data.drop(columns=metadata_columns, errors='ignore'),
    df_object[metadata_columns],
    how='left', left_on='object', right_on='id')
df_all_metrics_data

In [None]:
# Compute KPIs
# One group per value of the 'object' column
by_object = df_all_metrics_data.groupby(by='object')

def p95(x):
    """Return the 0.95 quantile of ``x`` (anything exposing ``.quantile``)."""
    percentile_95 = x.quantile(q=0.95)
    return percentile_95

# IOPS statistics computed for every object
iops_kpis = {
    'min_iops': pd.NamedAgg(column='iops', aggfunc='min'),
    'max_iops': pd.NamedAgg(column='iops', aggfunc='max'),
    'avg_iops': pd.NamedAgg(column='iops', aggfunc='mean'),
    'median_iops': pd.NamedAgg(column='iops', aggfunc='median'),
    'count_iops': pd.NamedAgg(column='iops', aggfunc='count'),
    'p95_iops': pd.NamedAgg(column='iops', aggfunc=p95),
}
df_aggregates_by_object = by_object.agg(**iops_kpis).reset_index()

# Two decimals are enough for the computed (non-integer) statistics
rounded_kpis = ['p95_iops', 'avg_iops', 'median_iops']
df_aggregates_by_object[rounded_kpis] = df_aggregates_by_object[rounded_kpis].round(2)

print('Dataframe size:', df_aggregates_by_object.shape)
# df_aggregates_by_object

In [None]:
# Merge attribute level and KPI level datasets
# Left join: every row of df_object is kept, with or without matching KPIs
df_all_object = df_object.merge(
    df_aggregates_by_object,
    how='left',
    left_on='id',
    right_on='object',
)

print('Dataframe size:', df_all_object.shape)
# df_all_object

In [None]:
# Show default treemap on "used_pct"
# Tile area = total size (GB); tile colour = used percentage.
# custom_data is given as column names of df_all_object, so the hover values
# come from the same frame that is being plotted.
fig = px.treemap(df_all_object, path=[px.Constant("ALL"), 'tags.business_unit', 'tags.project', 'o_object_name'],
                 values='total_size_gb',
                 color='used_pct',
                 custom_data=['tags.business_unit', 'tags.project', 'o_object_name'],
                 hover_data=['id'],
                 color_continuous_scale='Jet')

# %{value} is total_size_gb, so it is labelled as total (not used) capacity
fig.update_traces(root_color='lightgrey', hovertemplate='Volume: %{label}<br>Used capacity: %{color} %<br>Total capacity: %{value} GB<extra></extra><br><br>Project: %{customdata[0]} / %{customdata[1]}')
fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
fig.show()

In [None]:
# Heatmap of the metric over time
# x = sample time, y = obfuscated volume name, colour (z) = IOPS
iops_heatmap = go.Heatmap(
    x=df_all_metrics_data['datetime'],
    y=df_all_metrics_data['o_object_name'],
    z=df_all_metrics_data['iops'],
    customdata=df_all_metrics_data[['tags.business_unit', 'tags.project']],
    hovertemplate='Volume: %{y}<br>Date: %{x}<br>Value: %{z} IOPS<extra></extra><br><br>Project: %{customdata[0]} / %{customdata[1]}',
    colorscale='RdBu_r',  # RdBu_r OR Jet
)

fig = go.Figure(data=iops_heatmap)
fig.update_layout(title='Value over time', xaxis_nticks=24, height=800)

fig.show()

In [None]:
# In order to play with the Figure's layout and data
# print(fig)

In [None]:
# Just for fun: buttons to switch metrics and chart type
# Metrics (columns of df_all_object) that can be selected as the colour of the chart
cols = ['used_pct', 'max_iops', 'median_iops', 'p95_iops']

# Build one hidden treemap per metric and collect all their traces in a single figure.
# The extra constant column 'Plot' holds the metric name; passing it as hover_data puts it
# into each trace's customdata, which is how the buttons below recognise the traces.
fig = go.Figure()
for col in cols:
    figpx = px.treemap(df_all_object.assign(Plot=col), 
                       path=[px.Constant("ALL"), 'tags.business_unit', 'tags.project', 'o_object_name'],
                       values='total_size_gb',
                       color=col,
                       hover_data=['Plot'],
                       color_continuous_scale='Jet').update_traces(visible=False)
    
    fig.add_traces(figpx.data)
    
# Colour axis (explicit colour stops and colorbar title) and margins of the combined figure
fig.update_layout({
    'coloraxis': {
        'colorbar': {'title': {'text': 'Value'}},
        'colorscale': [[0.0, 'rgb(0,0,131)'], 
                       [0.2, 'rgb(0,60,170)'], 
                       [0.4, 'rgb(5,255,255)'], 
                       [0.6, 'rgb(255,255,0)'],
                       [0.8, 'rgb(250,0,0)'],
                       [1.0, 'rgb(128,0,0)']]
    },
    'legend': {'tracegroupgap': 0},
    'margin': {'b': 25, 'l': 25, 'r': 25, 't': 50}
})
    
# Two menus:
#  1) one entry per metric - shows only the traces whose customdata names that metric
#  2) two buttons that restyle the trace type between 'treemap' and 'sunburst'
# The trailing update_traces() makes the first metric visible initially.
fig.update_layout(
    updatemenus=[
        {
            "buttons": 
            [
                {
                    "label": k,
                    "method": "update",
                    "args": 
                    [
                        # visibility mask over fig.data: True where the trace belongs to metric k
                        {"visible": [t.customdata[0][0]==k for t in fig.data]},
                    ],
                }
                for k in cols
            ],
            'x': 0.27,
            'y': 1.1
        },
        dict(
            buttons = list(
            [
                dict(
                    args=["type", 'treemap'],
                    label="Treemap",
                    method="restyle"
                ),
                dict(
                    args=["type", 'sunburst'],
                    label="Sunburst",
                    method="restyle"
                )
            ]),
            type = "buttons",
            direction="right",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.55,
            xanchor="left",
            y=1.125,
            yanchor="top"
        )
    ]
).update_traces(visible=True, selector=lambda t: t.customdata[0][0]==cols[0])

# Text labels placed next to the two menus (positions are in paper coordinates)
fig.update_layout(
    annotations=[
        dict(text="Color's value:", x=0, xref="paper", y=1.08, yref="paper",
                             align="left", showarrow=False),
        dict(text="Chart style:", x=0.5, xref="paper", y=1.08,
                             yref="paper", showarrow=False),
    ])

# Last expression of the cell: the figure itself
fig

In [None]:
# Chart over time for time series / metric
# One line per (obfuscated) volume name, with a range slider under the x axis
fig = px.line(
    df_all_metrics_data,
    x='datetime',
    y=['iops'],
    color='o_object_name',
    title='IOPS over time',
)
fig.update_xaxes(rangeslider_visible=True)