In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
pd.set_option('display.float_format','{:.2f}'.format)

### Load latest NIST framework from CSV export

In [2]:
# CSV export of the NIST framework core (file name carries the v1.1 / 2018-04-16 tag).
NIST_FRAMEWORK = '2018-04-16_framework_v1.1_core.csv'
# Column names imposed on the export; the file's own header row is skipped below.
header_list = ['func_id', 'cat_id', 'subcat_id', 'ref']
# Blank cells are forward-filled with the last value seen above them
# (presumably function/category cells are only populated on the first row of
# each group in the export -- confirm against the file).
# `.ffill()` replaces `fillna(method='ffill')`, which is deprecated in pandas >= 2.1.
framework = pd.read_csv(
    NIST_FRAMEWORK,
    encoding='ISO-8859-1',
    names=header_list,
    skiprows=1,
).ffill()

### Split columns, extract data and set the index

In [3]:
# Split "<label>: <description>" cells on the first colon only, so any further
# colons stay inside the description.
framework[['cat_id', 'cat_desc']] = framework['cat_id'].str.split(':', n=1, expand=True)
framework[['subcat_id', 'subcat_desc']] = framework['subcat_id'].str.split(':', n=1, expand=True)

# Separate "<name> (<code>)" labels into a name column and a code column.
# The dot in the category code is escaped so that it only matches a literal
# '.'; the previous unescaped '.' accepted any character in that position.
# Rows that do not match the pattern become NaN in both columns.
framework[['cat_name', 'cat_id']] = framework['cat_id'].str.extract(
    r'^([a-zA-Z\s,]+)\s\((\w{2}\.\w{2})\)$', expand=True)
framework[['func_name', 'func_id']] = framework['func_id'].str.extract(
    r'^([a-zA-Z\s]+)\s\((\w{2})\)$', expand=True)

# The three identifier columns become categoricals and then form the
# (function, category, subcategory) MultiIndex used by the rest of the notebook.
index_columns = ['func_id', 'cat_id', 'subcat_id']
for column in index_columns:
    framework[column] = framework[column].astype('category')
framework.set_index(index_columns, inplace=True)

### Clean up data, aggregate references and sanitize special characters

In [4]:
# Remove hyphens from the reference text, then join every reference that
# belongs to the same (function, category, subcategory) key with ';'.
group_keys = ['func_id', 'cat_id', 'subcat_id']
framework['ref'] = framework['ref'].str.replace('-','')
framework['ref'] = (
    framework
    .groupby(group_keys, sort=False)['ref']
    .agg(lambda refs: ';'.join(refs))
)
# NOTE(review): the aggregated 'ref' column is discarded right here, so the
# two statements above have no lasting effect on the frame -- confirm whether
# the references were meant to be kept.
framework.drop(columns='ref', inplace=True)

# Strip the '\x92' character and embedded newlines from both description
# columns, then trim leading whitespace.
# NOTE(review): '\x92' is presumably a Windows-1252 apostrophe that was decoded
# as ISO-8859-1 -- confirm; removing it also removes the apostrophe.
for desc_column in ('cat_desc', 'subcat_desc'):
    framework[desc_column] = (
        framework[desc_column]
        .str.replace('\x92','')
        .str.replace('\n','')
        .str.lstrip()
    )

# With 'ref' gone, rows that differ only by reference are now identical.
framework.drop_duplicates(inplace=True)

### Define Risk Management constants and extract dictionaries

In [5]:
# Ordered label scales used for risk management.
implementation_tiers = ['None', 'Partial', 'Risk Informed', 'Repeatable', 'Adaptive']
risk_type = ['Compound', 'Technological', 'Environmental', 'Regulatory', 'Human', 'Insider', 'Reputational']
priority_level = ['Very Low', 'Low', 'Medium', 'High', 'Very High']
risk_level = ['Very Low', 'Low', 'Medium', 'High', 'Very High']

# Flatten the MultiIndex once and build the id -> text lookup tables from it.
flat_framework = framework.reset_index()
by_function = flat_framework.set_index('func_id')
by_category = flat_framework.set_index('cat_id')
function_dict = by_function['func_name'].str.capitalize().to_dict()
category_dict = by_category['cat_name'].to_dict()
description_dict = by_category['cat_desc'].to_dict()

# The text now lives in the dictionaries above, so the frame no longer carries it.
framework.drop(columns=['func_name', 'cat_name', 'cat_desc'], inplace=True)

### Generate a few random numbers to visualize what the results would look like
Observed Maturity (1,5), Business Priority (1,5), Severity (1,5), Likelihood (1,5), Threshold (1,5)

In [6]:
# Placeholder assessment scores: one random integer in 1..5 (upper bound 6 is
# exclusive) per row and per score column, stored as float.
# A seeded generator makes "Restart & Run All" reproduce the same figures.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)
score_columns = ['maturity', 'priority', 'impact', 'likelihood', 'threshold']
for column in score_columns:
    # `float` replaces the `np.float` alias, which was removed in NumPy 1.24.
    framework[column] = rng.integers(1, 6, size=framework.shape[0]).astype(float)
# To use real assessment data instead of random placeholders, load the five
# score columns from a file, e.g.:
#framework[['maturity', 'priority', 'impact', 'likelihood', 'threshold']] = pd.read_csv(file)

### Calculate risks using a simplified risk formula
Inherent Risk (1,25), Mitigated Risk (1,25), Residual Risk (0, 25)

In [7]:
# Inherent risk: impact x likelihood.
framework['inherent_risk'] = framework['impact'].mul(framework['likelihood'])
# Mitigated risk: maturity x threshold.
framework['mitigated_risk'] = framework['maturity'].mul(framework['threshold'])
# Residual risk: what is left after mitigation, bounded to the range 0..25
# so that over-mitigation never produces a negative value.
risk_gap = framework['inherent_risk'].sub(framework['mitigated_risk'])
framework['residual_risk'] = risk_gap.clip(lower=0, upper=25)

### Plot categorical data in a few different visualizations based on risk and exposure

In [8]:
# Impact/likelihood pairs for every row, indexed by category id.
scatter = (
    framework
    .reset_index()
    .set_index('cat_id')[['impact', 'likelihood']]
)
# 5x5 binned heatmap: one bin per axis for each of the five score values.
fig = px.density_heatmap(
    scatter,
    x="impact",
    y="likelihood",
    nbinsx=5,
    nbinsy=5,
    color_continuous_scale='Reds',
)
fig.update_layout(title=dict(text='Density heatmap of Impact/Likelihood distribution', x=0.5))
fig.show()

In [9]:
# Mean inherent and mitigated risk per category, computed in a single pass so
# that both traces share exactly the same row order.
category_means = framework.groupby(['cat_id'], as_index=False, sort=False)[
    ['inherent_risk', 'mitigated_risk']].mean()
risk_mean = category_means[['cat_id', 'inherent_risk']]
control_mean = category_means[['cat_id', 'mitigated_risk']]

# Angular labels are taken from the aggregated rows themselves. The previous
# `framework.index.levels[1]` lists the categories in sorted order, whereas
# `sort=False` yields them in order of first appearance, so labels and values
# could be paired with the wrong category.
theta = category_means['cat_id'].astype(str)

fig = go.Figure()
fig.add_trace(go.Scatterpolar(r=risk_mean['inherent_risk'], theta=theta,
                              fill='toself', name='Exposure', marker_color='red'))
fig.add_trace(go.Scatterpolar(r=control_mean['mitigated_risk'], theta=theta,
                              fill='toself', name='Coverage', marker_color='blue'))
fig.update_layout(
    polar=dict(radialaxis=dict(visible=False)),
    showlegend=False,
    title_text='Radial aggregated exposure map of security controls versus inherent risks',
    title_x=0.5,
)
fig.show()

In [10]:
# Mean and maximum of each risk measure per category.
risk_columns = ['inherent_risk', 'mitigated_risk','residual_risk']
heatmap = (
    framework
    .groupby(['cat_id'])[risk_columns]
    .agg(['mean', 'max'])
)
# Flatten the (measure, statistic) column pairs into names like 'inherent_risk_mean'.
heatmap.columns = ['_'.join(pair) for pair in heatmap.columns]

# Relabel each row as "<category id> - <category name>" for the y axis.
labelled = heatmap.reset_index()
labelled['cat_id'] = labelled['cat_id'].apply(
    lambda cat: ' - '.join([cat, category_dict[cat]]))
heatmap = labelled.set_index('cat_id')

# Only these three aggregates are shown as heatmap columns.
view = ['inherent_risk_mean', 'residual_risk_mean', 'residual_risk_max']
heatmap_trace = go.Heatmap(
    x=view,
    y=heatmap.index,
    z=heatmap[view],
    type='heatmap',
    xgap=.5,
    ygap=.5,
    colorscale='RdYlGn_r',  # reversed scale: low values green, high values red
)
fig = go.Figure(data=heatmap_trace)
fig.update_layout(title=dict(text='Heatmap of aggregated inherent and residual risks', x=0.5))
fig.show()