In [2]:

import pandas as pd

In [3]:
# Module-level cache for the value returned by load_veris_data().
# It stays None until load_data() has run once.
cached_data = None

def load_data():
    """Load the VERIS data once, cache it, and return the cached value.

    The first call delegates to ``load_veris_data()`` and stores the result in
    the module-level ``cached_data``; later calls reuse that stored value
    instead of reading the CSV files again.

    Returns:
        The object cached from ``load_veris_data()``, i.e. the
        ``(veris_df_action, veris_df_attribute)`` tuple. The same object is
        also available through the global ``cached_data``.

    Note:
        When this call is the last expression of a notebook cell, end it with
        ``;`` to keep the returned tuple from being echoed as cell output.
    """
    global cached_data
    if cached_data is None:
        cached_data = load_veris_data()  # read and clean the CSVs only once
    return cached_data


def load_veris_data(mapping_path='../data/veris_attack_mapping.csv',
                    impact_path='../score/veris_impact.csv'):
    """Load and clean the VERIS mapping and impact CSV files.

    Args:
        mapping_path: Path (or any file-like object ``pd.read_csv`` accepts)
            of the VERIS mapping CSV. Defaults to the location this notebook
            has always read from.
        impact_path: Path (or file-like object) of the impact CSV whose
            ``attack_type`` column is used as the join key. Defaults to the
            location this notebook has always read from.

    Returns:
        tuple: ``(veris_df_action, veris_df_attribute)``.

        * ``veris_df_action`` - rows whose ``capability_group`` contains
          ``'action'``, with the ``'action.'`` prefix stripped from the group,
          left-joined to the impact table (``capability_id`` reduced to its
          trailing name == ``attack_type``). ``capability_id`` is dropped
          after the join.
        * ``veris_df_attribute`` - rows whose ``capability_group`` contains
          ``'attribute'``, with the ``'attribute.'`` prefix stripped.

    Raises:
        KeyError: If one of the expected columns is missing from either CSV.
    """
    veris_df = pd.read_csv(mapping_path)
    veris_impact_df = pd.read_csv(impact_path)

    # Clean the mapping: discard rows flagged as non-mappable, then the
    # bookkeeping columns that the analysis never reads.
    veris_df = veris_df[veris_df['mapping_type'] != 'non_mappable']
    veris_df = veris_df.drop(columns=['attack_version', 'technology_domain', 'mapping_type',
                                       'references', 'comments', 'organization', 'creation_date',
                                       'last_update', 'mapping_framework_version', 'mapping_framework', 'Unnamed: 0'])

    # Clean the impact table: only the join key and its scores are kept.
    veris_impact_df = veris_impact_df.drop(columns=['description', 'id'])

    # "action" rows. The explicit copy makes the column assignments below
    # operate on an independent frame rather than on a filtered view of
    # veris_df (which is what triggers SettingWithCopyWarning).
    is_action = veris_df['capability_group'].str.contains('action')
    veris_df_action = veris_df.loc[is_action].copy()
    veris_df_action['capability_group'] = (
        veris_df_action['capability_group']
        .str.replace('action.', '', regex=False)
        .str.replace('attribute.', '', regex=False)
    )
    # Reduce e.g. "action.<group>.variety.<name>" to "<name>" so it can be
    # matched against the impact table's attack_type.
    veris_df_action['capability_id'] = veris_df_action['capability_id'].str.replace(
        r'action\.\w+\.(variety|vector)\.', '', regex=True)
    veris_df_action = pd.merge(veris_df_action, veris_impact_df,
                               left_on='capability_id', right_on='attack_type', how='left')
    veris_df_action = veris_df_action.drop(columns=['capability_id'])

    # "attribute" rows, again as an independent copy.
    is_attribute = veris_df['capability_group'].str.contains('attribute')
    veris_df_attribute = veris_df.loc[is_attribute].copy()
    veris_df_attribute['capability_group'] = veris_df_attribute['capability_group'].str.replace(
        'attribute.', '', regex=False)

    return veris_df_action, veris_df_attribute

# Fill the module-level cache now; the trailing semicolon keeps the cell from
# echoing whatever the call returns.
load_data();

In [None]:

# Unpack the two frames stored in the module-level cache.
veris_df_action, veris_df_attribute = cached_data

# Mean severity for each attack_object_id, exposed as columns "ttp" / "severity".
average_severity = (
    veris_df_action
    .groupby('attack_object_id')['severity']
    .mean()
    .reset_index()
)
average_severity.columns = ['ttp', 'severity']

# Severity bands (right-closed): (0, 4] Low, (4, 6] Moderate,
# (6, 7.5] High, (7.5, 10] Critical.
labels = ['Low', 'Moderate', 'High', 'Critical']
bins = [0, 4.0, 6.0, 7.5, 10.0]
severity_order = pd.CategoricalDtype(['Low', 'Moderate', 'High', 'Critical'], ordered=True)

# Bucket every mean severity into its band.
average_severity['severity_level'] = pd.cut(
    average_severity['severity'],
    bins=bins,
    labels=labels,
    right=True,
)

# Tally the bands and list them from Low to Critical.
severity_counts = (
    average_severity['severity_level']
    .value_counts()
    .reset_index()
    .set_axis(['severity_level', 'count'], axis=1)
    .astype({'severity_level': severity_order})
    .sort_values('severity_level', ignore_index=True)
)

display(severity_counts)
display(
    veris_df_attribute
    .groupby('capability_group')['capability_id']
    .count()
    .reset_index()
)

In [9]:
# cached_data is expected to hold the (veris_df_action, veris_df_attribute)
# tuple stored by load_data(); that cell must have been run first.
veris_df_action, veris_df_attribute = cached_data

display(veris_df_action)

# Mean severity per attack_object_id, with the key column renamed to "ttp".
average_severity = (
    veris_df_action
    .groupby('attack_object_id')['severity']
    .mean()
    .reset_index()
    .rename(columns={'attack_object_id': 'ttp'})
)

# Severity bands (right-closed): (0, 4] Low, (4, 6] Moderate,
# (6, 7.5] High, (7.5, 10] Critical.
bins = [0, 4.0, 6.0, 7.5, 10.0]
labels = ['Low', 'Moderate', 'High', 'Critical']
severity_order = pd.CategoricalDtype(labels, ordered=True)
average_severity['severity_level'] = pd.cut(
    average_severity['severity'], bins=bins, labels=labels, right=True
).astype(severity_order)

# Count techniques per band. The output columns are named explicitly:
# value_counts().reset_index() yields ("index", <series name>) on pandas < 2.0
# but (<series name>, "count") on pandas >= 2.0, so renaming "index" alone
# either does nothing or creates two "severity_level" columns.
severity_counts = (
    average_severity['severity_level']
    .value_counts()
    .rename_axis('severity_level')
    .reset_index(name='count')
    .sort_values('severity_level', ignore_index=True)  # Low -> Critical, rows renumbered 0..3
)

# Display results
display(severity_counts)
display(veris_df_attribute.groupby('capability_group')['capability_id'].count().reset_index())

Unnamed: 0,capability_group,capability_description,attack_object_id,attack_object_name,attack_type,severity
0,hacking,Abuse of functionality.,T1047,Windows Management Instrumentation,Abuse of functionality,6.0
1,hacking,Remote shell,T1047,Windows Management Instrumentation,Command shell,7.0
2,malware,Directly installed or inserted by threat agent...,T1047,Windows Management Instrumentation,Direct install,6.0
3,hacking,Abuse of functionality.,T1053,Scheduled Task/Job,Abuse of functionality,6.0
4,hacking,Hacking action that creates a backdoor for use.,T1053,Scheduled Task/Job,Backdoor,9.0
...,...,...,...,...,...,...
874,social,Pretexting (dialogue leveraging invented scena...,T1598.001,Phishing for Information: Spearphishing Service,Pretexting,5.0
875,social,Pretexting (dialogue leveraging invented scena...,T1534,Internal Spearphishing,Pretexting,5.0
876,social,Pretexting (dialogue leveraging invented scena...,T1585,Establish Accounts,Pretexting,5.0
877,social,Pretexting (dialogue leveraging invented scena...,T1585.001,Establish Accounts: Social Media Accounts,Pretexting,5.0


Unnamed: 0,severity_level,count
3,Low,48
1,Moderate,132
0,High,179
2,Critical,110


Unnamed: 0,capability_group,capability_id
0,availability,40
1,confidentiality,69
2,integrity,81
