In [1]:
import pandas as pd
from mitreattack.stix20 import MitreAttackData

mitre_attack_data = MitreAttackData("enterprise-attack.json")

In [2]:
# nist data preprocessing and cleaning
# this data maps nist violations against mitre techniques
# nist mapping
nist_df = pd.read_csv('nist_800_53_mapping.csv')
nist_df = nist_df.drop(nist_df[nist_df['mapping_type']=='non_mappable'].index)
nist_df = nist_df.drop(columns=['mapping_type','attack_version',
                                'technology_domain',
                                'references',
                                'comments',
                                'organization',
                                'creation_date',
                                'last_update',
                                'mapping_framework_version',
                                'mapping_framework', 'Unnamed: 0'])
# display(nist_df)

In [3]:
# cve data preprocessing and cleaning
# this dataset maps techniques in maps to according cves
# here the impact is import
# the string is like 
# -> 1. execution technique  (low impact as recon or initial steps)
# -> 2. primary impact       (higher impact as technique is already successful)
# -> 3. secondary impact     (higher impact as penetrates after primary impact)
# the
cve_df  = pd.read_csv('cve_mapping.csv')
cve_df = cve_df.drop(columns=['attack_version',
                                'technology_domain',
                                'references',
                                'comments',
                                'organization',
                                'creation_date',
                                'last_update',
                                'mapping_framework_version',
                                'mapping_framework', 'Unnamed: 0'])
# display(cve_df)

# cve_df[cve_df['mapping_type'].str.contains('primary_impact')]


In [4]:
# this dataset pertains to veris
# this will help us identify how the attacker will target organization
# it categorizes ttps into various as per below list
# hacking, malware, integrity, confidentiality, availability, social, valuechain(persistant)

veris_df  = pd.read_csv('veris_attack_mapping.csv')

veris_df = veris_df.drop(veris_df[veris_df['mapping_type']=='non_mappable'].index)

veris_df = veris_df.drop(columns=['attack_version',
                                'technology_domain',
                                'mapping_type',
                                'references',
                                'comments',
                                'organization',
                                'creation_date',
                                'last_update',
                                'mapping_framework_version',
                                'mapping_framework', 'Unnamed: 0'])
# display(veris_df)

In [5]:
# cve to cwe and severity matrix
# this maps the corresponding cves to cwes and in turn gives the vulnerability score
# severity is classified and the score is also there
cwe_df = pd.read_excel('cve_to_cwe.xlsx')

cwe_df.rename(columns={"CVE-ID": "capability_id",
                       "CVSS-V3": "cvss_v3",
                       "CVSS-V2": "cvss_v2",
                       "SEVERITY": "severity",
                       "DESCRIPTION": "description",
                       "CWE-ID": "cwe_id",
                       "ID": "id",
                       }, inplace=True)
cwe_df['severity'] = cwe_df['severity'].str.lower()
# display(cwe_df)

In [51]:

# get the data of techniques used by all the groups
# below preprocessing will be done of data to extract techniques
technique_using_groups = mitre_attack_data.get_all_techniques_used_by_all_groups()


# just skip over the function no need to use a lot brain !!
# extracting techniques used by a group
groups_list = []

for id, technique  in technique_using_groups.items():
    group_id = mitre_attack_data.get_attack_id(id)
    ttp_list = []

    # get ttp ids of techniques
    for t in technique:
        external_id = t['object'].external_references[0].external_id 
        ttp_list.append(external_id)

    groups_list.append([group_id,ttp_list])

In [53]:

# get the cves related to ttps in one of the groups


for group_id, ttps  in groups_list:
    # comment this line if you want to get info for all the groups
    # 'G0045'
    if group_id !=  'G0007': continue

    print(f"{group_id} group uses")

    # get all nist violations by one technique(ttp)
    # nistviolations = nist_df.loc[nist_df['attack_object_id'].isin(ttps), 'capability_id'].reset_index(drop=True)
    nistviolations = nist_df.loc[nist_df['attack_object_id'].isin(ttps)].reset_index(drop=True)
    # filter duplicates (ex. t1001 & 1002 both has access control violations AC02, but that is only one record)
    nistviolations = nistviolations.drop_duplicates(subset=['capability_id'])

    # filter if ttp matches
    # cve_list = cve_df.loc[cve_df['attack_object_id'].isin(ttps), 'capability_id'].reset_index(drop=True)
    cve_list = cve_df.loc[cve_df['attack_object_id'].isin(ttps)].reset_index(drop=True)
    # remove duplicates
    cve_list = cve_list.drop_duplicates(subset=['capability_id'])

    #veris data
    veris_df = veris_df.loc[veris_df['attack_object_id'].isin(ttps)].reset_index(drop=True)

    # display(cve_list)
    print(" ")
    print("cves and their impact type")
    print("----------------------------")
    display(cve_list.groupby('mapping_type')['capability_id'].count().reset_index())

    # get severity of a cve with the cwe list
    print("cwes severity")
    print("----------------------------")
    cve_serverity = pd.merge(cve_list, cwe_df, left_on='capability_id', right_on='capability_id', how='inner')
    display(cve_serverity.groupby('severity')['capability_id'].count().reset_index())


    print("nist violations")
    print("----------------------------")
    # display(nistviolations)
    display(nistviolations.groupby('capability_group')['capability_id'].count().reset_index())

    

    print("veris (trying to find how the group works the main techniques)")
    print("----------------------------")
    # display(nistviolations)
    display(veris_df.groupby('capability_group')['capability_id'].count().reset_index())
    break

G0085 group uses
 
cves and their impact type
----------------------------


Unnamed: 0,mapping_type,capability_id
0,exploitation_technique,98
1,primary_impact,7
2,secondary_impact,2
3,uncategorized,44


cwes severity
----------------------------


Unnamed: 0,severity,capability_id
0,critical,8
1,high,82
2,low,4
3,medium,58


nist violations
----------------------------


Unnamed: 0,capability_group,capability_id
0,Access Control,9
1,Configuration Management,6
2,Identification and Authentication,4
3,Risk Assessment,1
4,Security Assessment and Authorization,2
5,Supply Chain Risk Management,1
6,System and Communications Protection,12
7,System and Information Integrity,9
8,System and Services Acquisition,7


veris (trying to find how the group works the main techniques)
----------------------------


Unnamed: 0,capability_group,capability_id
0,action.malware,5
1,action.social,5
