In [3]:
import pandas as pd
from mitreattack.stix20 import MitreAttackData
#dash mitreattack-python


# Load the ATT&CK Enterprise bundle once; later cells query this object for
# group ids and the techniques each group uses.
attack_bundle_path = "../data/enterprise-attack.json"
mitre_attack_data = MitreAttackData(attack_bundle_path)


In [4]:
# Incidents attributed to each sponsoring country.
# The result has one row per 'Sponsor' with the number of non-null 'Title'
# entries recorded for it. The author's working assumption: a group coming
# from a country with many sponsored incidents is more likely to be sponsored.
country_sponsor_df = (
    pd.read_csv('../data/cyber_operations_incidents.csv')
    .groupby('Sponsor')['Title']
    .count()
    .reset_index()
)
# display(country_sponsor_df)

In [5]:
# NIST 800-53 mapping: relates NIST controls to MITRE ATT&CK techniques.
# Rows whose mapping_type is 'non_mappable' are discarded, then the
# bookkeeping columns (including mapping_type itself) are removed.
nist_unused_columns = [
    'mapping_type',
    'attack_version',
    'technology_domain',
    'references',
    'comments',
    'organization',
    'creation_date',
    'last_update',
    'mapping_framework_version',
    'mapping_framework',
    'Unnamed: 0',
]

nist_df = pd.read_csv('../data/nist_800_53_mapping.csv')
nist_mappable = nist_df['mapping_type'] != 'non_mappable'
nist_df = nist_df[nist_mappable].drop(columns=nist_unused_columns)
# display(nist_df)

In [13]:
# CVE mapping: relates CVEs to the ATT&CK techniques they involve.
# The mapping_type column is kept because the impact category is what matters
# for the later analysis. Author's reading of the categories:
#   1. exploitation technique (lower impact: recon or initial steps)
#   2. primary impact         (higher impact: the technique already succeeded)
#   3. secondary impact       (higher impact: follows on from the primary impact)
cve_unused_columns = [
    'attack_version',
    'technology_domain',
    'references',
    'comments',
    'organization',
    'creation_date',
    'last_update',
    'mapping_framework_version',
    'mapping_framework',
    'Unnamed: 0',
]

cve_df = pd.read_csv('../data/cve_mapping.csv').drop(columns=cve_unused_columns)
display(cve_df.tail())

# cve_df[cve_df['mapping_type'].str.contains('primary_impact')]


Unnamed: 0,capability_group,capability_id,capability_description,mapping_type,attack_object_id,attack_object_name
1701,2018 CVEs,CVE-2018-19833,,uncategorized,T1565.001,Stored Data Manipulation
1702,2019 CVEs,CVE-2019-13533,Omron PLC CJ and CS Series,uncategorized,T0855,Unauthorized Command Message
1703,2019 CVEs,CVE-2019-13533,Omron PLC CJ and CS Series,uncategorized,T0842,Network Sniffing
1704,2019 CVEs,CVE-2019-10980,LCDS LAquis SCADA,uncategorized,T0873,Project File Infection
1705,2019 CVEs,CVE-2019-10980,LCDS LAquis SCADA,uncategorized,T1203,Exploitation for Client Execution


In [7]:
# VERIS mapping: relates VERIS categories to ATT&CK techniques, to help
# identify how an attacker targets an organisation.
# Author's list of categories: hacking, malware, integrity, confidentiality,
# availability, social, valuechain (persistent).

veris_df = pd.read_csv('../data/veris_attack_mapping.csv')

# Discard rows flagged as non-mappable, then remove the bookkeeping columns.
veris_df = veris_df[veris_df['mapping_type'] != 'non_mappable']
veris_df = veris_df.drop(columns=['attack_version',
                                  'technology_domain',
                                  'mapping_type',
                                  'references',
                                  'comments',
                                  'organization',
                                  'creation_date',
                                  'last_update',
                                  'mapping_framework_version',
                                  'mapping_framework', 'Unnamed: 0'])

# Rows whose capability_group mentions "action".
# .copy() makes this an independent frame, so the column rewrites below do not
# write into a slice of veris_df (which raises SettingWithCopyWarning).
veris_df_action = veris_df[veris_df['capability_group'].str.contains('action')].copy()
# Strip the literal 'action.' prefix from the group ...
veris_df_action['capability_group'] = (
    veris_df_action['capability_group'].str.replace('action.', '', regex=False)
)
# ... and the 'action.<word>.variety.' / 'action.<word>.vector.' prefix from the id.
veris_df_action['capability_id'] = (
    veris_df_action['capability_id'].str.replace(r'action\.\w+\.(variety|vector)\.', '', regex=True)
)

# Rows whose capability_group mentions "attribute".
veris_df_attribute = veris_df[veris_df['capability_group'].str.contains('attribute')].copy()
# Strip the literal 'attribute.' prefix from the attribute frame itself.
# Previously this replacement was applied to veris_df_action, which left
# veris_df_attribute uncleaned.
veris_df_attribute['capability_group'] = (
    veris_df_attribute['capability_group'].str.replace('attribute.', '', regex=False)
)

# Global display option: show every row of any frame displayed from here on.
pd.set_option('display.max_rows', None)

display(veris_df_action.head())
# display(veris_df_attribute)




# vdf = veris_df_action.groupby('capability_id')['attack_object_id'].apply(lambda x: ', '.join(x)).reset_index()

# display(vdf)
# display(veris_df_attribute)

Unnamed: 0,capability_group,capability_id,capability_description,attack_object_id,attack_object_name
0,hacking,Abuse of functionality,Abuse of functionality.,T1047,Windows Management Instrumentation
1,hacking,Command shell,Remote shell,T1047,Windows Management Instrumentation
2,malware,Direct install,Directly installed or inserted by threat agent...,T1047,Windows Management Instrumentation
3,hacking,Abuse of functionality,Abuse of functionality.,T1053,Scheduled Task/Job
4,hacking,Backdoor,Hacking action that creates a backdoor for use.,T1053,Scheduled Task/Job


In [11]:
# CVE -> CWE and severity table.
# Each CVE row carries its CWE id, CVSS v2/v3 scores and a severity label.
# Columns are renamed to snake_case, and the CVE id column is renamed to
# 'capability_id' so it has the same name as the CVE id column of the mapping
# frames it is merged with later.
cwe_column_names = {
    "CVE-ID": "capability_id",
    "CVSS-V3": "cvss_v3",
    "CVSS-V2": "cvss_v2",
    "SEVERITY": "severity",
    "DESCRIPTION": "description",
    "CWE-ID": "cwe_id",
    "ID": "id",
}

cwe_df = pd.read_excel('../data/cve_to_cwe.xlsx').rename(columns=cwe_column_names)
# Lower-case the severity labels.
cwe_df['severity'] = cwe_df['severity'].str.lower()
cwe_df.tail()
# display(cwe_df)

Unnamed: 0,id,capability_id,cvss_v3,cvss_v2,severity,description,cwe_id
169561,169562,CVE-2021-45470,7.5,5.0,high,lib/DatabaseLayer.py in cve-search before 4.1....,NVD-CWE-Other
169562,169563,CVE-2021-45471,5.3,5.0,medium,"In MediaWiki through 1.37, blocked IP addresse...",NVD-CWE-noinfo
169563,169564,CVE-2021-45472,6.1,4.3,medium,"In MediaWiki through 1.37, XSS can occur in Wi...",CWE-79
169564,169565,CVE-2021-45473,6.1,4.3,medium,"In MediaWiki through 1.37, Wikibase item descr...",CWE-79
169565,169566,CVE-2021-45474,6.1,4.3,medium,"In MediaWiki through 1.37, the Special:ImportF...",CWE-79


In [9]:

# Build a {group ATT&CK id -> [technique ATT&CK ids]} lookup from the bundle.
technique_using_groups = mitre_attack_data.get_all_techniques_used_by_all_groups()

# Each key of technique_using_groups is passed to get_attack_id() to obtain
# the group's ATT&CK id (e.g. 'G0007'); each value is a list of entries whose
# 'object' holds the technique.
# The loop variables are deliberately not called `id`: at notebook scope that
# would shadow the builtin id() for every later cell.
# NOTE(review): the technique id is read from the first external reference,
# which assumes that reference is always the ATT&CK one — confirm.
groups_list = {
    mitre_attack_data.get_attack_id(group_stix_id): [
        entry['object'].external_references[0].external_id
        for entry in used_techniques
    ]
    for group_stix_id, used_techniques in technique_using_groups.items()
}

# Default selection used by the analysis below.
group_id = 'G0007'
ttps = groups_list[group_id]

{'G0119': ['T1003.001', 'T1587.001', 'T1136', 'T1036.005', 'T1007', 'T1070.001', 'T1562.001', 'T1074.001', 'T1059.001', 'T1078.002', 'T1059.003', 'T1484.001', 'T1047', 'T1486', 'T1018', 'T1059.007', 'T1585.002', 'T1105', 'T1489', 'T1204.002', 'T1584.004'], 'G1014': ['T1539', 'T1567.002', 'T1566.002', 'T1588.001', 'T1030', 'T1564.001', 'T1608.001', 'T1091', 'T1041', 'T1608.004', 'T1608.005', 'T1587.001', 'T1071.001', 'T1105', 'T1557.002', 'T1588.002', 'T1005', 'T1204.001', 'T1574.002', 'T1547.001', 'T1083', 'T1033', 'T1560', 'T1036.005', 'T1112', 'T1053.005', 'T1588.004', 'T1553.002'], 'G0102': ['T1136.001', 'T1588.003', 'T1210', 'T1560.001', 'T1059.003', 'T1047', 'T1588.002', 'T1543.003', 'T1021.002', 'T1074', 'T1078.002', 'T1055', 'T1021', 'T1021.001', 'T1550.002', 'T1222.001', 'T1570', 'T1204.002', 'T1053.005', 'T1027.010', 'T1070.004', 'T1552.006', 'T1048.003', 'T1518.001', 'T1218.011', 'T1558.003', 'T1059.001', 'T1567.002', 'T1112', 'T1490', 'T1133', 'T1547.004', 'T1036.004', 'T108

In [10]:

# Summarise the CVE, CWE-severity, NIST and VERIS mappings for a set of
# ATT&CK techniques (by default: the techniques used by one group).

# Group whose techniques are analysed (another example id: 'G0045').
group_id = 'G0007'
ttps = groups_list[group_id]
# NOTE(review): this override narrows the analysis to a single technique even
# though the header printed below still names the group. Remove this line to
# analyse every technique the group uses.
ttps = ['T1136']

print(f"{group_id} group uses")

# NIST controls mapped to any of the selected techniques.
nistviolations = nist_df.loc[nist_df['attack_object_id'].isin(ttps)].reset_index(drop=True)
# Keep one row per control: the same control (e.g. AC-02) can be mapped to
# several techniques but counts as a single violation.
nistviolations = nistviolations.drop_duplicates(subset=['capability_id'])

# CVEs mapped to any of the selected techniques, one row per CVE id.
cve_list = cve_df.loc[cve_df['attack_object_id'].isin(ttps)].reset_index(drop=True)
cve_list = cve_list.drop_duplicates(subset=['capability_id'])

# VERIS rows for the selected techniques. Kept under its own name so that
# veris_df stays unfiltered and this cell can be re-run with a different
# technique list.
veris_matches = veris_df.loc[veris_df['attack_object_id'].isin(ttps)].reset_index(drop=True)

print(" ")
print("cves and their impact type")
print("----------------------------")
display(cve_list.groupby('mapping_type')['capability_id'].count().reset_index())

# Join the selected CVEs with the CWE/severity table on the CVE id; the inner
# join drops CVEs that have no entry in cwe_df.
print("cwes severity")
print("----------------------------")
cve_serverity = pd.merge(cve_list, cwe_df, on='capability_id', how='inner')
display(cve_serverity.groupby('severity')['capability_id'].count().reset_index())


print("nist violations")
print("----------------------------")
display(nistviolations.groupby('capability_group')['capability_id'].count().reset_index())


print("veris (trying to find how the group works the main techniques)")
print("----------------------------")
display(veris_df_action.loc[veris_df_action['attack_object_id'].isin(ttps)])
# display(veris_matches.groupby('capability_group')['capability_id'].count().reset_index())

G0007 group uses
 
cves and their impact type
----------------------------


Unnamed: 0,mapping_type,capability_id
0,exploitation_technique,1
1,secondary_impact,18
2,uncategorized,4


cwes severity
----------------------------


Unnamed: 0,severity,capability_id
0,critical,2
1,high,20
2,medium,1


nist violations
----------------------------


Unnamed: 0,capability_group,capability_id,capability_description,attack_object_id,attack_object_name
0,Access Control,AC-02,Account Management,T1136,Create Account
1,Access Control,AC-20,Use of External Systems,T1136,Create Account
2,Access Control,AC-03,Access Enforcement,T1136,Create Account
3,Access Control,AC-04,Information Flow Enforcement,T1136,Create Account
4,Access Control,AC-05,Separation of Duties,T1136,Create Account
5,Access Control,AC-06,Least Privilege,T1136,Create Account
6,Configuration Management,CM-05,Access Restrictions for Change,T1136,Create Account
7,Configuration Management,CM-06,Configuration Settings,T1136,Create Account
8,Configuration Management,CM-07,Least Functionality,T1136,Create Account
9,Identification and Authentication,IA-02,Identification and Authentication (organizatio...,T1136,Create Account


Unnamed: 0,capability_group,capability_id
0,Access Control,6
1,Configuration Management,3
2,Identification and Authentication,2
3,System and Communications Protection,2
4,System and Information Integrity,2


veris (trying to find how the group works the main techniques)
----------------------------


Unnamed: 0,capability_group,capability_id,capability_description,attack_object_id,attack_object_name
186,hacking,Backdoor,Hacking action that creates a backdoor for use.,T1136,Create Accounts
187,hacking,Backdoor,Hacking actions taken through a backdoor. C2 ...,T1136,Create Accounts
188,malware,Modify data,Malware which compromises a legitimate file ra...,T1136,Create Accounts


Deduction based cycle