## Paper Data

In [1]:
import pandas as pd
import settings
import re
import numpy as np

In [2]:
FRAMEWORKS = ['tensorflow','pytorch','opencv','keras','caffe']

DISTILLED_DIR = settings.DATA_CONFIG['distilled_dir']
MANUAL_DIR = settings.DATA_CONFIG['manual_dir']
MANUAL_VULN_FILE = MANUAL_DIR + 'vulnerability_manual.xlsx'
MANUAL_SHEET_NAME = 'manual'

In [3]:
def get_vuln_manual(filename, sheetname):
    return pd.read_excel(filename,sheet_name=sheetname)

In [4]:
df = get_vuln_manual(filename=MANUAL_VULN_FILE, sheetname=MANUAL_SHEET_NAME)

#### Table 1:Statistics of the studied deep learning frameworks in our studys

In [5]:
df['Framework'].value_counts()

tensorflow    2154
opencv         563
pytorch        217
caffe           70
keras           45
Name: Framework, dtype: int64

#### Table 3:Latent Vulnerabilities identified in Iteration 1

In [6]:
def get_vuln_pr(framework):
    filename = DISTILLED_DIR + f'vuln_{framework}_pr.csv'
    return pd.read_csv(filename)

def get_vuln_commit(framework):
    filename = DISTILLED_DIR + f'vuln_{framework}_commit.csv'
    return pd.read_csv(filename)

print(f"{'Framework':10} | {'   PR':5} | {'Commit':6}")
total_pr = 0
total_commit = 0
for framework in FRAMEWORKS:
    pr =  get_vuln_pr(framework)
    commit = get_vuln_commit(framework)
    print(f"{framework:10} | {len(pr):5} | {len(commit):6}")
    total_pr += len(pr)
    total_commit += len(commit)
print(f"{'Total':10} | {total_pr:5} | {total_commit:6}")

Framework  |    PR | Commit
tensorflow |  1950 |   3072
pytorch    |   286 |    930
opencv     |   735 |   2137
keras      |    97 |    373
caffe      |    96 |    241
Total      |  3164 |   6753


In [7]:
df = get_vuln_manual(filename=MANUAL_VULN_FILE, sheetname=MANUAL_SHEET_NAME)
df_ = df[df['Source']  == 'Official'].reset_index(drop=True)
df_ = df_[['Framework','CVE ID']]

In [8]:
total_CVE = df_['Framework'].value_counts().sum()
df_['Framework'].value_counts()
print(f'Total:{total_CVE}')

Total:443


In [9]:
total_record = total_CVE + total_pr + total_commit
total_record

10360

#### Table 4: The vulnerability types based on CWE version 4.9 from analytic data

**CWE Pillars**

CWE-664 Improper Control of a Resource Through its Lifetime    
CWE-682 Incorrect Calculation    
CWE-691 Insufficient Control Flow Management    
CWE-693 Protection Mechanism Failure    
CWE-703 Improper Check or Handling of Exceptional Conditions    
CWE-707 Improper Neutralization    
CWE-710 Improper Adherence to Coding Standards   

In [10]:
Pillars = {
    'Improper Control of a Resource Through its Lifetime': 'CWE-664',
    'Incorrect Calculation':'CWE-682',
    'Insufficient Control Flow Management': 'CWE-691', 
    'Protection Mechanism Failure':'CWE-693',
    'Improper Check or Handling of Exceptional Conditions':'CWE-703',
    'Improper Neutralization':'CWE-707',
    'Improper Adherence to Coding Standards':'CWE-710',
}

In [11]:
df_ = df[['CWE Pillar', 'CWE ID']].copy()
# replace CWE Pillar `Other` to nan
df_.loc[df_['CWE Pillar'] == 'Other', 'CWE Pillar'] = np.nan
# replace Noisy CWE ID
CWE_NOISE = ['NVD-CWE-noinfo', 'CWE-Other', 'NVD-CWE-Other']
for cwe in CWE_NOISE:
    df_.loc[df_['CWE ID'] == cwe, 'CWE ID'] = np.nan
# replace Pillar to CWE ID
for k, v in Pillars.items():
    df_.loc[df_['CWE Pillar'] == k, 'CWE Pillar'] = v
    
df_.dropna(inplace=True)

In [12]:
PillarIDs = list(df_['CWE Pillar'].unique())

In [13]:
total_cwe = len(PillarIDs)
for p in sorted(PillarIDs, key=lambda x: int(x[4:])):
    cwe = list(df_[df_['CWE Pillar'] == p]['CWE ID'].unique())
    if p in cwe:
        cwe.remove(p)
    total_cwe += len(cwe)
    print(f'Pi {p}: CWE {len(cwe)}: {sorted(cwe, key=lambda x: int(x[4:]))}')
print('Total:',total_cwe)

Pi CWE-664: CWE 33: ['CWE-22', 'CWE-94', 'CWE-118', 'CWE-119', 'CWE-120', 'CWE-125', 'CWE-134', 'CWE-197', 'CWE-400', 'CWE-401', 'CWE-404', 'CWE-413', 'CWE-415', 'CWE-416', 'CWE-459', 'CWE-471', 'CWE-502', 'CWE-662', 'CWE-665', 'CWE-667', 'CWE-668', 'CWE-681', 'CWE-704', 'CWE-770', 'CWE-787', 'CWE-789', 'CWE-820', 'CWE-824', 'CWE-825', 'CWE-843', 'CWE-883', 'CWE-908', 'CWE-913']
Pi CWE-682: CWE 6: ['CWE-131', 'CWE-190', 'CWE-191', 'CWE-193', 'CWE-369', 'CWE-1339']
Pi CWE-691: CWE 9: ['CWE-248', 'CWE-362', 'CWE-431', 'CWE-617', 'CWE-670', 'CWE-674', 'CWE-696', 'CWE-705', 'CWE-835']
Pi CWE-693: CWE 3: ['CWE-311', 'CWE-354', 'CWE-778']
Pi CWE-703: CWE 6: ['CWE-237', 'CWE-241', 'CWE-252', 'CWE-280', 'CWE-754', 'CWE-755']
Pi CWE-707: CWE 5: ['CWE-20', 'CWE-77', 'CWE-1284', 'CWE-1285', 'CWE-1287']
Pi CWE-710: CWE 4: ['CWE-476', 'CWE-476 ', 'CWE-561', 'CWE-1357']
Total: 73
