In [24]:
import codecs
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import random
from IPython.display import clear_output
from ast import literal_eval

import settings
from utils import vplot, vprint

In [25]:
# Framework identifiers (lowercase strings).
FRAMEWORKS = ['tensorflow', 'opencv', 'pytorch', 'keras', 'caffe']

# Data directories come from the project-level settings module.
FILTERED_DIR = settings.DATA_CONFIG['distilled_dir']
MANUAL_DIR = settings.DATA_CONFIG['manual_dir']

# Built with os.path.join rather than '+' so the paths are valid whether or
# not MANUAL_DIR ends with a path separator.
VULN_FILE = os.path.join(MANUAL_DIR, 'vulnerability.xlsx')
VULN_SHEET_NAME = 'manual'

# Column names for the manual sheet (not referenced by the cells visible here).
MANUAL_COLUMNS = [
    'CVE ID', 'framework', 'Description', 'Taxonomy', 'Root Cause',
    'Fix Pattern', 'Symptom', 'CWE ID', 'CWE Name',
    'commit hash', 'commit description',
    # CVSS v2 metrics
    'CVSS2 Access Complexity', 'CVSS2 Authentication Required',
    'CVSS2 Availability Impact', 'CVSS2 Confidentiality Impact',
    'CVSS2 Score', 'CVSS2 Integrity Impact',
    # CVSS v3 metrics
    'CVSS3attackVector', 'CVSS3attackComplexity', 'CVSS3privilegesRequired',
    'CVSS3userInteraction', 'CVSS3scope', 'CVSS3confidentialityImpact',
    'CVSS3integrityImpact', 'CVSS3availabilityImpact', 'CVSS3baseScore',
    'CVSS3baseSeverity',
]

# Workbook that read_csv() loads and the final cell writes back to.
OUTPUT_FILE = os.path.join(MANUAL_DIR, 'vulnerability_official.xlsx')

In [40]:
def read_csv():
    """Load the 'manual' sheet of OUTPUT_FILE with every column cast to pandas 'string' dtype.

    Despite the name, the source is an Excel workbook read via ``pd.read_excel``,
    not a CSV file.

    Returns:
        pandas.DataFrame: the sheet contents, all columns as 'string' dtype.
    """
    sheet = pd.read_excel(OUTPUT_FILE, sheet_name='manual')
    return sheet.astype('string')

In [41]:
# Load the 'manual' sheet of OUTPUT_FILE (all columns as strings); the merge loop further down updates this frame in place.
df = read_csv()

In [28]:
def get_vuln():
    """Read the VULN_SHEET_NAME sheet of the VULN_FILE workbook.

    Returns:
        pandas.DataFrame: the sheet contents with pandas' inferred dtypes.
    """
    return pd.read_excel(VULN_FILE, sheet_name=VULN_SHEET_NAME)

In [29]:
# Load the VULN_SHEET_NAME sheet of VULN_FILE; this frame is the lookup source passed to find_row.
vuln_df = get_vuln()

In [30]:
def find_row(cve, df):
    """Look up a CVE in a DataFrame by its 'CVE ID' column.

    Args:
        cve: value compared for equality against ``df['CVE ID']``.
        df: DataFrame that has a 'CVE ID' column.

    Returns:
        The first matching row as a pandas Series, or None when no row matches.
    """
    is_match = df['CVE ID'] == cve
    matches = df[is_match]
    # Only the first hit is returned, even if the CVE ID appears several times.
    return None if matches.empty else matches.iloc[0]

In [42]:
# Spot-check the lookup on a single CVE ID; the returned row is displayed as the cell output.
find_row(cve='CVE-2019-15939',df=vuln_df)

Framework                                                          opencv
Description             An issue was discovered in OpenCV 4.1.0. There...
CVE ID                                                     CVE-2019-15939
Root Cause                                          Commits Not Traceable
Fixing Pattern                                                        NaN
Symptom                                                               NaN
Taxonomy                                                   Divide by Zero
CWE ID                                                            CWE-369
CWE Name                                                   Divide by Zero
CWE Pillar                                          Incorrect Calculation
Line Addition                                                         NaN
Line Deletion                                                         NaN
Line Changed                                                          NaN
File Changed                          

In [43]:
# Copy the manually labelled fields from vuln_df into df, matching rows on 'CVE ID'.
# Rows of df whose CVE ID is not found in vuln_df are left untouched.
#
# NOTE(review): MANUAL_COLUMNS lists 'Fix Pattern', while this cell writes
# 'Fixing Pattern' (the name shown in the vuln_df lookup output) -- confirm
# which column df is meant to carry.
synced_columns = ['Taxonomy', 'Root Cause', 'Fixing Pattern', 'Symptom', 'CWE Name']

total = len(df)
for position, (idx, manual_row) in enumerate(df.iterrows(), start=1):
    # Separate names for the df row and the looked-up vuln_df row, so the
    # loop variable is not rebound inside its own loop.
    vuln_row = find_row(cve=manual_row['CVE ID'], df=vuln_df)
    if vuln_row is not None:
        for column in synced_columns:
            df.at[idx, column] = vuln_row[column]
    # Progress is counted by position rather than by index label, and is
    # reported for every row, so the counter always finishes at total/total.
    clear_output(wait=True)
    print(f"({position}/{total})")

(465/465)


In [45]:
# Write the updated frame back to OUTPUT_FILE -- the same workbook read_csv() loaded -- as sheet 'manual', without the index column.
# NOTE(review): presumably this replaces the whole file, so any other sheets in it would not be kept -- confirm.
df.to_excel(OUTPUT_FILE,sheet_name='manual', index=False)  