In [3]:
import os, json
import pandas as pd
import requests

In [4]:
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.expand_frame_repr', False)

In [5]:
from datetime import datetime
runDate= datetime.today().strftime('%Y%m%d')
runDate

'20220809'

In [6]:
query_nvd = False

In [7]:
input_dir = os.getenv('ANCHORE_SCAN_REPORTS_DIR', default = '../../.bob/va-reports/anchore')
output_dir = os.getenv('ANCHORE_ANALYSIS_REPORTS_DIR', default ='../../.bob/va-reports/analysis-reports/anchore/')

print(input_dir)
print(output_dir)

../../.bob/va-reports/anchore
../../.bob/va-reports/analysis-reports/anchore/


#### Report to Name Mapping

In [8]:
REPORT_NAMES={  'CVE_COUNT_BY_SEVERITY' : 'cve_count_by_severity',
                'CVE_COUNT_BY_SEVERITY_AND_AGE' : 'cve_count_by_severity_and_age',
                'IMAGE_COUNT_BY_CVE': 'image_count_by_cve',
                'DETAILED_REPORT': 'detailed_report',
                'CONSOLIDATED_EXCEL_REPORT' : f"mxe_va_analysis_report_{runDate}"
}

In [9]:
print(os.environ)

environ({'USER': 'enxxram', 'SSH_CLIENT': '100.121.61.109 49692 22', 'XDG_SESSION_TYPE': 'tty', 'SHLVL': '0', 'MOTD_SHOWN': 'pam', 'HOME': '/home/enxxram', 'DBUS_SESSION_BUS_ADDRESS': 'unix:path=/run/user/1011/bus', 'LOGNAME': 'enxxram', '_': '/mnt/disk0/enxxram/home/enxxram/.vscode-server/bin/da76f93349a72022ca4670c1b84860304616aaa2/node', 'XDG_SESSION_CLASS': 'user', 'XDG_SESSION_ID': '5293', 'PATH': '/bin:/mnt/disk0/enxxram/home/enxxram/.vscode-server/bin/da76f93349a72022ca4670c1b84860304616aaa2/bin/remote-cli:/home/enxxram/miniconda3/bin:/home/enxxram/miniconda3/condabin:/home/enxxram/.asdf/shims:/home/enxxram/.asdf/bin:/usr/lib/jvm/java-11-openjdk-amd64/bin:/home/enxxram/.krew/bin:/mnt/disk0/enxxram/home/enxxram/.linuxbrew/bin:/mnt/disk0/enxxram/home/enxxram/.linuxbrew/sbin:/home/enxxram/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/usr/local/go/bin:/home/enxxram/.fzf/bin', 'VSCODE_AGENT_FOLDER': '/home/enxxram/.vsco

In [10]:
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.expand_frame_repr', False)

In [11]:
json_files = [os.path.join(dp, f) 
            for dp, dn, filenames in os.walk(input_dir) 
                for f in filenames if f.endswith('-vuln.json')]
print(json_files)
no_of_files=len(json_files)

if no_of_files==0:
    raise Exception('Anchore scan reports not found.. Cannot proceed')

print(no_of_files)

['../../.bob/va-reports/anchore/internal-reports/keycloak-init_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/eric-mxe-gatekeeper_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/cli_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/model-packager_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/model-training-packager_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/gui_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/kubernetes-modifier_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/jupyterhub_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/author-service_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/pypi-eea_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anchore/internal-reports/model-training-service_2.4.0-dev-160-vuln.json', '../../.bob/va-reports/anc

## cvss fields which we are interested in from nvd_data section of each vuln record

In [12]:
nvd_cvss_cols=['base_score', 'impact_score', 'exploitability_score']
cvss_versions=['cvss_v2', 'cvss_v3']
cvss_dict={version:nvd_cvss_cols for version in cvss_versions}
cvss_dict

{'cvss_v2': ['base_score', 'impact_score', 'exploitability_score'],
 'cvss_v3': ['base_score', 'impact_score', 'exploitability_score']}

## Flatten json and load into pandas

Some of the vuln Ids starting with GHSA are from github security advisor

the report has a nvd_data section which gives the corresponding NVD CVE_ID using which we can lookup in NVD Database for information

if nvd section is unavaiable for a record, set nvd_data_id as NA and default all scores to -99.0 (probably this is a newly added CVE/reserved CVE - so all details aren't available from NVD)

In [13]:
def modify(input_list):
    for x in input_list:
        if len(x['nvd_data']) >=1:
            nvd_data= x['nvd_data'][0]
            for cvss_version,metrices in cvss_dict.items():
                for metric in metrices: 
                        x[f"{cvss_version}_{metric}"]=nvd_data[cvss_version][metric]
            x['nvd_data_id']=nvd_data["id"]
        else: 
            for cvss_version,metrices in cvss_dict.items():
                for metric in metrices: 
                    x[f"{cvss_version}_{metric}"]=-99.0
                x['nvd_data_id']='NA' 
        del x['nvd_data']
    return input_list

def load_json(json_file):
    with open(json_file) as jsonReport:
        json_data= json.load(jsonReport)
    return json_data

def load_json_normalized(json_file):
    json_data=load_json(json_file)
    json_data['vulnerabilities']= modify(json_data['vulnerabilities'])
    df=pd.json_normalize(json_data, record_path=['vulnerabilities'])
    df.insert(loc=0, column='image_name', value=os.path.basename(json_file))
    return df 

dfs=[load_json_normalized(json_file) for json_file in json_files]
df=pd.concat(dfs)
df.head()


Unnamed: 0,image_name,feed,feed_group,fix,package,package_cpe,package_cpe23,package_name,package_path,package_type,package_version,severity,url,vendor_data,vuln,cvss_v2_base_score,cvss_v2_impact_score,cvss_v2_exploitability_score,cvss_v3_base_score,cvss_v3_impact_score,cvss_v3_exploitability_score,nvd_data_id
0,keycloak-init_2.4.0-dev-160-vuln.json,vulnerabilities,nvd:cpe,,spring-core-5.3.19,cpe:2.3:a:springsource-spring-framework:spring...,cpe:2.3:a:springsource-spring-framework:spring...,spring-core,/pkg.jar:BOOT-INF/lib/spring-core-5.3.19.jar,java-archive,5.3.19,Critical,https://nvd.nist.gov/vuln/detail/CVE-2016-1000027,[],CVE-2016-1000027,7.5,6.4,10.0,9.8,5.9,3.9,CVE-2016-1000027
1,keycloak-init_2.4.0-dev-160-vuln.json,vulnerabilities,sles:distro:sles:15.3,0:0.177-150300.11.3.1,libdw1-0.168-4.5.3,cpe:2.3:a:susellc<https://www.suse.com/>:libdw...,cpe:2.3:a:susellc<https://www.suse.com/>:libdw...,libdw1,/usr/lib/sysimage/rpm/Packages,rpm,0.168-4.5.3,Medium,https://www.suse.com/security/cve/CVE-2017-7607,[],CVE-2017-7607,4.3,2.9,8.6,5.5,3.6,1.8,CVE-2017-7607
2,keycloak-init_2.4.0-dev-160-vuln.json,vulnerabilities,sles:distro:sles:15.3,0:0.177-150300.11.3.1,libebl-plugins-0.168-4.5.3,cpe:2.3:a:susellc<https://www.suse.com/>:libeb...,cpe:2.3:a:susellc<https://www.suse.com/>:libeb...,libebl-plugins,/usr/lib/sysimage/rpm/Packages,rpm,0.168-4.5.3,Medium,https://www.suse.com/security/cve/CVE-2017-7607,[],CVE-2017-7607,4.3,2.9,8.6,5.5,3.6,1.8,CVE-2017-7607
3,keycloak-init_2.4.0-dev-160-vuln.json,vulnerabilities,sles:distro:sles:15.3,0:0.177-150300.11.3.1,libelf1-0.168-4.5.3,cpe:2.3:a:susellc<https://www.suse.com/>:libel...,cpe:2.3:a:susellc<https://www.suse.com/>:libel...,libelf1,/usr/lib/sysimage/rpm/Packages,rpm,0.168-4.5.3,Medium,https://www.suse.com/security/cve/CVE-2017-7607,[],CVE-2017-7607,4.3,2.9,8.6,5.5,3.6,1.8,CVE-2017-7607
4,keycloak-init_2.4.0-dev-160-vuln.json,vulnerabilities,sles:distro:sles:15.3,0:0.177-150300.11.3.1,libdw1-0.168-4.5.3,cpe:2.3:a:susellc<https://www.suse.com/>:libdw...,cpe:2.3:a:susellc<https://www.suse.com/>:libdw...,libdw1,/usr/lib/sysimage/rpm/Packages,rpm,0.168-4.5.3,Medium,https://www.suse.com/security/cve/CVE-2017-7608,[],CVE-2017-7608,4.3,2.9,8.6,5.5,3.6,1.8,CVE-2017-7608


## Make list of CRITICAL and HIGH vulnerabilities (excluding those which are not there in NVD Database)

In [14]:
critical_high_list =  ['Critical', 'High']
vuln_ids=df[(df['nvd_data_id']!='NA') & (df.severity.isin(critical_high_list))]['nvd_data_id'].drop_duplicates()
vuln_ids=vuln_ids.to_list()
print(len(vuln_ids))
vuln_ids

92


['CVE-2016-1000027',
 'CVE-2022-1587',
 'CVE-2022-33068',
 'CVE-2015-5237',
 'CVE-2021-22570',
 'CVE-2022-21698',
 'CVE-2018-1000538',
 'CVE-2020-11012',
 'CVE-2021-21287',
 'CVE-2021-43858',
 'CVE-2021-30465',
 'CVE-2021-42248',
 'CVE-2021-42836',
 'CVE-2017-18589',
 'CVE-2021-27478',
 'CVE-2021-27482',
 'CVE-2021-27498',
 'CVE-2021-27500',
 'CVE-2014-1936',
 'CVE-2015-0903',
 'CVE-2016-1905',
 'CVE-2016-1906',
 'CVE-2016-7075',
 'CVE-2022-31129',
 'CVE-2021-3807',
 'CVE-2022-29217',
 'CVE-2015-4035',
 'CVE-2015-7430',
 'CVE-2017-3162',
 'CVE-2018-11804',
 'CVE-2018-17190',
 'CVE-2020-10663',
 'CVE-2020-13949',
 'CVE-2021-33036',
 'CVE-2021-34538',
 'CVE-2021-37404',
 'CVE-2021-43045',
 'CVE-2022-2048',
 'CVE-2022-25647',
 'CVE-2022-26612',
 'CVE-2022-29244',
 'CVE-2022-33891',
 'CVE-2022-29241',
 'CVE-2021-22569',
 'CVE-2022-31054',
 'CVE-2022-25856',
 'CVE-2020-9492',
 'CVE-2021-38296',
 'CVE-2022-1271',
 'CVE-2022-1304',
 'CVE-2022-1586',
 'CVE-2022-2097',
 'CVE-2022-21476',
 'CVE-

## Get CVE info from NVD DB by CVE_ID

In [16]:
import requests
def get_cve_info_from_nvd(req_id, cve_id, httpSession):
    url=f"https://services.nvd.nist.gov/rest/json/cve/1.0/{cve_id}"
    print(f"\nRequest ID={req_id} URL={url}", flush=True)
    try:
        get = httpSession.get(url)
        if get.status_code == 200:
            print(get.status_code, flush=True)
            return get.json()
        else:
            return {}
    except requests.exceptions.RequestException as e:
        print(f"{url}: is Not reachable \nErr: {e}")

## Parse required fields and make json

Parsing is mostly straightforward except for cases where the CVE is really old so does not have a CVSS_V3 score. for these records, default the cvss_v3 score to -99.0

See a sample cve json record [here](https://services.nvd.nist.gov/rest/json/cve/1.0/CVE-2020-10029)

In [17]:
def lookup_cve(req_id, cve_id, httpSession):
    cve_json=get_cve_info_from_nvd(req_id, cve_id, httpSession)
    if cve_json:
        cve=cve_json["result"]["CVE_Items"][0]
        for desc in cve['cve']['description']['description_data']:
            if desc['lang'] == 'en':
                cve_description = desc['value']
                break
        
        cvss_v3_base_score = -99.0
        cvss_v3_exploitability_score = -99.0
        cvss_v3_impact_score = -99.0
        cvss_v2_base_score = -99.0
        cvss_v2_exploitability_score = -99.0
        cvss_v2_impact_score = -99.0

        if 'baseMetricV2' in cve['impact']:
            cvss_v2_base_score = cve['impact']['baseMetricV2']['cvssV2']['baseScore']
            cvss_v2_exploitability_score= cve['impact']['baseMetricV2']['exploitabilityScore']
            cvss_v2_impact_score=cve['impact']['baseMetricV2']['impactScore']

        if 'baseMetricV3' in cve['impact']:
            cvss_v3_base_score = cve['impact']['baseMetricV3']['cvssV3']['baseScore']
            cvss_v3_exploitability_score= cve['impact']['baseMetricV3']['exploitabilityScore']
            cvss_v3_impact_score=cve['impact']['baseMetricV3']['impactScore']
        
        cve_published_date=cve['publishedDate']
        cve_last_modified_date=cve['lastModifiedDate']

        return {
            'cve_id' : cve_id,
            'cve_desc' : cve_description,
            'cvss_v2_base_score': cvss_v2_base_score,
            'cvss_v2_exploitability_score': cvss_v2_exploitability_score,
            'cvss_v2_impact_score' : cvss_v2_impact_score,
            'cvss_v3_base_score': cvss_v3_base_score,
            'cvss_v3_exploitability_score': cvss_v3_exploitability_score,
            'cvss_v3_impact_score' : cvss_v3_impact_score,
            'cve_published_date' : cve_published_date, 
            'cve_last_modified_date' : cve_last_modified_date
        }
    return {}




## HTTP Adapter to facilitate global timeout (no need to set timeout for each request)

In [18]:
from requests.adapters import HTTPAdapter

DEFAULT_TIMEOUT = 5 # seconds

class TimeoutHTTPAdapter(HTTPAdapter):
    def __init__(self, *args, **kwargs):
        self.timeout = DEFAULT_TIMEOUT
        if "timeout" in kwargs:
            self.timeout = kwargs["timeout"]
            del kwargs["timeout"]
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        timeout = kwargs.get("timeout")
        if timeout is None:
            kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)   

## Create Session with Retry and Timeout params set

In [19]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


retry_strategy = Retry(
    total=3,
    backoff_factor=0.5,
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=["HEAD", "GET", "OPTIONS"]
)

# Mount it for both http and https usage
adapter = TimeoutHTTPAdapter(timeout=10,max_retries=retry_strategy)

http_session = requests.Session()

http_session.mount("https://", adapter)
http_session.mount("http://", adapter)


## Call cve lookup func once for each Critical/High CVE ID, make json list and load to pandas

In [15]:
""" Sequential invocation. takes too long
nvd_vuln_json_data=[] 

for i, vuln_id in enumerate(vuln_ids):
    print(f"No:{i} - {vuln_id}")
    formatted_json = lookup_cve(i, vuln_id, http_session)
    if formatted_json:
        nvd_vuln_json_data.append(formatted_json)

nvd_vuln_data_df = pd.DataFrame(nvd_vuln_json_data)
print(nvd_vuln_data_df.head()) """

' Sequential invocation. takes too long\nnvd_vuln_json_data=[] \n\nfor i, vuln_id in enumerate(vuln_ids):\n    print(f"No:{i} - {vuln_id}")\n    formatted_json = lookup_cve(i, vuln_id, http_session)\n    if formatted_json:\n        nvd_vuln_json_data.append(formatted_json)\n\nnvd_vuln_data_df = pd.DataFrame(nvd_vuln_json_data)\nprint(nvd_vuln_data_df.head()) '

In [20]:
import multiprocessing

try:
    cpus = multiprocessing.cpu_count()
except NotImplementedError:
    cpus = 4   # arbitrary default

nvd_vuln_json_data=[]
def append_output(formatted_json):
    nvd_vuln_json_data.append(formatted_json)

def get_cve_data(index, vuln_id):
    json_data={}
    if query_nvd:
        print(f"\nSubmitting Request Id:{index} - {vuln_id}")
        json_data=lookup_cve(index, vuln_id, http_session)
        print(f"\nData fetch for {index} - {vuln_id} is complete")
    return json_data

pool = multiprocessing.Pool(processes=cpus)
for i, vuln_id in enumerate(vuln_ids):
    pool.apply_async(get_cve_data, args = (i, vuln_id), callback = append_output)
pool.close()
pool.join()


nvd_vuln_data_df = pd.DataFrame(nvd_vuln_json_data)
print(nvd_vuln_data_df.head())

Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4]


In [21]:
nvd_vuln_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92 entries, 0 to 91
Empty DataFrame

## Merge CVE info from NVD with anchore scan result

CASE 1: if record exists in both anchore scan and nvd df, then use the scores from nvd result. 

Reasoning:
Since nvd db is updated every 2hrs, it is possible that the numbers in scan result and those retrieved from nvd have differences especially if both run at different times

I saw occurences where anchore scan had -1.0 as the cvss score whereas there were real numbers in NVD database. Probably because the scans were run 2 weeks ago whereas the nvd database queries give the realtime results


CASE 2: If a record exists only in achore scan but not in nvd, use the result from anchore scan df

Reasoning:
Quickly checked few of these, and found that these do not exist in NVD Database, these vulnerabilities are recent and from 2021 June/July have been reserved.
Some of these also exist only in GHSA

In [22]:
def fn(rec,metric):
    if rec['_merge'] == 'both':
        return rec[f'{metric}_y']
    else:
        return rec[f'{metric}_x']
    
if query_nvd:
    merged_df= df.merge(nvd_vuln_data_df,left_on='nvd_data_id', right_on='cve_id', how='left', indicator=True)
    merged_df.head(1)
    merged_df['cvss_v2_base_score']=merged_df.apply(fn, metric='cvss_v2_base_score', axis=1)
    merged_df['cvss_v2_exploitability_score']=merged_df.apply(fn, metric='cvss_v2_exploitability_score', axis=1)
    merged_df['cvss_v3_impact_score']=merged_df.apply(fn, metric='cvss_v3_impact_score', axis=1)
    merged_df['cvss_v3_base_score']=merged_df.apply(fn, metric='cvss_v3_base_score', axis=1)
    merged_df['cvss_v3_exploitability_score']=merged_df.apply(fn, metric='cvss_v3_exploitability_score', axis=1)
    merged_df['cvss_v3_impact_score']=merged_df.apply(fn, metric='cvss_v3_impact_score', axis=1)

    merged_df.drop(['cvss_v2_base_score_x', 'cvss_v2_exploitability_score_x', 'cvss_v2_impact_score_x', 
                    'cvss_v2_base_score_y', 'cvss_v2_exploitability_score_y', 'cvss_v2_impact_score_y',
                    'cvss_v3_base_score_x', 'cvss_v3_exploitability_score_x', 'cvss_v3_impact_score_x', 
                    'cvss_v3_base_score_y', 'cvss_v3_exploitability_score_y', 'cvss_v3_impact_score_y'], inplace=True, axis=1)
    #x['cvss_v2_base_score_y'] if x['_merge']=='both' else x['cvss_v2_base_score_x'])
else:
    merged_df=df


In [23]:
merged_df.head(1)

Unnamed: 0,image_name,feed,feed_group,fix,package,package_cpe,package_cpe23,package_name,package_path,package_type,package_version,severity,url,vendor_data,vuln,cvss_v2_base_score,cvss_v2_impact_score,cvss_v2_exploitability_score,cvss_v3_base_score,cvss_v3_impact_score,cvss_v3_exploitability_score,nvd_data_id
0,keycloak-init_2.4.0-dev-160-vuln.json,vulnerabilities,nvd:cpe,,spring-core-5.3.19,cpe:2.3:a:springsource-spring-framework:spring...,cpe:2.3:a:springsource-spring-framework:spring...,spring-core,/pkg.jar:BOOT-INF/lib/spring-core-5.3.19.jar,java-archive,5.3.19,Critical,https://nvd.nist.gov/vuln/detail/CVE-2016-1000027,[],CVE-2016-1000027,7.5,6.4,10.0,9.8,5.9,3.9,CVE-2016-1000027


## Counts by Severity ordered from Critical to Unknown

In [24]:
from collections import defaultdict
SEVERITY_DICT = defaultdict(lambda: 7)
SEVERITY_DICT.update({"Critical":1, "High":2, "Medium":3, "Low":4, "Negligible":5,"Unknown":6})

In [25]:
unique_cves = merged_df[['severity', 'vuln']].drop_duplicates()
severity_counts_df = pd.DataFrame(unique_cves['severity'].value_counts())
severity_counts_df=severity_counts_df.reset_index()
severity_counts_df.columns = ['severity', 'cve_count']
severity_counts_df['rank'] = severity_counts_df['severity'].map(SEVERITY_DICT)
severity_counts_df.sort_values(by=['rank','severity', 'cve_count'],inplace=True)
severity_counts_df = severity_counts_df.drop(labels=['rank'],axis=1).reset_index(drop=True)
severity_counts_df

Unnamed: 0,severity,cve_count
0,Critical,16
1,High,87
2,Medium,220
3,Low,95
4,Negligible,17
5,Unknown,1


In [26]:
severity_counts_df.to_html(f"{output_dir}/{REPORT_NAMES['CVE_COUNT_BY_SEVERITY']}.html")
severity_counts_df.to_csv(f"{output_dir}/{REPORT_NAMES['CVE_COUNT_BY_SEVERITY']}.csv", index=False)

In [27]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4403 entries, 0 to 76
Data columns (total 22 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   image_name                    4403 non-null   object 
 1   feed                          4403 non-null   object 
 2   feed_group                    4403 non-null   object 
 3   fix                           4403 non-null   object 
 4   package                       4403 non-null   object 
 5   package_cpe                   4403 non-null   object 
 6   package_cpe23                 4403 non-null   object 
 7   package_name                  4403 non-null   object 
 8   package_path                  4403 non-null   object 
 9   package_type                  4403 non-null   object 
 10  package_version               4403 non-null   object 
 11  severity                      4403 non-null   object 
 12  url                           4403 non-null   object 
 13  vendo

In [31]:
def file_name_to_tag(rec):
    file_name=  rec['image_name']
    tagIndex = file_name.rindex('_')
    if tagIndex > 0:
        image_tag=file_name[tagIndex+1:].replace('-vuln.json', '')
        image_name=file_name[0:tagIndex]
        return f"{image_name}:{image_tag}"
    return file_name

select_cols = ['image_name', 'vuln', 'nvd_data_id',  'severity', 'feed_group', 'package_name', 'package_type']
extra_cols= ['cve_desc', 'cve_last_modified_date', 'cve_published_date']

if query_nvd:
    select_cols.extend(extra_cols)

print(select_cols)

detailed_report_df = merged_df[merged_df.columns[merged_df.columns.isin(select_cols)]].drop_duplicates()
detailed_report_df['image_name']=detailed_report_df.apply(file_name_to_tag, axis=1)

new_cols=['ImageName', 'VulnerabilityID', 'NVD_VulnerabilityID', 'Severity', 'FeedGroup', 'PackageName', 'PackageType']
extra_new_cols=['CVE_Description', 'CVE_Last_Modified_Date', 'CVE_Published_Date']
if query_nvd:
    new_cols.extend(extra_new_cols)

detailed_report_df.columns=new_cols
detailed_report_df['Rank'] = detailed_report_df['Severity'].map(SEVERITY_DICT)

detailed_report_df.sort_values(by=['Rank', 'ImageName'], ascending=[True, True],inplace=True)
detailed_report_df = detailed_report_df.drop(labels=['Rank'],axis=1).reset_index(drop=True)
detailed_report_df.head()

['image_name', 'vuln', 'nvd_data_id', 'severity', 'feed_group', 'package_name', 'package_type']


Unnamed: 0,ImageName,VulnerabilityID,NVD_VulnerabilityID,Severity,FeedGroup,PackageName,PackageType
0,argocd:v2.4.8-ubuntu-20220801,ubuntu:distro:ubuntu:20.04,login,deb,Low,CVE-2013-4235,CVE-2013-4235
1,argocd:v2.4.8-ubuntu-20220801,ubuntu:distro:ubuntu:20.04,passwd,deb,Low,CVE-2013-4235,CVE-2013-4235
2,argocd:v2.4.8-ubuntu-20220801,nvd:cpe,google.golang.org/protobuf,go-module,High,CVE-2015-5237,CVE-2015-5237
3,argocd:v2.4.8-ubuntu-20220801,ubuntu:distro:ubuntu:20.04,libc-bin,deb,Negligible,CVE-2016-20013,CVE-2016-20013
4,argocd:v2.4.8-ubuntu-20220801,ubuntu:distro:ubuntu:20.04,libc6,deb,Negligible,CVE-2016-20013,CVE-2016-20013


In [32]:
detailed_report_df.to_html(f"{output_dir}/{REPORT_NAMES['DETAILED_REPORT']}.html")
detailed_report_df.to_csv(f"{output_dir}/{REPORT_NAMES['DETAILED_REPORT']}.csv", index=False)

In [33]:
modified_base_df = merged_df[merged_df['severity'].isin(['Critical', 'High'])]
modified_base_df.head(5)

Unnamed: 0,image_name,feed,feed_group,fix,package,package_cpe,package_cpe23,package_name,package_path,package_type,package_version,severity,url,vendor_data,vuln,cvss_v2_base_score,cvss_v2_impact_score,cvss_v2_exploitability_score,cvss_v3_base_score,cvss_v3_impact_score,cvss_v3_exploitability_score,nvd_data_id
0,keycloak-init_2.4.0-dev-160-vuln.json,vulnerabilities,nvd:cpe,,spring-core-5.3.19,cpe:2.3:a:springsource-spring-framework:spring...,cpe:2.3:a:springsource-spring-framework:spring...,spring-core,/pkg.jar:BOOT-INF/lib/spring-core-5.3.19.jar,java-archive,5.3.19,Critical,https://nvd.nist.gov/vuln/detail/CVE-2016-1000027,[],CVE-2016-1000027,7.5,6.4,10.0,9.8,5.9,3.9,CVE-2016-1000027
59,keycloak-init_2.4.0-dev-160-vuln.json,vulnerabilities,sles:distro:sles:15.3,0:10.31-150000.3.12.1,libpcre2-8-0-10.31-150000.3.7.1.24547.1.PTF.12...,cpe:2.3:a:suselinuxproductsgmbh:libpcre2-8-0:1...,cpe:2.3:a:suselinuxproductsgmbh:libpcre2-8-0:1...,libpcre2-8-0,/usr/lib/sysimage/rpm/Packages,rpm,10.31-150000.3.7.1.24547.1.PTF.1201754,High,https://www.suse.com/security/cve/CVE-2022-1587,[],CVE-2022-1587,6.4,4.9,10.0,9.1,5.2,3.9,CVE-2022-1587
62,keycloak-init_2.4.0-dev-160-vuln.json,vulnerabilities,sles:distro:sles:15.3,0:2.6.4-150200.3.3.1,libharfbuzz0-2.6.4-1.56,cpe:2.3:a:susellc<https://www.suse.com/>:libha...,cpe:2.3:a:susellc<https://www.suse.com/>:libha...,libharfbuzz0,/usr/lib/sysimage/rpm/Packages,rpm,2.6.4-1.56,High,https://www.suse.com/security/cve/CVE-2022-33068,[],CVE-2022-33068,4.3,2.9,8.6,5.5,3.6,1.8,CVE-2022-33068
0,eric-mxe-gatekeeper_2.4.0-dev-160-vuln.json,vulnerabilities,nvd:cpe,,google.golang.org/protobuf-v1.23.0,cpe:2.3:a:google:protobuf:v1.23.0:*:*:*:*:*:*:*,cpe:2.3:a:google:protobuf:v1.23.0:*:*:*:*:*:*:*,google.golang.org/protobuf,,go-module,v1.23.0,High,https://nvd.nist.gov/vuln/detail/CVE-2015-5237,[],CVE-2015-5237,6.5,6.4,8.0,8.8,5.9,2.8,CVE-2015-5237
59,eric-mxe-gatekeeper_2.4.0-dev-160-vuln.json,vulnerabilities,nvd:cpe,,google.golang.org/protobuf-v1.23.0,cpe:2.3:a:google:protobuf:v1.23.0:*:*:*:*:*:*:*,cpe:2.3:a:google:protobuf:v1.23.0:*:*:*:*:*:*:*,google.golang.org/protobuf,,go-module,v1.23.0,High,https://nvd.nist.gov/vuln/detail/CVE-2021-22570,[],CVE-2021-22570,5.0,2.9,10.0,7.5,3.6,3.9,CVE-2021-22570


### Count of Modules by CVE

In [34]:

cve_with_modules_df = modified_base_df[['vuln', 'severity', 'image_name']].copy().drop_duplicates()
cve_with_modules_df.columns=['cve_id', 'severity', 'image_name']
cve_with_modules_df['image_name']=cve_with_modules_df.apply(file_name_to_tag, axis=1)
cve_with_modules_df['image_name']= cve_with_modules_df.groupby(['cve_id', 'severity'])['image_name'].transform(lambda x : '|'.join(x))
cve_with_modules_df['image_count']= cve_with_modules_df.apply(lambda rec: len(rec['image_name'].split('|')), axis=1)
cve_with_modules_df.drop_duplicates(inplace=True)
cve_with_modules_df['rank'] = cve_with_modules_df['severity'].map(SEVERITY_DICT)
cve_with_modules_df.sort_values(by=['rank','image_count'],ascending=[True, True],inplace=True)
cve_with_modules_df = cve_with_modules_df.drop(labels=['rank'],axis=1).reset_index(drop=True)

print(cve_with_modules_df)

                  cve_id  severity                                         image_name  image_count
0         CVE-2022-29155  Critical                  spark-operator:3.53.0-5-1.1.19-02            1
1    GHSA-gx2c-fvhc-ph4j  Critical                  spark-operator:3.53.0-5-1.1.19-02            1
2    GHSA-hf6f-jq25-8gq9  Critical         mxe-gitea:v1.16.7-ubuntu-20220801-rootless            1
3    GHSA-6635-c626-vj4r  Critical                      argocd:v2.4.8-ubuntu-20220801            1
4          CVE-2016-3088  Critical               eric-sec-access-mgmt-image:12.3.0-17            1
5       CVE-2018-1002105  Critical               eric-sec-access-mgmt-image:12.3.0-17            1
6         CVE-2019-12419  Critical               eric-sec-access-mgmt-image:12.3.0-17            1
7         CVE-2019-20444  Critical               eric-sec-access-mgmt-image:12.3.0-17            1
8         CVE-2019-20445  Critical               eric-sec-access-mgmt-image:12.3.0-17            1
9    GHSA-

In [35]:
cve_with_modules_df.to_html(f"{output_dir}/{REPORT_NAMES['IMAGE_COUNT_BY_CVE']}.html")
cve_with_modules_df.to_csv(f"{output_dir}/{REPORT_NAMES['IMAGE_COUNT_BY_CVE']}.csv", index=False)

## Count of High/Critical CVEs by Severity, Month and Year


Fill records which have no last_modified_date (records which don't exist in NVD rest api json feed) with last_modified_date as 1900-01-01
this is to handle NaNs

In [36]:

if query_nvd:
    cve_by_date_severity_df = modified_base_df[['severity', 'vuln', 'cve_last_modified_date']].copy()

    cve_by_date_severity_df['cve_last_modified_date'].fillna(value='1900-01-01T00:00Z', inplace=True)

    cve_by_date_severity_df['last_modified_date']=pd.to_datetime(cve_by_date_severity_df['cve_last_modified_date'])
    cve_by_date_severity_df['monthyear'] = pd.DatetimeIndex(cve_by_date_severity_df['cve_last_modified_date']).strftime('%Y-%m')

    severity_by_month_year_interim_df = cve_by_date_severity_df.sort_values(by=['last_modified_date'], ascending=False).drop(['last_modified_date'], axis=1)

    severity_by_month_year_df = severity_by_month_year_interim_df[['monthyear', 'vuln','severity']].drop_duplicates()

    severity_by_month_year_df= severity_by_month_year_df.drop("vuln", axis=1).value_counts().reset_index()

    severity_by_month_year_df.columns = ['monthyear', 'severity', 'cve_count']

    severity_by_month_year_df

KeyError: "['cve_last_modified_date'] not in index"

Pivot and get High/Critical CVEs by Category for each month year -- This is not properly ordered

In [62]:
if query_nvd:
    pivot_df = severity_by_month_year_df.pivot_table(index='monthyear', columns='severity', values='cve_count', fill_value=0)

    pivot_df

severity,Critical,High
monthyear,Unnamed: 1_level_1,Unnamed: 2_level_1
1900-01,37,108
2019-06,0,3
2020-05,1,0
2020-06,0,1
2020-07,0,2
2020-08,0,2
2020-09,0,1
2020-11,0,1
2021-01,0,2
2021-03,1,3


## Make a new date column from string monthyear column, use it for sorting and recreate the dataframe sorted

In [63]:
if query_nvd:
    format_pivot_df = pivot_df.reset_index()

    format_pivot_df['monthDate'] = pd.to_datetime(format_pivot_df['monthyear'])

    format_pivot_df.sort_values(by='monthDate', ascending=False, inplace=True)

    format_pivot_df.set_index(['monthyear'], inplace=True)

    format_pivot_df.drop(['monthDate'], inplace=True, axis=1)

    format_pivot_df

severity,Critical,High
monthyear,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09,2,6
2021-08,1,6
2021-07,5,6
2021-06,0,3
2021-05,0,1
2021-04,0,4
2021-03,1,3
2021-01,0,2
2020-11,0,1
2020-09,0,1


### Critical/High Age by Severity

In [41]:
if query_nvd:
    format_pivot_df.to_html(f"{output_dir}/{REPORT_NAMES['CVE_COUNT_BY_SEVERITY_AND_AGE']}.html")
    format_pivot_df.to_csv(f"{output_dir}/{REPORT_NAMES['CVE_COUNT_BY_SEVERITY_AND_AGE']}.csv", index=False)

In [42]:
with pd.ExcelWriter(f"{output_dir}/anchore_{REPORT_NAMES['CONSOLIDATED_EXCEL_REPORT']}.xlsx") as excelWriter: 
    detailed_report_df.to_excel(excelWriter, sheet_name=REPORT_NAMES['DETAILED_REPORT'], index=False)
    severity_counts_df.to_excel(excelWriter, sheet_name=REPORT_NAMES['CVE_COUNT_BY_SEVERITY'], index=False)
    cve_with_modules_df.to_excel(excelWriter, sheet_name=REPORT_NAMES['IMAGE_COUNT_BY_CVE'], index=False)
    if query_nvd:
        format_pivot_df.to_excel(excelWriter, sheet_name=REPORT_NAMES['CVE_COUNT_BY_SEVERITY_AND_AGE'], index=True)