In [1]:
import pandas as pd
import json

# Load the CSV file
df = pd.read_csv('../data/vuln_catalogue.csv')

def extract_scores(full_json):
    try:
        data = json.loads(full_json)
        exploitability = data.get('exploitabilityScore', None)
        impact = data.get('impactScore', None)
        # Some NVD records nest these inside 'impact' or 'metrics' subdicts
        # Try other locations if needed
        if exploitability is None or impact is None:
            # Try typical NVD v3.1 structure
            metrics = data.get('metrics', {})
            if 'cvssMetricV31' in metrics:
                metrics_list = metrics['cvssMetricV31']
                if isinstance(metrics_list, list) and len(metrics_list) > 0:
                    sub = metrics_list[0].get('cvssData', {})
                    exploitability = sub.get('exploitabilityScore', exploitability)
                    impact = sub.get('impactScore', impact)
        return pd.Series({'exploitabilityScore': exploitability, 'impactScore': impact})
    except Exception as e:
        return pd.Series({'exploitabilityScore': None, 'impactScore': None})

# Apply extraction to the 'full_json' column
df[['exploitabilityScore', 'impactScore']] = df['full_json'].apply(extract_scores)

# Save to new CSV
# df.to_csv('vuln_catalogue_with_scores.csv', index=False)

df.head()

Unnamed: 0.1,Unnamed: 0,sid,Title,cpeName,cveID,published,last_modified,vectorString,baseScore,baseSeverity,...,confidentialityImpact,integrityImpact,availabilityImpact,cwes,description,references,tags,full_json,exploitabilityScore,impactScore
0,0,0,Alteryx Server 2022.1.1.42590,cpe:2.3:a:alteryx:alteryx_server:2022.1.1.4259...,CVE-2023-26961,2023-08-08T20:15:10.080,2024-11-21T07:52:07.460,CVSS:3.1/AV:N/AC:L/PR:H/UI:R/S:C/C:L/I:L/A:N,4.8,MEDIUM,...,LOW,LOW,NONE,CWE-79,Alteryx Server 2022.1.1.42590 does not employ ...,http://alteryx.com | https://gist.github.com/D...,"Vendor Advisory, Exploit, Third Party Advisory...","{'cve': {'id': 'CVE-2023-26961', 'sourceIdenti...",,
1,1,1,Oracle SuiteCommerce Advanced,cpe:2.3:a:oracle:suitecommerce_advanced:-:*:*:...,CVE-2020-14728,2020-08-27T00:15:12.050,2024-11-21T05:03:59.317,CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:C/C:L/I:L/A:N,5.4,MEDIUM,...,LOW,LOW,NONE,NVD-CWE-noinfo,Vulnerability in the SuiteCommerce Advanced (S...,https://system.netsuite.com/app/help/helpcente...,"Permissions Required, Vendor Advisory, Permiss...","{'cve': {'id': 'CVE-2020-14728', 'sourceIdenti...",,
2,2,2,Oracle SuiteCommerce Advanced,cpe:2.3:a:oracle:suitecommerce_advanced:-:*:*:...,CVE-2020-14729,2020-08-27T00:15:12.097,2024-11-21T05:03:59.443,CVSS:3.1/AV:N/AC:H/PR:L/UI:R/S:U/C:L/I:H/A:N,5.4,MEDIUM,...,LOW,HIGH,NONE,NVD-CWE-noinfo,Vulnerability in SuiteCommerce Advanced (SCA) ...,https://system.netsuite.com/app/help/helpcente...,"Permissions Required, Vendor Advisory, Permiss...","{'cve': {'id': 'CVE-2020-14729', 'sourceIdenti...",,
3,3,3,Oracle SuiteCommerce Advanced 2020.1.4,cpe:2.3:a:oracle:suitecommerce_advanced:2020.1...,,,,,,,...,,,,,NO CVEs FOUND FOR THIS ASSET,,,,,
4,4,4,Adobe Acrobat Reader 20.004.30006 Classic Edition,cpe:2.3:a:adobe:acrobat_reader:20.004.30006:*:...,CVE-2021-39836,2021-09-29T16:15:08.513,2024-11-21T06:20:20.730,CVSS:3.1/AV:L/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H,7.8,HIGH,...,HIGH,HIGH,HIGH,CWE-416,Acrobat Reader DC versions 2021.005.20060 (and...,https://helpx.adobe.com/security/products/acro...,"Release Notes, Vendor Advisory, Release Notes,...","{'cve': {'id': 'CVE-2021-39836', 'sourceIdenti...",,


In [2]:
df['exploitabilityScore'].unique()

array([None], dtype=object)

In [3]:
df.to_csv('../data/vuln_catalogue_with_scores.csv')

In [9]:
import pandas as pd
import json

# Load the CSV file
df = pd.read_csv('../data/vuln_catalogue.csv')

# Examine a single sample
sample_json = df['full_json'].iloc[0]
print('SAMPLE JSON:')
print(sample_json)

SAMPLE JSON:
{'cve': {'id': 'CVE-2023-26961', 'sourceIdentifier': 'cve@mitre.org', 'published': '2023-08-08T20:15:10.080', 'lastModified': '2024-11-21T07:52:07.460', 'vulnStatus': 'Modified', 'cveTags': [], 'descriptions': [{'lang': 'en', 'value': 'Alteryx Server 2022.1.1.42590 does not employ file type verification for uploaded files. This vulnerability allows attackers to upload arbitrary files (e.g., JavaScript content for stored XSS) via the type field in a JSON document within a PUT /gallery/api/media request.'}], 'metrics': {'cvssMetricV31': [{'source': 'nvd@nist.gov', 'type': 'Primary', 'cvssData': {'version': '3.1', 'vectorString': 'CVSS:3.1/AV:N/AC:L/PR:H/UI:R/S:C/C:L/I:L/A:N', 'baseScore': 4.8, 'baseSeverity': 'MEDIUM', 'attackVector': 'NETWORK', 'attackComplexity': 'LOW', 'privilegesRequired': 'HIGH', 'userInteraction': 'REQUIRED', 'scope': 'CHANGED', 'confidentialityImpact': 'LOW', 'integrityImpact': 'LOW', 'availabilityImpact': 'NONE'}, 'exploitabilityScore': 1.7, 'impactS

In [10]:
# Try loading the JSON
try:
    data = json.loads(sample_json)
    print('\nPARSED JSON KEYS:')
    print(data.keys())
    print('\nDATA STRUCTURE:')
    print(json.dumps(data, indent=2))
except Exception as e:
    print('Error parsing JSON:', e)

Error parsing JSON: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)


In [12]:
import pandas as pd
import json
import ast

sample_json = df['full_json'].iloc[0]
print('SAMPLE JSON:')
print(sample_json[:500])  # Print just a chunk in case it's huge

# Try to parse as JSON, fallback to Python dict
def parse_mixed_json(s):
    try:
        return json.loads(s)
    except Exception:
        try:
            return ast.literal_eval(s)
        except Exception as e:
            print('Could not parse:', e)
            return {}

data = parse_mixed_json(sample_json)
print('\nPARSED DATA TYPE:', type(data))
print('PARSED KEYS:', list(data.keys()))
print('\nDATA STRUCTURE:')
print(str(data)[:1000])  # Print only part if it's large


SAMPLE JSON:
{'cve': {'id': 'CVE-2023-26961', 'sourceIdentifier': 'cve@mitre.org', 'published': '2023-08-08T20:15:10.080', 'lastModified': '2024-11-21T07:52:07.460', 'vulnStatus': 'Modified', 'cveTags': [], 'descriptions': [{'lang': 'en', 'value': 'Alteryx Server 2022.1.1.42590 does not employ file type verification for uploaded files. This vulnerability allows attackers to upload arbitrary files (e.g., JavaScript content for stored XSS) via the type field in a JSON document within a PUT /gallery/api/media r

PARSED DATA TYPE: <class 'dict'>
PARSED KEYS: ['cve']

DATA STRUCTURE:
{'cve': {'id': 'CVE-2023-26961', 'sourceIdentifier': 'cve@mitre.org', 'published': '2023-08-08T20:15:10.080', 'lastModified': '2024-11-21T07:52:07.460', 'vulnStatus': 'Modified', 'cveTags': [], 'descriptions': [{'lang': 'en', 'value': 'Alteryx Server 2022.1.1.42590 does not employ file type verification for uploaded files. This vulnerability allows attackers to upload arbitrary files (e.g., JavaScript content f

In [14]:
import pandas as pd
import ast

def parse_mixed_json(s):
    try:
        return ast.literal_eval(s)
    except Exception:
        return {}

def extract_scores(full_json):
    data = parse_mixed_json(full_json)
    # Dig through nested structure
    try:
        entry = data['cve']['metrics']['cvssMetricV31'][0]
        exploit = entry.get('exploitabilityScore', None)
        impact = entry.get('impactScore', None)
        return pd.Series({'exploitabilityScore': exploit, 'impactScore': impact})
    except Exception:
        # fallback for missing or unexpected structure
        return pd.Series({'exploitabilityScore': None, 'impactScore': None})

df[['exploitabilityScore', 'impactScore']] = df['full_json'].apply(extract_scores)

# Optional: save or display
# df.to_csv('vuln_catalogue_with_scores.csv', index=False)
print(df[['exploitabilityScore', 'impactScore']].head())


   exploitabilityScore  impactScore
0                  1.7          2.7
1                  2.3          2.7
2                  1.2          4.2
3                  NaN          NaN
4                  1.8          5.9


In [16]:
import pandas as pd
import ast

df = pd.read_csv('../data/vuln_catalogue.csv')

def parse_mixed_json(s):
    try:
        return ast.literal_eval(s)
    except Exception:
        return {}

def extract_scores(full_json):
    data = parse_mixed_json(full_json)
    for key in ['cvssMetricV31', 'cvssMetricV30']:
        try:
            entry = data['cve']['metrics'][key][0]
            exploit = entry.get('exploitabilityScore', None)
            impact = entry.get('impactScore', None)
            if exploit is not None or impact is not None:
                return pd.Series({'exploitabilityScore': exploit, 'impactScore': impact})
        except Exception:
            continue
    # fallback: not found
    return pd.Series({'exploitabilityScore': None, 'impactScore': None})

df[['exploitabilityScore', 'impactScore']] = df['full_json'].apply(extract_scores)

# Optional: Save or inspect
#df.to_csv('vuln_catalogue_with_scores.csv', index=False)
df


Unnamed: 0.1,Unnamed: 0,sid,Title,cpeName,cveID,published,last_modified,vectorString,baseScore,baseSeverity,...,confidentialityImpact,integrityImpact,availabilityImpact,cwes,description,references,tags,full_json,exploitabilityScore,impactScore
0,0,0,Alteryx Server 2022.1.1.42590,cpe:2.3:a:alteryx:alteryx_server:2022.1.1.4259...,CVE-2023-26961,2023-08-08T20:15:10.080,2024-11-21T07:52:07.460,CVSS:3.1/AV:N/AC:L/PR:H/UI:R/S:C/C:L/I:L/A:N,4.8,MEDIUM,...,LOW,LOW,NONE,CWE-79,Alteryx Server 2022.1.1.42590 does not employ ...,http://alteryx.com | https://gist.github.com/D...,"Vendor Advisory, Exploit, Third Party Advisory...","{'cve': {'id': 'CVE-2023-26961', 'sourceIdenti...",1.7,2.7
1,1,1,Oracle SuiteCommerce Advanced,cpe:2.3:a:oracle:suitecommerce_advanced:-:*:*:...,CVE-2020-14728,2020-08-27T00:15:12.050,2024-11-21T05:03:59.317,CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:C/C:L/I:L/A:N,5.4,MEDIUM,...,LOW,LOW,NONE,NVD-CWE-noinfo,Vulnerability in the SuiteCommerce Advanced (S...,https://system.netsuite.com/app/help/helpcente...,"Permissions Required, Vendor Advisory, Permiss...","{'cve': {'id': 'CVE-2020-14728', 'sourceIdenti...",2.3,2.7
2,2,2,Oracle SuiteCommerce Advanced,cpe:2.3:a:oracle:suitecommerce_advanced:-:*:*:...,CVE-2020-14729,2020-08-27T00:15:12.097,2024-11-21T05:03:59.443,CVSS:3.1/AV:N/AC:H/PR:L/UI:R/S:U/C:L/I:H/A:N,5.4,MEDIUM,...,LOW,HIGH,NONE,NVD-CWE-noinfo,Vulnerability in SuiteCommerce Advanced (SCA) ...,https://system.netsuite.com/app/help/helpcente...,"Permissions Required, Vendor Advisory, Permiss...","{'cve': {'id': 'CVE-2020-14729', 'sourceIdenti...",1.2,4.2
3,3,3,Oracle SuiteCommerce Advanced 2020.1.4,cpe:2.3:a:oracle:suitecommerce_advanced:2020.1...,,,,,,,...,,,,,NO CVEs FOUND FOR THIS ASSET,,,,,
4,4,4,Adobe Acrobat Reader 20.004.30006 Classic Edition,cpe:2.3:a:adobe:acrobat_reader:20.004.30006:*:...,CVE-2021-39836,2021-09-29T16:15:08.513,2024-11-21T06:20:20.730,CVSS:3.1/AV:L/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H,7.8,HIGH,...,HIGH,HIGH,HIGH,CWE-416,Acrobat Reader DC versions 2021.005.20060 (and...,https://helpx.adobe.com/security/products/acro...,"Release Notes, Vendor Advisory, Release Notes,...","{'cve': {'id': 'CVE-2021-39836', 'sourceIdenti...",1.8,5.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,315,315,Adobe Acrobat Reader 20.004.30006 Classic Edition,cpe:2.3:a:adobe:acrobat_reader:20.004.30006:*:...,CVE-2025-27162,2025-03-11T18:15:34.150,2025-04-28T16:48:39.727,CVSS:3.1/AV:L/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H,7.8,HIGH,...,HIGH,HIGH,HIGH,CWE-824,"Acrobat Reader versions 24.001.30225, 20.005.3...",https://helpx.adobe.com/security/products/acro...,Vendor Advisory,"{'cve': {'id': 'CVE-2025-27162', 'sourceIdenti...",1.8,5.9
316,316,316,Adobe Acrobat Reader 20.004.30006 Classic Edition,cpe:2.3:a:adobe:acrobat_reader:20.004.30006:*:...,CVE-2025-27163,2025-03-11T18:15:34.293,2025-04-28T16:48:36.780,CVSS:3.1/AV:L/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N,5.5,MEDIUM,...,HIGH,NONE,NONE,CWE-125,"Acrobat Reader versions 24.001.30225, 20.005.3...",https://helpx.adobe.com/security/products/acro...,"Vendor Advisory, Technical Description, Third ...","{'cve': {'id': 'CVE-2025-27163', 'sourceIdenti...",1.8,3.6
317,317,317,Adobe Acrobat Reader 20.004.30006 Classic Edition,cpe:2.3:a:adobe:acrobat_reader:20.004.30006:*:...,CVE-2025-27164,2025-03-11T18:15:34.437,2025-04-28T16:48:33.017,CVSS:3.1/AV:L/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N,5.5,MEDIUM,...,HIGH,NONE,NONE,CWE-125,"Acrobat Reader versions 24.001.30225, 20.005.3...",https://helpx.adobe.com/security/products/acro...,"Vendor Advisory, Technical Description, Third ...","{'cve': {'id': 'CVE-2025-27164', 'sourceIdenti...",1.8,3.6
318,318,318,Adobe Acrobat Reader 20.004.30006 Classic Edition,cpe:2.3:a:adobe:acrobat_reader:20.004.30006:*:...,CVE-2025-27174,2025-03-11T18:15:35.600,2025-04-28T16:48:26.390,CVSS:3.1/AV:L/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H,7.8,HIGH,...,HIGH,HIGH,HIGH,CWE-416,"Acrobat Reader versions 24.001.30225, 20.005.3...",https://helpx.adobe.com/security/products/acro...,Vendor Advisory,"{'cve': {'id': 'CVE-2025-27174', 'sourceIdenti...",1.8,5.9
