# Lab 12 Homework: Merging NVD & KEV

**Objective:**  
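Acquire the NVD (National Vulnerability Database) CVE feed and the KEV (Known Exploited Vulnerabilities) catalog, load both into pandas DataFrames, clean them, and merge on CVE ID to flag the CVEs that are known to be exploited.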

## 1. Imports & Helper Function

In [None]:
import json
import pandas as pd
from typing import Tuple

def process_nvd_json(file_path: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Parse an NVD JSON feed file into a CVE table and a CPE-match table.

    Args:
        file_path: Path to a JSON file whose top-level object holds a
            ``CVE_Items`` list (the layout this function indexes into).

    Returns:
        A ``(cve_df, cpe_df)`` tuple.

        ``cve_df`` has one row per CVE, sorted by ``id``, with the columns
        ``id``, ``assigner``, ``published_date``, ``last_modified_date``,
        ``description``, ``cwe``, ``cvss3_vector``, ``cvss3_base_score`` and
        ``cvss3_base_severity``. The two date columns are converted with
        ``pd.to_datetime(errors='coerce')``, so unparseable values become NaT.

        ``cpe_df`` has one row per ``cpe_match`` entry, sorted by
        ``cve_id`` then ``cpe23Uri``, with the columns ``cve_id``,
        ``cpe23Uri``, ``vulnerable``, ``versionStartIncluding``,
        ``versionEndIncluding``, ``vendor``, ``product`` and ``version``.
        Entries nested under a node's ``children`` are included.

        Both frames always carry their full column set, even when empty.

    Raises:
        KeyError: If ``CVE_Items`` or a record's ``cve`` /
            ``CVE_data_meta`` / ``ID`` is missing.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    cve_columns = [
        'id', 'assigner', 'published_date', 'last_modified_date',
        'description', 'cwe', 'cvss3_vector', 'cvss3_base_score',
        'cvss3_base_severity',
    ]
    cpe_columns = [
        'cve_id', 'cpe23Uri', 'vulnerable', 'versionStartIncluding',
        'versionEndIncluding', 'vendor', 'product', 'version',
    ]

    with open(file_path, 'rb') as f:
        raw = f.read()
    # JSON is UTF-8 by specification; decoding it as ISO-8859-1 turns every
    # non-ASCII character into mojibake. ISO-8859-1 is kept only as a
    # fallback so any file that loaded before still loads.
    try:
        text = raw.decode('utf-8-sig')
    except UnicodeDecodeError:
        text = raw.decode('ISO-8859-1')
    nvd_data = json.loads(text)

    cve_items = []
    cpe_items = []
    for item in nvd_data['CVE_Items']:
        cve = item['cve']
        meta = cve['CVE_data_meta']
        cve_id = meta['ID']
        descs = (cve.get('description') or {}).get('description_data') or []
        prob = (cve.get('problemtype') or {}).get('problemtype_data') or []

        # Only the first listed weakness is kept; a record without one
        # yields an empty string.
        try:
            cwe = prob[0]['description'][0]['value']
        except (IndexError, KeyError, TypeError):
            cwe = ''

        impact = item.get('impact') or {}
        impact3 = (impact.get('baseMetricV3') or {}).get('cvssV3') or {}
        cve_items.append({
            'id': cve_id,
            'assigner': meta.get('ASSIGNER', ''),
            'published_date': item.get('publishedDate'),
            'last_modified_date': item.get('lastModifiedDate'),
            'description': next(
                (d.get('value', '') for d in descs if d.get('lang') == 'en'),
                '',
            ),
            'cwe': cwe,
            'cvss3_vector': impact3.get('vectorString', ''),
            'cvss3_base_score': impact3.get('baseScore', None),
            'cvss3_base_severity': impact3.get('baseSeverity', ''),
        })

        # Walk the configuration tree depth-first with an explicit stack so
        # cpe_match entries nested under `children` are not dropped.
        nodes = (item.get('configurations') or {}).get('nodes') or []
        pending = list(reversed(nodes))
        while pending:
            node = pending.pop()
            for cm in node.get('cpe_match') or []:
                uri = cm.get('cpe23Uri', '')
                # Fields 3, 4 and 5 of the colon-separated URI are read as
                # vendor, product and version.
                parts = uri.split(':')
                cpe_items.append({
                    'cve_id': cve_id,
                    'cpe23Uri': uri,
                    'vulnerable': cm.get('vulnerable', False),
                    'versionStartIncluding': cm.get('versionStartIncluding', ''),
                    'versionEndIncluding': cm.get('versionEndIncluding', ''),
                    'vendor': parts[3] if len(parts) > 3 else '',
                    'product': parts[4] if len(parts) > 4 else '',
                    'version': parts[5] if len(parts) > 5 else '',
                })
            pending.extend(reversed(node.get('children') or []))

    # Explicit columns keep the date conversion and sorting below from
    # raising KeyError when either record list is empty.
    cve_df = pd.DataFrame(cve_items, columns=cve_columns)
    cpe_df = pd.DataFrame(cpe_items, columns=cpe_columns)
    for col in ['published_date', 'last_modified_date']:
        cve_df[col] = pd.to_datetime(cve_df[col], errors='coerce')
    cve_df.sort_values('id', inplace=True)
    cpe_df.sort_values(['cve_id', 'cpe23Uri'], inplace=True)
    return cve_df, cpe_df


## 2. Load & Process NVD JSON (2023)

In [None]:
# Parse the 2023 NVD feed file into the per-CVE table and the CPE table.
nvd_file = 'nvdcve-1.1-2023.json'
nvd_tables = process_nvd_json(nvd_file)
cve_df, cpe_df = nvd_tables

# Summarize the CVE table: schema first, then a preview of the first rows.
print("=== NVD CVE DataFrame ===")
cve_df.info()
cve_df.head()

## 3. Load KEV Catalog

In [None]:
# Read the KEV CSV and rename its key/date columns so the CVE identifier
# column is called `id`, matching the NVD CVE table.
kev_file = 'kev_catalog.csv'
kev_column_names = {'cveID': 'id', 'dateAdded': 'kev_added_date'}
df_kev = pd.read_csv(kev_file).rename(columns=kev_column_names)

# Unparseable dates become NaT instead of raising.
df_kev['kev_added_date'] = pd.to_datetime(
    df_kev['kev_added_date'],
    errors='coerce',
)

print("=== KEV DataFrame ===")
df_kev.info()
df_kev.head()

## 4. Merge & Flag Exploited

In [None]:
# Keep one KEV row per CVE id so a repeated catalog entry cannot multiply
# CVE rows in the left join below.
kev_dates = df_kev[['id', 'kev_added_date']].drop_duplicates(subset='id')

# Left join: every NVD CVE is kept; CVEs absent from KEV get NaT as the date.
df_merged = cve_df.merge(kev_dates, on='id', how='left')

# Flag by catalog membership rather than by a non-null date, so a KEV entry
# whose kev_added_date is NaT (e.g. an unparseable date) is still flagged.
df_merged['is_exploited'] = df_merged['id'].isin(kev_dates['id'])

print("=== Merged DataFrame ===")
df_merged.info()
print("\nFirst 5 rows:")
# A bare expression in the middle of a cell is never displayed; print it.
print(df_merged.head())
print("\nExploited counts:")
print(df_merged['is_exploited'].value_counts())

## 5. Save Final Table

In [None]:
# Write the merged table to CSV, leaving out the DataFrame index column.
out_csv = 'nvd_plus_kev_2023.csv'
df_merged.to_csv(
    path_or_buf=out_csv,
    index=False,
)
print(f"Saved merged table to {out_csv}")