# F3. Risk Scoring

## Risk-Scoring Methodology

For each asset category:
* Max Severity = highest CVSS base score
* Avg Severity = mean CVSS base score
* Vuln Count = total CVEs
* Composite Risk Score (normalized 0–1)

## Analysis & Visualization
* Heatmap: asset vs. RiskScore
* Bar Chart: Top 10 assets by score
* Time Series: monthly count of new CVEs per asset
* Pie Chart: distribution of severity levels (Critical/High/Medium/Low)

In [4]:
# interactive_risk_scoring_module.py
"""
Interactive Risk Scoring Module with User Input/Dropdown for Formula & Aggregation
- Supports multiple risk formulas (weighted, multiplicative, worst-case, mean)
- Aggregates per asset by max, mean, median, or sum, and reports a separate count of high-risk CVEs
- Designed for Jupyter notebook (with ipywidgets dropdowns)
"""

import pandas as pd
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact, Dropdown
import matplotlib.pyplot as plt
import seaborn as sns


# --- CONFIG ---
# Path of the vulnerability catalogue CSV. It is the default argument of
# interactive_risk_scoring and is passed to it by the main-execution guard.
input_file = "../data/vuln_catalogue_v2.csv"  # Change to your path if needed

# --- LOAD DATA ---
def load_vuln_data(file_path):
    """Read the CSV at *file_path* into a DataFrame.

    Parsing relies entirely on ``pd.read_csv`` defaults; no columns are
    renamed, coerced, or validated here.
    """
    return pd.read_csv(file_path)

# --- RISK FORMULAS ---
def weighted_average_score(row, weights=None):
    """Return the weighted mean of the score columns available in *row*.

    Args:
        row: Record exposing ``.get(column)`` (a pandas Series or a dict).
        weights: Optional mapping of column name to weight. Defaults to
            baseScore 0.5, exploitabilityScore 0.25, impactScore 0.25.

    Returns:
        The weighted average rounded to two decimals. Columns whose value
        is null are skipped and the remaining weights are re-normalised.
        NaN is returned when no column has a value or when the weights of
        the available columns sum to zero.
    """
    if weights is None:
        weights = {'baseScore': 0.5, 'exploitabilityScore': 0.25, 'impactScore': 0.25}

    # Look each column up once and keep only the non-null (value, weight) pairs.
    pairs = []
    for col, weight in weights.items():
        value = row.get(col)
        if pd.notnull(value):
            pairs.append((value, weight))

    total_weight = sum(weight for _, weight in pairs)
    # A zero total (no usable columns, or all-zero weights) has no meaningful
    # average; return NaN instead of dividing by zero.
    if not pairs or total_weight == 0:
        return np.nan

    score = sum(value * weight for value, weight in pairs)
    return round(score / total_weight, 2)

def multiplicative_risk_score(row):
    """Return the product of the three scores, each scaled by 1/10, times 10.

    The result is rounded to two decimals. If any of baseScore,
    exploitabilityScore or impactScore is null, NaN is returned.
    """
    scaled = []
    for col in ('baseScore', 'exploitabilityScore', 'impactScore'):
        value = row.get(col)
        # One missing component makes the whole product undefined.
        if pd.isnull(value):
            return np.nan
        scaled.append(value / 10.0)
    return round(np.prod(scaled) * 10, 2)

def worst_case_score(row):
    """Return the largest non-null value among the three score columns.

    NaN is returned when baseScore, exploitabilityScore and impactScore
    are all null or absent.
    """
    candidates = (row.get(col) for col in ('baseScore', 'exploitabilityScore', 'impactScore'))
    present = [value for value in candidates if pd.notnull(value)]
    return max(present) if present else np.nan

def simple_mean_score(row):
    """Return the unweighted mean of the non-null score columns.

    The mean is rounded to two decimals; NaN is returned when none of
    baseScore, exploitabilityScore or impactScore has a value.
    """
    candidates = (row.get(col) for col in ('baseScore', 'exploitabilityScore', 'impactScore'))
    present = [value for value in candidates if pd.notnull(value)]
    if present:
        return round(np.mean(present), 2)
    return np.nan

# Dropdown label -> row-level scoring function applied to each vulnerability.
formula_map = dict(
    (
        ('Weighted Average', weighted_average_score),
        ('Multiplicative', multiplicative_risk_score),
        ('Worst Case (Max)', worst_case_score),
        ('Simple Mean', simple_mean_score),
    )
)

# Dropdown label -> pandas aggregation name used on the per-asset risk scores.
agg_map = dict(
    Max='max',
    Mean='mean',
    Median='median',
    Sum='sum',
)

def count_high_risk(series, threshold=7.0):
    """Count the entries of *series* that are greater than or equal to *threshold*."""
    at_or_above = series >= threshold
    return at_or_above.sum()

# --- INTERACTIVE FUNCTION ---
def interactive_risk_scoring(input_file=input_file):
    """Load the vulnerability catalogue and wire up the interactive scoring view.

    The CSV at *input_file* is loaded once. The three score columns
    (baseScore, exploitabilityScore, impactScore) are coerced to numeric and
    created as NaN when absent. ``interact`` then re-runs ``update_scoring``
    whenever the formula, aggregation, or high-risk threshold widget changes.

    Args:
        input_file: Path of the catalogue CSV. The code requires ``Title``
            and ``cveID`` columns; ``baseSeverity`` and ``published`` are
            only needed for the pie chart and the time series and are
            skipped with a message when missing.

    Returns:
        None. Results are displayed and plotted as side effects.
    """
    df = load_vuln_data(input_file)
    for col in ['baseScore', 'exploitabilityScore', 'impactScore']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        else:
            df[col] = np.nan

    # Date parsing does not depend on any widget value, so do it once here
    # instead of on every slider/dropdown change.
    has_published = 'published' in df.columns
    if has_published:
        df['published'] = pd.to_datetime(df['published'], errors='coerce')
        df['month'] = df['published'].dt.to_period('M')

    def update_scoring(formula, aggregation, highrisk_threshold):
        """Recompute scores, show the per-asset summary, and redraw the charts."""
        # Calculate riskScore
        df['riskScore'] = df.apply(formula_map[formula], axis=1)

        # The aggregated column is named after the chosen aggregation, so the
        # sort key must follow it (a fixed 'MaxRiskScore' fails for Mean,
        # Median and Sum).
        score_col = f'{aggregation}RiskScore'
        # The count uses >=, so the label says >= as well.
        count_col = f'countHighRiskCVEs (>={highrisk_threshold})'

        # Aggregate per asset (Title)
        group = df.groupby('Title')
        agg_df = group['riskScore'].agg(agg_map[aggregation]).reset_index()
        agg_df = agg_df.rename(columns={'riskScore': score_col})

        # Count high risk CVEs per asset using the shared helper
        highrisk_df = group['riskScore'].apply(
            count_high_risk, threshold=highrisk_threshold
        ).reset_index()
        highrisk_df = highrisk_df.rename(columns={'riskScore': count_col})

        # Merge for summary
        summary = pd.merge(agg_df, highrisk_df, on='Title', how='left')
        summary = summary.sort_values(by=score_col, ascending=False)

        # Show sample summary and first few vulnerabilities for inspection.
        # NOTE(review): `display` is not imported in this file; presumably it
        # is supplied by the IPython/Jupyter session - confirm.
        print("\nAsset-level Risk Summary:")
        display(summary.head(10))
        print("\nSample vulnerabilities (with riskScore):")
        display(df[['Title', 'cveID', 'riskScore']].sort_values(by='riskScore', ascending=False).head(20))

        # Pie chart of severity levels
        if 'baseSeverity' in df.columns:
            severity_counts = df['baseSeverity'].value_counts()
            if not severity_counts.empty:
                severity_counts.plot(kind='pie', autopct='%1.1f%%', startangle=140, figsize=(6, 6))
                plt.title("Distribution of Severity Levels")
                plt.ylabel("")
                plt.show()
        else:
            print("\nColumn 'baseSeverity' not found; skipping severity pie chart.")

        # Time series of new CVEs per asset and month
        if has_published:
            monthly_cves = df.groupby(['month', 'Title'])['cveID'].nunique().unstack(fill_value=0)
            if not monthly_cves.empty:
                monthly_cves.plot(figsize=(14, 7))
                plt.title("Monthly Count of New CVEs per Asset")
                plt.ylabel("Number of New CVEs")
                plt.xlabel("Month")
                plt.legend(title='Asset', bbox_to_anchor=(1.05, 1), loc='upper left')
                plt.tight_layout()
                plt.show()
        else:
            print("\nColumn 'published' not found; skipping monthly CVE time series.")
        return

    interact(
        update_scoring,
        formula=Dropdown(options=list(formula_map.keys()), value='Weighted Average', description='Risk Formula:'),
        aggregation=Dropdown(options=list(agg_map.keys()), value='Max', description='Aggregation:'),
        highrisk_threshold=widgets.FloatSlider(value=7.0, min=0.0, max=10.0, step=0.1, description='High Risk CVE:')
    )



# --- MAIN EXECUTION ---
# Start the interactive view with the configured catalogue path when this
# code runs as the main module.
# NOTE(review): in a notebook cell __name__ is presumably "__main__" too, so
# this would also fire on cell execution - confirm.
if __name__ == "__main__":
    interactive_risk_scoring(input_file)

interactive(children=(Dropdown(description='Risk Formula:', options=('Weighted Average', 'Multiplicative', 'Wo…