In [1]:
import sys

# sys.warnoptions is only populated when warning handling was requested
# explicitly (-W flag / PYTHONWARNINGS). In that case leave the filters alone;
# otherwise mute every warning category for the rest of the session.
if len(sys.warnoptions) == 0:
    import warnings
    warnings.simplefilter(action="ignore")

In [2]:
import pandas as pd 

from dbconn import get_database
from dataload import loadData, load_cves
from IPython.display import display,HTML
from itables import show,options
from ipywidgets import Dropdown, interactive, Box, HBox,VBox, Output, Tab, Label, Layout
from matplotlib import pyplot
from pandas.plotting import scatter_matrix
from sklearn import linear_model

# NOTE(review): itables documents maxBytes=0 as "no size limit", i.e. tables
# passed to show() are never downsampled - confirm against the installed
# itables version.
options.maxBytes=0


In [3]:

def load_vulerability_history(path="data/vuln_hist.p"):
    """Load the stored vulnerability history through ``loadData``.

    Parameters
    ----------
    path : str, default "data/vuln_hist.p"
        Location handed to ``loadData``. The default is the file this
        notebook has always read, so existing zero-argument calls behave
        exactly as before.

    Returns
    -------
    object
        Whatever ``loadData`` returns for ``path``.
        NOTE(review): the caller feeds the result to
        ``pd.DataFrame.from_dict(..., orient='index')``, so it is presumably
        a dict keyed by product - confirm against ``dataload.loadData``.
    """
    return loadData(path)

# Output widget that captures the interactive table rendered below, so the
# table can be embedded in a widget layout instead of the cell output.
predictive_df_out = Output(layout=dict(width='100%'))

# One row per key of the loaded history; the nine columns are then renamed
# positionally.
cpe_history = load_vulerability_history()
df = pd.DataFrame.from_dict(cpe_history, orient='index')
df.columns = (
    ['2020', '2021', '2022', '2023']
    + ['low', 'medium', 'high', 'critical']
    + ['labeled risk']
)

with predictive_df_out:
    show(df)

    

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


def create_logistic_model(df, test_size=0.3, random_state=None):
    """Train and evaluate a standard-scaled logistic-regression classifier.

    Columns 0-7 of ``df`` (by position) are used as features and column 8 as
    the class label. The rows are split into train/test parts, the pipeline
    is fitted on the training part, and accuracy plus a confusion matrix are
    computed on the held-out part.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least nine columns laid out as described above.
    test_size : float or int, default 0.3
        Forwarded to ``train_test_split``. The default is the value that
        used to be hard-coded.
    random_state : int or None, default None
        Forwarded to ``train_test_split``. Pass an integer to make the split
        (and therefore the reported accuracy) reproducible; ``None`` keeps
        the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(pipe, confusion_out, logistic_accuracy)``: the fitted pipeline,
        an ``Output`` widget holding the confusion-matrix figure, and the
        held-out accuracy as a percentage.
    """
    confusion_out = Output(layout={'width': '50%'})

    values = df.values
    X = values[:, 0:8]
    y = values[:, 8]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # The scaler is part of the pipeline, so it is fitted on training rows only.
    pipe = make_pipeline(StandardScaler(), LogisticRegression())
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    logistic_accuracy = accuracy_score(y_test, y_pred) * 100

    cm = confusion_matrix(y_test, y_pred, labels=pipe.classes_)
    ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=pipe.classes_).plot()
    # Showing the figure inside the Output context captures it in the widget.
    with confusion_out:
        pyplot.show()
    return (pipe, confusion_out, logistic_accuracy)

    


In [5]:
# Create svm model
from sklearn import svm

def create_svm_model(df, test_size=0.3, random_state=None):
    """Train and evaluate a standard-scaled support-vector classifier.

    Columns 0-7 of ``df`` (by position) are used as features and column 8 as
    the class label. The rows are split into train/test parts, the pipeline
    is fitted on the training part, and accuracy plus a confusion matrix are
    computed on the held-out part.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least nine columns laid out as described above.
    test_size : float or int, default 0.3
        Forwarded to ``train_test_split``. The default is the value that
        used to be hard-coded.
    random_state : int or None, default None
        Forwarded to ``train_test_split``. Pass an integer to make the split
        (and therefore the reported accuracy) reproducible; ``None`` keeps
        the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(pipe_svm, confusion_svm_out, svm_accuracy)``: the fitted
        pipeline, an ``Output`` widget holding the confusion-matrix figure,
        and the held-out accuracy as a percentage.
    """
    confusion_svm_out = Output(layout={'width': '50%'})

    values = df.values
    X = values[:, 0:8]
    y = values[:, 8]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # The scaler is part of the pipeline, so it is fitted on training rows only.
    pipe_svm = make_pipeline(StandardScaler(), svm.SVC())
    pipe_svm.fit(X_train, y_train)
    y_pred_svm = pipe_svm.predict(X_test)
    svm_accuracy = accuracy_score(y_test, y_pred_svm) * 100

    cm_svm = confusion_matrix(y_test, y_pred_svm, labels=pipe_svm.classes_)
    ConfusionMatrixDisplay(confusion_matrix=cm_svm, display_labels=pipe_svm.classes_).plot()
    # Showing the figure inside the Output context captures it in the widget.
    with confusion_svm_out:
        pyplot.show()
    return (pipe_svm, confusion_svm_out, svm_accuracy)
       

In [6]:
def predict(brand, product, data, test_result=None):
    """Return the predicted risk label for one (brand, product) pair.

    The prediction comes from whichever of the two module-level pipelines
    (``pipe`` or ``pipe_svm``) reported the higher accuracy. Only that
    pipeline is evaluated; the other one is no longer run just to have its
    result thrown away.

    Parameters
    ----------
    brand, product : str
        Looked up together as the tuple ``(brand, product)`` in ``data``'s
        index.
    data : pandas.DataFrame
        Table whose first eight columns (by position) are the model features.
    test_result : object, optional
        When truthy it is returned unchanged and no model is consulted.

    Returns
    -------
    object
        ``test_result`` if truthy; the string ``"Inconclusive"`` when the
        pair is not present in ``data``; otherwise the first element of the
        chosen pipeline's prediction.
    """
    if test_result:
        return test_result

    try:
        product_data = data.loc[[(brand, product)]]
    except KeyError:
        return "Inconclusive"

    features = product_data.values[:, 0:8]
    # Strictly-greater comparison preserves the tie-break: on equal accuracy
    # the SVM pipeline is used.
    best_model = pipe if logistic_accuracy > svm_accuracy else pipe_svm
    return best_model.predict(features)[0]

In [7]:
# Train both classifiers on the same table. Each call returns a
# (pipeline, Output widget, accuracy percentage) triple, which is unpacked
# into the module-level names used by the rest of the notebook.
logistic_result = create_logistic_model(df)
svm_result = create_svm_model(df)

pipe, confusion_out, logistic_accuracy = logistic_result
pipe_svm, confusion_svm_out, svm_accuracy = svm_result

In [8]:
# Load every (brand, product) pair from the "cpe_targets" collection and
# derive the sorted list of distinct brands from it.
db = get_database()
cpe_targets = db["cpe_targets"]

cpe_query = {'brand': {'$regex': '.*'}}
cpe_fields = {'brand': 1, 'product': 1}
cpe_list = cpe_targets.find(cpe_query, cpe_fields)

cpeFormatted = {(doc['brand'], doc['product']) for doc in cpe_list}
cpeBrands = sorted({brand for brand, _product in cpeFormatted})

In [9]:
# Event counter kept as text inside a Label widget: updateViperDisplay adds 1
# on every call and only refreshes the loading message while the count is
# positive; updateViperProductLabel resets it to "-3".
start_loading_sync = Label(value="0")

def updateProductSelection(*args):
    """Restrict the product dropdown to the currently selected brand.

    Observer callback: the change arguments are ignored. The options become
    the sorted, de-duplicated product names from ``cpeFormatted`` whose brand
    equals ``brands_widget.value``.
    """
    selected_brand = brands_widget.value
    matching_products = {
        product for brand, product in cpeFormatted if brand == selected_brand
    }
    product_widget.options = sorted(matching_products)

def updateViperDisplay(*args):
    """Refresh the "Loading ..." label after a product-widget change.

    Observer callback: the change arguments are ignored. Every call bumps the
    counter stored in ``start_loading_sync``. While the counter is positive,
    each ``Output`` child of the interactive container ``w`` is inspected:
    if the plain-text form of its first output contains the capitalised
    "Brand Product" of the current selection the loading label is cleared,
    otherwise it is set to "Loading Brand Product".

    The inspection is best-effort (the container or its outputs may not
    exist yet), so ordinary errors are ignored. Only ``Exception`` is caught;
    the previous bare ``except`` also swallowed ``KeyboardInterrupt`` and
    ``SystemExit``.
    """
    start_loading_sync.value = str(int(start_loading_sync.value) + 1)
    if int(start_loading_sync.value) <= 0:
        return

    try:
        # Loop-invariant text, computed once instead of per child.
        selection = '{} {}'.format(
            brands_widget.value.capitalize(), product_widget.value.capitalize()
        )
        for child in w.children:
            if not isinstance(child, Output):
                continue
            if selection in child.outputs[0]['data']['text/plain']:
                load_label.value = ""
            else:
                load_label.value = 'Loading {}'.format(selection)
    except Exception:
        # Deliberate best-effort: an unfinished display must not break the UI.
        pass
def updateViperProductLabel(*args):
    """Clear the loading message and rewind the loading counter.

    Observer callback: the change arguments are ignored. ``load_label`` is
    emptied and ``start_loading_sync`` is set back to the text "-3".
    """
    load_label.value = ""
    # The counter is stored as text in a Label, hence the str() conversion.
    start_loading_sync.value = str(-3)
            
 
def _titled_box(title, content, title_layout, box_layout):
    """Stack a title Label (using ``title_layout``) above ``content`` in a VBox."""
    return VBox([Label(value=title, layout=title_layout), content], layout=box_layout)


def displayVisualizations(brand, product):
    """Build and display the VIPER / Metadata tab view for one product.

    Nothing is shown unless ``brand`` and ``product`` are both set and the
    pair is present in ``cpeFormatted``. When ``load_cves`` yields no rows
    (or raises ``KeyError``) a single error label is displayed instead of
    the tabs.

    Reads the module-level names ``cpeFormatted``, ``df``, ``load_label``,
    ``product_label``, ``predictive_df_out``, ``confusion_out``,
    ``confusion_svm_out``, ``logistic_accuracy`` and ``svm_accuracy``.
    Updates ``load_label`` and ``product_label`` and sets
    ``options.columnDefs`` on the itables options object.

    Parameters
    ----------
    brand : str
        Selected brand.
    product : str
        Selected product.

    Returns
    -------
    None
    """
    # Only display visualizations if the brand and product are defined and
    # belong together. While the brand dropdown is changing, this callback can
    # briefly be handed a mismatched pair such as 'Google Windows'.
    if not (brand and product and (brand, product) in cpeFormatted):
        return

    load_label.value = 'Loading {} {}'.format(brand.capitalize(), product.capitalize())

    # Layouts used for arranging the page.
    center_half = Layout(display="flex", justify_content="center", align_items="center", width='50%')
    center_full = Layout(display="flex", justify_content="center", align_items="baseline", width='100%')
    center_align_full = Layout(display="flex", justify_content="space-around", align_items="center")
    center_top = Layout(display="flex", justify_content="center", align_items="center", flex='1 1 auto', width='98%')
    center_error = Layout(display="flex", justify_content="center", align_items="center", flex='1 1 auto', width='750px')
    section_label = Layout(display="flex", justify_content="center", align_items="center", border="solid", padding="0 10px 0 10px")

    # Background colour for each predicted level.
    predictive_level_bg = {"CRITICAL": 'red', 'HIGH': 'lightsalmon', 'MEDIUM': 'greenyellow', 'LOW': 'green', 'Inconclusive': 'Lavender'}

    # Output widgets let arbitrary rendered content be placed in widget layouts.
    hist_out = Output()
    scatter_out = Output()
    line_out = Output()
    vuln_count_out = Output()
    vuln_detail_out = Output(layout=center_top)

    # A former "destroy the datatables" step was removed here: it referenced
    # local variables before assignment, so it always failed silently inside
    # a bare except and never had any effect.

    # Fetch data
    try:
        dfVulnDetail = load_cves(brand, product, version_limit=10)
    except KeyError:
        dfVulnDetail = pd.DataFrame()

    # Without data there is nothing to plot: show an error banner and stop.
    if dfVulnDetail.empty:
        data_error = Label(value="The National Vulnerability Database has insufficient data on this product to display predictions or visualizations.", layout=center_error)
        data_error.style.background = predictive_level_bg["CRITICAL"]
        display(data_error)
        return

    # Detailed vulnerability datatable.
    # NOTE(review): "25x" is not a CSS length and may have been meant as
    # "25px"; left unchanged because correcting it would alter column widths.
    options.columnDefs = [{"width": "25x", "targets": "_all"}]
    with vuln_detail_out:
        show(dfVulnDetail, columnDefs=[{"width": "25x", "targets": "_all"}], scrollX="true")

    # Histogram
    dfVulnDetail.hist(column=["score", "version"], by="version")
    with hist_out:
        pyplot.show()

    # Vulnerability count datatable
    vulnCounts = dfVulnDetail.value_counts(subset=["version", "score_text"])
    dfVs = pd.DataFrame(vulnCounts).reset_index()
    dfVs.columns = ["version", "score_text", "score_count"]
    dfVs = dfVs.sort_values(by=["version"])
    with vuln_count_out:
        show(vulnCounts)

    # Scatter matrix
    scatter_matrix(dfVs)
    with scatter_out:
        pyplot.show()

    # Line graph: one line per severity level.
    fig, ax = pyplot.subplots()
    for score in ["LOW", "MEDIUM", "HIGH", "CRITICAL"]:
        severity_rows = dfVs[dfVs.score_text == score]
        ax.plot(severity_rows.version, severity_rows.score_count, label=score)
    ax.set_xlabel("Versions")
    ax.set_ylabel("Vulnerabilities")
    ax.legend(loc="best")
    with line_out:
        pyplot.show()

    # Format the VIPER tab
    predict_level = predict(brand, product, df)
    # An unexpected label falls back to the neutral colour instead of raising
    # KeyError and aborting the whole display.
    level_color = predictive_level_bg.get(predict_level, predictive_level_bg['Inconclusive'])
    product_label.value = "{} {}".format(brand.capitalize(), product.capitalize())
    product_label.style.background = level_color
    product_label.layout = center_top

    vuln_count_box = _titled_box("Vulnerability Counts by Version and Severity", vuln_count_out, section_label, center_half)
    hist_box = _titled_box("Score Histogram by Version", hist_out, section_label, center_half)
    scatter_box = _titled_box("Vulnerability Histogram by Version and Severity", scatter_out, section_label, center_half)
    line_box = _titled_box("Vulnerabilities by Version and Severity", line_out, section_label, center_half)
    vuln_detail_box = _titled_box("Past Vulnerabilities", vuln_detail_out, section_label, center_align_full)

    predictive_label = Label(value="Predicted Future Vulnerability Level", layout=section_label)
    predictive_level = Label(value=predict_level, layout=center_top)
    predictive_level.style.background = level_color
    # VBox has no `background` trait, so the colour is applied through the
    # child labels' styles only (the stray keyword argument was dropped).
    predictive_box = VBox([product_label, predictive_label, predictive_level], layout=center_align_full)

    middle_row = HBox([vuln_count_box, hist_box], layout=center_full)
    bottom_row = HBox([scatter_box, line_box], layout=center_full)
    viper_display = VBox([predictive_box, vuln_detail_box, middle_row, bottom_row])

    # Format the Metadata tab
    logistic_accuracy_score = Label(value="Logistic Regression - {:0.2f}% accuracy".format(logistic_accuracy), layout=section_label)
    svm_accuracy_score = Label(value="Support Vector Machine - {:0.2f}% accuracy".format(svm_accuracy), layout=section_label)
    predictive_df_box = _titled_box("Vulnerability Counts by Version and Severity", predictive_df_out, section_label, center_align_full)
    logistic_box = VBox([logistic_accuracy_score, confusion_out], layout=center_half)
    svm_box = VBox([svm_accuracy_score, confusion_svm_out], layout=center_half)
    meta_bottom_row = HBox([logistic_box, svm_box], layout=center_full)
    metadata_display = VBox([predictive_df_box, meta_bottom_row])

    # Create the Tab object and populate it
    t = Tab()
    t.children = [viper_display, metadata_display]
    t.set_title(0, 'VIPER')
    t.set_title(1, 'Metadata')

    # Display the tab object
    display(t)





In [10]:
# Create our selection widgets
# "google" is the preferred initial brand; fall back to the first available
# brand (or no selection) so a missing entry does not raise when the Dropdown
# is constructed.
default_brand = "google" if "google" in cpeBrands else (cpeBrands[0] if cpeBrands else None)
brands_widget = Dropdown(options=cpeBrands, value=default_brand)
product_widget = Dropdown(options=[cpe[1] for cpe in cpeFormatted])

# Set up callbacks
# NOTE(review): these observers fire on every trait change, not only on
# `value`. They are left as they are because the loading counter in
# updateViperDisplay / updateViperProductLabel appears tuned to that.
brands_widget.observe(updateProductSelection)
product_widget.observe(updateViperDisplay)


pd.set_option('display.max_columns', 500)

# The keyword must be lowercase `layout`; the former `Layout=` spelling is
# not a widget trait, so the layout was never applied to the label.
load_label = Label(value="Test", layout=Layout(display="flex", justify_content="center", align_items="center", flex='1 1 auto', width = '98%'))
product_label = Label()
product_label.observe(updateViperProductLabel)
# Create and display interactive
w = interactive(displayVisualizations, brand = brands_widget, product = product_widget)
display_box = VBox([load_label, w], layout= Layout(display="flex", justify_content="space-around", align_items="center", width='100%'))
display(display_box)



VBox(children=(Label(value=''), interactive(children=(Dropdown(description='brand', index=1611, options=('0xac…