## Analysis of JavaScript Vulnerability Dataset

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Load the JavaScript vulnerability dataset from the working directory.
dataset = pd.read_csv('JSVulnerabilityDataSet-1.0.csv')
# Only the last expression of a notebook cell is rendered, so a bare
# `dataset.head()` in the middle of the cell was silently discarded.
# Print it explicitly so the preview is actually shown.
print(dataset.head())
# Summary of the loaded DataFrame.
dataset.info()

In [None]:
# Print a concise summary of the loaded DataFrame (pandas `DataFrame.info`).
dataset.info()

In [None]:
# Columns that are not fed to the model: the label itself ('Vuln') plus the
# name / path / position columns listed here.
excluded_columns = [
    'name', 'longname', 'path', 'full_repo_path',
    'line', 'column', 'endline', 'endcolumn',
    'Vuln',
]

# Label column and the remaining feature columns.
target = dataset['Vuln']
features = dataset.drop(columns=excluded_columns)

# Sanity check: both should report the same number of rows.
print(features.shape)
print(target.shape)

In [None]:
# Preprocess via normalization: rescale every feature column with
# MinMaxScaler (default output range [0, 1]).
# NOTE(review): the scaler is fitted on all rows, i.e. before any
# train/validation/test split is made.
scaler = MinMaxScaler()
scaler.fit(features)
normalized_features = scaler.transform(features)

In [None]:
# Split the *raw* features 70 / 15 / 15 into train / validation / test sets.
# The split is done before scaling so the scaler can be fitted on the training
# rows only; fitting it on the full dataset leaks the min/max of the
# validation and test rows into training. The same random_state and sizes are
# used, so the same rows end up in each split as before.
train_raw, holdout_raw, train_target, holdout_target = train_test_split(
    features, target, test_size=0.3, random_state=32)
validation_raw, test_raw, validation_target, test_target = train_test_split(
    holdout_raw, holdout_target, test_size=0.5, random_state=32)

# Fit on the training split only, then apply that same scaling everywhere.
# Held-out values outside the training range may fall outside [0, 1].
scaler = MinMaxScaler()
train_features = scaler.fit_transform(train_raw)
validation_features = scaler.transform(validation_raw)
test_features = scaler.transform(test_raw)

In [None]:
def _logit_binary_accuracy(y_true, y_pred):
    """Binary accuracy for a model whose outputs are logits.

    The built-in 'accuracy' metric thresholds predictions at 0.5, which is
    only meaningful for probabilities. The logits are squashed through a
    sigmoid first so the 0.5 threshold applies to probabilities.
    """
    return tf.keras.metrics.binary_accuracy(y_true, tf.sigmoid(y_pred))


# Take the input width from the data instead of hard-coding 35, so the model
# keeps working if the set of feature columns changes.
n_features = train_features.shape[1]

# Three ReLU hidden layers followed by a single linear unit that emits a raw
# logit; the sigmoid is folded into the loss via from_logits=True.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(n_features,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
# Keep the metric name 'accuracy' so training logs/history keys are unchanged.
accuracy_metric = tf.keras.metrics.MeanMetricWrapper(
    _logit_binary_accuracy, name='accuracy')
model.compile(optimizer='adam', loss=loss_fn, metrics=[accuracy_metric])
model.summary()

In [None]:
# Render the network architecture left-to-right, annotating each layer with
# its tensor shapes and activation function.
tf.keras.utils.plot_model(
    model,
    show_shapes=True,
    show_layer_activations=True,
    rankdir='LR',
)

In [None]:
# Train for 10 epochs. The validation split was created but never used;
# passing it here makes Keras report validation loss/accuracy after every
# epoch, so over-fitting is visible during training.
model.fit(
    train_features,
    train_target,
    epochs=10,
    validation_data=(validation_features, validation_target),
)

In [None]:
# The output layer is linear and the loss uses from_logits=True, so
# model.predict returns raw logits, not probabilities. A probability of 0.5
# corresponds to a logit of 0 (sigmoid(0) == 0.5), so the decision threshold
# on the logits must be 0. Thresholding them at 0.5 silently demanded a
# probability of about 0.62.
test_logits = model.predict(test_features)
test_predicted = (test_logits > 0).astype(int)
accuracy = accuracy_score(test_target, test_predicted)
print(f"Accuracy: {accuracy * 100: .2f}%")

In [None]:
# Precision and recall of the thresholded predictions on the test split.
precision = precision_score(y_true=test_target, y_pred=test_predicted)
recall = recall_score(y_true=test_target, y_pred=test_predicted)

# Reported as percentages, recall first.
print(f"Recall: {recall * 100: .2f}%")
print(f"Precision: {precision * 100: .2f}%")

In [None]:
# F1 score (harmonic mean of precision and recall) on the test split,
# reported as a percentage.
f1score = f1_score(y_true=test_target, y_pred=test_predicted)
print(f"F1 Score: {f1score * 100:0.2f}%")