In [3]:
# import necessary modules

In [4]:
from IPython.display import clear_output
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from tensorflow import keras
from tensorflow.keras import layers
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import matplotlib.pyplot as plt

In [5]:
# load dataset

In [6]:
# Read the raw CSV and keep only rows where both the label and the sentence
# are present; a row missing either one cannot be used for training.
df = pd.read_csv('./sqli.csv').dropna(subset=['Label', 'Sentence'])

In [7]:
# define function to convert text to numerical values

In [8]:
def data2char_index(X, max_len):
    """Encode each string in ``X`` as a fixed-length row of alphabet indices.

    Every character is lower-cased and replaced by its position in the
    alphabet defined below; characters that are not in the alphabet are
    dropped. Each encoded sequence is then zero-padded at the end, or cut at
    the end, so that it has exactly ``max_len`` entries.

    Args:
        X: iterable of strings to encode.
        max_len: number of entries per encoded row (forwarded to
            ``pad_sequences`` as ``maxlen``).

    Returns:
        The array produced by ``pad_sequences``: one row per element of ``X``.
    """
    alphabet = " abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}"

    # Build the lookup table once instead of scanning the alphabet twice per
    # character. ``setdefault`` keeps the FIRST position of a repeated symbol,
    # which is what ``str.index`` returned: '-' appears twice in the alphabet,
    # so it always encodes to 37 and index 60 is never emitted.
    char_to_index = {}
    for position, symbol in enumerate(alphabet):
        char_to_index.setdefault(symbol, position)

    # NOTE: ' ' encodes to 0, which is also the default padding value of
    # pad_sequences, so spaces and padding are indistinguishable downstream.
    # Shifting the indices would require a larger Embedding input_dim in the
    # models, so the encoding is intentionally left unchanged here.
    result = []
    for data in X:
        # Lower-case per character (not per string) to keep the exact original
        # mapping; anything that is not a single alphabet symbol is skipped.
        codes = (char_to_index.get(ch.lower()) for ch in data)
        result.append([code for code in codes if code is not None])

    # pad_sequences accepts a plain list of lists; no object-array wrapper needed.
    return tf.keras.preprocessing.sequence.pad_sequences(
        result, padding='post', truncating='post', maxlen=max_len)

In [9]:
# preprocess the data

In [10]:
# Every sentence is encoded to exactly MAX_LEN character indices.
MAX_LEN = 1000
# Fixed seed so the train/validation/test partition (and therefore every
# metric computed on it) is identical after a kernel restart.
SPLIT_SEED = 42

data = df['Sentence'].values
label = df['Label'].values

data = data2char_index(data, max_len=MAX_LEN)

# First hold out 10% for testing, then 10% of the remainder for validation.
trainX, testX, y_train, y_test = train_test_split(
    data, label, test_size=0.1, random_state=SPLIT_SEED)
trainX, x_val, y_train, y_val = train_test_split(
    trainX, y_train, test_size=0.1, random_state=SPLIT_SEED)

print(trainX.shape)
print(y_train.shape)

(88709, 1000)
(88709,)


In [11]:
# define CNN model 1

In [12]:
def model_v1(max_len, vocab_size=70):
    """Build the multi-branch character-level CNN binary classifier.

    The embedded sequence is fed to three parallel Conv1D branches (kernel
    sizes 5, 10 and 15, each followed by max pooling with pool sizes 12, 11
    and 10). The pooled branches are concatenated along axis 1, flattened and
    passed through dropout and two dense layers to a single sigmoid unit.

    Args:
        max_len: length of each input row of character indices.
        vocab_size: number of distinct character indices (``input_dim`` of the
            embedding). Defaults to 70, the value previously hard-coded.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    kl = tf.keras.layers
    main_input = kl.Input(shape=(max_len,))

    # The sequence length is already fixed by the Input layer, so the
    # deprecated ``input_length`` argument is not passed.
    # NOTE(review): the embedding is frozen and no weights are supplied here,
    # so it keeps its initial values unless they are set elsewhere -- confirm
    # this is intended.
    embed = kl.Embedding(
        input_dim=vocab_size,
        output_dim=80,
        trainable=False
    )(main_input)

    # One (kernel_size, pool_size) pair per parallel branch.
    branches = []
    for kernel_size, pool_size in ((5, 12), (10, 11), (15, 10)):
        conv = kl.Conv1D(32, kernel_size, padding='same', strides=1, activation='relu')(embed)
        branches.append(kl.MaxPooling1D(pool_size=pool_size)(conv))

    cnn = kl.concatenate(branches, axis=1)
    flat = kl.Flatten()(cnn)
    drop = kl.Dropout(0.2)(flat)
    dense1 = kl.Dense(1024, activation='relu')(drop)
    dense2 = kl.Dense(128, activation='relu')(dense1)
    main_output = kl.Dense(1, activation='sigmoid')(dense2)
    return tf.keras.Model(inputs=main_input, outputs=main_output)

In [13]:
# define CNN model 2

In [14]:
def model_v2(max_len, vocab_size=70):
    """Build the stacked character-level CNN binary classifier.

    The embedded sequence passes through three consecutive Conv1D blocks
    (32, 64 and 128 filters, kernel size 3), each followed by max pooling with
    pool size 2. The result is flattened and passed through dropout and two
    dense layers to a single sigmoid unit.

    Args:
        max_len: length of each input row of character indices.
        vocab_size: number of distinct character indices (``input_dim`` of the
            embedding). Defaults to 70, the value previously hard-coded.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    kl = tf.keras.layers

    # Input layer
    main_input = kl.Input(shape=(max_len,))

    # Embedding layer. The sequence length is already fixed by the Input
    # layer, so the deprecated ``input_length`` argument is not passed.
    # NOTE(review): the embedding is frozen and no weights are supplied here,
    # so it keeps its initial values unless they are set elsewhere -- confirm
    # this is intended.
    features = kl.Embedding(
        input_dim=vocab_size,
        output_dim=80,
        trainable=False
    )(main_input)

    # Convolutional stack: each block feeds the next one.
    for filters in (32, 64, 128):
        features = kl.Conv1D(filters, 3, padding='same', strides=1, activation='relu')(features)
        features = kl.MaxPooling1D(pool_size=2)(features)

    # Flatten directly: there is only one tensor, so no concatenation is
    # needed (a single-input concatenate is a no-op at best and is rejected
    # by some Keras versions).
    flat = kl.Flatten()(features)

    # Dropout and dense layers
    drop = kl.Dropout(0.2)(flat)
    dense1 = kl.Dense(512, activation='relu')(drop)
    dense2 = kl.Dense(128, activation='relu')(dense1)

    # Output layer
    main_output = kl.Dense(1, activation='sigmoid')(dense2)

    # Define and return the model
    return tf.keras.Model(inputs=main_input, outputs=main_output)


In [15]:
# define model 3: CNN branch combined with a GRU branch

In [16]:
def model_v3(max_len, vocab_size=70):
    """Build the hybrid CNN + GRU binary classifier.

    The embedded sequence is processed by two parallel paths: a stack of three
    Conv1D/MaxPooling1D blocks (32, 64 and 128 filters, kernel size 3, pool
    size 2) whose output is flattened, and a GRU with 128 units applied to the
    embedding directly. Both outputs are concatenated and passed through
    dropout and two dense layers to a single sigmoid unit.

    Args:
        max_len: length of each input row of character indices.
        vocab_size: number of distinct character indices (``input_dim`` of the
            embedding). Defaults to 70, the value previously hard-coded.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    kl = tf.keras.layers

    # input layer
    main_input = kl.Input(shape=(max_len,))

    # embedding layer. The sequence length is already fixed by the Input
    # layer, so the deprecated ``input_length`` argument is not passed.
    # NOTE(review): the embedding is frozen and no weights are supplied here,
    # so it keeps its initial values unless they are set elsewhere -- confirm
    # this is intended.
    embed = kl.Embedding(
        input_dim=vocab_size,
        output_dim=70,
        trainable=False
    )(main_input)

    # convolutional path: each block feeds the next one
    conv_features = embed
    for filters in (32, 64, 128):
        conv_features = kl.Conv1D(filters, 3, padding='same', strides=1, activation='relu')(conv_features)
        conv_features = kl.MaxPooling1D(pool_size=2)(conv_features)
    flat1 = kl.Flatten()(conv_features)

    # recurrent path: the GRU reads the embedded sequence directly (not the
    # CNN output) and returns one 128-unit vector per sample
    gru = kl.GRU(128)(embed)

    # concatenate flattened CNN and GRU outputs
    merged = kl.concatenate([flat1, gru], axis=1)
    drop = kl.Dropout(0.2)(merged)
    dense1 = kl.Dense(512, activation='relu')(drop)
    dense2 = kl.Dense(128, activation='relu')(dense1)
    main_output = kl.Dense(1, activation='sigmoid')(dense2)

    # create the model with input and output layers
    return tf.keras.Model(inputs=main_input, outputs=main_output)


In [17]:
#CNN Model 4 GRU