In [None]:
import gradio as gr
import keras
import tensorflow as tf
import numpy as np
import json
from urllib.request import Request, urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup as bs
import networkx as nx
from spektral.utils import normalized_adjacency
import requests

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Embedding, Conv1D, MaxPooling1D, Dropout, Input, Flatten
from spektral.layers import GCSConv, GlobalAvgPool, MinCutPool


In [None]:
# Number of classes (e.g., Phishing or Not Phishing)
num_classes = 2
# Graph size fed to the network: node count and per-node feature width.
# preprocess_graphs() pads every graph to these same dimensions (140 x 140).
num_nodes, num_features = 140, 140

# Activation for the graph-convolution layers
gcsconv_act = 'relu'
# Activation for the final classification layer
out_act = 'linear'
# Optimizer used when compiling the model
optimizer = 'adam'

# Inputs
# Node-feature matrix: one row per node, one column per feature.
X_in = Input(shape=(num_nodes, num_features))
# Adjacency matrix: square, one row and one column per node. The shape is tied
# to num_nodes (not num_features) so the two settings can be changed
# independently without silently breaking the adjacency input.
# NOTE(review): this input is declared sparse while preprocess_graphs() returns
# dense NumPy arrays -- confirm Keras accepts dense data for it at predict time.
A_in = Input(shape=(num_nodes, num_nodes), sparse=True)
#I_in = Input(shape=(None,),name='segment_ids',dtype=tf.int32)

class GraphConvSkip(GCSConv):
    """Thin subclass of spektral's ``GCSConv`` that defaults to ReLU activation.

    Args:
        channels: Number of output channels of the convolution.
        activation: Activation passed to ``GCSConv`` (default ``'relu'``).
        **kwargs: Any further ``GCSConv`` / Keras ``Layer`` keyword arguments
            (e.g. ``name``, ``kernel_initializer``). They must be accepted
            because ``get_config()`` returns them and Keras re-creates the
            layer with ``cls(**config)`` when a saved model is loaded; without
            them deserialization fails with ``TypeError``.
    """

    def __init__(self, channels, activation='relu', **kwargs):
        super().__init__(channels, activation=activation, **kwargs)

    def get_config(self):
        """Return the parent layer's config unchanged (no extra state here)."""
        config = super().get_config()
        return config
#GCSConv(32, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-3), use_bias=True, kernel_initializer='he_uniform', bias_initializer='zeros')
# Graph convolution stack: three GraphConvSkip layers with dropout in between.
# The activations come from the configuration variables defined above instead
# of repeating the literals, so changing the config actually changes the model.
gc1 = GraphConvSkip(channels=32, activation=gcsconv_act)([X_in, A_in])
dropout_1 = Dropout(.5)(gc1)
gc2 = GraphConvSkip(channels=32, activation=gcsconv_act)([dropout_1, A_in])
dropout_2 = Dropout(.5)(gc2)
gc3 = GraphConvSkip(channels=32, activation=gcsconv_act)([dropout_2, A_in])

# Collapse the per-node outputs into one vector per graph
global_avg_pool = GlobalAvgPool()(gc3)

# Classification layer: one output per class
output = Dense(num_classes, activation=out_act)(global_avg_pool)

# Create the model
model = tf.keras.Model(inputs=[X_in, A_in], outputs=output)

# Compile the model
model.compile(optimizer=optimizer, loss='mse', metrics=['accuracy'])
# NOTE(review): no fit() or load_weights() call for `model` is visible in this
# notebook, so it would predict with freshly initialised weights unless the
# trained weights are loaded elsewhere -- TODO confirm.

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
model.summary()


In [None]:
def preprocess_graphs(graph, max_nodes=140, max_features=140):
    """Convert a NetworkX graph into fixed-size model inputs.

    Args:
        graph: NetworkX graph to convert.
        max_nodes: Number of rows/columns of the returned adjacency matrix and
            number of rows of the feature matrix (default 140).
        max_features: Number of columns of the returned feature matrix
            (default 140).

    Returns:
        Tuple ``(adj_matrix, nodes)`` of float32 arrays:
        ``adj_matrix`` has shape ``(max_nodes, max_nodes)`` and is the
        zero-padded adjacency matrix passed through ``normalized_adjacency``;
        ``nodes`` has shape ``(max_nodes, max_features)`` and holds an identity
        block (one one-hot row per node) padded with zeros.

    Graphs with more than ``max_nodes`` nodes are truncated to the first
    ``max_nodes`` nodes (in the graph's node order); edges to dropped nodes are
    discarded. Previously such graphs produced a negative pad width and an
    opaque ``ValueError`` from ``np.pad``.
    """
    node_count = graph.number_of_nodes()
    kept = min(node_count, max_nodes)

    if node_count == 0:
        # Avoid asking NetworkX to convert an empty graph; use an empty matrix.
        adj_matrix = np.zeros((0, 0), dtype=np.float32)
    else:
        adj_matrix = nx.to_numpy_array(graph, dtype=np.float32)
    # Keep only the leading block so the pad widths below are never negative.
    adj_matrix = adj_matrix[:kept, :kept]
    adj_matrix = np.pad(adj_matrix, ((0, max_nodes - kept), (0, max_nodes - kept)), mode='constant')
    adj_matrix = normalized_adjacency(adj_matrix)

    # One-hot node features: identity block, clipped to the feature width.
    feature_cols = min(kept, max_features)
    nodes = np.eye(kept, feature_cols, dtype=np.float32)
    nodes = np.pad(nodes, ((0, max_nodes - kept), (0, max_features - feature_cols)), mode='constant')

    return np.array(adj_matrix), np.array(nodes)

def graph_maker(url, timeout=10):
    """Download a web page and turn its HTML tag structure into model inputs.

    Args:
        url: Address of the page to fetch. Must use http:// or https://.
        timeout: Seconds to wait for the server before giving up (default 10).

    Returns:
        Tuple ``(adj_matrix, node_features)`` as produced by
        ``preprocess_graphs`` for the tag graph of the page.

    Raises:
        ValueError: If ``url`` does not start with http:// or https://.
        urllib.error.HTTPError / OSError: If the page cannot be fetched. The
            error is printed and re-raised; previously it was swallowed and the
            function then crashed on the undefined ``soup`` variable.
    """
    url = url.strip()
    # The URL is user-supplied; urlopen would also follow file:// or ftp://
    # URLs, so only plain web addresses are accepted.
    if not url.lower().startswith(('http://', 'https://')):
        raise ValueError(f'Unsupported URL (expected http:// or https://): {url!r}')

    # Fetch the page. HTTPError is handled first because it is a subclass of
    # OSError; the OSError handler covers URLError, timeouts and socket errors.
    try:
        request = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with urlopen(request, timeout=timeout) as response:
            html_object = response.read()
    except HTTPError as e:
        print(f'Error{e}')
        raise
    except OSError as e:
        print(f"Error: {e}")
        raise

    soup = bs(html_object, 'html.parser')

    # Transform the HTML into a graph whose nodes are tag *names*, so repeated
    # tags collapse into a single node.
    G = nx.Graph()
    tags = soup.find_all()
    for tag in tags:
        G.add_node(tag.name)
    for tag in tags:
        # find_all() returns every descendant, not only direct children, so
        # each tag name is linked to the names of all tags nested inside it.
        for descendant in tag.find_all():
            G.add_edge(tag.name, descendant.name)

    adj_matrix, node_features = preprocess_graphs(G)

    return adj_matrix, node_features

In [None]:
# Files of the trained URL classifier and its character vocabulary.
URLDET_VOCAB_PATH = r'D:\Kerja\UITM\Sem_6\CSP_650\FYP\urldet_vocab.json'
URLDET_MODEL_PATH = r'D:\Kerja\UITM\Sem_6\CSP_650\FYP\urldet.h5'
# Every URL is padded with spaces / cut to exactly this many characters.
URL_LENGTH = 150

# Filled on first use so the model file is not re-read on every request.
_urldet_assets = {}


def _load_urldet_assets():
    """Return ``(vocab, urldet_model)``, loading them from disk only once."""
    if not _urldet_assets:
        with open(URLDET_VOCAB_PATH, encoding='utf-8') as vocab_file:
            vocab = json.load(vocab_file)
        urldet = keras.models.load_model(URLDET_MODEL_PATH)
        # Store both only after both loaded, so a failed load is retried.
        _urldet_assets['vocab'] = vocab
        _urldet_assets['model'] = urldet
    return _urldet_assets['vocab'], _urldet_assets['model']


def URLDET(url):
    """Classify a URL using the URL-text model and the page's HTML graph model.

    Args:
        url: The URL string to check.

    Returns:
        Tuple ``(predict, url_prediction, html_prediction)`` where ``predict``
        is ``'Safe'``, ``'Careful'`` or ``'Phishing'``, ``url_prediction`` is
        the raw output of the URL model and ``html_prediction`` the raw output
        of the HTML graph model (``[[0, 0]]`` when the HTML step failed).

    The verdict adds the winning class index of each model: 2 -> Safe,
    1 -> Careful, anything else -> Phishing.
    """
    vocab, urldet = _load_urldet_assets()

    # HTML branch. Fetching the page is part of the guarded section because
    # that is where network errors occur. The branch is best effort: on any
    # failure it votes class 1, as the original fallback did, so the verdict is
    # then decided by the URL model alone ('Safe' or 'Careful').
    try:
        adj_matrix, node_features = graph_maker(url)
        adj_matrix = np.array([adj_matrix])
        node_features = np.array([node_features])
        html_prediction = model.predict([node_features, adj_matrix])
        max_html = int(np.argmax(html_prediction[0]))
    except Exception as e:
        print(f'HTML analysis failed ({type(e).__name__}: {e}); using fallback HTML vote')
        max_html = 1
        html_prediction = np.array([[0, 0]])

    # Pad with spaces / cut so the URL is exactly URL_LENGTH characters long.
    url = url.ljust(URL_LENGTH)[:URL_LENGTH]

    # Convert the URL into a vector of vocabulary indices. Characters missing
    # from the vocabulary are mapped to the padding character's index instead
    # of raising KeyError.
    # NOTE(review): assumes the space used for padding is in the vocabulary
    # (falls back to index 0 otherwise) -- confirm against the training code.
    pad_index = vocab.get(' ', 0)
    vector_url = [vocab.get(char, pad_index) for char in url]

    # Predict the url
    url_prediction = urldet.predict(np.array([vector_url]))
    max_url = int(np.argmax(url_prediction[0]))

    # Plain integer class indices are summed; the previous np.where() results
    # had mismatched shapes in the fallback case and could not be summed.
    votes = max_url + max_html
    if votes == 2:
        predict = 'Safe'
    elif votes == 1:
        predict = "Careful"
    else:
        predict = 'Phishing'

    return predict, url_prediction, html_prediction

# Gradio front end: one text box for the URL, three text boxes for the verdict
# and the raw outputs of the two models returned by URLDET.
url_box = gr.Textbox(label='URL', lines=2, placeholder='url')
result_boxes = [
    gr.Textbox(label='prediction'),
    gr.Textbox(label='URLDET result'),
    gr.Textbox(label='HTMLDET prediction'),
]
# Sample URLs offered as clickable examples in the UI.
sample_urls = [
    'https://www.cbsnews.com/news/twitter-rebrand-x-name-change-elon-musk-what-it-means/',
    'https://freefireadvanceserver.info',
    'https://alw4erb.web.app/',
]

demo = gr.Interface(
    fn=URLDET,
    inputs=[url_box],
    outputs=result_boxes,
    examples=sample_urls,
    theme=gr.themes.Soft(),
)
demo.launch()