In [None]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Load the model
# The relative path means 'best_model.keras' is looked up in the kernel's current working directory.
model = load_model('best_model.keras')  # Module-level Keras model; process_and_predict_url calls model.predict on it.

In [None]:
# Set maximum length and tokenizer settings
# NOTE(review): 964 presumably equals the input length the saved model was trained with — confirm.
max_length = 964  # Sequence length handed to pad_sequences as `maxlen` in process_url.
tokenizer = Tokenizer(char_level=True)  # Character-level tokenizer; starts unfitted and is fitted by load_and_train_tokenizer.

In [None]:
# Function to load the dataset and train the tokenizer
def load_and_train_tokenizer(file_path, target_tokenizer=None):
    """Fit a tokenizer on the URLs stored in column 1 of a header-less CSV.

    Args:
        file_path: Path of the CSV file. It is read with ``header=None`` and
            ``quotechar='"'``; the URLs are taken from column 1.
        target_tokenizer: Object exposing ``fit_on_texts(list_of_str)``.
            Defaults to the module-level ``tokenizer`` so existing
            one-argument calls behave as before.

    Returns:
        None. The tokenizer is updated in place.

    Note:
        A ``pandas.errors.ParserError`` is reported with ``print`` and the
        tokenizer is left untouched. Any other error (for example a missing
        file) propagates to the caller.
    """
    import pandas as pd

    # Resolve the default lazily so an explicit tokenizer never touches the global.
    fit_target = tokenizer if target_tokenizer is None else target_tokenizer

    # Only the read itself can raise ParserError, so keep the try block that narrow.
    try:
        df = pd.read_csv(file_path, header=None, quotechar='"')
    except pd.errors.ParserError as e:
        print(f'ParserError: {e}')
        return

    # Blank cells arrive as NaN and numeric-looking cells as numbers; drop the
    # former and stringify the latter so the tokenizer only ever receives text.
    train_urls = df[1].dropna().astype(str).tolist()
    fit_target.fit_on_texts(train_urls)

load_and_train_tokenizer('updated_train.csv')  # Fits the module-level tokenizer on 'updated_train.csv'; run this cell before any prediction so the tokenizer has a vocabulary.

In [None]:
# URL processing function
def process_url(url):
    """Encode a single URL as a padded integer sequence.

    The URL is wrapped in a one-element list, converted to integer ids by the
    module-level ``tokenizer``, and passed to ``pad_sequences`` with
    ``maxlen=max_length``.

    Args:
        url: The URL text to encode.

    Returns:
        Whatever ``pad_sequences`` returns for the one-URL batch.
    """
    return pad_sequences(tokenizer.texts_to_sequences([url]), maxlen=max_length)

# Function to format the prediction result
def format_prediction(prediction):
    """Render the first value of a prediction as a percentage string.

    Args:
        prediction: Doubly indexable container; only ``prediction[0][0]``
            is read.

    Returns:
        A string of the form ``"Probability: %NN.NN"`` where the number is
        ``prediction[0][0] * 100`` with two decimal places. The percent sign
        precedes the number.
    """
    first_row = prediction[0]
    percentage = 100 * first_row[0]  # scale the 0..1 score to 0..100
    return f"Probability: %{percentage:.2f}"

# Function to process the URL and make a prediction
def process_and_predict_url(url):
    """Run the full pipeline for one URL: encode, predict, format.

    Args:
        url: The URL text to score.

    Returns:
        The string produced by ``format_prediction`` for the output of
        ``model.predict`` on the encoded URL.
    """
    model_input = process_url(url)
    return format_prediction(model.predict(model_input))

In [None]:
# Initialize Firebase Admin SDK
# Re-running this cell must not initialize the default app twice. Probe for it
# through the public API rather than the private `firebase_admin._apps` registry:
# get_app() raises ValueError when the default app has not been created yet.
try:
    firebase_admin.get_app()
except ValueError:
    cred = credentials.Certificate("key.json")  # Loads Firebase credentials from a JSON file.
    firebase_admin.initialize_app(cred)  # Initializes the default Firebase Admin app with the credentials.

# Get a Firestore instance
db = firestore.client()  # Firestore client bound to the default app.

# Function to listen to data changes in Firestore
def on_snapshot(col_snapshot, changes, read_time):
    """Score newly added or modified documents that have no result yet.

    For every change of type ``ADDED`` or ``MODIFIED`` whose document has no
    ``result`` field (or has it set to ``None``) and carries a truthy ``url``,
    the URL is printed, scored with ``process_and_predict_url``, and the
    outcome is written back to the document's ``result`` field.

    Args:
        col_snapshot: Unused.
        changes: Iterable of change objects exposing ``type.name`` and
            ``document``.
        read_time: Unused.
    """
    for change in changes:
        # Anything other than an add or a modification is ignored.
        if change.type.name not in ('ADDED', 'MODIFIED'):
            continue

        snapshot = change.document
        fields = snapshot.to_dict()

        # Already scored: nothing to do.
        if 'result' in fields and fields['result'] is not None:
            continue

        url = fields.get('url')
        if not url:
            continue

        # Process the URL and make a prediction
        print(url)
        outcome = process_and_predict_url(url)

        # Write the prediction result to Firestore
        snapshot.reference.update({'result': outcome})

# Listen to changes in the 'URL' collection
query = db.collection(u'URL')  # Reference to the 'URL' collection.
query_watch = query.on_snapshot(on_snapshot)  # Registers on_snapshot as the callback and keeps the returned watch handle.