In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [None]:
# Read the request log from disk into a DataFrame
data = pd.read_csv(filepath_or_buffer='data.csv')

# Show the leading rows as this cell's output
data.head(n=5)

In [None]:
# Tokenize the IP addresses and URLs
#
# Every IP and every URL has to map to exactly ONE integer id: the rest of the
# notebook treats an IP as a single token and a URL as one step of the request
# history. The Tokenizer defaults break that assumption, because the default
# `filters` include '.', '/', '_' and '-' (so "10.119.17.11" is split into four
# separate "words" and a URL path into its fragments) and `lower=True` folds
# case. Filtering and lowercasing are therefore disabled, and the split
# character is a newline so a value is kept whole even if it contains spaces.
ip_tokenizer = Tokenizer(filters='', lower=False, split='\n')
url_tokenizer = Tokenizer(filters='', lower=False, split='\n')

# Build the vocabularies: one entry per distinct IP / distinct URL.
ip_tokenizer.fit_on_texts(data['IP'])
url_tokenizer.fit_on_texts(data['URL'])

# Encode each row; every cell now holds a one-element list `[token_id]`.
data['ip_seq'] = ip_tokenizer.texts_to_sequences(data['IP'])
data['url_seq'] = url_tokenizer.texts_to_sequences(data['URL'])

# Create sequences for training
sequence_length = 3

def create_sequences(data, window=None):
    """Build sliding-window training examples from the tokenized request log.

    For every start row ``i`` the URL tokens of rows ``i .. i+window-1`` form
    the input history, the tokens of row ``i+window`` are the prediction
    target, and the IP tokens of row ``i`` are the accompanying IP input.

    Parameters
    ----------
    data : mapping of column name -> per-row token lists (e.g. a DataFrame)
        Must provide ``'url_seq'`` and ``'ip_seq'``; each row is a list of
        integer token ids.
    window : int, optional
        Number of consecutive rows per history. Defaults to the module-level
        ``sequence_length``.

    Returns
    -------
    (sequences, next_url, ips) : tuple of lists
        ``sequences[k]`` is a flat list of ints (the row token lists of the
        window concatenated in order), ``next_url[k]`` and ``ips[k]`` are the
        token lists of the target row and of the window's first row.
        All three are empty when there are not more than ``window`` rows.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window is None:
        window = sequence_length
    if window < 1:
        raise ValueError("window must be a positive integer")

    # Materialise the columns as plain Python lists. Iterating a Series walks
    # its values positionally, so the DataFrame index labels do not matter
    # (label lookups such as series[i] fail on a non-default index).
    url_rows = [list(tokens) for tokens in data['url_seq']]
    ip_rows = [list(tokens) for tokens in data['ip_seq']]

    sequences = []
    next_url = []
    ips = []

    # NOTE(review): windows run over consecutive rows regardless of which IP
    # issued them; if the log interleaves clients, consider grouping by IP
    # before windowing.
    for start in range(len(url_rows) - window):
        stop = start + window
        # Flatten to a list of ints: pad_sequences needs integer sequences,
        # not a Series/list whose elements are themselves lists.
        sequences.append([tok for row in url_rows[start:stop] for tok in row])
        next_url.append(url_rows[stop])
        ips.append(ip_rows[start])

    return sequences, next_url, ips

sequences, next_url, ips = create_sequences(data)

# Turn the ragged Python lists into fixed-width integer arrays:
#   X_url -> (n_samples, sequence_length), zero-padded on the right
#   X_ip  -> (n_samples, 1), each IP treated as a single token
#   y     -> (n_samples, 1), the id of the URL that follows the window
X_url, X_ip, y = (
    pad_sequences(sequences, maxlen=sequence_length, padding='post'),
    pad_sequences(ips, maxlen=1),
    pad_sequences(next_url, maxlen=1, padding='post'),
)

# Hold out 20% of the samples for testing (fixed seed for reproducibility)
(X_train_url, X_test_url,
 X_train_ip, X_test_ip,
 y_train, y_test) = train_test_split(
    X_url, X_ip, y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from keras.models import Model
from keras.layers import Input, GRU, Dense, Embedding, Concatenate

# Define the input layers
url_input = Input(shape=(sequence_length,), name='url_input')
ip_input = Input(shape=(1,), name='ip_input')

# Embedding layers for URLs and IPs
# +1 because Tokenizer ids start at 1; index 0 is reserved (used as padding).
num_url_tokens = len(url_tokenizer.word_index) + 1
num_ip_tokens = len(ip_tokenizer.word_index) + 1
embedding_dim = 128

# url_embedding: (batch, sequence_length, embedding_dim)
url_embedding = Embedding(input_dim=num_url_tokens, output_dim=embedding_dim)(url_input)
# ip_embedding:  (batch, 1, embedding_dim)
ip_embedding = Embedding(input_dim=num_ip_tokens, output_dim=embedding_dim)(ip_input)

# Concatenate URL and IP embeddings
# The two tensors differ in their time dimension (sequence_length vs. 1), so
# they cannot be joined on the default last axis - that raises a shape
# mismatch error. Join them along the time axis instead: the IP embedding
# becomes the first timestep, followed by the URL history, giving
# (batch, sequence_length + 1, embedding_dim).
concat = Concatenate(axis=1)([ip_embedding, url_embedding])

# GRU Layer (returns only the final hidden state)
gru_output = GRU(256)(concat)

# Output layer (predict next URL): one probability per URL token id
output = Dense(num_url_tokens, activation='softmax')(gru_output)

# Define the model
model = Model(inputs=[url_input, ip_input], outputs=output)

# Compile the model
# Targets are integer ids, hence the sparse variant of cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

In [None]:
# Fit on the training split; 20% of it is held back for validation
history = model.fit(
    x=[X_train_url, X_train_ip],
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

# Score the fitted model on the untouched test split
loss, accuracy = model.evaluate(x=[X_test_url, X_test_ip], y=y_test)

# Report the test metrics
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Persist the trained model to disk
model.save('url_prediction_model.h5')

In [None]:
from keras.models import load_model
import numpy as np

# Load the model from file
loaded_model = load_model('url_prediction_model.h5')

# Function to make a prediction
def predict_next_url(loaded_model, ip, url_sequence):
    """Predict the URL most likely to be requested next.

    Parameters
    ----------
    loaded_model : model object exposing ``predict``
        Called with ``[url_batch, ip_batch]``.
    ip : str
        Client IP address.
    url_sequence : list of str, or str
        Recently requested URLs, oldest first. A single string is treated as
        a one-element history.

    Returns
    -------
    str
        The URL whose token id has the highest predicted probability, or
        ``"Unknown URL"`` if that id is not in the URL tokenizer vocabulary
        (e.g. the padding id 0).

    Notes
    -----
    Relies on the notebook-level ``ip_tokenizer``, ``url_tokenizer`` and
    ``sequence_length``.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(url_sequence, str):
        url_sequence = [url_sequence]

    ip_seq = ip_tokenizer.texts_to_sequences([ip])

    # texts_to_sequences returns one token list PER URL. Padding that result
    # directly would produce a batch of len(url_sequence) rows, which does not
    # match the single-row IP batch. Flatten into ONE history sequence.
    url_tokens = [
        token
        for tokens in url_tokenizer.texts_to_sequences(url_sequence)
        for token in tokens
    ]

    # Pad sequences -> shapes (1, sequence_length) and (1, 1).
    # Default 'pre' padding/truncation keeps the most recent URLs at the end.
    url_seq = pad_sequences([url_tokens], maxlen=sequence_length)
    ip_seq = pad_sequences(ip_seq, maxlen=1)

    # Predict next URL
    pred = loaded_model.predict([url_seq, ip_seq])

    # Convert prediction back to URL
    pred_url_index = int(np.argmax(pred, axis=1)[0])
    pred_url = url_tokenizer.index_word.get(pred_url_index, "Unknown URL")
    return pred_url

# Try the predictor on a hand-written example: one client IP plus its two
# most recent requests (oldest first)
sample_ip = "10.119.17.11"
sample_urls = [
    "/trx_rajal/trx_rajal/rptHasilLabFrame/2408261537/LK2408260198",
    "/trx_rajal/trx_rajal/rptHasilLab/2408261537/LK2408260198",
]

predicted_url = predict_next_url(loaded_model, ip=sample_ip, url_sequence=sample_urls)
print(f"Predicted next URL: {predicted_url}")
