In [19]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the data directly from the same directory as the script
data = pd.read_csv('dataset_backup.csv')

# Define IP to Node label mapping based on your topology connections
ip_to_node = {
    '10.10.1.1': 'A', '10.10.3.1': 'A', '10.10.2.1': 'A',
    '10.10.4.1': 'B', '10.10.5.1': 'B',
    '10.10.6.1': 'C',
    '10.10.7.1': 'D',
    '10.10.8.1': 'E',
    '10.10.9.1': 'F',
    '10.10.10.1': 'G', '10.10.11.1': 'G',
    '10.10.1.2': 'C', '10.10.3.2': 'B', '10.10.2.2': 'D',
    '10.10.4.2': 'D', '10.10.5.2': 'E',
    '10.10.6.2': 'F',
    '10.10.7.2': 'G',
    '10.10.8.2': 'I',
    '10.10.9.2': 'H',
    '10.10.10.2': 'H', '10.10.11.2': 'I'
}

# Strip off any network masks or additional subnet notation if present
data['source'] = data['source'].str.split('/').str[0]
data['destination'] = data['destination'].str.split('/').str[0]

# Map IPs to node labels
data['source'] = data['source'].map(ip_to_node)
data['destination'] = data['destination'].map(ip_to_node)

# Index to node label mapping for paths
index_to_label = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I'}

# Update paths to use node labels instead of indices
def index_to_label_path(path):
    return ' > '.join(index_to_label[int(node)] for node in path.split(' > '))

data['path'] = data['path'].apply(index_to_label_path)

# Initialize label encoder
label_encoder = LabelEncoder()
# Fit the encoder on all unique node labels present
label_encoder.fit(list(index_to_label.values()))

# Encode source, destination, and paths
data['source_encoded'] = label_encoder.transform(data['source'])
data['destination_encoded'] = label_encoder.transform(data['destination'])

# Encode paths
def encode_path(path):
    return label_encoder.transform(path.split(' > ')).tolist()

data['path_encoded'] = data['path'].apply(encode_path)

# Display the preprocessed data
print(data.head())
print(data.tail())

   timestamp source destination  packet id           path  number of hops  \
0    2.00000      A           I          0  A > D > G > I               3   
1    2.00000      B           H          0  B > D > G > H               3   
2    2.00000      D           H          0      D > G > H               2   
3    2.00417      H           D          0      H > G > D               2   
4    2.00625      I           A          0  I > E > B > A               3   

   source_encoded  destination_encoded  path_encoded  
0               0                    8  [0, 3, 6, 8]  
1               1                    7  [1, 3, 6, 7]  
2               3                    7     [3, 6, 7]  
3               7                    3     [7, 6, 3]  
4               8                    0  [8, 4, 1, 0]  
     timestamp source destination  packet id                   path  \
463    79.0000      B           H         19      B > E > I > G > H   
464    79.0000      D           H         39  D > B > E > I > G >

In [25]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_length = max(data['path_encoded'].apply(len))
X = pad_sequences(data['path_encoded'], maxlen=max_length, padding='post')
y = data['destination_encoded']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Define the model
model = Sequential()
model.add(Embedding(input_dim=len(index_to_label), output_dim=50, input_length=max_length))
model.add(LSTM(100, return_sequences=False))
model.add(Dense(len(index_to_label), activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')

model.save('path_prediction_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.8723404407501221


In [21]:
import numpy as np
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Assuming 'data' is your DataFrame and already preprocessed
max_length = max(data['path_encoded'].apply(len))
X = pad_sequences(data['path_encoded'], maxlen=max_length, padding='post')
y = data['destination_encoded'].values

# Define the K-Fold Cross Validator
num_folds = 5
kfold = KFold(n_splits=num_folds, shuffle=True)

# K-Fold Cross Validation model evaluation
fold_no = 1
for train, test in kfold.split(X, y):
    model = Sequential([
        Embedding(input_dim=len(index_to_label), output_dim=50, input_length=max_length),
        LSTM(100),
        Dense(len(index_to_label), activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    print(f'Training for fold {fold_no} ...')
    model.fit(X[train], y[train], epochs=10, batch_size=32, validation_data=(X[test], y[test]))
    
    # Increase fold number
    fold_no += 1


Training for fold 1 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 2 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 3 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 4 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 5 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder

# Load the trained model
model = load_model('path_prediction_model.h5')

# Assuming `index_to_label` and `label_encoder` are predefined as in your preprocessing script
index_to_label = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I'}
label_encoder = LabelEncoder()
label_encoder.fit(list(index_to_label.values()))

# Function to predict path
def predict_path(source, destination):
    if source not in index_to_label.values() or destination not in index_to_label.values():
        return "Invalid Source or Destination"
    
    source_encoded = label_encoder.transform([source])[0]
    destination_encoded = label_encoder.transform([destination])[0]
    
    # Create a dummy input for LSTM with the correct shape
    max_length = 6  # Adjust this to match your model's expected input length
    dummy_input = np.array([[source_encoded] + [0] * (max_length - 1)])  # Ensure the input length matches the expected shape

    # Predict the path
    predicted_path_encoded = model.predict(dummy_input)
    predicted_path_encoded = np.argmax(predicted_path_encoded, axis=1)
    predicted_path = [index_to_label[idx] for idx in predicted_path_encoded]

    # Remove padding and create actual path
    actual_path = []
    for node in predicted_path:
        if node == destination:
            actual_path.append(node)
            break
        actual_path.append(node)
    if destination not in actual_path:
        actual_path.append(destination)
    
    number_of_hops = len(actual_path) - 1
    return actual_path, number_of_hops

# Main code to take input and provide the output
if __name__ == "__main__":
    source = input("Enter the source node: ").strip()
    destination = input("Enter the destination node: ").strip()
    
    path, hops = predict_path(source, destination)
    
    if isinstance(path, str):  # Error message case
        print(path)
    else:
        print(f"Predicted Path: {' > '.join(path)}")
        print(f"Number of Hops: {hops}")


Enter the source node:  B
Enter the destination node:  H


Predicted Path: I > H
Number of Hops: 1
