# Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import tensorflow as tf
import matplotlib.pyplot as plt

# Load Data

In [None]:
import pandas as pd

# Location of the combined capture CSV
file_path = r'C:\Users\CombinedALL.csv'

# Read the file and discard the raw 'Time' column straight away
data = pd.read_csv(file_path).drop(columns=['Time'])

# Keep only the rows whose source ISP is not Rogers
data = data.loc[data['SourceISP'].ne('Rogers')]

# Put 'AdjustedTime' first; every other column keeps its relative order
columns = ['AdjustedTime', *(name for name in data.columns if name != 'AdjustedTime')]
data = data[columns]

# Data Mapping

In [None]:
# The server IP is copied directly from the SourceIP column
data['ServerIP'] = data['SourceIP']

# Number the distinct server IPs in order of first appearance (1-based in the mapping)
unique_servers = data['ServerIP'].unique()
server_mapping = dict(zip(unique_servers, range(1, len(unique_servers) + 1)))

# Store the zero-based ID, which is what the classifier uses as its target
data['ServerID'] = data['ServerIP'].map(server_mapping) - 1

# Parse 'AdjustedTime' and express it as seconds elapsed since the earliest sample
data['AdjustedTime'] = pd.to_datetime(data['AdjustedTime'])
start_time = data['AdjustedTime'].min()
data['AdjustedTimeSeconds'] = data['AdjustedTime'].sub(start_time).dt.total_seconds()

# Model inputs (payload size, ARTT, and endpoint coordinates) and the ServerID target
features = data.loc[:, [
    'DataLength',
    'ARTT',
    'SourceLongitude',
    'SourceLatitude',
    'DestinationLongitude',
    'DestinationLatitude',
]]
label = data['ServerID']


# Train-Test Split

In [None]:
# Hold out 15% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    features,
    label,
    test_size=0.15,
    random_state=42,
)

# Report the size of each partition
print("Number of rows in X_train:", len(X_train))
print("Number of rows in X_test:", len(X_test))
print("Number of rows in y_train:", y_train.shape[0])
print("Number of rows in y_test:", y_test.shape[0])

# Normalization

In [None]:
# Fit the scaler on the training features only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Sanity checks: the scaled arrays should contain no NaN or infinite entries
print(
    "Checking for NaN or infinite values in X_train_scaled:",
    np.isnan(X_train_scaled).any(),
    np.isinf(X_train_scaled).any(),
)
print(
    "Checking for NaN or infinite values in X_test_scaled:",
    np.isnan(X_test_scaled).any(),
    np.isinf(X_test_scaled).any(),
)

# Dimensions the network is built from: per-sample input shape and number of classes
print('Input shape:', X_train_scaled.shape[1:])
print('Unique servers:', len(unique_servers))

# DNN Model Definition

In [None]:
# Short aliases for the Keras building blocks used below
Dense = tf.keras.layers.Dense
l2 = tf.keras.regularizers.l2

# Fully connected classifier: three tanh hidden layers (64 -> 32 -> 16) with L2 weight
# penalties, followed by a softmax over one output unit per unique server
model = tf.keras.models.Sequential([
    Dense(64, activation='tanh', kernel_regularizer=l2(0.001), input_shape=X_train_scaled.shape[1:]),
    Dense(32, activation='tanh', kernel_regularizer=l2(0.001)),
    Dense(16, activation='tanh', kernel_regularizer=l2(0.001)),
    Dense(len(unique_servers), activation='softmax'),
])

# Integer class targets, hence the sparse categorical cross-entropy loss
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation loss has not improved for 10 epochs and roll back to the best weights
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Training

In [None]:
# Fit for at most 100 epochs; early stopping may end the run sooner.
# validation_split=0.176 of the training rows is roughly 15% of the full dataset,
# given the 0.15 test split made earlier.
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.176,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)

# Evaluate

In [None]:
# Score the trained model on the held-out test split without progress output
test_metrics = model.evaluate(X_test_scaled, y_test, verbose=0)
loss, accuracy = test_metrics
print("Model accuracy on test set:", accuracy)

# Predict and decode

In [None]:
# Class probabilities for every test row
y_pred = model.predict(X_test_scaled)

# The most probable class index is the predicted zero-based ServerID
y_pred_labels = y_pred.argmax(axis=1)

# Ground-truth ServerIDs as a plain array, for comparison with the predictions
y_test_array = y_test.to_numpy()

In [None]:
###################################################
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Scale every row with the scaler that was fitted on the training split
features_scaled = scaler.transform(features)

# Predict a server for every row of the dataset.
# NOTE: `features` includes the rows used for training, so the matrix below
# describes the whole dataset and is not a held-out evaluation.
y_pred_full = model.predict(features_scaled)
y_pred_full_labels = np.argmax(y_pred_full, axis=1)  # Predicted ServerIDs
y_true_full = label.to_numpy()  # True ServerIDs

# Reverse mapping from ServerID back to ISP (the last row seen for an ID wins)
serverid_to_isp = data.set_index('ServerID')['SourceISP'].to_dict()

# Give each ISP a 1-based numeric label, assigned in sorted ISP order
unique_isps = sorted(data['SourceISP'].unique())
isp_to_num = {isp: idx for idx, isp in enumerate(unique_isps, start=1)}
num_to_isp = {idx: isp for isp, idx in isp_to_num.items()}

# Compose the two mappings once so each row needs a single lookup.
# A ServerID missing from the data raises KeyError on that ID; the previous
# 'Unknown ISP' fallback was never a key of isp_to_num and could only fail.
serverid_to_isp_num = {
    server_id: isp_to_num[isp] for server_id, isp in serverid_to_isp.items()
}
y_true_isp_full = [serverid_to_isp_num[server_id] for server_id in y_true_full]
y_pred_isp_full = [serverid_to_isp_num[server_id] for server_id in y_pred_full_labels]

# Confusion matrix over the numeric ISP labels (rows: true, columns: predicted)
isp_labels_num = sorted(isp_to_num.values())
conf_matrix_full = confusion_matrix(y_true_isp_full, y_pred_isp_full, labels=isp_labels_num)

# Plot the matrix computed above, without a colorbar.
# Fix: the display previously referenced the undefined name `conf_matrix`.
plt.figure(figsize=(8, 6))
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix_full, display_labels=isp_labels_num)
disp.plot(cmap=plt.cm.Blues, ax=plt.gca(), colorbar=False)

# Axis cosmetics: rotated x tick labels and larger fonts for readability
plt.xticks(rotation=45, fontsize=12)
plt.yticks(fontsize=12)
plt.xlabel("Assigned Server", fontsize=14, labelpad=10)
plt.ylabel("Best Server", fontsize=14, labelpad=10)

plt.tight_layout()
plt.show()


In [None]:
# Destination CSV for the test-set predictions
output_file = r'C:\Users\DNNPredictions.csv'

# One row per test sample: the true ServerID next to the predicted one
result_columns = {
    'True ServerID': y_test_array,
    'Predicted ServerID': y_pred_labels,
}
results_df = pd.DataFrame(result_columns)

# Write without the index column, then confirm where the file went
results_df.to_csv(output_file, index=False)
print(f"Results saved to {output_file}")