# Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load Data

In [None]:
# Read the raw CSV and keep only the rows that have an 'Org' value
file_path = r'C:\Users\File.csv'
data = pd.read_csv(file_path).dropna(subset=['Org'])

# Data Mapping

In [None]:
# Give every distinct 'Source' value an integer Server ID (0, 1, 2, ...)
# and store that ID alongside each row.
unique_servers = data['Source'].unique()
server_mapping = dict(zip(unique_servers, range(len(unique_servers))))
data['ServerID'] = data['Source'].map(server_mapping)

# Sort & Sequence

In [None]:
# Convert AdjustedTime to numeric format
data['AdjustedTime'] = pd.to_datetime(data['AdjustedTime']).astype('int64') // 10**9  # Convert to seconds

# Sort chronologically so that every rolling window built below is in time
# order rather than in CSV row order. A stable sort keeps the original
# relative order of rows that share the same timestamp.
data = data.sort_values('AdjustedTime', kind='mergesort')

# Encode categorical columns as integer codes; the fitted encoders are kept
# in label_encoders so the codes can be mapped back to the original values.
label_encoders = {}
categorical_cols = ['Protocol', 'Connection', 'User', 'Org']
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Define feature and target columns
features = ['AdjustedTime', 'Protocol', 'Connection', 'User', 'Length', 'ARTT', 'Longitude', 'Latitude', 'Org']
labels = 'ServerID'

# Scale features to zero mean / unit variance.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before any
# train/test split, so test-set statistics influence the scaling — confirm
# this is acceptable for the evaluation.
scaler = StandardScaler()
data[features] = scaler.fit_transform(data[features])

# Group by user to create sequences. 'User' has been scaled at this point,
# but each distinct user still has its own distinct value, so the grouping
# is unchanged. Rows inside each group keep the chronological order
# established by the sort above.
user_groups = data.groupby('User')
X_sequences = []
y_sequences = []
sequence_length = 60

for user, group in user_groups:
    user_features = group[features].values
    user_labels = group[labels].values

    # Create rolling sequences (stride 1). Users with fewer than
    # sequence_length rows produce an empty range and contribute no windows.
    for end in range(sequence_length, len(user_features) + 1):
        X_sequences.append(user_features[end - sequence_length:end])
        y_sequences.append(user_labels[end - 1])  # Target is last ID in the sequence

# Convert lists to numpy arrays. Every window already has exactly
# sequence_length rows, so no padding is needed; the explicit reshape keeps
# the array 3-D (samples, timesteps, features) even when no window was built.
X_sequences = np.asarray(X_sequences, dtype='float32').reshape(-1, sequence_length, len(features))
y_sequences = np.array(y_sequences)



# Train-Test Split

In [None]:
# Hold out 15% of the sequences as a test set; the fixed seed makes the
# shuffle reproducible.
# NOTE(review): windows are built with stride 1, so neighbouring windows of
# the same user overlap heavily; a random split can place near-identical
# windows in both sets — consider splitting by time or by user instead.
X_train, X_test, y_train, y_test = train_test_split(
    X_sequences,
    y_sequences,
    test_size=0.15,
    random_state=42,
)


# GRU Model Definition

In [None]:
# Build the GRU classifier layer by layer:
#   GRU(64) over the (timesteps, features) window -> Dropout(0.2)
#   -> Dense(32, tanh) -> softmax over one output unit per server.
model = tf.keras.Sequential()
model.add(tf.keras.layers.GRU(64, activation='tanh', input_shape=X_train.shape[1:]))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(32, activation='tanh'))
model.add(tf.keras.layers.Dense(len(unique_servers), activation='softmax'))

# Targets are integer server IDs, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Training

In [None]:
# Fit for 10 epochs with mini-batches of 32.
# validation_split=0.176 of the 85% training portion is roughly 15% of the
# full dataset, which presumably targets a ~70/15/15 train/val/test split.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_split=0.176,
    batch_size=32,
    verbose=1,
)

# Evaluate

In [None]:
# Score the trained model on the held-out test sequences and report accuracy
loss, accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)
print("Model accuracy on test set:", accuracy)