<a href="https://colab.research.google.com/github/a42437-ml/master_thesis_files/blob/main/ml_files/sip_features_tensorflow_pipeline_combined_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from tensorflow.keras.models import load_model

# Detection cell: load the trained model and new SIP feature rows, encode and
# align the features to the training column layout, score every row, and
# persist / summarise the binary covert flag.

# Probability cut-off above which a SIP sample is flagged as covert.
COVERT_THRESHOLD = 0.7

# Load model and files
model = load_model("covert_detector_model.h5")
df = pd.read_csv("new_sip_features.csv")

# training_columns.csv holds the training-time column names (no header row).
# Flattening the raw values accepts one-name-per-row and one-row-of-names
# layouts alike, and still yields a list when the file has a single entry
# (a bare .squeeze() collapses that case to a scalar).
expected_cols = (
    pd.read_csv("training_columns.csv", header=None).to_numpy().ravel().tolist()
)

print(f"‚úÖ Loaded {len(df)} SIP samples.")

# One-hot encode new data
df_encoded = pd.get_dummies(df)

# Align with training columns: columns not in expected_cols are dropped and
# expected columns absent from the new data are added as all-zero.
# The float32 cast gives the model one homogeneous numeric matrix;
# get_dummies may emit bool columns next to the integer zero-fill, which
# would otherwise become an object array on conversion.
df_encoded = df_encoded.reindex(columns=expected_cols, fill_value=0).astype("float32")

print(f"‚úÖ Data aligned. Shape: {df_encoded.shape}")

# Predict covert traffic
predictions = model.predict(df_encoded)
# Flatten before thresholding so a plain 1-D 0/1 vector is stored in the column.
# NOTE(review): presumably the model emits one score per row, shape (n, 1) - confirm.
df['covert_prediction'] = (predictions.ravel() > COVERT_THRESHOLD).astype(int)

# Save results
# NOTE(review): this writes every sample (not only the flagged ones) to
# covert_alerts.csv; a later cell writes the covert-only key fields to the
# same path. Kept as-is for compatibility - confirm whether it is still wanted.
df.to_csv("covert_alerts.csv", index=False)
print("‚úÖ Detection complete. Saved to covert_alerts.csv")

# Optional: Show summary
covert_count = df['covert_prediction'].sum()
print(f"‚ö†Ô∏è Detected {covert_count} covert messages out of {len(df)} total.")




‚úÖ Loaded 584 SIP samples.
‚úÖ Data aligned. Shape: (584, 27969)
[1m19/19[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 11ms/step
‚úÖ Detection complete. Saved to covert_alerts.csv
‚ö†Ô∏è Detected 8 covert messages out of 584 total.


In [2]:
# Write every scored sample, including its covert_prediction flag, to disk.
df.to_csv("detection_output.csv", index=False)
print("‚úÖ Detection complete. Results saved to detection_output.csv")

# Keep only the rows flagged as covert (covert_prediction equal to 1).
covert_df = df.loc[df['covert_prediction'].eq(1)]



‚úÖ Detection complete. Results saved to detection_output.csv


In [3]:
# Report cell: print the flagged rows restricted to identifying SIP metadata
# and export those fields to covert_alerts.csv.

# Extract key metadata columns (customize if your CSV uses different column names)
key_fields = ['src_ip', 'dst_ip', 'Call-ID', 'From', 'To']
# Only the key fields actually present in the data are used.
available_keys = [col for col in key_fields if col in covert_df.columns]

if not available_keys:
    print("‚ö†Ô∏è No key SIP fields like src_ip, dst_ip, from, to found in dataset.")
else:
    if covert_df.empty:
        # Nothing was flagged: say so explicitly instead of printing the
        # alert banner followed by an empty-frame dump.
        print("No covert messages detected.")
    else:
        # Show detected covert messages with metadata
        print("üö® Covert Messages Detected:")
        print(covert_df[available_keys + ['covert_prediction']].to_string(index=False))

    # Save for IDS rule updating. Written even when no rows were flagged
    # (header-only file), so the file always reflects the current run.
    covert_df[available_keys].to_csv("covert_alerts.csv", index=False)
    print("‚úÖ Key SIP fields of covert detections saved to covert_alerts.csv")

üö® Covert Messages Detected:
        src_ip          dst_ip                              Call-ID                                                           From                                           To  covert_prediction
38.242.140.216   178.18.244.71               1-14088@38.242.140.216 From: sipp <sip:11100@38.242.140.216:5080>;tag=14088SIPpTag001 To: service <sip:service@178.18.244.71:5080>                  1
 178.18.244.71 109.205.183.238 d2ee69b1-c300-123e-968f-00505658449a      From: "11100" <sip:11100@178.18.244.71>;tag=FD7XrZy62197N              To: <sip:22200@109.205.183.238>                  1
38.242.140.216   178.18.244.71               2-14088@38.242.140.216 From: sipp <sip:11101@38.242.140.216:5080>;tag=14088SIPpTag002 To: service <sip:service@178.18.244.71:5080>                  1
 178.18.244.71 109.205.183.238 d8ec6ba5-c300-123e-968f-00505658449a      From: "11101" <sip:11101@178.18.244.71>;tag=HZSFvN0DXKpDD              To: <sip:22201@109.205.183.238>              