# 📌 05 – Predicting Anomalies on New Logs

“This notebook loads the trained models and performs anomaly prediction on new HTTP logs, demonstrating real-world detection capability.”


*– Imports:*

In [13]:
import sys, os
sys.path.append(os.path.abspath("../src"))

import pandas as pd
import joblib
import tempfile



*— Parser Apache intégré (puis import predict_file seulement) :*

In [14]:
import re

# Apache Combined Log Format regex. Capture groups, in order:
#   1 host, 2 timestamp, 3 method, 4 URL, 5 protocol,
#   6 status, 7 response size, 8 referer, 9 user-agent.
# The ident and auth-user fields are matched but not captured, so lines that
# carry an authenticated user name (instead of "-") are parsed as well and the
# group numbering above stays stable.
LOG_PATTERN = r'(\S+) \S+ \S+ \[(.*?)\] "(\S+) (.*?) (\S+)" (\d{3}) (\S+) "(.*?)" "(.*?)"'

# Column order of the frame returned by parse_apache_log_lines().
PARSED_LOG_COLUMNS = [
    "method", "url", "protocol", "user_agent",
    "cookie", "content_type", "content_length",
    "content", "body",
]

def parse_apache_log_lines(lines):
    """Parse Apache access-log lines into a request-feature DataFrame.

    Parameters
    ----------
    lines : iterable of str
        Raw log lines. Lines that do not match ``LOG_PATTERN`` are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per matched line with the columns listed in
        ``PARSED_LOG_COLUMNS``. The frame always has these columns, even when
        no line matched, so downstream column selection does not depend on
        the input being non-empty.
    """
    # Compile once per call instead of resolving the pattern for every line.
    match_line = re.compile(LOG_PATTERN).match

    rows = []
    for line in lines:
        match = match_line(line)
        if match is None:
            continue  # not an access-log line in the expected format

        url = match.group(4)
        rows.append({
            "method": match.group(3),
            "url": url,
            "protocol": match.group(5),
            "user_agent": match.group(9),
            # Access logs carry no cookies, content type or request body.
            "cookie": "",
            "content_type": "",
            # NOTE(review): this is the length of the URL, not the response
            # size captured in group 7 -- kept as in the original; confirm it
            # is what the trained models expect for this feature.
            "content_length": len(url),
            "content": "",
            "body": ""
        })
    return pd.DataFrame(rows, columns=PARSED_LOG_COLUMNS)

# Import the prediction entry point from the project's predict.py.
# NOTE(review): presumably resolved through the "../src" directory that the
# imports cell appends to sys.path -- confirm that cell has run first.
from predict import predict_file
print("predict_file imported successfully.")


predict_file imported successfully.


⭐ Test — Prediction on real Apache logs:

*— Lire & parser ton fichier Apache :*

In [15]:
# Access log to score, relative to the notebook's working directory.
log_path = "../data/sample_access.log"

# Bytes that are not valid UTF-8 are dropped instead of raising.
with open(log_path, "r", encoding="utf-8", errors="ignore") as f:
    lines = list(f)

# Turn the raw lines into one row per recognised request.
apache_df = parse_apache_log_lines(lines)
print("Parsed rows:", apache_df.shape[0])

apache_df.head()


Parsed rows: 10


Unnamed: 0,method,url,protocol,user_agent,cookie,content_type,content_length,content,body
0,GET,/index.html,HTTP/1.1,Mozilla/5.0 (Windows NT 10.0; Win64; x64),,,11,,
1,GET,/login?user=admin,HTTP/1.1,Mozilla/5.0 (X11; Linux x86_64),,,17,,
2,POST,/login,HTTP/1.1,curl/7.79.1,,,6,,
3,GET,/search?q=shoes,HTTP/1.1,Mozilla/5.0 (Macintosh; Intel Mac OS X 12_4),,,15,,
4,GET,/admin/delete?id=5,HTTP/1.1,Mozilla/5.0 (Windows NT 10.0; Win64; x64),,,18,,


*— Adapter les colonnes pour correspondre à predict.py*

In [16]:
# Columns kept for prediction, in the order they are passed on.
# NOTE(review): presumably the schema predict.py expects -- confirm there.
required_cols = [
    "url",
    "method",
    "user_agent",
    "cookie",
    "content_type",
    "content_length",
    "body",
    "content",
]

# Lower-case every header so the membership checks below are case-insensitive.
apache_df.columns = list(map(str.lower, apache_df.columns))

# Add any required column the parsed frame lacks, filled with empty strings.
for col in required_cols:
    if col in apache_df.columns:
        continue
    apache_df[col] = ""

# Keep only the required columns, in the required order.
apache_df = apache_df[required_cols]

apache_df.head()



Unnamed: 0,url,method,user_agent,cookie,content_type,content_length,body,content
0,/index.html,GET,Mozilla/5.0 (Windows NT 10.0; Win64; x64),,,11,,
1,/login?user=admin,GET,Mozilla/5.0 (X11; Linux x86_64),,,17,,
2,/login,POST,curl/7.79.1,,,6,,
3,/search?q=shoes,GET,Mozilla/5.0 (Macintosh; Intel Mac OS X 12_4),,,15,,
4,/admin/delete?id=5,GET,Mozilla/5.0 (Windows NT 10.0; Win64; x64),,,18,,


*— Sauvegarder → appeler predict_file() :*

In [17]:
# predict_file() is called with a file path, so the prepared frame is written
# to a temporary CSV first. The handle is opened in text mode and closed by the
# `with` block before predict_file() reads the file, so no open handle is
# leaked. newline="" lets pandas control the line terminator itself.
# delete=False keeps the file on disk after the handle is closed; remove it
# with os.remove(temp_file.name) once it is no longer needed.
with tempfile.NamedTemporaryFile(
    mode="w", suffix=".csv", delete=False, encoding="utf-8", newline=""
) as temp_file:
    apache_df.to_csv(temp_file, index=False)

print("Temp CSV created at:", temp_file.name)

# Predict using your actual predict.py
results = predict_file(temp_file.name)

print("\n=== Predictions (Preview) ===")
results.head()


Temp CSV created at: C:\Users\ok\AppData\Local\Temp\tmpgkzdeeld.csv

=== Loading file: C:\Users\ok\AppData\Local\Temp\tmpgkzdeeld.csv ===
Extracted 15 features.
Loading trained models...

=== Prediction sample ===
                  url  RF_Prediction  ISO_Prediction
0         /index.html              1               0
1   /login?user=admin              0               0
2              /login              0               0
3     /search?q=shoes              0               0
4  /admin/delete?id=5              1               0

=== Predictions (Preview) ===


Unnamed: 0,url,method,user_agent,cookie,content_type,content_length,body,content,RF_Prediction,ISO_Prediction
0,/index.html,GET,Mozilla/5.0 (Windows NT 10.0; Win64; x64),,,11,,,1,0
1,/login?user=admin,GET,Mozilla/5.0 (X11; Linux x86_64),,,17,,,0,0
2,/login,POST,curl/7.79.1,,,6,,,0,0
3,/search?q=shoes,GET,Mozilla/5.0 (Macintosh; Intel Mac OS X 12_4),,,15,,,0,0
4,/admin/delete?id=5,GET,Mozilla/5.0 (Windows NT 10.0; Win64; x64),,,18,,,1,0
