# Detection 01 — Suspicious Sender Domain

## Phase
Phase 4 — Detection Engineering

## Objective
Identify emails from external senders (sender domain differs from the recipient's domain) whose sender domain is either:
- Rare or uncommon in historical communication
- First-seen in the environment

This detection models early-stage phishing or newly introduced attacker infrastructure.


In [1]:
import os
from pathlib import Path

import pandas as pd


In [2]:
# Project root. Defaults to the original workstation path, but can be
# overridden with the SOC_PROJECT_ROOT environment variable so the notebook
# runs on other machines without editing this cell.
PROJECT_ROOT = Path(
    os.environ.get(
        "SOC_PROJECT_ROOT",
        r"D:\soc-dashboard-suite-main\soc-dashboard-suite-main",
    )
)

# Both the input dataset and the alert export live under data/enriched.
ENRICHED_DIR = PROJECT_ROOT / "data" / "enriched"
INPUT_PATH = ENRICHED_DIR / "email_with_temporal_context.csv"
ALERT_OUTPUT_PATH = ENRICHED_DIR / "alerts_suspicious_sender.csv"

# Load the email dataset; event_time is parsed as a datetime column.
email_df = pd.read_csv(INPUT_PATH, parse_dates=["event_time"])


In [4]:
# Flag mail whose sender domain differs from the recipient's domain.
# Domain names are case-insensitive, so both sides are trimmed and lowercased
# first; otherwise "Enron.com" vs "enron.com" would be counted as external.
_sender_dom = email_df["sender_domain"].astype("string").str.strip().str.lower()
_recipient_dom = email_df["recipient_domain"].astype("string").str.strip().str.lower()

# A missing domain on either side compares as <NA>; fill with True to keep the
# original outcome, where a missing value never equals anything (=> external).
email_df["external_sender"] = (
    _sender_dom.ne(_recipient_dom).fillna(True).astype(bool)
)


In [5]:
# Domain-level signals: domain_rarity is "rare", or is_first_seen_day is set.
is_rare_domain = email_df["domain_rarity"].eq("rare")
is_first_seen = email_df["is_first_seen_day"].eq(True)

# Only external senders are eligible for this detection.
is_external = email_df["external_sender"].eq(True)

# Alert when either domain signal fires AND the sender is external.
suspicious_sender_mask = (is_rare_domain | is_first_seen) & is_external

# Copy so the column assignments in later cells do not touch email_df.
alerts_df = email_df.loc[suspicious_sender_mask].copy()

# Number of alerts raised.
alerts_df.shape[0]


54063

In [6]:
# Tag every alert with this detection's identifier.
alerts_df["detection_id"] = "DET_01_SUSPICIOUS_SENDER_DOMAIN"

# Severity is driven by user_role: admin and executive escalate to "high".
ROLE_SEVERITY = {
    "admin": "high",
    "executive": "high",
    "normal": "medium",
}
# Fallback for any role not listed above.
DEFAULT_SEVERITY = "medium"

# Series.map yields NaN for values missing from the mapping; fill those with
# the default so no alert is exported without a severity.
alerts_df["severity"] = (
    alerts_df["user_role"].map(ROLE_SEVERITY).fillna(DEFAULT_SEVERITY)
)

# Same human-readable reason on every alert from this detection.
alerts_df["alert_reason"] = (
    "Email from rare or first-seen external domain"
)


In [7]:
# Columns kept in the alert export, in output order.
alert_fields = [
    # event and parties
    "event_time", "sender_email", "sender_domain", "recipient_email", "user_role",
    # context columns carried over from the input dataset
    "domain_rarity", "is_first_seen_day", "time_behavior",
    # detection metadata
    "severity", "alert_reason", "detection_id",
]

# Restrict the alerts to the export schema, then preview the first rows.
alerts_df = alerts_df.loc[:, alert_fields]
alerts_df.head()


Unnamed: 0,event_time,sender_email,sender_domain,recipient_email,user_role,domain_rarity,is_first_seen_day,time_behavior,severity,alert_reason,detection_id
601,NaT,1.11913372.-2@multexinvestornetwork.com,multexinvestornetwork.com,pallen@enron.com,normal,rare,False,off_hours,medium,Email from rare or first-seen external domain,DET_01_SUSPICIOUS_SENDER_DOMAIN
612,NaT,aod@newsdata.com,newsdata.com,western.price.survey.contacts@ren-6.cais.net,admin,rare,False,off_hours,high,Email from rare or first-seen external domain,DET_01_SUSPICIOUS_SENDER_DOMAIN
676,NaT,webmaster@earnings.com,earnings.com,pallen@enron.com,normal,rare,False,off_hours,medium,Email from rare or first-seen external domain,DET_01_SUSPICIOUS_SENDER_DOMAIN
698,NaT,yild@zdemail.zdlists.com,zdemail.zdlists.com,pallen@enron.com,normal,rare,False,off_hours,medium,Email from rare or first-seen external domain,DET_01_SUSPICIOUS_SENDER_DOMAIN
709,NaT,bounce-news-932653@lists.autoweb.com,lists.autoweb.com,pallen@enron.com,normal,rare,False,off_hours,medium,Email from rare or first-seen external domain,DET_01_SUSPICIOUS_SENDER_DOMAIN


In [8]:
# Persist the alerts as CSV; the DataFrame index is not written.
alerts_df.to_csv(path_or_buf=ALERT_OUTPUT_PATH, index=False)
print("Saved alerts to:", ALERT_OUTPUT_PATH)


Saved alerts to: D:\soc-dashboard-suite-main\soc-dashboard-suite-main\data\enriched\alerts_suspicious_sender.csv


In [9]:
# Alert count per severity. dropna=False keeps a NaN bucket visible, so alerts
# with no severity are not silently left out of the summary.
alerts_df["severity"].value_counts(dropna=False)


severity
medium    49381
high       4682
Name: count, dtype: int64