# Alert Scoring & Prioritization

## Phase
Phase 4 â€” Detection Engineering

## Objective
Assign a risk score to correlated alerts based on contributing risk factors,
simulating SOC alert prioritization.


In [2]:
import pandas as pd
from pathlib import Path


In [3]:
PROJECT_ROOT = Path(r"D:\soc-dashboard-suite-main\soc-dashboard-suite-main")

INPUT_PATH = PROJECT_ROOT / "data" / "enriched" / "alerts_correlated_deduplicated.csv"
OUTPUT_PATH = PROJECT_ROOT / "data" / "enriched" / "alerts_scored_prioritized.csv"

alerts_df = pd.read_csv(INPUT_PATH, parse_dates=["event_time"])

alerts_df.head()


Unnamed: 0,event_time,sender_email,sender_domain,recipient_email,user_role,domain_rarity,is_first_seen_day,time_behavior,severity,alert_reason,detection_id,source_rule_x,source_rule_y
0,NaT,cramer@cadvision.com,cadvision.com,"john.zufferli@enron.com, demers.nicolas@enron....",admin,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI..."
1,NaT,mark.shea@bankofamerica.com,bankofamerica.com,"jesus.melendrez@enron.com, john.griffith@enron...",executive,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI..."
2,2000-09-07 02:17:00-07:00,tmcauliff@isda.org,isda.org,"arothrock@pattonboggs.com, csteffensen@isda.or...",executive,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI..."
3,NaT,penn_eric@smtpgate.salkeiz.k12.or.us,smtpgate.salkeiz.k12.or.us,"mark.guzman@enron.com, jones@mca-architects.co...",admin,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI..."
4,NaT,matt.hsu@interwoven.com,interwoven.com,mleslie@amgen.com,admin,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI..."


In [4]:
severity_base = {
    "medium": 40,
    "high": 70,
    "critical": 90
}

alerts_df["risk_score"] = alerts_df["severity"].map(severity_base)


In [5]:
if "triggered_rules" in alerts_df.columns:
    alerts_df["rule_count"] = alerts_df["triggered_rules"].apply(
        lambda x: len(str(x).split(",")) if pd.notna(x) else 1
    )
else:
    alerts_df["rule_count"] = 1

alerts_df["risk_score"] += alerts_df["rule_count"] * 5


In [6]:
if "time_behavior" in alerts_df.columns:
    alerts_df.loc[alerts_df["time_behavior"] == "off_hours", "risk_score"] += 5

if "domain_rarity" in alerts_df.columns:
    alerts_df.loc[alerts_df["domain_rarity"] == "rare", "risk_score"] += 5

if "is_first_seen_day" in alerts_df.columns:
    alerts_df.loc[alerts_df["is_first_seen_day"] == True, "risk_score"] += 5


In [7]:
alerts_df["risk_score"] = alerts_df["risk_score"].clip(upper=100)


In [8]:
def label_priority(score):
    if score >= 90:
        return "P1"
    elif score >= 75:
        return "P2"
    elif score >= 60:
        return "P3"
    else:
        return "P4"

alerts_df["priority"] = alerts_df["risk_score"].apply(label_priority)


In [9]:
alerts_df[["severity", "rule_count", "risk_score", "priority"]].head()


Unnamed: 0,severity,rule_count,risk_score,priority
0,critical,1,100,P1
1,critical,1,100,P1
2,critical,1,100,P1
3,critical,1,100,P1
4,critical,1,100,P1


In [10]:
alerts_df.to_csv(OUTPUT_PATH, index=False)
print("Saved prioritized alert queue to:", OUTPUT_PATH)


Saved prioritized alert queue to: D:\soc-dashboard-suite-main\soc-dashboard-suite-main\data\enriched\alerts_scored_prioritized.csv


In [11]:
alerts_df["priority"].value_counts()


priority
P4    22032
P2    11513
P1     1839
P3     1249
Name: count, dtype: int64

In [12]:
alerts_df[alerts_df["priority"] == "P1"].head()


Unnamed: 0,event_time,sender_email,sender_domain,recipient_email,user_role,domain_rarity,is_first_seen_day,time_behavior,severity,alert_reason,detection_id,source_rule_x,source_rule_y,risk_score,rule_count,priority
0,NaT,cramer@cadvision.com,cadvision.com,"john.zufferli@enron.com, demers.nicolas@enron....",admin,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI...",100,1,P1
1,NaT,mark.shea@bankofamerica.com,bankofamerica.com,"jesus.melendrez@enron.com, john.griffith@enron...",executive,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI...",100,1,P1
2,2000-09-07 02:17:00-07:00,tmcauliff@isda.org,isda.org,"arothrock@pattonboggs.com, csteffensen@isda.or...",executive,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI...",100,1,P1
3,NaT,penn_eric@smtpgate.salkeiz.k12.or.us,smtpgate.salkeiz.k12.or.us,"mark.guzman@enron.com, jones@mca-architects.co...",admin,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI...",100,1,P1
4,NaT,matt.hsu@interwoven.com,interwoven.com,mleslie@amgen.com,admin,rare,False,off_hours,critical,Rare/first-seen external domain + unusual timi...,DET_03_MULTI_SIGNAL_EMAIL_RISK,DET_03_MULTI_SIGNAL_EMAIL_RISK,"DET_01_SUSPICIOUS_SENDER_DOMAIN,DET_02_HIGH_RI...",100,1,P1
