# Cross-Signal Detection — Email + Authentication Correlation

## Phase
Phase 2 — Multi-Source Threat Correlation

## Objective
Identify potential account compromise by correlating high-priority (P1/P2) email alerts with authentication alerts raised for the same user within the following 24 hours.


In [15]:
import os
import random
from pathlib import Path

import pandas as pd


In [16]:
# Project root is configurable so the notebook can run outside the original
# workstation: set the SOC_PROJECT_ROOT environment variable to override it.
# When the variable is absent the original local checkout path is used.
PROJECT_ROOT = Path(
    os.environ.get(
        "SOC_PROJECT_ROOT",
        r"D:\soc-dashboard-suite-main\soc-dashboard-suite-main",
    )
)
ENRICHED_DIR = PROJECT_ROOT / "data" / "enriched"

# Inputs: prioritized email alerts and authentication alerts.
EMAIL_ALERTS_PATH = ENRICHED_DIR / "alerts_scored_prioritized.csv"
AUTH_ALERTS_PATH = ENRICHED_DIR / "auth_alerts.csv"

# Output: correlated email + login alerts written at the end of the notebook.
OUTPUT_PATH = ENRICHED_DIR / "cross_signal_alerts.csv"

# Fail early with a hint about the override instead of a bare read_csv error.
for required_input in (EMAIL_ALERTS_PATH, AUTH_ALERTS_PATH):
    if not required_input.is_file():
        raise FileNotFoundError(
            f"Missing input file: {required_input} "
            "(set SOC_PROJECT_ROOT to the project checkout)"
        )

email_alerts = pd.read_csv(EMAIL_ALERTS_PATH, parse_dates=["event_time"])
auth_alerts = pd.read_csv(AUTH_ALERTS_PATH, parse_dates=["event_time"])


In [17]:
def to_naive_utc(timestamps: pd.Series) -> pd.Series:
    """Return ``timestamps`` as timezone-naive datetimes expressed in UTC.

    Values that cannot be parsed become ``NaT`` (``errors="coerce"``).
    Timezone-aware values are converted to UTC before the timezone is
    dropped, so two sources recorded with different offsets end up on the
    same clock. Timezone-naive values are treated as already being UTC and
    therefore keep their wall-clock value.
    """
    return pd.to_datetime(timestamps, errors="coerce", utc=True).dt.tz_convert(None)


# Put both sources on one comparable (naive, UTC) clock. Simply stripping the
# timezone would keep each source's local wall time and skew the correlation
# window whenever the two files use different offsets.
email_alerts["event_time"] = to_naive_utc(email_alerts["event_time"])
auth_alerts["event_time"] = to_naive_utc(auth_alerts["event_time"])


In [27]:
# Keep only the P1/P2 email alerts, reduced to the three fields carried forward.
is_high_priority = email_alerts["priority"].isin(["P1", "P2"])
email_fields = ["recipient_email", "event_time", "priority"]

high_risk_email = email_alerts.loc[is_high_priority, email_fields].copy()


In [23]:
# Narrow the authentication alerts to the fields this notebook keeps.
auth_fields = ["event_time", "user_id", "detection_id", "severity", "alert_reason"]
auth_alerts = auth_alerts.loc[:, auth_fields]
auth_alerts.head()


Unnamed: 0,event_time,user_id,detection_id,severity,alert_reason
0,2025-01-02 00:25:00,user1,AUTH_UNUSUAL_PRIV_LOGIN,high,Privileged user logged in at unusual hour
1,2025-01-13 02:05:00,user10,AUTH_UNUSUAL_PRIV_LOGIN,high,Privileged user logged in at unusual hour
2,2025-01-04 01:21:00,user11,AUTH_UNUSUAL_PRIV_LOGIN,high,Privileged user logged in at unusual hour
3,2025-01-20 03:38:00,user11,AUTH_UNUSUAL_PRIV_LOGIN,high,Privileged user logged in at unusual hour
4,2025-01-23 00:44:00,user13,AUTH_UNUSUAL_PRIV_LOGIN,high,Privileged user logged in at unusual hour


In [28]:
DIRECTORY_PATH = PROJECT_ROOT / "data" / "enriched" / "identity_directory.csv"
directory = pd.read_csv(DIRECTORY_PATH)

# Only the email -> user mapping is needed from the directory. Exact duplicate
# mappings are collapsed so they cannot multiply email alert rows in the merge.
identity_map = directory[["recipient_email", "user_id"]].drop_duplicates()

# Attach user_id to each high-risk email alert.
# Dropping any user_id left over from a previous run keeps this cell idempotent:
# re-running it no longer produces user_id_x / user_id_y suffix columns.
high_risk_email = (
    high_risk_email
    .drop(columns=["user_id"], errors="ignore")
    .merge(identity_map, on="recipient_email", how="left")
    .loc[:, ["recipient_email", "user_id", "event_time", "priority"]]
)

# A left merge leaves user_id empty for recipients missing from the directory;
# those alerts can never correlate with a login, so surface the count.
unmapped_count = int(high_risk_email["user_id"].isna().sum())
if unmapped_count:
    print(
        f"Warning: {unmapped_count} high-risk email alert(s) have no user_id "
        "in the identity directory"
    )


In [29]:
# Sanity check: show which columns the email frame carries after the merge.
merged_columns = high_risk_email.columns
print(merged_columns)


Index(['recipient_email', 'user_id', 'event_time', 'priority'], dtype='object')


In [32]:
# --- Synthetic test data ---------------------------------------------------
# The rows built here are fabricated logins used to exercise the correlation
# step. They are tagged with INJECTED_DETECTION_ID so they can be told apart
# from real authentication alerts.
INJECTED_DETECTION_ID = "AUTH_INJECTED_TEST"
N_INJECTED = 5
INJECTION_SEED = 42

# Idempotence: remove any batch injected by a previous run of this cell so
# re-running it does not stack additional synthetic logins.
auth_alerts = auth_alerts[auth_alerts["detection_id"] != INJECTED_DETECTION_ID]

# Only alerts with a resolved user and a valid timestamp can produce a login
# that the correlation step is able to match.
eligible_emails = high_risk_email.dropna(subset=["user_id", "event_time"])

# Pick up to N_INJECTED high-risk email alerts (fewer if not enough exist).
sample_emails = eligible_emails.sample(
    n=min(N_INJECTED, len(eligible_emails)),
    random_state=INJECTION_SEED,
)

# Dedicated seeded generator: the login delays are reproducible across runs
# and independent of the global `random` state.
delay_rng = random.Random(INJECTION_SEED)

injected_rows = [
    {
        # Login happens 1-12 hours after the sampled email alert.
        "event_time": email_time + pd.Timedelta(hours=delay_rng.randint(1, 12)),
        "user_id": user_id,
        "user_role": "admin",
        "source_country": "RU",
        "device_id": "unknown_device",
        "login_status": "success",
        "detection_id": INJECTED_DETECTION_ID,
        "severity": "high",
        "alert_reason": "Injected test login after phishing email",
    }
    for user_id, email_time in zip(sample_emails["user_id"], sample_emails["event_time"])
]

# Add to auth_alerts (skipped when nothing could be sampled).
if injected_rows:
    auth_alerts = pd.concat(
        [auth_alerts, pd.DataFrame(injected_rows)],
        ignore_index=True,
    )

print("Injected correlated login events:", len(injected_rows))


Injected correlated login events: 5


In [37]:
# A login is attributed to an email alert only when it happens strictly after
# the email and no later than this long afterwards.
CORRELATION_WINDOW = pd.Timedelta(hours=24)

# Fixed output schema, so an empty result still yields a frame (and later a
# CSV) with the expected header.
CROSS_SIGNAL_COLUMNS = [
    "user_id",
    "email_alert_time",
    "login_alert_time",
    "login_detection",
    "severity",
    "alert_reason",
]

# Rows without a user or a parseable timestamp can never match. They are
# dropped up front because merge would otherwise pair missing keys with each
# other. The *_order columns remember the original row positions so the result
# keeps the "email order, then login order" sequence.
email_events = (
    high_risk_email[["user_id", "event_time"]]
    .dropna()
    .astype({"user_id": object})
    .rename(columns={"event_time": "email_alert_time"})
    .assign(_email_order=lambda frame: range(len(frame)))
)
login_events = (
    auth_alerts[["user_id", "event_time", "detection_id"]]
    .dropna(subset=["user_id", "event_time"])
    .astype({"user_id": object})
    .rename(columns={"event_time": "login_alert_time", "detection_id": "login_detection"})
    .assign(_login_order=lambda frame: range(len(frame)))
)

# Pair every email alert with every login alert of the same user, then keep
# the pairs whose login falls inside the correlation window.
candidate_pairs = email_events.merge(login_events, on="user_id", how="inner")
follows_email = candidate_pairs["login_alert_time"] > candidate_pairs["email_alert_time"]
within_window = (
    candidate_pairs["login_alert_time"]
    <= candidate_pairs["email_alert_time"] + CORRELATION_WINDOW
)

cross_signal_df = (
    candidate_pairs[follows_email & within_window]
    .sort_values(["_email_order", "_login_order"])
    .assign(
        severity="critical",
        alert_reason="Suspicious login activity following high-risk email",
    )
    .reset_index(drop=True)
    .loc[:, CROSS_SIGNAL_COLUMNS]
)

# Kept for any later cell that still reads the list-of-dicts form.
correlated_alerts = cross_signal_df.to_dict("records")

cross_signal_df.head(16)


Unnamed: 0,user_id,email_alert_time,login_alert_time,login_detection,severity,alert_reason
0,user43,2001-07-16 13:10:19,2001-07-17 01:10:19,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...
1,user43,2001-07-16 14:45:19,2001-07-17 01:10:19,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...
2,user43,2001-07-16 15:42:58,2001-07-17 01:10:19,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...
3,user43,2001-07-16 04:00:41,2001-07-17 01:10:19,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...
4,user43,2001-07-16 06:52:06,2001-07-17 01:10:19,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...
5,user43,2001-07-16 05:14:08,2001-07-17 01:10:19,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...
6,user43,2001-07-16 16:35:29,2001-07-17 01:10:19,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...
7,user43,2001-10-22 19:26:42,2001-10-22 23:15:58,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...
8,user43,2001-10-22 13:55:11,2001-10-22 23:15:58,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...
9,user43,2001-10-22 12:15:58,2001-10-22 23:15:58,AUTH_INJECTED_TEST,critical,Suspicious login activity following high-risk ...


In [34]:
# Number of correlated email + login alert pairs.
cross_signal_df.shape[0]


16

In [35]:
# Persist the correlated alerts as CSV (row index omitted) and report the destination.
cross_signal_df.to_csv(path_or_buf=OUTPUT_PATH, index=False)
print("Saved cross-signal alerts to:", OUTPUT_PATH)


Saved cross-signal alerts to: D:\soc-dashboard-suite-main\soc-dashboard-suite-main\data\enriched\cross_signal_alerts.csv
