# Login Behavior Context Enrichment

## Phase
Phase 2 — Identity Context Modeling

## Objective
Build behavioral baselines for authentication activity to support future anomaly detections.


In [9]:
import os
from pathlib import Path

import pandas as pd


In [10]:
# Project root can be overridden with the SOC_PROJECT_ROOT environment variable
# so the notebook runs on machines other than the original author's; the
# original local checkout path is kept as the fallback.
PROJECT_ROOT = Path(
    os.environ.get(
        "SOC_PROJECT_ROOT",
        r"D:\soc-dashboard-suite-main\soc-dashboard-suite-main",
    )
)

# Input: normalized authentication events. Output: the same events with the
# context columns added by the cells below.
INPUT_PATH = PROJECT_ROOT / "data" / "normalized" / "auth" / "authentication_events_normalized.csv"
OUTPUT_PATH = PROJECT_ROOT / "data" / "enriched" / "authentication_with_context.csv"

# event_time is parsed to datetime at load time because later cells use the
# .dt accessor on it.
auth_df = pd.read_csv(INPUT_PATH, parse_dates=["event_time"])
auth_df.head()


Unnamed: 0,event_time,user_id,user_role,source_ip,source_country,device_id,login_status,event_type
0,2025-01-27 15:48:00,user27,user,192.168.155.89,UK,device_18,success,authentication
1,2025-01-23 11:02:00,user13,user,192.168.122.32,DE,device_7,success,authentication
2,2025-01-12 05:45:00,user21,user,192.168.182.199,SG,device_3,success,authentication
3,2025-01-29 00:09:00,user26,user,192.168.107.224,IN,device_17,success,authentication
4,2025-01-24 01:26:00,user41,executive,192.168.148.27,IN,device_9,failed,authentication


In [11]:
# Derive hour-of-day and calendar-date columns from the event timestamp.
auth_df = auth_df.assign(
    hour=auth_df["event_time"].dt.hour,
    date=auth_df["event_time"].dt.date,
)


In [12]:
# Per-user login-hour baseline: the median hour is the typical login time and
# the standard deviation is the spread around it. pandas uses the sample
# standard deviation (ddof=1), so hour_std is NaN for users with one event.
user_hour_baseline = (
    auth_df.groupby("user_id")["hour"]
    .agg(baseline_hour="median", hour_std="std")
    .reset_index()
)

# Drop baseline columns left over from an earlier run of this cell before
# merging; otherwise a re-run yields baseline_hour_x / baseline_hour_y and the
# next cell fails on the missing "baseline_hour" column.
auth_df = auth_df.drop(columns=["baseline_hour", "hour_std"], errors="ignore").merge(
    user_hour_baseline, on="user_id", how="left"
)
auth_df.head()


Unnamed: 0,event_time,user_id,user_role,source_ip,source_country,device_id,login_status,event_type,hour,date,baseline_hour,hour_std
0,2025-01-27 15:48:00,user27,user,192.168.155.89,UK,device_18,success,authentication,15,2025-01-27,13.0,4.77906
1,2025-01-23 11:02:00,user13,user,192.168.122.32,DE,device_7,success,authentication,11,2025-01-23,11.5,4.906
2,2025-01-12 05:45:00,user21,user,192.168.182.199,SG,device_3,success,authentication,5,2025-01-12,12.0,5.223271
3,2025-01-29 00:09:00,user26,user,192.168.107.224,IN,device_17,success,authentication,0,2025-01-29,12.5,4.940157
4,2025-01-24 01:26:00,user41,executive,192.168.148.27,IN,device_9,failed,authentication,1,2025-01-24,11.0,4.945798


In [13]:
# A login is flagged when its hour is more than this many standard deviations
# away from the user's baseline hour.
UNUSUAL_HOUR_STD_MULTIPLIER = 2

# Hour-of-day is cyclic: 23:00 and 01:00 are 2 hours apart, not 22. Take the
# shorter way around the 24-hour clock so logins just across midnight from the
# baseline are not scored as extreme outliers. NaN gaps stay NaN.
linear_gap = (auth_df["hour"] - auth_df["baseline_hour"]).abs()
auth_df["hour_deviation"] = linear_gap.where(linear_gap <= 12, 24 - linear_gap)

# Comparisons against NaN are False, so users whose hour_std is NaN are never
# flagged here.
auth_df["unusual_login_hour"] = auth_df["hour_deviation"] > (
    auth_df["hour_std"] * UNUSUAL_HOUR_STD_MULTIPLIER
)


In [14]:
# Country of each user's chronologically earliest login. The input rows are
# not ordered by time, so sort by event_time first; a stable sort keeps the
# original row order for events with identical timestamps. GroupBy.first()
# returns the first non-null value, so a missing country on the earliest event
# falls through to the next event that has one.
first_seen_location = (
    auth_df.sort_values("event_time", kind="stable")
    .groupby("user_id")["source_country"]
    .first()
    .reset_index()
    .rename(columns={"source_country": "first_seen_country"})
)

# Drop the column from any earlier run of this cell so re-running does not
# create first_seen_country_x / first_seen_country_y.
auth_df = auth_df.drop(columns=["first_seen_country"], errors="ignore").merge(
    first_seen_location, on="user_id", how="left"
)

# True for every login from a country other than the user's first-seen one.
auth_df["new_login_country"] = auth_df["source_country"] != auth_df["first_seen_country"]


In [15]:
# Boolean flag for failed attempts; the status text is lower-cased first so
# the match is case-insensitive.
auth_df["failed_login"] = auth_df["login_status"].str.lower().eq("failed")


In [16]:
# Create the output directory if needed; to_csv does not create missing parent
# folders and would raise on a fresh checkout.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

# Persist the enriched events without the positional index.
auth_df.to_csv(OUTPUT_PATH, index=False)
print("Saved enriched authentication data to:", OUTPUT_PATH)


Saved enriched authentication data to: D:\soc-dashboard-suite-main\soc-dashboard-suite-main\data\enriched\authentication_with_context.csv
