# Stream-Based Email Detection

## Phase
Phase 3 — Incremental Detection

## Objective
Run detection logic on newly streamed telemetry events.


In [None]:
from pathlib import Path
import pandas as pd
import time

# Root of the local project checkout; every data path below hangs off it.
PROJECT_ROOT = Path("D:/soc-dashboard-suite-main/soc-dashboard-suite-main")

# Enriched email telemetry that the detection loop polls (input) ...
STREAM_INPUT = PROJECT_ROOT.joinpath(
    "data", "stream", "email_event_stream_enriched.csv"
)
# ... and the CSV that flagged events are appended to (output).
DETECTION_OUTPUT = PROJECT_ROOT.joinpath(
    "data", "stream", "email_detection_stream.csv"
)


In [None]:
# Reset the detection output before polling starts. to_csv opens the target
# in write mode by default, so any file left over from an earlier run is
# overwritten with the serialisation of a column-less DataFrame.
# NOTE(review): no column header is written here - confirm that whatever
# reads DETECTION_OUTPUT expects a header-less file.
empty_df = pd.DataFrame()
empty_df.to_csv(path_or_buf=DETECTION_OUTPUT, index=False)


In [None]:
# Positional offset into the cleaned stream: rows before this index have
# already been evaluated by an earlier pass of the loop.
processed_count = 0

# Poll the stream file forever, evaluating only rows that appeared since the
# previous pass. The whole CSV is re-read on every pass.
while True:
    try:
        stream_df = pd.read_csv(STREAM_INPUT)

        if "event_time" not in stream_df.columns:
            # No event_time column means there is nothing to evaluate yet;
            # report it and retry after the pause below.
            print("Waiting for stream...", "Stream not initialized yet")
        else:
            # Unparsable timestamps become NaT (errors="coerce"); those rows
            # are treated as caught mid-write and dropped.
            parsed_times = pd.to_datetime(
                stream_df["event_time"],
                errors="coerce",
                utc=True,
            )
            stream_df["event_time"] = parsed_times
            stream_df = stream_df.dropna(subset=["event_time"])

            total_rows = len(stream_df)
            if total_rows > processed_count:
                new_events = stream_df.iloc[processed_count:].copy()

                # Rule 1: the sender domain text contains "suspicious".
                domain_hit = (
                    new_events["sender_domain"]
                    .astype(str)
                    .str.contains("suspicious", na=False)
                )
                # Rule 2: the event is marked as first seen on its day.
                # The comparison must stay element-wise (`== True`), not `is`.
                first_seen_hit = new_events["is_first_seen_day"] == True

                suspicious_events = new_events[domain_hit | first_seen_hit]

                if not suspicious_events.empty:
                    # Appended without a header row.
                    # NOTE(review): the output file therefore never gets
                    # column names from this loop - confirm readers cope.
                    suspicious_events.to_csv(
                        DETECTION_OUTPUT,
                        mode="a",
                        header=False,
                        index=False,
                    )

                # Advance only after the append succeeded, so a failed write
                # is retried on the next pass.
                processed_count = total_rows
                print(f"Processed up to {processed_count} events")

    except Exception as e:
        # Best-effort polling: any failure (missing file, parse error,
        # missing column, write error) is reported and the pass is retried.
        print("Waiting for stream...", e)

    # Single pause shared by the success, not-ready and error paths.
    time.sleep(2)
