<a href="https://colab.research.google.com/github/hawa1983/Traffic-Data-Bank/blob/main/trafficdatabank.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from pathlib import Path

# --------- Config ----------
# Input CSV read by main(); the two result files are written next to it.
CSV_PATH = "Port_Jefferson_LP_Count_Cleaned.csv"

# Column names: the first four are read from the file, the last is derived.
PLATE_COL = "Plate"
LOCATION_COL = "Location"
DIRECTION_COL = "Direction"
TIME_COL = "Time"              # <- in file
TIMESTAMP_COL = "Timestamp"    # <- will be created

# Every Time value is prefixed with this single date to build a full datetime.
BASE_DATE = "2025-01-01"

# Forward look-ahead, in minutes, handed to the rule helpers.
WINDOW_MIN = 15
# ---------------------------

def normalize_text(s):
    """Return *s* as a trimmed, lowercase string; missing values pass through unchanged."""
    return s if pd.isna(s) else str(s).strip().lower()

def appears_within(df_plate, idx, pairs, window_min=15):
    """Tell whether the plate is seen at any of *pairs* shortly after row *idx*.

    df_plate   -- observations of one plate
    idx        -- index label of the reference observation in df_plate
    pairs      -- iterable of (location, direction) tuples to look for
    window_min -- size of the look-ahead window in minutes

    Only rows strictly later than the reference timestamp and no later than
    reference + window_min are considered.
    """
    start = df_plate.loc[idx, TIMESTAMP_COL]
    end = start + pd.Timedelta(minutes=window_min)
    stamps = df_plate[TIMESTAMP_COL]

    later = df_plate.loc[(stamps > start) & (stamps <= end), [LOCATION_COL, DIRECTION_COL]]
    if later.empty:
        return False

    return any(
        ((later[LOCATION_COL] == loc) & (later[DIRECTION_COL] == direc)).any()
        for loc, direc in pairs
    )

def no_other_appearance_within(df_plate, idx, window_min=15):
    """Tell whether the plate shows up nowhere else shortly after row *idx*.

    True when the look-ahead window (strictly after the reference timestamp,
    up to and including reference + window_min minutes) is empty, or when
    every observation inside it is at the same location as row *idx*.
    """
    start = df_plate.loc[idx, TIMESTAMP_COL]
    origin = df_plate.loc[idx, LOCATION_COL]
    stamps = df_plate[TIMESTAMP_COL]

    in_window = (stamps > start) & (stamps <= start + pd.Timedelta(minutes=window_min))
    later_locations = df_plate.loc[in_window, LOCATION_COL]
    if later_locations.empty:
        return True
    return (later_locations == origin).all()

def classify_observation(row, df_plate, idx):
    """Label one observation according to the four movement rules.

    row      -- the observation (normalized Location/Direction values)
    df_plate -- all observations of the same plate
    idx      -- index label of *row* inside df_plate

    Returns one of "Pass Thru", "Resident Entry",
    "HWY Bound or Resident Entry", "Resident Exit" or "Unclassified".
    """
    movement = (row[LOCATION_COL], row[DIRECTION_COL])
    eastbound_exits = [("ardmer dr", "eastbound"), ("chereb lane", "eastbound")]

    # Rule 1: northbound entry on Woodhull or Lincoln
    if movement in {("woodhull ave", "northbound"), ("lincoln ave", "northbound")}:
        seen_leaving = appears_within(df_plate, idx, eastbound_exits, WINDOW_MIN)
        return "Pass Thru" if seen_leaving else "Resident Entry"

    # Rule 2: westbound entry on Ardmer or Chereb
    if movement in {("ardmer dr", "westbound"), ("chereb lane", "westbound")}:
        downstream = [
            ("woodhull ave", "southbound"),
            ("lincoln ave", "southbound"),
            ("norwood ave", "westbound"),
        ]
        seen_leaving = appears_within(df_plate, idx, downstream, WINDOW_MIN)
        return "Pass Thru" if seen_leaving else "Resident Entry"

    # Rule 3: eastbound on Norwood
    if movement == ("norwood ave", "eastbound"):
        seen_leaving = appears_within(df_plate, idx, eastbound_exits, WINDOW_MIN)
        return "Pass Thru" if seen_leaving else "HWY Bound or Resident Entry"

    # Rule 4 (exits): an exit movement with no sighting elsewhere in the window
    exit_movements = {
        ("woodhull ave", "southbound"),
        ("lincoln ave", "southbound"),
        ("ardmer dr", "eastbound"),
        ("chereb lane", "eastbound"),
    }
    if movement in exit_movements and no_other_appearance_within(df_plate, idx, WINDOW_MIN):
        return "Resident Exit"

    return "Unclassified"

def final_plate_classification(per_obs_classes):
    """Pick the single highest-priority label out of a plate's observation labels.

    Priority runs from "Pass Thru" down to "Unclassified"; when none of the
    known labels is present the result is "Unclassified".
    """
    ranked = (
        "Pass Thru",
        "Resident Exit",
        "Resident Entry",
        "HWY Bound or Resident Entry",
        "Unclassified",
    )
    return next((name for name in ranked if name in per_obs_classes), "Unclassified")

def main():
    """Classify every observation in CSV_PATH and write two result CSVs.

    Steps: read the file, build TIMESTAMP_COL from BASE_DATE plus TIME_COL,
    normalize Location/Direction, label each row within its plate's timeline,
    roll the row labels up to one label per plate, and write the
    per-observation and per-plate files next to CSV_PATH.
    """
    df = pd.read_csv(CSV_PATH)

    # Build Timestamp from single date + Time column (e.g., "6:00 AM").
    # Values that cannot be parsed become NaT (errors="coerce") instead of raising.
    df[TIMESTAMP_COL] = pd.to_datetime(BASE_DATE + " " + df[TIME_COL].astype(str), errors="coerce")

    # The rules compare against lowercase literals, so normalize in place.
    df[LOCATION_COL] = df[LOCATION_COL].map(normalize_text)
    df[DIRECTION_COL] = df[DIRECTION_COL].map(normalize_text)

    # Sort by plate then time
    df = df.sort_values([PLATE_COL, TIMESTAMP_COL]).reset_index(drop=True)

    # groupby already yields each plate's rows in sorted order, so use that
    # frame directly instead of re-filtering the whole table for every plate.
    classifications = {}
    for _, df_plate in df.groupby(PLATE_COL, sort=False):
        for idx in df_plate.index:
            classifications[idx] = classify_observation(df_plate.loc[idx], df_plate, idx)

    # Rows that groupby skipped (missing plate) have no entry and stay NaN.
    df["Observation_Classification"] = pd.Series(classifications, dtype=object)

    # Final per-plate
    plate_final = (
        df.groupby(PLATE_COL)["Observation_Classification"]
          .apply(lambda s: final_plate_classification(set(s.dropna())))
          .rename("Plate_Final_Classification")
          .reset_index()
    )

    # Output
    out_obs = Path(CSV_PATH).with_name("Port_Jefferson_LP_Classified_Observations.csv")
    out_plate = Path(CSV_PATH).with_name("Port_Jefferson_LP_Final_By_Plate.csv")

    df.to_csv(out_obs, index=False)
    plate_final.to_csv(out_plate, index=False)

    print(f"Saved per-observation classifications to: {out_obs}")
    print(f"Saved final per-plate classifications to: {out_plate}")

if __name__ == "__main__":
    main()


Saved per-observation classifications to: Port_Jefferson_LP_Classified_Observations.csv
Saved final per-plate classifications to: Port_Jefferson_LP_Final_By_Plate.csv


## New

In [None]:
import pandas as pd
from pathlib import Path

# --------- Config ----------
# Input CSV read by main().
CSV_PATH = "Port_Jefferson_LP_Count_Cleaned.csv"

# Column names used to read the input and to name the derived timestamp.
PLATE_COL = "Plate"
LOCATION_COL = "Location"
DIRECTION_COL = "Direction"
PERIOD_COL = "Period"          # exported and used as a summary grouping key
TIME_COL = "Time"              # <- in file (e.g., "6:00 AM")
TIMESTAMP_COL = "Timestamp"    # <- we will create

# Use any single date; same date for all times is fine
BASE_DATE = "2025-01-01"

# Forward look-ahead, in minutes, handed to the matching helpers.
WINDOW_MIN = 15
# ---------------------------

def normalize_text(s):
    """Trim and lowercase *s* for rule matching; missing values are returned as they are."""
    if not pd.isna(s):
        return str(s).strip().lower()
    return s

def find_match_within(df_plate, idx, pairs, window_min=15):
    """
    Return (matched:bool, match_row:Series|None) for the earliest observation,
    within the forward time window from row *idx*, that matches any of the
    (location, direction) pairs.

    df_plate   -- observations of one plate
    idx        -- index label of the reference observation in df_plate
    pairs      -- iterable of (location, direction) tuples
    window_min -- window length in minutes; rows strictly after the reference
                  timestamp and up to reference + window_min are considered

    All pairs are tested together so the chronologically first sighting wins
    regardless of the order in which the pairs are listed. Sightings that
    share a timestamp keep their df_plate order.
    """
    t0 = df_plate.loc[idx, TIMESTAMP_COL]
    stamps = df_plate[TIMESTAMP_COL]
    in_window = (stamps > t0) & (stamps <= t0 + pd.Timedelta(minutes=window_min))
    if not in_window.any():
        return False, None
    future = df_plate.loc[in_window]

    # One combined mask over every acceptable (location, direction) pair.
    wanted = pd.Series(False, index=future.index)
    for loc, direc in pairs:
        wanted = wanted | ((future[LOCATION_COL] == loc) & (future[DIRECTION_COL] == direc))

    hits = future.loc[wanted]
    if hits.empty:
        return False, None

    # Sort explicitly so the result does not depend on df_plate being pre-sorted.
    return True, hits.sort_values(TIMESTAMP_COL, kind="stable").iloc[0]

def no_other_appearance_within(df_plate, idx, window_min=15):
    """Report whether the plate is seen only at its current location (or not at all)
    during the window that follows row *idx*.

    The window covers timestamps strictly after the reference observation up
    to and including reference + window_min minutes.
    """
    reference_time = df_plate.loc[idx, TIMESTAMP_COL]
    reference_loc = df_plate.loc[idx, LOCATION_COL]
    deadline = reference_time + pd.Timedelta(minutes=window_min)

    upcoming = (df_plate[TIMESTAMP_COL] > reference_time) & (df_plate[TIMESTAMP_COL] <= deadline)
    if upcoming.any():
        return (df_plate.loc[upcoming, LOCATION_COL] == reference_loc).all()
    return True

def classify_observation(row, df_plate, idx):
    """Label one observation and report the downstream sighting that decided it.

    row      -- the observation (normalized Location/Direction values)
    df_plate -- all observations of the same plate
    idx      -- index label of *row* inside df_plate

    Returns (label, details). *details* always carries the keys
    "Match_Location", "Match_Direction" and "Match_Timestamp"; they are filled
    only for "Pass Thru" and otherwise stay None / None / NaT.
    """
    movement = (row[LOCATION_COL], row[DIRECTION_COL])

    # Defaults for match details
    details = {
        "Match_Location": None,
        "Match_Direction": None,
        "Match_Timestamp": pd.NaT
    }

    eastbound_exits = [("ardmer dr", "eastbound"), ("chereb lane", "eastbound")]
    # (entry movements, downstream pairs to look for, label when none is found)
    entry_rules = (
        # Rule 1: Entry S->N
        ({("woodhull ave", "northbound"), ("lincoln ave", "northbound")},
         eastbound_exits,
         "Resident Entry"),
        # Rule 2: Entry E->W
        ({("ardmer dr", "westbound"), ("chereb lane", "westbound")},
         [("woodhull ave", "southbound"),
          ("lincoln ave", "southbound"),
          ("norwood ave", "westbound")],
         "Resident Entry"),
        # Rule 3: Entry from West via Norwood EB
        ({("norwood ave", "eastbound")},
         eastbound_exits,
         "HWY Bound or Resident Entry"),
    )

    for entries, downstream, fallback in entry_rules:
        if movement not in entries:
            continue
        found, hit = find_match_within(df_plate, idx, downstream, WINDOW_MIN)
        if not found:
            return fallback, details
        details["Match_Location"] = hit[LOCATION_COL]
        details["Match_Direction"] = hit[DIRECTION_COL]
        details["Match_Timestamp"] = hit[TIMESTAMP_COL]
        return "Pass Thru", details

    # Rule 4: Exit
    exit_movements = {
        ("woodhull ave", "southbound"),
        ("lincoln ave", "southbound"),
        ("ardmer dr", "eastbound"),
        ("chereb lane", "eastbound"),
    }
    if movement in exit_movements and no_other_appearance_within(df_plate, idx, WINDOW_MIN):
        return "Resident Exit", details

    return "Unclassified", details

def final_plate_classification(per_obs_classes):
    """Roll a plate's observation labels up to the one with the highest priority.

    Falls back to "Unclassified" when none of the known labels is present.
    """
    # Priority for final roll-up, strongest first
    order = ["Pass Thru", "Resident Exit", "Resident Entry", "HWY Bound or Resident Entry", "Unclassified"]
    present = [label for label in order if label in per_obs_classes]
    if present:
        return present[0]
    return "Unclassified"

def main():
    """Classify the observations in CSV_PATH and write three CSV reports.

    Outputs (written to the current working directory):
      - full_tracking_classified.csv : every observation with its label, the
        plate-level label and the matched downstream sighting, if any
      - pass_thru_summary.csv        : one row per "Pass Thru" observation with
        entry/exit timestamps, minutes between them and the path
      - all_movements_summary.csv    : observation and unique-plate counts per
        Location/Direction/Period/Time
    """
    df = pd.read_csv(CSV_PATH)

    # Build a Timestamp from BASE_DATE + Time (e.g., "6:00 AM").
    # Values that cannot be parsed become NaT instead of raising.
    df[TIMESTAMP_COL] = pd.to_datetime(BASE_DATE + " " + df[TIME_COL].astype(str), errors="coerce")

    # Capture the raw spellings BEFORE the rule columns are normalized;
    # afterwards the original text is no longer available.
    df["Location_Original"] = df[LOCATION_COL]
    df["Direction_Original"] = df[DIRECTION_COL]

    # Use normalized fields for rules
    df[LOCATION_COL] = df[LOCATION_COL].map(normalize_text)
    df[DIRECTION_COL] = df[DIRECTION_COL].map(normalize_text)

    # Sort
    df = df.sort_values([PLATE_COL, TIMESTAMP_COL]).reset_index(drop=True)

    # Per-observation classification + match details, keyed by row index.
    obs_cls, match_loc, match_dir, match_ts = {}, {}, {}, {}

    # groupby already yields each plate's sorted rows; reuse that frame rather
    # than re-filtering the whole table for every single observation.
    for _, df_plate in df.groupby(PLATE_COL, sort=False):
        for idx in df_plate.index:
            cls, match = classify_observation(df_plate.loc[idx], df_plate, idx)
            obs_cls[idx] = cls
            match_loc[idx] = match["Match_Location"]
            match_dir[idx] = match["Match_Direction"]
            match_ts[idx] = match["Match_Timestamp"]

    df["Observation_Classification"] = pd.Series(obs_cls)
    df["Match_Location"] = pd.Series(match_loc)
    df["Match_Direction"] = pd.Series(match_dir)
    df["Match_Timestamp"] = pd.Series(match_ts)

    # Plate-level final classification
    plate_final = (
        df.groupby(PLATE_COL)["Observation_Classification"]
          .apply(lambda s: final_plate_classification(set(s.dropna())))
          .rename("Plate_Final_Classification")
          .reset_index()
    )

    # Merge plate final back to each row
    df = df.merge(plate_final, on=PLATE_COL, how="left")

    # ---------- Output 1: full_tracking_classified.csv ----------
    # Location/Direction are exported in their normalized form; the raw text
    # stays available in Location_Original / Direction_Original on df.
    full_tracking_cols = [
        PLATE_COL, LOCATION_COL, DIRECTION_COL, PERIOD_COL, TIME_COL, TIMESTAMP_COL,
        "Observation_Classification", "Plate_Final_Classification",
        "Match_Location", "Match_Direction", "Match_Timestamp"
    ]
    out_full = "full_tracking_classified.csv"
    df[full_tracking_cols].to_csv(out_full, index=False)

    # ---------- Output 2: pass_thru_summary.csv ----------
    pass_thru_cols = [
        PLATE_COL, "Entry_Timestamp", "Exit_Timestamp", "Minutes_Between",
        "Entry_Point", "Exit_Point", "Path"
    ]
    pass_thru = df[df["Observation_Classification"] == "Pass Thru"].copy()
    if not pass_thru.empty:
        pass_thru["Entry_Timestamp"] = pass_thru[TIMESTAMP_COL]
        pass_thru["Exit_Timestamp"] = pass_thru["Match_Timestamp"]
        pass_thru["Minutes_Between"] = (pass_thru["Exit_Timestamp"] - pass_thru["Entry_Timestamp"]).dt.total_seconds() / 60.0
        pass_thru["Entry_Point"] = pass_thru[LOCATION_COL].str.title() + " " + pass_thru[DIRECTION_COL].str.title()
        pass_thru["Exit_Point"] = pass_thru["Match_Location"].fillna("").str.title() + " " + pass_thru["Match_Direction"].fillna("").str.title()
        pass_thru["Path"] = pass_thru["Entry_Point"] + " -> " + pass_thru["Exit_Point"]

        pass_thru = pass_thru.sort_values([PLATE_COL, "Entry_Timestamp"])
        pass_thru[pass_thru_cols].to_csv("pass_thru_summary.csv", index=False)
    else:
        # write empty schema if none
        pd.DataFrame(columns=pass_thru_cols).to_csv("pass_thru_summary.csv", index=False)

    # ---------- Output 3: all_movements_summary.csv ----------
    # Aggregate by Location/Direction/Period/Time
    grp = df.groupby([LOCATION_COL, DIRECTION_COL, PERIOD_COL, TIME_COL], dropna=False)
    summary = grp.agg(
        Observations=(PLATE_COL, "count"),
        UniquePlates=(PLATE_COL, pd.Series.nunique)
    ).reset_index()

    # NOTE(review): TIME_COL is sorted as it appears in the file; if it holds
    # text such as "6:00 AM" this order is alphabetical, not chronological.
    summary = summary.sort_values([LOCATION_COL, DIRECTION_COL, PERIOD_COL, TIME_COL])
    summary.to_csv("all_movements_summary.csv", index=False)

    print(f"Saved:\n- {out_full}\n- pass_thru_summary.csv\n- all_movements_summary.csv")

if __name__ == "__main__":
    main()


Saved:
- full_tracking_classified.csv
- pass_thru_summary.csv
- all_movements_summary.csv


AM/PM

In [3]:
import pandas as pd
from pathlib import Path

# =========================================================
# CONFIGURATION SECTION
# =========================================================
# Source CSV; passed straight to pd.read_csv in main().
SRC = "https://raw.githubusercontent.com/hawa1983/Traffic-Data-Bank/refs/heads/main/Port_Jefferson_LP_Count_Cleaned_no_norwood.csv"

# Column names expected in the source file.
PLATE = "Plate"
LOC = "Location"
PERIOD = "Period"     # 'AM' or 'PM'
TIME = "Time"
DIR = "Direction"

# Single date prefixed to every Time value when building timestamps.
BASE_DATE = "2025-01-01"

# Output file names, written to the current working directory.
FULL_OUT = "full_tracking_classified.csv"
PASS_THRU_OUT = "pass_thru_summary.csv"
MOVES_OUT = "all_movements_summary.csv"
# =========================================================

# =========================================================
# HELPERS
# =========================================================
def norm(s):
    """Trim and lowercase a value for matching; missing values become None."""
    if pd.isna(s):
        return None
    return str(s).strip().lower()

def to_timestamp(df):
    """Combine BASE_DATE with each row's Time text (e.g., '6:00 AM') into datetimes.

    Text that cannot be parsed yields NaT rather than an error.
    """
    dated_text = BASE_DATE + " " + df[TIME].astype(str)
    return pd.to_datetime(dated_text, errors="coerce")

def fmt_time(dt):
    """Pretty-print times as 'h:mm AM/PM' for Excel friendliness.

    dt -- anything pd.to_datetime accepts, or a missing value
    Returns "" for missing values, otherwise a string such as "6:05 AM"
    (12-hour clock, no leading zero on the hour).
    """
    if pd.isna(dt):
        return ""
    stamp = pd.to_datetime(dt)
    # "%-I" (hour without padding) is a platform-specific strftime extension,
    # so the unpadded 12-hour value is computed here instead.
    hour12 = stamp.hour % 12 or 12
    return f"{hour12}:" + stamp.strftime("%M %p")

def find_match_within_period(df_plate, idx, target_pairs):
    """
    Return the earliest FUTURE row for the same plate that:
      - is in the SAME Period (AM/PM) as the current row, and
      - matches ANY (location, direction) pair in target_pairs.
    Otherwise return None.

    df_plate     -- observations of one plate (needs "Timestamp", "loc_norm",
                    "dir_norm" and the Period column)
    idx          -- index label of the current row in df_plate
    target_pairs -- iterable of (location, direction) tuples

    All pairs are tested together, so the chronologically first sighting is
    returned regardless of the order of target_pairs. Sightings that share a
    timestamp keep their df_plate order.
    """
    t0 = df_plate.loc[idx, "Timestamp"]
    this_period = df_plate.loc[idx, PERIOD]

    # Only future rows in the same AM/PM period
    mask = (df_plate[PERIOD] == this_period) & (df_plate["Timestamp"] > t0)
    if not mask.any():
        return None
    future = df_plate.loc[mask]

    # One combined mask over every acceptable (location, direction) pair.
    wanted = pd.Series(False, index=future.index)
    for loc, direc in target_pairs:
        wanted = wanted | ((future["loc_norm"] == loc) & (future["dir_norm"] == direc))

    hits = future.loc[wanted]
    if hits.empty:
        return None

    # Sort explicitly so the result does not depend on df_plate being pre-sorted.
    return hits.sort_values("Timestamp", kind="stable").iloc[0]

def any_other_location_later_in_same_period(df_plate, idx):
    """
    Rule 4 helper: report whether the plate is observed LATER, in the same
    Period as row *idx*, at a location different from that row's location.
    Returns True/False.
    """
    reference_time = df_plate.loc[idx, "Timestamp"]
    reference_period = df_plate.loc[idx, PERIOD]
    reference_loc = df_plate.loc[idx, "loc_norm"]

    later_same_period = (df_plate[PERIOD] == reference_period) & (df_plate["Timestamp"] > reference_time)
    later_locations = df_plate.loc[later_same_period, "loc_norm"]
    if later_locations.empty:
        return False
    return (later_locations != reference_loc).any()

# =========================================================
# CLASSIFIER
# =========================================================
def classify_and_pair(entry_row, df_plate, idx):
    """Classify one observation and pair it with a later sighting when one applies.

    entry_row -- the observation (needs "loc_norm" and "dir_norm")
    df_plate  -- all observations of the same plate
    idx       -- index label of *entry_row* inside df_plate

    Returns (classification, is_entry_trigger, match_row) where match_row is
    the paired later observation or None.
    """
    movement = (entry_row["loc_norm"], entry_row["dir_norm"])
    eastbound_exits = [("ardmer dr", "eastbound"), ("chereb lane", "eastbound")]

    # (trigger movements, targets to pair with, label when nothing pairs)
    pairing_rules = (
        # RULE 1: Entry S->N
        ({("woodhull ave", "northbound"), ("lincoln ave", "northbound")},
         eastbound_exits,
         "No Exit"),
        # RULE 2: Entry E->W
        ({("ardmer dr", "westbound"), ("chereb lane", "westbound")},
         [("woodhull ave", "southbound"),
          ("lincoln ave", "southbound"),
          ("norwood ave", "westbound")],
         "No Exit"),
        # RULE 3: Entry from West via Norwood EB
        ({("norwood ave", "eastbound")},
         eastbound_exits,
         "HWY Bound or Resident Entry"),
    )
    for triggers, targets, unmatched_label in pairing_rules:
        if movement in triggers:
            paired = find_match_within_period(df_plate, idx, targets)
            label = "Pass Thru" if paired is not None else unmatched_label
            return label, True, paired

    # NEW RULE (COMBINED): Norwood Ave Westbound
    if movement == ("norwood ave", "westbound"):
        # Step 1: a Rule 4 exit later in the SAME Period -> Unclassified
        rule4_targets = [
            ("woodhull ave", "southbound"),
            ("lincoln ave", "southbound"),
            ("ardmer dr", "eastbound"),
            ("chereb lane", "eastbound"),
        ]
        later_exit = find_match_within_period(df_plate, idx, rule4_targets)
        if later_exit is not None:
            return "Unclassified", True, later_exit

        # Step 2: an entry trigger later in the SAME Period -> "Loop through ...";
        # the triggers are tried one at a time, in this order.
        entry_targets = (
            ("woodhull ave", "northbound"),  # Rule 1
            ("lincoln ave", "northbound"),   # Rule 1
            ("ardmer dr", "westbound"),      # Rule 2
            ("chereb lane", "westbound"),    # Rule 2
            ("norwood ave", "eastbound"),    # Rule 3
        )
        for trigger_loc, trigger_dir in entry_targets:
            later_entry = find_match_within_period(df_plate, idx, [(trigger_loc, trigger_dir)])
            if later_entry is not None:
                label = f"Loop through {trigger_loc.title()} {trigger_dir.title()}"
                return label, True, later_entry

        # Step 3: nothing relevant afterwards
        return "From Highway to Norwood Ave", True, None

    # RULE 4: Exiting the neighborhood (period-based "no other appearance")
    exit_movements = {
        ("woodhull ave", "southbound"),
        ("lincoln ave", "southbound"),
        ("ardmer dr", "eastbound"),
        ("chereb lane", "eastbound"),
    }
    if movement in exit_movements and not any_other_location_later_in_same_period(df_plate, idx):
        return "Exit", False, None

    return "Unclassified", False, None

# =========================================================
# MAIN
# =========================================================
def main():
    """Classify the observations in SRC and write FULL_OUT, PASS_THRU_OUT and MOVES_OUT.

    - FULL_OUT      : every observation with its classification and, for entry
                      triggers, the paired exit (time, location, duration)
    - PASS_THRU_OUT : the entry-trigger rows classified "Pass Thru", ordered
                      by plate and then chronologically by entry time
    - MOVES_OUT     : counts of entry-trigger rows per Period/Enter/Exit
    """
    df = pd.read_csv(SRC)

    # Keep originals for output
    df["Location_orig"] = df[LOC]
    df["Direction_orig"] = df[DIR]

    # Normalize and build Timestamp
    df["loc_norm"] = df[LOC].map(norm)
    df["dir_norm"] = df[DIR].map(norm)
    df["Timestamp"] = to_timestamp(df)

    # Sort by plate/time
    df = df.sort_values([PLATE, "Timestamp"]).reset_index(drop=True)

    # Collect outputs, keyed by row index
    classifications, is_entry_trigger_col = {}, {}
    entry_time_col, enter_col = {}, {}
    exit_time_col, exit_col = {}, {}
    duration_col = {}

    for _, df_plate in df.groupby(PLATE, sort=False):
        for idx in df_plate.index:
            row = df_plate.loc[idx]
            cls, is_entry, match_row = classify_and_pair(row, df_plate, idx)
            classifications[idx] = cls
            is_entry_trigger_col[idx] = is_entry

            if not is_entry:
                # Non-trigger rows carry blank pairing fields.
                entry_time_col[idx] = pd.NaT
                enter_col[idx] = ""
                exit_time_col[idx] = pd.NaT
                exit_col[idx] = ""
                duration_col[idx] = ""
                continue

            entry_time = row["Timestamp"]
            if match_row is not None:
                exit_time = match_row["Timestamp"]
                exit_loc = match_row["Location_orig"]
                mins = (exit_time - entry_time).total_seconds() / 60.0
            else:
                exit_time = pd.NaT
                exit_loc = "No Exit"
                mins = 0
            entry_time_col[idx] = entry_time
            enter_col[idx] = row["Location_orig"]
            exit_time_col[idx] = exit_time
            exit_col[idx] = exit_loc
            duration_col[idx] = mins

    # Attach results
    df["Classification"] = pd.Series(classifications)
    # groupby skips rows whose plate is missing; mark those as non-triggers so
    # the column stays boolean and can be used as a mask below.
    df["Is_Entry_Trigger"] = (
        pd.Series(is_entry_trigger_col, dtype=bool).reindex(df.index, fill_value=False)
    )
    df["Entry Time"] = pd.Series(entry_time_col).apply(fmt_time)
    df["Enter"] = pd.Series(enter_col)
    df["Exit Time"] = pd.Series(exit_time_col).apply(fmt_time)
    df["Exit"] = pd.Series(exit_col)
    df["Duration"] = pd.Series(duration_col)

    # Restore original case
    df[LOC] = df["Location_orig"]
    df[DIR] = df["Direction_orig"]

    # Save outputs
    full_cols = [LOC, PERIOD, TIME, DIR, PLATE,
                 "Classification", "Entry Time", "Enter", "Exit Time", "Exit", "Duration"]
    df[full_cols].to_csv(FULL_OUT, index=False)

    pt_cols = [PLATE, PERIOD, "Entry Time", "Enter", "Exit Time", "Exit", "Duration"]
    pt = df[df["Is_Entry_Trigger"] & (df["Classification"] == "Pass Thru")].copy()
    if not pt.empty:
        # "Entry Time" is display text ('h:mm AM/PM') and would sort
        # alphabetically; order by the row's real Timestamp instead (for an
        # entry-trigger row that is its entry time).
        pt = pt.sort_values([PLATE, "Timestamp"])
        pt_out = pt[pt_cols].copy()
        pt_out["Duration"] = pd.to_numeric(pt_out["Duration"], errors="coerce").fillna(0).round(2)
    else:
        pt_out = pd.DataFrame(columns=pt_cols)
    pt_out.to_csv(PASS_THRU_OUT, index=False)

    moves = df[df["Is_Entry_Trigger"]].copy()
    if not moves.empty:
        mv = (moves.groupby([PERIOD, "Enter", "Exit"], dropna=False)
                    .size()
                    .reset_index(name="Total")
                    .sort_values([PERIOD, "Enter", "Exit"]))
    else:
        mv = pd.DataFrame(columns=[PERIOD, "Enter", "Exit", "Total"])
    mv.to_csv(MOVES_OUT, index=False)

    print(f"Saved:\n- {FULL_OUT}\n- {PASS_THRU_OUT}\n- {MOVES_OUT}")

if __name__ == "__main__":
    main()


Saved:
- full_tracking_classified.csv
- pass_thru_summary.csv
- all_movements_summary.csv


Chereb Eastbound Tracking

In [13]:
import pandas as pd
from pathlib import Path

# ========= CONFIG =========
SRC = "https://raw.githubusercontent.com/hawa1983/Traffic-Data-Bank/refs/heads/main/Port_Jefferson_LP_Count_Cleaned.csv"   # change if needed
OUT = "CherebEB_PassThru_Tracking.csv"                   # saves to current folder
BASE_DATE = "2025-01-01"                                 # any single date ok; used to build datetimes
# ==========================

def norm(s):
    """Return the trimmed, lowercase text of *s*, or None when *s* is missing."""
    if pd.isna(s):
        return None
    text = str(s)
    return text.strip().lower()

def main():
    """Pair each entry sighting with the plate's next Chereb Lane eastbound sighting.

    An entry is a row at Lincoln Ave NB, Woodhull Ave NB or Norwood Ave EB.
    For every entry, the first later Chereb Lane EB row of the same plate in
    the same Period is taken as the exit. Entries without such a row are
    skipped. The pairs are written to OUT, ordered by Period, Plate and
    chronological entry time.
    """
    # Load & prep
    df = pd.read_csv(SRC)
    # Normalize for reliable matching
    df["loc_norm"] = df["Location"].map(norm)
    df["dir_norm"] = df["Direction"].map(norm)
    # Build a true timestamp (same date + provided time)
    df["Timestamp"] = pd.to_datetime(BASE_DATE + " " + df["Time"].astype(str), errors="coerce")
    # Sort within each plate's timeline; groupby keeps this row order per group
    df = df.sort_values(["Plate", "Timestamp"]).reset_index(drop=True)

    # Define entry conditions (ANY of these) and the target
    entry_conditions = {
        ("lincoln ave", "northbound"),
        ("woodhull ave", "northbound"),
        ("norwood ave", "eastbound"),
    }
    target_loc, target_dir = "chereb lane", "eastbound"

    out_columns = [
        "Plate", "Period",
        "Entry Time", "Entry Location", "Entry Direction",
        "Exit Time", "Exit Location", "Exit Direction",
        "Duration_Min",
    ]
    results = []

    # Process per plate
    for plate, g in df.groupby("Plate", sort=False):
        # Iterate rows as potential entries
        for _, entry in g.iterrows():
            if (entry["loc_norm"], entry["dir_norm"]) not in entry_conditions:
                continue

            # Look for the FIRST later Chereb EB in the SAME Period (AM/PM)
            same_period = (g["Period"] == entry["Period"]) & (g["Timestamp"] > entry["Timestamp"])
            future = g.loc[same_period]

            hit = future[(future["loc_norm"] == target_loc) & (future["dir_norm"] == target_dir)]
            if hit.empty:
                continue

            exit_row = hit.iloc[0]  # earliest Chereb EB in same AM/PM
            duration_min = (exit_row["Timestamp"] - entry["Timestamp"]).total_seconds() / 60.0

            results.append({
                "Plate": plate,
                "Period": entry["Period"],
                "Entry Time": entry["Time"],
                "Entry Location": entry["Location"],
                "Entry Direction": entry["Direction"],
                "Exit Time": exit_row["Time"],
                "Exit Location": exit_row["Location"],
                "Exit Direction": exit_row["Direction"],
                "Duration_Min": round(duration_min, 2),
                # Helper used only for sorting; dropped before the file is written.
                "_entry_ts": entry["Timestamp"],
            })

    # Passing the columns explicitly keeps the schema (and the sort below)
    # valid even when no match was found at all.
    out_df = pd.DataFrame(results, columns=out_columns + ["_entry_ts"])
    # Sort on the real timestamp: the raw Time text would order alphabetically.
    out_df = out_df.sort_values(["Period", "Plate", "_entry_ts"]).drop(columns="_entry_ts")
    out_df.to_csv(OUT, index=False)

    print(f"Saved {len(out_df)} matches to: {Path(OUT).resolve()}")

if __name__ == "__main__":
    main()


Saved 74 matches to: /content/CherebEB_PassThru_Tracking.csv


Ardmer Eastbound Tracking

In [14]:
import pandas as pd
from pathlib import Path

# ========= CONFIG =========
SRC = "https://raw.githubusercontent.com/hawa1983/Traffic-Data-Bank/refs/heads/main/Port_Jefferson_LP_Count_Cleaned.csv"   # change if needed
OUT = "ArdmerEB_PassThru_Tracking.csv"                   # saves to current folder
BASE_DATE = "2025-01-01"                                 # any single date ok; used to build datetimes
# ==========================

def norm(s):
    """Lowercase and trim *s* for matching; a missing value maps to None."""
    return str(s).strip().lower() if not pd.isna(s) else None

def main():
    """Pair each entry sighting with the plate's next Ardmer Dr eastbound sighting.

    An entry is a row at Lincoln Ave NB, Woodhull Ave NB or Norwood Ave EB.
    For every entry, the first later Ardmer Dr EB row of the same plate in
    the same Period is taken as the exit. Entries without such a row are
    skipped. The pairs are written to OUT, ordered by Period, Plate and
    chronological entry time.
    """
    # Load & prep
    df = pd.read_csv(SRC)
    # Normalize for reliable matching
    df["loc_norm"] = df["Location"].map(norm)
    df["dir_norm"] = df["Direction"].map(norm)
    # Build a true timestamp (same date + provided time)
    df["Timestamp"] = pd.to_datetime(BASE_DATE + " " + df["Time"].astype(str), errors="coerce")
    # Sort within each plate's timeline; groupby keeps this row order per group
    df = df.sort_values(["Plate", "Timestamp"]).reset_index(drop=True)

    # Define entry conditions (ANY of these) and the target
    entry_conditions = {
        ("lincoln ave", "northbound"),
        ("woodhull ave", "northbound"),
        ("norwood ave", "eastbound"),
    }
    target_loc, target_dir = "ardmer dr", "eastbound"

    out_columns = [
        "Plate", "Period",
        "Entry Time", "Entry Location", "Entry Direction",
        "Exit Time", "Exit Location", "Exit Direction",
        "Duration_Min",
    ]
    results = []

    # Process per plate
    for plate, g in df.groupby("Plate", sort=False):
        # Iterate rows as potential entries
        for _, entry in g.iterrows():
            if (entry["loc_norm"], entry["dir_norm"]) not in entry_conditions:
                continue

            # Look for the FIRST later Ardmer EB in the SAME Period (AM/PM)
            same_period = (g["Period"] == entry["Period"]) & (g["Timestamp"] > entry["Timestamp"])
            future = g.loc[same_period]

            hit = future[(future["loc_norm"] == target_loc) & (future["dir_norm"] == target_dir)]
            if hit.empty:
                continue

            exit_row = hit.iloc[0]  # earliest Ardmer EB in same AM/PM
            duration_min = (exit_row["Timestamp"] - entry["Timestamp"]).total_seconds() / 60.0

            results.append({
                "Plate": plate,
                "Period": entry["Period"],
                "Entry Time": entry["Time"],
                "Entry Location": entry["Location"],
                "Entry Direction": entry["Direction"],
                "Exit Time": exit_row["Time"],
                "Exit Location": exit_row["Location"],
                "Exit Direction": exit_row["Direction"],
                "Duration_Min": round(duration_min, 2),
                # Helper used only for sorting; dropped before the file is written.
                "_entry_ts": entry["Timestamp"],
            })

    # Passing the columns explicitly keeps the schema (and the sort below)
    # valid even when no match was found at all.
    out_df = pd.DataFrame(results, columns=out_columns + ["_entry_ts"])
    # Sort on the real timestamp: the raw Time text would order alphabetically.
    out_df = out_df.sort_values(["Period", "Plate", "_entry_ts"]).drop(columns="_entry_ts")
    out_df.to_csv(OUT, index=False)

    print(f"Saved {len(out_df)} matches to: {Path(OUT).resolve()}")

if __name__ == "__main__":
    main()


Saved 49 matches to: /content/ArdmerEB_PassThru_Tracking.csv


**Lincoln, Woodhull, Norwood Tracking**

This version tracks entries at Ardmer Dr WB or Chereb Lane WB, then keeps only those plates whose exit is Norwood Ave WB and nowhere else later in the same AM/PM period.

In [15]:
import pandas as pd
from pathlib import Path

# ========= CONFIG =========
SRC = "https://raw.githubusercontent.com/hawa1983/Traffic-Data-Bank/refs/heads/main/Port_Jefferson_LP_Count_Cleaned.csv"   # change if needed
OUT = "Entry_ArdmerCherebWB_To_LincolnWoodhullNorwoodWB.csv"  # saves to current folder
BASE_DATE = "2025-01-01"                                # any single date ok; used to build datetimes
# ==========================

def norm(s):
    """Normalize a cell for matching: None for missing values, else trimmed lowercase text."""
    missing = pd.isna(s)
    if not missing:
        return str(s).strip().lower()
    return None

def main():
    """Match Ardmer Dr / Chereb Lane westbound entries to their first later exit.

    Reads the sightings CSV at ``SRC``. For every sighting of a plate at
    Ardmer Dr WB or Chereb Lane WB, finds the earliest later sighting of the
    same plate, with the same ``Period`` value, at Lincoln Ave SB,
    Woodhull Ave SB or Norwood Ave WB. One row per matched entry, including the
    elapsed minutes, is written to ``OUT``; entries with no later target are
    dropped.

    The input must provide the columns ``Plate``, ``Location``, ``Direction``,
    ``Time`` and ``Period``.
    """
    # Load & prep
    df = pd.read_csv(SRC)
    # Normalize for reliable matching
    df["loc_norm"] = df["Location"].map(norm)
    df["dir_norm"] = df["Direction"].map(norm)
    # Build a true timestamp (same date + provided time); unparseable times become NaT
    df["Timestamp"] = pd.to_datetime(BASE_DATE + " " + df["Time"].astype(str), errors="coerce")
    # Sort within each plate's timeline
    df = df.sort_values(["Plate", "Timestamp"]).reset_index(drop=True)

    # Define entry conditions (ANY of these)
    entry_conditions = {
        ("ardmer dr", "westbound"),
        ("chereb lane", "westbound"),
    }

    # Define targets (ANY of these)
    target_conditions = {
        ("lincoln ave", "southbound"),
        ("woodhull ave", "southbound"),
        ("norwood ave", "westbound"),
    }

    # Flag entry/target rows once for the whole frame instead of running a
    # row-wise apply over the plate's rows for every single entry.
    pairs = list(zip(df["loc_norm"], df["dir_norm"]))
    df["_is_entry"] = [pair in entry_conditions for pair in pairs]
    df["_is_target"] = [pair in target_conditions for pair in pairs]

    # Fixed output schema so an empty result still has the columns we sort on.
    columns = [
        "Plate",
        "Period",
        "Entry Time",
        "Entry Location",
        "Entry Direction",
        "Exit Time",
        "Exit Location",
        "Exit Direction",
        "Duration_Min",
    ]
    results = []

    # Process per plate
    for plate, g in df.groupby("Plate", sort=False):
        g = g.sort_values("Timestamp")
        entries = g[g["_is_entry"]]
        targets = g[g["_is_target"]]
        if entries.empty or targets.empty:
            continue

        for _, entry in entries.iterrows():
            # Targets strictly later than the entry, in the SAME Period (AM/PM)
            later = targets[
                (targets["Period"] == entry["Period"])
                & (targets["Timestamp"] > entry["Timestamp"])
            ]
            if later.empty:
                continue

            exit_row = later.iloc[0]  # earliest match (g is sorted by Timestamp)
            duration_min = (exit_row["Timestamp"] - entry["Timestamp"]).total_seconds() / 60.0

            results.append({
                "Plate": plate,
                "Period": entry["Period"],
                "Entry Time": entry["Time"],
                "Entry Location": entry["Location"],
                "Entry Direction": entry["Direction"],
                "Exit Time": exit_row["Time"],
                "Exit Location": exit_row["Location"],
                "Exit Direction": exit_row["Direction"],
                "Duration_Min": round(duration_min, 2)
            })

    # Passing `columns` keeps sort_values from raising KeyError when nothing matched.
    out_df = pd.DataFrame(results, columns=columns).sort_values(["Period", "Plate", "Entry Time"])
    out_df.to_csv(OUT, index=False)

    print(f"Saved {len(out_df)} matches to: {Path(OUT).resolve()}")

# Run the analysis only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Saved 93 matches to: /content/Entry_ArdmerCherebWB_To_LincolnWoodhullNorwoodWB.csv


**Tracking at Norwood Ave Westbound**

This keeps only entries at Norwood Ave WB where the same plate, within the same AM/PM Period, is not found anywhere at Ardmer Dr WB, Chereb Lane WB, Lincoln Ave SB, or Woodhull Ave SB.

In [16]:
import pandas as pd
from pathlib import Path

# ========= CONFIG =========
# Input CSV of plate sightings; passed straight to pandas.read_csv in main().
SRC = "https://raw.githubusercontent.com/hawa1983/Traffic-Data-Bank/refs/heads/main/Port_Jefferson_LP_Count_Cleaned.csv"   # change if needed
# Output CSV written by main(); a relative name lands in the current working directory.
OUT = "NorwoodWB_ONLY_not_ArdmerWB_CherebWB_LincolnSB_WoodhullSB.csv"
# Placeholder date prepended to each "Time" value so it parses as a full datetime.
BASE_DATE = "2025-01-01"                                 # any single date ok; used to build datetimes
# ==========================

def norm(s):
    """Clean a location/direction cell for case-insensitive matching.

    Missing scalars (as judged by ``pd.isna``) map to None; everything else is
    stringified, trimmed of surrounding whitespace and lower-cased.
    """
    if not pd.isna(s):
        return str(s).strip().lower()
    return None

def main():
    """Keep Norwood Ave westbound sightings with no disallowed crossing that period.

    Reads the sightings CSV at ``SRC``. A Norwood Ave WB sighting is kept only
    if the same plate has no sighting, among rows with the same ``Period``
    value, at Ardmer Dr WB, Chereb Lane WB, Lincoln Ave SB or Woodhull Ave SB.
    The kept sightings are written to ``OUT``.

    The input must provide the columns ``Plate``, ``Location``, ``Direction``,
    ``Time`` and ``Period``.
    """
    # Load & prep
    df = pd.read_csv(SRC)
    df["loc_norm"] = df["Location"].map(norm)
    df["dir_norm"] = df["Direction"].map(norm)
    # Unparseable times become NaT rather than raising
    df["Timestamp"] = pd.to_datetime(BASE_DATE + " " + df["Time"].astype(str), errors="coerce")
    df = df.sort_values(["Plate", "Timestamp"]).reset_index(drop=True)

    # Entry must be exactly Norwood Ave WB
    entry_required = ("norwood ave", "westbound")

    # Disallowed anywhere in SAME AM/PM Period for that plate
    disallowed_anywhere = {
        ("ardmer dr", "westbound"),
        ("chereb lane", "westbound"),
        ("lincoln ave", "southbound"),
        ("woodhull ave", "southbound"),
    }

    # Flag disallowed rows once; a boolean column always reduces to a scalar
    # with .any(), unlike a row-wise apply over a possibly empty selection.
    df["_is_disallowed"] = [
        pair in disallowed_anywhere
        for pair in zip(df["loc_norm"], df["dir_norm"])
    ]

    # Fixed output schema so an empty result still has the columns we sort on.
    columns = ["Plate", "Period", "Entry Time", "Entry Location", "Entry Direction"]
    results = []

    for plate, g in df.groupby("Plate", sort=False):
        g = g.sort_values("Timestamp")

        # Each Norwood WB sighting is a candidate "entry"
        candidates = g[(g["loc_norm"] == entry_required[0]) & (g["dir_norm"] == entry_required[1])]
        if candidates.empty:
            continue

        for _, entry in candidates.iterrows():
            # All sightings for this plate in the same AM/PM period.
            # A missing Period matches no rows, so the check below passes vacuously.
            period_rows = g[g["Period"] == entry["Period"]]

            if period_rows["_is_disallowed"].any():
                continue

            # Keep this Norwood WB entry
            results.append({
                "Plate": plate,
                "Period": entry["Period"],
                "Entry Time": entry["Time"],
                "Entry Location": entry["Location"],
                "Entry Direction": entry["Direction"],
            })

    # Passing `columns` keeps sort_values from raising KeyError when nothing matched.
    out_df = pd.DataFrame(results, columns=columns).sort_values(["Period", "Plate", "Entry Time"])
    out_df.to_csv(OUT, index=False)
    print(f"Saved {len(out_df)} matches to: {Path(OUT).resolve()}")

# Run the analysis only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Saved 577 matches to: /content/NorwoodWB_ONLY_not_ArdmerWB_CherebWB_LincolnSB_WoodhullSB.csv


**Norwood Ave EB Tracking**

This keeps only entries at Norwood Ave EB where, in the same AM/PM Period, the plate is not found at Lincoln Ave NB, Woodhull Ave NB, Chereb Lane EB, or Ardmer Dr EB:

In [17]:
import pandas as pd
from pathlib import Path

# ========= CONFIG =========
# Input CSV of plate sightings; passed straight to pandas.read_csv in main().
SRC = "https://raw.githubusercontent.com/hawa1983/Traffic-Data-Bank/refs/heads/main/Port_Jefferson_LP_Count_Cleaned.csv"   # change if needed
# Output CSV written by main(); a relative name lands in the current working directory.
OUT = "NorwoodEB_ONLY_not_LincolnNB_WoodhullNB_CherebEB_ArdmerEB.csv"
# Placeholder date prepended to each "Time" value so it parses as a full datetime.
BASE_DATE = "2025-01-01"                                 # any single date ok; used to build datetimes
# ==========================

def norm(s):
    """Normalize one cell value: None if missing, else trimmed lower-case text."""
    missing = pd.isna(s)
    if missing:
        return None
    cleaned = str(s).strip()
    return cleaned.lower()

def main():
    """Keep Norwood Ave eastbound sightings with no disallowed crossing that period.

    Reads the sightings CSV at ``SRC``. A Norwood Ave EB sighting is kept only
    if the same plate has no sighting, among rows with the same ``Period``
    value, at Lincoln Ave NB, Woodhull Ave NB, Chereb Lane EB or Ardmer Dr EB.
    The kept sightings are written to ``OUT``.

    The input must provide the columns ``Plate``, ``Location``, ``Direction``,
    ``Time`` and ``Period``.
    """
    # Load & prep
    df = pd.read_csv(SRC)
    df["loc_norm"] = df["Location"].map(norm)
    df["dir_norm"] = df["Direction"].map(norm)
    # Unparseable times become NaT rather than raising
    df["Timestamp"] = pd.to_datetime(BASE_DATE + " " + df["Time"].astype(str), errors="coerce")
    df = df.sort_values(["Plate", "Timestamp"]).reset_index(drop=True)

    # Entry must be exactly Norwood Ave EB
    entry_required = ("norwood ave", "eastbound")

    # Disallowed anywhere in SAME AM/PM Period for that plate
    disallowed_anywhere = {
        ("lincoln ave", "northbound"),
        ("woodhull ave", "northbound"),
        ("chereb lane", "eastbound"),
        ("ardmer dr", "eastbound"),
    }

    # Flag disallowed rows once; a boolean column always reduces to a scalar
    # with .any(), unlike a row-wise apply over a possibly empty selection.
    df["_is_disallowed"] = [
        pair in disallowed_anywhere
        for pair in zip(df["loc_norm"], df["dir_norm"])
    ]

    # Fixed output schema so an empty result still has the columns we sort on.
    columns = ["Plate", "Period", "Entry Time", "Entry Location", "Entry Direction"]
    results = []

    for plate, g in df.groupby("Plate", sort=False):
        g = g.sort_values("Timestamp")

        # All Norwood EB sightings as candidates
        candidates = g[(g["loc_norm"] == entry_required[0]) & (g["dir_norm"] == entry_required[1])]
        if candidates.empty:
            continue

        for _, entry in candidates.iterrows():
            # Rows for this plate in the same AM/PM period.
            # A missing Period matches no rows, so the check below passes vacuously.
            period_rows = g[g["Period"] == entry["Period"]]

            # Exclude if any disallowed location/direction occurs in that period
            if period_rows["_is_disallowed"].any():
                continue

            # Keep this Norwood EB entry
            results.append({
                "Plate": plate,
                "Period": entry["Period"],
                "Entry Time": entry["Time"],
                "Entry Location": entry["Location"],
                "Entry Direction": entry["Direction"],
            })

    # Passing `columns` keeps sort_values from raising KeyError when nothing matched.
    out_df = pd.DataFrame(results, columns=columns).sort_values(["Period", "Plate", "Entry Time"])
    out_df.to_csv(OUT, index=False)
    print(f"Saved {len(out_df)} matches to: {Path(OUT).resolve()}")

# Run the analysis only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Saved 656 matches to: /content/NorwoodEB_ONLY_not_LincolnNB_WoodhullNB_CherebEB_ArdmerEB.csv


**Exit only**

Keep only plates whose entries are at one of these four targets:

***Chereb Lane EB***

***Ardmer Dr EB***

***Woodhull Ave SB***

***Lincoln Ave SB***

…and ensure they are not found at any other location/direction in the same AM/PM period.

In [18]:
import pandas as pd
from pathlib import Path

# ========= CONFIG =========
# Input CSV of plate sightings; passed straight to pandas.read_csv in main().
SRC = "https://raw.githubusercontent.com/hawa1983/Traffic-Data-Bank/refs/heads/main/Port_Jefferson_LP_Count_Cleaned.csv"   # change if needed
# Output CSV written by main(); a relative name lands in the current working directory.
OUT = "Exit_Only.csv"
# Placeholder date prepended to each "Time" value so it parses as a full datetime.
BASE_DATE = "2025-01-01"   # dummy date for timestamp building
# ==========================

def norm(s):
    """Lower-case and trim a cell value for matching; missing values give None.

    Missing is decided by ``pd.isna``; any other value goes through ``str``
    first, so numbers are matched by their text form.
    """
    if pd.isna(s):
        return None
    as_text = str(s)
    trimmed = as_text.strip()
    return trimmed.lower()

def main():
    """Keep sightings of plates seen only at the four allowed points in a period.

    Reads the sightings CSV at ``SRC``. A sighting at Chereb Lane EB,
    Ardmer Dr EB, Woodhull Ave SB or Lincoln Ave SB is kept only if every
    sighting of the same plate with the same ``Period`` value is also at one
    of those four location/direction pairs. The kept sightings are written to
    ``OUT``.

    The input must provide the columns ``Plate``, ``Location``, ``Direction``,
    ``Time`` and ``Period``.
    """
    # Load & prep
    df = pd.read_csv(SRC)
    df["loc_norm"] = df["Location"].map(norm)
    df["dir_norm"] = df["Direction"].map(norm)
    # Unparseable times become NaT rather than raising
    df["Timestamp"] = pd.to_datetime(
        BASE_DATE + " " + df["Time"].astype(str), errors="coerce"
    )
    df = df.sort_values(["Plate", "Timestamp"]).reset_index(drop=True)

    # Allowed entries (must be ONLY these)
    allowed_only = {
        ("chereb lane", "eastbound"),
        ("ardmer dr", "eastbound"),
        ("woodhull ave", "southbound"),
        ("lincoln ave", "southbound"),
    }

    # Flag allowed rows once; a boolean column always reduces to a scalar
    # with .all(), unlike a row-wise apply over a possibly empty selection.
    df["_is_allowed"] = [
        pair in allowed_only
        for pair in zip(df["loc_norm"], df["dir_norm"])
    ]

    # Fixed output schema so an empty result still has the columns we sort on.
    columns = ["Plate", "Period", "Entry Time", "Entry Location", "Entry Direction"]
    results = []

    for plate, g in df.groupby("Plate", sort=False):
        g = g.sort_values("Timestamp")

        # Only rows at an allowed location/direction can be kept
        for _, entry in g[g["_is_allowed"]].iterrows():
            # All rows in the same AM/PM period.
            # A missing Period matches no rows, so the check below passes vacuously.
            period_rows = g[g["Period"] == entry["Period"]]

            # Require EVERY row in that period to be allowed
            if not period_rows["_is_allowed"].all():
                continue

            # Keep this entry
            results.append({
                "Plate": plate,
                "Period": entry["Period"],
                "Entry Time": entry["Time"],
                "Entry Location": entry["Location"],
                "Entry Direction": entry["Direction"],
            })

    # Passing `columns` keeps sort_values from raising KeyError when nothing matched.
    out_df = pd.DataFrame(results, columns=columns).sort_values(["Period", "Plate", "Entry Time"])
    out_df.to_csv(OUT, index=False)
    print(f"Saved {len(out_df)} matches to: {Path(OUT).resolve()}")

# Run the analysis only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Saved 884 matches to: /content/Exit_Only.csv
