In [1]:
import pandas as pd

# The sessions export is always read from the working directory.
df = pd.read_csv("./Sessions.csv")

# Minimum number of distinct destination hosts that one source must contact on
# a single port before it is reported.
MIN_UNIQUE_DSTS = 5

# Expect columns: ep1_host, ep1_port, ep2_host, ep2_port
# Treat ep1 as "source" and ep2 as "destination".
# NOTE(review): this presumes ep1 is the side that initiated the session — confirm against the export.
df["src"] = df["ep1_host"].astype(str)
df["dst"] = df["ep2_host"].astype(str)
# Nullable Int64 rather than int: a session without a port (NaN in the CSV)
# becomes <NA> instead of making the cast raise.
df["dst_port"] = pd.to_numeric(df["ep2_port"]).astype("Int64")

# astype(str) turns a missing host into the literal string "nan", which would
# otherwise be counted as one more destination; keep only complete rows.
has_endpoints = (
    df["ep1_host"].notna() & df["ep2_host"].notna() & df["dst_port"].notna()
)

# Same source -> MIN_UNIQUE_DSTS+ different destinations on the same port.
# (One source touching many hosts on one port is usually called a host or
# horizontal sweep; the printed label is kept as-is.)
suspects = (
    df[has_endpoints]
      .groupby(["src", "dst_port"])["dst"]
      .nunique()
      .reset_index(name="unique_dsts")
      .loc[lambda counts: counts["unique_dsts"] >= MIN_UNIQUE_DSTS]
      .sort_values(["unique_dsts", "dst_port"], ascending=[False, True])
)

print(f"Port-sweep (same src to {MIN_UNIQUE_DSTS}+ dsts on same port):")
if suspects.empty:
    print("None found.")
else:
    print(suspects.to_string(index=False))


Port-sweep (same src to 5+ dsts on same port):
           src  dst_port  unique_dsts
192.168.68.105       443            6


In [3]:
import ast
import pandas as pd

# Re-read the sessions export from the working directory into a fresh frame.
SESSIONS_CSV = "./Sessions.csv"
df = pd.read_csv(SESSIONS_CSV)

def parse_flags(x):
    """Turn one ``tcp_flags_counts`` cell into a dict of flag counters.

    Args:
        x: Either an already-decoded dict, or the text of a Python dict
            literal (e.g. ``"{'S': 1, 'S.': 1}"``). Anything else, such as
            ``None``, NaN, or an empty or blank string, counts as "no flags".

    Returns:
        The dict itself when ``x`` is a dict, the decoded dict when ``x`` is
        a parseable dict literal, and ``{}`` in every other case. The result
        is always a dict, so callers can use ``.get`` on it safely.
    """
    if isinstance(x, dict):
        return x
    if not isinstance(x, str) or not x.strip():
        return {}
    try:
        # Strip first: leading whitespace is an indentation error for
        # literal_eval on older Python versions.
        parsed = ast.literal_eval(x.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # These are the errors literal_eval documents for malformed input;
        # parsing is best-effort, so a bad cell simply yields no flags.
        return {}
    # A valid literal that is not a dict (list, number, string, ...) is not a
    # flag table; returning it would break callers that use `.get`.
    return parsed if isinstance(parsed, dict) else {}

def get_count(d, keys):
    """Add up the counters stored in ``d`` under each name in ``keys``.

    Args:
        d: Mapping from flag name to a count.
        keys: Names to look up; names absent from ``d`` contribute 0.

    Returns:
        The integer total. Falsy values (``None``, ``0``, ``""``) count as 0;
        every other value is converted with ``int``.
    """
    total = 0
    for key in keys:
        raw = d.get(key, 0)
        total += int(raw) if raw else 0
    return total

# Decode each row's flag counters into a dict (cells that cannot be parsed become {}).
df["flags"] = df["tcp_flags_counts"].apply(parse_flags)

# Spellings under which a SYN-ACK / a bare ACK counter may be stored.
# NOTE(review): the key format of tcp_flags_counts is not visible here — confirm
# which notation the export actually uses.
SYNACK_KEYS = ["S.", "SA", "SYN,ACK", "SynAck", "SYN-ACK"]
# "." is the tcpdump-style bare ACK, the counterpart of "S." above. Without it,
# exports in that notation always have ack_cnt == 0 and every session that saw
# a SYN-ACK is reported as a failed handshake.
ACK_KEYS = [".", "A", "ACK", "Ack"]

# Minimum number of distinct destination ports with a failed handshake, between
# one src/dst pair, before the pair is reported.
MIN_UNIQUE_PORTS = 5

# "No answer to SYN-ACK" (failed handshake) heuristic:
# at least one SYN-ACK was counted and no bare ACK was.
df["synack_cnt"] = df["flags"].apply(lambda d: get_count(d, SYNACK_KEYS))
df["ack_cnt"]    = df["flags"].apply(lambda d: get_count(d, ACK_KEYS))
df["no_synack_answer"] = (df["synack_cnt"] > 0) & (df["ack_cnt"] == 0)

# ---------- Port scanning: same src -> same dst host -> MIN_UNIQUE_PORTS+ different ports (failed handshakes) ----------
# Work on a copy so the extra columns are not written through to df.
tmp = df[df["no_synack_answer"]].copy()
# NOTE(review): ep1 is treated as the source; if ep1/ep2 ordering does not
# reflect who initiated the session, `src` may be the server — confirm.
tmp["src"] = tmp["ep1_host"].astype(str)
tmp["dst"] = tmp["ep2_host"].astype(str)
tmp["dst_port"] = tmp["ep2_port"].astype(int)

suspects_scan = (
    tmp.groupby(["src", "dst"])["dst_port"]
       .nunique()
       .reset_index(name="unique_ports_no_synack_answer")
       .loc[lambda counts: counts["unique_ports_no_synack_answer"] >= MIN_UNIQUE_PORTS]
       .sort_values("unique_ports_no_synack_answer", ascending=False)
)

print(f"Port scanning (same src -> same dst, {MIN_UNIQUE_PORTS}+ dst ports with SYN-ACK but no ACK):")
print("None found." if suspects_scan.empty else suspects_scan.to_string(index=False))


Port scanning (same src -> same dst, 5+ dst ports with SYN-ACK but no ACK):
           src            dst  unique_ports_no_synack_answer
 142.250.75.78 192.168.68.105                             10
142.250.75.142 192.168.68.105                              8
142.250.75.206 192.168.68.105                              6
