In [1]:
# Create a virtual environment named `myenv` in the current working directory.
!python3 -m venv myenv

In [2]:
# NOTE(review): every `!` command runs in its own temporary subshell, so this
# activation ends as soon as the command returns. It does not change the
# interpreter used by the running kernel or by later install commands.
# To actually use `myenv`, register it as a Jupyter kernel and select it.
!source myenv/bin/activate

In [3]:
# Use the explicit %pip magic so the package is installed into the environment
# of the running kernel (the bare `pip ...` form only works while automagic is on).
%pip install pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/opt/homebrew/Cellar/jupyterlab/4.3.2_1/libexec/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [5]:
import os

# Show the directory the kernel is running in.
working_dir = os.getcwd()
print(working_dir)

/Users/serena/CS-Academic/Projects/cybersecurity-breach-tracer


In [3]:
import pandas as pd
import os
from collections import defaultdict

# optimized solution with iterative DFS on precomputed user-computer mappings

def trace_breach(user_id, access_logs, activity_logs, output_path='out.txt'):
    """Trace how a breach spreads from one user through shared computers.

    Users and computers form a graph: a user is linked to every computer it
    appears with in either log. Starting from ``user_id``, an iterative DFS
    visits every reachable computer and every user linked to those computers.

    Parameters
    ----------
    user_id :
        Identifier of the user the trace starts from.
    access_logs : pandas.DataFrame
        Must have the columns ``affected_user_id``, ``computer_id`` and
        ``timestamp``. The frame is not modified.
    activity_logs : pandas.DataFrame
        Must have the columns ``user_id``, ``computer_id`` and ``timestamp``.
        The frame is not modified.
    output_path : str or None, default 'out.txt'
        File the event log is appended to, one event per line. Pass ``None``
        to skip writing.

    Returns
    -------
    dict
        ``event_log`` (list of str, in traversal order), ``affected_users``
        (set) and ``accessed_computers`` (set).

    Notes
    -----
    Each reported time is the earliest record for that exact
    (user, computer) pair. "User ... accessed computer ..." events prefer the
    access log and "Computer ... was accessed by user ..." events prefer the
    activity log; each falls back to the other log when the pair only
    appears there.
    """
    # Parse and sort on copies so the caller's DataFrames are left untouched.
    # A stable sort keeps rows with equal timestamps in their original order.
    access_logs = access_logs.assign(
        timestamp=pd.to_datetime(access_logs['timestamp'])
    ).sort_values(by='timestamp', kind='mergesort')
    activity_logs = activity_logs.assign(
        timestamp=pd.to_datetime(activity_logs['timestamp'])
    ).sort_values(by='timestamp', kind='mergesort')

    # Precompute mappings. Dicts are used as insertion-ordered sets so that the
    # traversal (and therefore the event log) is identical on every run.
    user_to_computers = defaultdict(dict)
    computer_to_users = defaultdict(dict)
    first_access = {}    # (user, computer) -> earliest access-log timestamp
    first_activity = {}  # (user, computer) -> earliest activity-log timestamp

    for user, computer, when in zip(access_logs['affected_user_id'],
                                    access_logs['computer_id'],
                                    access_logs['timestamp']):
        user_to_computers[user][computer] = None
        computer_to_users[computer][user] = None
        # Rows are in chronological order, so the first one seen is the earliest.
        first_access.setdefault((user, computer), when)

    for user, computer, when in zip(activity_logs['user_id'],
                                    activity_logs['computer_id'],
                                    activity_logs['timestamp']):
        user_to_computers[user][computer] = None
        computer_to_users[computer][user] = None
        first_activity.setdefault((user, computer), when)

    # Initialize DFS structure
    stack = [user_id]
    visited_users = set()
    affected_users = set()
    accessed_computers = set()  # also serves as the "visited computers" set
    event_log = []  # processed events for output

    # Iterative DFS
    while stack:
        current_user = stack.pop()
        if current_user in visited_users:
            continue
        visited_users.add(current_user)
        event_log.append(f"Processing user: {current_user}")

        # Find all breached computers (.get avoids creating empty entries
        # for users that appear in neither log).
        for computer in user_to_computers.get(current_user, ()):
            if computer in accessed_computers:
                continue
            accessed_computers.add(computer)

            pair = (current_user, computer)
            access_time = first_access.get(pair, first_activity.get(pair))
            event_log.append(f"User {current_user} accessed computer {computer} at {access_time}")

            # For each breached computer, find linked users
            for breached_user in computer_to_users[computer]:
                if breached_user not in visited_users:
                    stack.append(breached_user)  # Add the connected user to the stack
                affected_users.add(breached_user)
                pair = (breached_user, computer)
                access_time = first_activity.get(pair, first_access.get(pair))
                event_log.append(f"Computer {computer} was accessed by user {breached_user} at {access_time}")

    if output_path is not None:
        with open(output_path, 'a') as file:  # .txt output, appended across runs
            file.writelines(event + '\n' for event in event_log)

    return {
        "event_log": event_log,
        "affected_users": affected_users,
        "accessed_computers": accessed_computers,
    }

# Example usage: load both logs and trace the breach outward from one suspect.
access_logs_df = pd.read_csv('access_logs.csv')
activity_logs_df = pd.read_csv('activity_logs.csv')

suspect_id = "U12"  # user the investigation starts from
result = trace_breach(
    user_id=suspect_id,
    access_logs=access_logs_df,
    activity_logs=activity_logs_df,
)

# Summary of everything reached from the suspect.
print("\nSummary:")
for label, key in (
    ("Affected Users:", "affected_users"),
    ("Accessed Computers:", "accessed_computers"),
):
    print(label, result[key])

# Full event log, one event per line, in traversal order.
for event in result["event_log"]:
    print(event)


Processing user: U12
User U12 accessed computer C16 at 2024-12-01 00:11:56
Computer C16 was accessed by user U61 at 2024-12-01 00:42:40
Computer C16 was accessed by user U16 at 2024-12-01 03:56:48
Computer C16 was accessed by user U63 at 2024-12-01 00:56:30
Computer C16 was accessed by user U76 at 2024-12-01 04:56:26
Computer C16 was accessed by user U81 at 2024-12-01 02:34:47
Computer C16 was accessed by user U15 at 2024-12-01 00:54:08
Computer C16 was accessed by user U72 at 2024-12-01 02:02:11
Computer C16 was accessed by user U86 at 2024-12-01 00:20:20
Computer C16 was accessed by user U79 at 2024-12-01 00:45:57
Computer C16 was accessed by user U58 at 2024-12-01 01:49:50
Computer C16 was accessed by user U55 at 2024-12-01 05:29:29
Computer C16 was accessed by user U69 at 2024-12-01 04:21:10
Computer C16 was accessed by user U33 at 2024-12-01 00:12:06
Computer C16 was accessed by user U30 at 2024-12-01 02:20:18
Computer C16 was accessed by user U94 at 2024-12-01 00:04:42
Computer C