# AWS Threat Hunting with Cloudtrail Logs
---

In [1]:
import polars as pl
import gzip
import re
import glob
import orjson
import ray
import io

In [2]:
pl.Config.set_fmt_str_lengths(80)

polars.config.Config

## Load Cloudtrail logs
Docs: https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-read-log-files.html

In [3]:
@ray.remote
def read_json(filepath: str):
    if filepath.endswith(".gz"):
        with gzip.open(filepath, "r") as f:
            bytes = f.read()        
    else:
        with open(filepath, "r") as f:
            bytes = f.read()
    # TODO: Speed up one-level only JSON parsing
    logs = orjson.loads(bytes)["Records"]
    logs = [{k: str(v) for k, v in log.items()} for log in logs]
    data = pl.from_dicts(logs)
    return data


def read_glob_json(pattern: str):
    batches = []
    for filepath in glob.iglob(pattern):
        batch = read_json.remote(filepath=filepath)
        batches.append(batch)
    logs = pl.concat(ray.get(batches), how="diagonal_relaxed")
    return logs

In [4]:
logs = read_glob_json("../data/flaws_2/*.json.gz")
logs.head(5)

2023-11-26 00:37:23,455	INFO worker.py:1664 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


eventVersion,userIdentity,eventTime,eventSource,eventName,awsRegion,sourceIPAddress,userAgent,errorCode,errorMessage,requestParameters,responseElements,requestID,eventID,eventType,recipientAccountId,additionalEventData,readOnly,resources,sharedEventID,managementEvent
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""1.04""","""{'type': 'AssumedRole', 'principalId': 'AROAIBATWWYQXZTTALNCE:level1', 'arn': '…","""2018-11-28T23:03:12Z""","""logs.amazonaws.com""","""CreateLogStream""","""us-east-1""","""34.234.236.212""","""awslambda-worker""","""AccessDenied""","""User: arn:aws:sts::653711331788:assumed-role/level1/level1 is not authorized to…","""None""","""None""","""c80afb02-f361-11e8-b660-f9fa07f60344""","""77b02a51-70c8-465b-94df-579c1fe42a5c""","""AwsApiCall""","""653711331788""",,,,,
"""1.05""","""{'type': 'AWSAccount', 'principalId': '', 'accountId': 'ANONYMOUS_PRINCIPAL'}""","""2018-11-28T23:09:36Z""","""s3.amazonaws.com""","""GetObject""","""us-east-1""","""104.102.221.250""","""[Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, lik…",,,"""{'bucketName': 'the-end-962b72bjahfm5b4wcktm8t9z4sapemjb.flaws2.cloud', 'key': …","""None""","""EDFBFC9CE11E755F""","""ea33682d-0829-40c1-9820-bd721b9aede8""","""AwsApiCall""","""653711331788""","""{'x-amz-id-2': 'AeawUxLi2duvWcRYWMuVYUmtCkBuOvHaktUvODIORlnw+U6VbswU9FfeOAEDq9k…","""True""","""[{'type': 'AWS::S3::Object', 'ARN': 'arn:aws:s3:::the-end-962b72bjahfm5b4wcktm8…","""a59b4ac8-6a51-44ff-ab76-e66f75bd95ce""",
"""1.05""","""{'type': 'AWSAccount', 'principalId': '', 'accountId': 'ANONYMOUS_PRINCIPAL'}""","""2018-11-28T23:09:36Z""","""s3.amazonaws.com""","""GetObject""","""us-east-1""","""104.102.221.250""","""[Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, lik…",,,"""{'bucketName': 'the-end-962b72bjahfm5b4wcktm8t9z4sapemjb.flaws2.cloud', 'key': …","""None""","""9880010F3D39F3AC""","""dee6f6a3-f18a-40db-a6fd-b96d05502266""","""AwsApiCall""","""653711331788""","""{'x-amz-id-2': 'tLMpJDK15z1teLvIzReA3N4IMnNATUrOrGfoPS0kxZ27SPTRVbxUtdmmucw3XfE…","""True""","""[{'type': 'AWS::S3::Object', 'ARN': 'arn:aws:s3:::the-end-962b72bjahfm5b4wcktm8…","""f8c6cdc8-6ec1-4e14-9a0e-f300b16e282e""",
"""1.04""","""{'type': 'AssumedRole', 'principalId': 'AROAIBATWWYQXZTTALNCE:level1', 'arn': '…","""2018-11-28T23:06:17Z""","""ecr.amazonaws.com""","""BatchGetImage""","""us-east-1""","""104.102.221.250""","""aws-cli/1.16.19 Python/2.7.10 Darwin/17.7.0 botocore/1.12.9""",,,"""{'imageIds': [{'imageTag': 'latest'}], 'repositoryName': 'level2', 'registryId'…","""None""","""35ea9256-f362-11e8-86cf-35c48074ab0a""","""b2867f3e-810c-47d1-9657-edb886e03fe6""","""AwsApiCall""","""653711331788""",,,"""[{'ARN': 'arn:aws:ecr:us-east-1:653711331788:repository/level2', 'accountId': '…",,
"""1.04""","""{'type': 'AssumedRole', 'principalId': 'AROAIBATWWYQXZTTALNCE:level1', 'arn': '…","""2018-11-28T23:06:33Z""","""ecr.amazonaws.com""","""GetDownloadUrlForLayer""","""us-east-1""","""104.102.221.250""","""aws-cli/1.16.19 Python/2.7.10 Darwin/17.7.0 botocore/1.12.9""",,,"""{'layerDigest': 'sha256:2d73de35b78103fa305bd941424443d520524a050b1e0c78c488646…","""None""","""3f96ec7f-f362-11e8-bf5d-3380094c69db""","""ff4c72f3-4fbd-45d4-9ee3-3834a78f53de""","""AwsApiCall""","""653711331788""",,,"""[{'ARN': 'arn:aws:ecr:us-east-1:653711331788:repository/level2', 'accountId': '…",,


## Log Normalization

- In a previous query, we note that `sourceIPAddress` has no nulls but `arn` does
- So in absence of `arn`, we should fill null with `sourceIPAddress` (suffixed with "source_ip:")

**Operations:**
- Reorganize columns into entity, time panel format
- Make all JSON strings JSONPath parsable: replace `'` (single quote) with `"` (double quote):
  - `userIdentity`
  - `requestParameters`
  - `responseElements`
  - `additionalEventData`
  - `resources`
- Extract `arn` from `UserIdentity`
- Convert time column into datetime
- Create integer index per entity column
- Create window column
- Create `entity_id` column with `arn` if `arn` is not null else `sourceIPAddress`

In [13]:
json_fields = [
    "userIdentity",
    "requestParameters",
    "responseElements",
    "additionalEventData",
    "resources"
]
logs_with_ids = (
    logs.lazy()
    # Prepare panel format
    .with_columns(
        pl.col("eventTime").str.to_datetime(),
        pl.col(json_fields).str.replace_all("'", '"')
    )
    # Unpack identity information from userIdentity JSON
    .with_columns(pl.col("userIdentity").str.json_path_match("$.arn").alias("arn"))
    # Create non-null "entity_id"
    .with_columns(
        pl.col("arn").fill_null("source_ip:" + pl.col("sourceIPAddress")).alias("entity_id")
    )
    .collect(streaming=True)
)
logs_with_ids.head()

eventVersion,userIdentity,eventTime,eventSource,eventName,awsRegion,sourceIPAddress,userAgent,errorCode,errorMessage,requestParameters,responseElements,requestID,eventID,eventType,recipientAccountId,additionalEventData,readOnly,resources,sharedEventID,managementEvent,arn,entity_id
str,str,"datetime[μs, UTC]",str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…",2018-11-28 23:03:12 UTC,"""logs.amazonaws.com""","""CreateLogStream""","""us-east-1""","""34.234.236.212""","""awslambda-worker""","""AccessDenied""","""User: arn:aws:sts::653711331788:assumed-role/level1/level1 is not authorized to…","""None""","""None""","""c80afb02-f361-11e8-b660-f9fa07f60344""","""77b02a51-70c8-465b-94df-579c1fe42a5c""","""AwsApiCall""","""653711331788""",,,,,,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1"""
"""1.05""","""{""type"": ""AWSAccount"", ""principalId"": """", ""accountId"": ""ANONYMOUS_PRINCIPAL""}""",2018-11-28 23:09:36 UTC,"""s3.amazonaws.com""","""GetObject""","""us-east-1""","""104.102.221.250""","""[Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, lik…",,,"""{""bucketName"": ""the-end-962b72bjahfm5b4wcktm8t9z4sapemjb.flaws2.cloud"", ""key"": …","""None""","""EDFBFC9CE11E755F""","""ea33682d-0829-40c1-9820-bd721b9aede8""","""AwsApiCall""","""653711331788""","""{""x-amz-id-2"": ""AeawUxLi2duvWcRYWMuVYUmtCkBuOvHaktUvODIORlnw+U6VbswU9FfeOAEDq9k…","""True""","""[{""type"": ""AWS::S3::Object"", ""ARN"": ""arn:aws:s3:::the-end-962b72bjahfm5b4wcktm8…","""a59b4ac8-6a51-44ff-ab76-e66f75bd95ce""",,,"""source_ip:104.102.221.250"""
"""1.05""","""{""type"": ""AWSAccount"", ""principalId"": """", ""accountId"": ""ANONYMOUS_PRINCIPAL""}""",2018-11-28 23:09:36 UTC,"""s3.amazonaws.com""","""GetObject""","""us-east-1""","""104.102.221.250""","""[Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, lik…",,,"""{""bucketName"": ""the-end-962b72bjahfm5b4wcktm8t9z4sapemjb.flaws2.cloud"", ""key"": …","""None""","""9880010F3D39F3AC""","""dee6f6a3-f18a-40db-a6fd-b96d05502266""","""AwsApiCall""","""653711331788""","""{""x-amz-id-2"": ""tLMpJDK15z1teLvIzReA3N4IMnNATUrOrGfoPS0kxZ27SPTRVbxUtdmmucw3XfE…","""True""","""[{""type"": ""AWS::S3::Object"", ""ARN"": ""arn:aws:s3:::the-end-962b72bjahfm5b4wcktm8…","""f8c6cdc8-6ec1-4e14-9a0e-f300b16e282e""",,,"""source_ip:104.102.221.250"""
"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…",2018-11-28 23:06:17 UTC,"""ecr.amazonaws.com""","""BatchGetImage""","""us-east-1""","""104.102.221.250""","""aws-cli/1.16.19 Python/2.7.10 Darwin/17.7.0 botocore/1.12.9""",,,"""{""imageIds"": [{""imageTag"": ""latest""}], ""repositoryName"": ""level2"", ""registryId""…","""None""","""35ea9256-f362-11e8-86cf-35c48074ab0a""","""b2867f3e-810c-47d1-9657-edb886e03fe6""","""AwsApiCall""","""653711331788""",,,"""[{""ARN"": ""arn:aws:ecr:us-east-1:653711331788:repository/level2"", ""accountId"": ""…",,,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1"""
"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…",2018-11-28 23:06:33 UTC,"""ecr.amazonaws.com""","""GetDownloadUrlForLayer""","""us-east-1""","""104.102.221.250""","""aws-cli/1.16.19 Python/2.7.10 Darwin/17.7.0 botocore/1.12.9""",,,"""{""layerDigest"": ""sha256:2d73de35b78103fa305bd941424443d520524a050b1e0c78c488646…","""None""","""3f96ec7f-f362-11e8-bf5d-3380094c69db""","""ff4c72f3-4fbd-45d4-9ee3-3834a78f53de""","""AwsApiCall""","""653711331788""",,,"""[{""ARN"": ""arn:aws:ecr:us-east-1:653711331788:repository/level2"", ""accountId"": ""…",,,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1"""


### Side Quest: Investigate logs without an ARN
- Approximately 72% of logs don't have an associated ARN

In [10]:
with pl.Config(fmt_str_lengths=400):
    result = (
        logs_with_ids.filter(pl.col("arn").is_null())
        .select(pl.col("userIdentity"))
        .select(pl.col("userIdentity").str.json_extract(infer_schema_length=None))
        .get_column("userIdentity").struct.unnest()
    )
    print(len(result) / len(logs_with_ids))
    print(result)
    print(result.null_count())
    print(result.unique())
    print(result.unique().select(pl.all().n_unique()))
    print(result.select("type").unique())

0.7297297297297297
shape: (27, 4)
┌────────────┬─────────────┬─────────────────────┬─────────────────────────┐
│ type       ┆ principalId ┆ accountId           ┆ invokedBy               │
│ ---        ┆ ---         ┆ ---                 ┆ ---                     │
│ str        ┆ str         ┆ str                 ┆ str                     │
╞════════════╪═════════════╪═════════════════════╪═════════════════════════╡
│ AWSAccount ┆             ┆ ANONYMOUS_PRINCIPAL ┆ null                    │
│ AWSAccount ┆             ┆ ANONYMOUS_PRINCIPAL ┆ null                    │
│ AWSService ┆ null        ┆ null                ┆ ecs-tasks.amazonaws.com │
│ AWSService ┆ null        ┆ null                ┆ ecs-tasks.amazonaws.com │
│ …          ┆ …           ┆ …                   ┆ …                       │
│ AWSAccount ┆             ┆ ANONYMOUS_PRINCIPAL ┆ null                    │
│ AWSAccount ┆             ┆ ANONYMOUS_PRINCIPAL ┆ null                    │
│ AWSAccount ┆             ┆ ANONYMOUS_PRI

Okay we've identified four user "types" with null ARNs: `null`, `AWSService`, `AWSAccount`, `IAMUser`. Let's further investigate.

In [11]:
for identity_type in result.select("type").unique().to_series().to_list():
    print(result.filter(pl.col("type") == identity_type).unique())

shape: (3, 4)
┌────────────┬─────────────┬───────────┬──────────────────────────┐
│ type       ┆ principalId ┆ accountId ┆ invokedBy                │
│ ---        ┆ ---         ┆ ---       ┆ ---                      │
│ str        ┆ str         ┆ str       ┆ str                      │
╞════════════╪═════════════╪═══════════╪══════════════════════════╡
│ AWSService ┆ null        ┆ null      ┆ ecs-tasks.amazonaws.com  │
│ AWSService ┆ null        ┆ null      ┆ lambda.amazonaws.com     │
│ AWSService ┆ null        ┆ null      ┆ apigateway.amazonaws.com │
└────────────┴─────────────┴───────────┴──────────────────────────┘
shape: (1, 4)
┌────────────┬─────────────┬─────────────────────┬───────────┐
│ type       ┆ principalId ┆ accountId           ┆ invokedBy │
│ ---        ┆ ---         ┆ ---                 ┆ ---       │
│ str        ┆ str         ┆ str                 ┆ str       │
╞════════════╪═════════════╪═════════════════════╪═══════════╡
│ AWSAccount ┆             ┆ ANONYMOUS_PRINCI

### Group logs into traces / windows

In [28]:
every = "4i"
entity_col = "entity_id"
uuid_col = "eventID"  # Unique event ID column from Cloudtrail
data = (
    logs_with_ids.lazy()
    # Perf: Pre-sort
    .sort([entity_col, "eventTime"])
    .set_sorted([entity_col, "eventTime"])
    # Group into windows
    .with_columns(pl.col("eventTime").arg_sort().over(entity_col).cast(pl.Int64).alias("index"))
    .group_by_dynamic("index", by=entity_col, every=every)
    .agg(pl.all().exclude("index"))
    # Create window ID
    .with_columns(pl.concat_str([entity_col, pl.col("eventTime").list.first()], separator="__").alias("window_id"))
    .with_columns(
        pl.when(pl.col("window_id").is_duplicated())
        # Must use ANOTHER seperator (not just whitespace) to prevent duplicates
        .then(pl.concat_str([pl.col("window_id"), pl.col(uuid_col).list.first()], separator="."))
        .otherwise(pl.col("window_id"))
    )
    # Reset index per window_id
    .drop("index")
    .explode(pl.all().exclude(entity_col, "window_id"))
    # Reset index to start from 1 over window groups
    .with_columns(pl.col("eventTime").arg_sort().cast(pl.Int64).over("window_id").alias("index"))
    # Sort
    .sort(["window_id", "index"])
    .set_sorted(["window_id", "index"])
    # Select relevant columns
    # Unpack identity information from userIdentity JSON
    .select([
        # Window
        "window_id",
        "index",
        # Entity
        "entity_id",
        "arn",
        # Time
        "eventTime",
        # Original columns
        *logs_with_ids.select(pl.all().exclude(["entity_id", "arn", "eventTime"])).columns
    ])
    .collect(streaming=True)
)
data

window_id,index,entity_id,arn,eventTime,eventVersion,userIdentity,eventSource,eventName,awsRegion,sourceIPAddress,userAgent,errorCode,errorMessage,requestParameters,responseElements,requestID,eventID,eventType,recipientAccountId,additionalEventData,readOnly,resources,sharedEventID,managementEvent
str,i64,str,str,"datetime[μs, UTC]",str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""arn:aws:sts::653711331788:assumed-role/level1/level1__2018-11-28 23:03:12.00000…",0,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1""",2018-11-28 23:03:12 UTC,"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…","""logs.amazonaws.com""","""CreateLogStream""","""us-east-1""","""34.234.236.212""","""awslambda-worker""","""AccessDenied""","""User: arn:aws:sts::653711331788:assumed-role/level1/level1 is not authorized to…","""None""","""None""","""c80afb02-f361-11e8-b660-f9fa07f60344""","""77b02a51-70c8-465b-94df-579c1fe42a5c""","""AwsApiCall""","""653711331788""",,,,,
"""arn:aws:sts::653711331788:assumed-role/level1/level1__2018-11-28 23:03:12.00000…",1,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1""",2018-11-28 23:03:13 UTC,"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…","""logs.amazonaws.com""","""CreateLogStream""","""us-east-1""","""34.234.236.212""","""awslambda-worker""","""AccessDenied""","""User: arn:aws:sts::653711331788:assumed-role/level1/level1 is not authorized to…","""None""","""None""","""c8b78e2f-f361-11e8-937e-2325b232ab69""","""794cada7-3c82-4d0c-8b88-521228faba8c""","""AwsApiCall""","""653711331788""",,,,,
"""arn:aws:sts::653711331788:assumed-role/level1/level1__2018-11-28 23:03:12.00000…",2,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1""",2018-11-28 23:03:20 UTC,"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…","""logs.amazonaws.com""","""CreateLogStream""","""us-east-1""","""34.234.236.212""","""awslambda-worker""","""AccessDenied""","""User: arn:aws:sts::653711331788:assumed-role/level1/level1 is not authorized to…","""None""","""None""","""cc9ae337-f361-11e8-894e-cbc2b0778d92""","""483557d2-2b35-4fc6-b682-ff5dbc96eccf""","""AwsApiCall""","""653711331788""",,,,,
"""arn:aws:sts::653711331788:assumed-role/level1/level1__2018-11-28 23:03:12.00000…",3,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1""",2018-11-28 23:03:35 UTC,"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…","""logs.amazonaws.com""","""CreateLogStream""","""us-east-1""","""34.234.236.212""","""awslambda-worker""","""AccessDenied""","""User: arn:aws:sts::653711331788:assumed-role/level1/level1 is not authorized to…","""None""","""None""","""d591cf1a-f361-11e8-b278-b74c8fed5c3c""","""c9c01b63-70a2-4322-9f83-8be0f1c9ca1e""","""AwsApiCall""","""653711331788""",,,,,
"""arn:aws:sts::653711331788:assumed-role/level1/level1__2018-11-28 23:03:50.00000…",0,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1""",2018-11-28 23:03:50 UTC,"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…","""logs.amazonaws.com""","""CreateLogStream""","""us-east-1""","""34.234.236.212""","""awslambda-worker""","""AccessDenied""","""User: arn:aws:sts::653711331788:assumed-role/level1/level1 is not authorized to…","""None""","""None""","""de85fb9c-f361-11e8-8558-710261198352""","""1dcd0cba-c92b-4506-983a-012783817339""","""AwsApiCall""","""653711331788""",,,,,
"""arn:aws:sts::653711331788:assumed-role/level1/level1__2018-11-28 23:03:50.00000…",1,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1""",2018-11-28 23:04:54 UTC,"""1.05""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…","""s3.amazonaws.com""","""ListObjects""","""us-east-1""","""104.102.221.250""","""[aws-cli/1.16.19 Python/2.7.10 Darwin/17.7.0 botocore/1.12.9]""",,,"""{""list-type"": ""2"", ""bucketName"": ""level1.flaws2.cloud"", ""encoding-type"": ""url"",…","""None""","""8D9133EDD418DB32""","""8b3e9429-989c-4e28-92b3-da8df5603f6d""","""AwsApiCall""","""653711331788""","""{""x-amz-id-2"": ""zUno6UWW1NIjHLVhC3kvw8uE/5erk7GvVVsYEeahpsW37yi99WZ+biqEaRT1Xr9…","""True""","""[{""type"": ""AWS::S3::Object"", ""ARNPrefix"": ""arn:aws:s3:::level1.flaws2.cloud/""},…",,
"""arn:aws:sts::653711331788:assumed-role/level1/level1__2018-11-28 23:03:50.00000…",2,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1""",2018-11-28 23:05:53 UTC,"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…","""ecr.amazonaws.com""","""ListImages""","""us-east-1""","""104.102.221.250""","""aws-cli/1.16.19 Python/2.7.10 Darwin/17.7.0 botocore/1.12.9""",,,"""{""repositoryName"": ""level2"", ""registryId"": ""653711331788""}""","""None""","""2780d808-f362-11e8-b13e-dbd4ed9d7936""","""eb0fa4a0-580f-4270-bd37-7e45dfb217aa""","""AwsApiCall""","""653711331788""",,,"""[{""ARN"": ""arn:aws:ecr:us-east-1:653711331788:repository/level2"", ""accountId"": ""…",,
"""arn:aws:sts::653711331788:assumed-role/level1/level1__2018-11-28 23:03:50.00000…",3,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1""",2018-11-28 23:06:17 UTC,"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…","""ecr.amazonaws.com""","""BatchGetImage""","""us-east-1""","""104.102.221.250""","""aws-cli/1.16.19 Python/2.7.10 Darwin/17.7.0 botocore/1.12.9""",,,"""{""imageIds"": [{""imageTag"": ""latest""}], ""repositoryName"": ""level2"", ""registryId""…","""None""","""35ea9256-f362-11e8-86cf-35c48074ab0a""","""b2867f3e-810c-47d1-9657-edb886e03fe6""","""AwsApiCall""","""653711331788""",,,"""[{""ARN"": ""arn:aws:ecr:us-east-1:653711331788:repository/level2"", ""accountId"": ""…",,
"""arn:aws:sts::653711331788:assumed-role/level1/level1__2018-11-28 23:06:33.00000…",0,"""arn:aws:sts::653711331788:assumed-role/level1/level1""","""arn:aws:sts::653711331788:assumed-role/level1/level1""",2018-11-28 23:06:33 UTC,"""1.04""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAIBATWWYQXZTTALNCE:level1"", ""arn"": ""…","""ecr.amazonaws.com""","""GetDownloadUrlForLayer""","""us-east-1""","""104.102.221.250""","""aws-cli/1.16.19 Python/2.7.10 Darwin/17.7.0 botocore/1.12.9""",,,"""{""layerDigest"": ""sha256:2d73de35b78103fa305bd941424443d520524a050b1e0c78c488646…","""None""","""3f96ec7f-f362-11e8-bf5d-3380094c69db""","""ff4c72f3-4fbd-45d4-9ee3-3834a78f53de""","""AwsApiCall""","""653711331788""",,,"""[{""ARN"": ""arn:aws:ecr:us-east-1:653711331788:repository/level2"", ""accountId"": ""…",,
"""arn:aws:sts::653711331788:assumed-role/level3/d190d14a-2404-45d6-9113-4eda22d7f…",0,"""arn:aws:sts::653711331788:assumed-role/level3/d190d14a-2404-45d6-9113-4eda22d7f…","""arn:aws:sts::653711331788:assumed-role/level3/d190d14a-2404-45d6-9113-4eda22d7f…",2018-11-28 23:09:28 UTC,"""1.05""","""{""type"": ""AssumedRole"", ""principalId"": ""AROAJQMBDNUMIKLZKMF64:d190d14a-2404-45d…","""s3.amazonaws.com""","""ListBuckets""","""us-east-1""","""104.102.221.250""","""[aws-cli/1.16.19 Python/2.7.10 Darwin/17.7.0 botocore/1.12.9]""",,,"""None""","""None""","""4698593B9338B27F""","""65e111a0-83ae-4ba8-9673-16291a804873""","""AwsApiCall""","""653711331788""",,,,,


In [29]:
# Check cardinality of windows
data["window_id"].n_unique()

13

In [30]:
# Check window sizes are < every
window_sizes = (
    data["window_id"].value_counts()
    .sort(by="counts")
    .get_column("counts")
    .alias("window_size")
    .value_counts()
    .sort(by="window_size", descending=True)
)
window_sizes

window_size,counts
u32,u32
4,7
2,3
1,3


In [31]:
# Visualize distribution of events from entities
data.get_column("entity_id").value_counts().sort("counts", descending=True)

entity_id,counts
str,u32
"""source_ip:104.102.221.250""",22
"""arn:aws:sts::653711331788:assumed-role/level1/level1""",9
"""source_ip:ecs-tasks.amazonaws.com""",2
"""source_ip:apigateway.amazonaws.com""",2
"""arn:aws:sts::653711331788:assumed-role/level3/d190d14a-2404-45d6-9113-4eda22d7f…",1
"""source_ip:lambda.amazonaws.com""",1


## Detection Alerts / Queries

- Root-access attempts
- Steals secrets from Secrets Manager
- Sign-in errors via AWS Console
- Bruteforce `AssumeRole` attempts
- CloudTrail tampering
- GuardDuty tampering
- Listing buckets
- Get S3 objects via web browser

In [34]:
DETECTION_ALERTS = {}

### Root-access attempts
- Tactic: Priviledge Escalation (TA0004)
- Technique: Valid Accounts (T1078)

In [35]:
query = (
    data.lazy()
    .filter(
        (pl.col("eventName") == "ConsoleLogin") &
        (pl.col("userIdentity").str.contains("Root"))
    )
    .select("window_id")
    .unique()
)
result = query.collect(streaming=True)
DETECTION_ALERTS["TA0004.T1078.root"] = result
result

window_id
str


### Sign-in errors via AWS Console
- Tactic: Credential Access (TA0006)
- Technique: Brute Force (T11100)

In [36]:
query = (
    data.lazy()
    .filter(
        (pl.col("eventSource") == "signin.amazonaws.com") &
        (pl.col("eventName") == "ConsoleLogin") &
        (pl.col("responseElements").str.to_lowercase().str.contains("fail"))
    )
    .select("window_id")
    .unique()
)
result = query.collect(streaming=True)
DETECTION_ALERTS["TA0006.T11100.sign_in_errors"] = result
result

window_id
str


### Secrets from Secrets Manager
- Tactic: Credential Access (TA0006)
- Technique: Steal Application Access Token (T1528)

In [37]:
query = (
    data.lazy()
    .filter(pl.col("eventName") == "GetSecretValue")
    .select("window_id")
    .unique()
)
result = query.collect(streaming=True)
DETECTION_ALERTS["TA0006.T1528.secrets_manager"] = result
result

window_id
str


### Bruteforce `AssumeRole` attempts
- Tactic: Credential Access (TA0006)
- Tactic: Brute Force (T1110)

In [38]:
event_blacklist =  [
    "AssumeRole",
    "AssumeRoleWithSAML",
    "AssumeRoleWithWebIdentity"
]
query = (
    data.lazy()
    .filter(
        (pl.col("eventSource").str.contains("sts.amazonaws.com")) &
        (pl.col("eventName").str.contains("|".join(event_blacklist))) &
        (pl.col("errorCode").str.contains("Denied"))
    )
    .select("window_id")
    .unique()
)
result = query.collect(streaming=True)
DETECTION_ALERTS["TA0006.T1110.brute_assume_role"] = result
result

window_id
str


### CloudTrail tampering
- Tactic: Defense Evasion (TA0005)
- Technique: Impair Defenses (T1562)

In [39]:
event_blacklist =  [
    "DeleteTrail",
    "StopLogging",
    "UpdateTrail"
]
query = (
    data.lazy()
    .filter(pl.col("eventName").str.contains("|".join(event_blacklist)))
    .select("window_id")
    .unique()
)
result = query.collect(streaming=True)
DETECTION_ALERTS["TA0005.T1562.cloudtrail"] = result
result

window_id
str


### GuardDuty tampering
- Tactic: Defense Evasion (TA0005)
- Technique: Impair Defenses (T1562)

In [40]:
event_blacklist =  [
    "DeleteDetector",
    "DeleteMembers",
    "DeletePublishingDestination",
    "DisassociateMembers",
    "DisassociateFromMasterAccount",
    "RemoveTargets",
    "StopMonitoringMembers"
]
query = (
    data.lazy()
    .filter(pl.col("eventName").str.contains("|".join(event_blacklist)))
    .select("window_id")
    .unique()
)
result = query.collect(streaming=True)
DETECTION_ALERTS["TA0005.T1562.guardduty"] = result
result

window_id
str


### List Buckets from non AWS IP
- Tactic: Discovery (TA0007)
- Technique: Cloud Storage Object Discovery (T1619)



In [41]:
event_blacklist =  [
    "ListBuckets",
    "GetBucketAcl",
    "GetBucketVersioning",
]
query = (
    data.lazy()
    .filter(
        (pl.col("entity_id").str.contains("Level6")) &  # TODO: Obviously not going to work in production
        (pl.col("userAgent").str.contains("kali")) &
        (pl.col("eventName").str.contains("|".join(event_blacklist)))
    )
    .select("window_id")
    .unique()
)
result = query.collect(streaming=True)
DETECTION_ALERTS["TA0007.T1619.list_buckets"] = result
result

window_id
str


## Exfiltrate S3 objects via web browser
- Tactic: Exfiltration (TA0010)
- Technique: Over Web Service (T1567)

In [42]:
event_blacklist =  [
    "PutBucketPolicy",
    "PutBucketAcl",
]
query = (
    data.lazy()
    .filter(
        (pl.col("entity_id").str.contains("Level6")) &  # TODO: Obviously not going to work in production
        (pl.col("eventName").str.contains("|".join(event_blacklist)))
    )
    .select("window_id")
    .unique()
)
result = query.collect(streaming=True)
DETECTION_ALERTS["TA0010.T1567.put_policy_acl"] = result
result

window_id
str


## Combine Detection Alerts

In [43]:
alerts = pl.concat([
    df.with_columns(pl.lit(rule_id).alias("rule_id"))
    for rule_id, df in DETECTION_ALERTS.items()
])
alerts

window_id,rule_id
str,str


In [44]:
# Export
alerts.write_parquet("../results/aws_flaws_2_alerts.parquet")

NameError: name 'results' is not defined

## Prepare (entity, time, log) Panel

In [28]:
DURATION = (
    pl.col("eventTime")
    .dt.cast_time_unit("ms")
    .diff()
    .mul(1 / 100)  # To seconds
    .fill_null(0)
    .cast(pl.Int32)
    .over("entity_id").alias("duration")
)
LOG_FIELDS = [
    "eventName",
    (
        pl.when(pl.col("userAgent").str.to_lowercase().str.contains("s3"))
        .then(pl.lit("s3"))
        .otherwise(pl.col("userAgent").str.split(".").list[0].str.replace_all("\\[", "").str.replace_all("\\]", ""))
        .alias("aws_service")
    ),
    "errorMessage"
]
LOG_FORMAT = (
    pl.when(pl.col("errorMessage").is_null())
    .then(pl.col("eventName"))
    .otherwise(
        pl.format(
            "{}: {}",
            pl.col("eventName"),
            pl.col("errorMessage")
        )
    )
    .alias("log")
)
panel = (
    data.select([
        "window_id",
        "entity_id",
        "index",
        "eventTime",
        DURATION,
        LOG_FORMAT,
        *LOG_FIELDS,
    ])
)
panel

window_id,entity_id,index,eventTime,duration,log,eventName,aws_service,errorMessage
str,str,i64,"datetime[μs, UTC]",i32,str,str,str,str
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",0,2017-02-12 19:57:06 UTC,0,"""ListBuckets""","""ListBuckets""","""s3""",
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",1,2017-02-12 19:59:10 UTC,1240,"""GetAccountPasswordPolicy: The Password Policy with domain name 811596193553 can…","""GetAccountPasswordPolicy""","""console""","""The Password Policy with domain name 811596193553 cannot be found."""
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",2,2017-02-12 19:59:10 UTC,0,"""GetAccountSummary""","""GetAccountSummary""","""console""",
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",3,2017-02-12 19:59:10 UTC,0,"""ListAccountAliases""","""ListAccountAliases""","""console""",
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",4,2017-02-12 19:59:10 UTC,0,"""ListMFADevices""","""ListMFADevices""","""console""",
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",5,2017-02-12 19:59:10 UTC,0,"""ListAccessKeys""","""ListAccessKeys""","""console""",
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",6,2017-02-12 19:59:10 UTC,0,"""ListAccessKeys""","""ListAccessKeys""","""console""",
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",7,2017-02-12 19:59:10 UTC,0,"""GetAccountPasswordPolicy: The Password Policy with domain name 811596193553 can…","""GetAccountPasswordPolicy""","""console""","""The Password Policy with domain name 811596193553 cannot be found."""
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",8,2017-02-12 19:59:10 UTC,0,"""GetAccountSummary""","""GetAccountSummary""","""console""",
"""arn:aws:iam::811596193553:root__2017-02-12 19:57:06.000000""","""arn:aws:iam::811596193553:root""",9,2017-02-12 19:59:10 UTC,0,"""ListAccountAliases""","""ListAccountAliases""","""console""",


In [29]:
# Check AWS services
panel.get_column("aws_service").unique().sort()

aws_service
str
""
""""""
"""3Hub/1"""
"""APN/1"""
"""AWS Console Config, aws-internal/3"""
"""AWS Console Lambda, aws-internal/3"""
"""AWS Internal"""
"""AWS Organizations Console, aws-internal/3"""
"""AWS Organizations Console, aws-internal/3 aws-sdk-java/1"""
"""AWS-SupportCenterConsole, aws-internal/3"""


In [30]:
# Export
panel.write_parquet("../results/aws_flaws_2_panel.parquet")

## Cleanup Resources

In [31]:
ray.shutdown()