In [None]:
from pydantic import BaseModel, create_model
import json

# Data validation against schema
from jsonschema import validate
from jsonschema.exceptions import ValidationError

In [None]:
#### Azure AD Data
# Load the Azure AD export into memory. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's locale default.
azure_ad_data = None
with open("../data/caesars/2024-11-20_azure_ad.json", 'r', encoding="utf-8") as f:
    azure_ad_data = json.load(f)

In [None]:
okta_data = None
# Okta data: load the system-log export. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's locale default.
with open("../data/caesars/okta/2024-11-20_okta_logs_1.json", 'r', encoding="utf-8") as f:
    okta_data = json.load(f)

In [None]:
okta_data[:2]

In [None]:
from typing import Any, Optional, List, Dict, get_origin, get_args, Iterable
from pydantic import BaseModel, create_model

def infer_type(values: List[Any]):
    """
    Infer a type annotation that covers every value in ``values``.

    Args:
        values: Sample values observed for one field (may contain None).

    Returns:
        ``bool``, ``int``, ``float``, ``str``, ``dict``, ``list`` or ``Any``,
        wrapped in ``Optional[...]`` when at least one sample is None.

    Rules:
        * no non-None samples at all      -> ``Any`` (nothing to infer from)
        * only bools                      -> ``bool``
        * only ints                       -> ``int``
        * ints and/or floats              -> ``float``
        * only strs                       -> ``str``
        * only dict / list instances      -> ``dict`` / ``list``
        * anything else (mixed)           -> ``Any``
    """
    present = [v for v in values if v is not None]
    nullable = len(present) != len(values)
    types = {type(v) for v in present}

    if not types:
        # An empty set is a subset of every set, so without this guard an
        # all-None (or empty) input would be reported as ``int``.
        base = Any
    elif types == {bool}:
        # type(True) is bool, not int, so bools need their own branch.
        base = bool
    elif types == {int}:
        base = int
    elif types <= {float, int}:
        base = float
    elif types == {str}:
        base = str
    elif all(isinstance(v, dict) for v in present):
        base = dict
    elif all(isinstance(v, list) for v in present):
        base = list
    else:
        base = Any

    return Optional[base] if nullable else base

def build_model_from_samples(name: str, samples: List[dict]) -> type[BaseModel]:
    """
    Build a pydantic model class whose fields are inferred from sample dicts.

    Args:
        name: Class name for the generated model. Nested models are named
            ``f"{name}_{key}"`` (dict-valued fields) or
            ``f"{name}_{key}_item"`` (lists whose items are all dicts).
        samples: Dicts that all describe the same kind of record.

    Returns:
        The model class produced by ``create_model`` (a class, not an instance).

    A field is required only when every sample carries a non-None value for
    it; otherwise it is ``Optional`` with a default of None.
    """
    # First-seen order keeps the field (and JSON-schema) order stable between
    # runs; iterating a set of strings reorders fields with the hash seed.
    all_keys = list(dict.fromkeys(key for sample in samples for key in sample))
    fields = {}

    for key in all_keys:
        vals = [d.get(key) for d in samples]
        present = [v for v in vals if v is not None]
        required = len(present) == len(vals)

        if all(isinstance(v, dict) for v in present):
            # Nested object (also reached when every sample is None/missing,
            # in which case there is nothing to infer from).
            typ = build_model_from_samples(f"{name}_{key}", present) if present else Any
        elif all(isinstance(v, list) for v in present):
            items = [item for v in present for item in v]
            if items and all(isinstance(item, dict) for item in items):
                # Lists of objects get their own item model instead of
                # collapsing to List[dict] and losing the nested structure.
                elem_type = build_model_from_samples(f"{name}_{key}_item", items)
            elif items:
                elem_type = infer_type(items)
            else:
                elem_type = Any
            typ = List[elem_type]
        else:
            typ = infer_type(vals)

        if not required:
            # Optional[Optional[X]] collapses to Optional[X], so this is safe
            # even when infer_type already returned an Optional.
            typ = Optional[typ]

        default = ... if required else None
        fields[key] = (typ, default)

    return create_model(name, **fields)


def dict_with_most_keys(dicts: Iterable[Dict[Any, Any]]) -> Dict[Any, Any]:
    """
    Pick the dictionary with the largest number of keys.

    Ties are resolved in favour of the dictionary that appears first.

    Raises:
        ValueError: if ``dicts`` yields no dictionaries at all.
    """
    try:
        # len(d) is the key count, so ranking by len selects the widest dict.
        largest = max(dicts, key=len)
    except ValueError as empty_error:
        # max() raises ValueError on an empty iterable; re-raise with a
        # message that names the actual problem.
        raise ValueError("The input iterable must contain at least one dictionary") from empty_error
    return largest


In [None]:
#for item in okta_data:
 #   validated = MyModel.model_validate(item)  # Pydantic v2
  #  print(validated)

In [None]:
# Infer one model per data source from every loaded record: the Okta log
# entries and the "users" collection of the Azure AD export.
okta_model = build_model_from_samples("OktaSystemLogs", okta_data)
ad_model = build_model_from_samples("ActiveDirectoryAzureUsers", azure_ad_data['users'])



In [None]:
ad_model.model_json_schema()

In [None]:

def make_model_from_dict(name: str, sample: dict) -> type[BaseModel]:
    """
    Build a pydantic model class from a single sample dict.

    Args:
        name: Class name for the generated model; nested models are named
            ``f"{name}_{key}"``.
        sample: One example record.

    Returns:
        The model class produced by ``create_model`` (a class, not an instance).

    Field typing:
        * dict value            -> nested model, required
        * non-empty list whose first item is a dict
                                -> ``List[<nested model>]`` built from that
                                   first item only, required
        * None                  -> ``Optional[Any]`` with default None (a
                                   single null says nothing about the type)
        * anything else         -> ``type(value)``, required
    """
    # Imported once per call rather than once per loop iteration.
    from typing import Any, List, Optional

    fields = {}
    for k, v in sample.items():
        if isinstance(v, dict):
            sub = make_model_from_dict(f"{name}_{k}", v)
            fields[k] = (sub, ...)
        elif isinstance(v, list) and v and isinstance(v[0], dict):
            sub = make_model_from_dict(f"{name}_{k}", v[0])
            fields[k] = (List[sub], ...)
        elif v is None:
            # type(None) would give a required field that only accepts None.
            fields[k] = (Optional[Any], None)
        else:
            fields[k] = (type(v), ...)
    return create_model(name, **fields)

# Example usage with your data:
# item = {
#     "actor": {"id": "00uiqmz...", "multiFactorMode": "1FA"}
# }
# Build a model from the first Okta record only; fields are typed from that
# single sample.
DynamicModel = make_model_from_dict("DynamicModel", okta_data[0])
# instance = DynamicModel(**okta_data[0])

In [None]:
DynamicModel.model_json_schema()

In [None]:
okta_data[0]

In [None]:
from datamodel_code_generator import InputFileType, generate
from pathlib import Path
from tempfile import TemporaryDirectory
from datamodel_code_generator import DataModelType
# Generate pydantic v2 model source for the Azure AD export with
# datamodel-code-generator and print it.
# Alternative input/output locations, kept for reference:
# o_path = "../data/test/"
# sample = Path("../data/caesars/okta/2024-11-20_okta_logs_1.json").read_text()
# output = Path(o_path / "model.py")
json_string = json.dumps(azure_ad_data)
with TemporaryDirectory() as temporary_directory_name:
    temporary_directory = Path(temporary_directory_name)
    # The generator writes to a file, so use a throwaway location and read
    # the result back before the directory is removed.
    output = temporary_directory / "model.py"
    generate(
        json_string,
        input_file_type=InputFileType.Json,
        input_filename="2024-11-20_azure_ad.json",
        output=output,
        output_model_type=DataModelType.PydanticV2BaseModel,
    )
    model: str = output.read_text()
print(model)

In [None]:
import copy
import jmespath
from collections.abc import Mapping, Sequence
from typing import Any, Dict, List, Tuple, Union
from traceback import format_exc
JSON = Union[Dict[str, Any], List[Any]]
from typing import Any, Dict, List, Union

Primitive = Union[str, int, float, bool, None]
# JSONStructure = Union[Primitive, Dict[str, Any], List[Any]]

# Hardcoded mapping from keys to context values
# KEY_CONTEXT_MAP: Dict[str, Any] = az_schema_context

def annotate_with_jmes_paths(
    data: JSON,
    key_context_map: Union[Mapping, None] = None,
) -> Tuple[JSON, List[str]]:
    """
    Deep-copy ``data`` and annotate every dict in the copy with JMESPaths.

    For each dict encountered (at any depth):
      - collect a JMESPath expression for every primitive leaf at or below it;
      - store that list under the key ``'jmes_paths'`` (only when non-empty);
      - if one of the dict's own primitive keys appears in ``key_context_map``,
        store the mapped value under ``'context'`` (the last matching key in
        iteration order wins).

    Args:
        data: A dict, or a list of dicts. For a list, paths are relative to
            each item, not to the list itself.
        key_context_map: Optional mapping from key name to context value.
            When omitted, the notebook-level ``KEY_CONTEXT_MAP`` is used if it
            has been defined; otherwise no context is attached.

    Returns:
        ``(annotated_clone, flat_list_of_paths)``. The input is not modified.

    Raises:
        ValueError: if a generated expression is not valid JMESPath.

    Notes:
        ``@`` in a key is written as ``_atSymbol_`` in the path. The data keys
        themselves are not renamed, so such paths — and paths through keys
        that contain ``.`` — will not resolve against the data as-is.
    """
    if key_context_map is None:
        # The global is assigned in a later cell; looking it up lazily avoids
        # a NameError when this function runs before that cell.
        key_context_map = globals().get("KEY_CONTEXT_MAP") or {}

    cloned = copy.deepcopy(data)
    all_paths: List[str] = []
    no_context = object()  # sentinel: distinguishes "no match" from a None value

    def is_container(value: Any) -> bool:
        """True for dict-likes and for list-likes other than str/bytes."""
        return isinstance(value, Mapping) or (
            isinstance(value, Sequence) and not isinstance(value, (str, bytes))
        )

    def recurse(obj: Any, path_parts: List[str]) -> List[str]:
        """
        Return the JMESPaths of all primitive leaves under ``obj`` and
        annotate nested dicts in place.
        """
        paths_here: List[str] = []

        if isinstance(obj, Mapping):
            context = no_context
            # Iterate over a snapshot and write the new keys only after the
            # loop: inserting 'context' while iterating obj.items() raises
            # "dictionary changed size during iteration".
            for key, value in list(obj.items()):
                child_parts = path_parts + [key.replace("@", "_atSymbol_")]
                if is_container(value):
                    paths_here.extend(recurse(value, child_parts))
                    continue

                # Primitive leaf: build the path, validate it, record it.
                # Index parts look like "[0]", so "a.[0]" becomes "a[0]".
                expr = ".".join(child_parts).replace(".[", "[")
                try:
                    jmespath.compile(expr)
                except jmespath.exceptions.JMESPathError as exc:
                    raise ValueError(f"Invalid JMESPath '{expr}': {exc}") from exc
                paths_here.append(expr)
                if key in key_context_map:
                    context = key_context_map[key]

            if context is not no_context:
                obj["context"] = context
            if paths_here:
                obj["jmes_paths"] = paths_here.copy()
        elif isinstance(obj, Sequence) and not isinstance(obj, (str, bytes)):
            for idx, item in enumerate(obj):
                paths_here.extend(recurse(item, path_parts + [f"[{idx}]"]))

        return paths_here

    if isinstance(cloned, list):
        for item in cloned:
            all_paths.extend(recurse(item, []))
    else:
        all_paths.extend(recurse(cloned, []))

    return cloned, all_paths

def validate_paths_work(
    original: JSON, annotated: JSON
) -> Tuple[bool, List[Tuple[str, Any, Any]]]:
    """
    Check that the paths recorded under ``'jmes_paths'`` fetch the right values.

    For every dict in ``annotated`` and every recorded path that points at one
    of that dict's own primitive leaves, the path is evaluated with
    ``jmespath.search`` and the result is compared with the leaf value stored
    in the dict. Paths that belong to deeper dicts are skipped here because
    they are checked when the recursion reaches their owning dict.

    Paths are item-relative when ``annotated`` is a list (that is how
    ``annotate_with_jmes_paths`` builds them), so each item is validated
    against the matching item of ``original``. If ``original`` is not a list
    of the same length, the annotated item itself is searched instead.

    Args:
        original: The data that was annotated.
        annotated: The annotated clone.

    Returns:
        ``(all_ok, failures)`` where each failure is
        ``(path, expected, actual)``. A path that does not resolve (for
        example one containing the ``_atSymbol_`` stand-in for ``@``) is
        reported with ``actual`` set to None.
    """
    failures: List[Tuple[str, Any, Any]] = []

    def is_sequence(value: Any) -> bool:
        """True for list-likes other than str/bytes."""
        return isinstance(value, Sequence) and not isinstance(value, (str, bytes))

    def encode(key: str) -> str:
        """Mirror the key encoding used when the paths were generated."""
        return key.replace("@", "_atSymbol_")

    def check(obj: Any, root: Any, path_parts: List[str]) -> None:
        if isinstance(obj, Mapping):
            # Expected path -> value for the primitive leaves of this dict.
            own_leaves = {}
            for key, value in obj.items():
                if isinstance(value, Mapping) or is_sequence(value):
                    continue
                expr = ".".join(path_parts + [encode(key)]).replace(".[", "[")
                own_leaves[expr] = value

            recorded = obj.get("jmes_paths")
            for path in recorded if is_sequence(recorded) else []:
                if path not in own_leaves:
                    continue  # belongs to a nested dict; validated there
                expected = own_leaves[path]
                try:
                    actual = jmespath.search(path, root)
                except jmespath.exceptions.JMESPathError as exc:
                    failures.append((path, expected, None))
                    print(f"Invalid JMESPath '{path}': {exc}")
                    continue
                if actual != expected:
                    failures.append((path, expected, actual))

            for key, value in obj.items():
                if key != "jmes_paths":
                    check(value, root, path_parts + [encode(key)])
        elif is_sequence(obj):
            for idx, item in enumerate(obj):
                check(item, root, path_parts + [f"[{idx}]"])

    if isinstance(annotated, list):
        aligned = isinstance(original, list) and len(original) == len(annotated)
        roots = original if aligned else annotated
        for root_item, annotated_item in zip(roots, annotated):
            check(annotated_item, root_item, [])
    else:
        check(annotated, original, [])

    return (len(failures) == 0, failures)


In [None]:
# Annotate the first 20 Okta records with JMESPaths.
# NOTE(review): annotate_with_jmes_paths consults KEY_CONTEXT_MAP, which is
# only assigned in a later cell — confirm the intended execution order.
okta_data_jmes, okta_paths = annotate_with_jmes_paths(okta_data[:20])

In [None]:
# Validate the recorded paths for the same 20 Okta records; fail_report lists
# (path, expected, actual) tuples.
success_bool, fail_report = validate_paths_work(okta_data[:20], okta_data_jmes)

In [None]:
fail_report

In [None]:
okta_data_jmes[0]

In [None]:
# Annotate the first 20 Azure AD user records with JMESPaths.
# NOTE(review): annotate_with_jmes_paths consults KEY_CONTEXT_MAP, which is
# only assigned in a later cell — confirm the intended execution order.
azure_data_jmes, az_paths = annotate_with_jmes_paths(azure_ad_data["users"][:20])


In [None]:
print(json.dumps(azure_data_jmes[0], indent=4))

In [None]:
# Validate the recorded paths for the same 20 Azure AD user records;
# az_fail_report lists (path, expected, actual) tuples.
az_success_bool, az_fail_report = validate_paths_work(azure_ad_data["users"][:20], azure_data_jmes)


In [None]:
az_fail_report

In [None]:
# The export is a dict of collections (other cells index it with "users"), so
# azure_ad_data[0] raises KeyError; show the first user record instead.
azure_ad_data["users"][0]

#### Okta Data Models
These are the data models extracted from the Caesars Okta system-log data.

In [None]:
# generated by datamodel-codegen:
#   filename:  2024-11-20_okta_logs_1.json
#   timestamp: 2025-06-16T14:12:39+00:00

from __future__ import annotations

from typing import List, Optional

from pydantic import BaseModel, RootModel

# Okta system-log models.
#
# Every model carries an optional `jmes_paths` list. The annotation step in
# this notebook stores a plain list of path strings under the key
# 'jmes_paths', and only on dicts that have at least one leaf. The field is
# therefore a plain optional list: declaring it as a required nested object
# under a different name made both raw and annotated records fail validation.

# Retained for backward compatibility; the models below no longer reference it.
class JMESPaths(BaseModel):
    jmes_path: list[str]

# Actor block of a log entry.
class Actor(BaseModel):
    id: str
    type: str
    alternateId: str
    displayName: str
    detailEntry: None
    jmes_paths: Optional[List[str]] = None


# Parsed user-agent details nested under Client.
class UserAgent(BaseModel):
    rawUserAgent: str
    os: str
    browser: str
    jmes_paths: Optional[List[str]] = None

# Latitude/longitude pair nested under GeographicalContext.
class Geolocation(BaseModel):
    lat: float
    lon: float
    jmes_paths: Optional[List[str]] = None

# Location details; used by both Client and IpChainItem.
class GeographicalContext(BaseModel):
    city: Optional[str] = None
    state: Optional[str] = None
    country: str
    postalCode: Optional[str] = None
    geolocation: Geolocation
    jmes_paths: Optional[List[str]] = None


# Client block of a log entry.
class Client(BaseModel):
    userAgent: UserAgent
    zone: str
    device: str
    id: Optional[str] = None
    ipAddress: str
    geographicalContext: GeographicalContext
    jmes_paths: Optional[List[str]] = None

# Optional device block of a log entry.
class Device(BaseModel):
    id: str
    name: str
    os_platform: str
    os_version: str
    managed: bool
    registered: bool
    device_integrator: Optional[str] = None
    disk_encryption_type: str
    screen_lock_type: str
    jailbreak: Optional[bool] = None
    secure_hardware_present: bool
    jmes_paths: Optional[List[str]] = None

# Authentication-context block of a log entry.
class AuthenticationContext(BaseModel):
    authenticationProvider: Optional[str] = None
    credentialProvider: Optional[str] = None
    credentialType: Optional[str] = None
    issuer: None
    interface: None
    authenticationStep: int
    rootSessionId: str
    externalSessionId: str
    jmes_paths: Optional[List[str]] = None

# Outcome block of a log entry.
class Outcome(BaseModel):
    result: str
    reason: Optional[str] = None
    jmes_paths: Optional[List[str]] = None

# Security-context block of a log entry.
class SecurityContext(BaseModel):
    asNumber: Optional[int] = None
    asOrg: Optional[str] = None
    isp: Optional[str] = None
    domain: Optional[str] = None
    isProxy: bool
    jmes_paths: Optional[List[str]] = None

# Free-form debug attributes nested under DebugContext; all values are strings.
class DebugData(BaseModel):
    authnRequestId: Optional[str] = None
    requestId: str
    authMethodFirstVerificationTime: Optional[str] = None
    dtHash: Optional[str] = None
    authMethodFirstType: Optional[str] = None
    authMethodFirstEnrollment: Optional[str] = None
    requestUri: str
    threatSuspected: Optional[str] = None
    url: str
    logOnlySecurityData: Optional[str] = None
    authMethodSecondVerificationTime: Optional[str] = None
    authMethodSecondEnrollment: Optional[str] = None
    authMethodSecondType: Optional[str] = None
    redirectUri: Optional[str] = None
    behaviors: Optional[str] = None
    challengeAuthenticatorsList: Optional[str] = None
    risk: Optional[str] = None
    oktaUserAgentExtended: Optional[str] = None
    deviceFingerprint: Optional[str] = None
    origin: Optional[str] = None
    loginResult: Optional[str] = None
    traceId: Optional[str] = None
    factor: Optional[str] = None
    factorIntent: Optional[str] = None
    authenticatorMethodChallengeTime: Optional[str] = None
    userStatus: Optional[str] = None
    tunnels: Optional[str] = None
    pushOnlyResponseType: Optional[str] = None
    pushWithNumberChallengeResponseType: Optional[str] = None
    authMethodThirdEnrollment: Optional[str] = None
    authMethodThirdType: Optional[str] = None
    authMethodThirdVerificationTime: Optional[str] = None
    threatDetections: Optional[str] = None
    authMethodFourthVerificationTime: Optional[str] = None
    authMethodFourthType: Optional[str] = None
    authMethodFourthEnrollment: Optional[str] = None
    keyTypeUsedForAuthentication: Optional[str] = None
    invalidToken: Optional[str] = None
    jmes_paths: Optional[List[str]] = None

# Wrapper around DebugData.
class DebugContext(BaseModel):
    debugData: DebugData
    jmes_paths: Optional[List[str]] = None

# API-token details nested under Transaction.
class Detail(BaseModel):
    rootApiTokenId: Optional[str] = None
    requestApiTokenId: Optional[str] = None
    requestApiTokenClientId: Optional[str] = None
    jmes_paths: Optional[List[str]] = None

# Transaction block of a log entry.
class Transaction(BaseModel):
    type: str
    id: str
    detail: Detail
    jmes_paths: Optional[List[str]] = None

# One hop of Request.ipChain.
class IpChainItem(BaseModel):
    ip: str
    geographicalContext: GeographicalContext
    version: str
    source: None
    jmes_paths: Optional[List[str]] = None

# Request block of a log entry.
class Request(BaseModel):
    ipChain: List[IpChainItem]
    jmes_paths: Optional[List[str]] = None

# Optional detail block nested under TargetItem.
class DetailEntry(BaseModel):
    signOnModeType: Optional[str] = None
    signOnModeEvaluationResult: Optional[str] = None
    policyRuleFactorMode: Optional[str] = None
    deviceIntegrator: Optional[str] = None
    osVersion: Optional[str] = None
    managed: Optional[str] = None
    osPlatform: Optional[str] = None
    tpmPresent: Optional[str] = None
    deviceStatus: Optional[str] = None
    methodTypeUsed: Optional[str] = None
    policyType: Optional[str] = None
    jmes_paths: Optional[List[str]] = None

# One entry of OktaItemModel.target.
class TargetItem(BaseModel):
    id: str
    type: str
    alternateId: str
    displayName: str
    detailEntry: Optional[DetailEntry] = None
    jmes_paths: Optional[List[str]] = None

# A single Okta system-log entry.
class OktaItemModel(BaseModel):
    actor: Actor
    client: Client
    device: Optional[Device] = None
    authenticationContext: AuthenticationContext
    displayMessage: str
    eventType: str
    outcome: Outcome
    published: str
    securityContext: SecurityContext
    severity: str
    debugContext: DebugContext
    legacyEventType: Optional[str] = None
    transaction: Transaction
    uuid: str
    version: str
    request: Request
    target: Optional[List[TargetItem]] = None
    jmes_paths: Optional[List[str]] = None

# Root model: the log export is a JSON array of entries.
class OktaSysLogModel(RootModel[List[OktaItemModel]]):
    root: List[OktaItemModel]



In [None]:
# Validate every loaded Okta log record against the root model; pydantic
# raises a ValidationError if any record does not match.
okta_data_model = OktaSysLogModel(okta_data)

In [None]:
okta_data_model.model_json_schema()

### Azure AD Data Models
Data models for the Azure AD data categories:
- Users
- Groups
- Devices

In [None]:
# generated by datamodel-codegen:
#   filename:  2024-11-20_azure_ad.json
#   timestamp: 2025-06-16T14:15:16+00:00

from __future__ import annotations

from typing import List, Optional

from pydantic import BaseModel, Field


# Device record as nested under User.devices.
# NOTE(review): an earlier cell in this notebook also defines a class named
# `Device` (the Okta device block); running this cell rebinds the name in the
# shared kernel namespace — confirm nothing later relies on the Okta one.
class Device(BaseModel):
    id: str
    displayName: str
    operatingSystem: str
    operatingSystemVersion: Optional[str] = None
    profileType: Optional[str] = None
    accountEnabled: bool
    approximateLastSignInDateTime: Optional[str] = None
    createdDateTime: str
    deviceId: str
    isManaged: Optional[bool] = None
    onPremisesSyncEnabled: Optional[bool] = None
    registrationDateTime: Optional[str] = None
    trustType: Optional[str] = None
    deviceVersion: Optional[int] = None
    deviceOwnership: Optional[str] = None
    enrollmentType: Optional[str] = None
    isCompliant: Optional[bool] = None
    isRooted: Optional[bool] = None
    managementType: Optional[str] = None
    manufacturer: Optional[str] = None
    mdmAppId: Optional[str] = None
    model: Optional[str] = None
    hostnames: Optional[List[str]] = None
    systemLabels: Optional[List[str]] = None
    enrollmentProfileName: Optional[str] = None

# Device record as listed under the top-level Model.devices collection.
# RegisteredOwnersDeltaItem and RegisteredUsersDeltaItem are defined further
# down in this cell; the references work because annotations are deferred by
# `from __future__ import annotations` at the top of the cell.
class DeviceEndpoint(BaseModel):
    displayName: str
    operatingSystem: Optional[str] = None
    operatingSystemVersion: Optional[str] = None
    accountEnabled: bool
    approximateLastSignInDateTime: Optional[str] = None
    createdDateTime: str
    deviceId: str
    deviceVersion: Optional[int] = None
    registrationDateTime: Optional[str] = None
    trustType: Optional[str] = None
    id: str
    # '@' is not valid in a Python identifier, so the JSON keys
    # 'registeredOwners@delta' / 'registeredUsers@delta' are mapped via aliases.
    registeredOwners_delta: Optional[List[RegisteredOwnersDeltaItem]] = Field(
        None, alias='registeredOwners@delta'
    )
    registeredUsers_delta: Optional[List[RegisteredUsersDeltaItem]] = Field(
        None, alias='registeredUsers@delta'
    )
    profileType: Optional[str] = None
    onPremisesSyncEnabled: Optional[bool] = None
    hostnames: Optional[List[str]] = None
    isCompliant: Optional[bool] = None
    deviceOwnership: Optional[str] = None
    enrollmentType: Optional[str] = None
    isManaged: Optional[bool] = None
    isRooted: Optional[bool] = None
    managementType: Optional[str] = None
    manufacturer: Optional[str] = None
    mdmAppId: Optional[str] = None
    model: Optional[str] = None
    systemLabels: Optional[List[str]] = None
    enrollmentProfileName: Optional[str] = None


# One entry of User.manager_delta (JSON key 'manager@delta').
class ManagerDeltaItem(BaseModel):
    # JSON key '@odata.type' is not a valid identifier, hence the alias.
    field_odata_type: str = Field(..., alias='@odata.type')
    id: str


# One entry of User.identities.
class Identity(BaseModel):
    signInType: str
    issuer: str
    issuerAssignedId: Optional[str] = None


# One entry of the top-level Model.users collection. Only `id`,
# `transitiveMemberOf` and `devices` are required; every other field
# defaults to None.
class User(BaseModel):
    displayName: Optional[str] = None
    mailNickname: Optional[str] = None
    givenName: Optional[str] = None
    surname: Optional[str] = None
    mail: Optional[str] = None
    proxyAddresses: Optional[List[str]] = None
    userType: Optional[str] = None
    accountEnabled: Optional[bool] = None
    onPremisesDistinguishedName: Optional[str] = None
    userPrincipalName: Optional[str] = None
    onPremisesSamAccountName: Optional[str] = None
    id: str
    transitiveMemberOf: List[str]
    devices: List[Device]
    jobTitle: Optional[str] = None
    department: Optional[str] = None
    companyName: Optional[str] = None
    # JSON key 'manager@delta' is mapped through an alias ('@' is not valid
    # in a Python identifier).
    manager_delta: Optional[List[ManagerDeltaItem]] = Field(None, alias='manager@delta')
    manager: Optional[str] = None
    passwordPolicies: Optional[str] = None
    city: Optional[str] = None
    state: Optional[str] = None
    streetAddress: Optional[str] = None
    country: Optional[str] = None
    otherMails: Optional[List[str]] = None
    identities: Optional[List[Identity]] = None
    employeeType: Optional[str] = None
    # Typed as None only: presumably every sampled value was null — TODO confirm.
    deletedDateTime: None = None


# One entry of Group.members_delta (JSON key 'members@delta').
class MembersDeltaItem(BaseModel):
    # JSON key '@odata.type' is not a valid identifier, hence the alias.
    field_odata_type: str = Field(..., alias='@odata.type')
    id: str


# One entry of the top-level Model.groups collection. Only `id` is required.
class Group(BaseModel):
    createdDateTime: Optional[str] = None
    displayName: Optional[str] = None
    mailEnabled: Optional[bool] = None
    mailNickname: Optional[str] = None
    onPremisesDomainName: Optional[str] = None
    onPremisesSecurityIdentifier: Optional[str] = None
    onPremisesSyncEnabled: Optional[bool] = None
    renewedDateTime: Optional[str] = None
    securityEnabled: Optional[bool] = None
    securityIdentifier: Optional[str] = None
    id: str
    description: Optional[str] = None
    # JSON key 'members@delta' is mapped through an alias.
    members_delta: Optional[List[MembersDeltaItem]] = Field(None, alias='members@delta')
    mail: Optional[str] = None
    proxyAddresses: Optional[List[str]] = None
    groupTypes: Optional[List[str]] = None
    # Typed as None only: presumably every sampled value was null — TODO confirm.
    deletedDateTime: None = None
    createdByAppId: Optional[str] = None


# One entry of DeviceEndpoint.registeredOwners_delta.
class RegisteredOwnersDeltaItem(BaseModel):
    # JSON key '@odata.type' is not a valid identifier, hence the alias.
    field_odata_type: str = Field(..., alias='@odata.type')
    id: str


# One entry of DeviceEndpoint.registeredUsers_delta.
class RegisteredUsersDeltaItem(BaseModel):
    # JSON key '@odata.type' is not a valid identifier, hence the alias.
    field_odata_type: str = Field(..., alias='@odata.type')
    id: str

# Value of Model.delta_urls: one string per collection (users, groups, devices).
class DeltaUrls(BaseModel):
    users: str
    groups: str
    devices: str


# Top-level shape of the Azure AD export: three record collections plus the
# delta URLs. All four keys are required.
class Model(BaseModel):
    users: List[User]
    groups: List[Group]
    devices: List[DeviceEndpoint]
    delta_urls: DeltaUrls



In [None]:
# The export is a dict of collections (other cells index it with "users"), so
# azure_ad_data[0] raises KeyError; pretty-print the first user record instead.
print(json.dumps(azure_ad_data["users"][0], indent=4))


In [None]:
# Load the Azure AD user field definitions used as per-key context. The
# encoding is pinned to UTF-8 so the result does not depend on the platform's
# locale default.
az_schema_context = {}
with open('../data/data-definitions/ad_azure_user_data.json', 'r', encoding="utf-8") as f:
    az_schema_context=json.load(f)


In [None]:
az_schema_context

In [None]:
sensitive_properties = {
    "accountEnabled": "Disable or enable users - requires administrative privileges",
    "businessPhones": "Update business phone - sensitive action",
    "mobilePhone": "Update mobile phone - sensitive action",
    "onPremisesImmutableId": "Update on-premises immutable ID - sensitive action",
    "otherMails": "Update other emails - sensitive action",
    "passwordProfile": "Update password profile - sensitive action",
    "userPrincipalName": "Update user principal name - sensitive action"
}


In [None]:
from typing import Any, Dict, List, Union

Primitive = Union[str, int, float, bool, None]
JSONStructure = Union[Primitive, Dict[str, Any], List[Any]]

# Hardcoded mapping from keys to context values
KEY_CONTEXT_MAP: Dict[str, Any] = az_schema_context

def enrich_with_context(
    data: List[JSONStructure], *,
    key_map: Dict[str, Any] = KEY_CONTEXT_MAP
) -> None:
    """
    Add a 'context' entry to every dictionary found in ``data``.

    The structure is walked recursively. Each dict first gets
    ``context = None``; then, for each of its keys that is present in
    ``key_map``, ``context`` is set to the mapped value (the last matching key
    in iteration order wins).

    Args:
        data: A list of JSON-like structures. Dicts are modified in place.
        key_map: Mapping from key name to context value. Mapped values are
            stored by reference, not copied.

    Returns:
        None.
    """
    def process_dict(d: Dict[str, Any]) -> None:
        # Every dict gets a 'context' field, even when nothing matches.
        d["context"] = None

        # Iterate over a snapshot: 'context' is reassigned inside the loop,
        # and the values attached here must not be walked themselves.
        for k, v in list(d.items()):
            # Direct lookup replaces a linear scan over key_map.items().
            if k in key_map:
                d["context"] = key_map[k]

            if isinstance(v, dict):
                process_dict(v)
            elif isinstance(v, list):
                process_list(v)
            # Primitives need no further processing.

    def process_list(lst: List[Any]) -> None:
        for elem in lst:
            if isinstance(elem, dict):
                process_dict(elem)
            elif isinstance(elem, list):
                process_list(elem)
            # Primitives are ignored.

    # The top-level data is expected to be a list.
    process_list(data)


In [None]:
# enrich_with_context mutates dicts in place, and slicing copies only the
# list, not the user dicts. Work on a deep copy so the loaded export (and
# every cell that reads it) is not silently changed.
t_data = copy.deepcopy(azure_ad_data["users"][:20])
enrich_with_context(t_data)

In [None]:
t_data[0]

#### Azure User data schema & context

- Mail and proxyAddresses are both email-related properties. The proxyAddresses property is a collection of addresses only relevant to the Microsoft Exchange server. It's used to store a list of mail addresses for a user that are tied to a single mailbox. The mail property is used as the user's email address for various purposes including user sign-in and defines the primary proxy address.
- The following properties aren't supported in personal Microsoft accounts and will be null: aboutMe, birthday, interests, mySite, pastProjects, preferredName, responsibilities, schools, skills, streetAddress.
- The following properties are only supported when retrieving a single user: aboutMe, birthday, hireDate, interests, mySite, pastProjects, preferredName, responsibilities, schools, skills, mailboxSettings.

Relationships:
- Events can be obtained from the `/events` endpoint.
  An event belongs to a user calendar or to the default calendar of a Microsoft 365 group. The relationship returns the user's events; by default, it shows the events under the default calendar. Read-only. Nullable.
- https://learn.microsoft.com/en-us/graph/api/resources/event?view=graph-rest-1.0

- Sources:
- https://learn.microsoft.com/en-us/graph/api/resources/user?view=graph-rest-1.0#properties
- https://learn.microsoft.com/pdf?url=https%3A%2F%2Flearn.microsoft.com%2Fen-us%2Fgraph%2Fapi%2Ftoc.json%3Fview%3Dgraph-rest-1.0
- https://learn.microsoft.com/en-us/azure/active-directory/hybrid/reference-connect-sync-attributes-synchronized
- https://learn.microsoft.com/en-us/graph/api/user-get
- https://learn.microsoft.com/en-us/exchange/recipients/mailbox-custom-attributes
- https://learn.microsoft.com/en-us/graph/api/resources/licenseassignmentstate

In [None]:
identity_auth = {
  "id": {
    "type": "string",
    "required": True,
    "readonly": True,
    "description": "The unique identifier for the user. Should be treated as an opaque identifier. Key. Not nullable. Read-only",
    "context": "Primary key for user identification across all Microsoft services",
    "usage": "System-generated GUID used for all API operations and references"
  },
  "userPrincipalName": {
    "type": "string",
    "required": True,
    "description": "The user principal name (UPN) of the user. Required when a user is created",
    "context": "Primary sign-in identifier in the format user@domain.com",
    "usage": "Used for authentication and as the primary identity across Microsoft 365 services"
  },
  "accountEnabled": {
    "type": "boolean",
    "required": True,
    "description": "True if the account is enabled; otherwise, False. Required when a user is created",
    "context": "Controls whether the user can sign in and access services",
    "usage": "Administrative control for user access management and account lifecycle"
  },
  "identities": {
    "type": "array",
    "description": "Represents the identities that can be used to sign in to this user account",
    "context": "Supports multiple authentication methods for B2B/B2C scenarios",
    "usage": "Enables federated authentication and social identity providers"
  }
}
user_information = {
  "displayName": {
    "type": "string",
    "required": True,
    "maxLength": 256,
    "description": "The name displayed in the address book for the user. Required when a user is created. Maximum length is 256 characters",
    "context": "User's full name as shown in directory listings and communications",
    "usage": "Primary display name in applications, email clients, and user interfaces"
  },
  "givenName": {
    "type": "string",
    "maxLength": 64,
    "description": "The given name (first name) of the user. Maximum length is 64 characters",
    "context": "User's first name for personalization and formal communications",
    "usage": "Used in greetings, reports, and name-based sorting or filtering"
  },
  "surname": {
    "type": "string",
    "maxLength": 64,
    "description": "The user's surname (family name or last name). Maximum length is 64 characters",
    "context": "User's family name for identification and directory organization",
    "usage": "Used for alphabetical sorting, formal addressing, and name searches"
  },
  "preferredName": {
    "type": "string",
    "readonly": True,
    "description": "The preferred name for the user. Not Supported. This attribute returns an empty string",
    "context": "Intended for nickname or preferred display name (currently deprecated)",
    "usage": "Reserved for future use - currently returns empty string"
  },
  "birthday": {
    "type": "string",
    "format": "ISO 8601",
    "description": "The birthday of the user in ISO 8601 format",
    "context": "Personal information for HR records and compliance purposes",
    "usage": "Used for age verification, compliance reporting, and optional personalization"
  },
  "ageGroup": {
    "type": "string",
    "enum": ["null", "Minor", "NotAdult", "Adult"],
    "description": "Sets the age group of the user. Allowed values: null, Minor, NotAdult, and Adult",
    "context": "Legal classification for compliance with age-related regulations",
    "usage": "Determines feature availability and consent requirements for minors"
  }
}
contact_information = {
  "mail": {
    "type": "string",
    "description": "The SMTP address for the user, for example, jeff@contoso.com",
    "context": "Primary email address for external and internal communications",
    "usage": "Used for email routing, notifications, and as secondary identifier"
  },
  "mailNickname": {
    "type": "string",
    "required": True,
    "maxLength": 64,
    "description": "The mail alias for the user. Required when a user is created. Maximum length is 64 characters",
    "context": "Email alias used in Exchange and mail routing",
    "usage": "Creates email addresses and distribution lists within the organization"
  },
  "otherMails": {
    "type": "array",
    "maxItems": 250,
    "itemMaxLength": 250,
    "description": "A list of other email addresses for the user. Can store up to 250 values, each with a limit of 250 characters",
    "context": "Additional email addresses for forwarding and communication",
    "usage": "Supports multiple email identities and forwarding scenarios"
  },
  "businessPhones": {
    "type": "array",
    "description": "The telephone numbers for the user. Only one number can be set for this property",
    "context": "Primary business contact number for professional communications",
    "usage": "Used in directory listings, contact cards, and emergency communications"
  },
  "mobilePhone": {
    "type": "string",
    "maxLength": 64,
    "description": "The primary cellular telephone number for the user. Maximum length is 64 characters",
    "context": "Mobile contact for urgent communications and multi-factor authentication",
    "usage": "Used for SMS notifications, MFA, and mobile contact scenarios"
  },
  "faxNumber": {
    "type": "string",
    "description": "The fax number of the user",
    "context": "Legacy communication method for formal document transmission",
    "usage": "Maintained for compliance and traditional business communication needs"
  }
}
# Address and location properties of a user, keyed by property name.
# Each entry records the value type, an optional maxLength limit, and
# free-text description / context / usage notes.
address_location_information = {
    "streetAddress": {
        "type": "string",
        "maxLength": 1024,
        "description": "The street address of the user's place of business. Maximum length is 1,024 characters",
        "context": "Physical business address for mail delivery and location services",
        "usage": "Used for shipping, location-based services, and directory information",
    },
    "city": {
        "type": "string",
        "maxLength": 128,
        "description": "The city where the user is located. Maximum length is 128 characters",
        "context": "User's business city location for geographic organization",
        "usage": "Used for location-based filtering, regional policies, and directory searches",
    },
    "state": {
        "type": "string",
        "maxLength": 128,
        "description": "The state or province in the user's address. Maximum length is 128 characters",
        "context": "State or province information for regional compliance and organization",
        "usage": "Used for tax purposes, regional policies, and geographic reporting",
    },
    "postalCode": {
        "type": "string",
        "maxLength": 40,
        "description": "The postal code for the user's postal address. Maximum length is 40 characters",
        "context": "Postal code for mail delivery and location-based services",
        "usage": "Used for shipping calculations, regional grouping, and address validation",
    },
    "country": {
        "type": "string",
        "maxLength": 128,
        "description": "The country or region where the user is located. Maximum length is 128 characters",
        "context": "User's country for compliance, localization, and regulatory purposes",
        "usage": "Determines data residency, compliance requirements, and regional features",
    },
    # No maxLength is recorded for officeLocation.
    "officeLocation": {
        "type": "string",
        "description": "The office location in the user's place of business",
        "context": "Specific office or building location within the organization",
        "usage": "Used for meeting scheduling, resource allocation, and physical security",
    },
}
# Employment and organization properties of a user, keyed by property name.
# Date-valued entries are typed as strings and carry a "format" note.
employment_organization_information = {
    "jobTitle": {
        "type": "string",
        "maxLength": 128,
        "description": "The user's job title. Maximum length is 128 characters",
        "context": "Professional role designation for organizational hierarchy",
        "usage": "Used in org charts, business cards, and role-based access control",
    },
    "department": {
        "type": "string",
        "maxLength": 64,
        "description": "The name of the department in which the user works. Maximum length is 64 characters",
        "context": "Organizational unit for reporting and resource management",
        "usage": "Used for organizational reporting, policy application, and team grouping",
    },
    "companyName": {
        "type": "string",
        "maxLength": 64,
        "description": "The name of the company that the user is associated with. Maximum length is 64 characters",
        "context": "Organization name for multi-tenant or partner scenarios",
        "usage": "Used for branding, external communications, and tenant identification",
    },
    "employeeId": {
        "type": "string",
        "maxLength": 16,
        "description": "The employee identifier assigned to the user by the organization. Maximum length is 16 characters",
        "context": "HR system identifier for employee record correlation",
        "usage": "Links to payroll, HR systems, and employee lifecycle management",
    },
    "employeeType": {
        "type": "string",
        "description": "Captures enterprise worker type. For example, Employee, Contractor, Consultant, or Vendor",
        "context": "Classification of employment relationship type",
        "usage": "Determines access levels, policies, and compliance requirements",
    },
    "employeeHireDate": {
        "type": "string",
        "format": "ISO 8601",
        "description": "The date and time when the user was hired or will start work in a future hire",
        "context": "Employment start date for tenure calculations and lifecycle management",
        "usage": "Used for anniversary tracking, benefit eligibility, and access provisioning",
    },
    "employeeLeaveDateTime": {
        "type": "string",
        "format": "ISO 8601",
        "description": "The date and time when the user left or will leave the organization",
        "context": "Employment end date for deprovisioning and access management",
        "usage": "Triggers access removal, data retention policies, and exit procedures",
    },
    "employeeOrgData": {
        "type": "object",
        "description": "Represents organization data (for example, division and costCenter) associated with a user",
        "context": "Extended organizational metadata for reporting and cost allocation",
        "usage": "Used for financial reporting, budget allocation, and organizational analytics",
    },
}
# On-premises directory synchronization properties of a user, keyed by property name.
# Entries flagged "readonly": True are marked as not writable in this catalogue.
directory_sync = {
    "onPremisesDistinguishedName": {
        "type": "string",
        "readonly": True,
        "description": "Contains the on-premises Active Directory distinguished name or DN",
        "context": "Original AD path for hybrid identity scenarios",
        "usage": "Maintains link to on-premises AD object for synchronization",
    },
    "onPremisesDomainName": {
        "type": "string",
        "readonly": True,
        "description": "Contains the on-premises domainFQDN, also called dnsDomainName synchronized from the on-premises directory",
        "context": "Source domain for synchronized users in hybrid environments",
        "usage": "Identifies originating domain for trust and authentication decisions",
    },
    "onPremisesSamAccountName": {
        "type": "string",
        "readonly": True,
        "description": "Contains the on-premises samAccountName synchronized from the on-premises directory",
        "context": "Legacy Windows logon name from on-premises AD",
        "usage": "Maintains compatibility with legacy systems and applications",
    },
    "onPremisesUserPrincipalName": {
        "type": "string",
        "readonly": True,
        "description": "Contains the on-premises userPrincipalName synchronized from the on-premises directory",
        "context": "Original UPN from on-premises AD before cloud transformation",
        "usage": "Preserves original identity for migration and troubleshooting",
    },
    # Not flagged readonly, unlike most of the other on-premises attributes.
    "onPremisesImmutableId": {
        "type": "string",
        "description": "Used to associate an on-premises Active Directory user account to their Microsoft Entra user object",
        "context": "Unique anchor attribute for hybrid identity synchronization",
        "usage": "Ensures consistent identity mapping between on-premises and cloud",
    },
    "onPremisesSecurityIdentifier": {
        "type": "string",
        "readonly": True,
        "description": "Contains the on-premises security identifier (SID) for the user that was synchronized from on-premises to the cloud",
        "context": "Windows security identifier for legacy system integration",
        "usage": "Maintains security context for Windows-based applications and resources",
    },
    "onPremisesSyncEnabled": {
        "type": "boolean",
        "readonly": True,
        "description": "True if this user object is currently being synced from an on-premises Active Directory",
        "context": "Indicates active synchronization status from on-premises",
        "usage": "Determines whether changes should be made in cloud or on-premises",
    },
    "onPremisesLastSyncDateTime": {
        "type": "string",
        "format": "ISO 8601",
        "readonly": True,
        "description": "Indicates the last time at which the object was synced with the on-premises directory",
        "context": "Timestamp of most recent synchronization event",
        "usage": "Used for troubleshooting sync issues and monitoring sync health",
    },
    "onPremisesProvisioningErrors": {
        "type": "array",
        "readonly": True,
        "description": "Errors when using Microsoft synchronization product during provisioning",
        "context": "Synchronization error details for troubleshooting",
        "usage": "Identifies and resolves hybrid identity synchronization issues",
    },
    "onPremisesExtensionAttributes": {
        "type": "object",
        "description": "Contains extensionAttributes1-15 for the user. These extension attributes are also known as Exchange custom attributes 1-15",
        "context": "Custom attribute storage synchronized from Exchange on-premises",
        "usage": "Extends user schema with organization-specific data and metadata",
    },
}
# Licensing and service-provisioning properties of a user, keyed by property name.
# All entries except usageLocation are arrays flagged "readonly": True.
licensing_provisioning = {
    "assignedLicenses": {
        "type": "array",
        "readonly": True,
        "description": "The licenses that are assigned to the user, including inherited (group-based) licenses",
        "context": "Current license assignments determining service access",
        "usage": "Controls feature availability and service entitlements",
    },
    "assignedPlans": {
        "type": "array",
        "readonly": True,
        "description": "The plans that are assigned to the user. Read-only",
        "context": "Detailed service plan assignments within licensed products",
        "usage": "Determines specific feature and service availability",
    },
    "licenseAssignmentStates": {
        "type": "array",
        "readonly": True,
        "description": "State of license assignments for this user. Read-only",
        "context": "Detailed status of each license assignment including errors",
        "usage": "Troubleshooting license assignment issues and compliance reporting",
    },
    "provisionedPlans": {
        "type": "array",
        "readonly": True,
        "description": "The plans that are provisioned for the user. Read-only. Not nullable",
        "context": "Successfully provisioned service plans and their status",
        "usage": "Confirms which services are active and available to the user",
    },
    "usageLocation": {
        "type": "string",
        "format": "ISO 3166",
        "description": "A two-letter country code (ISO standard 3166). Required for users that are assigned licenses",
        "context": "Legal jurisdiction for license compliance and data residency",
        "usage": "Determines which services can be assigned based on regional availability",
    },
    "serviceProvisioningErrors": {
        "type": "array",
        "readonly": True,
        "description": "Errors published by a federated service describing nontransient, service-specific errors",
        "context": "Service-level provisioning issues that require attention",
        "usage": "Identifies and resolves service provisioning problems",
    },
}
# Security and authentication properties of a user, keyed by property name.
# Timestamp entries are typed as strings with a "format" note of "ISO 8601".
security_auth = {
    "passwordProfile": {
        "type": "object",
        "description": "Specifies the password profile for the user. Required when a user is created",
        "context": "Password settings and temporary password information",
        "usage": "Controls password requirements and initial password assignment",
    },
    "passwordPolicies": {
        "type": "string",
        "description": "Specifies password policies for the user. Can include DisableStrongPassword and DisablePasswordExpiration",
        "context": "Custom password policy overrides for specific user requirements",
        "usage": "Applies exception policies for service accounts or special circumstances",
    },
    "lastPasswordChangeDateTime": {
        "type": "string",
        "format": "ISO 8601",
        "readonly": True,
        "description": "The time when this Microsoft Entra user last changed their password",
        "context": "Security audit information for password lifecycle management",
        "usage": "Used for password age policies and security compliance reporting",
    },
    "refreshTokensValidFromDateTime": {
        "type": "string",
        "format": "ISO 8601",
        "description": "Any refresh tokens or session tokens issued before this time are invalid",
        "context": "Security boundary for token invalidation after security events",
        "usage": "Revokes all existing sessions and forces re-authentication",
    },
    "signInSessionsValidFromDateTime": {
        "type": "string",
        "format": "ISO 8601",
        "description": "Any refresh tokens or session tokens issued before this time are invalid",
        "context": "Session security boundary for forced re-authentication",
        "usage": "Invalidates active sessions for security incidents or policy changes",
    },
    "signInActivity": {
        "type": "object",
        "readonly": True,
        "description": "Get the last signed-in date and request ID of the sign-in for a given user. Read-only",
        "context": "Audit trail of user authentication activity",
        "usage": "Security monitoring, compliance reporting, and inactive account identification",
    },
    "securityIdentifier": {
        "type": "string",
        "readonly": True,
        "description": "Security identifier (SID) of the user, used in Windows scenarios. Read-only",
        "context": "Windows security context identifier for legacy applications",
        "usage": "Maintains Windows security model compatibility",
    },
}
# External-user / guest classification properties of a user, keyed by property name.
# Each entry records the value type, optional "enum" / "format" / "readonly"
# constraints, and free-text description / context / usage notes.
external_users_guests = {
    "userType": {
        "type": "string",
        "enum": ["Member", "Guest"],
        "description": "A string value that can be used to classify user types. Possible values are Member and Guest",
        "context": "Distinguishes internal employees from external guests and partners",
        "usage": "Applies different policies and access levels based on user classification",
    },
    "externalUserState": {
        "type": "string",
        "readonly": True,
        "description": "For a guest invited to the tenant, this property represents the invited user's invitation status",
        "context": "Status of B2B guest invitation workflow",
        "usage": "Tracks guest user onboarding and invitation acceptance",
    },
    "externalUserStateChangeDateTime": {
        "type": "string",
        "format": "ISO 8601",
        "readonly": True,
        "description": "Shows the timestamp for the latest change to the externalUserState property",
        "context": "Audit trail for guest user status changes",
        "usage": "Monitors guest user lifecycle and invitation processing",
    },
    "consentProvidedForMinor": {
        # "null" in the description means the JSON null value, not the literal
        # text "null". It is therefore modelled as None and the null type is
        # allowed, so a real null passes validation and the string "null"
        # does not.
        "type": ["string", "null"],
        "enum": [None, "Granted", "Denied", "NotRequired"],
        "description": "Sets whether consent was obtained for minors. Allowed values: null, Granted, Denied, and NotRequired",
        "context": "Legal consent status for users under legal age requirements",
        "usage": "Ensures compliance with minor protection regulations and consent laws",
    },
    "legalAgeGroupClassification": {
        "type": "string",
        "readonly": True,
        "description": "Used by enterprise applications to determine the legal age group of the user. Read-only and calculated",
        "context": "Computed legal classification based on age and jurisdiction",
        "usage": "Automatically applies age-appropriate policies and feature restrictions",
    },
}
# The following properties are only supported when retrieving a single user: aboutMe, birthday, hireDate, interests, mySite, pastProjects, preferredName, responsibilities, schools, skills, mailboxSettings.
# Self-described profile properties of a user, keyed by property name.
# Every entry except aboutMe is typed as an array.
profile_personal_interest = {
    "aboutMe": {
        "type": "string",
        "description": "A freeform text entry field for the user to describe themselves",
        "context": "User's personal description for profile and networking purposes",
        "usage": "Used in user profiles, team sites, and social networking features",
    },
    "interests": {
        "type": "array",
        "description": "A list for the user to describe their interests",
        "context": "Personal interests for social features and team building",
        "usage": "Facilitates networking, team formation, and personalized experiences",
    },
    "pastProjects": {
        "type": "array",
        "description": "A list for the user to enumerate their past projects",
        "context": "Professional project history for expertise identification",
        "usage": "Used for project staffing, expertise location, and career development",
    },
    "responsibilities": {
        "type": "array",
        "description": "A list for the user to enumerate their responsibilities",
        "context": "Current job responsibilities for role clarity and accountability",
        "usage": "Helps with task assignment, delegation, and organizational clarity",
    },
    "skills": {
        "type": "array",
        "description": "A list for the user to enumerate their skills",
        "context": "Professional competencies for resource allocation and development",
        "usage": "Used for project assignments, training needs, and expertise searches",
    },
    "schools": {
        "type": "array",
        "description": "A list for the user to enumerate the schools they attended",
        "context": "Educational background for professional networking and verification",
        "usage": "Used for alumni networks, background verification, and professional profiles",
    },
}
# System-level metadata properties of a user, keyed by property name.
# Each entry records the value type, optional "format" / "readonly" flags,
# and free-text description / context / usage notes.
system_metadata = {
    "createdDateTime": {
        "type": "string",
        "format": "ISO 8601",
        "readonly": True,
        "description": "The date and time the user was created, in ISO 8601 format and UTC",
        "context": "Account creation timestamp for lifecycle management",
        "usage": "Used for auditing, lifecycle policies, and account age calculations",
    },
    "deletedDateTime": {
        "type": "string",
        "format": "ISO 8601",
        "readonly": True,
        "description": "The date and time the user was deleted",
        "context": "Soft deletion timestamp for recovery and compliance",
        "usage": "Enables account recovery and maintains audit trail for deleted users",
    },
    "creationType": {
        "type": "string",
        "readonly": True,
        "description": "Indicates whether the user account was created through specific methods. Read-only",
        "context": "Method used for account creation (invitation, self-service, admin)",
        "usage": "Audit trail for compliance and understanding account provisioning methods",
    },
    "customSecurityAttributes": {
        "type": "object",
        "description": "An open complex type that holds the value of a custom security attribute assigned to a directory object",
        "context": "Extensible security metadata for fine-grained access control",
        "usage": "Enables advanced security policies and custom authorization scenarios",
    },
    "isResourceAccount": {
        "type": "boolean",
        "description": "Don't use – reserved for future use",
        "context": "Reserved flag for special account types (currently unused)",
        "usage": "Placeholder for future functionality - not currently implemented",
    },
}

# Backward-compatible alias: the dict was originally published under this
# misspelled name, so keep it bound to the same object for existing references.
system_medata = system_metadata
# Communication and messaging properties of a user, keyed by property name.
# Each entry records the value type plus free-text description / context /
# usage notes.
communications_messaging = {
    "imAddresses": {
        "type": "array",
        "description": "The instant message voice-over IP (VOIP) session initiation protocol (SIP) addresses for the user",
        "context": "Instant messaging and VoIP contact information",
        "usage": "Used for Skype for Business, Teams, and other unified communications",
    },
    "mailboxSettings": {
        # Not flagged readonly: the Microsoft Graph user resource documents
        # mailboxSettings as a property that can be both read and updated.
        "type": "object",
        "description": "Settings for the primary mailbox of the signed-in user",
        "context": "Exchange mailbox configuration and preferences",
        "usage": "Controls email behavior, timezone, language, and mailbox features",
    },
    "mySite": {
        "type": "string",
        "description": "The URL for the user's site",
        "context": "Personal SharePoint site URL for document storage and collaboration",
        "usage": "Provides access to user's personal workspace and file storage",
    },
    "proxyAddresses": {
        "type": "array",
        "description": "For example: ['SMTP: bob@contoso.com', 'smtp: bob@sales.contoso.com']. SMTP (capitalized) is the primary proxy address",
        "context": "All email addresses associated with the user's mailbox",
        "usage": "Email routing, alias management, and multi-domain scenarios",
    },
}
# Localization and preference properties of a user, keyed by property name.
# hireDate is also catalogued here; its description notes it is SharePoint-specific.
localization_preferences = {
    "preferredLanguage": {
        "type": "string",
        "format": "RFC 4646",
        "description": "The preferred language for the user. Format is based on RFC 4646",
        "context": "User's preferred language for UI and communications",
        "usage": "Localizes interface, emails, and content to user's preferred language",
    },
    "preferredDataLocation": {
        "type": "string",
        "description": "The preferred data location for the user",
        "context": "Data residency preference for compliance and performance",
        "usage": "Controls where user data is stored geographically",
    },
    "showInAddressList": {
        "type": "boolean",
        "description": "Do not use in Microsoft Graph. Manage this property through the Microsoft 365 admin center instead",
        "context": "Controls visibility in Exchange address lists (managed elsewhere)",
        "usage": "Determines if user appears in global address list searches",
    },
    "hireDate": {
        "type": "string",
        "format": "ISO 8601",
        "description": "The hire date of the user. Note: This property is specific to SharePoint in Microsoft 365",
        "context": "SharePoint-specific hire date for profile information",
        "usage": "Used in SharePoint profiles and may differ from employeeHireDate",
    },
}





In [None]:
# Display the keys of identity_auth (defined outside this part of the notebook;
# presumably another property catalogue dict — TODO confirm).
identity_auth.keys()

In [None]:
# Display the property names catalogued in security_auth.
security_auth.keys()

### Azure AD Device Data

```
GET https://graph.microsoft.com/v1.0/devices/{device-id}/registeredOwners
GET https://graph.microsoft.com/v1.0/devices/{device-id}/registeredUsers
GET https://graph.microsoft.com/v1.0/devices/{device-id}/memberOf
```

```
/users/{id}/ownedDevices – Lists devices owned by a user (registered in Azure AD)
/users/{id}/registeredDevices – Lists devices registered to a user (e.g., personal devices joined via Azure AD)
```
- Use `?$expand=manager` to include the `manager` property inline.
- Supported expanded properties: `manager`, `directReports`, `memberOf`.


In [None]:
# Peek at the first entry under the "devices" key of the loaded Azure AD export.
azure_ad_data["devices"][0]

In [None]:
# dict_with_most_keys is defined elsewhere in the notebook; judging by its name it
# presumably returns the device record with the most keys — verify against its definition.
t_device = dict_with_most_keys(azure_ad_data["devices"])
# Bare expression so the notebook displays the selected record.
t_device

In [None]:
# Device fields that can be used to analyse attack paths, compliance,
# management status/type, etc.
ad_security_compliance = [
    'profileType',
    'accountEnabled',
    'deviceOwnership',
    'enrollmentProfileName',
    'enrollmentType',
    'isCompliant',
    'isManaged',
    'isRooted',
    'managementType',
    'trustType',
]

# Sample Okta debugContext fragment. Note that logOnlySecurityData holds a
# JSON-encoded string (risk level plus behaviour flags), not a nested dict.
okta_debug_context = {
    'debugContext': {
        'debugData': {
            'logOnlySecurityData': '{"risk":{"level":"LOW"},"behaviors":{"New Geo-Location":"NEGATIVE","New Device":"NEGATIVE","New IP":"NEGATIVE","New State":"NEGATIVE","New Country":"NEGATIVE","Velocity":"NEGATIVE","New City":"NEGATIVE"}}',
        },
    },
}

In [None]:
# AD Device Object Fields
core_device_properties = [
    {
        "field": "accountEnabled",
        "type": "Boolean",
        "description": "True if the account is enabled; otherwise, false. Required. Default is true.",
        "context": "Controls device authentication and access. Requires Cloud Device Administrator role to set.",
        "isRequired": True,
        "security_compliance": "Controls device authentication and access; disabling prevents device sign-in.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "No equivalent property in on-premises AD device objects."
        ]
    },
    {
        "field": "alternativeSecurityIds",
        "type": "Collection",
        "description": "For internal use only. Not nullable.",
        "context": "Used by Microsoft for device security and identification.",
        "isRequired": False,
        "security_compliance": "Internal security identifier for device validation.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "No direct equivalent in on-premises AD device objects."
        ]
    },
    {
        "field": "approximateLastSignInDateTime",
        "type": "DateTimeOffset",
        "description": "Timestamp of the last sign-in, in ISO 8601 UTC. Read-only.",
        "context": "Used for auditing and identifying stale devices.",
        "isRequired": False,
        "security_compliance": "Useful for auditing device usage and detecting inactive devices.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD device objects."
        ]
    },
    {
        "field": "complianceExpirationDateTime",
        "type": "DateTimeOffset",
        "description": "When the device is no longer deemed compliant. Read-only.",
        "context": "Used by Intune for compliance tracking and enforcement.",
        "isRequired": False,
        "security_compliance": "Tracks device compliance status for conditional access.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "deviceCategory",
        "type": "String",
        "description": "User-defined property set by Intune to group devices.",
        "context": "Simplifies device management and dynamic grouping.",
        "isRequired": False,
        "security_compliance": "Used for logical grouping, not direct security.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "deviceId",
        "type": "String",
        "description": "Unique identifier set at registration. Alternate key.",
        "context": "Used for referencing the device in API and management operations.",
        "isRequired": False,
        "security_compliance": "Primary identifier for device object; used in security policies.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "DeviceId is Azure AD concept; on-premises AD does not use this property for device objects."
        ]
    },
    {
        "field": "deviceMetadata",
        "type": "String",
        "description": "For internal use only. Set to null.",
        "context": "Reserved for Microsoft internal processes.",
        "isRequired": False,
        "security_compliance": "Not used for compliance or security.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Internal property, not available in on-premises AD."
        ]
    },
    {
        "field": "deviceOwnership",
        "type": "String",
        "description": "Ownership of the device. Possible values: unknown, company, personal.",
        "context": "Set by Intune to distinguish between corporate and personal devices.",
        "isRequired": False,
        "security_compliance": "Affects compliance and access policies.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "deviceVersion",
        "type": "Int32",
        "description": "For internal use only.",
        "context": "Reserved for Microsoft internal versioning.",
        "isRequired": False,
        "security_compliance": "Not relevant for compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Internal property, not available in on-premises AD."
        ]
    },
    {
        "field": "displayName",
        "type": "String",
        "description": "Display name for the device (max 256 chars). Required.",
        "context": "Used in directory listings and device management UIs.",
        "isRequired": True,
        "security_compliance": "Used for identification and management.",
        "azure_only": False,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "https://learn.microsoft.com/en-us/windows/win32/adschema/a-cn"
        ]
    },
    {
        "field": "enrollmentProfileName",
        "type": "String",
        "description": "Enrollment profile applied to the device.",
        "context": "Indicates how the device was enrolled, e.g., Apple Device Enrollment Profile.",
        "isRequired": False,
        "security_compliance": "Tracks enrollment method for compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "enrollmentType",
        "type": "String",
        "description": "Enrollment type. Possible values: unknown, userEnrollment, etc.",
        "context": "Set by Intune to reflect the method of enrollment.",
        "isRequired": False,
        "security_compliance": "Used for compliance tracking.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "extensionAttributes",
        "type": "Object",
        "description": "Contains extension attributes 1-15 for the device.",
        "context": "Used for custom organization-specific metadata.",
        "isRequired": False,
        "security_compliance": "Can be used for compliance tagging.",
        "azure_only": False,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "https://learn.microsoft.com/en-us/windows/win32/adschema/a-extensionattribute"
        ]
    },
    {
        "field": "id",
        "type": "String",
        "description": "Unique identifier for the device. Read-only.",
        "context": "Primary key for device objects, inherited from directoryObject.",
        "isRequired": False,
        "security_compliance": "System identifier; critical for audit and security.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Azure AD unique identifier; on-premises AD uses objectGUID."
        ]
    },
    {
        "field": "isCompliant",
        "type": "Boolean",
        "description": "True if compliant with MDM policies. Read-only.",
        "context": "Indicates compliance status for conditional access.",
        "isRequired": False,
        "security_compliance": "Key compliance status for device access control.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "isManaged",
        "type": "Boolean",
        "description": "True if managed by an MDM app.",
        "context": "Indicates if the device is under management (e.g., Intune).",
        "isRequired": False,
        "security_compliance": "Indicates management status for compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "isRooted",
        "type": "Boolean",
        "description": "True if device is rooted or jail-broken.",
        "context": "Security risk indicator, set by Intune.",
        "isRequired": False,
        "security_compliance": "Critical for detecting compromised devices.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "managementType",
        "type": "String",
        "description": "Management channel of the device. Possible values: eas, mdm, etc.",
        "context": "Identifies how the device is managed (e.g., by Intune, JAMF, etc.).",
        "isRequired": False,
        "security_compliance": "Tracks device management for compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "manufacturer",
        "type": "String",
        "description": "Manufacturer of the device. Read-only.",
        "context": "Used for inventory and reporting.",
        "isRequired": False,
        "security_compliance": "Useful for inventory, not direct compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD device objects."
        ]
    },
    {
        "field": "mdmAppId",
        "type": "String",
        "description": "App identifier used for MDM registration. Read-only.",
        "context": "Identifies the app used to enroll the device.",
        "isRequired": False,
        "security_compliance": "Tracks enrollment app for compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "model",
        "type": "String",
        "description": "Model of the device. Read-only.",
        "context": "Useful for inventory and support.",
        "isRequired": False,
        "security_compliance": "Inventory only.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD device objects."
        ]
    },
    {
        "field": "onPremisesLastSyncDateTime",
        "type": "DateTimeOffset",
        "description": "Last sync time with on-premises directory. Read-only.",
        "context": "Used for hybrid environments to track sync status.",
        "isRequired": False,
        "security_compliance": "Tracks hybrid sync for compliance.",
        "azure_only": False,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "https://learn.microsoft.com/en-us/azure/active-directory/hybrid/connect/whatis-azure-ad-connect"
        ]
    },
    {
        "field": "onPremisesSecurityIdentifier",
        "type": "String",
        "description": "On-premises security identifier (SID). Read-only.",
        "context": "Used for mapping to on-premises AD objects.",
        "isRequired": False,
        "security_compliance": "Critical for hybrid security mapping.",
        "azure_only": False,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "https://learn.microsoft.com/en-us/windows/win32/adschema/a-objectsid"
        ]
    },
    {
        "field": "onPremisesSyncEnabled",
        "type": "Boolean",
        "description": "True if synced from on-premises directory. Read-only.",
        "context": "Indicates hybrid device status.",
        "isRequired": False,
        "security_compliance": "Tracks hybrid sync status.",
        "azure_only": False,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "https://learn.microsoft.com/en-us/azure/active-directory/hybrid/connect/whatis-azure-ad-connect"
        ]
    },
    {
        "field": "operatingSystem",
        "type": "String",
        "description": "Type of operating system. Required.",
        "context": "Used for inventory, policy application, and reporting.",
        "isRequired": True,
        "security_compliance": "Key for compliance and policy targeting.",
        "azure_only": False,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "https://learn.microsoft.com/en-us/windows/win32/adschema/a-operatingsystem"
        ]
    },
    {
        "field": "operatingSystemVersion",
        "type": "String",
        "description": "Version of the operating system. Required.",
        "context": "Used for compliance and update management.",
        "isRequired": True,
        "security_compliance": "Critical for patching and compliance.",
        "azure_only": False,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "https://learn.microsoft.com/en-us/windows/win32/adschema/a-operatingsystemversion"
        ]
    },
    {
        "field": "physicalIds",
        "type": "Collection",
        "description": "For internal use only. Not nullable.",
        "context": "Reserved for Microsoft internal processes.",
        "isRequired": False,
        "security_compliance": "Not relevant for compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Internal property, not available in on-premises AD."
        ]
    },
    {
        "field": "profileType",
        "type": "String",
        "description": "Profile type. Possible values: RegisteredDevice, SecureVM, Printer, Shared, IoT.",
        "context": "Indicates the device's intended use or deployment scenario.",
        "isRequired": False,
        "security_compliance": "Used for deployment scenario compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "registrationDateTime",
        "type": "DateTimeOffset",
        "description": "Date and time of device registration. Read-only.",
        "context": "Used for auditing, lifecycle management, and reporting.",
        "isRequired": False,
        "security_compliance": "Tracks device lifecycle for compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "systemLabels",
        "type": "Collection",
        "description": "Labels applied to the device by the system.",
        "context": "Used for internal categorization and automation.",
        "isRequired": False,
        "security_compliance": "Can be used for compliance automation.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "Not available in on-premises AD; Intune/Azure AD feature."
        ]
    },
    {
        "field": "trustType",
        "type": "String",
        "description": "Type of trust. Possible values: Workplace, AzureAd, ServerAd. Read-only.",
        "context": "Indicates how the device is joined (BYOD, cloud-only, or on-premises domain).",
        "isRequired": False,
        "security_compliance": "Key for access control and compliance.",
        "azure_only": False,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#properties",
            "https://learn.microsoft.com/en-us/windows/security/identity-protection/hello-for-business/hello-hybrid-cert-whfb-provision"
        ]
    }
]

In [None]:
# Relationship entries of the Microsoft Graph `device` resource (see the
# "#relationships" anchor in each entry's first source URL), one dict per
# relationship. Every entry uses the same keys:
#   field               - relationship name
#   type                - type label of the relationship
#   description         - short description of the relationship
#   context             - what the relationship is used for
#   isRequired          - bool flag (False for every entry in this list)
#   security_compliance - note on security/compliance relevance
#   azure_only          - bool flag; a later cell filters on it to list the
#                         Azure-only relationships
#   sources             - list of reference URLs and/or free-text notes
device_relationships = [
    {
        "field": "extensions",
        "type": "extension collection",
        "description": "Open extensions defined for the device. Read-only. Nullable.",
        "context": "Custom data storage for device objects.",
        "isRequired": False,
        "security_compliance": "Can be used for storing compliance metadata.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#relationships",
            "Open extensions are a Microsoft Graph/Azure AD concept."
        ]
    },
    {
        "field": "memberOf",
        "type": "directoryObject collection",
        "description": "Groups and administrative units the device is a member of. Read-only. Nullable.",
        "context": "Used for access control and group management.",
        "isRequired": False,
        "security_compliance": "Determines group-based access and compliance.",
        "azure_only": False,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#relationships",
            "https://learn.microsoft.com/en-us/windows/win32/adschema/a-memberof"
        ]
    },
    {
        "field": "registeredOwners",
        "type": "directoryObject collection",
        "description": "Users who registered or own the device. Read-only. Nullable.",
        "context": "Tracks device ownership and responsibility.",
        "isRequired": False,
        "security_compliance": "Important for accountability and incident response.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#relationships",
            "Registered owners are a Microsoft Graph/Azure AD concept."
        ]
    },
    {
        "field": "registeredUsers",
        "type": "directoryObject collection",
        "description": "Registered users of the device. Read-only. Nullable.",
        "context": "Identifies who can use the device.",
        "isRequired": False,
        "security_compliance": "Tracks user access for compliance.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#relationships",
            "Registered users are a Microsoft Graph/Azure AD concept."
        ]
    },
    {
        "field": "transitiveMemberOf",
        "type": "directoryObject collection",
        "description": "Groups/admin units the device is a transitive member of. Read-only.",
        "context": "Used for advanced access and policy scenarios.",
        "isRequired": False,
        "security_compliance": "Determines indirect group-based access.",
        "azure_only": True,
        "sources": [
            "https://learn.microsoft.com/en-us/graph/api/resources/device?view=graph-rest-1.0#relationships",
            "Transitive membership is a Microsoft Graph/Azure AD concept."
        ]
    }
]

In [None]:
# Names of the core device properties whose "azure_only" flag is exactly True
[
    prop.get("field")
    for prop in core_device_properties
    if prop.get("azure_only", False) is True
]

In [None]:
# Names of the device relationships whose "azure_only" flag is exactly True
[
    relationship.get("field")
    for relationship in device_relationships
    if relationship.get("azure_only", False) is True
]

In [None]:
# Every Azure AD user record whose displayName is exactly "John Moore"
high_device_count = [
    user
    for user in azure_ad_data["users"]
    if user.get("displayName") == "John Moore"
]

In [None]:
# Print the on-premises SID of every user record that has the key at all
# (the value is printed as-is, so a stored null shows up as "None").
for user in azure_ad_data["users"]:
    if "onPremisesSecurityIdentifier" in user:
        print(user.get("onPremisesSecurityIdentifier"))


In [None]:
# Display the first record in high_device_count (IndexError if the list is empty).
high_device_count[0]

### Okta Logs & AD Device Objects

In [None]:
# Pick one sample record from each data set for the join experiment.
# NOTE(review): dict_with_most_keys is defined elsewhere in the notebook;
# presumably it returns the dict with the largest number of keys - confirm.
az_device_join_test = dict_with_most_keys(azure_ad_data["devices"])
okta_join_test = dict_with_most_keys(okta_data)

In [None]:
# Display the top-level "device" entry of the sampled Okta record (None if absent).
okta_join_test.get("device")

In [None]:
# Display the first entry of the Azure AD "groups" collection.
azure_ad_data["groups"][0]

In [None]:
# Display the group record selected by dict_with_most_keys.
# NOTE(review): helper is defined elsewhere; presumably picks the dict with the most keys - confirm.
dict_with_most_keys(azure_ad_data["groups"])

In [None]:
# Okta events whose "actor" entry carries a truthy "device" value
[
    event
    for event in okta_data
    if event.get("actor", {}).get("device")
]

In [None]:
def _lowered(value: object) -> str:
    """Return ``value`` lower-cased when it is a string, otherwise ``""``.

    Null / non-string values are mapped to the empty string so that callers
    never have to guard ``.lower()`` themselves.
    """
    return value.lower() if isinstance(value, str) else ""


def _okta_identifiers(event: object) -> set[str]:
    """Collect the non-empty, lower-cased identifiers of one Okta event.

    Reads ``alternateId`` and ``displayName`` from the event itself and, when
    the event has a dict under ``"actor"``, from that dict as well.

    NOTE(review): the Okta System Log schema documents these two fields on the
    ``actor`` object; the top-level keys are still read so the original lookup
    keeps working - confirm against the actual log files.

    Returns an empty set for anything that is not a dict.
    """
    if not isinstance(event, dict):
        return set()
    actor = event.get("actor")
    holders = [event, actor] if isinstance(actor, dict) else [event]
    found = {
        _lowered(holder.get(key))
        for holder in holders
        for key in ("alternateId", "displayName")
    }
    found.discard("")
    return found


def _group_identifiers(group: object) -> set[str]:
    """Collect the non-empty, lower-cased identifiers of one Azure AD group.

    Uses ``mailNickname``, ``displayName``, ``mail`` and every entry of
    ``proxyAddresses`` with a leading ``smtp:`` scheme removed.

    Returns an empty set for anything that is not a dict.
    """
    if not isinstance(group, dict):
        return set()
    found = {_lowered(group.get(key)) for key in ("mailNickname", "displayName", "mail")}
    proxies = group.get("proxyAddresses")
    if isinstance(proxies, (list, tuple)):
        for address in proxies:
            # Lower-case first so "SMTP:" and "smtp:" are both handled;
            # removeprefix drops only the literal scheme, whereas lstrip would
            # also eat leading s/m/t/p/":" characters of the address itself.
            found.add(_lowered(address).removeprefix("smtp:"))
    found.discard("")
    return found


# Search for the first Okta event that shares an identifier with an Azure AD
# group. Results:
#   match_list  - list of (group, okta_event) pairs found (at most one, because
#                 the search stops at the first hit)
#   match_found - True when a pair was found
#   match       - the (group, okta_event) pair, or () when nothing matched
match_list: list[tuple[dict, dict]] = []
match_found = False
match: tuple = ()

# Group identifiers do not depend on the Okta event, so compute them once
# instead of once per event.
groups_with_ids = [(group, _group_identifiers(group)) for group in azure_ad_data["groups"]]

for o in okta_data:
    okta_ids = _okta_identifiers(o)
    if not okta_ids:
        # Nothing to compare against for this event.
        continue
    for a, group_ids in groups_with_ids:
        if not okta_ids.isdisjoint(group_ids):
            match = (a, o)
            match_list.append(match)
            match_found = True
            break

    if match_found:
        break

In [None]:
# Split the matched pair into its two members.
# NOTE(review): `match` starts out as an empty tuple in the search cell;
# unpacking raises ValueError unless it has been replaced by a 2-item tuple.
ad_match, okta_match = match

In [None]:
# Display the first member of the unpacked match.
ad_match

In [None]:
# NOTE(review): subscripts okta_match with 1 - if okta_match is a dict this is
# a lookup of the key 1 (KeyError when absent), not "the second element"; confirm intent.
okta_match[1]

In [None]:
# Number of items in okta_match (number of keys if it is a dict).
len(okta_match)