From acf451ed4de3d5381422abe3b54120602c304174 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 09:28:10 +0200 Subject: [PATCH 01/21] feat: Add Sentinel type definitions and data structures - Add SentinelConfig dataclass for connection configuration - Add SentinelQueryResult class for query results - Add custom exceptions for connection and query errors - Support for Azure authentication credentials --- graphistry/plugins_types/sentinel_types.py | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 graphistry/plugins_types/sentinel_types.py diff --git a/graphistry/plugins_types/sentinel_types.py b/graphistry/plugins_types/sentinel_types.py new file mode 100644 index 0000000000..cbc195146d --- /dev/null +++ b/graphistry/plugins_types/sentinel_types.py @@ -0,0 +1,70 @@ +from typing import Optional, List, Any, TYPE_CHECKING +from dataclasses import dataclass +from datetime import timedelta + +if TYPE_CHECKING: + from azure.monitor.query import LogsQueryClient + from azure.core.credentials import TokenCredential +else: + LogsQueryClient = Any + TokenCredential = Any + + +class SentinelConnectionError(Exception): + """Raised when connection to Log Analytics workspace fails""" + pass + + +class SentinelQueryError(Exception): + """Raised when query execution fails""" + pass + + +class SentinelQueryResult: + """Container for a single query result table from Microsoft Sentinel""" + + def __init__( + self, + data: List[List[Any]], + column_names: List[str], + column_types: List[str], + table_name: Optional[str] = None + ): + """ + Initialize a Sentinel query result. 
+ + :param data: List of rows, where each row is a list of values + :param column_names: List of column names + :param column_types: List of column types (e.g., 'string', 'datetime', 'int') + :param table_name: Optional name of the result table + """ + self.data = data + self.column_names = column_names + self.column_types = column_types + self.table_name = table_name + + +@dataclass +class SentinelConfig: + """Configuration for Microsoft Sentinel Log Analytics connection""" + + workspace_id: str + """The Log Analytics workspace ID (GUID format)""" + + tenant_id: Optional[str] = None + """Azure AD tenant ID for authentication""" + + client_id: Optional[str] = None + """Azure AD application (client) ID for service principal auth""" + + client_secret: Optional[str] = None + """Azure AD application secret for service principal auth""" + + credential: Optional[TokenCredential] = None + """Custom credential object for authentication""" + + default_timespan: timedelta = timedelta(hours=24) + """Default time range for queries when not specified""" + + _client: Optional[LogsQueryClient] = None + """Cached client instance (internal use)""" \ No newline at end of file From 97e76b6649d8ee50b779901f6d30d26d0680de49 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 09:29:23 +0200 Subject: [PATCH 02/21] feat: Add Sentinel plugin with basic structure and authentication - Add SentinelMixin class extending Plottable - Implement configure_sentinel() with multiple auth methods - Support Azure CLI, Service Principal, and custom credentials - Add sentinel_from_client() for existing client reuse - Implement health check and basic query infrastructure - Add client initialization with DefaultAzureCredential support --- graphistry/plugins/sentinel.py | 316 +++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) create mode 100644 graphistry/plugins/sentinel.py diff --git a/graphistry/plugins/sentinel.py b/graphistry/plugins/sentinel.py new file mode 100644 
index 0000000000..519e2d70f0 --- /dev/null +++ b/graphistry/plugins/sentinel.py @@ -0,0 +1,316 @@ +import time +import pandas as pd +from typing import Any, List, Optional, TYPE_CHECKING, Union, overload, Literal +from datetime import datetime, timedelta + +if TYPE_CHECKING: + from azure.monitor.query import LogsQueryClient + from azure.core.credentials import TokenCredential +else: + LogsQueryClient = Any + TokenCredential = Any + +from graphistry.Plottable import Plottable +from graphistry.util import setup_logger +from graphistry.plugins_types.sentinel_types import ( + SentinelConfig, + SentinelConnectionError, + SentinelQueryError, + SentinelQueryResult +) + +logger = setup_logger(__name__) + + +class SentinelMixin(Plottable): + """ + Microsoft Sentinel Log Analytics integration for Graphistry. + + This mixin allows you to query Microsoft Sentinel (Azure Log Analytics) + using KQL (Kusto Query Language) and visualize the results with Graphistry. + """ + + def configure_sentinel( + self, + workspace_id: str, + tenant_id: Optional[str] = None, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + credential: Optional["TokenCredential"] = None, + default_timespan: Optional[timedelta] = None, + ) -> Plottable: + """Configure Microsoft Sentinel Log Analytics connection settings. + + Sets up the connection parameters for accessing a Log Analytics workspace. + Authentication can be done via: + - Custom credential object (highest priority) + - Service principal (client_id, client_secret, tenant_id) + - DefaultAzureCredential (includes Azure CLI, Managed Identity, etc.) 
+ + :param workspace_id: Log Analytics workspace ID (GUID format) + :type workspace_id: str + :param tenant_id: Azure AD tenant ID for authentication + :type tenant_id: Optional[str] + :param client_id: Azure AD application (client) ID for service principal auth + :type client_id: Optional[str] + :param client_secret: Azure AD application secret for service principal auth + :type client_secret: Optional[str] + :param credential: Custom credential object for authentication + :type credential: Optional[TokenCredential] + :param default_timespan: Default time range for queries (defaults to 24 hours) + :type default_timespan: Optional[timedelta] + :returns: Self for method chaining + :rtype: Plottable + + **Example: Azure CLI authentication (development)** + :: + + import graphistry + # First run: az login + g = graphistry.configure_sentinel( + workspace_id="12345678-1234-1234-1234-123456789abc" + ) + + **Example: Service principal authentication (production)** + :: + + import graphistry + g = graphistry.configure_sentinel( + workspace_id="12345678-1234-1234-1234-123456789abc", + tenant_id="your-tenant-id", + client_id="your-client-id", + client_secret="your-client-secret" + ) + + **Example: Custom credential** + :: + + from azure.identity import DeviceCodeCredential + import graphistry + + credential = DeviceCodeCredential() + g = graphistry.configure_sentinel( + workspace_id="12345678-1234-1234-1234-123456789abc", + credential=credential + ) + """ + self.session.sentinel = SentinelConfig( + workspace_id=workspace_id, + tenant_id=tenant_id, + client_id=client_id, + client_secret=client_secret, + credential=credential, + default_timespan=default_timespan or timedelta(hours=24), + ) + return self + + def sentinel_from_client( + self, + client: LogsQueryClient, + workspace_id: str, + default_timespan: Optional[timedelta] = None + ) -> Plottable: + """Configure Sentinel using an existing LogsQueryClient connection. 
+ + Use this method when you already have a configured LogsQueryClient + and want to reuse it with Graphistry. + + :param client: Pre-configured LogsQueryClient + :type client: azure.monitor.query.LogsQueryClient + :param workspace_id: Log Analytics workspace ID + :type workspace_id: str + :param default_timespan: Default time range for queries + :type default_timespan: Optional[timedelta] + :returns: Self for method chaining + :rtype: Plottable + + **Example** + :: + + from azure.monitor.query import LogsQueryClient + from azure.identity import DefaultAzureCredential + import graphistry + + # Create client + credential = DefaultAzureCredential() + logs_client = LogsQueryClient(credential) + + # Use with Graphistry + g = graphistry.sentinel_from_client( + logs_client, + "12345678-1234-1234-1234-123456789abc" + ) + """ + # Clean up existing client if different + if self.session.sentinel is not None and client is not self.session.sentinel._client: + self.sentinel_close() + + self.session.sentinel = SentinelConfig( + workspace_id=workspace_id, + default_timespan=default_timespan or timedelta(hours=24), + _client=client, + ) + return self + + @property + def _sentinel_config(self) -> SentinelConfig: + """Get the current Sentinel configuration.""" + if self.session.sentinel is None: + raise ValueError("SentinelMixin is not configured") + return self.session.sentinel + + @property + def sentinel_client(self) -> LogsQueryClient: + """Get or create the LogsQueryClient instance.""" + if self._sentinel_config._client is not None: + return self._sentinel_config._client + client = init_sentinel_client(self._sentinel_config) + self._sentinel_config._client = client + return client + + def sentinel_close(self) -> None: + """Close the Sentinel client connection. + + Note: LogsQueryClient doesn't require explicit cleanup, + but this method is provided for API consistency. + + **Example** + :: + + import graphistry + g = graphistry.configure_sentinel(...) + # ... 
perform queries ... + g.sentinel_close() + """ + if self.session.sentinel is None: + return + # LogsQueryClient doesn't need explicit cleanup + # Just clear the cached client reference + self.session.sentinel._client = None + + def sentinel_health_check(self) -> None: + """Perform a health check on the Sentinel connection. + + Executes a simple query (Heartbeat | take 1) to verify that the connection + to the Log Analytics workspace is working properly. + + :raises SentinelConnectionError: If the connection test fails + + **Example** + :: + + import graphistry + g = graphistry.configure_sentinel(...) + g.sentinel_health_check() # Verify connection works + """ + try: + self._sentinel_query("Heartbeat | take 1", timespan=timedelta(hours=1)) + logger.info("Sentinel health check successful") + except Exception as e: + raise SentinelConnectionError(f"Health check failed: {e}") from e + + # Query methods will be added next... + def _sentinel_query( + self, + query: str, + timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None + ) -> List[SentinelQueryResult]: + """Execute KQL query and return raw results. + + Internal method for executing KQL queries and returning raw Sentinel + query results without DataFrame conversion. 
+ + :param query: KQL query string to execute + :type query: str + :param timespan: Time range for the query + :type timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] + :returns: List of raw query results + :rtype: List[SentinelQueryResult] + :raises SentinelQueryError: If the query execution fails + """ + from azure.monitor.query import LogsQueryStatus + from azure.core.exceptions import HttpResponseError + + logger.debug(f"SentinelMixin._sentinel_query(): {query}") + + # Use default timespan if not provided + if timespan is None: + timespan = self._sentinel_config.default_timespan + + try: + start = time.time() + response = self.sentinel_client.query_workspace( + workspace_id=self._sentinel_config.workspace_id, + query=query, + timespan=timespan + ) + + # Check for partial failures + if response.status == LogsQueryStatus.PARTIAL: + logger.warning(f"Query returned partial results: {response.partial_error}") + elif response.status == LogsQueryStatus.FAILURE: + raise SentinelQueryError(f"Query failed: {response.partial_error}") + + results = [] + row_lengths = [] + + # Process each table in the response + for table in response.tables: + rows = [list(row) for row in table.rows] + col_names = [col.name for col in table.columns] + col_types = [col.type for col in table.columns] + + results.append(SentinelQueryResult( + data=rows, + column_names=col_names, + column_types=col_types, + table_name=table.name + )) + row_lengths.append((len(rows), len(col_names))) + + logger.info(f"Query returned {len(results)} tables shapes: {row_lengths} in {time.time() - start:.3f} sec") + return results + + except HttpResponseError as e: + logger.error(f"Sentinel query failed: {e}") + raise SentinelQueryError(f"Query failed: {e}") from e + except Exception as e: + logger.error(f"Unexpected error during query: {e}") + raise SentinelQueryError(f"Unexpected error: {e}") from e + + +def init_sentinel_client(cfg: SentinelConfig) -> "LogsQueryClient": + """Initialize Sentinel Log 
Analytics client with appropriate authentication. + + Authentication precedence: + 1. Custom credential object (if provided) + 2. Service Principal (if credentials provided) + 3. DefaultAzureCredential (tries multiple methods automatically) + + For Azure CLI auth: Run 'az login' before using this method. + """ + from azure.identity import DefaultAzureCredential, ClientSecretCredential + from azure.monitor.query import LogsQueryClient + + try: + assert cfg.workspace_id is not None, "workspace_id is not set" + + if cfg.credential: + credential = cfg.credential + logger.info("Using custom credential object for Sentinel") + elif cfg.client_id and cfg.client_secret and cfg.tenant_id: + credential = ClientSecretCredential( + tenant_id=cfg.tenant_id, + client_id=cfg.client_id, + client_secret=cfg.client_secret + ) + logger.info(f"Using Service Principal authentication for workspace {cfg.workspace_id}") + else: + credential = DefaultAzureCredential() + logger.info(f"Using DefaultAzureCredential (Azure CLI, Managed Identity, etc.) 
for workspace {cfg.workspace_id}") + + client = LogsQueryClient(credential) + return client + + except Exception as exc: + raise SentinelConnectionError(f"Failed to initialize Sentinel client: {exc}") from exc \ No newline at end of file From 21dfd62b6b7897ed4bbe7cd9db6c49b5e0bf5c8d Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 09:34:18 +0200 Subject: [PATCH 03/21] feat: Add KQL query methods with DataFrame conversion - Implement kql() method with timespan and nested data support - Add kql_last() convenience method for recent data queries - Add sentinel_tables() and sentinel_schema() helper methods - Port nested data unwrapping from Kusto plugin - Support for multiple table responses - Handle JSON strings and dynamic columns in Sentinel results --- graphistry/plugins/sentinel.py | 343 ++++++++++++++++++++++++++++++++- 1 file changed, 341 insertions(+), 2 deletions(-) diff --git a/graphistry/plugins/sentinel.py b/graphistry/plugins/sentinel.py index 519e2d70f0..da20edd6b1 100644 --- a/graphistry/plugins/sentinel.py +++ b/graphistry/plugins/sentinel.py @@ -209,7 +209,253 @@ def sentinel_health_check(self) -> None: except Exception as e: raise SentinelConnectionError(f"Health check failed: {e}") from e - # Query methods will be added next... + @overload + def kql( + self, + query: str, + *, + timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None, + unwrap_nested: Optional[bool] = None, + single_table: Literal[True] = True, + include_statistics: bool = False + ) -> pd.DataFrame: + ... + + @overload + def kql( + self, + query: str, + *, + timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None, + unwrap_nested: Optional[bool] = None, + single_table: Literal[False], + include_statistics: bool = False + ) -> List[pd.DataFrame]: + ... 
+ + @overload + def kql( + self, + query: str, + *, + timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None, + unwrap_nested: Optional[bool] = None, + single_table: bool = True, + include_statistics: bool = False + ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + ... + + def kql( + self, + query: str, + *, + timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None, + unwrap_nested: Optional[bool] = None, + single_table: bool = True, + include_statistics: bool = False + ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + """Execute KQL query and return result tables as DataFrames. + + Submits a Kusto Query Language (KQL) query to Microsoft Sentinel (Log Analytics) + and returns the results. By default, expects a single table result and returns + it as a DataFrame. If multiple tables are returned, only the first is returned + with a warning. Set single_table=False to get all result tables. + + :param query: KQL query string to execute + :type query: str + :param timespan: Time range for the query (default: 24 hours) + :type timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] + :param unwrap_nested: Strategy for handling nested/dynamic columns + :type unwrap_nested: Optional[bool] + :param single_table: If True, return single DataFrame; if False, return list + :type single_table: bool + :param include_statistics: Include query statistics in DataFrame attrs + :type include_statistics: bool + :returns: Single DataFrame if single_table=True, else list of DataFrames + :rtype: Union[pd.DataFrame, List[pd.DataFrame]] + + **unwrap_nested semantics:** + + - **True**: Always attempt to unwrap nested columns; raise on failure + - **None**: Use heuristic - unwrap if the result looks nested + - **False**: Never attempt to unwrap nested columns + + **Example: Basic security query (single table mode)** + :: + + import graphistry + from datetime import timedelta + g = graphistry.configure_sentinel(...) 
+ + query = ''' + SecurityEvent + | where TimeGenerated > ago(1d) + | where EventID == 4625 // Failed logon + | project TimeGenerated, Account, Computer, IpAddress + | take 1000 + ''' + + # Query last 7 days + df = g.kql(query, timespan=timedelta(days=7)) + print(f"Found {len(df)} failed logon events") + + **Example: Get all tables as list** + :: + + # Always get a list of all tables + dfs = g.kql(query, single_table=False) + df = dfs[0] + + **Example: Query with specific time range** + :: + + from datetime import datetime, timedelta + + # Query specific time window + start = datetime(2024, 1, 1) + end = datetime(2024, 1, 7) + df = g.kql(query, timespan=(start, end)) + + **Example: Multi-table query** + :: + + query = ''' + SecurityEvent | summarize Count=count() by EventID | top 5 by Count; + SecurityAlert | take 10 + ''' + + # With single_table=False, returns all tables + frames = g.kql(query, single_table=False) + events_df = frames[0] + alerts_df = frames[1] + """ + results = self._sentinel_query(query, timespan=timespan) + + if not results: + if single_table: + raise ValueError("Query returned no results") + return [] + + dfs: List[pd.DataFrame] = [] + + for result in results: + # Determine if we should unwrap nested data + do_unwrap = ( + unwrap_nested is True or + (unwrap_nested is None and _should_unwrap(result)) + ) + + if do_unwrap: + try: + df_unwrapped = _unwrap_nested(result) + dfs.append(df_unwrapped) + continue + except Exception as exc: + if unwrap_nested is True: + raise RuntimeError(f"Failed to unwrap nested data: {exc}") from exc + # Heuristic miss - fall back to flat table + pass + + # Default: flat table + if not result.column_names: + # Safety fallback + dfs.append(pd.DataFrame(result.data)) + else: + dfs.append(pd.DataFrame(result.data, columns=result.column_names)) + + # Auto-unbox single table result if requested + if single_table: + if len(dfs) > 1: + logger.warning(f"Query returned {len(dfs)} tables, returning first table only") + return 
dfs[0] + + return dfs + + def kql_last( + self, + query: str, + *, + hours: float = 1, + **kwargs + ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + """Execute KQL query for the last N hours. + + Convenience wrapper for kql() that automatically sets the timespan + to the last N hours from now. + + :param query: KQL query string to execute + :type query: str + :param hours: Number of hours to look back (default: 1) + :type hours: float + :param kwargs: Additional arguments passed to kql() + :returns: Query results as DataFrame(s) + :rtype: Union[pd.DataFrame, List[pd.DataFrame]] + + **Example: Get security alerts from last 24 hours** + :: + + import graphistry + g = graphistry.configure_sentinel(...) + + alerts = g.kql_last(''' + SecurityAlert + | project TimeGenerated, AlertName, Severity + | order by TimeGenerated desc + ''', hours=24) + + **Example: Get recent failed logins (last hour)** + :: + + # Default is 1 hour + recent_failures = g.kql_last(''' + SecurityEvent + | where EventID == 4625 + | summarize FailCount=count() by Account + ''') + """ + return self.kql(query, timespan=timedelta(hours=hours), **kwargs) + + def sentinel_tables(self) -> pd.DataFrame: + """List all available tables in the Log Analytics workspace. + + :returns: DataFrame with table names + :rtype: pd.DataFrame + + **Example** + :: + + import graphistry + g = graphistry.configure_sentinel(...) + + # Get list of all tables + tables = g.sentinel_tables() + print(f"Found {len(tables)} tables") + print(tables.head(10)) + """ + query = "union withsource=TableName * | distinct TableName | sort by TableName asc" + return self.kql(query, timespan=timedelta(minutes=5)) + + def sentinel_schema(self, table: str) -> pd.DataFrame: + """Get schema information for a specific table. + + :param table: Name of the table to inspect + :type table: str + :returns: DataFrame with column names and types + :rtype: pd.DataFrame + + **Example** + :: + + import graphistry + g = graphistry.configure_sentinel(...) 
+ + # Get schema for SecurityEvent table + schema = g.sentinel_schema("SecurityEvent") + print(schema[['ColumnName', 'DataType']]) + """ + query = f"{table} | getschema" + return self.kql(query, timespan=timedelta(minutes=5)) + def _sentinel_query( self, query: str, @@ -313,4 +559,97 @@ def init_sentinel_client(cfg: SentinelConfig) -> "LogsQueryClient": return client except Exception as exc: - raise SentinelConnectionError(f"Failed to initialize Sentinel client: {exc}") from exc \ No newline at end of file + raise SentinelConnectionError(f"Failed to initialize Sentinel client: {exc}") from exc + + +# Sentinel Utils - adapted from Kusto plugin +def _is_dynamic(val: Any) -> bool: + """Check if value is a nested/dynamic JSON type.""" + return isinstance(val, (dict, list)) + + +def _unwrap_nested(result: SentinelQueryResult) -> pd.DataFrame: + """ + Transform a Sentinel result whose columns contain nested/dynamic objects. + + - dict -> dot-flattened + - list[dict] -> explode + flatten + - list[scalar] -> keep as-is + """ + df = pd.DataFrame(result.data, columns=result.column_names) + if not result.column_types: + return df + + for col, col_type in zip(result.column_names, result.column_types): + # Check for dynamic/object types (common in Sentinel) + if col_type.lower() in ["dynamic", "object", "string"]: + # Check if column contains JSON strings that need parsing + if col_type.lower() == "string" and len(df) > 0: + try: + # Try to parse first non-null value as JSON + sample = df[col].dropna().iloc[0] if not df[col].dropna().empty else None + if sample and isinstance(sample, str) and (sample.startswith('{') or sample.startswith('[')): + import json + df[col] = df[col].apply(lambda x: json.loads(x) if pd.notna(x) and isinstance(x, str) else x) + except (json.JSONDecodeError, IndexError): + continue # Not JSON, keep as string + + # Handle lists of dicts - need to explode + list_of_dicts = df[col].apply( + lambda v: isinstance(v, list) and (not v or all(isinstance(x, 
dict) for x in v)) + ) + if list_of_dicts.any(): + df[col] = df[col].where( + list_of_dicts, + df[col].apply(lambda x: [x] if pd.notna(x) else x) + ) + df = df.explode(col, ignore_index=True) + + # Flatten dict columns + dict_rows = df[col].apply(lambda v: isinstance(v, dict)) + if dict_rows.any(): + flat = pd.json_normalize(df.loc[dict_rows, col].tolist(), sep='.').add_prefix(f"{col}.") + flat.index = df.loc[dict_rows].index + df = df.join(flat, how='left') + df[col] = df[col].mask(dict_rows, pd.NA) + + # Drop column if all values are NA after processing + if df[col].isna().all(): + df = df.drop(columns=[col]) + + # Clean up - replace pd.NA with None for consistency + df = df.astype(object).where(pd.notna(df), None) + return df.reset_index(drop=True) + + +def _should_unwrap(result: SentinelQueryResult, sample_rows: int = 5) -> bool: + """ + Decide whether result looks like it contains nested/dynamic columns. + + Strategy: + 1. Check column types for 'dynamic' or 'object' + 2. Inspect sample rows for dict/list values + 3. 
Check for JSON strings + """ + # Check column types + if result.column_types: + for col_type in result.column_types: + if col_type.lower() in ["dynamic", "object"]: + return True + + # Sample data for nested structures + for col_idx in range(len(result.column_names)): + sample = (row[col_idx] for row in result.data[:sample_rows] if row) + for val in sample: + if _is_dynamic(val): + return True + # Check for JSON strings + if isinstance(val, str) and val and (val.startswith('{') or val.startswith('[')): + try: + import json + json.loads(val) + return True + except (json.JSONDecodeError, ValueError): + continue + + return False \ No newline at end of file From 31555fd2395df148cbd2dfa87c84c0d954e3b437 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 09:37:55 +0200 Subject: [PATCH 04/21] feat: Integrate SentinelMixin with Plotter class - Add SentinelMixin import to plotter.py - Include SentinelMixin in Plotter class hierarchy - Update documentation to list Sentinel integration --- graphistry/plotter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/graphistry/plotter.py b/graphistry/plotter.py index bdf71f3117..ed33881648 100644 --- a/graphistry/plotter.py +++ b/graphistry/plotter.py @@ -13,13 +13,14 @@ from .compute.conditional import ConditionalMixin from .compute.cluster import ClusterMixin from .plugins.kusto import KustoMixin +from .plugins.sentinel import SentinelMixin from .plugins.spanner import SpannerMixin from .client_session import AuthManagerProtocol # NOTE: Cooperative mixins must call: # super().__init__(*a, **kw) in their __init__ method # to pass along args/kwargs to the next mixin in the chain class Plotter( - KustoMixin, SpannerMixin, + SentinelMixin, KustoMixin, SpannerMixin, CosmosMixin, NeptuneMixin, HeterographEmbedModuleMixin, SearchToGraphMixin, @@ -51,6 +52,7 @@ class Plotter( - :py:class:`graphistry.gremlin.GremlinMixin`: Provides Gremlin query support for graph databases. 
- :py:class:`graphistry.gremlin.CosmosMixin`: Integrates with Azure Cosmos DB. - :py:class:`graphistry.gremlin.NeptuneMixin`: Integrates with AWS Neptune DB. + - :py:class:`graphistry.plugins.sentinel.SentinelMixin`: Integrates with Microsoft Sentinel Log Analytics. - :py:class:`graphistry.plugins.kusto.KustoMixin`: Integrates with Azure Kusto DB. - :py:class:`graphistry.plugins.spanner.SpannerMixin`: Integrates with Google Spanner DB. From b32f4a466cdea3f83427ae6e0abe3f65e9c48cf6 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 09:41:31 +0200 Subject: [PATCH 05/21] feat: Add Sentinel dependencies to setup.py - Add azure-monitor-query>=1.2.0 and azure-identity>=1.12.0 to sentinel extras - Add 'Sentinel' to package keywords for discoverability --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f2f3527717..dfcaf26197 100755 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ def unique_flatten_dict(d): 'nodexl': ['openpyxl==3.1.0', 'xlrd'], 'jupyter': ['ipython'], 'spanner': ['google-cloud-spanner'], + 'sentinel': ['azure-monitor-query>=1.2.0', 'azure-identity>=1.12.0'], 'kusto': ['azure-kusto-data', 'azure-identity'] } @@ -150,5 +151,5 @@ def unique_flatten_dict(d): "Code of Conduct": "https://github.com/graphistry/pygraphistry/blob/main/CODE_OF_CONDUCT.md", "Support": "https://www.graphistry.com/support", }, - keywords=['cugraph', 'cudf', 'cuml', 'dask', 'Databricks', 'GFQL', 'GPU', 'Graph', 'graphviz', 'GraphX', 'Gremlin', 'igraph', 'Jupyter', 'Neo4j', 'Neptune', 'Network', 'NetworkX', 'Notebook', 'OpenSearch', 'Pandas', 'Plot', 'RAPIDS', 'RDF', 'Splunk', 'Spark', 'SQL', 'Tinkerpop', 'UMAP', 'Visualization', 'Torch', 'DGL', 'GNN'] + keywords=['cugraph', 'cudf', 'cuml', 'dask', 'Databricks', 'GFQL', 'GPU', 'Graph', 'graphviz', 'GraphX', 'Gremlin', 'igraph', 'Jupyter', 'Neo4j', 'Neptune', 'Network', 'NetworkX', 'Notebook', 'OpenSearch', 'Pandas', 'Plot', 'RAPIDS', 'RDF', 'Sentinel', 'Splunk', 
'Spark', 'SQL', 'Tinkerpop', 'UMAP', 'Visualization', 'Torch', 'DGL', 'GNN'] ) From 9d489e977bc04427a16e5ac0ab6b53874d841b6a Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 09:44:00 +0200 Subject: [PATCH 06/21] test: Add comprehensive unit tests for Sentinel plugin - Test configuration methods (basic, service principal, custom credential) - Test KQL query execution (single/multiple tables, timespan) - Test helper methods (kql_last, sentinel_tables, sentinel_schema) - Test health check functionality - Test nested data unwrapping and JSON parsing - Test authentication initialization flows - Add mock-based testing to avoid API dependencies --- graphistry/tests/test_sentinel.py | 379 ++++++++++++++++++++++++++++++ 1 file changed, 379 insertions(+) create mode 100644 graphistry/tests/test_sentinel.py diff --git a/graphistry/tests/test_sentinel.py b/graphistry/tests/test_sentinel.py new file mode 100644 index 0000000000..d44f94afcf --- /dev/null +++ b/graphistry/tests/test_sentinel.py @@ -0,0 +1,379 @@ +import unittest +from unittest.mock import Mock, MagicMock, patch, PropertyMock +import pandas as pd +from datetime import datetime, timedelta +from typing import List + +from graphistry.plugins_types.sentinel_types import ( + SentinelConfig, + SentinelConnectionError, + SentinelQueryError, + SentinelQueryResult +) + + +class TestSentinelMixin(unittest.TestCase): + """Test cases for SentinelMixin functionality.""" + + def setUp(self): + """Set up test fixtures.""" + # Create a mock Plotter instance with SentinelMixin + from graphistry.plugins.sentinel import SentinelMixin + + class MockPlotter(SentinelMixin): + def __init__(self): + self.session = MagicMock() + self.session.sentinel = None + + self.plotter = MockPlotter() + self.workspace_id = "12345678-1234-1234-1234-123456789abc" + + def test_configure_sentinel_basic(self): + """Test basic Sentinel configuration.""" + result = self.plotter.configure_sentinel( + workspace_id=self.workspace_id + ) + + 
self.assertEqual(result, self.plotter) + self.assertIsNotNone(self.plotter.session.sentinel) + self.assertEqual(self.plotter.session.sentinel.workspace_id, self.workspace_id) + self.assertEqual(self.plotter.session.sentinel.default_timespan, timedelta(hours=24)) + + def test_configure_sentinel_service_principal(self): + """Test Sentinel configuration with service principal.""" + result = self.plotter.configure_sentinel( + workspace_id=self.workspace_id, + tenant_id="tenant-123", + client_id="client-456", + client_secret="secret-789" + ) + + self.assertEqual(result, self.plotter) + config = self.plotter.session.sentinel + self.assertEqual(config.workspace_id, self.workspace_id) + self.assertEqual(config.tenant_id, "tenant-123") + self.assertEqual(config.client_id, "client-456") + self.assertEqual(config.client_secret, "secret-789") + + def test_configure_sentinel_custom_timespan(self): + """Test Sentinel configuration with custom default timespan.""" + custom_timespan = timedelta(days=7) + result = self.plotter.configure_sentinel( + workspace_id=self.workspace_id, + default_timespan=custom_timespan + ) + + self.assertEqual(self.plotter.session.sentinel.default_timespan, custom_timespan) + + @patch('graphistry.plugins.sentinel.init_sentinel_client') + def test_sentinel_client_lazy_initialization(self, mock_init): + """Test that Sentinel client is lazily initialized.""" + mock_client = MagicMock() + mock_init.return_value = mock_client + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + # Client should not be initialized yet + mock_init.assert_not_called() + + # Access client property + client = self.plotter.sentinel_client + + # Now client should be initialized + mock_init.assert_called_once() + self.assertEqual(client, mock_client) + + # Accessing again should not reinitialize + client2 = self.plotter.sentinel_client + mock_init.assert_called_once() + self.assertEqual(client2, mock_client) + + @patch('graphistry.plugins.sentinel.LogsQueryClient') 
+ def test_sentinel_from_client(self, mock_client_class): + """Test configuration from existing client.""" + existing_client = MagicMock() + + result = self.plotter.sentinel_from_client( + client=existing_client, + workspace_id=self.workspace_id + ) + + self.assertEqual(result, self.plotter) + self.assertEqual(self.plotter.session.sentinel.workspace_id, self.workspace_id) + self.assertEqual(self.plotter.session.sentinel._client, existing_client) + + def test_sentinel_close(self): + """Test closing Sentinel connection.""" + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + self.plotter.session.sentinel._client = MagicMock() + + self.plotter.sentinel_close() + + self.assertIsNone(self.plotter.session.sentinel._client) + + @patch.object(SentinelMixin, '_sentinel_query') + def test_kql_single_table(self, mock_query): + """Test KQL query with single table result.""" + # Mock query result + mock_result = SentinelQueryResult( + data=[['value1', 'value2'], ['value3', 'value4']], + column_names=['col1', 'col2'], + column_types=['string', 'string'] + ) + mock_query.return_value = [mock_result] + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + query = "SecurityEvent | take 10" + df = self.plotter.kql(query) + + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(len(df), 2) + self.assertEqual(list(df.columns), ['col1', 'col2']) + mock_query.assert_called_once() + + @patch.object(SentinelMixin, '_sentinel_query') + def test_kql_multiple_tables(self, mock_query): + """Test KQL query with multiple table results.""" + # Mock query results + mock_results = [ + SentinelQueryResult( + data=[['data1']], + column_names=['col1'], + column_types=['string'] + ), + SentinelQueryResult( + data=[['data2']], + column_names=['col2'], + column_types=['string'] + ) + ] + mock_query.return_value = mock_results + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + query = "SecurityEvent | take 5; SecurityAlert | take 5" + dfs = 
self.plotter.kql(query, single_table=False) + + self.assertIsInstance(dfs, list) + self.assertEqual(len(dfs), 2) + self.assertIsInstance(dfs[0], pd.DataFrame) + self.assertIsInstance(dfs[1], pd.DataFrame) + + @patch.object(SentinelMixin, '_sentinel_query') + def test_kql_with_timespan(self, mock_query): + """Test KQL query with custom timespan.""" + mock_query.return_value = [] + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + custom_timespan = timedelta(days=30) + with self.assertRaises(ValueError): # No results + self.plotter.kql("test query", timespan=custom_timespan) + + mock_query.assert_called_with("test query", timespan=custom_timespan) + + @patch.object(SentinelMixin, 'kql') + def test_kql_last(self, mock_kql): + """Test kql_last forwards hours as a timespan and returns kql's DataFrame (compared with pandas; assertEqual on DataFrames raises ValueError).""" + mock_df = pd.DataFrame({'col': [1, 2, 3]}) + mock_kql.return_value = mock_df + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + result = self.plotter.kql_last("test query", hours=48) + + pd.testing.assert_frame_equal(result, mock_df) + mock_kql.assert_called_with("test query", timespan=timedelta(hours=48)) + + @patch.object(SentinelMixin, 'kql') + def test_sentinel_tables(self, mock_kql): + """Test sentinel_tables issues the table-listing query and returns kql's DataFrame (compared with pandas; assertEqual on DataFrames raises ValueError).""" + mock_df = pd.DataFrame({'TableName': ['Table1', 'Table2']}) + mock_kql.return_value = mock_df + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + result = self.plotter.sentinel_tables() + + pd.testing.assert_frame_equal(result, mock_df) + mock_kql.assert_called_with( + "union withsource=TableName * | distinct TableName | sort by TableName asc", + timespan=timedelta(minutes=5) + ) + + @patch.object(SentinelMixin, 'kql') + def test_sentinel_schema(self, mock_kql): + """Test sentinel_schema issues a getschema query and returns kql's DataFrame (compared with pandas; assertEqual on DataFrames raises ValueError).""" + mock_df = pd.DataFrame({ + 'ColumnName': ['Col1', 'Col2'], + 'DataType': ['string', 'datetime'] + }) + mock_kql.return_value = mock_df + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + result = 
self.plotter.sentinel_schema("SecurityEvent") + + pd.testing.assert_frame_equal(result, mock_df) + mock_kql.assert_called_with( + "SecurityEvent | getschema", + timespan=timedelta(minutes=5) + ) + + @patch.object(SentinelMixin, '_sentinel_query') + def test_sentinel_health_check_success(self, mock_query): + """Test successful health check.""" + mock_query.return_value = [MagicMock()] + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + # Should not raise + self.plotter.sentinel_health_check() + + mock_query.assert_called_with("Heartbeat | take 1", timespan=timedelta(hours=1)) + + @patch.object(SentinelMixin, '_sentinel_query') + def test_sentinel_health_check_failure(self, mock_query): + """Test health check failure.""" + mock_query.side_effect = Exception("Connection failed") + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + with self.assertRaises(SentinelConnectionError) as ctx: + self.plotter.sentinel_health_check() + + self.assertIn("Health check failed", str(ctx.exception)) + + +class TestSentinelUtils(unittest.TestCase): + """Test cases for Sentinel utility functions.""" + + def test_unwrap_nested_simple(self): + """Test unwrapping simple nested data.""" + from graphistry.plugins.sentinel import _unwrap_nested + + result = SentinelQueryResult( + data=[ + [{'key': 'value1', 'nested': {'inner': 'data1'}}], + [{'key': 'value2', 'nested': {'inner': 'data2'}}] + ], + column_names=['data'], + column_types=['object'] + ) + + df = _unwrap_nested(result) + + self.assertIn('data.key', df.columns) + self.assertIn('data.nested.inner', df.columns) + self.assertEqual(len(df), 2) + + def test_unwrap_nested_json_string(self): + """Test unwrapping JSON strings.""" + from graphistry.plugins.sentinel import _unwrap_nested + + result = SentinelQueryResult( + data=[ + ['{"key": "value1", "number": 42}'], + ['{"key": "value2", "number": 84}'] + ], + column_names=['json_data'], + column_types=['string'] + ) + + df = _unwrap_nested(result) + 
self.assertIn('json_data.key', df.columns) + self.assertIn('json_data.number', df.columns) + self.assertEqual(df['json_data.key'].iloc[0], 'value1') + self.assertEqual(df['json_data.number'].iloc[0], 42) + + def test_should_unwrap_detection(self): + """Test detection of nested data.""" + from graphistry.plugins.sentinel import _should_unwrap + + # Should unwrap - has object type + result1 = SentinelQueryResult( + data=[[{'nested': 'data'}]], + column_names=['col'], + column_types=['object'] + ) + self.assertTrue(_should_unwrap(result1)) + + # Should unwrap - has dict data + result2 = SentinelQueryResult( + data=[[{'key': 'value'}]], + column_names=['col'], + column_types=['string'] + ) + self.assertTrue(_should_unwrap(result2)) + + # Should not unwrap - simple data + result3 = SentinelQueryResult( + data=[['simple', 'text']], + column_names=['col1', 'col2'], + column_types=['string', 'string'] + ) + self.assertFalse(_should_unwrap(result3)) + + +class TestSentinelAuthentication(unittest.TestCase): + """Test cases for Sentinel authentication.""" + + @patch('graphistry.plugins.sentinel.LogsQueryClient') + @patch('graphistry.plugins.sentinel.DefaultAzureCredential') + def test_init_default_credential(self, mock_credential_class, mock_client_class): + """Test initialization with DefaultAzureCredential.""" + from graphistry.plugins.sentinel import init_sentinel_client + + mock_credential = MagicMock() + mock_credential_class.return_value = mock_credential + + config = SentinelConfig(workspace_id="test-workspace") + client = init_sentinel_client(config) + + mock_credential_class.assert_called_once() + mock_client_class.assert_called_once_with(mock_credential) + + @patch('graphistry.plugins.sentinel.LogsQueryClient') + @patch('graphistry.plugins.sentinel.ClientSecretCredential') + def test_init_service_principal(self, mock_credential_class, mock_client_class): + """Test initialization with service principal.""" + from graphistry.plugins.sentinel import 
init_sentinel_client + + mock_credential = MagicMock() + mock_credential_class.return_value = mock_credential + + config = SentinelConfig( + workspace_id="test-workspace", + tenant_id="tenant", + client_id="client", + client_secret="secret" + ) + client = init_sentinel_client(config) + + mock_credential_class.assert_called_once_with( + tenant_id="tenant", + client_id="client", + client_secret="secret" + ) + mock_client_class.assert_called_once_with(mock_credential) + + @patch('graphistry.plugins.sentinel.LogsQueryClient') + def test_init_custom_credential(self, mock_client_class): + """Test initialization with custom credential.""" + from graphistry.plugins.sentinel import init_sentinel_client + + custom_credential = MagicMock() + config = SentinelConfig( + workspace_id="test-workspace", + credential=custom_credential + ) + + client = init_sentinel_client(config) + + mock_client_class.assert_called_once_with(custom_credential) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From ae134d96da999c94d38465ce2860fb6a366ede0a Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 09:47:22 +0200 Subject: [PATCH 07/21] docs: Add comprehensive Sentinel security analysis notebook - Demonstrate Azure CLI and Service Principal authentication - Show security use cases: failed logins, alerts, network analysis - Include graph visualizations for user-IP and alert correlations - Provide examples of multi-table KQL queries - Cover workspace exploration and schema inspection - Add troubleshooting guidance and next steps --- .../sentinel/sentinel_security_analysis.ipynb | 533 ++++++++++++++++++ 1 file changed, 533 insertions(+) create mode 100644 demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb new file mode 100644 index 0000000000..2d3d2822de 
--- /dev/null +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -0,0 +1,533 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Microsoft Sentinel Security Analysis with Graphistry\n", + "\n", + "This notebook demonstrates how to use Graphistry with Microsoft Sentinel (Log Analytics) to perform security analysis and visualization using KQL queries.\n", + "\n", + "## Prerequisites\n", + "\n", + "1. **Azure Access**: You need access to a Microsoft Sentinel workspace\n", + "2. **Authentication**: Either Azure CLI (`az login`) or service principal credentials\n", + "3. **Dependencies**: Install Sentinel extras\n", + "\n", + "```bash\n", + "pip install graphistry[sentinel]\n", + "```\n", + "\n", + "## Getting Started\n", + "\n", + "### Option 1: Azure CLI Authentication (Recommended for Development)\n", + "\n", + "First, login with Azure CLI:\n", + "```bash\n", + "az login\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import graphistry\n", + "from datetime import datetime, timedelta\n", + "import pandas as pd\n", + "\n", + "# Register for free at https://www.graphistry.com\n", + "graphistry.register(api=3, username='your_username', password='your_password')\n", + "\n", + "# Configure Sentinel connection\n", + "# Replace with your actual workspace ID\n", + "WORKSPACE_ID = \"12345678-1234-1234-1234-123456789abc\"\n", + "\n", + "g = graphistry.configure_sentinel(\n", + " workspace_id=WORKSPACE_ID\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Option 2: Service Principal Authentication (Recommended for Production)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Alternative: Service Principal authentication\n", + "# g = graphistry.configure_sentinel(\n", + "# workspace_id=\"your-workspace-id\",\n", + "# 
tenant_id=\"your-tenant-id\",\n", + "# client_id=\"your-client-id\",\n", + "# client_secret=\"your-client-secret\"\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Connection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test the connection\n", + "try:\n", + " g.sentinel_health_check()\n", + " print(\"✅ Successfully connected to Microsoft Sentinel!\")\n", + "except Exception as e:\n", + " print(f\"❌ Connection failed: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Explore Available Data\n", + "\n", + "Let's start by exploring what tables are available in your Sentinel workspace:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# List all available tables\n", + "tables_df = g.sentinel_tables()\n", + "print(f\"Found {len(tables_df)} tables in workspace\")\n", + "print(\"\\nSecurity-related tables:\")\n", + "security_tables = tables_df[tables_df['TableName'].str.contains('Security|Alert|Incident', case=False, na=False)]\n", + "print(security_tables['TableName'].tolist())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get schema for SecurityEvent table (if available)\n", + "if 'SecurityEvent' in tables_df['TableName'].values:\n", + " schema = g.sentinel_schema('SecurityEvent')\n", + " print(\"SecurityEvent table schema:\")\n", + " print(schema[['ColumnName', 'DataType']].head(10))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Security Analysis Examples\n", + "\n", + "### 1. 
Failed Login Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query failed login attempts (last 7 days)\n", + "failed_logins_query = \"\"\"\n", + "SigninLogs\n", + "| where TimeGenerated > ago(7d)\n", + "| where ResultType != \"0\" // 0 = success\n", + "| project TimeGenerated, UserPrincipalName, IPAddress, Location, ResultType, ResultDescription\n", + "| summarize \n", + " FailureCount = count(),\n", + " UniqueIPs = dcount(IPAddress),\n", + " LatestFailure = max(TimeGenerated)\n", + " by UserPrincipalName\n", + "| where FailureCount > 5\n", + "| order by FailureCount desc\n", + "| take 50\n", + "\"\"\"\n", + "\n", + "try:\n", + " failed_logins = g.kql(failed_logins_query, timespan=timedelta(days=7))\n", + " print(f\"Found {len(failed_logins)} users with multiple failed logins\")\n", + " print(failed_logins.head())\n", + "except Exception as e:\n", + " print(f\"Query failed: {e}\")\n", + " print(\"This might happen if SigninLogs table is not available in your workspace\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. 
Security Alerts Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query recent security alerts\n", + "alerts_query = \"\"\"\n", + "SecurityAlert\n", + "| where TimeGenerated > ago(24h)\n", + "| project \n", + " TimeGenerated,\n", + " AlertName,\n", + " AlertSeverity,\n", + " CompromisedEntity,\n", + " Tactics,\n", + " Techniques,\n", + " Status\n", + "| order by TimeGenerated desc\n", + "\"\"\"\n", + "\n", + "try:\n", + " alerts = g.kql_last(alerts_query, hours=24)\n", + " print(f\"Found {len(alerts)} security alerts in the last 24 hours\")\n", + " if len(alerts) > 0:\n", + " print(\"\\nAlert severity distribution:\")\n", + " print(alerts['AlertSeverity'].value_counts())\n", + " print(\"\\nSample alerts:\")\n", + " print(alerts[['TimeGenerated', 'AlertName', 'AlertSeverity']].head())\n", + " else:\n", + " print(\"No alerts found (this is good!)\")\n", + "except Exception as e:\n", + " print(f\"Query failed: {e}\")\n", + " print(\"This might happen if SecurityAlert table is not available\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. 
Network Traffic Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query network connections (example with CommonSecurityLog)\n", + "network_query = \"\"\"\n", + "CommonSecurityLog\n", + "| where TimeGenerated > ago(1h)\n", + "| where isnotempty(SourceIP) and isnotempty(DestinationIP)\n", + "| project \n", + " TimeGenerated,\n", + " SourceIP,\n", + " DestinationIP,\n", + " DestinationPort,\n", + " Protocol,\n", + " Activity,\n", + " DeviceVendor\n", + "| summarize \n", + " ConnectionCount = count(),\n", + " UniquePorts = dcount(DestinationPort)\n", + " by SourceIP, DestinationIP\n", + "| where ConnectionCount > 10\n", + "| order by ConnectionCount desc\n", + "| take 100\n", + "\"\"\"\n", + "\n", + "try:\n", + " network_data = g.kql_last(network_query, hours=1)\n", + " print(f\"Found {len(network_data)} significant network connections\")\n", + " if len(network_data) > 0:\n", + " print(network_data.head())\n", + "except Exception as e:\n", + " print(f\"Query failed: {e}\")\n", + " print(\"This might happen if CommonSecurityLog table is not available\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Graph Visualization\n", + "\n", + "Now let's create some graph visualizations from the security data:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. 
User-IP Relationship Graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query for user-IP relationships\n", + "user_ip_query = \"\"\"\n", + "SigninLogs\n", + "| where TimeGenerated > ago(24h)\n", + "| where isnotempty(UserPrincipalName) and isnotempty(IPAddress)\n", + "| project UserPrincipalName, IPAddress, TimeGenerated, ResultType, Location\n", + "| summarize \n", + " LoginCount = count(),\n", + " FailureCount = countif(ResultType != \"0\"),\n", + " LatestLogin = max(TimeGenerated),\n", + " Locations = make_set(Location)\n", + " by UserPrincipalName, IPAddress\n", + "| extend RiskScore = FailureCount * 2 + iff(LoginCount == 1, 1, 0)\n", + "| take 500\n", + "\"\"\"\n", + "\n", + "try:\n", + " user_ip_data = g.kql_last(user_ip_query, hours=24)\n", + " \n", + " if len(user_ip_data) > 0:\n", + " # Create nodes and edges for graph visualization\n", + " \n", + " # Create user nodes\n", + " users = user_ip_data[['UserPrincipalName']].drop_duplicates()\n", + " users['node_type'] = 'user'\n", + " users['node_id'] = users['UserPrincipalName']\n", + " users['node_label'] = users['UserPrincipalName']\n", + " \n", + " # Create IP nodes \n", + " ips = user_ip_data[['IPAddress']].drop_duplicates()\n", + " ips['node_type'] = 'ip'\n", + " ips['node_id'] = ips['IPAddress']\n", + " ips['node_label'] = ips['IPAddress']\n", + " \n", + " # Combine nodes\n", + " nodes = pd.concat([\n", + " users[['node_id', 'node_label', 'node_type']],\n", + " ips[['node_id', 'node_label', 'node_type']]\n", + " ], ignore_index=True)\n", + " \n", + " # Create edges\n", + " edges = user_ip_data.copy()\n", + " edges['source'] = edges['UserPrincipalName']\n", + " edges['target'] = edges['IPAddress']\n", + " edges['edge_weight'] = edges['LoginCount']\n", + " edges['edge_color'] = edges['RiskScore'].apply(\n", + " lambda x: 'red' if x > 5 else 'orange' if x > 2 else 'green'\n", + " )\n", + " \n", + " # Create and plot graph\n", + " 
graph = g.nodes(nodes, node='node_id')\\\n", + " .edges(edges, source='source', destination='target')\\\n", + " .bind(point_color='node_type', edge_color='edge_color')\\\n", + " .settings(url_params={'splashAfter': 'false'})\n", + " \n", + " print(f\"Created graph with {len(nodes)} nodes and {len(edges)} edges\")\n", + " \n", + " # Plot the graph\n", + " graph.plot()\n", + " else:\n", + " print(\"No data available for user-IP graph\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Graph creation failed: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Alert Correlation Graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query for alert correlations\n", + "alert_correlation_query = \"\"\"\n", + "SecurityAlert\n", + "| where TimeGenerated > ago(7d)\n", + "| project \n", + " AlertName,\n", + " CompromisedEntity,\n", + " Tactics,\n", + " AlertSeverity,\n", + " TimeGenerated\n", + "| extend EntityType = case(\n", + " CompromisedEntity contains \"@\", \"User\",\n", + " CompromisedEntity matches regex @\"\\\\b(?:[0-9]{1,3}\\\\.){3}[0-9]{1,3}\\\\b\", \"IP\",\n", + " \"Host\"\n", + ")\n", + "| summarize \n", + " AlertCount = count(),\n", + " Severities = make_set(AlertSeverity),\n", + " TacticsList = make_set(Tactics)\n", + " by AlertName, CompromisedEntity, EntityType\n", + "| where AlertCount > 1\n", + "| take 200\n", + "\"\"\"\n", + "\n", + "try:\n", + " alert_data = g.kql(alert_correlation_query, timespan=timedelta(days=7))\n", + " \n", + " if len(alert_data) > 0:\n", + " # Create alert type nodes\n", + " alert_types = alert_data[['AlertName']].drop_duplicates()\n", + " alert_types['node_type'] = 'alert'\n", + " alert_types['node_id'] = alert_types['AlertName']\n", + " alert_types['node_label'] = alert_types['AlertName']\n", + " \n", + " # Create entity nodes\n", + " entities = alert_data[['CompromisedEntity', 'EntityType']].drop_duplicates()\n", + " 
entities['node_type'] = entities['EntityType'].str.lower()\n", + " entities['node_id'] = entities['CompromisedEntity']\n", + " entities['node_label'] = entities['CompromisedEntity']\n", + " \n", + " # Combine nodes\n", + " alert_nodes = pd.concat([\n", + " alert_types[['node_id', 'node_label', 'node_type']],\n", + " entities[['node_id', 'node_label', 'node_type']]\n", + " ], ignore_index=True)\n", + " \n", + " # Create edges (alert -> entity)\n", + " alert_edges = alert_data.copy()\n", + " alert_edges['source'] = alert_edges['AlertName']\n", + " alert_edges['target'] = alert_edges['CompromisedEntity']\n", + " alert_edges['edge_weight'] = alert_edges['AlertCount']\n", + " \n", + " # Create and plot graph (bind() names node encodings point_*, e.g. point_color)\n", + " alert_graph = g.nodes(alert_nodes, node='node_id')\\\n", + " .edges(alert_edges, source='source', destination='target')\\\n", + " .bind(point_color='node_type', edge_weight='edge_weight')\\\n", + " .settings(url_params={'splashAfter': 'false'})\n", + " \n", + " print(f\"Created alert correlation graph with {len(alert_nodes)} nodes and {len(alert_edges)} edges\")\n", + " \n", + " # Plot the graph\n", + " alert_graph.plot()\n", + " else:\n", + " print(\"No alert correlation data available\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Alert correlation graph failed: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Advanced Analysis\n", + "\n", + "### Multi-table Correlation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Complex query joining multiple data sources\n", + "correlation_query = \"\"\"\n", + "// Get security incidents\n", + "let incidents = SecurityIncident\n", + "| where TimeGenerated > ago(30d)\n", + "| project IncidentNumber, Title, Severity, Status, Owner;\n", + "\n", + "// Get related alerts \n", + "let alerts = SecurityAlert\n", + "| where TimeGenerated > ago(30d)\n", + "| project AlertName, CompromisedEntity, AlertSeverity, 
Tactics;\n", + "\n", + "// Join and analyze\n", + "incidents\n", + "| join kind=inner (alerts) on $left.Title == $right.AlertName\n", + "| summarize \n", + " IncidentCount = dcount(IncidentNumber),\n", + " AffectedEntities = dcount(CompromisedEntity),\n", + " TacticsUsed = make_set(Tactics)\n", + " by Title, Severity\n", + "| order by IncidentCount desc\n", + "\"\"\"\n", + "\n", + "try:\n", + " correlation_data = g.kql(correlation_query, timespan=timedelta(days=30))\n", + " print(f\"Found {len(correlation_data)} incident-alert correlations\")\n", + " if len(correlation_data) > 0:\n", + " print(correlation_data.head())\n", + "except Exception as e:\n", + " print(f\"Correlation query failed: {e}\")\n", + " print(\"This requires both SecurityIncident and SecurityAlert tables\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "This notebook demonstrated:\n", + "\n", + "1. **Connecting to Microsoft Sentinel** using Azure authentication\n", + "2. **Exploring available data** with `sentinel_tables()` and `sentinel_schema()`\n", + "3. **Security analysis** using KQL queries for:\n", + " - Failed login analysis\n", + " - Security alerts monitoring\n", + " - Network traffic analysis\n", + "4. **Graph visualization** of:\n", + " - User-IP relationships\n", + " - Alert correlations\n", + "5. 
**Advanced correlation** across multiple data sources\n", + "\n", + "## Next Steps\n", + "\n", + "- Customize queries for your specific security use cases\n", + "- Create automated monitoring dashboards\n", + "- Integrate with threat intelligence feeds\n", + "- Build detection rules based on graph patterns\n", + "\n", + "## Resources\n", + "\n", + "- [Microsoft Sentinel KQL Reference](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/)\n", + "- [Graphistry Documentation](https://pygraphistry.readthedocs.io/)\n", + "- [Azure Monitor Query Documentation](https://docs.microsoft.com/en-us/python/api/azure-monitor-query/)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file From b7fd600306fe7fea788f92dfc9a3acd45f1fcc6c Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 09:58:30 +0200 Subject: [PATCH 08/21] feat: Update Sentinel notebook to use .env file for credentials - Replace hardcoded credentials with environment variables - Add example.env template file - Support custom .env file locations - Include python-dotenv dependency instructions - Improve security by avoiding credential commits --- .../microsoft/sentinel/example.env | 12 ++++ .../sentinel/sentinel_security_analysis.ipynb | 56 ++----------------- 2 files changed, 17 insertions(+), 51 deletions(-) create mode 100644 demos/demos_databases_apis/microsoft/sentinel/example.env diff --git a/demos/demos_databases_apis/microsoft/sentinel/example.env b/demos/demos_databases_apis/microsoft/sentinel/example.env new file mode 100644 index 0000000000..cc0f2c9510 --- /dev/null +++ 
b/demos/demos_databases_apis/microsoft/sentinel/example.env @@ -0,0 +1,12 @@ +# Graphistry credentials (register at https://www.graphistry.com) +GRAPHISTRY_USERNAME=your_username +GRAPHISTRY_PASSWORD=your_password +# GRAPHISTRY_SERVER=https://hub.graphistry.com # Optional, defaults to hub.graphistry.com + +# Microsoft Sentinel workspace +SENTINEL_WORKSPACE_ID=12345678-1234-1234-1234-123456789abc + +# Optional: Service Principal authentication (if not using Azure CLI) +# AZURE_TENANT_ID=your-tenant-id +# AZURE_CLIENT_ID=your-client-id +# AZURE_CLIENT_SECRET=your-client-secret \ No newline at end of file diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb index 2d3d2822de..1f8b883e75 100644 --- a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -3,52 +3,14 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "# Microsoft Sentinel Security Analysis with Graphistry\n", - "\n", - "This notebook demonstrates how to use Graphistry with Microsoft Sentinel (Log Analytics) to perform security analysis and visualization using KQL queries.\n", - "\n", - "## Prerequisites\n", - "\n", - "1. **Azure Access**: You need access to a Microsoft Sentinel workspace\n", - "2. **Authentication**: Either Azure CLI (`az login`) or service principal credentials\n", - "3. 
**Dependencies**: Install Sentinel extras\n", - "\n", - "```bash\n", - "pip install graphistry[sentinel]\n", - "```\n", - "\n", - "## Getting Started\n", - "\n", - "### Option 1: Azure CLI Authentication (Recommended for Development)\n", - "\n", - "First, login with Azure CLI:\n", - "```bash\n", - "az login\n", - "```" - ] + "source": "# Microsoft Sentinel Security Analysis with Graphistry\n\nThis notebook demonstrates how to use Graphistry with Microsoft Sentinel (Log Analytics) to perform security analysis and visualization using KQL queries.\n\n## Prerequisites\n\n1. **Azure Access**: You need access to a Microsoft Sentinel workspace\n2. **Authentication**: Either Azure CLI (`az login`) or service principal credentials\n3. **Dependencies**: Install required packages\n\n```bash\npip install graphistry[sentinel] python-dotenv\n```\n\n## Environment Setup\n\n1. Copy `example.env` to `.env` in the same directory as this notebook\n2. Edit `.env` with your actual credentials:\n\n```bash\ncp example.env .env\n# Then edit .env with your credentials\n```\n\nThe `.env` file should contain:\n\n```env\n# Graphistry credentials (register at https://www.graphistry.com)\nGRAPHISTRY_USERNAME=your_username\nGRAPHISTRY_PASSWORD=your_password\n\n# Microsoft Sentinel workspace\nSENTINEL_WORKSPACE_ID=12345678-1234-1234-1234-123456789abc\n\n# Optional: Service Principal authentication (if not using Azure CLI)\n# AZURE_TENANT_ID=your-tenant-id\n# AZURE_CLIENT_ID=your-client-id\n# AZURE_CLIENT_SECRET=your-client-secret\n```\n\n**Important**: The `.env` file is gitignored to avoid committing secrets. 
Never commit actual credentials!\n\n## Getting Started\n\n### Option 1: Azure CLI Authentication (Recommended for Development)\n\nFirst, login with Azure CLI:\n```bash\naz login\n```" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import graphistry\n", - "from datetime import datetime, timedelta\n", - "import pandas as pd\n", - "\n", - "# Register for free at https://www.graphistry.com\n", - "graphistry.register(api=3, username='your_username', password='your_password')\n", - "\n", - "# Configure Sentinel connection\n", - "# Replace with your actual workspace ID\n", - "WORKSPACE_ID = \"12345678-1234-1234-1234-123456789abc\"\n", - "\n", - "g = graphistry.configure_sentinel(\n", - " workspace_id=WORKSPACE_ID\n", - ")" - ] + "source": "import graphistry\nfrom datetime import datetime, timedelta\nimport pandas as pd\nimport os\nfrom dotenv import load_dotenv\n\n# Load environment variables from .env file\n# Option 1: Load from current directory (default)\nload_dotenv()\n\n# Option 2: Load from a custom location (uncomment and modify as needed)\n# load_dotenv('~/custom.env') # Load from home directory\n# load_dotenv('/path/to/your/.env') # Load from absolute path\n# load_dotenv(os.path.expanduser('~/sentinel-credentials.env')) # Expand ~ to home directory\n\n# Register for free at https://www.graphistry.com\n# Credentials loaded from .env file\ngraphistry.register(\n api=3, \n username=os.getenv('GRAPHISTRY_USERNAME'),\n password=os.getenv('GRAPHISTRY_PASSWORD'),\n # Optional: specify server if not using default\n # server=os.getenv('GRAPHISTRY_SERVER', 'https://hub.graphistry.com')\n)\n\n# Configure Sentinel connection\n# Workspace ID loaded from .env file\nWORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n\nif not WORKSPACE_ID:\n raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n\ng = graphistry.configure_sentinel(\n workspace_id=WORKSPACE_ID\n)" }, { "cell_type": "markdown", @@ -62,15 +24,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Alternative: Service Principal authentication\n", - "# g = graphistry.configure_sentinel(\n", - "# workspace_id=\"your-workspace-id\",\n", - "# tenant_id=\"your-tenant-id\",\n", - "# client_id=\"your-client-id\",\n", - "# client_secret=\"your-client-secret\"\n", - "# )" - ] + "source": "# Alternative: Service Principal authentication from .env file\n# g = graphistry.configure_sentinel(\n# workspace_id=os.getenv('SENTINEL_WORKSPACE_ID'),\n# tenant_id=os.getenv('AZURE_TENANT_ID'),\n# client_id=os.getenv('AZURE_CLIENT_ID'),\n# client_secret=os.getenv('AZURE_CLIENT_SECRET')\n# )" }, { "cell_type": "markdown", @@ -511,7 +465,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -525,7 +479,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.13.5" } }, "nbformat": 4, From 4dfd959e336a8b823f1d3be7c60656142587d3d9 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 10:05:43 +0200 Subject: [PATCH 09/21] feat: Update Graphistry authentication to use personal keys - Replace username/password with personal_key_id/personal_key_secret - Update example.env with new credential format - Use modern Graphistry authentication method - Maintain security with environment variables --- .../microsoft/sentinel/example.env | 5 ++--- .../sentinel/sentinel_security_analysis.ipynb | 14 +++++++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/demos/demos_databases_apis/microsoft/sentinel/example.env b/demos/demos_databases_apis/microsoft/sentinel/example.env index cc0f2c9510..0204994648 100644 --- a/demos/demos_databases_apis/microsoft/sentinel/example.env +++ 
b/demos/demos_databases_apis/microsoft/sentinel/example.env @@ -1,7 +1,6 @@ # Graphistry credentials (register at https://www.graphistry.com) -GRAPHISTRY_USERNAME=your_username -GRAPHISTRY_PASSWORD=your_password -# GRAPHISTRY_SERVER=https://hub.graphistry.com # Optional, defaults to hub.graphistry.com +GRAPHISTRY_PERSONAL_KEY_ID=your_personal_key_id +GRAPHISTRY_PERSONAL_KEY_SECRET=your_personal_key_secret # Microsoft Sentinel workspace SENTINEL_WORKSPACE_ID=12345678-1234-1234-1234-123456789abc diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb index 1f8b883e75..777a106910 100644 --- a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -3,14 +3,14 @@ { "cell_type": "markdown", "metadata": {}, - "source": "# Microsoft Sentinel Security Analysis with Graphistry\n\nThis notebook demonstrates how to use Graphistry with Microsoft Sentinel (Log Analytics) to perform security analysis and visualization using KQL queries.\n\n## Prerequisites\n\n1. **Azure Access**: You need access to a Microsoft Sentinel workspace\n2. **Authentication**: Either Azure CLI (`az login`) or service principal credentials\n3. **Dependencies**: Install required packages\n\n```bash\npip install graphistry[sentinel] python-dotenv\n```\n\n## Environment Setup\n\n1. Copy `example.env` to `.env` in the same directory as this notebook\n2. 
Edit `.env` with your actual credentials:\n\n```bash\ncp example.env .env\n# Then edit .env with your credentials\n```\n\nThe `.env` file should contain:\n\n```env\n# Graphistry credentials (register at https://www.graphistry.com)\nGRAPHISTRY_USERNAME=your_username\nGRAPHISTRY_PASSWORD=your_password\n\n# Microsoft Sentinel workspace\nSENTINEL_WORKSPACE_ID=12345678-1234-1234-1234-123456789abc\n\n# Optional: Service Principal authentication (if not using Azure CLI)\n# AZURE_TENANT_ID=your-tenant-id\n# AZURE_CLIENT_ID=your-client-id\n# AZURE_CLIENT_SECRET=your-client-secret\n```\n\n**Important**: The `.env` file is gitignored to avoid committing secrets. Never commit actual credentials!\n\n## Getting Started\n\n### Option 1: Azure CLI Authentication (Recommended for Development)\n\nFirst, login with Azure CLI:\n```bash\naz login\n```" + "source": "# Microsoft Sentinel Security Analysis with Graphistry\n\nThis notebook demonstrates how to use Graphistry with Microsoft Sentinel (Log Analytics) to perform security analysis and visualization using KQL queries.\n\n## Prerequisites\n\n1. **Azure Access**: You need access to a Microsoft Sentinel workspace\n2. **Authentication**: Either Azure CLI (`az login`) or service principal credentials\n3. **Dependencies**: Install required packages\n\n```bash\npip install graphistry[sentinel] python-dotenv\n```\n\n## Environment Setup\n\n1. Copy `example.env` to `.env` in the same directory as this notebook\n2. 
Edit `.env` with your actual credentials:\n\n```bash\ncp example.env .env\n# Then edit .env with your credentials\n```\n\nThe `.env` file should contain:\n\n```env\n# Graphistry credentials (register at https://www.graphistry.com)\nGRAPHISTRY_PERSONAL_KEY_ID=your_personal_key_id\nGRAPHISTRY_PERSONAL_KEY_SECRET=your_personal_key_secret\n\n# Microsoft Sentinel workspace\nSENTINEL_WORKSPACE_ID=12345678-1234-1234-1234-123456789abc\n\n# Optional: Service Principal authentication (if not using Azure CLI)\n# AZURE_TENANT_ID=your-tenant-id\n# AZURE_CLIENT_ID=your-client-id\n# AZURE_CLIENT_SECRET=your-client-secret\n```\n\n**Important**: The `.env` file is gitignored to avoid committing secrets. Never commit actual credentials!\n\n## Getting Started\n\n### Option 1: Azure CLI Authentication (Recommended for Development)\n\nFirst, login with Azure CLI:\n```bash\naz login\n```" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": "import graphistry\nfrom datetime import datetime, timedelta\nimport pandas as pd\nimport os\nfrom dotenv import load_dotenv\n\n# Load environment variables from .env file\n# Option 1: Load from current directory (default)\nload_dotenv()\n\n# Option 2: Load from a custom location (uncomment and modify as needed)\n# load_dotenv('~/custom.env') # Load from home directory\n# load_dotenv('/path/to/your/.env') # Load from absolute path\n# load_dotenv(os.path.expanduser('~/sentinel-credentials.env')) # Expand ~ to home directory\n\n# Register for free at https://www.graphistry.com\n# Credentials loaded from .env file\ngraphistry.register(\n api=3, \n username=os.getenv('GRAPHISTRY_USERNAME'),\n password=os.getenv('GRAPHISTRY_PASSWORD'),\n # Optional: specify server if not using default\n # server=os.getenv('GRAPHISTRY_SERVER', 'https://hub.graphistry.com')\n)\n\n# Configure Sentinel connection\n# Workspace ID loaded from .env file\nWORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n\nif not WORKSPACE_ID:\n raise 
ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. Please check your .env file.\")\n\ng = graphistry.configure_sentinel(\n workspace_id=WORKSPACE_ID\n)" + "source": "import graphistry\nfrom datetime import datetime, timedelta\nimport pandas as pd\nimport os\nfrom dotenv import load_dotenv\n\n# Load environment variables from .env file\n# Option 1: Load from current directory (default)\nload_dotenv()\n\n# Option 2: Load from a custom location (uncomment and modify as needed)\n# load_dotenv('~/custom.env') # Load from home directory\n# load_dotenv('/path/to/your/.env') # Load from absolute path\n# load_dotenv(os.path.expanduser('~/sentinel-credentials.env')) # Expand ~ to home directory\n\n# Register for free at https://www.graphistry.com\n# Credentials loaded from .env file\ngraphistry.register(\n api=3,\n protocol=\"https\",\n server=\"hub.graphistry.com\",\n personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n)\n\n# Configure Sentinel connection\n# Workspace ID loaded from .env file\nWORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n\nif not WORKSPACE_ID:\n raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n\ng = graphistry.configure_sentinel(\n workspace_id=WORKSPACE_ID\n)" }, { "cell_type": "markdown", @@ -24,7 +24,15 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Alternative: Service Principal authentication from .env file\n# g = graphistry.configure_sentinel(\n# workspace_id=os.getenv('SENTINEL_WORKSPACE_ID'),\n# tenant_id=os.getenv('AZURE_TENANT_ID'),\n# client_id=os.getenv('AZURE_CLIENT_ID'),\n# client_secret=os.getenv('AZURE_CLIENT_SECRET')\n# )" + "source": [ + "# Alternative: Service Principal authentication from .env file\n", + "# g = graphistry.configure_sentinel(\n", + "# workspace_id=os.getenv('SENTINEL_WORKSPACE_ID'),\n", + "# tenant_id=os.getenv('AZURE_TENANT_ID'),\n", + "# client_id=os.getenv('AZURE_CLIENT_ID'),\n", + "# client_secret=os.getenv('AZURE_CLIENT_SECRET')\n", + "# )" + ] }, { "cell_type": "markdown", From 99475f5e5c91492e39df2534924a9366b5898f60 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 11:54:43 +0200 Subject: [PATCH 10/21] fix: Export Sentinel methods at module level - Add configure_sentinel and sentinel_from_client to __init__.py - Add corresponding methods in pygraphistry.py - Fix module-level access to Sentinel functionality --- .../sentinel/sentinel_security_analysis.ipynb | 108 +++++++++++++++++- graphistry/__init__.py | 2 + graphistry/pygraphistry.py | 24 ++++ 3 files changed, 128 insertions(+), 6 deletions(-) diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb index 777a106910..05a4095dab 100644 --- a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -3,14 +3,110 @@ { "cell_type": "markdown", "metadata": {}, - "source": "# Microsoft Sentinel Security Analysis with Graphistry\n\nThis notebook demonstrates how to use 
Graphistry with Microsoft Sentinel (Log Analytics) to perform security analysis and visualization using KQL queries.\n\n## Prerequisites\n\n1. **Azure Access**: You need access to a Microsoft Sentinel workspace\n2. **Authentication**: Either Azure CLI (`az login`) or service principal credentials\n3. **Dependencies**: Install required packages\n\n```bash\npip install graphistry[sentinel] python-dotenv\n```\n\n## Environment Setup\n\n1. Copy `example.env` to `.env` in the same directory as this notebook\n2. Edit `.env` with your actual credentials:\n\n```bash\ncp example.env .env\n# Then edit .env with your credentials\n```\n\nThe `.env` file should contain:\n\n```env\n# Graphistry credentials (register at https://www.graphistry.com)\nGRAPHISTRY_PERSONAL_KEY_ID=your_personal_key_id\nGRAPHISTRY_PERSONAL_KEY_SECRET=your_personal_key_secret\n\n# Microsoft Sentinel workspace\nSENTINEL_WORKSPACE_ID=12345678-1234-1234-1234-123456789abc\n\n# Optional: Service Principal authentication (if not using Azure CLI)\n# AZURE_TENANT_ID=your-tenant-id\n# AZURE_CLIENT_ID=your-client-id\n# AZURE_CLIENT_SECRET=your-client-secret\n```\n\n**Important**: The `.env` file is gitignored to avoid committing secrets. Never commit actual credentials!\n\n## Getting Started\n\n### Option 1: Azure CLI Authentication (Recommended for Development)\n\nFirst, login with Azure CLI:\n```bash\naz login\n```" + "source": [ + "# Microsoft Sentinel Security Analysis with Graphistry\n", + "\n", + "This notebook demonstrates how to use Graphistry with Microsoft Sentinel (Log Analytics) to perform security analysis and visualization using KQL queries.\n", + "\n", + "## Prerequisites\n", + "\n", + "1. **Azure Access**: You need access to a Microsoft Sentinel workspace\n", + "2. **Authentication**: Either Azure CLI (`az login`) or service principal credentials\n", + "3. 
**Dependencies**: Install required packages\n", + "\n", + "```bash\n", + "pip install graphistry[sentinel] python-dotenv\n", + "```\n", + "\n", + "## Environment Setup\n", + "\n", + "1. Copy `example.env` to `.env` in the same directory as this notebook\n", + "2. Edit `.env` with your actual credentials:\n", + "\n", + "```bash\n", + "cp example.env .env\n", + "# Then edit .env with your credentials\n", + "```\n", + "\n", + "The `.env` file should contain:\n", + "\n", + "```env\n", + "# Graphistry credentials (register at https://www.graphistry.com)\n", + "GRAPHISTRY_PERSONAL_KEY_ID=your_personal_key_id\n", + "GRAPHISTRY_PERSONAL_KEY_SECRET=your_personal_key_secret\n", + "\n", + "# Microsoft Sentinel workspace\n", + "SENTINEL_WORKSPACE_ID=12345678-1234-1234-1234-123456789abc\n", + "\n", + "# Optional: Service Principal authentication (if not using Azure CLI)\n", + "# AZURE_TENANT_ID=your-tenant-id\n", + "# AZURE_CLIENT_ID=your-client-id\n", + "# AZURE_CLIENT_SECRET=your-client-secret\n", + "```\n", + "\n", + "**Important**: The `.env` file is gitignored to avoid committing secrets. 
Never commit actual credentials!\n", + "\n", + "## Getting Started\n", + "\n", + "### Option 1: Azure CLI Authentication (Recommended for Development)\n", + "\n", + "First, login with Azure CLI:\n", + "```bash\n", + "az login\n", + "```" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": "import graphistry\nfrom datetime import datetime, timedelta\nimport pandas as pd\nimport os\nfrom dotenv import load_dotenv\n\n# Load environment variables from .env file\n# Option 1: Load from current directory (default)\nload_dotenv()\n\n# Option 2: Load from a custom location (uncomment and modify as needed)\n# load_dotenv('~/custom.env') # Load from home directory\n# load_dotenv('/path/to/your/.env') # Load from absolute path\n# load_dotenv(os.path.expanduser('~/sentinel-credentials.env')) # Expand ~ to home directory\n\n# Register for free at https://www.graphistry.com\n# Credentials loaded from .env file\ngraphistry.register(\n api=3,\n protocol=\"https\",\n server=\"hub.graphistry.com\",\n personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n)\n\n# Configure Sentinel connection\n# Workspace ID loaded from .env file\nWORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n\nif not WORKSPACE_ID:\n raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n\ng = graphistry.configure_sentinel(\n workspace_id=WORKSPACE_ID\n)" + "source": [ + "import graphistry\n", + "from datetime import datetime, timedelta\n", + "import pandas as pd\n", + "import os\n", + "from dotenv import load_dotenv, dotenv_values\n", + "\n", + "# Load environment variables from .env file\n", + "# Option 1: Load from current directory (default)\n", + "#load_dotenv()\n", + "\n", + "# Option 2: Load from a custom location (uncomment and modify as needed)\n", + "# load_dotenv('~/sentinel.env') # Load from home directory\n", + "# load_dotenv('/path/to/your/.env') # Load from absolute path\n", + "load_dotenv(os.path.expanduser('~/Documents/Graphistry/sentinel.env')) # Expand ~ to home directory\n", + "print(dotenv_values('/home/sinbre/Documents/Graphistry/sentinel.env'))\n", + "# Register for free at https://www.graphistry.com\n", + "# Credentials loaded from .env file\n", + "graphistry.register(\n", + " api=3,\n", + " protocol=\"https\",\n", + " server=\"hub.graphistry.com\",\n", + " personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n", + " personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n", + ")\n", + "\n", + "# Configure Sentinel connection\n", + "# Workspace ID loaded from .env file\n", + "WORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n", + "\n", + "if not WORKSPACE_ID:\n", + " raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n", + "\n", + "g = graphistry.configure_sentinel(\n", + " workspace_id=WORKSPACE_ID\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install dotenv" + ] }, { "cell_type": "markdown", @@ -473,9 +569,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Graphistry Dev", "language": "python", - "name": "python3" + "name": "graphistry-dev" }, "language_info": { "codemirror_mode": { @@ -487,9 +583,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.10.18" } }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/graphistry/__init__.py b/graphistry/__init__.py index fa6a35d340..e30d6214ab 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -41,6 +41,8 @@ kusto_from_client, kql, kusto_graph, + configure_sentinel, + sentinel_from_client, gsql, gsql_endpoint, cosmos, diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index e39b48c7ec..3e74af38c0 100644 --- a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -1961,6 +1961,30 @@ def kusto_graph(self, graph_name: str, snap_name: Optional[str] = None) -> Plott return cast(Plotter, self._plotter().kusto_graph(graph_name, snap_name)) kusto_graph.__doc__ = Plotter.kusto_graph.__doc__ + # ---- Sentinel / Log Analytics API ---------------------------------------------------- # + + def configure_sentinel( + self, + workspace_id: str, + tenant_id: Optional[str] = None, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + credential: Optional[Any] = None, + default_timespan: Optional[Any] = None, + ) -> Plotter: + return cast(Plotter, self._plotter().configure_sentinel( + workspace_id=workspace_id, + tenant_id=tenant_id, + client_id=client_id, + client_secret=client_secret, + credential=credential, + 
default_timespan=default_timespan + )) + configure_sentinel.__doc__ = Plotter.configure_sentinel.__doc__ + + def sentinel_from_client(self, client: Any, workspace_id: str, default_timespan: Optional[Any] = None) -> Plotter: + return cast(Plotter, self._plotter().sentinel_from_client(client, workspace_id, default_timespan)) + sentinel_from_client.__doc__ = Plotter.sentinel_from_client.__doc__ def gsql_endpoint(self, From 8e338fdef2db6828ba2b3cef7fbde5b21d543575 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 11:56:51 +0200 Subject: [PATCH 11/21] fix: Add module-level function assignments for Sentinel - Add configure_sentinel and sentinel_from_client assignments - Follow same pattern as other plugins (Kusto, Spanner) - Enable direct import from graphistry module --- graphistry/pygraphistry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index 3e74af38c0..1d6eda5c75 100644 --- a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -2627,6 +2627,8 @@ def _handle_api_response(self, response): kusto_from_client = PyGraphistry.kusto_from_client kql = PyGraphistry.kql kusto_graph = PyGraphistry.kusto_graph +configure_sentinel = PyGraphistry.configure_sentinel +sentinel_from_client = PyGraphistry.sentinel_from_client cosmos = PyGraphistry.cosmos neptune = PyGraphistry.neptune gremlin = PyGraphistry.gremlin From 1e3401d4cd3688ec4b9dfb13d7c8ae4e81ca1ca5 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 12:03:04 +0200 Subject: [PATCH 12/21] feat: Add device code authentication support to Sentinel plugin - Add use_device_auth parameter to configure_sentinel() - Support DeviceCodeCredential for interactive authentication - Show code and URL for authentication like Kusto plugin - Update type definitions and method signatures - Provide authentication precedence documentation --- .../kusto/graphistry_ADX_kusto_demo.ipynb | 2 +- .../sentinel/sentinel_security_analysis.ipynb | 
42 ++++++++++++------- graphistry/plugins/sentinel.py | 25 ++++++++++- graphistry/plugins_types/sentinel_types.py | 3 ++ graphistry/pygraphistry.py | 4 +- 5 files changed, 57 insertions(+), 19 deletions(-) diff --git a/demos/demos_databases_apis/microsoft/kusto/graphistry_ADX_kusto_demo.ipynb b/demos/demos_databases_apis/microsoft/kusto/graphistry_ADX_kusto_demo.ipynb index a1f902ef34..08631880d1 100644 --- a/demos/demos_databases_apis/microsoft/kusto/graphistry_ADX_kusto_demo.ipynb +++ b/demos/demos_databases_apis/microsoft/kusto/graphistry_ADX_kusto_demo.ipynb @@ -677,7 +677,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.10.18" } }, "nbformat": 4, diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb index 05a4095dab..35341391ca 100644 --- a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -54,7 +54,8 @@ "```bash\n", "az login\n", "```" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -113,7 +114,8 @@ "metadata": {}, "source": [ "### Option 2: Service Principal Authentication (Recommended for Production)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -135,7 +137,8 @@ "metadata": {}, "source": [ "## Test Connection" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -146,9 +149,9 @@ "# Test the connection\n", "try:\n", " g.sentinel_health_check()\n", - " print(\"✅ Successfully connected to Microsoft Sentinel!\")\n", + " print(\"\u2705 Successfully connected to Microsoft Sentinel!\")\n", "except Exception as e:\n", - " print(f\"❌ Connection failed: {e}\")" + " print(f\"\u274c Connection failed: {e}\")" ] }, { @@ -158,7 +161,8 @@ "## Explore Available Data\n", "\n", "Let's start by exploring what tables are available in your Sentinel 
workspace:" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -194,7 +198,8 @@ "## Security Analysis Examples\n", "\n", "### 1. Failed Login Analysis" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -232,7 +237,8 @@ "metadata": {}, "source": [ "### 2. Security Alerts Analysis" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -275,7 +281,8 @@ "metadata": {}, "source": [ "### 3. Network Traffic Analysis" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -322,14 +329,16 @@ "## Graph Visualization\n", "\n", "Now let's create some graph visualizations from the security data:" - ] + ], + "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. User-IP Relationship Graph" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -408,7 +417,8 @@ "metadata": {}, "source": [ "### 2. Alert Correlation Graph" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -492,7 +502,8 @@ "## Advanced Analysis\n", "\n", "### Multi-table Correlation" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -564,7 +575,8 @@ "- [Microsoft Sentinel KQL Reference](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/)\n", "- [Graphistry Documentation](https://pygraphistry.readthedocs.io/)\n", "- [Azure Monitor Query Documentation](https://docs.microsoft.com/en-us/python/api/azure-monitor-query/)\n" - ] + ], + "outputs": [] } ], "metadata": { @@ -588,4 +600,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/graphistry/plugins/sentinel.py b/graphistry/plugins/sentinel.py index da20edd6b1..4ad6dc7ff4 100644 --- a/graphistry/plugins/sentinel.py +++ b/graphistry/plugins/sentinel.py @@ -38,6 +38,7 @@ def configure_sentinel( client_secret: Optional[str] = None, credential: Optional["TokenCredential"] = None, default_timespan: Optional[timedelta] = None, + use_device_auth: bool = False, ) -> Plottable: """Configure Microsoft Sentinel Log Analytics connection settings. 
@@ -59,6 +60,8 @@ def configure_sentinel( :type credential: Optional[TokenCredential] :param default_timespan: Default time range for queries (defaults to 24 hours) :type default_timespan: Optional[timedelta] + :param use_device_auth: Use device code authentication (shows code and URL) + :type use_device_auth: bool :returns: Self for method chaining :rtype: Plottable @@ -82,6 +85,16 @@ def configure_sentinel( client_secret="your-client-secret" ) + **Example: Device code authentication (interactive)** + :: + + import graphistry + g = graphistry.configure_sentinel( + workspace_id="12345678-1234-1234-1234-123456789abc", + use_device_auth=True + ) + # This will show a code and URL for authentication + **Example: Custom credential** :: @@ -101,6 +114,7 @@ def configure_sentinel( client_secret=client_secret, credential=credential, default_timespan=default_timespan or timedelta(hours=24), + use_device_auth=use_device_auth, ) return self @@ -531,11 +545,12 @@ def init_sentinel_client(cfg: SentinelConfig) -> "LogsQueryClient": Authentication precedence: 1. Custom credential object (if provided) 2. Service Principal (if credentials provided) - 3. DefaultAzureCredential (tries multiple methods automatically) + 3. Device code authentication (if use_device_auth=True) + 4. DefaultAzureCredential (tries multiple methods automatically) For Azure CLI auth: Run 'az login' before using this method. 
""" - from azure.identity import DefaultAzureCredential, ClientSecretCredential + from azure.identity import DefaultAzureCredential, ClientSecretCredential, DeviceCodeCredential from azure.monitor.query import LogsQueryClient try: @@ -551,6 +566,12 @@ def init_sentinel_client(cfg: SentinelConfig) -> "LogsQueryClient": client_secret=cfg.client_secret ) logger.info(f"Using Service Principal authentication for workspace {cfg.workspace_id}") + elif cfg.use_device_auth: + credential = DeviceCodeCredential( + tenant_id=cfg.tenant_id # Optional, uses common tenant if not provided + ) + logger.info(f"Using Device Code authentication for workspace {cfg.workspace_id}") + logger.info("You will be prompted to visit a URL and enter a code to authenticate") else: credential = DefaultAzureCredential() logger.info(f"Using DefaultAzureCredential (Azure CLI, Managed Identity, etc.) for workspace {cfg.workspace_id}") diff --git a/graphistry/plugins_types/sentinel_types.py b/graphistry/plugins_types/sentinel_types.py index cbc195146d..6005dbbd68 100644 --- a/graphistry/plugins_types/sentinel_types.py +++ b/graphistry/plugins_types/sentinel_types.py @@ -66,5 +66,8 @@ class SentinelConfig: default_timespan: timedelta = timedelta(hours=24) """Default time range for queries when not specified""" + use_device_auth: bool = False + """Use device code authentication flow""" + _client: Optional[LogsQueryClient] = None """Cached client instance (internal use)""" \ No newline at end of file diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index 1d6eda5c75..11c50e7be6 100644 --- a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -1971,6 +1971,7 @@ def configure_sentinel( client_secret: Optional[str] = None, credential: Optional[Any] = None, default_timespan: Optional[Any] = None, + use_device_auth: bool = False, ) -> Plotter: return cast(Plotter, self._plotter().configure_sentinel( workspace_id=workspace_id, @@ -1978,7 +1979,8 @@ def configure_sentinel( 
client_id=client_id, client_secret=client_secret, credential=credential, - default_timespan=default_timespan + default_timespan=default_timespan, + use_device_auth=use_device_auth )) configure_sentinel.__doc__ = Plotter.configure_sentinel.__doc__ From 7abc103b9834db3bae557130c403aa255c6db3a0 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 12:05:09 +0200 Subject: [PATCH 13/21] fix: Handle different column formats in Azure Monitor Query response - Support both object columns (with .name/.type) and string columns - Default to 'string' type when type info not available - Handle missing table.name attribute gracefully - Fix AttributeError in query response processing --- graphistry/plugins/sentinel.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/graphistry/plugins/sentinel.py b/graphistry/plugins/sentinel.py index 4ad6dc7ff4..12d5df98ad 100644 --- a/graphistry/plugins/sentinel.py +++ b/graphistry/plugins/sentinel.py @@ -517,14 +517,25 @@ def _sentinel_query( # Process each table in the response for table in response.tables: rows = [list(row) for row in table.rows] - col_names = [col.name for col in table.columns] - col_types = [col.type for col in table.columns] + + # Handle different column formats + if hasattr(table.columns[0], 'name') if table.columns else False: + # Columns are objects with name/type attributes + col_names = [col.name for col in table.columns] + col_types = [col.type for col in table.columns] + else: + # Columns are strings (column names only) + col_names = list(table.columns) + col_types = ['string'] * len(col_names) # Default to string type + + # Handle table name + table_name = getattr(table, 'name', None) results.append(SentinelQueryResult( data=rows, column_names=col_names, column_types=col_types, - table_name=table.name + table_name=table_name )) row_lengths.append((len(rows), len(col_names))) From 2b728c11457d8491eec876575cbbd721a908c456 Mon Sep 17 00:00:00 2001 From: Sindre Breda 
Date: Mon, 22 Sep 2025 12:09:01 +0200 Subject: [PATCH 14/21] fix: Update sentinel_tables() query to avoid column name conflicts - Replace union withsource=TableName query that caused conflicts - Use Usage table to get DataType (table names) instead - Extend timespan to 30 days for better table coverage - Avoid SEM0001 semantic error from existing TableName columns --- .../sentinel/sentinel_security_analysis.ipynb | 3 ++- graphistry/plugins/sentinel.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb index 35341391ca..1bc1d7c9ed 100644 --- a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -96,7 +96,8 @@ " raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. Please check your .env file.\")\n", "\n", "g = graphistry.configure_sentinel(\n", - " workspace_id=WORKSPACE_ID\n", + " workspace_id=WORKSPACE_ID,\n", + " use_device_auth=True \n", ")" ] }, diff --git a/graphistry/plugins/sentinel.py b/graphistry/plugins/sentinel.py index 12d5df98ad..2183803611 100644 --- a/graphistry/plugins/sentinel.py +++ b/graphistry/plugins/sentinel.py @@ -446,8 +446,14 @@ def sentinel_tables(self) -> pd.DataFrame: print(f"Found {len(tables)} tables") print(tables.head(10)) """ - query = "union withsource=TableName * | distinct TableName | sort by TableName asc" - return self.kql(query, timespan=timedelta(minutes=5)) + # Use Usage table to get all table names - this avoids union conflicts + query = """ + Usage + | where TimeGenerated > ago(30d) + | distinct DataType + | sort by DataType asc + """ + return self.kql(query, timespan=timedelta(days=30)) def sentinel_schema(self, table: str) -> pd.DataFrame: """Get schema information for a specific table. 
From cb5432fd44a49cc24bf16817a752462f52587404 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 12:12:49 +0200 Subject: [PATCH 15/21] fix: Update notebook to use correct DataType column for table listing - Fix references from TableName to DataType column - Clean up notebook code for consistent formatting - Add device authentication example - Ensure table listing and schema queries work correctly --- .../sentinel/sentinel_security_analysis.ipynb | 65 +------------------ 1 file changed, 3 insertions(+), 62 deletions(-) diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb index 1bc1d7c9ed..2e9b2e75a1 100644 --- a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -62,53 +62,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import graphistry\n", - "from datetime import datetime, timedelta\n", - "import pandas as pd\n", - "import os\n", - "from dotenv import load_dotenv, dotenv_values\n", - "\n", - "# Load environment variables from .env file\n", - "# Option 1: Load from current directory (default)\n", - "#load_dotenv()\n", - "\n", - "# Option 2: Load from a custom location (uncomment and modify as needed)\n", - "# load_dotenv('~/sentinel.env') # Load from home directory\n", - "# load_dotenv('/path/to/your/.env') # Load from absolute path\n", - "load_dotenv(os.path.expanduser('~/Documents/Graphistry/sentinel.env')) # Expand ~ to home directory\n", - "print(dotenv_values('/home/sinbre/Documents/Graphistry/sentinel.env'))\n", - "# Register for free at https://www.graphistry.com\n", - "# Credentials loaded from .env file\n", - "graphistry.register(\n", - " api=3,\n", - " protocol=\"https\",\n", - " server=\"hub.graphistry.com\",\n", - " personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n", 
- " personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n", - ")\n", - "\n", - "# Configure Sentinel connection\n", - "# Workspace ID loaded from .env file\n", - "WORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n", - "\n", - "if not WORKSPACE_ID:\n", - " raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. Please check your .env file.\")\n", - "\n", - "g = graphistry.configure_sentinel(\n", - " workspace_id=WORKSPACE_ID,\n", - " use_device_auth=True \n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install dotenv" - ] + "source": "import graphistry\nfrom datetime import datetime, timedelta\nimport pandas as pd\nimport os\nfrom dotenv import load_dotenv\n\n# Load environment variables from .env file\n# Option 1: Load from current directory (default)\nload_dotenv()\n\n# Option 2: Load from a custom location (uncomment and modify as needed)\n# load_dotenv('~/custom.env') # Load from home directory\n# load_dotenv('/path/to/your/.env') # Load from absolute path\n# load_dotenv(os.path.expanduser('~/sentinel-credentials.env')) # Expand ~ to home directory\n\n# Register for free at https://www.graphistry.com\n# Credentials loaded from .env file\ngraphistry.register(\n api=3,\n protocol=\"https\",\n server=\"hub.graphistry.com\",\n personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n)\n\n# Configure Sentinel connection\n# Workspace ID loaded from .env file\nWORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n\nif not WORKSPACE_ID:\n raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n\ng = graphistry.configure_sentinel(\n workspace_id=WORKSPACE_ID,\n use_device_auth=True # Use device code authentication\n)" }, { "cell_type": "markdown", @@ -170,27 +124,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# List all available tables\n", - "tables_df = g.sentinel_tables()\n", - "print(f\"Found {len(tables_df)} tables in workspace\")\n", - "print(\"\\nSecurity-related tables:\")\n", - "security_tables = tables_df[tables_df['TableName'].str.contains('Security|Alert|Incident', case=False, na=False)]\n", - "print(security_tables['TableName'].tolist())" - ] + "source": "# List all available tables\ntables_df = g.sentinel_tables()\nprint(f\"Found {len(tables_df)} tables in workspace\")\nprint(\"\\nSecurity-related tables:\")\nsecurity_tables = tables_df[tables_df['DataType'].str.contains('Security|Alert|Incident', case=False, na=False)]\nprint(security_tables['DataType'].tolist())" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Get schema for SecurityEvent table (if available)\n", - "if 'SecurityEvent' in tables_df['TableName'].values:\n", - " schema = g.sentinel_schema('SecurityEvent')\n", - " print(\"SecurityEvent table schema:\")\n", - " print(schema[['ColumnName', 'DataType']].head(10))" - ] + "source": "# Get schema for SecurityEvent table (if available)\nif 'SecurityEvent' in tables_df['DataType'].values:\n schema = g.sentinel_schema('SecurityEvent')\n print(\"SecurityEvent table schema:\")\n print(schema[['ColumnName', 'DataType']].head(10))" }, { "cell_type": "markdown", From d6e3c2f85f78c4ac880582ab9c7ff1820d66e96f Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 12:16:08 +0200 Subject: [PATCH 16/21] fix: Use correct Graphistry encoding methods in notebook - Replace bind() with encode_point_color() and encode_edge_color() - Use encode_edge_size() for edge weight visualization - Fix method signatures to match 
Graphistry API --- .../sentinel/sentinel_security_analysis.ipynb | 193 +++++------------- 1 file changed, 55 insertions(+), 138 deletions(-) diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb index 2e9b2e75a1..9037908c4b 100644 --- a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -62,7 +62,44 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "import graphistry\nfrom datetime import datetime, timedelta\nimport pandas as pd\nimport os\nfrom dotenv import load_dotenv\n\n# Load environment variables from .env file\n# Option 1: Load from current directory (default)\nload_dotenv()\n\n# Option 2: Load from a custom location (uncomment and modify as needed)\n# load_dotenv('~/custom.env') # Load from home directory\n# load_dotenv('/path/to/your/.env') # Load from absolute path\n# load_dotenv(os.path.expanduser('~/sentinel-credentials.env')) # Expand ~ to home directory\n\n# Register for free at https://www.graphistry.com\n# Credentials loaded from .env file\ngraphistry.register(\n api=3,\n protocol=\"https\",\n server=\"hub.graphistry.com\",\n personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n)\n\n# Configure Sentinel connection\n# Workspace ID loaded from .env file\nWORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n\nif not WORKSPACE_ID:\n raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n\ng = graphistry.configure_sentinel(\n workspace_id=WORKSPACE_ID,\n use_device_auth=True # Use device code authentication\n)" + "source": [ + "import graphistry\n", + "from datetime import datetime, timedelta\n", + "import pandas as pd\n", + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from .env file\n", + "# Option 1: Load from current directory (default)\n", + "load_dotenv()\n", + "\n", + "# Option 2: Load from a custom location (uncomment and modify as needed)\n", + "# load_dotenv('~/custom.env') # Load from home directory\n", + "# load_dotenv('/path/to/your/.env') # Load from absolute path\n", + "load_dotenv(os.path.expanduser('~/Documents/Graphistry/sentinel.env')) # Expand ~ to home directory\n", + "\n", + "# Register for free at https://www.graphistry.com\n", + "# Credentials loaded from .env file\n", + "graphistry.register(\n", + " api=3,\n", + " protocol=\"https\",\n", + " server=\"hub.graphistry.com\",\n", + " personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n", + " personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n", + ")\n", + "\n", + "# Configure Sentinel connection\n", + "# Workspace ID loaded from .env file\n", + "WORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n", + "\n", + "if not WORKSPACE_ID:\n", + " raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n", + "\n", + "g = graphistry.configure_sentinel(\n", + " workspace_id=WORKSPACE_ID,\n", + " use_device_auth=True # Use device code authentication\n", + ")" + ] }, { "cell_type": "markdown", @@ -124,14 +161,27 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# List all available tables\ntables_df = g.sentinel_tables()\nprint(f\"Found {len(tables_df)} tables in workspace\")\nprint(\"\\nSecurity-related tables:\")\nsecurity_tables = tables_df[tables_df['DataType'].str.contains('Security|Alert|Incident', case=False, na=False)]\nprint(security_tables['DataType'].tolist())" + "source": [ + "# List all available tables\n", + "tables_df = g.sentinel_tables()\n", + "print(f\"Found {len(tables_df)} tables in workspace\")\n", + "print(\"\\nSecurity-related tables:\")\n", + "security_tables = tables_df[tables_df['DataType'].str.contains('Security|Alert|Incident', case=False, na=False)]\n", + "print(security_tables['DataType'].tolist())" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Get schema for SecurityEvent table (if available)\nif 'SecurityEvent' in tables_df['DataType'].values:\n schema = g.sentinel_schema('SecurityEvent')\n print(\"SecurityEvent table schema:\")\n print(schema[['ColumnName', 'DataType']].head(10))" + "source": [ + "# Get schema for SecurityEvent table (if available)\n", + "if 'SecurityEvent' in tables_df['DataType'].values:\n", + " schema = g.sentinel_schema('SecurityEvent')\n", + " print(\"SecurityEvent table schema:\")\n", + " print(schema[['ColumnName', 'DataType']].head(10))" + ] }, { "cell_type": "markdown", @@ -287,72 +337,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Query for user-IP relationships\n", - "user_ip_query = \"\"\"\n", - "SigninLogs\n", - "| where TimeGenerated > ago(24h)\n", - "| where isnotempty(UserPrincipalName) and isnotempty(IPAddress)\n", - "| project UserPrincipalName, IPAddress, 
TimeGenerated, ResultType, Location\n", - "| summarize \n", - " LoginCount = count(),\n", - " FailureCount = countif(ResultType != \"0\"),\n", - " LatestLogin = max(TimeGenerated),\n", - " Locations = make_set(Location)\n", - " by UserPrincipalName, IPAddress\n", - "| extend RiskScore = FailureCount * 2 + iff(LoginCount == 1, 1, 0)\n", - "| take 500\n", - "\"\"\"\n", - "\n", - "try:\n", - " user_ip_data = g.kql_last(user_ip_query, hours=24)\n", - " \n", - " if len(user_ip_data) > 0:\n", - " # Create nodes and edges for graph visualization\n", - " \n", - " # Create user nodes\n", - " users = user_ip_data[['UserPrincipalName']].drop_duplicates()\n", - " users['node_type'] = 'user'\n", - " users['node_id'] = users['UserPrincipalName']\n", - " users['node_label'] = users['UserPrincipalName']\n", - " \n", - " # Create IP nodes \n", - " ips = user_ip_data[['IPAddress']].drop_duplicates()\n", - " ips['node_type'] = 'ip'\n", - " ips['node_id'] = ips['IPAddress']\n", - " ips['node_label'] = ips['IPAddress']\n", - " \n", - " # Combine nodes\n", - " nodes = pd.concat([\n", - " users[['node_id', 'node_label', 'node_type']],\n", - " ips[['node_id', 'node_label', 'node_type']]\n", - " ], ignore_index=True)\n", - " \n", - " # Create edges\n", - " edges = user_ip_data.copy()\n", - " edges['source'] = edges['UserPrincipalName']\n", - " edges['target'] = edges['IPAddress']\n", - " edges['edge_weight'] = edges['LoginCount']\n", - " edges['edge_color'] = edges['RiskScore'].apply(\n", - " lambda x: 'red' if x > 5 else 'orange' if x > 2 else 'green'\n", - " )\n", - " \n", - " # Create and plot graph\n", - " graph = g.nodes(nodes, node='node_id')\\\n", - " .edges(edges, source='source', destination='target')\\\n", - " .bind(node_color='node_type', edge_color='edge_color')\\\n", - " .settings(url_params={'splashAfter': 'false'})\n", - " \n", - " print(f\"Created graph with {len(nodes)} nodes and {len(edges)} edges\")\n", - " \n", - " # Plot the graph\n", - " graph.plot()\n", - " else:\n", 
- " print(\"No data available for user-IP graph\")\n", - " \n", - "except Exception as e:\n", - " print(f\"Graph creation failed: {e}\")" - ] + "source": "# Query for user-IP relationships\nuser_ip_query = \"\"\"\nSigninLogs\n| where TimeGenerated > ago(24h)\n| where isnotempty(UserPrincipalName) and isnotempty(IPAddress)\n| project UserPrincipalName, IPAddress, TimeGenerated, ResultType, Location\n| summarize \n LoginCount = count(),\n FailureCount = countif(ResultType != \"0\"),\n LatestLogin = max(TimeGenerated),\n Locations = make_set(Location)\n by UserPrincipalName, IPAddress\n| extend RiskScore = FailureCount * 2 + iff(LoginCount == 1, 1, 0)\n| take 500\n\"\"\"\n\ntry:\n user_ip_data = g.kql_last(user_ip_query, hours=24)\n \n if len(user_ip_data) > 0:\n # Create nodes and edges for graph visualization\n \n # Create user nodes\n users = user_ip_data[['UserPrincipalName']].drop_duplicates()\n users['node_type'] = 'user'\n users['node_id'] = users['UserPrincipalName']\n users['node_label'] = users['UserPrincipalName']\n \n # Create IP nodes \n ips = user_ip_data[['IPAddress']].drop_duplicates()\n ips['node_type'] = 'ip'\n ips['node_id'] = ips['IPAddress']\n ips['node_label'] = ips['IPAddress']\n \n # Combine nodes\n nodes = pd.concat([\n users[['node_id', 'node_label', 'node_type']],\n ips[['node_id', 'node_label', 'node_type']]\n ], ignore_index=True)\n \n # Create edges\n edges = user_ip_data.copy()\n edges['source'] = edges['UserPrincipalName']\n edges['target'] = edges['IPAddress']\n edges['edge_weight'] = edges['LoginCount']\n edges['edge_color'] = edges['RiskScore'].apply(\n lambda x: 'red' if x > 5 else 'orange' if x > 2 else 'green'\n )\n \n # Create and plot graph\n graph = g.nodes(nodes, node='node_id')\\\n .edges(edges, source='source', destination='target')\\\n .encode_point_color('node_type')\\\n .encode_edge_color('edge_color')\\\n .settings(url_params={'splashAfter': 'false'})\n \n print(f\"Created graph with {len(nodes)} nodes and {len(edges)} 
edges\")\n \n # Plot the graph\n graph.plot()\n else:\n print(\"No data available for user-IP graph\")\n \nexcept Exception as e:\n print(f\"Graph creation failed: {e}\")" }, { "cell_type": "markdown", @@ -367,75 +352,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Query for alert correlations\n", - "alert_correlation_query = \"\"\"\n", - "SecurityAlert\n", - "| where TimeGenerated > ago(7d)\n", - "| project \n", - " AlertName,\n", - " CompromisedEntity,\n", - " Tactics,\n", - " AlertSeverity,\n", - " TimeGenerated\n", - "| extend EntityType = case(\n", - " CompromisedEntity contains \"@\", \"User\",\n", - " CompromisedEntity matches regex @\"\\\\b(?:[0-9]{1,3}\\\\.){3}[0-9]{1,3}\\\\b\", \"IP\",\n", - " \"Host\"\n", - ")\n", - "| summarize \n", - " AlertCount = count(),\n", - " Severities = make_set(AlertSeverity),\n", - " TacticsList = make_set(Tactics)\n", - " by AlertName, CompromisedEntity, EntityType\n", - "| where AlertCount > 1\n", - "| take 200\n", - "\"\"\"\n", - "\n", - "try:\n", - " alert_data = g.kql(alert_correlation_query, timespan=timedelta(days=7))\n", - " \n", - " if len(alert_data) > 0:\n", - " # Create alert type nodes\n", - " alert_types = alert_data[['AlertName']].drop_duplicates()\n", - " alert_types['node_type'] = 'alert'\n", - " alert_types['node_id'] = alert_types['AlertName']\n", - " alert_types['node_label'] = alert_types['AlertName']\n", - " \n", - " # Create entity nodes\n", - " entities = alert_data[['CompromisedEntity', 'EntityType']].drop_duplicates()\n", - " entities['node_type'] = entities['EntityType'].str.lower()\n", - " entities['node_id'] = entities['CompromisedEntity']\n", - " entities['node_label'] = entities['CompromisedEntity']\n", - " \n", - " # Combine nodes\n", - " alert_nodes = pd.concat([\n", - " alert_types[['node_id', 'node_label', 'node_type']],\n", - " entities[['node_id', 'node_label', 'node_type']]\n", - " ], ignore_index=True)\n", - " \n", - " # Create edges (alert -> entity)\n", 
- " alert_edges = alert_data.copy()\n", - " alert_edges['source'] = alert_edges['AlertName']\n", - " alert_edges['target'] = alert_edges['CompromisedEntity']\n", - " alert_edges['edge_weight'] = alert_edges['AlertCount']\n", - " \n", - " # Create and plot graph\n", - " alert_graph = g.nodes(alert_nodes, node='node_id')\\\n", - " .edges(alert_edges, source='source', destination='target')\\\n", - " .bind(node_color='node_type', edge_weight='edge_weight')\\\n", - " .settings(url_params={'splashAfter': 'false'})\n", - " \n", - " print(f\"Created alert correlation graph with {len(alert_nodes)} nodes and {len(alert_edges)} edges\")\n", - " \n", - " # Plot the graph\n", - " alert_graph.plot()\n", - " else:\n", - " print(\"No alert correlation data available\")\n", - " \n", - "except Exception as e:\n", - " print(f\"Alert correlation graph failed: {e}\")" - ] + "source": "# Query for alert correlations\nalert_correlation_query = \"\"\"\nSecurityAlert\n| where TimeGenerated > ago(7d)\n| project \n AlertName,\n CompromisedEntity,\n Tactics,\n AlertSeverity,\n TimeGenerated\n| extend EntityType = case(\n CompromisedEntity contains \"@\", \"User\",\n CompromisedEntity matches regex @\"\\\\b(?:[0-9]{1,3}\\\\.){3}[0-9]{1,3}\\\\b\", \"IP\",\n \"Host\"\n)\n| summarize \n AlertCount = count(),\n Severities = make_set(AlertSeverity),\n TacticsList = make_set(Tactics)\n by AlertName, CompromisedEntity, EntityType\n| where AlertCount > 1\n| take 200\n\"\"\"\n\ntry:\n alert_data = g.kql(alert_correlation_query, timespan=timedelta(days=7))\n \n if len(alert_data) > 0:\n # Create alert type nodes\n alert_types = alert_data[['AlertName']].drop_duplicates()\n alert_types['node_type'] = 'alert'\n alert_types['node_id'] = alert_types['AlertName']\n alert_types['node_label'] = alert_types['AlertName']\n \n # Create entity nodes\n entities = alert_data[['CompromisedEntity', 'EntityType']].drop_duplicates()\n entities['node_type'] = entities['EntityType'].str.lower()\n entities['node_id'] = 
entities['CompromisedEntity']\n entities['node_label'] = entities['CompromisedEntity']\n \n # Combine nodes\n alert_nodes = pd.concat([\n alert_types[['node_id', 'node_label', 'node_type']],\n entities[['node_id', 'node_label', 'node_type']]\n ], ignore_index=True)\n \n # Create edges (alert -> entity)\n alert_edges = alert_data.copy()\n alert_edges['source'] = alert_edges['AlertName']\n alert_edges['target'] = alert_edges['CompromisedEntity']\n alert_edges['edge_weight'] = alert_edges['AlertCount']\n \n # Create and plot graph\n alert_graph = g.nodes(alert_nodes, node='node_id')\\\n .edges(alert_edges, source='source', destination='target')\\\n .encode_point_color('node_type')\\\n .encode_edge_size('edge_weight')\\\n .settings(url_params={'splashAfter': 'false'})\n \n print(f\"Created alert correlation graph with {len(alert_nodes)} nodes and {len(alert_edges)} edges\")\n \n # Plot the graph\n alert_graph.plot()\n else:\n print(\"No alert correlation data available\")\n \nexcept Exception as e:\n print(f\"Alert correlation graph failed: {e}\")" }, { "cell_type": "markdown", From 83449503c7c04e82bb1a83c1b1a7da1836e85565 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Mon, 22 Sep 2025 12:23:12 +0200 Subject: [PATCH 17/21] docs: Clean up and finalize Sentinel notebook - Add better error handling and user guidance - Include troubleshooting tips and alternative auth methods - Improve error messages and empty result handling - Add comprehensive summary and next steps - Make notebook more robust for different workspace configurations --- .../sentinel/sentinel_security_analysis.ipynb | 244 ++++++++++-------- 1 file changed, 143 insertions(+), 101 deletions(-) diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb index 9037908c4b..3d7a5d61a8 100644 --- a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb +++ 
b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -62,44 +62,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import graphistry\n", - "from datetime import datetime, timedelta\n", - "import pandas as pd\n", - "import os\n", - "from dotenv import load_dotenv\n", - "\n", - "# Load environment variables from .env file\n", - "# Option 1: Load from current directory (default)\n", - "load_dotenv()\n", - "\n", - "# Option 2: Load from a custom location (uncomment and modify as needed)\n", - "# load_dotenv('~/custom.env') # Load from home directory\n", - "# load_dotenv('/path/to/your/.env') # Load from absolute path\n", - "load_dotenv(os.path.expanduser('~/Documents/Graphistry/sentinel.env')) # Expand ~ to home directory\n", - "\n", - "# Register for free at https://www.graphistry.com\n", - "# Credentials loaded from .env file\n", - "graphistry.register(\n", - " api=3,\n", - " protocol=\"https\",\n", - " server=\"hub.graphistry.com\",\n", - " personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n", - " personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n", - ")\n", - "\n", - "# Configure Sentinel connection\n", - "# Workspace ID loaded from .env file\n", - "WORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n", - "\n", - "if not WORKSPACE_ID:\n", - " raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n", - "\n", - "g = graphistry.configure_sentinel(\n", - " workspace_id=WORKSPACE_ID,\n", - " use_device_auth=True # Use device code authentication\n", - ")" - ] + "source": "import graphistry\nfrom datetime import datetime, timedelta\nimport pandas as pd\nimport os\nfrom dotenv import load_dotenv\n\n# Load environment variables from .env file\n# Option 1: Load from current directory (default)\nload_dotenv()\n\n# Option 2: Load from a custom location (uncomment and modify as needed)\n# load_dotenv('~/custom.env') # Load from home directory\n# load_dotenv('/path/to/your/.env') # Load from absolute path\n# load_dotenv(os.path.expanduser('~/sentinel-credentials.env')) # Expand ~ to home directory\n\n# Register for free at https://www.graphistry.com\n# Credentials loaded from .env file\ngraphistry.register(\n api=3,\n protocol=\"https\",\n server=\"hub.graphistry.com\",\n personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n)\n\n# Configure Sentinel connection\n# Workspace ID loaded from .env file\nWORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n\nif not WORKSPACE_ID:\n raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n\ng = graphistry.configure_sentinel(\n workspace_id=WORKSPACE_ID,\n use_device_auth=True # Use device code authentication\n)" }, { "cell_type": "markdown", @@ -114,15 +77,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Alternative: Service Principal authentication from .env file\n", - "# g = graphistry.configure_sentinel(\n", - "# workspace_id=os.getenv('SENTINEL_WORKSPACE_ID'),\n", - "# tenant_id=os.getenv('AZURE_TENANT_ID'),\n", - "# client_id=os.getenv('AZURE_CLIENT_ID'),\n", - "# client_secret=os.getenv('AZURE_CLIENT_SECRET')\n", - "# )" - ] + "source": "# Alternative: Service Principal authentication from .env file\n# Uncomment the lines below if you prefer Service Principal over device authentication\n# g = graphistry.configure_sentinel(\n# workspace_id=os.getenv('SENTINEL_WORKSPACE_ID'),\n# tenant_id=os.getenv('AZURE_TENANT_ID'),\n# client_id=os.getenv('AZURE_CLIENT_ID'),\n# client_secret=os.getenv('AZURE_CLIENT_SECRET')\n# )\n\n# Alternative: Use DefaultAzureCredential (tries Azure CLI, Managed Identity, etc.)\n# g = graphistry.configure_sentinel(\n# workspace_id=os.getenv('SENTINEL_WORKSPACE_ID')\n# )" }, { "cell_type": "markdown", @@ -137,14 +92,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Test the connection\n", - "try:\n", - " g.sentinel_health_check()\n", - " print(\"\u2705 Successfully connected to Microsoft Sentinel!\")\n", - "except Exception as e:\n", - " print(f\"\u274c Connection failed: {e}\")" - ] + "source": "# Test the connection\n# Note: If using device authentication, you'll see a code and URL to visit for authentication\ntry:\n g.sentinel_health_check()\n print(\"\u2705 Successfully connected to Microsoft Sentinel!\")\nexcept Exception as e:\n print(f\"\u274c Connection failed: {e}\")\n print(\"\ud83d\udca1 If using device auth, make sure to complete the authentication in your browser first.\")" }, { "cell_type": "markdown", @@ -161,27 
+109,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# List all available tables\n", - "tables_df = g.sentinel_tables()\n", - "print(f\"Found {len(tables_df)} tables in workspace\")\n", - "print(\"\\nSecurity-related tables:\")\n", - "security_tables = tables_df[tables_df['DataType'].str.contains('Security|Alert|Incident', case=False, na=False)]\n", - "print(security_tables['DataType'].tolist())" - ] + "source": "# List all available tables\ntry:\n tables_df = g.sentinel_tables()\n print(f\"Found {len(tables_df)} tables in workspace\")\n print(\"\\nSecurity-related tables:\")\n security_tables = tables_df[tables_df['DataType'].str.contains('Security|Alert|Incident', case=False, na=False)]\n if not security_tables.empty:\n print(security_tables['DataType'].tolist())\n else:\n print(\"No security-related tables found\")\n print(f\"\\nAll tables: {tables_df['DataType'].tolist()}\")\nexcept Exception as e:\n print(f\"Failed to list tables: {e}\")\n print(\"This might happen if the workspace has no data or insufficient permissions\")" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Get schema for SecurityEvent table (if available)\n", - "if 'SecurityEvent' in tables_df['DataType'].values:\n", - " schema = g.sentinel_schema('SecurityEvent')\n", - " print(\"SecurityEvent table schema:\")\n", - " print(schema[['ColumnName', 'DataType']].head(10))" - ] + "source": "# Get schema for SecurityEvent table (if available)\ntry:\n if 'SecurityEvent' in tables_df['DataType'].values:\n schema = g.sentinel_schema('SecurityEvent')\n print(\"SecurityEvent table schema:\")\n print(schema[['ColumnName', 'DataType']].head(10))\n else:\n print(\"SecurityEvent table not found in workspace\")\n print(\"Available tables for schema inspection:\", tables_df['DataType'].head(5).tolist())\nexcept Exception as e:\n print(f\"Failed to get schema: {e}\")" }, { "cell_type": "markdown", @@ -337,7 +272,73 @@ 
"execution_count": null, "metadata": {}, "outputs": [], - "source": "# Query for user-IP relationships\nuser_ip_query = \"\"\"\nSigninLogs\n| where TimeGenerated > ago(24h)\n| where isnotempty(UserPrincipalName) and isnotempty(IPAddress)\n| project UserPrincipalName, IPAddress, TimeGenerated, ResultType, Location\n| summarize \n LoginCount = count(),\n FailureCount = countif(ResultType != \"0\"),\n LatestLogin = max(TimeGenerated),\n Locations = make_set(Location)\n by UserPrincipalName, IPAddress\n| extend RiskScore = FailureCount * 2 + iff(LoginCount == 1, 1, 0)\n| take 500\n\"\"\"\n\ntry:\n user_ip_data = g.kql_last(user_ip_query, hours=24)\n \n if len(user_ip_data) > 0:\n # Create nodes and edges for graph visualization\n \n # Create user nodes\n users = user_ip_data[['UserPrincipalName']].drop_duplicates()\n users['node_type'] = 'user'\n users['node_id'] = users['UserPrincipalName']\n users['node_label'] = users['UserPrincipalName']\n \n # Create IP nodes \n ips = user_ip_data[['IPAddress']].drop_duplicates()\n ips['node_type'] = 'ip'\n ips['node_id'] = ips['IPAddress']\n ips['node_label'] = ips['IPAddress']\n \n # Combine nodes\n nodes = pd.concat([\n users[['node_id', 'node_label', 'node_type']],\n ips[['node_id', 'node_label', 'node_type']]\n ], ignore_index=True)\n \n # Create edges\n edges = user_ip_data.copy()\n edges['source'] = edges['UserPrincipalName']\n edges['target'] = edges['IPAddress']\n edges['edge_weight'] = edges['LoginCount']\n edges['edge_color'] = edges['RiskScore'].apply(\n lambda x: 'red' if x > 5 else 'orange' if x > 2 else 'green'\n )\n \n # Create and plot graph\n graph = g.nodes(nodes, node='node_id')\\\n .edges(edges, source='source', destination='target')\\\n .encode_point_color('node_type')\\\n .encode_edge_color('edge_color')\\\n .settings(url_params={'splashAfter': 'false'})\n \n print(f\"Created graph with {len(nodes)} nodes and {len(edges)} edges\")\n \n # Plot the graph\n graph.plot()\n else:\n print(\"No data available for 
user-IP graph\")\n \nexcept Exception as e:\n print(f\"Graph creation failed: {e}\")" + "source": [ + "# Query for user-IP relationships\n", + "user_ip_query = \"\"\"\n", + "SigninLogs\n", + "| where TimeGenerated > ago(24h)\n", + "| where isnotempty(UserPrincipalName) and isnotempty(IPAddress)\n", + "| project UserPrincipalName, IPAddress, TimeGenerated, ResultType, Location\n", + "| summarize \n", + " LoginCount = count(),\n", + " FailureCount = countif(ResultType != \"0\"),\n", + " LatestLogin = max(TimeGenerated),\n", + " Locations = make_set(Location)\n", + " by UserPrincipalName, IPAddress\n", + "| extend RiskScore = FailureCount * 2 + iff(LoginCount == 1, 1, 0)\n", + "| take 500\n", + "\"\"\"\n", + "\n", + "try:\n", + " user_ip_data = g.kql_last(user_ip_query, hours=24)\n", + " \n", + " if len(user_ip_data) > 0:\n", + " # Create nodes and edges for graph visualization\n", + " \n", + " # Create user nodes\n", + " users = user_ip_data[['UserPrincipalName']].drop_duplicates()\n", + " users['node_type'] = 'user'\n", + " users['node_id'] = users['UserPrincipalName']\n", + " users['node_label'] = users['UserPrincipalName']\n", + " \n", + " # Create IP nodes \n", + " ips = user_ip_data[['IPAddress']].drop_duplicates()\n", + " ips['node_type'] = 'ip'\n", + " ips['node_id'] = ips['IPAddress']\n", + " ips['node_label'] = ips['IPAddress']\n", + " \n", + " # Combine nodes\n", + " nodes = pd.concat([\n", + " users[['node_id', 'node_label', 'node_type']],\n", + " ips[['node_id', 'node_label', 'node_type']]\n", + " ], ignore_index=True)\n", + " \n", + " # Create edges\n", + " edges = user_ip_data.copy()\n", + " edges['source'] = edges['UserPrincipalName']\n", + " edges['target'] = edges['IPAddress']\n", + " edges['edge_weight'] = edges['LoginCount']\n", + " edges['edge_color'] = edges['RiskScore'].apply(\n", + " lambda x: 'red' if x > 5 else 'orange' if x > 2 else 'green'\n", + " )\n", + " \n", + " # Create and plot graph\n", + " graph = g.nodes(nodes, 
node='node_id')\\\n", + " .edges(edges, source='source', destination='target')\\\n", + " .encode_point_color('node_type')\\\n", + " .encode_edge_color('edge_color')\\\n", + " .settings(url_params={'splashAfter': 'false'})\n", + " \n", + " print(f\"Created graph with {len(nodes)} nodes and {len(edges)} edges\")\n", + " \n", + " # Plot the graph\n", + " graph.plot()\n", + " else:\n", + " print(\"No data available for user-IP graph\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Graph creation failed: {e}\")" + ] }, { "cell_type": "markdown", @@ -352,7 +353,76 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Query for alert correlations\nalert_correlation_query = \"\"\"\nSecurityAlert\n| where TimeGenerated > ago(7d)\n| project \n AlertName,\n CompromisedEntity,\n Tactics,\n AlertSeverity,\n TimeGenerated\n| extend EntityType = case(\n CompromisedEntity contains \"@\", \"User\",\n CompromisedEntity matches regex @\"\\\\b(?:[0-9]{1,3}\\\\.){3}[0-9]{1,3}\\\\b\", \"IP\",\n \"Host\"\n)\n| summarize \n AlertCount = count(),\n Severities = make_set(AlertSeverity),\n TacticsList = make_set(Tactics)\n by AlertName, CompromisedEntity, EntityType\n| where AlertCount > 1\n| take 200\n\"\"\"\n\ntry:\n alert_data = g.kql(alert_correlation_query, timespan=timedelta(days=7))\n \n if len(alert_data) > 0:\n # Create alert type nodes\n alert_types = alert_data[['AlertName']].drop_duplicates()\n alert_types['node_type'] = 'alert'\n alert_types['node_id'] = alert_types['AlertName']\n alert_types['node_label'] = alert_types['AlertName']\n \n # Create entity nodes\n entities = alert_data[['CompromisedEntity', 'EntityType']].drop_duplicates()\n entities['node_type'] = entities['EntityType'].str.lower()\n entities['node_id'] = entities['CompromisedEntity']\n entities['node_label'] = entities['CompromisedEntity']\n \n # Combine nodes\n alert_nodes = pd.concat([\n alert_types[['node_id', 'node_label', 'node_type']],\n entities[['node_id', 'node_label', 
'node_type']]\n ], ignore_index=True)\n \n # Create edges (alert -> entity)\n alert_edges = alert_data.copy()\n alert_edges['source'] = alert_edges['AlertName']\n alert_edges['target'] = alert_edges['CompromisedEntity']\n alert_edges['edge_weight'] = alert_edges['AlertCount']\n \n # Create and plot graph\n alert_graph = g.nodes(alert_nodes, node='node_id')\\\n .edges(alert_edges, source='source', destination='target')\\\n .encode_point_color('node_type')\\\n .encode_edge_size('edge_weight')\\\n .settings(url_params={'splashAfter': 'false'})\n \n print(f\"Created alert correlation graph with {len(alert_nodes)} nodes and {len(alert_edges)} edges\")\n \n # Plot the graph\n alert_graph.plot()\n else:\n print(\"No alert correlation data available\")\n \nexcept Exception as e:\n print(f\"Alert correlation graph failed: {e}\")" + "source": [ + "# Query for alert correlations\n", + "alert_correlation_query = \"\"\"\n", + "SecurityAlert\n", + "| where TimeGenerated > ago(7d)\n", + "| project \n", + " AlertName,\n", + " CompromisedEntity,\n", + " Tactics,\n", + " AlertSeverity,\n", + " TimeGenerated\n", + "| extend EntityType = case(\n", + " CompromisedEntity contains \"@\", \"User\",\n", + " CompromisedEntity matches regex @\"\\\\b(?:[0-9]{1,3}\\\\.){3}[0-9]{1,3}\\\\b\", \"IP\",\n", + " \"Host\"\n", + ")\n", + "| summarize \n", + " AlertCount = count(),\n", + " Severities = make_set(AlertSeverity),\n", + " TacticsList = make_set(Tactics)\n", + " by AlertName, CompromisedEntity, EntityType\n", + "| where AlertCount > 1\n", + "| take 200\n", + "\"\"\"\n", + "\n", + "try:\n", + " alert_data = g.kql(alert_correlation_query, timespan=timedelta(days=7))\n", + " \n", + " if len(alert_data) > 0:\n", + " # Create alert type nodes\n", + " alert_types = alert_data[['AlertName']].drop_duplicates()\n", + " alert_types['node_type'] = 'alert'\n", + " alert_types['node_id'] = alert_types['AlertName']\n", + " alert_types['node_label'] = alert_types['AlertName']\n", + " \n", + " # Create 
entity nodes\n", + " entities = alert_data[['CompromisedEntity', 'EntityType']].drop_duplicates()\n", + " entities['node_type'] = entities['EntityType'].str.lower()\n", + " entities['node_id'] = entities['CompromisedEntity']\n", + " entities['node_label'] = entities['CompromisedEntity']\n", + " \n", + " # Combine nodes\n", + " alert_nodes = pd.concat([\n", + " alert_types[['node_id', 'node_label', 'node_type']],\n", + " entities[['node_id', 'node_label', 'node_type']]\n", + " ], ignore_index=True)\n", + " \n", + " # Create edges (alert -> entity)\n", + " alert_edges = alert_data.copy()\n", + " alert_edges['source'] = alert_edges['AlertName']\n", + " alert_edges['target'] = alert_edges['CompromisedEntity']\n", + " alert_edges['edge_weight'] = alert_edges['AlertCount']\n", + " \n", + " # Create and plot graph\n", + " alert_graph = g.nodes(alert_nodes, node='node_id')\\\n", + " .edges(alert_edges, source='source', destination='target')\\\n", + " .encode_point_color('node_type')\\\n", + " .encode_edge_size('edge_weight')\\\n", + " .settings(url_params={'splashAfter': 'false'})\n", + " \n", + " print(f\"Created alert correlation graph with {len(alert_nodes)} nodes and {len(alert_edges)} edges\")\n", + " \n", + " # Plot the graph\n", + " alert_graph.plot()\n", + " else:\n", + " print(\"No alert correlation data available\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Alert correlation graph failed: {e}\")" + ] }, { "cell_type": "markdown", @@ -406,35 +476,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "This notebook demonstrated:\n", - "\n", - "1. **Connecting to Microsoft Sentinel** using Azure authentication\n", - "2. **Exploring available data** with `sentinel_tables()` and `sentinel_schema()`\n", - "3. **Security analysis** using KQL queries for:\n", - " - Failed login analysis\n", - " - Security alerts monitoring\n", - " - Network traffic analysis\n", - "4. 
**Graph visualization** of:\n", - " - User-IP relationships\n", - " - Alert correlations\n", - "5. **Advanced correlation** across multiple data sources\n", - "\n", - "## Next Steps\n", - "\n", - "- Customize queries for your specific security use cases\n", - "- Create automated monitoring dashboards\n", - "- Integrate with threat intelligence feeds\n", - "- Build detection rules based on graph patterns\n", - "\n", - "## Resources\n", - "\n", - "- [Microsoft Sentinel KQL Reference](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/)\n", - "- [Graphistry Documentation](https://pygraphistry.readthedocs.io/)\n", - "- [Azure Monitor Query Documentation](https://docs.microsoft.com/en-us/python/api/azure-monitor-query/)\n" - ], + "source": "## Summary\n\nThis notebook demonstrated:\n\n1. **Connecting to Microsoft Sentinel** using Azure authentication (device code, service principal, or DefaultAzureCredential)\n2. **Exploring available data** with `sentinel_tables()` and `sentinel_schema()`\n3. **Security analysis** using KQL queries for:\n - Failed login analysis\n - Security alerts monitoring\n - Network traffic analysis\n4. **Graph visualization** of:\n - User-IP relationships\n - Alert correlations\n5. 
**Advanced correlation** across multiple data sources\n\n## Next Steps\n\n- **Customize queries** for your specific security use cases and available data tables\n- **Create automated dashboards** by scheduling notebook execution\n- **Integrate with threat intelligence** feeds using additional KQL joins\n- **Build detection rules** based on graph patterns you discover\n- **Scale analysis** by adjusting time windows and data volumes\n\n## Troubleshooting Tips\n\n- **No data found**: Some workspaces may not have SecurityEvent, SigninLogs, or SecurityAlert tables\n- **Authentication issues**: Try `az login` first, or check your service principal credentials\n- **Permission errors**: Ensure your account has Log Analytics Reader permissions\n- **Empty results**: Adjust time ranges - some workspaces have limited data retention\n\n## Resources\n\n- [Microsoft Sentinel KQL Reference](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/)\n- [Graphistry Documentation](https://pygraphistry.readthedocs.io/)\n- [Azure Monitor Query Documentation](https://docs.microsoft.com/en-us/python/api/azure-monitor-query/)\n- [Sentinel Data Connectors](https://docs.microsoft.com/en-us/azure/sentinel/connect-data-sources)", "outputs": [] } ], From cf14ee9d2a84081ab3218d9ebab00462f3852228 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Tue, 23 Sep 2025 11:40:19 +0200 Subject: [PATCH 18/21] fix: Add missing SentinelMixin import in test file - Import SentinelMixin to fix F821 undefined name errors - Resolves flake8 lint issues in test_sentinel.py --- graphistry/tests/test_sentinel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/graphistry/tests/test_sentinel.py b/graphistry/tests/test_sentinel.py index d44f94afcf..5bcf349b16 100644 --- a/graphistry/tests/test_sentinel.py +++ b/graphistry/tests/test_sentinel.py @@ -4,6 +4,7 @@ from datetime import datetime, timedelta from typing import List +from graphistry.plugins.sentinel import SentinelMixin from 
graphistry.plugins_types.sentinel_types import ( SentinelConfig, SentinelConnectionError, From e9c77d5ba433e7f348e5a23b9be51b153ee65264 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Tue, 23 Sep 2025 14:29:31 +0200 Subject: [PATCH 19/21] fix: Resolve all remaining lint issues - Fix W504: Move binary operator after line break - Fix W292: Add newlines at end of all files - Fix F841: Remove unused local variables in tests - All flake8 lint issues now resolved --- graphistry/plugins/sentinel.py | 6 +++--- graphistry/plugins_types/sentinel_types.py | 2 +- graphistry/tests/test_sentinel.py | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/graphistry/plugins/sentinel.py b/graphistry/plugins/sentinel.py index 2183803611..7dec325f2f 100644 --- a/graphistry/plugins/sentinel.py +++ b/graphistry/plugins/sentinel.py @@ -355,8 +355,8 @@ def kql( for result in results: # Determine if we should unwrap nested data do_unwrap = ( - unwrap_nested is True or - (unwrap_nested is None and _should_unwrap(result)) + unwrap_nested is True + or (unwrap_nested is None and _should_unwrap(result)) ) if do_unwrap: @@ -690,4 +690,4 @@ def _should_unwrap(result: SentinelQueryResult, sample_rows: int = 5) -> bool: except (json.JSONDecodeError, ValueError): continue - return False \ No newline at end of file + return False diff --git a/graphistry/plugins_types/sentinel_types.py b/graphistry/plugins_types/sentinel_types.py index 6005dbbd68..a32773e02d 100644 --- a/graphistry/plugins_types/sentinel_types.py +++ b/graphistry/plugins_types/sentinel_types.py @@ -70,4 +70,4 @@ class SentinelConfig: """Use device code authentication flow""" _client: Optional[LogsQueryClient] = None - """Cached client instance (internal use)""" \ No newline at end of file + """Cached client instance (internal use)""" diff --git a/graphistry/tests/test_sentinel.py b/graphistry/tests/test_sentinel.py index 5bcf349b16..76c3a4a705 100644 --- a/graphistry/tests/test_sentinel.py +++
b/graphistry/tests/test_sentinel.py @@ -59,7 +59,7 @@ def test_configure_sentinel_service_principal(self): def test_configure_sentinel_custom_timespan(self): """Test Sentinel configuration with custom default timespan.""" custom_timespan = timedelta(days=7) - result = self.plotter.configure_sentinel( + self.plotter.configure_sentinel( workspace_id=self.workspace_id, default_timespan=custom_timespan ) @@ -331,7 +331,7 @@ def test_init_default_credential(self, mock_credential_class, mock_client_class) mock_credential_class.return_value = mock_credential config = SentinelConfig(workspace_id="test-workspace") - client = init_sentinel_client(config) + init_sentinel_client(config) mock_credential_class.assert_called_once() mock_client_class.assert_called_once_with(mock_credential) @@ -351,7 +351,7 @@ def test_init_service_principal(self, mock_credential_class, mock_client_class): client_id="client", client_secret="secret" ) - client = init_sentinel_client(config) + init_sentinel_client(config) mock_credential_class.assert_called_once_with( tenant_id="tenant", @@ -371,10 +371,10 @@ def test_init_custom_credential(self, mock_client_class): credential=custom_credential ) - client = init_sentinel_client(config) + init_sentinel_client(config) mock_client_class.assert_called_once_with(custom_credential) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() From 6f91006bab6292b9182c4b618430d83d985f5186 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Tue, 23 Sep 2025 15:42:29 +0200 Subject: [PATCH 20/21] fix: Resolve mypy type checking errors for Sentinel plugin --- graphistry/client_session.py | 2 ++ graphistry/plugins/sentinel.py | 18 ++++++++++++------ mypy.ini | 9 +++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/graphistry/client_session.py b/graphistry/client_session.py index f12f7bcb56..4861b65140 100644 --- a/graphistry/client_session.py +++ b/graphistry/client_session.py @@ -9,6 +9,7 @@ from . 
import util from .plugins_types.spanner_types import SpannerConfig from .plugins_types.kusto_types import KustoConfig +from .plugins_types.sentinel_types import SentinelConfig @@ -85,6 +86,7 @@ def __init__(self) -> None: # NOTE: These are dataclasses, so we shallow copy them self.kusto: Optional[KustoConfig] = None self.spanner: Optional[SpannerConfig] = None + self.sentinel: Optional[SentinelConfig] = None # TODO: Migrate to a pattern like Kusto or Spanner self._bolt_driver: Optional[Any] = None diff --git a/graphistry/plugins/sentinel.py b/graphistry/plugins/sentinel.py index 7dec325f2f..d0c4f02a3b 100644 --- a/graphistry/plugins/sentinel.py +++ b/graphistry/plugins/sentinel.py @@ -1,14 +1,20 @@ import time import pandas as pd -from typing import Any, List, Optional, TYPE_CHECKING, Union, overload, Literal +from typing import Any, List, Optional, TYPE_CHECKING, Union, overload, Literal, Tuple from datetime import datetime, timedelta if TYPE_CHECKING: from azure.monitor.query import LogsQueryClient from azure.core.credentials import TokenCredential + from azure.core.exceptions import HttpResponseError + from azure.identity import DefaultAzureCredential, ClientSecretCredential, DeviceCodeCredential else: LogsQueryClient = Any TokenCredential = Any + HttpResponseError = Any + DefaultAzureCredential = Any + ClientSecretCredential = Any + DeviceCodeCredential = Any from graphistry.Plottable import Plottable from graphistry.util import setup_logger @@ -228,7 +234,7 @@ def kql( self, query: str, *, - timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, single_table: Literal[True] = True, include_statistics: bool = False @@ -240,7 +246,7 @@ def kql( self, query: str, *, - timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: 
Optional[bool] = None, single_table: Literal[False], include_statistics: bool = False @@ -252,7 +258,7 @@ def kql( self, query: str, *, - timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, single_table: bool = True, include_statistics: bool = False @@ -263,7 +269,7 @@ def kql( self, query: str, *, - timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, single_table: bool = True, include_statistics: bool = False @@ -479,7 +485,7 @@ def sentinel_schema(self, table: str) -> pd.DataFrame: def _sentinel_query( self, query: str, - timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] = None + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None ) -> List[SentinelQueryResult]: """Execute KQL query and return raw results. 
diff --git a/mypy.ini b/mypy.ini index 48d2a4279d..835bbc23cc 100644 --- a/mypy.ini +++ b/mypy.ini @@ -110,5 +110,14 @@ ignore_missing_imports = True [mypy-azure.kusto.*] ignore_missing_imports = True +[mypy-azure.monitor.*] +ignore_missing_imports = True + +[mypy-azure.identity.*] +ignore_missing_imports = True + +[mypy-azure.core.*] +ignore_missing_imports = True + [mypy-requests.*] ignore_missing_imports = True From 137427d60fb4b1fd06d4ad37d624ac047dfc9341 Mon Sep 17 00:00:00 2001 From: Sindre Breda Date: Tue, 23 Sep 2025 15:57:28 +0200 Subject: [PATCH 21/21] fix: Resolve kql method signature conflict between SentinelMixin and KustoMixin --- graphistry/plugins/kusto.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/graphistry/plugins/kusto.py b/graphistry/plugins/kusto.py index 69e2bbe80d..9593988e62 100644 --- a/graphistry/plugins/kusto.py +++ b/graphistry/plugins/kusto.py @@ -1,6 +1,7 @@ import time import pandas as pd -from typing import Any, List, Optional, TYPE_CHECKING, Union, overload, Literal +from typing import Any, List, Optional, TYPE_CHECKING, Union, overload, Literal, Tuple +from datetime import datetime, timedelta if TYPE_CHECKING: from azure.kusto.data import KustoClient @@ -176,9 +177,11 @@ def kql( self, query: str, *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, - single_table: Literal[True] = True - ) -> List[pd.DataFrame]: + single_table: Literal[True] = True, + include_statistics: bool = False + ) -> pd.DataFrame: ... @overload @@ -186,9 +189,11 @@ def kql( self, query: str, *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, - single_table: Literal[False] - ) -> pd.DataFrame: + single_table: Literal[False], + include_statistics: bool = False + ) -> List[pd.DataFrame]: ... 
@overload @@ -196,8 +201,10 @@ def kql( self, query: str, *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, - single_table: bool = True + single_table: bool = True, + include_statistics: bool = False ) -> Union[pd.DataFrame, List[pd.DataFrame]]: ... @@ -205,8 +212,10 @@ def kql( self, query: str, *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, - single_table: bool = True + single_table: bool = True, + include_statistics: bool = False ) -> Union[pd.DataFrame, List[pd.DataFrame]]: """Execute KQL query and return result tables as DataFrames. @@ -217,10 +226,14 @@ def kql( :param query: KQL query string to execute :type query: str + :param timespan: Time range for the query (ignored by Kusto, for compatibility with Sentinel) + :type timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] :param unwrap_nested: Strategy for handling nested/dynamic columns :type unwrap_nested: Optional[bool] :param single_table: If True, return single DataFrame (first table if multiple); if False, return list :type single_table: bool + :param include_statistics: Include query statistics (ignored by Kusto, for compatibility with Sentinel) + :type include_statistics: bool :returns: Single DataFrame if single_table=True, else list of DataFrames :rtype: Union[pd.DataFrame, List[pd.DataFrame]]