In [None]:
!pip install mercury --quiet
## ignore errors

In [None]:
import mercury as mr

## Classes for consuming Knowledge Engine API

In [None]:
"""
  ------------------------------------------
  Classes for consuming Knowledge Engine API
  ------------------------------------------
"""

from datetime import datetime
from typing import List, Optional, Any, Dict, ClassVar
from uuid import UUID

from pydantic import BaseModel, Field
from enum import Enum

"""
  Generic Models for KE Scans API
"""
class Data(BaseModel):
    """Data source a scan targets, identified by its full resource name."""
    RESOURCE_TYPE_TABLE: ClassVar[str] = "table"
    RESOURCE_TYPE_DATASET: ClassVar[str] = "dataset"

    resource: str

    @property
    def is_for_table(self) -> bool:
        """Whether the next-to-last path segment, minus its last character, is "table"."""
        collection = self.resource.split('/')[-2]
        return collection[:-1] == self.RESOURCE_TYPE_TABLE

    @property
    def is_for_dataset(self) -> bool:
        """Whether the next-to-last path segment, minus its last character, is "dataset"."""
        collection = self.resource.split('/')[-2]
        return collection[:-1] == self.RESOURCE_TYPE_DATASET

    @property
    def resource_short_name(self) -> str:
        """Final '/'-separated segment of the resource name."""
        *_, leaf = self.resource.split('/')
        return leaf


class OnDemand(BaseModel):
    """On-demand trigger configuration; declares no fields of its own."""


class Trigger(BaseModel):
    """Represents the trigger mechanism for a scan.

    `on_demand` is optional so that scans whose trigger is not on-demand
    (for example a scheduled scan, which carries a different key) still
    validate instead of failing the whole scan listing.
    """
    # Populated from the "onDemand" key; None when that key is absent.
    on_demand: Optional[OnDemand] = Field(None, alias='onDemand')


class ExecutionSpec(BaseModel):
    """Represents the execution specification for a scan."""
    # Trigger settings; no alias is declared, so the input key is "trigger".
    trigger: Trigger


class ExecutionStatus(BaseModel):
    """Represents the execution status of the latest job for a scan.

    Every timestamp is optional: the start time is not present in all scan
    types (e.g., KNOWLEDGE_ENGINE), and a scan whose latest job has not
    finished, or that has never run, has no end or create time to report.
    """
    latest_job_start_time: Optional[datetime] = Field(None, alias='latestJobStartTime')
    latest_job_end_time: Optional[datetime] = Field(None, alias='latestJobEndTime')
    latest_job_create_time: Optional[datetime] = Field(None, alias='latestJobCreateTime')


class ScanTypeValue(Enum):
    """Scan types that can appear in the `type` field of a data scan.

    Besides the types this notebook consumes, the other scan types are
    listed so that validating a scan of one of those types does not fail
    on an unknown enum value.
    """
    DATA_SCAN_TYPE_UNSPECIFIED = "DATA_SCAN_TYPE_UNSPECIFIED"
    KNOWLEDGE_ENGINE = "KNOWLEDGE_ENGINE"
    DATA_DOCUMENTATION = "DATA_DOCUMENTATION"
    DATA_PROFILE = "DATA_PROFILE"
    DATA_QUALITY = "DATA_QUALITY"
    DATA_DISCOVERY = "DATA_DISCOVERY"


class DataScan(BaseModel):
    """Represents a single data scan item.

    `description`, `display_name`, `execution_spec` and `execution_status`
    have defaults so that a scan that omits them (e.g. one created without
    a description, or one that has never run) still validates.
    """
    name: str
    uid: UUID
    description: str = ""
    display_name: str = Field("", alias='displayName')
    state: str
    create_time: datetime = Field(..., alias='createTime')
    update_time: datetime = Field(..., alias='updateTime')
    data: Data
    execution_spec: Optional[ExecutionSpec] = Field(None, alias='executionSpec')
    execution_status: Optional[ExecutionStatus] = Field(None, alias='executionStatus')
    type: ScanTypeValue

    @property
    def is_for_table(self) -> bool:
        """Shortcut for `data.is_for_table`."""
        return self.data.is_for_table

    @property
    def is_for_dataset(self) -> bool:
        """Shortcut for `data.is_for_dataset`."""
        return self.data.is_for_dataset

    @property
    def resource_short_name(self) -> str:
        """Shortcut for `data.resource_short_name`."""
        return self.data.resource_short_name

    @property
    def resource_name(self) -> str:
        """Full resource name of the scanned data (`data.resource`)."""
        return self.data.resource


class DataScansResponse(BaseModel):
    """Root model of the JSON response: a list of scans under "dataScans"."""
    data_scans: List[DataScan] = Field(alias='dataScans')


"""
  type KNOWLEDGE_ENGINE models
"""
class KESpec(BaseModel):
    """Model for knowledgeEngineSpec; declares no fields of its own."""


class ColumnTuple(BaseModel):
    """One fully qualified column taking part in a join relationship."""
    entry_fqn: str = Field(
        alias='entryFqn',
        description="Fully qualified name of the BigQuery table.",
    )
    field_path: str = Field(
        alias='fieldPath',
        description="The name of the column.",
    )


class SchemaRelationship(BaseModel):
    """A join relationship between a left and a right list of columns."""
    left_columns_tuple: List[ColumnTuple] = Field(alias='leftColumnsTuple')
    right_columns_tuple: List[ColumnTuple] = Field(alias='rightColumnsTuple')
    type: str = Field(description="The type of relationship, e.g., 'JOIN'.")


class BusinessTerm(BaseModel):
    """A single term and its definition from the business glossary."""
    # Name of the glossary term.
    title: str
    # Free-text definition of the term.
    description: str


class BusinessGlossary(BaseModel):
    """Contains a list of business terms relevant to the dataset."""
    # Glossary entries, each validated as a BusinessTerm.
    terms: List[BusinessTerm]


class DatasetResult(BaseModel):
    """Dataset-level result: description, schema relationships and glossary."""
    description: str
    schema_relationship: List[SchemaRelationship] = Field(alias='schemaRelationship')
    business_glossary: BusinessGlossary = Field(alias='businessGlossary')


class KEResult(BaseModel):
    """Result object of a KNOWLEDGE_ENGINE scan, wrapping the dataset result."""
    dataset_result: DatasetResult = Field(alias='datasetResult')


class KEScan(DataScan):
    """Represents a KNOWLEDGE_ENGINE data scan."""
    knowledge_engine_spec: Optional[KESpec] = Field(None, alias='knowledgeEngineSpec')
    knowledge_engine_result: KEResult = Field(..., alias='knowledgeEngineResult')

    @property
    def dataset_description(self) -> str:
        """Shortcut to the dataset description inside the scan result."""
        return self.knowledge_engine_result.dataset_result.description

    @property
    def business_glossary(self) -> BusinessGlossary:
        """Shortcut to the business glossary inside the scan result."""
        return self.knowledge_engine_result.dataset_result.business_glossary

    @property
    def schema_relationships(self) -> List[SchemaRelationship]:
        """Shortcut to the list of schema relationships inside the scan result."""
        return self.knowledge_engine_result.dataset_result.schema_relationship

"""
  type DATA_DOCUMENTATION generic models
"""

class DDSpec(BaseModel):
    """Model for dataDocumentationSpec; declares no fields of its own."""


class Query(BaseModel):
    """Represents a single SQL query with its description."""
    # Text of the SQL statement.
    sql: str
    # Free-text description accompanying the query.
    description: str



"""
  type DATA_DOCUMENTATION table models
"""
class SchemaField(BaseModel):
    """Represents a single field (column) in a table schema."""
    # Column name.
    name: str
    # Free-text description of the column.
    description: str


class Schema(BaseModel):
    """Represents the schema of a table, containing a list of fields."""
    # Columns of the table, each validated as a SchemaField.
    fields: List[SchemaField]


class TableResult(BaseModel):
    """Detailed documentation results for one table."""
    overview: str
    # Exposed as `the_schema` (input key "schema") to prevent a name collision.
    the_schema: Schema = Field(..., alias="schema")
    queries: List[Query]
    query_theme: Optional[Dict[str, Any]] = Field(default=None, alias='queryTheme')


class DDTableResult(BaseModel):
    """Result object of a DATA_DOCUMENTATION scan that targets a table."""
    queries: List[Query]
    overview: str
    # Exposed as `the_schema` (input key "schema") to prevent a name collision.
    the_schema: Schema = Field(..., alias="schema")
    table_result: TableResult = Field(alias='tableResult')


class DDTableScan(DataScan):
    """Represents a DATA_DOCUMENTATION data scan that targets a table."""
    data_documentation_spec: Optional[DDSpec] = Field(None, alias='dataDocumentationSpec')
    data_documentation_result: DDTableResult = Field(..., alias='dataDocumentationResult')

    @property
    def full_table_name(self) -> str:
        """Return `project.dataset.table` built from the scanned resource name.

        Relies on the resource having the form
        `//bigquery.googleapis.com/projects/P/datasets/D/tables/T`, whose
        '/'-split puts P, D and T at indexes 4, 6 and 8.
        """
        parts = self.data.resource.split('/')
        return f"{parts[4]}.{parts[6]}.{parts[8]}"

    @property
    def table_overview(self) -> str:
        """Overview text of the documented table.

        Prefer this accessor over `description`: its name does not collide
        with any field inherited from DataScan.
        """
        return self.data_documentation_result.table_result.overview

    # NOTE(review): `description` is also the name of a field inherited from
    # DataScan. Pydantic may register this property as that field's default
    # rather than expose it as a property, in which case `scan.description`
    # yields the scan's own description instead of the table overview —
    # confirm against the installed pydantic version.
    @property
    def description(self) -> str:
        return self.data_documentation_result.table_result.overview # shortcut

    @property
    def fields(self) -> List[SchemaField]:
        """Schema fields of the documented table."""
        return self.data_documentation_result.table_result.the_schema.fields

    @property
    def queries(self) -> List[Query]:
        """Queries listed in the table result."""
        return self.data_documentation_result.table_result.queries

"""
  type DATA_DOCUMENTATION dataset models
"""
class DDDatasetResult(BaseModel):
    """Dataset-level part of a DATA_DOCUMENTATION result: a list of queries."""
    queries: List[Query]

class DDDataDocumentationResult(BaseModel):
    """Result object of a DATA_DOCUMENTATION scan that targets a dataset."""
    queries: List[Query]
    dataset_result: DDDatasetResult = Field(alias='datasetResult')


class DDDatasetScan(DataScan):
    """A DATA_DOCUMENTATION data scan that targets a dataset."""
    data_documentation_spec: Optional[DDSpec] = Field(default=None, alias='dataDocumentationSpec')
    data_documentation_result: DDDataDocumentationResult = Field(alias='dataDocumentationResult')

    @property
    def queries(self) -> List[Query]:
        """Queries listed in the dataset result of this scan."""
        dataset_result = self.data_documentation_result.dataset_result
        return dataset_result.queries

## KEDataScanHelper

### Classes for output from KEDatasetScanHelper

In [None]:
"""
  ------------------------------------------
  Classes for output from KEDatasetScanHelper
  ------------------------------------------
"""
import json

class KEDatasetTable(BaseModel):
    """
    One table: its name, description, fields and example queries.
    """
    name: str
    description: str
    fields: List[SchemaField] = Field(description="A list of fields in the table.")
    queries: List[Query] = Field(description="A list of queries that can be run against the table.")

    @property
    def fields_json(self) -> str:
        """JSON array holding the dumped `fields` entries."""
        dumped = self.model_dump(include={'fields'})
        return json.dumps(dumped['fields'])

    @property
    def queries_json(self) -> str:
        """JSON array holding the dumped `queries` entries."""
        dumped = self.model_dump(include={'queries'})
        return json.dumps(dumped['queries'])

    @property
    def text_field_descriptions(self) -> str:
        """Fenced text block with one "`name` -- Definition: ..." line per field."""
        lines = [
            f"`{field.name}` -- Definition: {field.description}\n"
            for field in self.fields
        ]
        return '```\n' + ''.join(lines) + '```'


class KEDatasetRelationship(BaseModel):
    """
    One relationship between two database tables, expressed as a join condition.
    """
    table1: str = Field(description="The name of the first table in the relationship.")
    table2: str = Field(description="The name of the second table in the relationship.")
    relationship: str = Field(description="The join condition that defines the relationship.")
    source: str = Field(description="The source that inferred or defined this relationship.")

class KEDatasetDetails(BaseModel):
    """
    Represents the detailed documentation results for a specific dataset.
    """
    project_id: str = Field(..., description="Project ID of the dataset.")
    dataset_name: str = Field(..., description="Name of the dataset")
    dataset_location: str = Field(..., description="Location of the dataset.")
    dataset_description: str = Field(..., description="A brief overview of the dataset.")
    dataset_relationships: List[KEDatasetRelationship] = Field(..., description="A list of table relationships.")
    dataset_queries: List[Query] = Field(..., description="A list of queries that can be run against the dataset.")
    dataset_business_glossary: List[BusinessTerm] = Field(..., description="A list of business glossary terms.")
    dataset_tables: List[KEDatasetTable] = Field(..., description="A list of tables in the dataset.")

    @property
    def dataset_relationships_json(self) -> str:
        """JSON array of the dumped `dataset_relationships` entries."""
        # `dataset_relationships` is a plain list and has no
        # `model_dump_json`; dump the model and serialise that key instead,
        # the same way the sibling properties do.
        full_model = self.model_dump()
        return json.dumps(full_model['dataset_relationships'])

    @property
    def dataset_queries_json(self) -> str:
        """JSON array of the dumped `dataset_queries` entries."""
        full_model = self.model_dump()
        return json.dumps(full_model['dataset_queries'])

    @property
    def dataset_glossary_terms_json(self) -> str:
        """JSON array of the dumped `dataset_business_glossary` entries."""
        full_model = self.model_dump()
        return json.dumps(full_model['dataset_business_glossary'])

### Helper Authentication

In [None]:
import requests, re

from google.cloud import bigquery
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
import google.auth

class KEAuth:
    """Obtain default Google credentials lazily and perform authorised GETs."""

    def __init__(self):
        self.__credentials = None
        self.__project = None

    def __refresh_credentials(self):
        """Load default credentials on first use and refresh them when invalid.

        Returns:
            This instance, so the call can be chained.
        """
        if not self.__credentials:
            # Store the detected project on the instance; a bare `__project`
            # here would only bind a (name-mangled) local variable.
            self.__credentials, self.__project = google.auth.default()

        if not self.__credentials.valid:
            self.__credentials.refresh(Request())

        return self

    def __get_headers(self):
        """Return request headers carrying a bearer token and a JSON content type."""
        self.__refresh_credentials()
        return {
          "Authorization": f"Bearer {self.__credentials.token}",
          "Content-Type": "application/json"
        }

    def get_url_content(self, url: str):
        """GET `url` with authorisation headers and return the response body text.

        Raises:
            requests.exceptions.RequestException: on connection problems or a
                4xx/5xx status; the error is printed and then re-raised.
        """
        try:
            response = requests.get(url, headers=self.__get_headers())
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
            return response.text
        except requests.exceptions.RequestException as e:
            print(f"Error fetching URL {url}: {e}")
            raise

### KEDatasetScanHelper

In [None]:

"""
  ------------------------------------------
  KEDatasetScanHelper
  ------------------------------------------
"""

class KEDatasetScanHelper(KEAuth):
    """A helper for interacting with the Knowledge Engine API.

    For one BigQuery dataset it lists the Dataplex data scans of the
    dataset's location, keeps the KNOWLEDGE_ENGINE and DATA_DOCUMENTATION
    scans that target the dataset or its tables, loads their full view and
    exposes the combined result as `KEDataset*` models.

    Table allow/block lists are stored as full table resource names (see
    `TABLE_RESOURCE_TEMPLATE`); every table reference is normalised to that
    form before it is compared.
    """
    DATAPLEX_BASE_URL = "https://dataplex.googleapis.com/v1"
    DATAPLEX_LIST_SCANS_URL = DATAPLEX_BASE_URL + "/projects/{project_id}/locations/{location}/dataScans"
    TABLE_RESOURCE_TEMPLATE = "//bigquery.googleapis.com/projects/{project_id}/datasets/{dataset_id}/tables/{table_name}"

    def __init__(self, project_id: str, dataset_name: str):
        super().__init__()
        self.dataset_name = dataset_name
        self.project_id = project_id
        self.__dataset_location = None
        self.__tables = []
        # None means "not fetched yet"; an empty list is a valid cached result.
        self.__data_scans = None
        self.__allowlist_tables = set()
        self.__blocklist_tables = set()

    def _flush(self):
        """Drop cached scans and any table constraints."""
        self.__tables.clear()
        self.__data_scans = None
        self.__allowlist_tables.clear()
        self.__blocklist_tables.clear()

    @staticmethod
    def _short_table_name(table: str) -> str:
        """Return the bare table name from a resource name, dotted name or short name."""
        return table.rsplit('/', 1)[-1].rsplit('.', 1)[-1]

    def _table_resource(self, table: str) -> str:
        """Normalise a table reference to its full resource name.

        Values that already start with `//` are returned unchanged; anything
        else is reduced to its bare table name and placed in this helper's
        project and dataset.
        """
        if table.startswith('//'):
            return table

        return self.TABLE_RESOURCE_TEMPLATE.format(
            project_id=self.project_id,
            dataset_id=self.dataset_name,
            table_name=self._short_table_name(table)
        )

    def _table_is_allowed(self, table: str) -> bool:
        """Whether `table` passes the allowlist and is not on the blocklist."""
        return self._is_in_allowlist(table) and not self._is_in_blocklist(table)

    def _is_in_allowlist(self, table: str) -> bool:
        """An empty allowlist admits every table."""
        if not self.__allowlist_tables:
            return True

        return self._table_resource(table) in self.__allowlist_tables

    def _is_in_blocklist(self, table: str) -> bool:
        """An empty blocklist blocks nothing."""
        if not self.__blocklist_tables:
            return False

        return self._table_resource(table) in self.__blocklist_tables

    def _list_raw_scans(self) -> List[Dict[str, Any]]:
        """Return the raw scan dicts of the dataset's location, following pagination.

        Request and JSON decoding errors propagate to the caller.
        """
        from urllib.parse import urlencode

        list_url = self.DATAPLEX_LIST_SCANS_URL.format(
            project_id=self.project_id,
            location=self.dataset_location
        )

        raw_scans = []
        page_token = None
        while True:
            url = list_url
            if page_token:
                url = f"{list_url}?{urlencode({'pageToken': page_token})}"

            page = json.loads(self.get_url_content(url))
            raw_scans.extend(page.get('dataScans', []))

            page_token = page.get('nextPageToken')
            if not page_token:
                return raw_scans

    def _get_scans_of_interest(self) -> list:
        """Return basic-view scans that target this dataset or its allowed tables.

        Only scan types that `dataplex_scans` can turn into a typed model are
        kept; they are filtered on the raw `type` string before validation so
        that unrelated scans cannot break the listing.
        """
        handled_types = {
            ScanTypeValue.KNOWLEDGE_ENGINE.value,
            ScanTypeValue.DATA_DOCUMENTATION.value,
        }

        # Limit the scans to items in the requested dataset (per constructor)
        ds_test_string = f"/datasets/{self.dataset_name}"
        table_test_string = f"{ds_test_string}/tables/"

        scans_of_interest = []
        for scan in self._list_raw_scans():
            resource = (scan.get('data') or {}).get('resource')
            if not resource or scan.get('type') not in handled_types:
                continue

            if not (resource.endswith(ds_test_string) or table_test_string in resource):
                continue

            new_scan = DataScan(**scan)

            if new_scan.is_for_dataset:
                scans_of_interest.append(new_scan)
            elif new_scan.is_for_table and self._table_is_allowed(new_scan.resource_name):
                scans_of_interest.append(new_scan)

        return scans_of_interest

    @staticmethod
    def _full_scan_model(scan: DataScan):
        """Return the model class for the full view of `scan`, or None if unsupported."""
        if scan.type == ScanTypeValue.KNOWLEDGE_ENGINE:
            return KEScan

        if scan.type == ScanTypeValue.DATA_DOCUMENTATION:
            if scan.is_for_dataset:
                return DDDatasetScan
            if scan.is_for_table:
                return DDTableScan

        return None

    def _require_scan(self, scan, kind: str):
        """Return `scan`, or raise LookupError when the dataset has no such scan."""
        if scan is None:
            raise LookupError(
                f"No {kind} scan found for dataset {self.project_id}.{self.dataset_name}"
            )
        return scan

    def set_table_list_constraints(self, allowlist: Optional[list] = None, blocklist: Optional[list] = None):
        """Restrict the helper to (or exclude) the given tables and reset cached scans.

        Args:
            allowlist: table names to keep; empty or None keeps every table.
            blocklist: table names to drop; empty or None drops none.

        Returns:
            This instance, so the call can be chained.

        Raises:
            ValueError: when a name appears in both lists.
        """
        allowlist = list(allowlist or [])
        blocklist = list(blocklist or [])

        overlap = sorted(set(allowlist).intersection(blocklist))
        if overlap:
            raise ValueError(f"Allowlist and blocklist cannot contain the same items: {overlap}")

        self._flush()
        self.__allowlist_tables.update(self._table_resource(name) for name in allowlist)
        self.__blocklist_tables.update(self._table_resource(name) for name in blocklist)

        return self

    @property
    def dataset_location(self) -> str:
        """Location of the dataset, looked up through BigQuery once and then cached."""
        if not self.__dataset_location:
            client = bigquery.Client()
            dataset = client.get_dataset(f'{self.project_id}.{self.dataset_name}')
            self.__dataset_location = dataset.location

        return self.__dataset_location

    @property
    def dataplex_scans(self) -> list:
        """Typed full-view scans for the dataset, fetched once and then cached."""
        if self.__data_scans is None:
            loaded_scans = []

            for scan in self._get_scans_of_interest():
                model = self._full_scan_model(scan)
                if model is None:
                    continue

                full_scan_url = f"{self.DATAPLEX_BASE_URL}/{scan.name}?view=FULL"
                full_view_scan = json.loads(self.get_url_content(full_scan_url))
                loaded_scans.append(model(**full_view_scan))

            self.__data_scans = loaded_scans

        return self.__data_scans

    @property # dataset knowledge engine scan
    def dataset_ke_scan(self) -> Optional[KEScan]:
        """First KNOWLEDGE_ENGINE scan found, or None."""
        for scan in self.dataplex_scans:
            if isinstance(scan, KEScan):
                return scan

        return None

    @property # dataset data documentation scan
    def dataset_dd_scan(self) -> Optional[DDDatasetScan]:
        """First dataset-level DATA_DOCUMENTATION scan found, or None."""
        for scan in self.dataplex_scans:
            if isinstance(scan, DDDatasetScan):
                return scan

        return None

    @property
    def dataset_description(self) -> str:
        """Dataset description taken from the KNOWLEDGE_ENGINE scan."""
        return self._require_scan(self.dataset_ke_scan, "KNOWLEDGE_ENGINE").dataset_description

    @property
    def dataset_tables(self) -> List[KEDatasetTable]:
        """One KEDatasetTable per allowed table-level DATA_DOCUMENTATION scan."""
        tables = []
        for scan in self.dataplex_scans:
            if not isinstance(scan, DDTableScan):
                continue

            if not self._table_is_allowed(scan.resource_name):
                continue

            tables.append(KEDatasetTable(**{
                "name": scan.full_table_name,
                # Read the overview explicitly: `description` is also the
                # name of the scan's own inherited field.
                "description": scan.data_documentation_result.table_result.overview,
                "fields": scan.fields,
                "queries": scan.queries
            }))

        return tables

    @property
    def dataset_queries(self) -> List[Query]:
        """Queries taken from the dataset-level DATA_DOCUMENTATION scan."""
        return self._require_scan(self.dataset_dd_scan, "DATA_DOCUMENTATION dataset").queries

    @property
    def dataset_business_glossary(self) -> List[BusinessTerm]:
        """Glossary terms taken from the KNOWLEDGE_ENGINE scan."""
        return self._require_scan(self.dataset_ke_scan, "KNOWLEDGE_ENGINE").business_glossary.terms

    @property
    def dataset_relationships(self) -> List[KEDatasetRelationship]:
        """
          This will require an update when the relation representation becomes more complex.
          Currently handles multiple AND-ed `=` conditions between the left and right side.

          Relationships with an empty side, or touching a table that is not
          allowed, are skipped.
        """
        project_dataset = self.project_id + '.' + self.dataset_name
        ke_scan = self._require_scan(self.dataset_ke_scan, "KNOWLEDGE_ENGINE")

        return_relationships = []

        for relationship in ke_scan.schema_relationships:
            left_tuples = relationship.left_columns_tuple
            right_tuples = relationship.right_columns_tuple
            if not left_tuples or not right_tuples:
                continue

            table1_fqn = left_tuples[0].entry_fqn
            table2_fqn = right_tuples[0].entry_fqn
            if not (self._table_is_allowed(table1_fqn) and self._table_is_allowed(table2_fqn)):
                continue

            # entry_fqn may be a '/'-separated resource name or a dotted
            # name; either way the table is the last segment.
            table1_sql_name = f"{project_dataset}.{self._short_table_name(table1_fqn)}"
            table2_sql_name = f"{project_dataset}.{self._short_table_name(table2_fqn)}"

            join_conditions = [
                f"{table1_sql_name}.{left_item.field_path} = {table2_sql_name}.{right_item.field_path}"
                for left_item, right_item in zip(left_tuples, right_tuples)
            ]

            return_relationships.append(KEDatasetRelationship(**{
                'table1': table1_sql_name,
                'table2': table2_sql_name,
                'relationship': ' AND '.join(join_conditions),
                'source': 'LLM-inferred'
            }))

        return return_relationships

    @property
    def dataset_all_details(self) -> KEDatasetDetails:
        """Everything the helper knows about the dataset, as one KEDatasetDetails."""
        return KEDatasetDetails(**{
            "project_id": self.project_id,
            "dataset_name": self.dataset_name,
            "dataset_location": self.dataset_location,
            "dataset_description": self.dataset_description,
            "dataset_relationships": self.dataset_relationships,
            "dataset_queries": self.dataset_queries,
            "dataset_business_glossary": self.dataset_business_glossary,
            "dataset_tables": self.dataset_tables
        })

## Testing

In [None]:
# Build the helper for project 'ai-learning-agents', dataset 'thelook'.
ke_helper = KEDatasetScanHelper('ai-learning-agents', 'thelook')
# Accessing this property triggers the scan lookups and assembles a KEDatasetDetails.
ds_details = ke_helper.dataset_all_details

# Show the collected details as JSON through the mercury widgets.
app = mr.App(title="Display notebook", static_notebook=True)
mr.JSON(ds_details.model_dump_json())


# Stuff

In [None]:
# KE Dataset
# {
#   "name": "projects/ai-learning-agents/locations/us-central1/dataScans/aee90f95a-b987-4310-b525-3b3a93e03158",
#   "uid": "2ffffe29-9610-44ae-b29f-cae0f7537ae9",
#   "description": "Insights scan for the dataset - \"thelook\" with default configuration created through generate insights button",
#   "displayName": "thelook-knowledge-engine-scan",
#   "state": "ACTIVE",
#   "createTime": "2025-06-06T16:52:44.272215688Z",
#   "updateTime": "2025-06-06T16:52:49.162830306Z",
#   "data": {
#     "resource": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook"
#   },
#   "executionSpec": {
#     "trigger": {
#       "onDemand": {}
#     }
#   },
#   "executionStatus": {
#     "latestJobEndTime": "2025-06-17T14:33:00.788526252Z",
#     "latestJobCreateTime": "2025-06-17T14:31:44.596020646Z"
#   },
#   "type": "KNOWLEDGE_ENGINE",
#   "knowledgeEngineSpec": {},
#   "knowledgeEngineResult": {
#     "datasetResult": {
#       "description": "The dataset contains comprehensive information about e-commerce operations, including customer orders, product details, user information, and website activity. It provides a detailed view of the supply chain through distribution center and inventory data. User interactions and events on the platform are tracked, offering insights into user behavior. The dataset facilitates analysis of order fulfillment, product sales performance, and user segmentation. Businesses can leverage this data to optimize marketing efforts, improve operational efficiency, and gain a deeper understanding of their customer base.",
#       "schemaRelationship": [
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/order_items",
#               "fieldPath": "order_id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/orders",
#               "fieldPath": "order_id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/inventory_items",
#               "fieldPath": "product_distribution_center_id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/distribution_centers",
#               "fieldPath": "id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/inventory_items",
#               "fieldPath": "id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/order_items",
#               "fieldPath": "inventory_item_id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/events",
#               "fieldPath": "user_id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/users",
#               "fieldPath": "id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/order_items",
#               "fieldPath": "product_id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/products",
#               "fieldPath": "id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/order_items",
#               "fieldPath": "user_id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/users",
#               "fieldPath": "id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/inventory_items",
#               "fieldPath": "product_id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/products",
#               "fieldPath": "id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/orders",
#               "fieldPath": "user_id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/users",
#               "fieldPath": "id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/products",
#               "fieldPath": "distribution_center_id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/distribution_centers",
#               "fieldPath": "id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/inventory_items",
#               "fieldPath": "product_id"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/order_items",
#               "fieldPath": "product_id"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/products",
#               "fieldPath": "brand"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/inventory_items",
#               "fieldPath": "product_brand"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/products",
#               "fieldPath": "category"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/inventory_items",
#               "fieldPath": "product_category"
#             }
#           ],
#           "type": "JOIN"
#         },
#         {
#           "leftColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/products",
#               "fieldPath": "department"
#             }
#           ],
#           "rightColumnsTuple": [
#             {
#               "entryFqn": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/inventory_items",
#               "fieldPath": "product_department"
#             }
#           ],
#           "type": "JOIN"
#         }
#       ],
#       "businessGlossary": {
#         "terms": [
#           {
#             "title": "Age",
#             "description": "The number of years a user has lived. For example, a user who is 30 years old would have an age of 30."
#           },
#           {
#             "title": "Average Order Value (AOV)",
#             "description": "[METRIC] The average amount spent per order. It is calculated by dividing the total revenue by the number of orders. For example, if total revenue is $10,000 and there are 100 orders, the AOV is $100."
#           },
#           {
#             "title": "Brand",
#             "description": "The name of the company that produces a product. For example, Nike is a brand of athletic apparel."
#           },
#           {
#             "title": "Browser",
#             "description": "The software application used to access the internet. Examples include Chrome, Firefox, and Safari."
#           },
#           {
#             "title": "Category",
#             "description": "A group of similar products. For example, \"Electronics\" is a product category."
#           },
#           {
#             "title": "City",
#             "description": "The city in which a user or event is located. For example, \"New York City\" is a city."
#           },
#           {
#             "title": "Cost",
#             "description": "The amount of money spent to acquire or produce an item. For example, the cost of manufacturing a t-shirt."
#           },
#           {
#             "title": "Country",
#             "description": "The country in which a user or event is located. For example, \"United States\" is a country."
#           },
#           {
#             "title": "Customer Lifetime Value (CLTV)",
#             "description": "[METRIC] A prediction of the net profit attributed to the entire future relationship with a customer."
#           },
#           {
#             "title": "Department",
#             "description": "A section within a store or organization that sells a specific type of product. For example, \"Clothing\" is a department."
#           },
#           {
#             "title": "Distribution Center",
#             "description": "A warehouse or facility where products are stored before being shipped to customers or stores."
#           },
#           {
#             "title": "Email",
#             "description": "A user's electronic mail address. For example, john.doe@example.com."
#           },
#           {
#             "title": "Event",
#             "description": "An action taken by a user on a website or application. Examples include page views, button clicks, and form submissions."
#           },
#           {
#             "title": "Event Type",
#             "description": "The specific type of event that occurred. Examples include \"page_view\", \"add_to_cart\", and \"purchase\"."
#           },
#           {
#             "title": "First Name",
#             "description": "A user's given name. For example, \"John\" is a first name."
#           },
#           {
#             "title": "Gender",
#             "description": "A user's self-identified gender. Examples include Male, Female, and Other."
#           },
#           {
#             "title": "Inventory Item",
#             "description": "A single unit of a product in the inventory."
#           },
#           {
#             "title": "Last Name",
#             "description": "A user's family name. For example, \"Doe\" is a last name."
#           },
#           {
#             "title": "Location",
#             "description": "Geographic coordinates (latitude and longitude) or a geographical region (city, state, country) associated with a user, event, or distribution center."
#           },
#           {
#             "title": "Margin",
#             "description": "[METRIC] The difference between revenue and cost, expressed as a percentage. It indicates the profitability of a product or service."
#           },
#           {
#             "title": "Order",
#             "description": "A customer's request to purchase one or more items."
#           },
#           {
#             "title": "Order Fulfillment",
#             "description": "The process of receiving, processing, and delivering orders to customers."
#           },
#           {
#             "title": "Order Item",
#             "description": "A single item within an order."
#           },
#           {
#             "title": "Order Status",
#             "description": "The current state of an order. Examples include \"created\", \"shipped\", \"delivered\", and \"returned\"."
#           },
#           {
#             "title": "Postal Code",
#             "description": "A code used to identify a specific geographic area for mail delivery. For example, \"90210\" is a postal code."
#           },
#           {
#             "title": "Product",
#             "description": "An item offered for sale."
#           },
#           {
#             "title": "Product ID",
#             "description": "A unique identifier for a product."
#           },
#           {
#             "title": "Product Retail Price",
#             "description": "The price at which a product is sold to customers."
#           },
#           {
#             "title": "Product SKU",
#             "description": "A unique identifier for a specific product, often used for inventory management."
#           },
#           {
#             "title": "Purchase Frequency",
#             "description": "[METRIC] The average number of purchases a customer makes within a given period."
#           },
#           {
#             "title": "Return Rate",
#             "description": "[METRIC] The percentage of orders or items that are returned by customers."
#           },
#           {
#             "title": "Revenue",
#             "description": "The total amount of money earned from sales."
#           },
#           {
#             "title": "Session",
#             "description": "A period of continuous activity by a user on a website or application."
#           },
#           {
#             "title": "State",
#             "description": "The state in which a user or event is located. For example, \"California\" is a state."
#           },
#           {
#             "title": "Traffic Source",
#             "description": "The origin of traffic to a website or application. Examples include \"search engine\", \"social media\", and \"referral\"."
#           },
#           {
#             "title": "User",
#             "description": "An individual who interacts with a website, application, or service."
#           },
#           {
#             "title": "User ID",
#             "description": "A unique identifier for a user."
#           },
#           {
#             "title": "Website Visitors",
#             "description": "Individuals who visit a website."
#           },
#           {
#             "title": "Customer",
#             "description": "An individual or entity that purchases goods or services."
#           },
#           {
#             "title": "Prospect",
#             "description": "A potential customer who has shown interest in a product or service."
#           },
#           {
#             "title": "Lead",
#             "description": "A potential customer who has provided contact information and expressed interest in a product or service."
#           },
#           {
#             "title": "Conversion Rate",
#             "description": "The percentage of website visitors who complete a desired action, such as making a purchase or filling out a form."
#           },
#           {
#             "title": "Cart Abandonment Rate",
#             "description": "The percentage of users who add items to their cart but do not complete the purchase."
#           },
#           {
#             "title": "Shipping Cost",
#             "description": "The expense incurred to transport goods from a distribution center to a customer's location."
#           },
#           {
#             "title": "Payment Gateway",
#             "description": "A service that processes online payments for businesses."
#           },
#           {
#             "title": "Fraud Score",
#             "description": "A numerical value indicating the likelihood of a transaction being fraudulent."
#           },
#           {
#             "title": "Promotion Code",
#             "description": "A code that customers can use to receive a discount on their purchase."
#           },
#           {
#             "title": "Customer Segment",
#             "description": "A group of customers with similar characteristics, such as demographics, purchase history, or behavior."
#           },
#           {
#             "title": "Marketing Campaign",
#             "description": "A coordinated set of activities designed to promote a product or service."
#           },
#           {
#             "title": "Supply Chain",
#             "description": "The network of organizations and activities involved in producing and delivering a product or service."
#           },
#           {
#             "title": "Inventory Turnover",
#             "description": "The rate at which inventory is sold and replaced over a given period."
#           },
#           {
#             "title": "Stockout",
#             "description": "A situation where a product is out of stock and unavailable for sale."
#           },
#           {
#             "title": "Supplier",
#             "description": "A company that provides goods or services to another company."
#           },
#           {
#             "title": "Warehouse Management System (WMS)",
#             "description": "A software system that manages and controls the movement and storage of materials within a warehouse."
#           },
#           {
#             "title": "Enterprise Resource Planning (ERP)",
#             "description": "A software system that integrates all aspects of a business, including planning, manufacturing, sales, and finance."
#           },
#           {
#             "title": "Customer Relationship Management (CRM)",
#             "description": "A software system that manages interactions with current and potential customers."
#           },
#           {
#             "title": "Data Lake",
#             "description": "A centralized repository for storing structured and unstructured data."
#           },
#           {
#             "title": "Data Warehouse",
#             "description": "A centralized repository for storing structured data for reporting and analysis."
#           },
#           {
#             "title": "Business Intelligence (BI)",
#             "description": "The process of analyzing data to gain insights and make better business decisions."
#           },
#           {
#             "title": "Analytics",
#             "description": "The process of examining data to draw conclusions and make predictions."
#           },
#           {
#             "title": "Dashboard",
#             "description": "A visual display of key performance indicators (KPIs) and other important metrics."
#           },
#           {
#             "title": "Report",
#             "description": "A document that presents data in a structured and organized format."
#           },
#           {
#             "title": "Key Performance Indicator (KPI)",
#             "description": "A measurable value that demonstrates how effectively a company is achieving key business objectives."
#           },
#           {
#             "title": "Machine Learning (ML)",
#             "description": "A type of artificial intelligence that allows computers to learn from data without being explicitly programmed."
#           },
#           {
#             "title": "Artificial Intelligence (AI)",
#             "description": "The ability of a computer or machine to mimic human intelligence."
#           },
#           {
#             "title": "Cloud Computing",
#             "description": "The delivery of computing services—including servers, storage, databases, networking, software, analytics, and intelligence—over the Internet (“the cloud”) to offer faster innovation, flexible resources, and economies of scale."
#           },
#           {
#             "title": "API (Application Programming Interface)",
#             "description": "A set of rules and specifications that software programs can follow to communicate with each other. It serves as an interface between different software systems, allowing them to exchange data and functionality."
#           },
#           {
#             "title": "ETL (Extract, Transform, Load)",
#             "description": "A data warehousing process that involves extracting data from various sources, transforming it into a consistent format, and loading it into a data warehouse."
#           },
#           {
#             "title": "Data Governance",
#             "description": "The overall management of the availability, usability, integrity, and security of data used in an enterprise. It includes establishing policies, standards, and procedures to ensure data quality and compliance."
#           },
#           {
#             "title": "Metadata",
#             "description": "Data about data. It provides information about the characteristics, origin, and usage of data."
#           },
#           {
#             "title": "Data Quality",
#             "description": "The degree to which data is accurate, complete, consistent, timely, and valid."
#           },
#           {
#             "title": "Data Security",
#             "description": "The protection of data from unauthorized access, use, disclosure, disruption, modification, or destruction."
#           },
#           {
#             "title": "Data Privacy",
#             "description": "The right of individuals to control how their personal data is collected, used, and shared."
#           },
#           {
#             "title": "Compliance",
#             "description": "Adherence to laws, regulations, and industry standards."
#           },
#           {
#             "title": "Risk Management",
#             "description": "The process of identifying, assessing, and controlling risks."
#           },
#           {
#             "title": "Audit Trail",
#             "description": "A chronological record of system activities that can be used to track changes to data and identify security breaches."
#           },
#           {
#             "title": "Data Retention",
#             "description": "The policies and procedures for storing and deleting data."
#           },
#           {
#             "title": "Data Archiving",
#             "description": "The process of moving data to a long-term storage location for preservation."
#           },
#           {
#             "title": "Data Recovery",
#             "description": "The process of restoring data after a loss or corruption event."
#           },
#           {
#             "title": "Disaster Recovery",
#             "description": "The process of recovering IT systems and data after a disaster."
#           },
#           {
#             "title": "Business Continuity",
#             "description": "The ability of an organization to maintain essential business functions during and after a disruption."
#           },
#           {
#             "title": "Incident Response",
#             "description": "The process of handling security incidents and data breaches."
#           },
#           {
#             "title": "Security Awareness Training",
#             "description": "Training employees on security best practices and how to identify and respond to security threats."
#           },
#           {
#             "title": "Penetration Testing",
#             "description": "A simulated attack on a system to identify vulnerabilities."
#           },
#           {
#             "title": "Vulnerability Assessment",
#             "description": "The process of identifying and assessing vulnerabilities in a system."
#           },
#           {
#             "title": "Security Patch",
#             "description": "A software update that fixes a security vulnerability."
#           },
#           {
#             "title": "Firewall",
#             "description": "A network security system that monitors and controls incoming and outgoing network traffic based on predetermined security rules."
#           },
#           {
#             "title": "Intrusion Detection System (IDS)",
#             "description": "A system that monitors network traffic for suspicious activity and alerts administrators."
#           },
#           {
#             "title": "Intrusion Prevention System (IPS)",
#             "description": "A system that automatically blocks or prevents malicious network traffic."
#           },
#           {
#             "title": "Encryption",
#             "description": "The process of converting data into an unreadable format to protect it from unauthorized access."
#           },
#           {
#             "title": "Access Control",
#             "description": "The process of restricting access to data and resources to authorized users."
#           },
#           {
#             "title": "Authentication",
#             "description": "The process of verifying the identity of a user or device."
#           },
#           {
#             "title": "Authorization",
#             "description": "The process of granting access to specific resources based on a user's identity and permissions."
#           },
#           {
#             "title": "Multi-Factor Authentication (MFA)",
#             "description": "A security system that requires multiple forms of authentication to verify a user's identity."
#           },
#           {
#             "title": "Single Sign-On (SSO)",
#             "description": "A system that allows users to access multiple applications with a single set of credentials."
#           },
#           {
#             "title": "Password Policy",
#             "description": "A set of rules that govern the creation and use of passwords."
#           },
#           {
#             "title": "Data Loss Prevention (DLP)",
#             "description": "A system that prevents sensitive data from leaving the organization's control."
#           },
#           {
#             "title": "Data Masking",
#             "description": "The process of obscuring sensitive data to protect it from unauthorized access."
#           },
#           {
#             "title": "Data Anonymization",
#             "description": "The process of removing personally identifiable information (PII) from data."
#           },
#           {
#             "title": "Data Aggregation",
#             "description": "The process of combining data from multiple sources into a single dataset."
#           },
#           {
#             "title": "Data Mining",
#             "description": "The process of discovering patterns and insights from large datasets."
#           },
#           {
#             "title": "Predictive Analytics",
#             "description": "The process of using data to predict future outcomes."
#           },
#           {
#             "title": "Prescriptive Analytics",
#             "description": "The process of using data to recommend actions that will optimize outcomes."
#           },
#           {
#             "title": "Real-Time Analytics",
#             "description": "The process of analyzing data as it is being generated."
#           },
#           {
#             "title": "Big Data",
#             "description": "Extremely large datasets that are difficult to process using traditional methods."
#           },
#           {
#             "title": "Data Science",
#             "description": "An interdisciplinary field that uses scientific methods, processes, algorithms and systems to extract knowledge and insights from data in various forms, both structured and unstructured."
#           },
#           {
#             "title": "Machine Learning Engineer",
#             "description": "A person who develops, tests, and deploys machine learning models."
#           },
#           {
#             "title": "Data Analyst",
#             "description": "A person who analyzes data to identify trends and insights."
#           },
#           {
#             "title": "Business Analyst",
#             "description": "A person who analyzes business processes and identifies opportunities for improvement."
#           },
#           {
#             "title": "Data Engineer",
#             "description": "A person who builds and maintains data pipelines and infrastructure."
#           },
#           {
#             "title": "Data Architect",
#             "description": "A person who designs and implements data management systems."
#           },
#           {
#             "title": "Chief Data Officer (CDO)",
#             "description": "The executive responsible for data governance and strategy within an organization."
#           },
#           {
#             "title": "Data Steward",
#             "description": "A person responsible for ensuring the quality and integrity of data."
#           },
#           {
#             "title": "Subject Matter Expert (SME)",
#             "description": "A person with deep knowledge of a particular topic or area."
#           },
#           {
#             "title": "Stakeholder",
#             "description": "A person or group that has an interest in a project or initiative."
#           },
#           {
#             "title": "Return on Investment (ROI)",
#             "description": "A performance measure used to evaluate the efficiency of an investment or compare the efficiency of a number of different investments."
#           },
#           {
#             "title": "Churn Rate",
#             "description": "The rate at which customers stop doing business with a company."
#           },
#           {
#             "title": "Net Promoter Score (NPS)",
#             "description": "A metric used to measure customer loyalty and satisfaction."
#           },
#           {
#             "title": "Customer Acquisition Cost (CAC)",
#             "description": "The cost of acquiring a new customer."
#           },
#           {
#             "title": "Conversion Funnel",
#             "description": "A visual representation of the steps a customer takes to complete a desired action, such as making a purchase."
#           },
#           {
#             "title": "A/B Testing",
#             "description": "A method of comparing two versions of a webpage or app to see which one performs better."
#           },
#           {
#             "title": "Multivariate Testing",
#             "description": "A method of testing multiple variables at the same time to see which combination performs best."
#           },
#           {
#             "title": "Segmentation",
#             "description": "The process of dividing a market into distinct groups of customers with similar needs and characteristics."
#           },
#           {
#             "title": "Targeting",
#             "description": "The process of selecting specific segments to focus marketing efforts on."
#           },
#           {
#             "title": "Positioning",
#             "description": "The process of creating a unique and desirable image for a product or service in the minds of customers."
#           },
#           {
#             "title": "Marketing Mix",
#             "description": "The set of controllable, tactical marketing tools that a company uses to produce the response it wants in the target market."
#           },
#           {
#             "title": "Search Engine Optimization (SEO)",
#             "description": "The process of optimizing a website to rank higher in search engine results pages (SERPs)."
#           },
#           {
#             "title": "Pay-Per-Click (PPC) Advertising",
#             "description": "A form of online advertising where advertisers pay a fee each time one of their ads is clicked."
#           },
#           {
#             "title": "Social Media Marketing",
#             "description": "The process of using social media platforms to promote a product or service."
#           },
#           {
#             "title": "Email Marketing",
#             "description": "The process of sending commercial messages to a group of people using email."
#           },
#           {
#             "title": "Content Marketing",
#             "description": "A marketing technique of creating and distributing valuable, relevant and consistent content to attract and acquire a clearly defined audience – with the objective of driving profitable customer action."
#           },
#           {
#             "title": "Influencer Marketing",
#             "description": "A type of marketing that involves collaborating with influencers to promote a product or service."
#           },
#           {
#             "title": "Affiliate Marketing",
#             "description": "A marketing arrangement by which an online retailer pays a commission to an external website for traffic or sales generated from its referrals."
#           },
#           {
#             "title": "Mobile Marketing",
#             "description": "The process of marketing to customers through mobile devices."
#           },
#           {
#             "title": "Video Marketing",
#             "description": "The process of using video to promote a product or service."
#           },
#           {
#             "title": "Event Marketing",
#             "description": "The process of promoting a product or service through live events."
#           },
#           {
#             "title": "Public Relations (PR)",
#             "description": "The practice of managing the spread of information between an individual or an organization and the public."
#           },
#           {
#             "title": "Brand Awareness",
#             "description": "The extent to which consumers are familiar with the qualities or image of a particular brand of goods or services."
#           },
#           {
#             "title": "Brand Loyalty",
#             "description": "The tendency of consumers to continuously purchase one brand's products rather than those of its competitors."
#           },
#           {
#             "title": "Customer Satisfaction",
#             "description": "A measure of how products and services supplied by a company meet or surpass customer expectation."
#           },
#           {
#             "title": "Customer Engagement",
#             "description": "The emotional connection between a customer and a brand."
#           },
#           {
#             "title": "Customer Advocacy",
#             "description": "The act of customers recommending a product or service to others."
#           },
#           {
#             "title": "Word-of-Mouth Marketing",
#             "description": "The process of generating positive buzz about a product or service through customer referrals."
#           },
#           {
#             "title": "Viral Marketing",
#             "description": "A marketing phenomenon that facilitates and encourages people to pass along a marketing message."
#           },
#           {
#             "title": "Guerrilla Marketing",
#             "description": "An advertising strategy that focuses on low-cost unconventional marketing tactics that yield maximum results."
#           },
#           {
#             "title": "Product Development",
#             "description": "The process of creating and launching new products."
#           },
#           {
#             "title": "Market Research",
#             "description": "The process of gathering information about a market to understand customer needs and preferences."
#           },
#           {
#             "title": "Competitive Analysis",
#             "description": "The process of identifying and evaluating competitors."
#           },
#           {
#             "title": "SWOT Analysis",
#             "description": "A strategic planning tool used to evaluate the strengths, weaknesses, opportunities, and threats involved in a project or business venture."
#           },
#           {
#             "title": "Business Model",
#             "description": "A plan for how a company will create, deliver, and capture value."
#           },
#           {
#             "title": "Value Proposition",
#             "description": "A statement that describes the benefits that customers will receive from a product or service."
#           },
#           {
#             "title": "Revenue Stream",
#             "description": "A source of revenue for a business."
#           },
#           {
#             "title": "Cost Structure",
#             "description": "The costs that a business incurs to operate."
#           },
#           {
#             "title": "Profit Margin",
#             "description": "The percentage of revenue that remains after deducting all costs."
#           },
#           {
#             "title": "Cash Flow",
#             "description": "The movement of money into and out of a business."
#           },
#           {
#             "title": "Balance Sheet",
#             "description": "A financial statement that summarizes a company's assets, liabilities, and equity at a specific point in time."
#           },
#           {
#             "title": "Income Statement",
#             "description": "A financial statement that reports a company's financial performance over a period of time."
#           },
#           {
#             "title": "Statement of Cash Flows",
#             "description": "A financial statement that reports the movement of cash into and out of a company over a period of time."
#           },
#           {
#             "title": "Financial Ratio",
#             "description": "A comparison of two financial statement items."
#           },
#           {
#             "title": "Working Capital",
#             "description": "The difference between a company's current assets and current liabilities."
#           },
#           {
#             "title": "Inventory Management",
#             "description": "The process of managing the flow of inventory into and out of a business."
#           },
#           {
#             "title": "Supply Chain Management",
#             "description": "The process of managing the flow of goods and services from suppliers to customers."
#           },
#           {
#             "title": "Logistics",
#             "description": "The process of planning, implementing, and controlling the efficient, effective flow and storage of goods, services, and related information from point of origin to point of consumption for the purpose of conforming to customer requirements."
#           },
#           {
#             "title": "Transportation",
#             "description": "The movement of goods and services from one location to another."
#           },
#           {
#             "title": "Warehousing",
#             "description": "The storage of goods in a warehouse."
#           },
#           {
#             "title": "Distribution",
#             "description": "The process of getting goods from the manufacturer to the customer."
#           },
#           {
#             "title": "Customer Service",
#             "description": "The assistance and advice provided by a company to those people who buy or use its products or services."
#           },
#           {
#             "title": "Technical Support",
#             "description": "The assistance provided to users of technology products or services."
#           },
#           {
#             "title": "Help Desk",
#             "description": "A resource intended to provide the customer or end user with information and support related to a company's or institution's products and services."
#           },
#           {
#             "title": "Knowledge Base",
#             "description": "A centralized repository of information about a product or service."
#           },
#           {
#             "title": "Frequently Asked Questions (FAQ)",
#             "description": "A list of common questions and answers about a product or service."
#           },
#           {
#             "title": "Troubleshooting",
#             "description": "The process of identifying and resolving problems."
#           },
#           {
#             "title": "Bug",
#             "description": "An error in a software program."
#           },
#           {
#             "title": "Patch",
#             "description": "A software update that fixes a bug."
#           },
#           {
#             "title": "Release",
#             "description": "A version of a software program that is made available to the public."
#           },
#           {
#             "title": "Deployment",
#             "description": "The process of installing and configuring a software program on a computer or server."
#           },
#           {
#             "title": "Maintenance",
#             "description": "The process of keeping a software program running smoothly."
#           },
#           {
#             "title": "Upgrade",
#             "description": "The process of installing a new version of a software program."
#           },
#           {
#             "title": "Migration",
#             "description": "The process of moving data or applications from one system to another."
#           },
#           {
#             "title": "Integration",
#             "description": "The process of connecting two or more systems together."
#           },
#           {
#             "title": "Customization",
#             "description": "The process of modifying a software program to meet specific needs."
#           },
#           {
#             "title": "Configuration",
#             "description": "The process of setting up a software program to work in a specific environment."
#           },
#           {
#             "title": "Security",
#             "description": "The protection of data and systems from unauthorized access, use, disclosure, disruption, modification, or destruction."
#           },
#           {
#             "title": "Privacy",
#             "description": "The right of individuals to control how their personal data is collected, used, and shared."
#           },
#           {
#             "title": "Compliance",
#             "description": "Adherence to laws, regulations, and industry standards."
#           },
#           {
#             "title": "Risk Management",
#             "description": "The process of identifying, assessing, and controlling risks."
#           },
#           {
#             "title": "Audit",
#             "description": "An examination of records or activities to verify their accuracy and compliance."
#           },
#           {
#             "title": "Governance",
#             "description": "The overall management of an organization."
#           },
#           {
#             "title": "Strategy",
#             "description": "A plan of action designed to achieve a long-term or overall aim."
#           },
#           {
#             "title": "Innovation",
#             "description": "The introduction of something new."
#           },
#           {
#             "title": "Efficiency",
#             "description": "The ability to accomplish something with the least waste of time and effort."
#           },
#           {
#             "title": "Effectiveness",
#             "description": "The degree to which something is successful in producing a desired result."
#           },
#           {
#             "title": "Quality",
#             "description": "The degree to which something is excellent or superior."
#           },
#           {
#             "title": "Value",
#             "description": "The worth, importance, or usefulness of something."
#           },
#           {
#             "title": "Sustainability",
#             "description": "The ability to be maintained at a certain rate or level."
#           },
#           {
#             "title": "Social Responsibility",
#             "description": "The idea that businesses should act in a way that benefits society."
#           },
#           {
#             "title": "Ethics",
#             "description": "Moral principles that govern a person's behavior or the conducting of an activity."
#           },
#           {
#             "title": "Integrity",
#             "description": "The quality of being honest and having strong moral principles."
#           },
#           {
#             "title": "Transparency",
#             "description": "The condition of being transparent."
#           },
#           {
#             "title": "Accountability",
#             "description": "The obligation or willingness to accept responsibility or to account for one's actions."
#           },
#           {
#             "title": "Collaboration",
#             "description": "The action of working with someone to produce or create something."
#           },
#           {
#             "title": "Communication",
#             "description": "The process of conveying information."
#           },
#           {
#             "title": "Leadership",
#             "description": "The action of leading a group of people or an organization."
#           },
#           {
#             "title": "Management",
#             "description": "The process of dealing with or controlling things or people."
#           },
#           {
#             "title": "Organization",
#             "description": "An organized group of people with a particular purpose, such as a business or government department."
#           },
#           {
#             "title": "Teamwork",
#             "description": "The combined action of a group of people, especially when effective and efficient."
#           },
#           {
#             "title": "Culture",
#             "description": "The ideas, customs, and social behaviour of a particular people or society."
#           },
#           {
#             "title": "Diversity",
#             "description": "The state of being diverse."
#           },
#           {
#             "title": "Inclusion",
#             "description": "The practice or policy of providing equal access to opportunities and resources for people who might otherwise be excluded or marginalized, such as those having physical or mental disabilities or belonging to other minority groups."
#           },
#           {
#             "title": "Equity",
#             "description": "The quality of being fair and impartial."
#           },
#           {
#             "title": "Belonging",
#             "description": "The feeling of being comfortable and accepted in a particular group or place."
#           },
#           {
#             "title": "Well-being",
#             "description": "The state of being comfortable, healthy, or happy."
#           },
#           {
#             "title": "Work-Life Balance",
#             "description": "The amount of time you spend doing your job compared with the amount of time you spend with your family and doing things you enjoy."
#           },
#           {
#             "title": "Employee Engagement",
#             "description": "The emotional commitment the employee has to the organization and its goals."
#           },
#           {
#             "title": "Employee Satisfaction",
#             "description": "The extent to which employees are happy with their jobs."
#           },
#           {
#             "title": "Employee Retention",
#             "description": "The ability of an organization to retain its employees."
#           },
#           {
#             "title": "Human Resources (HR)",
#             "description": "The department of a business or organization that deals with the hiring, administration, and training of personnel."
#           },
#           {
#             "title": "Talent Acquisition",
#             "description": "The process of finding and hiring qualified candidates for a job."
#           },
#           {
#             "title": "Training and Development",
#             "description": "The process of providing employees with the skills and knowledge they need to perform their jobs effectively."
#           },
#           {
#             "title": "Performance Management",
#             "description": "The process of setting goals, monitoring performance, and providing feedback to employees."
#           },
#           {
#             "title": "Compensation and Benefits",
#             "description": "The pay and other benefits that employees receive for their work."
#           },
#           {
#             "title": "Employee Relations",
#             "description": "The relationship between an employer and its employees."
#           },
#           {
#             "title": "Labor Law",
#             "description": "The body of laws, administrative rulings, and precedents which address the legal rights of, and restrictions on, employed people and organizations."
#           },
#           {
#             "title": "Workplace Safety",
#             "description": "The safety, health and welfare of people engaged in work or employment."
#           },
#           {
#             "title": "Data Breach",
#             "description": "A security incident in which sensitive, protected or confidential data is copied, transmitted, viewed, stolen or used by an individual unauthorized to do so."
#           },
#           {
#             "title": "Phishing",
#             "description": "The fraudulent attempt to obtain sensitive information such as usernames, passwords and credit card details, by disguising oneself as a trustworthy entity in an electronic communication."
#           },
#           {
#             "title": "Ransomware",
#             "description": "A type of malicious software designed to block access to a computer system until a sum of money is paid."
#           },
#           {
#             "title": "Malware",
#             "description": "Software that is intended to damage or disable computers and computer systems."
#           },
#           {
#             "title": "Virus",
#             "description": "A type of malware that, when executed, replicates by inserting copies of itself (possibly modified) into other computer programs, data files, or the boot sector of the hard drive; when this replication succeeds, the affected areas are then said to be \"infected\"."
#           },
#           {
#             "title": "Trojan Horse",
#             "description": "A program that appears legitimate but performs some illicit activity when it is run."
#           },
#           {
#             "title": "Spyware",
#             "description": "Software that aims to gather information about a person or organization without their knowledge and that may send such information to another entity without the consumer's consent, or that asserts control over a computer without the consumer's knowledge."
#           },
#           {
#             "title": "Adware",
#             "description": "Software that automatically displays or downloads advertising material (often unwanted) when a user is online."
#           },
#           {
#             "title": "Denial-of-Service (DoS) Attack",
#             "description": "A cyber-attack in which the perpetrator seeks to make a machine or network resource unavailable to its intended users by temporarily or indefinitely disrupting services of a host connected to a network."
#           },
#           {
#             "title": "Distributed Denial-of-Service (DDoS) Attack",
#             "description": "A type of DoS attack where the attacker uses multiple compromised computer systems as sources of attack traffic."
#           },
#           {
#             "title": "SQL Injection",
#             "description": "A code injection technique used to attack data-driven applications, in which malicious SQL statements are inserted into an entry field for execution (e.g. to dump the database content to the attacker)."
#           },
#           {
#             "title": "Cross-Site Scripting (XSS)",
#             "description": "A type of computer security vulnerability typically found in web applications. XSS enables attackers to inject client-side scripts into web pages viewed by other users."
#           },
#           {
#             "title": "Clickjacking",
#             "description": "A malicious technique of tricking a Web user into clicking something different from what the user perceives, thus potentially revealing confidential information or taking control of their computer while clicking on seemingly innocuous web pages."
#           },
#           {
#             "title": "Session Hijacking",
#             "description": "An attack in which an attacker takes over a valid session between two computers."
#           },
#           {
#             "title": "Man-in-the-Middle (MitM) Attack",
#             "description": "An attack where the attacker secretly relays and possibly alters the communication between two parties who believe they are directly communicating with each other."
#           },
#           {
#             "title": "Rootkit",
#             "description": "A collection of computer software, typically malicious, designed to enable access to a computer or an area of its software that is not otherwise allowed (e.g., to an unauthorized user) and often masks its existence or the existence of other software."
#           },
#           {
#             "title": "Keylogger",
#             "description": "A type of surveillance software (considered either software or spyware) that has the capability to record every keystroke a user makes."
#           },
#           {
#             "title": "Botnet",
#             "description": "A network of private computers infected with malicious software and controlled as a group without the owners' knowledge, e.g., to send spam messages."
#           },
#           {
#             "title": "Zero-Day Exploit",
#             "description": "A computer-software vulnerability that is unknown to, or unaddressed by, those who should be mitigating the vulnerability."
#           },
#           {
#             "title": "Advanced Persistent Threat (APT)",
#             "description": "A stealthy threat actor, typically a nation state or state-sponsored group, which gains unauthorized access to a computer network and remains undetected for an extended period."
#           },
#           {
#             "title": "Data Encryption Standard (DES)",
#             "description": "A symmetric-key block cipher algorithm and was published by the National Institute of Standards and Technology (NIST)."
#           },
#           {
#             "title": "Advanced Encryption Standard (AES)",
#             "description": "A symmetric block cipher chosen by the U.S. National Institute of Standards and Technology (NIST) to replace Data Encryption Standard (DES)."
#           },
#           {
#             "title": "Rivest–Shamir–Adleman (RSA)",
#             "description": "A public-key cryptosystem that is widely used for secure data transmission."
#           },
#           {
#             "title": "Secure Sockets Layer (SSL)",
#             "description": "A standard security technology for establishing an encrypted link between a web server and a browser, ensuring that all data passed between them remain private and integral."
#           },
#           {
#             "title": "Transport Layer Security (TLS)",
#             "description": "A widely adopted security protocol designed to provide privacy and data integrity between two communicating applications. It is the successor to Secure Sockets Layer (SSL)."
#           },
#           {
#             "title": "Virtual Private Network (VPN)",
#             "description": "Extends a private network across a public network and enables users to send and receive data across shared or public networks as if their computing devices were directly connected to the private network."
#           },
#           {
#             "title": "Proxy Server",
#             "description": "A server application that acts as an intermediary between a client seeking resources from other servers and those servers."
#           },
#           {
#             "title": "Honeypot",
#             "description": "A computer security mechanism set to detect, deflect, or, in some manner, counteract attempts at unauthorized use of information systems."
#           },
#           {
#             "title": "Firewall",
#             "description": "A network security system that monitors and controls incoming and outgoing network traffic based on predetermined security rules."
#           },
#           {
#             "title": "Intrusion Detection System (IDS)",
#             "description": "A system that monitors network traffic for suspicious activity and alerts administrators."
#           },
#           {
#             "title": "Intrusion Prevention System (IPS)",
#             "description": "A system that automatically blocks or prevents malicious network traffic."
#           },
#           {
#             "title": "Two-Factor Authentication (2FA)",
#             "description": "A security process in which the user provides two authentication factors to verify they are who they say they are."
#           },
#           {
#             "title": "Biometrics",
#             "description": "Authentication techniques that rely on measurable physical characteristics that can be automatically checked."
#           },
#           {
#             "title": "Password Manager",
#             "description": "A software application that stores and manages a user's passwords for various online services and websites."
#           },
#           {
#             "title": "Data Backup",
#             "description": "The process of copying data to a separate storage device or location to protect it in case of data loss or corruption."
#           },
#           {
#             "title": "Disaster Recovery Plan (DRP)",
#             "description": "A documented process or set of procedures to recover and protect a business IT infrastructure in the event of a disaster."
#           },
#           {
#             "title": "Business Continuity Plan (BCP)",
#             "description": "A plan to ensure that a business can continue to operate in the event of a disaster or other disruption."
#           },
#           {
#             "title": "Incident Response Plan (IRP)",
#             "description": "A set of instructions or procedures to detect, respond to, and limit the consequences of a security incident."
#           },
#           {
#             "title": "Vulnerability Management",
#             "description": "The cyclical process of identifying, classifying, remediating and mitigating vulnerabilities."
#           },
#           {
#             "title": "Patch Management",
#             "description": "The process of managing and deploying software updates to fix vulnerabilities and improve security."
#           },
#           {
#             "title": "Security Information and Event Management (SIEM)",
#             "description": "A software solution that aggregates and analyzes security logs and events from various sources to detect and respond to security threats."
#           },
#           {
#             "title": "Security Orchestration, Automation and Response (SOAR)",
#             "description": "A collection of software solutions and tools that allow an organization to collect security threats data and alerts from different sources."
#           },
#           {
#             "title": "Threat Intelligence",
#             "description": "Evidence-based knowledge, including context, mechanisms, indicators, implications and actionable advice, about an existing or emerging menace or hazard to assets."
#           },
#           {
#             "title": "Cybersecurity Framework",
#             "description": "A set of guidelines and best practices for managing cybersecurity risk."
#           },
#           {
#             "title": "Compliance Framework",
#             "description": "A set of guidelines and best practices for complying with laws, regulations, and industry standards."
#           },
#           {
#             "title": "Risk Assessment",
#             "description": "The process of identifying and evaluating risks."
#           },
#           {
#             "title": "Data Loss Prevention (DLP)",
#             "description": "A set of technologies and processes used to prevent sensitive data from leaving an organization's control."
#           },
#           {
#             "title": "Data Masking",
#             "description": "The process of obscuring sensitive data to protect it from unauthorized access."
#           },
#           {
#             "title": "Data Encryption",
#             "description": "The process of converting data into an unreadable format to protect it from unauthorized access."
#           },
#           {
#             "title": "Access Control",
#             "description": "The process of restricting access to data and resources to authorized users."
#           },
#           {
#             "title": "Least Privilege",
#             "description": "The principle of granting users only the minimum level of access necessary to perform their job duties."
#           },
#           {
#             "title": "Separation of Duties",
#             "description": "The principle of dividing responsibilities among different individuals to prevent fraud and errors."
#           },
#           {
#             "title": "Change Management",
#             "description": "The process of managing changes to IT systems and processes."
#           },
#           {
#             "title": "Configuration Management",
#             "description": "The process of managing the configuration of IT systems and devices."
#           },
#           {
#             "title": "Asset Management",
#             "description": "The process of managing the organization's IT assets."
#           },
#           {
#             "title": "Identity Management",
#             "description": "The process of managing user identities and access rights."
#           },
#           {
#             "title": "Data Governance",
#             "description": "The overall management of the availability, usability, integrity, and security of data used in an enterprise."
#           },
#           {
#             "title": "Data Quality",
#             "description": "The degree to which data is accurate, complete, consistent, timely, and valid."
#           },
#           {
#             "title": "Data Lineage",
#             "description": "The process of tracking the origin and movement of data."
#           },
#           {
#             "title": "Data Catalog",
#             "description": "A centralized repository of metadata about an organization's data assets."
#           },
#           {
#             "title": "Data Dictionary",
#             "description": "A centralized repository of information about the data elements used in an organization."
#           },
#           {
#             "title": "Master Data Management (MDM)",
#             "description": "The process of creating and maintaining a single, consistent view of master data."
#           },
#           {
#             "title": "Reference Data Management (RDM)",
#             "description": "The process of creating and maintaining a single, consistent view of reference data."
#           },
#           {
#             "title": "Metadata Management",
#             "description": "The process of managing metadata about an organization's data assets."
#           },
#           {
#             "title": "Data Integration",
#             "description": "The process of combining data from multiple sources into a single, unified view."
#           },
#           {
#             "title": "Data Migration",
#             "description": "The process of moving data from one system to another."
#           },
#           {
#             "title": "Data Transformation",
#             "description": "The process of converting data from one format to another."
#           },
#           {
#             "title": "Data Cleansing",
#             "description": "The process of correcting or removing inaccurate, incomplete, or inconsistent data."
#           },
#           {
#             "title": "Data Validation",
#             "description": "The process of ensuring that data meets certain quality standards."
#           },
#           {
#             "title": "Data Profiling",
#             "description": "The process of analyzing data to understand its characteristics and quality."
#           },
#           {
#             "title": "Data Discovery",
#             "description": "The process of identifying and locating data assets."
#           },
#           {
#             "title": "Data Mining",
#             "description": "The process of discovering patterns and insights from large datasets."
#           },
#           {
#             "title": "Machine Learning",
#             "description": "A type of artificial intelligence that allows computers to learn from data without being explicitly programmed."
#           },
#           {
#             "title": "Artificial Intelligence (AI)",
#             "description": "The ability of a computer or machine to mimic human intelligence."
#           },
#           {
#             "title": "Cloud Computing",
#             "description": "The delivery of computing services—including servers, storage, databases, networking, software, analytics, and intelligence—over the Internet (“the cloud”) to offer faster innovation, flexible resources, and economies of scale."
#           },
#           {
#             "title": "Big Data",
#             "description": "Extremely large datasets that are difficult to process using traditional methods."
#           },
#           {
#             "title": "Data Science",
#             "description": "An interdisciplinary field that uses scientific methods, processes, algorithms and systems to extract knowledge and insights from data in various forms, both structured and unstructured."
#           },
#           {
#             "title": "Data Lake",
#             "description": "A centralized repository for storing structured and unstructured data."
#           },
#           {
#             "title": "Data Warehouse",
#             "description": "A centralized repository for storing structured data for reporting and analysis."
#           },
#           {
#             "title": "Business Intelligence (BI)",
#             "description": "The process of analyzing data to gain insights and make better business decisions."
#           },
#           {
#             "title": "Analytics",
#             "description": "The process of examining data to draw conclusions and make predictions."
#           },
#           {
#             "title": "Dashboard",
#             "description": "A visual display of key performance indicators (KPIs) and other important metrics."
#           },
#           {
#             "title": "Report",
#             "description": "A document that presents data in a structured and organized format."
#           },
#           {
#             "title": "Key Performance Indicator (KPI)",
#             "description": "A measurable value that demonstrates how effectively a company is achieving key business objectives."
#           },
#           {
#             "title": "Return on Investment (ROI)",
#             "description": "A performance measure used to evaluate the efficiency of an investment or compare the efficiency of a number of different investments."
#           },
#           {
#             "title": "Net Promoter Score (NPS)",
#             "description": "A metric used to measure customer loyalty and satisfaction."
#           },
#           {
#             "title": "Customer Satisfaction (CSAT)",
#             "description": "A metric used to measure customer satisfaction with a specific product or service."
#           },
#           {
#             "title": "Customer Effort Score (CES)",
#             "description": "A metric used to measure the effort a customer has to expend to get an issue resolved."
#           },
#           {
#             "title": "Churn Rate",
#             "description": "The rate at which customers stop doing business with a company."
#           },
#           {
#             "title": "Customer Lifetime Value (CLTV)",
#             "description": "A prediction of the net profit attributed to the entire future relationship with a customer."
#           },
#           {
#             "title": "Customer Acquisition Cost (CAC)",
#             "description": "The cost of acquiring a new customer."
#           },
#           {
#             "title": "Conversion Rate",
#             "description": "The percentage of website visitors who complete a desired action, such as making a purchase or filling out a form."
#           },
#           {
#             "title": "Cart Abandonment Rate",
#             "description": "The percentage of users who add items to their cart but do not complete the purchase."
#           },
#           {
#             "title": "Bounce Rate",
#             "description": "The percentage of visitors who enter the site and then leave (\"bounce\") rather than continuing to view other pages within the same site."
#           },
#           {
#             "title": "Time on Site",
#             "description": "The average amount of time that visitors spend on a website."
#           },
#           {
#             "title": "Page Views",
#             "description": "The number of times a page on a website is viewed."
#           },
#           {
#             "title": "Unique Visitors",
#             "description": "The number of distinct individuals who visit a website during a specific period."
#           },
#           {
#             "title": "Traffic Source",
#             "description": "The origin of traffic to a website or application."
#           },
#           {
#             "title": "Search Engine Optimization (SEO)",
#             "description": "The process of optimizing a website to rank higher in search engine results pages (SERPs)."
#           },
#           {
#             "title": "Pay-Per-Click (PPC) Advertising",
#             "description": "A form of online advertising where advertisers pay a fee each time one of their ads is clicked."
#           },
#           {
#             "title": "Social Media Marketing",
#             "description": "The process of using social media platforms to promote a product or service."
#           },
#           {
#             "title": "Email Marketing",
#             "description": "The process of sending commercial messages to a group of people using email."
#           },
#           {
#             "title": "Content Marketing",
#             "description": "A marketing technique of creating and distributing valuable, relevant and consistent content to attract and acquire a clearly defined audience – with the objective of driving profitable customer action."
#           },
#           {
#             "title": "Influencer Marketing",
#             "description": "A type of marketing that involves collaborating with influencers to promote a product or service."
#           },
#           {
#             "title": "Affiliate Marketing",
#             "description": "A marketing arrangement by which an online retailer pays a commission to an external website for traffic or sales generated from its referrals."
#           },
#           {
#             "title": "Mobile Marketing",
#             "description": "The process of marketing to customers through mobile devices."
#           },
#           {
#             "title": "Video Marketing",
#             "description": "The process of using video to promote a product or service."
#           },
#           {
#             "title": "Event Marketing",
#             "description": "The process of promoting a product or service through live events."
#           },
#           {
#             "title": "Public Relations (PR)",
#             "description": "The practice of managing the spread of information between an individual or an organization and the public."
#           },
#           {
#             "title": "Brand Awareness",
#             "description": "The extent to which consumers are familiar with the qualities or image of a particular brand of goods or services."
#           },
#           {
#             "title": "Brand Loyalty",
#             "description": "The tendency of consumers to continuously purchase one brand's products rather than those of its competitors."
#           },
#           {
#             "title": "Customer Engagement",
#             "description": "The emotional connection between a customer and a brand."
#           },
#           {
#             "title": "Customer Advocacy",
#             "description": "The act of customers recommending a product or service to others."
#           },
#           {
#             "title": "Word-of-Mouth Marketing",
#             "description": "The process of generating positive buzz about a product or service through customer referrals."
#           },
#           {
#             "title": "Viral Marketing",
#             "description": "A marketing phenomenon that facilitates and encourages people to pass along a marketing message."
#           },
#           {
#             "title": "Guerrilla Marketing",
#             "description": "An advertising strategy that focuses on low-cost unconventional marketing tactics that yield maximum results."
#           },
#           {
#             "title": "Product Development",
#             "description": "The process of creating and launching new products."
#           },
#           {
#             "title": "Market Research",
#             "description": "The process of gathering information about a market to understand customer needs and preferences."
#           },
#           {
#             "title": "Competitive Analysis",
#             "description": "The process of identifying and evaluating competitors."
#           },
#           {
#             "title": "SWOT Analysis",
#             "description": "A strategic planning tool used to evaluate the strengths, weaknesses, opportunities, and threats involved in a project or business venture."
#           },
#           {
#             "title": "Business Model",
#             "description": "A plan for how a company will create, deliver, and capture value."
#           },
#           {
#             "title": "Value Proposition",
#             "description": "A statement that describes the benefits that customers will receive from a product or service."
#           },
#           {
#             "title": "Revenue Stream",
#             "description": "A source of revenue for a business."
#           },
#           {
#             "title": "Cost Structure",
#             "description": "The costs that a business incurs to operate."
#           },
#           {
#             "title": "Profit Margin",
#             "description": "The percentage of revenue that remains after deducting all costs."
#           },
#           {
#             "title": "Cash Flow",
#             "description": "The movement of money into and out of a business."
#           },
#           {
#             "title": "Balance Sheet",
#             "description": "A financial statement that summarizes a company's assets, liabilities, and equity at a specific point in time."
#           },
#           {
#             "title": "Income Statement",
#             "description": "A financial statement that reports a company's financial performance over a period of time."
#           },
#           {
#             "title": "Statement of Cash Flows",
#             "description": "A financial statement that reports the movement of cash into and out of a company over a period of time."
#           },
#           {
#             "title": "Financial Ratio",
#             "description": "A comparison of two financial statement items."
#           },
#           {
#             "title": "Working Capital",
#             "description": "The difference between a company's current assets and current liabilities."
#           },
#           {
#             "title": "Inventory Management",
#             "description": "The process of managing the flow of inventory into and out of a business."
#           },
#           {
#             "title": "Supply Chain Management",
#             "description": "The process of managing the flow of goods and services from suppliers to customers."
#           },
#           {
#             "title": "Logistics",
#             "description": "The process of planning, implementing, and controlling the efficient, effective flow and storage of goods, services, and related information from point of origin to point of consumption for the purpose of conforming to customer requirements."
#           },
#           {
#             "title": "Transportation",
#             "description": "The movement of goods and services from one location to another."
#           },
#           {
#             "title": "Warehousing",
#             "description": "The storage of goods in a warehouse."
#           },
#           {
#             "title": "Distribution",
#             "description": "The process of getting goods from the manufacturer to the customer."
#           },
#           {
#             "title": "Customer Service",
#             "description": "The assistance and advice provided by a company to those people who buy or use its products or services."
#           },
#           {
#             "title": "Technical Support",
#             "description": "The assistance provided to users of technology products or services."
#           },
#           {
#             "title": "Help Desk",
#             "description": "A resource intended to provide the customer or end user with information and support related to a company's or institution's products and services."
#           },
#           {
#             "title": "Knowledge Base",
#             "description": "A centralized repository of information about a product or service."
#           },
#           {
#             "title": "Frequently Asked Questions (FAQ)",
#             "description": "A list of common questions and answers about a product or service."
#           },
#           {
#             "title": "Troubleshooting",
#             "description": "The process of identifying and resolving problems."
#           },
#           {
#             "title": "Bug",
#             "description": "An error in a software program."
#           },
#           {
#             "title": "Patch",
#             "description": "A software update that fixes a bug."
#           },
#           {
#             "title": "Release",
#             "description": "A version of a software program that is made available to the public."
#           },
#           {
#             "title": "Deployment",
#             "description": "The process of installing and configuring a software program on a computer or server."
#           },
#           {
#             "title": "Maintenance",
#             "description": "The process of keeping a software program running smoothly."
#           },
#           {
#             "title": "Upgrade",
#             "description": "The process of installing a new version of a software program."
#           },
#           {
#             "title": "Migration",
#             "description": "The process of moving data or applications from one system to another."
#           },
#           {
#             "title": "Integration",
#             "description": "The process of connecting two or more systems together."
#           },
#           {
#             "title": "Customization",
#             "description": "The process of modifying a software program to meet specific needs."
#           },
#           {
#             "title": "Configuration",
#             "description": "The process of setting up a software program to work in a specific environment."
#           },
#           {
#             "title": "Session Duration",
#             "description": "The length of time a user spends on a website or application during a single session."
#           },
#           {
#             "title": "Bounce Rate",
#             "description": "The percentage of visitors who enter the site and then leave (\"bounce\") rather than continuing to view other pages within the same site."
#           },
#           {
#             "title": "Exit Rate",
#             "description": "The percentage of visitors who leave a website from a specific page."
#           },
#           {
#             "title": "Page Depth",
#             "description": "The average number of pages viewed by a visitor during a single session."
#           },
#           {
#             "title": "Click-Through Rate (CTR)",
#             "description": "The percentage of users who click on a specific link or ad."
#           },
#           {
#             "title": "Cost Per Click (CPC)",
#             "description": "The amount of money an advertiser pays each time someone clicks on their ad."
#           },
#           {
#             "title": "Cost Per Acquisition (CPA)",
#             "description": "The amount of money an advertiser pays to acquire a new customer."
#           },
#           {
#             "title": "Return on Ad Spend (ROAS)",
#             "description": "The amount of revenue generated for every dollar spent on advertising."
#           },
#           {
#             "title": "Website Conversion Rate",
#             "description": "The percentage of website visitors who complete a desired action, such as making a purchase or filling out a form."
#           },
#           {
#             "title": "Landing Page Conversion Rate",
#             "description": "The percentage of visitors who arrive on a specific landing page and complete a desired action."
#           },
#           {
#             "title": "Lead Generation",
#             "description": "The process of attracting and capturing the interest of potential customers."
#           },
#           {
#             "title": "Lead Qualification",
#             "description": "The process of determining whether a lead is a good fit for a company's products or services."
#           },
#           {
#             "title": "Sales Pipeline",
#             "description": "A visual representation of the stages a lead goes through from initial contact to becoming a customer."
#           }
#         ]
#       }
#     }
#   }
# }

SyntaxError: invalid syntax (<ipython-input-283-bf72856abd8b>, line 1)

In [None]:
"""DD Dataset

{'name': 'projects/ai-learning-agents/locations/us-central1/dataScans/a353d3c5c-2d04-47cb-9b4a-b07e60fb8868', 'uid': '77523313-7a2d-47a7-992e-53c85737bbab', 'description': 'Data documentation scan for the dataset - "thelook" with default configuration created through generate insights', 'displayName': 'thelook-data-documentation-scan', 'state': 'ACTIVE', 'createTime': '2025-06-06T16:57:36.078877880Z', 'updateTime': '2025-06-06T16:57:40.959530013Z', 'data': {'resource': '//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook'}, 'executionSpec': {'trigger': {'onDemand': {}}}, 'executionStatus': {'latestJobStartTime': '2025-06-06T17:02:54.794856720Z', 'latestJobEndTime': '2025-06-06T17:03:49.304670226Z', 'latestJobCreateTime': '2025-06-06T17:02:54.794781099Z'}, 'type': 'DATA_DOCUMENTATION', 'dataDocumentationSpec': {}, 'dataDocumentationResult': {'queries': [{'sql': 'WITH user_order_counts AS (\n    SELECT\n        u.id AS user_id,\n        COUNT(o.order_id) AS order_count\n    FROM\n        `thelook.users` u\n    LEFT JOIN\n        `thelook.orders` o ON u.id = o.user_id\n    GROUP BY\n        u.id\n),\naverage_order_count AS (\n    SELECT\n        AVG(order_count) AS avg_order_count,\n        STDDEV(order_count) AS std_order_count\n    FROM\n        user_order_counts\n)\nSELECT\n    uoc.user_id,\n    uoc.order_count,\n    aoc.avg_order_count,\n    aoc.std_order_count\nFROM\n    user_order_counts uoc\nCROSS JOIN\n    average_order_count aoc\nWHERE\n    uoc.order_count > aoc.avg_order_count + (3 * aoc.std_order_count)\nORDER BY\n    uoc.order_count DESC;', 'description': 'Identify users with unusually high order frequencies compared to the average order frequency, potentially indicating bot activity or fraudulent behavior, by joining the users and orders tables.'}, {'sql': 'WITH product_return_rates AS (\n    SELECT\n        p.category AS product_category,\n        p.id AS product_id,\n        SUM(CASE WHEN oi.returned_at IS NOT NULL THEN 1 ELSE 0 END) 
AS return_count,\n        COUNT(oi.id) AS total_order_items,\n        SAFE_DIVIDE(SUM(CASE WHEN oi.returned_at IS NOT NULL THEN 1 ELSE 0 END), COUNT(oi.id)) AS return_rate\n    FROM\n        `thelook.products` p\n    JOIN\n        `thelook.order_items` oi ON p.id = oi.product_id\n    GROUP BY\n        p.category, p.id\n),\ncategory_avg_return_rates AS (\n    SELECT\n        product_category,\n        AVG(return_rate) AS avg_return_rate,\n        STDDEV(return_rate) AS std_return_rate\n    FROM\n        product_return_rates\n    GROUP BY\n        product_category\n)\nSELECT\n    prr.product_category,\n    prr.product_id,\n    prr.return_rate,\n    carr.avg_return_rate,\n    carr.std_return_rate\nFROM\n    product_return_rates prr\nJOIN\n    category_avg_return_rates carr ON prr.product_category = carr.product_category\nWHERE\n    prr.return_rate > carr.avg_return_rate + (3 * carr.std_return_rate)\nORDER BY\n    prr.return_rate DESC;', 'description': 'Identify products with unusually high return rates compared to the average return rate for products in the same category, potentially indicating quality issues or misleading descriptions, by joining the products and order_items tables.'}, {'sql': 'WITH distribution_center_avg_costs AS (\n    SELECT\n        dc.id AS distribution_center_id,\n        AVG(p.cost) AS avg_product_cost,\n        STDDEV(p.cost) AS std_product_cost\n    FROM\n        `thelook.distribution_centers` dc\n    JOIN\n        `thelook.products` p ON dc.id = p.distribution_center_id\n    GROUP BY\n        dc.id\n),\noverall_avg_cost AS (\n    SELECT\n        AVG(avg_product_cost) AS overall_avg_cost,\n        STDDEV(avg_product_cost) AS overall_std_cost\n    FROM\n        distribution_center_avg_costs\n)\nSELECT\n    dcac.distribution_center_id,\n    dcac.avg_product_cost,\n    oac.overall_avg_cost,\n    oac.overall_std_cost\nFROM\n    distribution_center_avg_costs dcac\nCROSS JOIN\n    overall_avg_cost oac\nWHERE\n    dcac.avg_product_cost > 
oac.overall_avg_cost + (3 * oac.overall_std_cost)\nORDER BY\n    dcac.avg_product_cost DESC;', 'description': 'Identify distribution centers with unusually high average product costs compared to other distribution centers, potentially indicating inefficiencies or pricing discrepancies, by joining the products and distribution_centers tables.'}, {'sql': "WITH ShippedItems AS (\n    SELECT\n        FORMAT_DATE('%Y-%Q', DATE(oi.shipped_at)) AS quarter,\n        ii.product_brand,\n        COUNT(oi.id) AS shipped_count\n    FROM\n        `thelook.order_items` oi\n    JOIN\n        `thelook.inventory_items` ii ON oi.inventory_item_id = ii.id\n    WHERE oi.shipped_at IS NOT NULL\n    GROUP BY 1, 2\n), ReturnedItems AS (\n    SELECT\n        FORMAT_DATE('%Y-%Q', DATE(oi.returned_at)) AS quarter,\n        ii.product_brand,\n        COUNT(oi.id) AS returned_count\n    FROM\n        `thelook.order_items` oi\n    JOIN\n        `thelook.inventory_items` ii ON oi.inventory_item_id = ii.id\n    WHERE oi.returned_at IS NOT NULL\n    GROUP BY 1, 2\n)\nSELECT\n    COALESCE(si.quarter, ri.quarter) AS quarter,\n    COALESCE(si.product_brand, ri.product_brand) AS product_brand,\n    COALESCE(si.shipped_count, 0) AS shipped_count,\n    COALESCE(ri.returned_count, 0) AS returned_count,\n    CASE\n        WHEN COALESCE(ri.returned_count, 0) = 0 THEN NULL\n        ELSE SAFE_DIVIDE(COALESCE(si.shipped_count, 0), COALESCE(ri.returned_count, 0))\n    END AS shipped_to_returned_ratio\nFROM ShippedItems si FULL OUTER JOIN ReturnedItems ri ON si.quarter = ri.quarter AND si.product_brand = ri.product_brand\nORDER BY quarter, product_brand;", 'description': 'What is the trend of the ratio of shipped vs. 
returned order items by product brand on a quarterly basis?'}, {'sql': "WITH\n  ReturnedSales AS (\n    SELECT\n      FORMAT_DATE('%Y', DATE(oi.returned_at)) AS year,\n      p.category,\n      SUM(oi.sale_price) AS total_returned_sales\n    FROM\n      `thelook.order_items` oi\n    JOIN\n      `thelook.products` p ON oi.product_id = p.id\n    WHERE oi.returned_at IS NOT NULL\n    GROUP BY 1, 2\n  ),\n  TotalSales AS (\n    SELECT\n      FORMAT_DATE('%Y', DATE(oi.created_at)) AS year,\n      p.category,\n      SUM(oi.sale_price) AS total_sales\n    FROM\n      `thelook.order_items` oi\n    JOIN\n      `thelook.products` p ON oi.product_id = p.id\n    GROUP BY 1, 2\n  )\nSELECT\n  rs.year,\n  rs.category,\n  rs.total_returned_sales,\n  ts.total_sales,\n  SAFE_DIVIDE(rs.total_returned_sales, ts.total_sales) AS return_percentage\nFROM\n  ReturnedSales rs\nJOIN\n  TotalSales ts ON rs.year = ts.year AND rs.category = ts.category\nORDER BY rs.year, rs.category;", 'description': "What is the yearly trend of total returned order item sale price by product category, and what percentage of total sales does each category's returns represent?"}, {'sql': "WITH\n  OrderCounts AS (\n    SELECT\n      FORMAT_DATE('%Y-%W', DATE(o.created_at)) AS week,\n      u.traffic_source,\n      COUNT(DISTINCT o.order_id) AS num_orders\n    FROM\n      `thelook.orders` o\n    JOIN\n      `thelook.users` u ON o.user_id = u.id\n    GROUP BY 1, 2\n  ),\n  EventCounts AS (\n    SELECT\n      FORMAT_DATE('%Y-%W', DATE(e.created_at)) AS week,\n      e.traffic_source,\n      COUNT(DISTINCT e.session_id) AS num_events\n    FROM\n      `thelook.events` e\n    GROUP BY 1, 2\n  )\nSELECT\n  oc.week,\n  oc.traffic_source,\n  SAFE_DIVIDE(oc.num_orders, ec.num_events) AS order_conversion_rate,\n  AVG(SAFE_DIVIDE(oc.num_orders, ec.num_events)) OVER (PARTITION BY oc.traffic_source ORDER BY oc.week ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS moving_avg_conversion_rate\nFROM\n  OrderCounts oc\nJOIN\n  
EventCounts ec ON oc.week = ec.week AND oc.traffic_source = ec.traffic_source\nORDER BY oc.week, oc.traffic_source;", 'description': 'How has the weekly order conversion rate (number of orders divided by the number of events) changed over time, segmented by traffic source?'}, {'sql': 'SELECT\n    u.traffic_source,\n    AVG(oi.sale_price) AS average_order_value\n  FROM\n    `thelook.users` u\n  JOIN\n    `thelook.order_items` oi ON u.id = oi.user_id\n  GROUP BY\n    u.traffic_source\nORDER BY\n    average_order_value DESC\nLIMIT 5;', 'description': 'Identify the top 5 traffic sources that lead to the highest average order value, indicating the most valuable marketing channels.'}, {'sql': 'SELECT\n    CORR(u.age, o.num_of_item) AS age_vs_num_items_correlation\n  FROM\n    `thelook.users` AS u\n  JOIN\n    `thelook.orders` AS o\n  ON\n    u.id = o.user_id;', 'description': 'Calculate the correlation between user age and the number of items in their orders to understand if older users tend to order more items.'}, {'sql': 'SELECT\n    p.category,\n    COUNTIF(oi.returned_at IS NOT NULL) / COUNT(oi.id) AS return_rate\n  FROM\n    `thelook.order_items` oi\n  JOIN\n    `thelook.products` p ON oi.product_id = p.id\n  GROUP BY\n    p.category;', 'description': 'Calculate the percentage of orders returned for each product category to identify categories with high return rates.'}, {'sql': 'SELECT\n    p.department,\n    AVG(u.age) AS average_user_age\n  FROM\n    `thelook.orders` o\n  JOIN\n    `thelook.users` u ON o.user_id = u.id\n  JOIN\n    `thelook.order_items` oi ON o.order_id = oi.order_id\n  JOIN\n    `thelook.products` p ON oi.product_id = p.id\n  GROUP BY\n    p.department;', 'description': 'Calculate the average age of users who placed orders for each product department, to understand which departments are popular among different age groups.'}, {'sql': 'SELECT\n    u.traffic_source,\n    AVG(oi.sale_price) AS average_order_value\n  FROM\n    `thelook.users` AS u\n  
JOIN\n    `thelook.order_items` AS oi\n  ON\n    u.id = oi.user_id\n  GROUP BY\n    u.traffic_source\nORDER BY\n  average_order_value DESC\nLIMIT 5;', 'description': 'Identify the top 5 traffic sources with the highest average order value to optimize marketing spend.'}, {'sql': 'SELECT\n    dc.name AS distribution_center_name,\n    VAR_POP(p.retail_price) AS retail_price_variance\n  FROM\n    `thelook.products` AS p\n  JOIN\n    `thelook.distribution_centers` AS dc\n  ON\n    p.distribution_center_id = dc.id\n  GROUP BY\n    dc.name;', 'description': 'Calculate the variance in product retail prices across different distribution centers.'}, {'sql': 'SELECT\n    p.category,\n    AVG(oi.sale_price) AS average_sale_price,\n    STDDEV_SAMP(oi.sale_price) AS sale_price_std_dev\n  FROM\n    `thelook.products` AS p\n  JOIN\n    `thelook.order_items` AS oi\n  ON\n    p.id = oi.product_id\n  GROUP BY\n    p.category;', 'description': 'Determine the average sale price of products in each category and calculate the standard deviation to identify categories with high price variability.'}, {'sql': 'SELECT\n    p.department,\n    AVG(TIMESTAMP_DIFF(oi.delivered_at, oi.created_at, DAY)) AS avg_delivery_time_days\n  FROM\n    `thelook.order_items` AS oi\n  JOIN\n    `thelook.products` AS p\n  ON\n    oi.product_id = p.id\n  WHERE oi.delivered_at IS NOT NULL\n  GROUP BY\n    p.department;', 'description': 'Determine the average time between order creation and delivery, grouped by product department.'}, {'sql': 'SELECT\n    p.distribution_center_id,\n    AVG(TIMESTAMP_DIFF(oi.delivered_at, oi.created_at, DAY)) AS avg_delivery_time\n  FROM\n    `thelook.order_items` oi\n  JOIN\n    `thelook.products` p ON oi.product_id = p.id\n  WHERE oi.delivered_at IS NOT NULL\n  GROUP BY\n    p.distribution_center_id;', 'description': 'Determine the average number of days between order creation and delivery for each distribution center to assess fulfillment efficiency.'}, {'sql': 'SELECT\n    
u.state,\n    ANY_VALUE(e.browser) AS most_common_browser\n  FROM\n    `thelook.users` u\n  JOIN\n    `thelook.events` e ON u.id = e.user_id\n  GROUP BY\n    u.state;', 'description': 'Determine the most common browser used by users from each state to understand regional technology preferences.'}, {'sql': 'WITH product_avg_prices AS (\n    SELECT\n        p.id AS product_id,\n        AVG(oi.sale_price) AS avg_sale_price,\n        STDDEV(oi.sale_price) AS std_sale_price\n    FROM\n        `thelook.products` p\n    JOIN\n        `thelook.order_items` oi ON p.id = oi.product_id\n    GROUP BY\n        p.id\n),\norder_item_anomalies AS (\n    SELECT\n        oi.id AS order_item_id,\n        oi.product_id,\n        oi.sale_price,\n        pap.avg_sale_price,\n        pap.std_sale_price\n    FROM\n        `thelook.order_items` oi\n    JOIN\n        product_avg_prices pap ON oi.product_id = pap.product_id\n    WHERE pap.std_sale_price > 0\n)\nSELECT\n    oia.order_item_id,\n    oia.product_id,\n    oia.sale_price,\n    oia.avg_sale_price,\n    oia.std_sale_price\nFROM\n    order_item_anomalies oia\nWHERE\n    oia.sale_price > oia.avg_sale_price + (3 * oia.std_sale_price)\nORDER BY\n    oia.sale_price DESC;', 'description': 'Detect anomalous order items with unusually high sale prices compared to the average sale price for the same product, potentially indicating pricing errors or promotional abuse, by joining the order_items and products tables.'}, {'sql': 'WITH order_item_counts AS (\n    SELECT\n        o.user_id,\n        o.order_id,\n        COUNT(oi.id) AS item_count\n    FROM\n        `thelook.orders` o\n    JOIN\n        `thelook.order_items` oi ON o.order_id = oi.order_id\n    GROUP BY\n        o.user_id, o.order_id\n),\nuser_avg_item_counts AS (\n    SELECT\n        user_id,\n        AVG(item_count) AS avg_item_count,\n        STDDEV(item_count) AS std_item_count\n    FROM\n        order_item_counts\n    GROUP BY\n        user_id\n)\nSELECT\n    oic.user_id,\n    
oic.order_id,\n    oic.item_count,\n    uaic.avg_item_count,\n    uaic.std_item_count\nFROM\n    order_item_counts oic\nJOIN\n    user_avg_item_counts uaic ON oic.user_id = uaic.user_id\nWHERE\n    oic.item_count > uaic.avg_item_count + (3 * uaic.std_item_count)\nORDER BY\n    oic.item_count DESC;', 'description': 'Detect users with unusually high numbers of order items compared to the average number of items per order, potentially indicating bulk purchases or reseller activity, by joining the orders and order_items tables.'}], 'datasetResult': {'queries': [{'sql': 'WITH user_order_counts AS (\n    SELECT\n        u.id AS user_id,\n        COUNT(o.order_id) AS order_count\n    FROM\n        `thelook.users` u\n    LEFT JOIN\n        `thelook.orders` o ON u.id = o.user_id\n    GROUP BY\n        u.id\n),\naverage_order_count AS (\n    SELECT\n        AVG(order_count) AS avg_order_count,\n        STDDEV(order_count) AS std_order_count\n    FROM\n        user_order_counts\n)\nSELECT\n    uoc.user_id,\n    uoc.order_count,\n    aoc.avg_order_count,\n    aoc.std_order_count\nFROM\n    user_order_counts uoc\nCROSS JOIN\n    average_order_count aoc\nWHERE\n    uoc.order_count > aoc.avg_order_count + (3 * aoc.std_order_count)\nORDER BY\n    uoc.order_count DESC;', 'description': 'Identify users with unusually high order frequencies compared to the average order frequency, potentially indicating bot activity or fraudulent behavior, by joining the users and orders tables.'}, {'sql': 'WITH product_return_rates AS (\n    SELECT\n        p.category AS product_category,\n        p.id AS product_id,\n        SUM(CASE WHEN oi.returned_at IS NOT NULL THEN 1 ELSE 0 END) AS return_count,\n        COUNT(oi.id) AS total_order_items,\n        SAFE_DIVIDE(SUM(CASE WHEN oi.returned_at IS NOT NULL THEN 1 ELSE 0 END), COUNT(oi.id)) AS return_rate\n    FROM\n        `thelook.products` p\n    JOIN\n        `thelook.order_items` oi ON p.id = oi.product_id\n    GROUP BY\n        p.category, 
p.id\n),\ncategory_avg_return_rates AS (\n    SELECT\n        product_category,\n        AVG(return_rate) AS avg_return_rate,\n        STDDEV(return_rate) AS std_return_rate\n    FROM\n        product_return_rates\n    GROUP BY\n        product_category\n)\nSELECT\n    prr.product_category,\n    prr.product_id,\n    prr.return_rate,\n    carr.avg_return_rate,\n    carr.std_return_rate\nFROM\n    product_return_rates prr\nJOIN\n    category_avg_return_rates carr ON prr.product_category = carr.product_category\nWHERE\n    prr.return_rate > carr.avg_return_rate + (3 * carr.std_return_rate)\nORDER BY\n    prr.return_rate DESC;', 'description': 'Identify products with unusually high return rates compared to the average return rate for products in the same category, potentially indicating quality issues or misleading descriptions, by joining the products and order_items tables.'}, {'sql': 'WITH distribution_center_avg_costs AS (\n    SELECT\n        dc.id AS distribution_center_id,\n        AVG(p.cost) AS avg_product_cost,\n        STDDEV(p.cost) AS std_product_cost\n    FROM\n        `thelook.distribution_centers` dc\n    JOIN\n        `thelook.products` p ON dc.id = p.distribution_center_id\n    GROUP BY\n        dc.id\n),\noverall_avg_cost AS (\n    SELECT\n        AVG(avg_product_cost) AS overall_avg_cost,\n        STDDEV(avg_product_cost) AS overall_std_cost\n    FROM\n        distribution_center_avg_costs\n)\nSELECT\n    dcac.distribution_center_id,\n    dcac.avg_product_cost,\n    oac.overall_avg_cost,\n    oac.overall_std_cost\nFROM\n    distribution_center_avg_costs dcac\nCROSS JOIN\n    overall_avg_cost oac\nWHERE\n    dcac.avg_product_cost > oac.overall_avg_cost + (3 * oac.overall_std_cost)\nORDER BY\n    dcac.avg_product_cost DESC;', 'description': 'Identify distribution centers with unusually high average product costs compared to other distribution centers, potentially indicating inefficiencies or pricing discrepancies, by joining the products and 
distribution_centers tables.'}, {'sql': "WITH ShippedItems AS (\n    SELECT\n        FORMAT_DATE('%Y-%Q', DATE(oi.shipped_at)) AS quarter,\n        ii.product_brand,\n        COUNT(oi.id) AS shipped_count\n    FROM\n        `thelook.order_items` oi\n    JOIN\n        `thelook.inventory_items` ii ON oi.inventory_item_id = ii.id\n    WHERE oi.shipped_at IS NOT NULL\n    GROUP BY 1, 2\n), ReturnedItems AS (\n    SELECT\n        FORMAT_DATE('%Y-%Q', DATE(oi.returned_at)) AS quarter,\n        ii.product_brand,\n        COUNT(oi.id) AS returned_count\n    FROM\n        `thelook.order_items` oi\n    JOIN\n        `thelook.inventory_items` ii ON oi.inventory_item_id = ii.id\n    WHERE oi.returned_at IS NOT NULL\n    GROUP BY 1, 2\n)\nSELECT\n    COALESCE(si.quarter, ri.quarter) AS quarter,\n    COALESCE(si.product_brand, ri.product_brand) AS product_brand,\n    COALESCE(si.shipped_count, 0) AS shipped_count,\n    COALESCE(ri.returned_count, 0) AS returned_count,\n    CASE\n        WHEN COALESCE(ri.returned_count, 0) = 0 THEN NULL\n        ELSE SAFE_DIVIDE(COALESCE(si.shipped_count, 0), COALESCE(ri.returned_count, 0))\n    END AS shipped_to_returned_ratio\nFROM ShippedItems si FULL OUTER JOIN ReturnedItems ri ON si.quarter = ri.quarter AND si.product_brand = ri.product_brand\nORDER BY quarter, product_brand;", 'description': 'What is the trend of the ratio of shipped vs. 
returned order items by product brand on a quarterly basis?'}, {'sql': "WITH\n  ReturnedSales AS (\n    SELECT\n      FORMAT_DATE('%Y', DATE(oi.returned_at)) AS year,\n      p.category,\n      SUM(oi.sale_price) AS total_returned_sales\n    FROM\n      `thelook.order_items` oi\n    JOIN\n      `thelook.products` p ON oi.product_id = p.id\n    WHERE oi.returned_at IS NOT NULL\n    GROUP BY 1, 2\n  ),\n  TotalSales AS (\n    SELECT\n      FORMAT_DATE('%Y', DATE(oi.created_at)) AS year,\n      p.category,\n      SUM(oi.sale_price) AS total_sales\n    FROM\n      `thelook.order_items` oi\n    JOIN\n      `thelook.products` p ON oi.product_id = p.id\n    GROUP BY 1, 2\n  )\nSELECT\n  rs.year,\n  rs.category,\n  rs.total_returned_sales,\n  ts.total_sales,\n  SAFE_DIVIDE(rs.total_returned_sales, ts.total_sales) AS return_percentage\nFROM\n  ReturnedSales rs\nJOIN\n  TotalSales ts ON rs.year = ts.year AND rs.category = ts.category\nORDER BY rs.year, rs.category;", 'description': "What is the yearly trend of total returned order item sale price by product category, and what percentage of total sales does each category's returns represent?"}, {'sql': "WITH\n  OrderCounts AS (\n    SELECT\n      FORMAT_DATE('%Y-%W', DATE(o.created_at)) AS week,\n      u.traffic_source,\n      COUNT(DISTINCT o.order_id) AS num_orders\n    FROM\n      `thelook.orders` o\n    JOIN\n      `thelook.users` u ON o.user_id = u.id\n    GROUP BY 1, 2\n  ),\n  EventCounts AS (\n    SELECT\n      FORMAT_DATE('%Y-%W', DATE(e.created_at)) AS week,\n      e.traffic_source,\n      COUNT(DISTINCT e.session_id) AS num_events\n    FROM\n      `thelook.events` e\n    GROUP BY 1, 2\n  )\nSELECT\n  oc.week,\n  oc.traffic_source,\n  SAFE_DIVIDE(oc.num_orders, ec.num_events) AS order_conversion_rate,\n  AVG(SAFE_DIVIDE(oc.num_orders, ec.num_events)) OVER (PARTITION BY oc.traffic_source ORDER BY oc.week ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS moving_avg_conversion_rate\nFROM\n  OrderCounts oc\nJOIN\n  
EventCounts ec ON oc.week = ec.week AND oc.traffic_source = ec.traffic_source\nORDER BY oc.week, oc.traffic_source;", 'description': 'How has the weekly order conversion rate (number of orders divided by the number of events) changed over time, segmented by traffic source?'}, {'sql': 'SELECT\n    u.traffic_source,\n    AVG(oi.sale_price) AS average_order_value\n  FROM\n    `thelook.users` u\n  JOIN\n    `thelook.order_items` oi ON u.id = oi.user_id\n  GROUP BY\n    u.traffic_source\nORDER BY\n    average_order_value DESC\nLIMIT 5;', 'description': 'Identify the top 5 traffic sources that lead to the highest average order value, indicating the most valuable marketing channels.'}, {'sql': 'SELECT\n    CORR(u.age, o.num_of_item) AS age_vs_num_items_correlation\n  FROM\n    `thelook.users` AS u\n  JOIN\n    `thelook.orders` AS o\n  ON\n    u.id = o.user_id;', 'description': 'Calculate the correlation between user age and the number of items in their orders to understand if older users tend to order more items.'}, {'sql': 'SELECT\n    p.category,\n    COUNTIF(oi.returned_at IS NOT NULL) / COUNT(oi.id) AS return_rate\n  FROM\n    `thelook.order_items` oi\n  JOIN\n    `thelook.products` p ON oi.product_id = p.id\n  GROUP BY\n    p.category;', 'description': 'Calculate the percentage of orders returned for each product category to identify categories with high return rates.'}, {'sql': 'SELECT\n    p.department,\n    AVG(u.age) AS average_user_age\n  FROM\n    `thelook.orders` o\n  JOIN\n    `thelook.users` u ON o.user_id = u.id\n  JOIN\n    `thelook.order_items` oi ON o.order_id = oi.order_id\n  JOIN\n    `thelook.products` p ON oi.product_id = p.id\n  GROUP BY\n    p.department;', 'description': 'Calculate the average age of users who placed orders for each product department, to understand which departments are popular among different age groups.'}, {'sql': 'SELECT\n    u.traffic_source,\n    AVG(oi.sale_price) AS average_order_value\n  FROM\n    `thelook.users` AS u\n  
JOIN\n    `thelook.order_items` AS oi\n  ON\n    u.id = oi.user_id\n  GROUP BY\n    u.traffic_source\nORDER BY\n  average_order_value DESC\nLIMIT 5;', 'description': 'Identify the top 5 traffic sources with the highest average order value to optimize marketing spend.'}, {'sql': 'SELECT\n    dc.name AS distribution_center_name,\n    VAR_POP(p.retail_price) AS retail_price_variance\n  FROM\n    `thelook.products` AS p\n  JOIN\n    `thelook.distribution_centers` AS dc\n  ON\n    p.distribution_center_id = dc.id\n  GROUP BY\n    dc.name;', 'description': 'Calculate the variance in product retail prices across different distribution centers.'}, {'sql': 'SELECT\n    p.category,\n    AVG(oi.sale_price) AS average_sale_price,\n    STDDEV_SAMP(oi.sale_price) AS sale_price_std_dev\n  FROM\n    `thelook.products` AS p\n  JOIN\n    `thelook.order_items` AS oi\n  ON\n    p.id = oi.product_id\n  GROUP BY\n    p.category;', 'description': 'Determine the average sale price of products in each category and calculate the standard deviation to identify categories with high price variability.'}, {'sql': 'SELECT\n    p.department,\n    AVG(TIMESTAMP_DIFF(oi.delivered_at, oi.created_at, DAY)) AS avg_delivery_time_days\n  FROM\n    `thelook.order_items` AS oi\n  JOIN\n    `thelook.products` AS p\n  ON\n    oi.product_id = p.id\n  WHERE oi.delivered_at IS NOT NULL\n  GROUP BY\n    p.department;', 'description': 'Determine the average time between order creation and delivery, grouped by product department.'}, {'sql': 'SELECT\n    p.distribution_center_id,\n    AVG(TIMESTAMP_DIFF(oi.delivered_at, oi.created_at, DAY)) AS avg_delivery_time\n  FROM\n    `thelook.order_items` oi\n  JOIN\n    `thelook.products` p ON oi.product_id = p.id\n  WHERE oi.delivered_at IS NOT NULL\n  GROUP BY\n    p.distribution_center_id;', 'description': 'Determine the average number of days between order creation and delivery for each distribution center to assess fulfillment efficiency.'}, {'sql': 'SELECT\n    
u.state,\n    ANY_VALUE(e.browser) AS most_common_browser\n  FROM\n    `thelook.users` u\n  JOIN\n    `thelook.events` e ON u.id = e.user_id\n  GROUP BY\n    u.state;', 'description': 'Determine the most common browser used by users from each state to understand regional technology preferences.'}, {'sql': 'WITH product_avg_prices AS (\n    SELECT\n        p.id AS product_id,\n        AVG(oi.sale_price) AS avg_sale_price,\n        STDDEV(oi.sale_price) AS std_sale_price\n    FROM\n        `thelook.products` p\n    JOIN\n        `thelook.order_items` oi ON p.id = oi.product_id\n    GROUP BY\n        p.id\n),\norder_item_anomalies AS (\n    SELECT\n        oi.id AS order_item_id,\n        oi.product_id,\n        oi.sale_price,\n        pap.avg_sale_price,\n        pap.std_sale_price\n    FROM\n        `thelook.order_items` oi\n    JOIN\n        product_avg_prices pap ON oi.product_id = pap.product_id\n    WHERE pap.std_sale_price > 0\n)\nSELECT\n    oia.order_item_id,\n    oia.product_id,\n    oia.sale_price,\n    oia.avg_sale_price,\n    oia.std_sale_price\nFROM\n    order_item_anomalies oia\nWHERE\n    oia.sale_price > oia.avg_sale_price + (3 * oia.std_sale_price)\nORDER BY\n    oia.sale_price DESC;', 'description': 'Detect anomalous order items with unusually high sale prices compared to the average sale price for the same product, potentially indicating pricing errors or promotional abuse, by joining the order_items and products tables.'}, {'sql': 'WITH order_item_counts AS (\n    SELECT\n        o.user_id,\n        o.order_id,\n        COUNT(oi.id) AS item_count\n    FROM\n        `thelook.orders` o\n    JOIN\n        `thelook.order_items` oi ON o.order_id = oi.order_id\n    GROUP BY\n        o.user_id, o.order_id\n),\nuser_avg_item_counts AS (\n    SELECT\n        user_id,\n        AVG(item_count) AS avg_item_count,\n        STDDEV(item_count) AS std_item_count\n    FROM\n        order_item_counts\n    GROUP BY\n        user_id\n)\nSELECT\n    oic.user_id,\n    
oic.order_id,\n    oic.item_count,\n    uaic.avg_item_count,\n    uaic.std_item_count\nFROM\n    order_item_counts oic\nJOIN\n    user_avg_item_counts uaic ON oic.user_id = uaic.user_id\nWHERE\n    oic.item_count > uaic.avg_item_count + (3 * uaic.std_item_count)\nORDER BY\n    oic.item_count DESC;', 'description': 'Detect users with unusually high numbers of order items compared to the average number of items per order, potentially indicating bulk purchases or reseller activity, by joining the orders and order_items tables.'}]}}}
"""

In [None]:
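# DD Table: sample DATA_DOCUMENTATION scan response for a table resource
# (thelook.events). Kept commented out, for reference only.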
# {
#   "name": "projects/ai-learning-agents/locations/us-central1/dataScans/a5f1f8396-cfad-4bc8-a25d-187c34201858",
#   "uid": "1023a6e5-9fd3-424b-9809-3685fe71bf9f",
#   "description": "Data documentation scan for the table - \"events\" with default configuration created through generate insights",
#   "displayName": "events-data-documentation-scan",
#   "state": "ACTIVE",
#   "createTime": "2025-08-07T16:47:05.422593610Z",
#   "updateTime": "2025-08-07T16:47:10.661816006Z",
#   "data": {
#     "resource": "//bigquery.googleapis.com/projects/ai-learning-agents/datasets/thelook/tables/events"
#   },
#   "executionSpec": {
#     "trigger": {
#       "onDemand": {}
#     }
#   },
#   "executionStatus": {
#     "latestJobStartTime": "2025-08-07T16:49:48.930606142Z",
#     "latestJobEndTime": "2025-08-07T16:50:50.878682518Z",
#     "latestJobCreateTime": "2025-08-07T16:49:48.930561242Z"
#   },
#   "type": "DATA_DOCUMENTATION",
#   "dataDocumentationSpec": {},
#   "dataDocumentationResult": {
#     "queries": [
#       {
#         "sql": "SELECT traffic_source, COUNT(DISTINCT user_id) AS unique_users FROM `thelook.events` WHERE sequence_number \u003e 5 GROUP BY traffic_source ORDER BY unique_users DESC;",
#         "description": "Identify the traffic sources with the highest number of unique users who have a sequence number greater than 5, indicating highly engaged users from those sources."
#       },
#       {
#         "sql": "SELECT browser, STDDEV_POP(sequence_number) AS std_dev_sequence_number FROM `thelook.events` GROUP BY browser ORDER BY std_dev_sequence_number DESC;",
#         "description": "Identify the browsers with the highest standard deviation in event sequence numbers, indicating variability in user behavior within those browsers."
#       },
#       {
#         "sql": "SELECT city, COUNT(DISTINCT user_id) AS unique_users, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY city ORDER BY unique_users DESC LIMIT 5;",
#         "description": "Identify the top 5 cities with the highest number of unique users and their corresponding average event sequence number."
#       },
#       {
#         "sql": "SELECT postal_code, COUNT(*) AS event_count, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY postal_code ORDER BY event_count DESC LIMIT 3;",
#         "description": "Identify the top 3 postal codes with the highest number of events, and calculate the average sequence number for events originating from those postal codes."
#       },
#       {
#         "sql": "SELECT browser, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY browser;",
#         "description": "Calculate the average sequence number of events for each user, grouped by browser type, to understand user engagement patterns across different browsers."
#       },
#       {
#         "sql": "SELECT traffic_source, AVG(event_count) AS avg_events_per_session FROM (SELECT traffic_source, session_id, COUNT(*) AS event_count FROM `thelook.events` GROUP BY traffic_source, session_id) GROUP BY traffic_source;",
#         "description": "Calculate the average number of events per session, grouped by traffic source, to understand user engagement levels from different sources."
#       },
#       {
#         "sql": "SELECT browser, traffic_source, COUNT(*) AS event_count, COUNT(*) * 100 / (SELECT COUNT(*) FROM `thelook.events`) AS percentage FROM `thelook.events` GROUP BY browser, traffic_source ORDER BY event_count DESC;",
#         "description": "Calculate the number of events for each browser, traffic_source combination, and then determine the percentage of total events each combination represents."
#       },
#       {
#         "sql": "SELECT EXTRACT(HOUR FROM created_at) AS hour_of_day, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY hour_of_day ORDER BY hour_of_day;",
#         "description": "Calculate the average sequence number for each user, grouped by the hour of the day the event occurred, to identify peak activity times."
#       },
#       {
#         "sql": "SELECT browser, APPROX_QUANTILES(sequence_number, 100)[OFFSET(90)] AS percentile_90 FROM `thelook.events` GROUP BY browser;",
#         "description": "Calculate the 90th percentile of sequence numbers for each browser, providing a measure of high engagement levels within each browser."
#       },
#       {
#         "sql": "SELECT\n  traffic_source,\n  VAR_POP(sequence_number) AS sequence_number_variance\nFROM\n  `thelook.events`\nGROUP BY\n  traffic_source;",
#         "description": "Calculate the population variance of the sequence number of events for each traffic source to understand the spread of user interaction within different traffic channels."
#       },
#       {
#         "sql": "SELECT user_id, APPROX_QUANTILES(sequence_number, 2)[OFFSET(1)] AS median_sequence_number FROM `thelook.events` GROUP BY user_id;",
#         "description": "Calculate the median sequence number for each user, providing a robust measure of user engagement that is less sensitive to outliers."
#       },
#       {
#         "sql": "SELECT\n  city,\n  AVG(events_per_user) AS avg_events_per_user\nFROM (\n  SELECT\n    city,\n    user_id,\n    COUNT(*) AS events_per_user\n  FROM\n    `thelook.events`\n  GROUP BY\n    city,\n    user_id\n)\nGROUP BY\n  city\nORDER BY\n  avg_events_per_user DESC\nLIMIT 5;",
#         "description": "Identify the top 5 cities with the highest average number of events per user to find the most active locations."
#       },
#       {
#         "sql": "SELECT EXTRACT(DAYOFWEEK FROM created_at) AS day_of_week, COUNT(*) * 100 / (SELECT COUNT(*) FROM `thelook.events`) AS percentage FROM `thelook.events` GROUP BY day_of_week ORDER BY day_of_week;",
#         "description": "Calculate the percentage of events that occur on each day of the week, to identify the busiest days for website traffic."
#       },
#       {
#         "sql": "SELECT user_id, MIN(created_at) AS first_event, MAX(created_at) AS last_event FROM `thelook.events` GROUP BY user_id;",
#         "description": "Find the minimum and maximum created_at timestamps for each user, to determine the duration of their activity on the platform."
#       },
#       {
#         "sql": "SELECT\n  browser,\n  AVG(sequence_number) AS avg_sequence_number\nFROM\n  `thelook.events`\nGROUP BY\n  browser\nORDER BY\n  avg_sequence_number DESC\nLIMIT 1;",
#         "description": "Find the browser with the maximum average sequence number to identify the browser with the most engaged users."
#       },
#       {
#         "sql": "SELECT\n  AVG(event_count) AS avg_events_per_session,\n  STDDEV_SAMP(event_count) AS stddev_events_per_session\nFROM (\n  SELECT\n    session_id,\n    COUNT(*) AS event_count\n  FROM\n    `thelook.events`\n  GROUP BY\n    session_id\n);",
#         "description": "Calculate the average number of events per session and the standard deviation to understand session activity."
#       },
#       {
#         "sql": "SELECT traffic_source, COUNT(*) * 100 / (SELECT COUNT(*) FROM `thelook.events`) AS percentage FROM `thelook.events` GROUP BY traffic_source;",
#         "description": "Determine the percentage of events originating from each traffic source, providing insights into the effectiveness of different marketing channels."
#       },
#       {
#         "sql": "SELECT browser, event_type, COUNT(*) AS event_count, ROW_NUMBER() OVER(PARTITION BY browser ORDER BY COUNT(*) DESC) AS rn FROM `thelook.events` GROUP BY browser, event_type QUALIFY rn = 1;",
#         "description": "Determine the most common event type for each browser, providing insights into how different browsers are used on the platform."
#       },
#       {
#         "sql": "SELECT CORR(sequence_number, user_id) FROM `thelook.events`;",
#         "description": "Calculate the correlation between the sequence number and user ID to understand if there's a relationship between user ID and event sequence within sessions."
#       },
#       {
#         "sql": "SELECT state, event_type, COUNT(*) AS event_count FROM `thelook.events` GROUP BY state, event_type ORDER BY state, event_count DESC;",
#         "description": "Determine the distribution of event types across different states, providing insights into regional variations in user behavior."
#       },
#       {
#         "sql": "SELECT city, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` WHERE user_id \u003e 100 GROUP BY city ORDER BY avg_sequence_number DESC LIMIT 5;",
#         "description": "Identify the top 5 cities with the highest average event sequence number for users with user IDs greater than 100."
#       },
#       {
#         "sql": "SELECT browser, traffic_source, AVG(event_count) AS avg_events_per_user FROM (SELECT browser, traffic_source, user_id, COUNT(*) AS event_count FROM `thelook.events` GROUP BY browser, traffic_source, user_id) GROUP BY browser, traffic_source;",
#         "description": "Determine the average number of events per user, broken down by browser and traffic source, to understand user engagement across different channels and browsers."
#       },
#       {
#         "sql": "SELECT state, event_type, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY state, event_type;",
#         "description": "Determine the average sequence number for each event type, grouped by state, to understand how user engagement varies across different event types and regions."
#       },
#       {
#         "sql": "SELECT browser, uri, COUNT(*) AS uri_count, ROW_NUMBER() OVER(PARTITION BY browser ORDER BY COUNT(*) DESC) AS rn FROM `thelook.events` GROUP BY browser, uri QUALIFY rn \u003c= 3;",
#         "description": "Determine the top 3 most frequent URI values for each browser, providing insights into the most popular pages visited by users of different browsers."
#       },
#       {
#         "sql": "SELECT\n  CORR(sequence_number, user_id) AS correlation_coefficient\nFROM\n  `thelook.events`;",
#         "description": "Calculate the correlation between the sequence number of an event within a session and the user ID to understand if user behavior changes over the course of a session."
#       },
#       {
#         "sql": "SELECT\n  browser,\n  AVG(sequence_number) AS avg_sequence_number,\n  STDDEV_SAMP(sequence_number) AS stddev_sequence_number\nFROM\n  `thelook.events`\nGROUP BY\n  browser;",
#         "description": "Determine the average sequence number of events for each browser and calculate the standard deviation to understand the consistency of user interaction across different browsers."
#       },
#       {
#         "sql": "SELECT\n  COVAR_SAMP(user_id, sequence_number) AS covariance\nFROM\n  `thelook.events`;",
#         "description": "Calculate the sample covariance between user ID and sequence number to understand the relationship between user and event order within a session."
#       },
#       {
#         "sql": "SELECT\n  traffic_source,\n  AVG(sequence_number) AS avg_sequence_number\nFROM\n  `thelook.events`\nGROUP BY\n  traffic_source\nORDER BY\n  avg_sequence_number ASC\nLIMIT 1;",
#         "description": "Determine the traffic source with the minimum average sequence number to identify the least engaged traffic channel."
#       }
#     ],
#     "overview": "This table stores records of user interactions and activities on a website or application. It captures various details associated with each event. The data includes information about the user, the event itself, and the context in which it occurred. This allows for analysis of user behavior, tracking of key performance indicators, and identification of trends. The table supports investigations into user journeys and event patterns.",
#     "schema": {
#       "fields": [
#         {
#           "name": "id",
#           "description": "Unique identifier for the event."
#         },
#         {
#           "name": "traffic_source",
#           "description": "The origin or source of the website traffic."
#         },
#         {
#           "name": "city",
#           "description": "The city from which the event originated."
#         },
#         {
#           "name": "uri",
#           "description": "The specific web address or resource being accessed."
#         },
#         {
#           "name": "user_id",
#           "description": "Unique identifier for the user."
#         },
#         {
#           "name": "event_type",
#           "description": "The category or type of user event that occurred."
#         },
#         {
#           "name": "session_id",
#           "description": "Unique identifier for the user's session."
#         },
#         {
#           "name": "created_at",
#           "description": "Timestamp indicating when the event occurred."
#         },
#         {
#           "name": "ip_address",
#           "description": "The IP address of the user who triggered the event."
#         },
#         {
#           "name": "postal_code",
#           "description": "The postal code associated with the event's location."
#         },
#         {
#           "name": "sequence_number",
#           "description": "The order of the event within a user session."
#         },
#         {
#           "name": "browser",
#           "description": "The web browser used by the user."
#         },
#         {
#           "name": "state",
#           "description": "The state or status associated with the event."
#         }
#       ]
#     },
#     "tableResult": {
#       "overview": "This table stores records of user interactions and activities on a website or application. It captures various details associated with each event. The data includes information about the user, the event itself, and the context in which it occurred. This allows for analysis of user behavior, tracking of key performance indicators, and identification of trends. The table supports investigations into user journeys and event patterns.",
#       "schema": {
#         "fields": [
#           {
#             "name": "id",
#             "description": "Unique identifier for the event."
#           },
#           {
#             "name": "traffic_source",
#             "description": "The origin or source of the website traffic."
#           },
#           {
#             "name": "city",
#             "description": "The city from which the event originated."
#           },
#           {
#             "name": "uri",
#             "description": "The specific web address or resource being accessed."
#           },
#           {
#             "name": "user_id",
#             "description": "Unique identifier for the user."
#           },
#           {
#             "name": "event_type",
#             "description": "The category or type of user event that occurred."
#           },
#           {
#             "name": "session_id",
#             "description": "Unique identifier for the user's session."
#           },
#           {
#             "name": "created_at",
#             "description": "Timestamp indicating when the event occurred."
#           },
#           {
#             "name": "ip_address",
#             "description": "The IP address of the user who triggered the event."
#           },
#           {
#             "name": "postal_code",
#             "description": "The postal code associated with the event's location."
#           },
#           {
#             "name": "sequence_number",
#             "description": "The order of the event within a user session."
#           },
#           {
#             "name": "browser",
#             "description": "The web browser used by the user."
#           },
#           {
#             "name": "state",
#             "description": "The state or status associated with the event."
#           }
#         ]
#       },
#       "queries": [
#         {
#           "sql": "SELECT traffic_source, COUNT(DISTINCT user_id) AS unique_users FROM `thelook.events` WHERE sequence_number \u003e 5 GROUP BY traffic_source ORDER BY unique_users DESC;",
#           "description": "Identify the traffic sources with the highest number of unique users who have a sequence number greater than 5, indicating highly engaged users from those sources."
#         },
#         {
#           "sql": "SELECT browser, STDDEV_POP(sequence_number) AS std_dev_sequence_number FROM `thelook.events` GROUP BY browser ORDER BY std_dev_sequence_number DESC;",
#           "description": "Identify the browsers with the highest standard deviation in event sequence numbers, indicating variability in user behavior within those browsers."
#         },
#         {
#           "sql": "SELECT city, COUNT(DISTINCT user_id) AS unique_users, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY city ORDER BY unique_users DESC LIMIT 5;",
#           "description": "Identify the top 5 cities with the highest number of unique users and their corresponding average event sequence number."
#         },
#         {
#           "sql": "SELECT postal_code, COUNT(*) AS event_count, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY postal_code ORDER BY event_count DESC LIMIT 3;",
#           "description": "Identify the top 3 postal codes with the highest number of events, and calculate the average sequence number for events originating from those postal codes."
#         },
#         {
#           "sql": "SELECT browser, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY browser;",
#           "description": "Calculate the average sequence number of events for each user, grouped by browser type, to understand user engagement patterns across different browsers."
#         },
#         {
#           "sql": "SELECT traffic_source, AVG(event_count) AS avg_events_per_session FROM (SELECT traffic_source, session_id, COUNT(*) AS event_count FROM `thelook.events` GROUP BY traffic_source, session_id) GROUP BY traffic_source;",
#           "description": "Calculate the average number of events per session, grouped by traffic source, to understand user engagement levels from different sources."
#         },
#         {
#           "sql": "SELECT browser, traffic_source, COUNT(*) AS event_count, COUNT(*) * 100 / (SELECT COUNT(*) FROM `thelook.events`) AS percentage FROM `thelook.events` GROUP BY browser, traffic_source ORDER BY event_count DESC;",
#           "description": "Calculate the number of events for each browser, traffic_source combination, and then determine the percentage of total events each combination represents."
#         },
#         {
#           "sql": "SELECT EXTRACT(HOUR FROM created_at) AS hour_of_day, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY hour_of_day ORDER BY hour_of_day;",
#           "description": "Calculate the average sequence number for each user, grouped by the hour of the day the event occurred, to identify peak activity times."
#         },
#         {
#           "sql": "SELECT browser, APPROX_QUANTILES(sequence_number, 100)[OFFSET(90)] AS percentile_90 FROM `thelook.events` GROUP BY browser;",
#           "description": "Calculate the 90th percentile of sequence numbers for each browser, providing a measure of high engagement levels within each browser."
#         },
#         {
#           "sql": "SELECT\n  traffic_source,\n  VAR_POP(sequence_number) AS sequence_number_variance\nFROM\n  `thelook.events`\nGROUP BY\n  traffic_source;",
#           "description": "Calculate the population variance of the sequence number of events for each traffic source to understand the spread of user interaction within different traffic channels."
#         },
#         {
#           "sql": "SELECT user_id, APPROX_QUANTILES(sequence_number, 2)[OFFSET(1)] AS median_sequence_number FROM `thelook.events` GROUP BY user_id;",
#           "description": "Calculate the median sequence number for each user, providing a robust measure of user engagement that is less sensitive to outliers."
#         },
#         {
#           "sql": "SELECT\n  city,\n  AVG(events_per_user) AS avg_events_per_user\nFROM (\n  SELECT\n    city,\n    user_id,\n    COUNT(*) AS events_per_user\n  FROM\n    `thelook.events`\n  GROUP BY\n    city,\n    user_id\n)\nGROUP BY\n  city\nORDER BY\n  avg_events_per_user DESC\nLIMIT 5;",
#           "description": "Identify the top 5 cities with the highest average number of events per user to find the most active locations."
#         },
#         {
#           "sql": "SELECT EXTRACT(DAYOFWEEK FROM created_at) AS day_of_week, COUNT(*) * 100 / (SELECT COUNT(*) FROM `thelook.events`) AS percentage FROM `thelook.events` GROUP BY day_of_week ORDER BY day_of_week;",
#           "description": "Calculate the percentage of events that occur on each day of the week, to identify the busiest days for website traffic."
#         },
#         {
#           "sql": "SELECT user_id, MIN(created_at) AS first_event, MAX(created_at) AS last_event FROM `thelook.events` GROUP BY user_id;",
#           "description": "Find the minimum and maximum created_at timestamps for each user, to determine the duration of their activity on the platform."
#         },
#         {
#           "sql": "SELECT\n  browser,\n  AVG(sequence_number) AS avg_sequence_number\nFROM\n  `thelook.events`\nGROUP BY\n  browser\nORDER BY\n  avg_sequence_number DESC\nLIMIT 1;",
#           "description": "Find the browser with the maximum average sequence number to identify the browser with the most engaged users."
#         },
#         {
#           "sql": "SELECT\n  AVG(event_count) AS avg_events_per_session,\n  STDDEV_SAMP(event_count) AS stddev_events_per_session\nFROM (\n  SELECT\n    session_id,\n    COUNT(*) AS event_count\n  FROM\n    `thelook.events`\n  GROUP BY\n    session_id\n);",
#           "description": "Calculate the average number of events per session and the standard deviation to understand session activity."
#         },
#         {
#           "sql": "SELECT traffic_source, COUNT(*) * 100 / (SELECT COUNT(*) FROM `thelook.events`) AS percentage FROM `thelook.events` GROUP BY traffic_source;",
#           "description": "Determine the percentage of events originating from each traffic source, providing insights into the effectiveness of different marketing channels."
#         },
#         {
#           "sql": "SELECT browser, event_type, COUNT(*) AS event_count, ROW_NUMBER() OVER(PARTITION BY browser ORDER BY COUNT(*) DESC) AS rn FROM `thelook.events` GROUP BY browser, event_type QUALIFY rn = 1;",
#           "description": "Determine the most common event type for each browser, providing insights into how different browsers are used on the platform."
#         },
#         {
#           "sql": "SELECT CORR(sequence_number, user_id) FROM `thelook.events`;",
#           "description": "Calculate the correlation between the sequence number and user ID to understand if there's a relationship between user ID and event sequence within sessions."
#         },
#         {
#           "sql": "SELECT state, event_type, COUNT(*) AS event_count FROM `thelook.events` GROUP BY state, event_type ORDER BY state, event_count DESC;",
#           "description": "Determine the distribution of event types across different states, providing insights into regional variations in user behavior."
#         },
#         {
#           "sql": "SELECT city, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` WHERE user_id \u003e 100 GROUP BY city ORDER BY avg_sequence_number DESC LIMIT 5;",
#           "description": "Identify the top 5 cities with the highest average event sequence number for users with user IDs greater than 100."
#         },
#         {
#           "sql": "SELECT browser, traffic_source, AVG(event_count) AS avg_events_per_user FROM (SELECT browser, traffic_source, user_id, COUNT(*) AS event_count FROM `thelook.events` GROUP BY browser, traffic_source, user_id) GROUP BY browser, traffic_source;",
#           "description": "Determine the average number of events per user, broken down by browser and traffic source, to understand user engagement across different channels and browsers."
#         },
#         {
#           "sql": "SELECT state, event_type, AVG(sequence_number) AS avg_sequence_number FROM `thelook.events` GROUP BY state, event_type;",
#           "description": "Determine the average sequence number for each event type, grouped by state, to understand how user engagement varies across different event types and regions."
#         },
#         {
#           "sql": "SELECT browser, uri, COUNT(*) AS uri_count, ROW_NUMBER() OVER(PARTITION BY browser ORDER BY COUNT(*) DESC) AS rn FROM `thelook.events` GROUP BY browser, uri QUALIFY rn \u003c= 3;",
#           "description": "Determine the top 3 most frequent URI values for each browser, providing insights into the most popular pages visited by users of different browsers."
#         },
#         {
#           "sql": "SELECT\n  CORR(sequence_number, user_id) AS correlation_coefficient\nFROM\n  `thelook.events`;",
#           "description": "Calculate the correlation between the sequence number of an event within a session and the user ID to understand if user behavior changes over the course of a session."
#         },
#         {
#           "sql": "SELECT\n  browser,\n  AVG(sequence_number) AS avg_sequence_number,\n  STDDEV_SAMP(sequence_number) AS stddev_sequence_number\nFROM\n  `thelook.events`\nGROUP BY\n  browser;",
#           "description": "Determine the average sequence number of events for each browser and calculate the standard deviation to understand the consistency of user interaction across different browsers."
#         },
#         {
#           "sql": "SELECT\n  COVAR_SAMP(user_id, sequence_number) AS covariance\nFROM\n  `thelook.events`;",
#           "description": "Calculate the sample covariance between user ID and sequence number to understand the relationship between user and event order within a session."
#         },
#         {
#           "sql": "SELECT\n  traffic_source,\n  AVG(sequence_number) AS avg_sequence_number\nFROM\n  `thelook.events`\nGROUP BY\n  traffic_source\nORDER BY\n  avg_sequence_number ASC\nLIMIT 1;",
#           "description": "Determine the traffic source with the minimum average sequence number to identify the least engaged traffic channel."
#         }
#       ],
#       "queryTheme": {}
#     }
#   }
# }

SyntaxError: invalid syntax (<ipython-input-282-063c154fa7b6>, line 1)