In [None]:
# BigQuery tables:
# project-627b8b21-5a0c-430f-aa1.customer_data.customer
# project-627b8b21-5a0c-430f-aa1.customer_data.orders
# project-627b8b21-5a0c-430f-aa1.customer_data.products
# project-627b8b21-5a0c-430f-aa1.customer_data.ads_vml
# project-627b8b21-5a0c-430f-aa1.customer_data.bkp2


In [2]:
import json

# Load the ads targeting JSON file into a dict. The encoding is passed
# explicitly because JSON text is UTF-8, whereas open() would otherwise fall
# back to the platform's locale encoding.
with open('/content/ads.json', 'r', encoding='utf-8') as f:
    ads_data = json.load(f)

# Display the top-level keys to see the structure
display(ads_data.keys())

dict_keys(['age_max', 'age_min', 'age_range', 'brand_safety_content_filter_levels', 'device_platforms', 'facebook_positions', 'flexible_spec', 'genders', 'geo_locations', 'instagram_positions', 'locales', 'publisher_platforms', 'targeting_automation'])


SELECT
    *
FROM
    `project-627b8b21-5a0c-430f-aa1.customer_data.bkp2`
WHERE
    EXISTS (SELECT 1 FROM UNNEST(geo_locations.location_types) as location_type WHERE location_type = 'home')



dict_keys(['age_max', 'age_min', 'age_range', 'brand_safety_content_filter_levels', 'device_platforms', 'facebook_positions', 'flexible_spec', 'genders', 'geo_locations', 'instagram_positions', 'locales', 'publisher_platforms', 'targeting_automation'])

In [4]:
import pandas as pd

# Wrap the single dict in a list so pandas builds a one-row frame with one
# column per top-level key.
try:
    ads_df = pd.DataFrame([ads_data])
    display(ads_df.head())
except Exception as err:
    # Conversion (or display) failed: report the reason and show the raw data instead.
    print(f"Could not convert to DataFrame directly. Error: {err}")
    print("Let's inspect the data structure more closely.")
    display(ads_data)

Unnamed: 0,age_max,age_min,age_range,brand_safety_content_filter_levels,device_platforms,facebook_positions,flexible_spec,genders,geo_locations,instagram_positions,locales,publisher_platforms,targeting_automation
0,65,18,"[18, 54]",[FEED_STRICT],"[mobile, desktop]","[feed, biz_disco_feed, facebook_reels, profile...","[{'family_statuses': [{'id': '6002714398372', ...",[0],"{'countries': ['US'], 'location_types': ['home...","[stream, profile_reels, story, explore, reels,...","[24, 6]","[facebook, instagram]",{'advantage_audience': 1}


In [7]:
from google.cloud import bigquery
from google.oauth2 import service_account

# BigQuery tables:
# project-627b8b21-5a0c-430f-aa1.customer_data.customer
# project-627b8b21-5a0c-430f-aa1.customer_data.orders
# project-627b8b21-5a0c-430f-aa1.customer_data.products
# project-627b8b21-5a0c-430f-aa1.customer_data.ads_vml

# Fully-qualified destination table: <project>.<dataset>.ads_vml
project_id = "project-627b8b21-5a0c-430f-aa1"
dataset_id = "customer_data"
ads_table_id = f"{project_id}.{dataset_id}.ads_vml"

# Build an authenticated BigQuery client from the service-account key file.
credentials = service_account.Credentials.from_service_account_file('/content/cred.json')
client = bigquery.Client(project=project_id, credentials=credentials)

# WRITE_TRUNCATE overwrites the table if it already exists.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

print(f"Loading data into {ads_table_id}...")
job = client.load_table_from_dataframe(ads_df, ads_table_id, job_config=job_config)

# Wait for the load job to finish before reporting the row count.
job.result()

print(f"Loaded {job.output_rows} rows into {ads_table_id}.")

Loading data into project-627b8b21-5a0c-430f-aa1.customer_data.ads_vml...
Loaded 1 rows into project-627b8b21-5a0c-430f-aa1.customer_data.ads_vml.


In [13]:
from google.cloud import bigquery
from google.oauth2 import service_account
import json
import io

# BigQuery tables:
# project-627b8b21-5a0c-430f-aa1.customer_data.customer
# project-627b8b21-5a0c-430f-aa1.customer_data.orders
# project-627b8b21-5a0c-430f-aa1.customer_data.products

# Destination for the JSON Lines load; edit these three values to target a
# different project, dataset, or table.
project_id = "project-627b8b21-5a0c-430f-aa1"
dataset_id = "customer_data"
ads_table_id = f"{project_id}.{dataset_id}.bkp2"

# Authenticated BigQuery client built from the service-account key file.
credentials = service_account.Credentials.from_service_account_file('/content/cred.json')
client = bigquery.Client(project=project_id, credentials=credentials)

# Newline-delimited JSON: one serialized record per line. Only one record
# (ads_data) is loaded here, so the result is a single line.
ads_data_list = [ads_data]
json_lines = '\n'.join(map(json.dumps, ads_data_list))


def _id_name_list(field_name):
    """Build a REPEATED RECORD field whose elements each hold a STRING `id` and `name`."""
    return bigquery.SchemaField(field_name, "RECORD", mode="REPEATED", fields=[
        bigquery.SchemaField("id", "STRING"),
        bigquery.SchemaField("name", "STRING"),
    ])


def _array_of(field_name, element_type):
    """Build a REPEATED (array) field with the given element type."""
    return bigquery.SchemaField(field_name, element_type, mode="REPEATED")


# Explicit table schema mirroring the structure of ads_data: scalars stay
# plain columns, lists become REPEATED fields, and nested dicts become RECORDs.
schema = [
    bigquery.SchemaField("age_max", "INTEGER"),
    bigquery.SchemaField("age_min", "INTEGER"),
    _array_of("age_range", "INTEGER"),
    _array_of("brand_safety_content_filter_levels", "STRING"),
    _array_of("device_platforms", "STRING"),
    _array_of("facebook_positions", "STRING"),
    # Each flexible_spec entry groups three lists of id/name pairs.
    bigquery.SchemaField("flexible_spec", "RECORD", mode="REPEATED", fields=[
        _id_name_list("family_statuses"),
        _id_name_list("interests"),
        _id_name_list("life_events"),
    ]),
    _array_of("genders", "INTEGER"),
    bigquery.SchemaField("geo_locations", "RECORD", fields=[
        _array_of("countries", "STRING"),
        _array_of("location_types", "STRING"),
    ]),
    _array_of("instagram_positions", "STRING"),
    _array_of("locales", "INTEGER"),
    _array_of("publisher_platforms", "STRING"),
    bigquery.SchemaField("targeting_automation", "RECORD", fields=[
        bigquery.SchemaField("advantage_audience", "INTEGER"),
    ]),
]

# Load settings: apply the explicit schema, read the input as
# newline-delimited JSON, and overwrite the table if it already exists.
job_config = bigquery.LoadJobConfig()
job_config.schema = schema
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

print(f"Loading data into {ads_table_id} from JSON Lines...")

# The JSON Lines text is UTF-8 encoded and wrapped in an in-memory binary
# buffer so it can be handed over as a file object.
job = client.load_table_from_file(
    io.BytesIO(json_lines.encode('utf-8')),
    ads_table_id,
    job_config=job_config,
)

# Wait for the load job to finish before reporting the row count.
job.result()

print(f"Loaded {job.output_rows} rows into {ads_table_id}.")

Loading data into project-627b8b21-5a0c-430f-aa1.customer_data.bkp2 from JSON Lines...
Loaded 1 rows into project-627b8b21-5a0c-430f-aa1.customer_data.bkp2.


In [15]:
# Example of a custom filter an analyst might apply: select the rows of bkp2
# whose geo_locations.location_types array contains the value 'home'.
# The EXISTS subquery over UNNEST(...) is a membership test on the array, so
# each table row appears at most once in the result.
query = """
SELECT
    *
FROM
    `project-627b8b21-5a0c-430f-aa1.customer_data.bkp2`
WHERE
    EXISTS (SELECT 1 FROM UNNEST(geo_locations.location_types) as location_type WHERE location_type = 'home')
"""


# Submit the query, wait for its result, and print every returned row.
query_job = client.query(query)
results = query_job.result()
for row in results:
    print(row)

# Echo the SQL text after the rows so the output records which query ran.
print(query)

Row((65, 18, [18, 54], ['FEED_STRICT'], ['mobile', 'desktop'], ['feed', 'biz_disco_feed', 'facebook_reels', 'profile_feed', 'marketplace', 'story'], [{'family_statuses': [{'id': '6002714398372', 'name': 'Parents (All)'}], 'interests': [{'id': '6002920953955', 'name': 'Interiordesign (design)'}, {'id': '6002979893723', 'name': 'Renovation(construction)'}, {'id': '6003051498644', 'name': 'Extra SpaceStorage'}, {'id': '6003105618835', 'name': 'Crafts(hobbies)'}, {'id': '6003132926214', 'name': 'Furniture (homefurnishings)'}, {'id': '6003138547539', 'name': 'Bedroom furniture (homefurnishings)'}, {'id': '6003148862840', 'name': 'Shelf(storage)'}, {'id': '6003153995505', 'name': 'Wardrobe'}, {'id': '6003234413249', 'name': 'Homeimprovement (home and garden)'}, {'id': '6003253467911', 'name': 'Modern furniture (homefurnishings)'}, {'id': '6003293328579', 'name': 'Living room(architecture)'}, {'id': '6003401828547', 'name': 'Room(architecture)'}, {'id': '6003470511564', 'name': 'Do it yoursel

In [17]:
# Interest example: select the rows of bkp2 where any flexible_spec entry has
# an interest named 'Do it yourself(DIY)'.
# flexible_spec is an array of records and each record holds an interests
# array, so the subquery unnests both levels (fs, then fs.interests) before
# comparing interest.name.
query = """
SELECT
    *
FROM
    `project-627b8b21-5a0c-430f-aa1.customer_data.bkp2`
WHERE
    EXISTS (
        SELECT 1
        FROM UNNEST(flexible_spec) as fs,
             UNNEST(fs.interests) as interest
        WHERE interest.name = 'Do it yourself(DIY)'
    )
"""

# Submit the query, wait for its result, and print every returned row.
query_job = client.query(query)
results = query_job.result()
for row in results:
    print(row)

# Echo the SQL text after the rows so the output records which query ran.
print(query)

Row((65, 18, [18, 54], ['FEED_STRICT'], ['mobile', 'desktop'], ['feed', 'biz_disco_feed', 'facebook_reels', 'profile_feed', 'marketplace', 'story'], [{'family_statuses': [{'id': '6002714398372', 'name': 'Parents (All)'}], 'interests': [{'id': '6002920953955', 'name': 'Interiordesign (design)'}, {'id': '6002979893723', 'name': 'Renovation(construction)'}, {'id': '6003051498644', 'name': 'Extra SpaceStorage'}, {'id': '6003105618835', 'name': 'Crafts(hobbies)'}, {'id': '6003132926214', 'name': 'Furniture (homefurnishings)'}, {'id': '6003138547539', 'name': 'Bedroom furniture (homefurnishings)'}, {'id': '6003148862840', 'name': 'Shelf(storage)'}, {'id': '6003153995505', 'name': 'Wardrobe'}, {'id': '6003234413249', 'name': 'Homeimprovement (home and garden)'}, {'id': '6003253467911', 'name': 'Modern furniture (homefurnishings)'}, {'id': '6003293328579', 'name': 'Living room(architecture)'}, {'id': '6003401828547', 'name': 'Room(architecture)'}, {'id': '6003470511564', 'name': 'Do it yoursel

### Targeting Fact-Dimension Data Model

To effectively filter and group ads based on targeting criteria, we can create a normalized data model in BigQuery. This model will consist of the original ad data (or a refined version of it) linked to separate dimension tables for multi-valued attributes.

**1. Fact Table of Ads Data (`bkp2` and `ads_vml`):**

*   This table will contain one row per ad or targeting configuration.
*   It will include core ad identifiers and potentially simplified targeting attributes that are single-valued or can be easily represented (e.g., `age_max`, `age_min`, derived `age_group`, `gender` code).
*   It will also contain foreign keys or linking mechanisms to the dimension tables for multi-valued attributes.

**2. Dimension Tables:**

We will create separate tables for attributes that are lists or nested, allowing for a cleaner structure and easier querying:

*   **`dim_countries`**:
    *   `country_code` (e.g., 'US', 'GB') - Primary Key
    *   `country_name` (e.g., 'United States', 'United Kingdom')
*   **`dim_interests`**:
    *   `interest_id` (from JSON) - Primary Key
    *   `interest_name` (from JSON)
*   **`dim_device_platforms`**:
    *   `platform_name` (e.g., 'mobile', 'desktop') - Primary Key
*   **`dim_publisher_platforms`**:
    *   `platform_name` (e.g., 'facebook', 'instagram') - Primary Key
*   **`dim_location_types`**:
    *   `location_type` (e.g., 'home', 'recent') - Primary Key
*   **`dim_brand_safety_levels`**:
    *   `level_name` (e.g., 'FEED_STRICT') - Primary Key
*   **Derived Dimensions (Optional but Recommended):**
    *   **`dim_age_groups`**:
        *   `age_group_id` (e.g., '18-24', '25-34') - Primary Key
        *   `age_group_min`
        *   `age_group_max`
    *   **`dim_genders`**:
        *   `gender_code` (e.g., 0, 1, 2) - Primary Key
        *   `gender_label` (e.g., 'All', 'Male', 'Female')
    *   **`dim_family_statuses`** (from flexible_spec):
        *   `status_id`
        *   `status_name`
    *   **`dim_life_events`** (from flexible_spec):
        *   `event_id`
        *   `event_name`

**3. Linking Tables (for Many-to-Many Relationships):**

Since an ad can target multiple countries, interests, device platforms, etc., we need linking tables to connect the core ad data to the dimension tables.

*   **`ad_country_link`**:
    *   `ad_id` (Foreign Key to core ad data)
    *   `country_code` (Foreign Key to `dim_countries`)
*   **`ad_interest_link`**:
    *   `ad_id`
    *   `interest_id`
*   **`ad_device_platform_link`**:
    *   `ad_id`
    *   `platform_name`
*   *(Similar linking tables for other multi-valued dimensions)*

**Relationship with Existing Ad Table (`bkp2`):**

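The `bkp2` table, which currently holds your raw or semi-structured ad targeting data, can be the source for populating these new dimension and linking tables. Note that `bkp2` as loaded above contains only targeting fields and no ad identifier column, so an `ad_id` must be added (or generated) for each row before the linking tables can reference it.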

*   You would extract the distinct values for each dimension (countries, interests, etc.) from the `bkp2` table to populate the respective dimension tables.
*   You would extract the relationships between each ad and its targeting attributes from `bkp2` to populate the linking tables.

Alternatively, the `bkp2` table itself could be transformed into the "Core Ad Data Table" by flattening some fields and adding derived attributes like `age_group`. The nested fields would then be primarily used to populate the separate dimension and linking tables.

This normalized structure allows analysts to:
*   Filter ads by simply joining with the relevant dimension table (e.g., `JOIN ad_country_link ON ad.id = ad_country_link.ad_id JOIN dim_countries ON ad_country_link.country_code = dim_countries.country_code WHERE dim_countries.country_name = 'United States'`).
*   Easily group ads by dimension attributes.

In [19]:
# Combined filter: rows of bkp2 that have an interest named
# 'Interiordesign (design)' in some flexible_spec entry AND list 'US' among
# geo_locations.countries. Each condition is its own EXISTS test over the
# relevant unnested array.
query_interior_design_us = """
SELECT
    *
FROM
    `project-627b8b21-5a0c-430f-aa1.customer_data.bkp2`
WHERE
    EXISTS (
        SELECT 1
        FROM UNNEST(flexible_spec) as fs,
             UNNEST(fs.interests) as interest
        WHERE interest.name = 'Interiordesign (design)'
    )
    AND EXISTS (
        SELECT 1
        FROM UNNEST(geo_locations.countries) as country
        WHERE country = 'US'
    )
"""

print("Executing query for ads targeting 'Interior design' in US:")
query_job_interior_design_us = client.query(query_interior_design_us)
results_interior_design_us = query_job_interior_design_us.result()

# One printed line per matching row.
for row in results_interior_design_us:
    print(row)

# Blank line, then the SQL text, so the output records which query ran.
print(f"\n{query_interior_design_us}")

Executing query for ads targeting 'Interior design' in US:
Row((65, 18, [18, 54], ['FEED_STRICT'], ['mobile', 'desktop'], ['feed', 'biz_disco_feed', 'facebook_reels', 'profile_feed', 'marketplace', 'story'], [{'family_statuses': [{'id': '6002714398372', 'name': 'Parents (All)'}], 'interests': [{'id': '6002920953955', 'name': 'Interiordesign (design)'}, {'id': '6002979893723', 'name': 'Renovation(construction)'}, {'id': '6003051498644', 'name': 'Extra SpaceStorage'}, {'id': '6003105618835', 'name': 'Crafts(hobbies)'}, {'id': '6003132926214', 'name': 'Furniture (homefurnishings)'}, {'id': '6003138547539', 'name': 'Bedroom furniture (homefurnishings)'}, {'id': '6003148862840', 'name': 'Shelf(storage)'}, {'id': '6003153995505', 'name': 'Wardrobe'}, {'id': '6003234413249', 'name': 'Homeimprovement (home and garden)'}, {'id': '6003253467911', 'name': 'Modern furniture (homefurnishings)'}, {'id': '6003293328579', 'name': 'Living room(architecture)'}, {'id': '6003401828547', 'name': 'Room(arch