In [7]:
# Combined API: a BrightDataFilter carries both the dataset connection and its
# filter fields, so no separate field objects have to be imported.
from util import BrightDataFilter

# One connection per dataset; the later cells reuse these three names.
amazon_products = BrightDataFilter("amazon_products")
shopee = BrightDataFilter("shopee")
amazon_walmart = BrightDataFilter("amazon_walmart")

print("✅ Dataset connections created with built-in filter fields")
print(f"Amazon Products: {amazon_products.dataset_id}")
print(f"Shopee: {shopee.dataset_id}")
print(f"Amazon-Walmart: {amazon_walmart.dataset_id}")

# Each connection exposes its own set of filter fields through `.filter`;
# only the number of field names is reported here.
print("\n📊 Available filter fields:")
print(f"Amazon fields: {len(amazon_products.filter.get_field_names())} fields")
print(f"Shopee fields: {len(shopee.filter.get_field_names())} fields")
print(f"Amazon-Walmart fields: {len(amazon_walmart.filter.get_field_names())} fields")


✅ Dataset connections created with built-in filter fields
Amazon Products: gd_l7q7dkf244hwjntr0
Shopee: gd_lk122xxgf86xf97py
Amazon-Walmart: gd_m4l6s4mn2g2rkx9lia

📊 Available filter fields:
Amazon fields: 52 fields
Shopee fields: 36 fields
Amazon-Walmart fields: 63 fields


In [6]:
# Build single-condition filters from each dataset's built-in filter fields.
# Comparing a field with a value yields a filter object, not a bool: printing
# one shows the condition text (e.g. `brand == "Apple"` prints as "brand = Apple").

# Amazon Products filters
amazon_high_rated = amazon_products.filter.rating >= 4.5
amazon_affordable = amazon_products.filter.final_price <= 100
amazon_apple_products = amazon_products.filter.brand == "Apple"

# Shopee filters (field names taken from the Shopee dataset's own filter)
shopee_popular = shopee.filter.rating >= 4.0
shopee_cheap = shopee.filter.final_price <= 50

# Amazon-Walmart comparison filter
price_difference = amazon_walmart.filter.price_difference > 20

print("✅ Filters created using dataset's built-in filter fields")
print(f"Amazon high-rated: {amazon_high_rated}")
print(f"Amazon affordable: {amazon_affordable}")
print(f"Amazon Apple products: {amazon_apple_products}")
print(f"Shopee popular: {shopee_popular}")
print(f"Shopee cheap: {shopee_cheap}")
print(f"Price difference: {price_difference}")


✅ Filters created using dataset's built-in filter fields
Amazon high-rated: rating >= 4.5
Amazon affordable: final_price <= 100
Amazon Apple products: brand = Apple
Shopee popular: rating >= 4.0
Shopee cheap: final_price <= 50
Price difference: price_difference > 20


In [3]:
# Combine several conditions on one dataset into a single filter with `&`.
# Each comparison is parenthesised because `&` binds more tightly than
# `>=`, `<=` and `==` in Python; without the parentheses the expression
# would be grouped differently.

# Amazon: well rated, at most 200, Apple brand, currently available
amazon_complex = (
    (amazon_products.filter.rating >= 4.0) &
    (amazon_products.filter.final_price <= 200) &
    (amazon_products.filter.brand == "Apple") &
    (amazon_products.filter.is_available.is_true())
)

# Shopee: well rated, at most 100, currently available
shopee_complex = (
    (shopee.filter.rating >= 4.0) &
    (shopee.filter.final_price <= 100) &
    (shopee.filter.is_available.is_true())
)

# The combined filter keeps its individual conditions in `.filters`.
print("✅ Complex filters created")
print(f"Amazon complex filter: {amazon_complex}")
print(f"Number of conditions: {len(amazon_complex.filters)}")
print(f"Shopee complex filter: {shopee_complex}")
print(f"Number of conditions: {len(shopee_complex.filters)}")


✅ Complex filters created
Amazon complex filter: (
  rating >= 4.0
  AND
  final_price <= 200
  AND
  brand = Apple
  AND
  is_available = True
)
Number of conditions: 4
Shopee complex filter: (
  rating >= 4.0
  AND
  final_price <= 100
  AND
  is_available = True
)
Number of conditions: 3


In [5]:
# Summarise the Amazon Products dataset and preview its field reference.

# How many field descriptions to print before truncating the listing.
PREVIEW_LIMIT = 10

info = amazon_products.get_dataset_info()
print(f"Dataset: {info['name']}")
print(f"Available fields: {len(info['available_fields'])}")

# The field reference is iterated as (field name, description) pairs.
field_ref = amazon_products.get_field_reference()
print(f"\n📋 Field Reference (showing first {PREVIEW_LIMIT} fields):")
for field_name, description in list(field_ref.items())[:PREVIEW_LIMIT]:
    print(f"{field_name}: {description}")

# Report how many entries were left out, only when the listing was truncated.
remaining = len(field_ref) - PREVIEW_LIMIT
if remaining > 0:
    print(f"... and {remaining} more fields")

Dataset: Amazon Products
Available fields: 52

📋 Field Reference (showing first 10 fields):
title: Product title
asin: Unique identifier for each product
parent_asin: Parent ASIN of the product
brand: Product brand
description: A brief description of the product
categories: Product categories
initial_price: Initial price
final_price: Final price of the product
final_price_high: Highest value of the final price when it is a range
currency: Currency of the product
... and 42 more fields
