Copyright 2025 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");

In [None]:
#@title See full license here

# Copyright 2025 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


#  Push Products to Image Inventory for Classification

## Description

This Colab notebook provides an alternative way to selectively classify products
using Image Inventory.

Instead of having the solution "always-on" (meaning that the entire feed is classified and new products are classified as they are added), this solution allows you to manually select a subset of products to run through the classification pipeline.

## References

For more info, please check out the repository on [GitHub (google-marketing-solutions/image-inventory)](https://github.com/google-marketing-solutions/image-inventory).


## How it works

This notebook utilizes the same codebase as the "always-on" deployment.

+ In the "always-on" deployment - push_products_lib is called by a Cloud Run Function

+ In this colab notebook - you are manually calling the methods in push_products_lib.

## Instructions

To begin:

1. Start and connect to a runtime (in the top right corner)
2. Execute each code block (in sequence) by pressing the play icon next to each block.

+ **Authenticate user** - sets up authentication for Google services
+ **Install repo & dependencies** - clones repo from Github & uses pip to install required modules
   + Note: this step may require you to restart the runtime.
+ **Fill out form fields** - sets required configuration values
   + Note: if your Image Inventory deployment uses default values for the [optional config parameters](https://github.com/google-marketing-solutions/image-inventory?tab=readme-ov-file#4-provide-values-for-variables), then you only need to fill out ```project_id```
   + Be sure to rerun this code block after changing any values.
+ **Define product filters** - choose one or more values that will be used to filter products in the feed
   + For product type, the filter uses a "starts with" comparison: when you provide a product type like "Electronics", it will identify and include any product types that begin with "Electronics", such as "Electronics > TVs" or "Electronics > Audio".
+ **Pull set of products** - this stages the set of products from BigQuery for classification. The product data is shown in a table for validation purposes.
   + If ```exclude_previously_clasified_products``` is enabled, then the solution will only pull in product image links that have not been run through Image Inventory before
   + When ```exclude_previously_clasified_products``` is disabled and multiple products in the pull share the same image link, that link will be processed multiple times.
+ **Push set of products for analysis** - this is the step where products are pushed to Image Inventory for analysis.
+ **Get generated classifications** - after Image Inventory has finished classifying the submitted products, this block queries the ```get_product_image_classifications``` BigQuery view to get the classifications by product.

In [None]:
# @title ### Authenticate user

# Colab-provided auth helper; this import is only available inside a Google
# Colab runtime.
from google.colab import auth
# Sets up authentication for Google services (per the notebook instructions).
auth.authenticate_user()
print('Authenticated!')

In [None]:
# @title ### Install repo & python dependencies

import os
import datetime
import pandas as pd

from google.colab import data_table
data_table.enable_dataframe_formatter()

# Clone Github repo
repo_path = '/content/image-inventory'
if not os.path.exists(repo_path):
  os.system('git clone https://github.com/google-marketing-solutions/image-inventory.git &> /dev/null')

# Install python packages (may require restarting runtime)
!pip install -q -r "/content/image-inventory/src/push_products/requirements.txt" &> /dev/null

# Import push_products_lib
push_products_lib = __import__("image-inventory.src.push_products.push_products_lib", fromlist=[''])

print('Successfully installed!')

In [None]:

# @title ### Fill out form fields { display-mode: "form" }
# @markdown ### Required Values
project_id = "" # @param {"type":"string","placeholder":"Your Google Cloud Project ID"}

# @markdown ---
# @markdown ### Optional Values (uses default values from Image Inventory)
dataset_id = "image_inventory" # @param {"type":"string","placeholder":"image_inventory"}
location  =  'us-central1' # @param {"type":"string","placeholder":"us-central1"}
queue_id = 'classify-products-queue' # @param {"type":"string","placeholder":"classify-products-queue"}
cloud_function_url = "" # @param {"type":"string"}

# Validate the only required value before anything is derived from it, so an
# empty project ID can never end up inside the generated function URL.
# Surrounding whitespace (easy to pick up when pasting) is ignored.
project_id = project_id.strip()
if not project_id:
  raise ValueError("No project ID provided")

# When no URL is supplied, build one from the location and project ID.
cloud_function_url = cloud_function_url.strip()
if not cloud_function_url:
  cloud_function_url = f'https://{location}-{project_id}.cloudfunctions.net/classify-product-tf'

# Shared client object used by the pull / push / results cells below.
product_pusher = push_products_lib.ProductPusher(project_id, dataset_id, location, queue_id)

print('Successfully configured!')

In [None]:
# @title Define product filters (choose one or more) { display-mode: "form" }
product_type = "" # @param {"type":"string"}
brands = "" # @param {"type":"string"}
skus = "" # @param {"type":"string"}

# Brands and SKUs are entered as comma-separated lists. Each entry is trimmed
# and lower-cased; blank entries (e.g. from a trailing or doubled comma) are
# dropped so they never reach the filter as empty-string values.
brands_list = [brand.strip().lower() for brand in brands.split(',') if brand.strip()]
sku_list = [sku.strip().lower() for sku in skus.split(',') if sku.strip()]

# Filter object consumed by the "Pull set of Products" cell.
product_filter = push_products_lib.ProductFilter(product_type, brands_list, sku_list)

print('Successfully defined filters!')

In [None]:
# @title Pull set of Products { display-mode: "form" }
product_limit = "10" # @param {"type":"string"}
exclude_previously_clasified_products = True # @param {type:"boolean"}

# Both branches pass the same limit and filter; they differ only in which
# ProductPusher method is called.
if exclude_previously_clasified_products:
  fetch_products = product_pusher.get_new_products_from_view
else:
  fetch_products = product_pusher.get_all_products_from_view

products = fetch_products(
    product_limit=int(product_limit),
    product_filter=product_filter,
)

print('Successfully pulled %d products!' % len(products))
# Last expression of the cell: shows the pulled products as a table for review.
data_table.DataTable(pd.DataFrame(products))

In [None]:
# @title Push set of products for analysis

# Do not push while the queue still holds unfinished work.
if not product_pusher.is_queue_empty():
  raise ValueError("Queue is not empty!")

try:
  # Record the push start time in UTC. It is kept timezone-naive because the
  # "Get generated classifications" cell appends ' UTC' to its ISO string; a
  # local-time value would be mislabelled on runtimes not running in UTC.
  start_timestamp = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
  product_pusher.push_products(products, cloud_function_url=cloud_function_url)
  print('Successfully pushed %d products to the queue!' % len(products))
except Exception:
  print('Error pushing products to the queue!')
  # Re-raise so the underlying cause is shown and a "Run all" stops here
  # instead of continuing as if the push had succeeded.
  raise

In [None]:
# @title Check if queue is empty (meaning all products are finished processing)

# Report the current queue state; re-run this cell until it reports empty.
queue_status = 'Queue is empty!' if product_pusher.is_queue_empty() else 'Queue is not empty!'
print(queue_status)

In [None]:
# @title Get generated classifications

# The query is bounded by the time of the last push, which is recorded by the
# "Push set of products for analysis" cell.
if 'start_timestamp' not in globals():
  raise ValueError('No push recorded yet - run "Push set of products for analysis" first.')

# NOTE(review): results are limited only by the push start time; the product
# filter is not applied to this query, so rows classified after that time for
# other products would also be returned.
timestamp_string = start_timestamp.isoformat(timespec='microseconds') + ' UTC'

# Backtick-quote the full table path so project IDs containing characters such
# as hyphens are always parsed as a single identifier.
classifications_view = (
    f'`{product_pusher.project_id}.{product_pusher.dataset_id}'
    '.get_product_image_classifications`'
)

# The nested `offers` array is flattened into two comma-separated string
# columns so that each result row renders cleanly in the table.
query = (
    'SELECT T.* EXCEPT (offers),'
    '(SELECT ARRAY_TO_STRING(ARRAY_AGG(DISTINCT o.offer_id), ", ") FROM UNNEST(offers) AS o) AS offer_ids,'
    '(SELECT ARRAY_TO_STRING(ARRAY_AGG(DISTINCT o.image_type), ", ") FROM UNNEST(offers) AS o) AS image_types'
    f' FROM {classifications_view} AS T'
    f' WHERE timestamp > TIMESTAMP("{timestamp_string}")'
    ' LIMIT 1000'
)
# Last expression of the cell: shows the classifications as a table.
data_table.DataTable(product_pusher.bigquery_client.query(query).to_dataframe())
