# Environment Initialization

In [None]:
# Upgrade pip itself (-U = upgrade, -q = quiet output).
!pip3 install -Uq pip
# Install/upgrade the Vision client library that provides `google.cloud.vision`.
!pip3 install -Uq google-cloud-vision
# `requests` is used by the image downloader; `wget` is installed but not
# imported by any cell shown in this notebook.
!pip3 install -q requests wget

In [None]:
import os
from google.cloud import vision
from google.protobuf import field_mask_pb2 as field_mask
from google.cloud import storage
from google.colab import auth
from google.protobuf.json_format import MessageToJson

import requests # to get image from the web
import shutil # to save it locally
from uuid import uuid4
import csv

In [None]:
# Google Cloud project and region that every Product Search call targets.
PROJECT_ID = 'dubhacks-292818'
LOCATION_ID = 'us-east1'

# Default product set and the Vision product category of its products.
PRODUCT_SET_ID = 'product_set'
PRODUCT_CATEGORY = 'apparel-v2'

# Buckets: one holds the reference images, the other the catalog CSV.
IMAGE_BUCKET = 'dubhacks-ref-images'
VISION_BUCKET = 'dubhacks-vision-bucket'

# Publish the credentials file path and project settings through the process
# environment in a single call.
os.environ.update({
    'GOOGLE_APPLICATION_CREDENTIALS': '/content/dubhacks-292818-2f87e46570da.json',  # TODO
    'PROJECT_ID': PROJECT_ID,
    'LOCATION_ID': LOCATION_ID,
})

# Google Vision API Python SDK

In [None]:
def create_product_set(
        project_id, location, product_set_id, product_set_display_name):
    """Create a product set and print the resource name that comes back.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_set_id: Id to assign to the new product set.
        product_set_display_name: Display name of the product set.
    """
    search_client = vision.ProductSearchClient()

    # Parent resource (project + region) the product set is created under.
    parent = f"projects/{project_id}/locations/{location}"

    # The created resource is returned with its `name` filled in.
    created = search_client.create_product_set(
        parent=parent,
        product_set=vision.ProductSet(display_name=product_set_display_name),
        product_set_id=product_set_id)

    print('Product set name: {}'.format(created.name))

## Import Product Sets

In [None]:
# CSV layout reference: https://cloud.google.com/vision/product-search/docs/csv-format
def import_product_sets(project_id, location, gcs_uri):
    """Import product sets from a catalog CSV and report each line's status.

    Waits for the import operation to finish, then prints the status of
    every CSV line and, for successful lines, the resulting reference image.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        gcs_uri: Google Cloud Storage URI of the catalog file, which must be
            in Product Search CSV format.
    """
    search_client = vision.ProductSearchClient()
    parent = f"projects/{project_id}/locations/{location}"

    # Wrap the CSV location in the nested input-config messages the API expects.
    csv_config = vision.ImportProductSetsInputConfig(
        gcs_source=vision.ImportProductSetsGcsSource(csv_file_uri=gcs_uri))

    operation = search_client.import_product_sets(
        parent=parent, input_config=csv_config)
    print('Processing operation name: {}'.format(operation.operation.name))

    # result() waits synchronously for the operation to complete.
    outcome = operation.result()
    print('Processing done.')

    for line_no, line_status in enumerate(outcome.statuses):
        print('Status of processing line {} of the csv: {}'.format(
            line_no, line_status))
        # `0` is the code for OK in google.rpc.Code.
        if line_status.code != 0:
            print('Status code not OK: {}'.format(line_status.message))
            continue
        # Successful lines have a reference image at the same index.
        print(outcome.reference_images[line_no])

In [None]:
def list_product_sets(project_id, location):
    """Print the name, id, display name and index time of every product set.

    Args:
        project_id: Id of the project.
        location: A compute region name.
    """
    search_client = vision.ProductSearchClient()
    parent = f"projects/{project_id}/locations/{location}"

    for entry in search_client.list_product_sets(parent=parent):
        resource_name = entry.name
        print('Product set name: {}'.format(resource_name))
        # The id is the last path segment of the resource name.
        print('Product set id: {}'.format(resource_name.rsplit('/', 1)[-1]))
        print('Product set display name: {}'.format(entry.display_name))
        print('Product set index time: ')
        print(entry.index_time)

In [None]:
def get_product_set(project_id, location, product_set_id):
    """Fetch one product set and print its details.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_set_id: Id of the product set.
    """
    search_client = vision.ProductSearchClient()

    # Build the fully qualified resource name, then fetch the product set.
    resource_path = search_client.product_set_path(
        project=project_id, location=location,
        product_set=product_set_id)
    fetched = search_client.get_product_set(name=resource_path)

    resource_name = fetched.name
    print('Product set name: {}'.format(resource_name))
    print('Product set id: {}'.format(resource_name.rsplit('/', 1)[-1]))
    print('Product set display name: {}'.format(fetched.display_name))
    print('Product set index time: ')
    print(fetched.index_time)

## List Products

In [None]:
def list_products(project_id, location):
    """Print the details of every product in the given project and region.

    Args:
        project_id: Id of the project.
        location: A compute region name.
    """
    search_client = vision.ProductSearchClient()
    parent = f"projects/{project_id}/locations/{location}"

    for item in search_client.list_products(parent=parent):
        resource_name = item.name
        print('Product name: {}'.format(resource_name))
        # The id is the last path segment of the resource name.
        print('Product id: {}'.format(resource_name.rsplit('/', 1)[-1]))
        print('Product display name: {}'.format(item.display_name))
        print('Product description: {}'.format(item.description))
        print('Product category: {}'.format(item.product_category))
        print('Product labels: {}\n'.format(item.product_labels))

In [None]:
def get_product(project_id, location, product_id):
    """Fetch one product and print its details.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_id: Id of the product.
    """
    search_client = vision.ProductSearchClient()

    # Build the fully qualified resource name, then fetch the product.
    resource_path = search_client.product_path(
        project=project_id, location=location, product=product_id)
    fetched = search_client.get_product(name=resource_path)

    resource_name = fetched.name
    print('Product name: {}'.format(resource_name))
    print('Product id: {}'.format(resource_name.rsplit('/', 1)[-1]))
    print('Product display name: {}'.format(fetched.display_name))
    print('Product description: {}'.format(fetched.description))
    print('Product category: {}'.format(fetched.product_category))
    print('Product labels: {}'.format(fetched.product_labels))

In [None]:
def list_products_in_product_set(
        project_id, location, product_set_id):
    """Print every product in a product set, followed by the total count.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_set_id: Id of the product set.
    """
    search_client = vision.ProductSearchClient()

    resource_path = search_client.product_set_path(
        project=project_id, location=location,
        product_set=product_set_id)
    members = search_client.list_products_in_product_set(name=resource_path)

    # Stays 0 when the product set has no products.
    count = 0
    for count, item in enumerate(members, start=1):
        resource_name = item.name
        print('Product name: {}'.format(resource_name))
        print('Product id: {}'.format(resource_name.rsplit('/', 1)[-1]))
        print('Product display name: {}'.format(item.display_name))
        print('Product description: {}'.format(item.description))
        print('Product category: {}'.format(item.product_category))
        print('Product labels: {}'.format(item.product_labels))
    print("Total products: ", count)

## Search Images

In [None]:
def get_reference_image_uri(
        project_id, location, product_id):
    """Return the URI of the first reference image listed for a product.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_id: Id of the product.

    Returns:
        The `uri` of the first reference image, or None when the product
        has no reference images.
    """
    search_client = vision.ProductSearchClient()

    resource_path = search_client.product_path(
        project=project_id, location=location, product=product_id)
    listing = search_client.list_reference_images(parent=resource_path)

    # Only the first listed image is used; any others are ignored.
    first_image = next(iter(listing), None)
    if first_image is None:
        return None
    return first_image.uri

In [None]:
def get_image_metadata(bucket_name, blob_name):
    """Return the custom metadata stored on a Cloud Storage object.

    Args:
        bucket_name: Name of the bucket that holds the object.
        blob_name: Name of the object inside the bucket.

    Returns:
        The blob's `metadata` attribute, or None when the object cannot be
        found in the bucket.
    """
    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).get_blob(blob_name)

    # get_blob() yields None for a missing object; report that as "no
    # metadata" instead of failing with AttributeError on `None.metadata`.
    if blob is None:
        return None
    return blob.metadata

In [None]:
def convert(labelsStr):
    """Convert the text form of Vision product labels into a dictionary.

    The input is expected to look like the string form of a repeated
    key/value field, e.g.
    '[key: "color"\\nvalue: "red"\\n, key: "style"\\nvalue: "kids"\\n]'.

    Args:
        labelsStr: String representation of the product labels.

    Returns:
        A dict mapping each label key to its value, with surrounding double
        quotes removed. An empty label list ('[]' or '') gives {}.

    Raises:
        IndexError: If a non-empty entry lacks a `key: ` or ` value: ` part.
    """
    import re  # local import keeps this notebook cell self-contained

    # Drop the surrounding brackets and flatten the text onto a single line.
    flattened = ' '.join(labelsStr.strip()[1:-1].split('\n'))

    labelObj = {}
    # Split only on commas that introduce the next `key: ` entry, so commas
    # inside a label value do not cut the entry in half.
    for label in re.split(r',(?=\s*key: )', flattened):
        if not label.strip():
            # Empty label list (or stray separator): nothing to parse.
            continue
        pieces = label.split(' value: ', 1)
        key = pieces[0].split('key: ')[1].strip().strip('"')
        value = pieces[1].strip().strip('"')
        labelObj[key] = value
    return labelObj


In [None]:
def get_similar_products_uri(
        project_id, location, product_set_id, product_category,
        image_uri, filter):
    """Search a product set for products similar to an image and print them.

    For every match the confidence score is printed, followed by the custom
    metadata stored on the matched product's first reference image.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_set_id: Id of the product set.
        product_category: Category of the product.
        image_uri: URI of the image to be searched.
        filter: Condition to be applied on the labels.
            Example for filter: (color = red OR color = blue) AND style = kids
            It will search on all products with the following labels:
            color:red AND style:kids
            color:blue AND style:kids
            (The name shadows the builtin but is kept for existing callers.)
    """
    # product_search_client is needed only for its helper methods.
    product_search_client = vision.ProductSearchClient()
    image_annotator_client = vision.ImageAnnotatorClient()

    # Create annotate image request along with product search feature.
    image = vision.Image(source=vision.ImageSource(image_uri=image_uri))

    # Product search specific parameters.
    product_set_path = product_search_client.product_set_path(
        project=project_id, location=location,
        product_set=product_set_id)
    product_search_params = vision.ProductSearchParams(
        product_set=product_set_path,
        product_categories=[product_category],
        filter=filter)
    image_context = vision.ImageContext(
        product_search_params=product_search_params)

    # Search products similar to the image.
    response = image_annotator_client.product_search(
        image, image_context=image_context)

    search_results = response.product_search_results
    print('Product set index time: ')
    print(search_results.index_time)

    print('Search results:')
    for result in search_results.results:
        print('Score(Confidence): {}'.format(result.score))

        product_id = result.product.name.split('/')[-1]
        # Look the product up in the project/region the caller asked for,
        # not in the notebook-level defaults.
        reference_uri = get_reference_image_uri(
            project_id, location, product_id)
        if reference_uri is None:
            # A product without reference images has no metadata to show.
            print('No reference image found for product: {}'.format(
                product_id))
            continue

        # The metadata lives on the image object in IMAGE_BUCKET; the object
        # name is the last path segment of the reference image URI.
        blob_name = reference_uri.split('/')[-1]
        meta = get_image_metadata(IMAGE_BUCKET, blob_name)
        print("Product Info: ", meta)

## Examples

In [None]:
# Create a single product set.
create_product_set(
    PROJECT_ID, LOCATION_ID, 'test_product_set_id2', 'test_product_set2')

# Create product sets, products and reference images from a catalog CSV.
import_product_sets(
    PROJECT_ID,
    LOCATION_ID,
    "gs://cloud-samples-data/vision/product_search/product_catalog.csv")

# Show every product set in the region.
list_product_sets(PROJECT_ID, LOCATION_ID)

# Show one product set.
get_product_set(PROJECT_ID, LOCATION_ID, PRODUCT_SET_ID)

# Show one product.
get_product(PROJECT_ID, LOCATION_ID, 'product_id78')

# Search the product set for products similar to a web image (no label filter).
get_similar_products_uri(
    PROJECT_ID,
    LOCATION_ID,
    PRODUCT_SET_ID,
    'apparel-v2',
    'https://cdn.shopify.com/s/files/1/0017/2100/8243/products/CWJ-1_BROWN_61e80a3c-a589-43ab-89e7-1d8c69e04d25_2000x.jpg?v=1589312314',
    '')

# Scraping to Products Pipeline

## Download image from uri


In [None]:
# https://towardsdatascience.com/how-to-download-an-image-using-python-38a75cfa21c
def downloadImage(image_url, timeout=30):
    """Download an image to a local file named `image.<extension>`.

    Args:
        image_url: URL of the image. The extension is taken from the last
            path segment, ignoring any query string.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The local filename when the image was downloaded and written,
        otherwise None (a message is printed in that case).
    """
    try:
        # Derive the local filename from the URL's file extension.
        extension = '.' + image_url.split("/")[-1].split('?')[0].split('.')[-1]
        filename = 'image' + extension

        # Stream the body; the `with` block releases the connection.
        with requests.get(image_url, stream=True, timeout=timeout) as r:
            if r.status_code != 200:
                # Nothing was written, so do not hand back a filename that
                # may still point at a previous download.
                print('Image Couldn\'t be retreived: ', image_url)
                return None

            # Set decode_content to True, otherwise the downloaded image
            # file's size will be zero.
            r.raw.decode_content = True

            with open(filename, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
        return filename
    except Exception:
        # Best-effort download: report and signal failure with None, but let
        # KeyboardInterrupt/SystemExit propagate (unlike a bare `except:`).
        print('Image Couldn\'t be retreived: ', image_url)
        return None

## Upload image to Google Cloud Storage

In [None]:
# https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
def set_image_metadata(bucket, image_blob_name, metadata=''):
    """Assign metadata to a blob in the bucket and patch the blob.

    Args:
        bucket: Bucket object that provides `blob(name)`.
        image_blob_name: Name of the blob to update.
        metadata: Value assigned to the blob's `metadata` attribute.
    """
    target = bucket.blob(image_blob_name)
    target.metadata = metadata
    # patch() is what sends the changed attribute.
    target.patch()

In [None]:
# https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
def upload_blob(bucket, source_file_name, destination_blob_name):
    """Upload a local file to the bucket under the given blob name.

    Args:
        bucket: Bucket object that provides `blob(name)`.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Name the blob gets in the bucket.
    """
    target = bucket.blob(destination_blob_name)
    target.upload_from_filename(source_file_name)
    target.patch()


## Read recommendations file


`recommendations.csv` file format:

| Name                           | Price | Link                          | Image                       | Brand          |
|--------------------------------|-------|-------------------------------|-----------------------------|----------------|
| BOTANICAL REVERSIBLE FACE MASK | 50.0  | https://biancaspender.com/... | https://cdn.shopify.com/... | Bianca Spender |

In [None]:
# Rows for the Product Search catalog CSV, one per successfully processed image.
visionProductsList = []
# From here on the notebook works with the labelled product set and bucket.
PRODUCT_SET_ID = 'products_set_labelled'
IMAGE_BUCKET = 'dubhacks-images-labelled-bucket'
storage_client = storage.Client()
bucket = storage_client.bucket(IMAGE_BUCKET)

# NOTE: detect_labels_uri is defined in the "Image Classification" section
# further down; that cell has to be run before this one.
# newline='' lets the csv module handle line endings and quoted newlines.
with open('./recommendations.csv', newline='') as f:
    # csv.reader keeps quoted fields intact, e.g. a product name that
    # contains a comma, which a plain split(',') would cut apart.
    for line in csv.reader(f):
        try:
            # Columns: name, price, link, image URL, brand.
            line = [field.strip() for field in line]

            # Upload image to GCS under a fresh unique name.
            imageFile = downloadImage(line[3])
            if imageFile is None:
                # Download failed and was already reported; skip this row.
                continue
            extension = '.' + imageFile.split('.')[-1]
            imageId = str(uuid4())
            newImageFile = imageId + extension
            upload_blob(bucket, imageFile, newImageFile)

            # Label the uploaded image and store everything as blob metadata.
            query_labels = str(detect_labels_uri('gs://' + IMAGE_BUCKET + '/' + newImageFile))
            labels = {'name': line[0],
                      'price': line[1],
                      'link': line[2],
                      'imageLink': line[3],
                      'brand': line[4],
                      'labels': query_labels}
            set_image_metadata(bucket, newImageFile, labels)

            # One catalog row per image; the last three columns are left empty.
            productId = str(uuid4())
            visionProductsList.append(['gs://' + IMAGE_BUCKET + '/' + newImageFile, imageId, PRODUCT_SET_ID, productId, PRODUCT_CATEGORY, '', '', ''])
        except Exception as e:
            # Best effort: report the failing row (e.g. too few columns)
            # and carry on with the next one.
            print(e)

## Upload products file to GCS

In [None]:
# Local filename of the catalog, reused as the blob name in the bucket.
CATALOG = 'vision_product_search_product_catalog.csv'
bucket = storage_client.bucket(VISION_BUCKET)

# newline='' is what the csv module expects for files it writes; without it
# some platforms add an extra carriage return to every row.
with open(CATALOG, 'w', newline='') as file:
    write = csv.writer(file)
    write.writerows(visionProductsList)

# Upload the catalog so it can be imported from Cloud Storage.
upload_blob(bucket, CATALOG, CATALOG)

## Import Vision Product Set

In [None]:
# Import the catalog CSV from the Vision bucket.
import_product_sets(
    PROJECT_ID, LOCATION_ID, f"gs://{VISION_BUCKET}/{CATALOG}")

# Search

In [None]:
# Query image for the similarity search.
image = 'https://cdn.shopify.com/s/files/1/1004/1966/products/STORYmfg_Vertical03_900x.jpg?v=1573147039'

# Search the current product set with no label filter.
get_similar_products_uri(
    project_id=PROJECT_ID,
    location=LOCATION_ID,
    product_set_id=PRODUCT_SET_ID,
    product_category='apparel-v2',
    image_uri=image,
    filter='')

# Image Classification

In [None]:
def detect_labels_uri(uri):
    """Detect labels in an image located in Google Cloud Storage or on the Web.

    Args:
        uri: URI of the image to label.

    Returns:
        A list with the description string of every detected label.

    Raises:
        Exception: If the response carries an error message; the exception
            text contains that message and the image URI.
    """
    from google.cloud import vision
    client = vision.ImageAnnotatorClient()
    image = vision.Image()
    image.source.image_uri = uri

    response = client.label_detection(image=image)

    if response.error.message:
        # Format the whole message at once. Calling .format on only the
        # trailing literal would drop the API error text from the exception.
        raise Exception(
            '{} ({})\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message, uri))
    return [label.description for label in response.label_annotations]

In [None]:
# Smoke test: print the labels detected for one web-hosted image.
print(detect_labels_uri(
    'https://cdn.shopify.com/s/files/1/0373/2642/2152/products/IMG_2918_420x.jpg?v=1602820943'))