# Image Downloader

This notebook queries product data from a database table and downloads the images referenced by the image URLs.

In [None]:
# Necessary Packages
import pandas as pd
import pandas_gbq
import requests
from io import BytesIO
from PIL import Image
import numpy as np
import os
import requests

In [None]:
# Run a SQL query
# Get a dataset of products from your product image table.
# The result has one row per (product_id, product_type) pair; max() reduces
# several URLs for the same pair to a single one.
# Replace <Table Name> with the fully qualified table before running.
# NOTE(review): the download loop further down looks for a `locale` column,
# which this SELECT does not return — add it here (and to GROUP BY) if
# per-locale files are wanted.
sql = """
    SELECT
      product_id,
      product_type,
      max(product_image_url) AS image_url
    FROM <Table Name>
    GROUP BY 1, 2
"""

# Running the query on GCP
# Replace the placeholder with the GCP project that should run the query.
project_id = "<GCP Project Name>"
df = pandas_gbq.read_gbq(sql, project_id=project_id)
# Preview the first rows of the result
df.head()

In [None]:
# Function to download images with URL
def download_image(url, output_path, timeout=30):
    """Download the resource at ``url`` and save it to ``output_path``.

    Args:
        url: Address of the image to fetch.
        output_path: File path the response body is written to (binary mode).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would stall the whole run.

    Returns:
        True if the server answered with HTTP 200 and the body was written,
        False on any other status code, request failure or file-system error.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return False
        with open(output_path, 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as e:
        # Best effort: report why it failed and let the caller record the
        # failure instead of aborting the whole batch.
        print(f"Failed to download {url}: {e}")
        return False
    return True

# Path to download images
path_folder = "<Location to download>"
# Create the folder up front: open() does not create missing directories, so
# without this every download would fail and be reported as an error.
os.makedirs(path_folder, exist_ok=True)

## Error List
Some URLs might be broken, so we collect the IDs of products whose image could not be downloaded in a Python list.

In [None]:
# Loop through unique product_ids
error_list = []

# The file name carries the locale only when the query result actually has a
# `locale` column; reading it unconditionally raises KeyError otherwise.
has_locale = 'locale' in df.columns

# Group once instead of re-filtering the whole DataFrame for every product_id.
# sort=False keeps first-appearance order; dropna=False keeps rows whose
# product_id is missing so they are still accounted for.
for product_id, product_df in df.groupby('product_id', sort=False, dropna=False):
    # Try the records of this product until one image downloads successfully
    image_downloaded = False
    for _, row in product_df.iterrows():
        # Define the output path for the downloaded image
        if has_locale:
            file_name = f"{product_id}_{row['locale']}.jpg"
        else:
            file_name = f"{product_id}.jpg"
        output_path = os.path.join(path_folder, file_name)

        # One image per product is enough, so stop at the first success
        if download_image(row['image_url'], output_path):
            image_downloaded = True
            break

    # If no image is downloaded, add the product_id to the error list
    if not image_downloaded:
        error_list.append(product_id)

# Display the list of product_ids with download errors
print("Product IDs with download errors:", error_list)

All successfully downloaded images are saved to the given path.

In [None]:
# Export query features (preprocessed image features of range) to a CSV file
from numpy import savetxt

# NOTE(review): `query_features` is not defined in the cells shown here —
# it has to be computed before this cell is run.
savetxt(fname='range_features.csv', X=query_features, delimiter=',')