# Use the Google Vision API to build an Image Color Palette
This notebook uses the Google Vision API to get Image Properties from web-hosted images.  Specifically, it returns the Color Palette of the image as numeric RGB values.  These numeric values can be used as input into other data products.

###### Step 1: Get list of image URLs
###### Step 2: Scrape images from web
###### Step 3: Load image into RAM
###### Step 4: Process Image with Google Vision API Image Properties method
###### Step 5: Flatten Response JSON into dataframe
###### Step 6: Load results to BigQuery


In [None]:
#Import Packages
import io
import requests
from google.cloud import vision
from google.cloud.vision import types
from google.cloud import bigquery
import os
import pandas as pd
import shutil
import pandas_gbq

### Authenticate to GCP using a Service Account keyfile
Get a .json keyfile from your GCP project and place it on the machine you are running this on.

In [None]:
# Point the Google client libraries at the service-account keyfile.
# Raw string: a Windows path contains backslashes, and sequences such as "\T"
# are invalid escapes that newer Python versions warn about.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"C:\Temp\YOURGCPPROJECTNAME-SERVICEACCOUNTCREDENTIALS.json"

In [None]:
#Initialize BigQuery Client
#project_id is a placeholder: replace it with the name of your own GCP project.
#The client is created for that project and is used by the later cells to run
#the input query and to load the results.
project_id = 'YOURGCPPROJECTNAME'
bq_client = bigquery.Client(project=project_id)

### Step 1: Load a list of image file web urls into a pandas dataframe
In my case, the list of image URLs I want to process was already in BigQuery.  For this example I will be using a list of image IDs and image URLs read from a BigQuery table.

In [None]:
#SQL that returns the image ids and image URLs to process from BQ.
#Un-comment the LIMIT line to try the notebook on a smaller sample first.
query_string = """
SELECT input.image_id, input.image_url
FROM `yourgcpproject.yourdataset.yourinputtablename` input
--LIMIT 2000
  """

In [None]:
#Run the SQL statement in BQ, wait for the job to finish, and pull the rows into a pandas frame
df = bq_client.query(query_string).result().to_dataframe()

In [None]:
#confirm # of rows in df
#shape is a (row count, column count) tuple; the query selects two columns (image_id, image_url)
df.shape

In [None]:
#examine first few rows of df
#None turns off column-width truncation so the full image URLs are displayed.
#(The old -1 sentinel was deprecated in pandas 1.0 and is rejected by pandas 2.x.)
pd.set_option('display.max_colwidth', None)
df.head()

### Step 2: Scrape Image 
### Step 3: Load image into RAM 
### Step 4: Process Image with Google Vision API Image Properties method
### Step 5: Flatten Response JSON into dataframe

In [None]:
#Initialize Google Vision Client
#NOTE(review): presumably authenticates with the keyfile referenced by
#GOOGLE_APPLICATION_CREDENTIALS - confirm for your environment
client = vision.ImageAnnotatorClient()

#Set parms for loading results back to BQ.  This is where you will store the results
#'yourdataset' and 'yourresultstablename' are placeholders for your own dataset/table.
dataset_ref = bq_client.dataset('yourdataset')
table_ref = dataset_ref.table('yourresultstablename')
#get_table looks the table up in BigQuery.
#NOTE(review): presumably this fails if the results table does not exist yet - confirm.
#The load cell at the end of the notebook passes table_ref, not table.
table = bq_client.get_table(table_ref)

#create empty dataframe to hold results (the image loop adds rows to it)
df_vision = pd.DataFrame()

In [None]:
#loop through every image URL and collect one result row per dominant color.
#Rows are gathered in a plain list and turned into a dataframe once at the end:
#DataFrame.append and pd.datetime no longer exist in pandas 2.x, and appending
#row by row copies the whole frame on every call.
result_columns = ['DataDate', 'image_id', 'score', 'pixel_fraction', 'r', 'g', 'b']
color_rows = []

for _, row in df.iterrows():
    #set strings
    url = str(row['image_url'])
    imageid = str(row['image_id'])
    #build local filename
    filename = imageid + '.jfif'

    #use python requests package to download the image from the web.
    #The timeout keeps one unresponsive host from hanging the whole run, and a
    #failed download is skipped instead of sending an HTTP error page to the Vision API.
    try:
        http_response = requests.get(url, timeout=30)
        http_response.raise_for_status()
    except requests.RequestException as err:
        print('Skipping image {}: download failed ({})'.format(imageid, err))
        continue
    #.content is the decoded response body held in RAM
    #(response.raw would hand back the bytes without undoing gzip/deflate encoding)
    content = http_response.content

    #save the downloaded image to the local working dir
    with open(filename, 'wb') as out_file:
        out_file.write(content)

    try:
        #set image variable with the in-memory image bytes
        image = types.Image(content=content)
        # Call the google vision API with the in-memory image
        response = client.image_properties(image=image)
    finally:
        #delete downloaded image file if you don't want to store them.
        #Done in finally so a failed API call does not leave the file behind.
        os.remove(filename)

    #the API reports per-image problems in the response instead of raising
    if response.error.message:
        print('Skipping image {}: Vision API error ({})'.format(imageid, response.error.message))
        continue
    props = response.image_properties_annotation

    #flatten json response for bq table: one row per dominant color.
    #Numeric values are stored as strings for happy bq loading.
    for color in props.dominant_colors.colors:
        color_rows.append({'DataDate': pd.Timestamp.now().floor('s'),
                           'image_id': imageid,
                           'score': str(color.score),
                           'pixel_fraction': str(color.pixel_fraction),
                           'r': str(color.color.red),
                           'g': str(color.color.green),
                           'b': str(color.color.blue)})

#add the new rows to the results dataframe created in the previous cell.
#Explicit columns keep the schema stable even when no colors were returned.
new_rows = pd.DataFrame(color_rows, columns=result_columns)
if df_vision.empty:
    df_vision = new_rows
elif color_rows:
    df_vision = pd.concat([df_vision, new_rows], ignore_index=True)
        


### Step 6: Load Results back to BigQuery
Once we have the vision api data loaded to a BQ table we can easily use it as input into other data products.

In [None]:
#examine results. 1 row for every dominant color returned for each image in the BQ list from above
#(columns: DataDate, image_id, score, pixel_fraction, r, g, b)
df_vision.head()

In [None]:
#Start a BQ load job from the results dataframe into the results table,
#then block until the job has finished
(
    bq_client
    .load_table_from_dataframe(df_vision, table_ref)
    .result()
)