In [88]:
import google.cloud.vision as gcv
from google.oauth2 import service_account
from google.protobuf.json_format import MessageToDict
import json

In [155]:
web_detection_params = gcv.types.WebDetectionParams(include_geo_results=True)
image_context = gcv.types.ImageContext(web_detection_params=web_detection_params)

In [16]:
# credentials must be loaded as below, otherwise there will be an error
credentials = service_account.Credentials.from_service_account_file('credentials/ArnottsAU-7991416de13b.json')

In [16]:
client = gcv.ImageAnnotatorClient(credentials=credentials)

In [182]:
# read image file as binary
img = gcv.types.Image(content=open('pictures/picture_1722024574542749.jpg', 'rb').read())

In [183]:
f = open('pictures/picture_1722024574542749.jpg', 'rb').read()

r = MessageToDict(client.annotate_image({'image': 
                       {'content': f}, 'image_context': image_context}), 
                  preserving_proto_field_name = True)

In [184]:
face_feats = 'joy sorrow anger surprise under_exposed blurred headwear'.split()

In [185]:
list(r)

['logo_annotations',
 'label_annotations',
 'text_annotations',
 'safe_search_annotation',
 'image_properties_annotation',
 'crop_hints_annotation',
 'full_text_annotation',
 'web_detection',
 'localized_object_annotations']

### Face

In [186]:
n_faces = len(r['face_annotations']) if 'face_annotations' in r else 0
print(f'found {n_faces} face(s)')

found 0 face(s)


In [187]:
for e in face_feats:
    print(f'{e}: {r["face_annotations"][0].get(e + "_likelihood", None)}')

KeyError: 'face_annotations'

### Logos

In [188]:
n_logos = len(r['logo_annotations']) if 'logo_annotations' in r else 0
print(f'found {n_logos} logo(s)')

found 1 logo(s)


In [189]:
r['logo_annotations'][0]['description']

'Tim Tam'

### Labels

In [190]:
n_labels= len(r['label_annotations']) if 'label_annotations' in r else 0
print(f'found {n_labels} label(s)')

found 8 label(s)


In [193]:
for l in r['label_annotations']:
    print(f'{l["description"]}, score: {l["score"]}')

drink, score: 0.7965183854103088
food, score: 0.7143707275390625
sundae, score: 0.6274505853652954
liqueur, score: 0.6033880114555359
dairy product, score: 0.5961754322052002
font, score: 0.5572941303253174
chocolate, score: 0.5479716658592224
dessert, score: 0.5367579460144043


### Text in Picture

In [194]:
n_texts = len(r['text_annotations']) if 'text_annotations' in r else 0
print(f'found {n_texts} text(s)')

found 11 text(s)


In [195]:
for i, t in enumerate(r['text_annotations'], 1):
    print(f'#{i} -- language: {t["locale"] if "locale" in t  else "?"}, text: {t["description"]}')

#1 -- language: en, text: TimTam
Super Scrummy
Chocolate Tim Tam
Milkshake
REARNOTTS
TimTam
ORIGINAL

#2 -- language: ?, text: TimTam
#3 -- language: ?, text: Super
#4 -- language: ?, text: Scrummy
#5 -- language: ?, text: Chocolate
#6 -- language: ?, text: Tim
#7 -- language: ?, text: Tam
#8 -- language: ?, text: Milkshake
#9 -- language: ?, text: REARNOTTS
#10 -- language: ?, text: TimTam
#11 -- language: ?, text: ORIGINAL


### Restricted Themes

In [196]:
r['safe_search_annotation']

{'adult': 'UNLIKELY',
 'spoof': 'UNLIKELY',
 'medical': 'VERY_UNLIKELY',
 'violence': 'VERY_UNLIKELY',
 'racy': 'VERY_LIKELY'}

### Colors
higher "scores" means higher confidence that the color in question is prominent in the central focus of the image

In [197]:
for c in r['image_properties_annotation']['dominant_colors']['colors']:
    print(c['color'], c['pixel_fraction'], c['score'])

{'red': 194.0, 'green': 138.0, 'blue': 86.0} 0.02287253551185131 0.20439013838768005
{'red': 216.0, 'green': 193.0, 'blue': 168.0} 0.013833089731633663 0.04273355007171631
{'red': 14.0, 'green': 8.0, 'blue': 8.0} 0.5066198110580444 0.03537030890583992
{'red': 124.0, 'green': 114.0, 'blue': 114.0} 0.0221877284348011 0.00440334714949131
{'red': 167.0, 'green': 111.0, 'blue': 65.0} 0.027711832895874977 0.17405693233013153
{'red': 119.0, 'green': 72.0, 'blue': 35.0} 0.051132213324308395 0.13442160189151764
{'red': 192.0, 'green': 143.0, 'blue': 117.0} 0.012372169643640518 0.03823941946029663
{'red': 110.0, 'green': 72.0, 'blue': 47.0} 0.035609934478998184 0.0373784676194191
{'red': 203.0, 'green': 136.0, 'blue': 67.0} 0.0018261504592373967 0.03437772020697594
{'red': 84.0, 'green': 45.0, 'blue': 25.0} 0.030542366206645966 0.028653955087065697


In [198]:
r['full_text_annotation']['pages'][0]['property']['detected_languages']

[{'language_code': 'en', 'confidence': 0.7699999809265137}]

### Full Text Annotations

In [199]:
print('pages: ', len(r['full_text_annotation']['pages']))

pages:  1


In [200]:
for p in range(len(r['full_text_annotation']['pages'])):
    print(r['full_text_annotation']['pages'][p]['property']['detected_languages'])

[{'language_code': 'en', 'confidence': 0.7699999809265137}]


In [201]:
r['full_text_annotation']['text']

'TimTam\nSuper Scrummy\nChocolate Tim Tam\nMilkshake\nREARNOTTS\nTimTam\nORIGINAL\n'

### Web Detection
This one gives us 
* web_entities
* visually_similar_images
* best_guess_labels
Note: there's an overall relevancy score for the entity, not normalized and not comparable across different image queries.

In [202]:
web_ents = len(r['web_detection']['web_entities'])
print(f'web entities found: {web_ents}')

web entities found: 10


In [203]:
for e in r['web_detection']['web_entities']:
    print(f'entity: {e["description"] if "description" in e else "?"}, score: {e["score"]}')

entity: Sundae, score: 1.578178882598877
entity: Tim Tam, score: 0.8896999955177307
entity: Liqueur, score: 0.8364270329475403
entity: ?, score: 0.6556000113487244
entity: Arnott's Biscuits, score: 0.6057000160217285
entity: Chocolate, score: 0.5485801696777344
entity: Flavor by Bob Holmes, Jonathan Yen (narrator) (9781515966647), score: 0.5430999994277954
entity: Biscuit, score: 0.5418000221252441
entity: Spain, score: 0.5078999996185303
entity: United Arab Emirates, score: 0.5071499943733215


In [204]:
sim_imgs = len(r['web_detection']['visually_similar_images'])
print(f'visually_similar_images found: {sim_imgs}')

visually_similar_images found: 10


In [205]:
r['web_detection']['visually_similar_images']

[{'url': 'https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=1916596088657356'},
 {'url': 'https://s314.siliconimg.com/kb/content_images/2017/12/13/1496558/1513170699_709.jpg'},
 {'url': 'http://www.uhainiu.com/content/images/thumbs/000/0000678_timtam-187g.jpeg'},
 {'url': 'https://cbu01.alicdn.com/img/ibank/2017/452/396/4547693254_937679173.jpg'},
 {'url': 'http://www.totallytarget.com/wp-content/uploads/2016/02/tim-tam-1.jpg'},
 {'url': 'https://wx1.sinaimg.cn/orj360/006YBhA8gy1fm1dmmcd4aj31he0u0aep.jpg'},
 {'url': 'https://www.campbellsoupcompany.com/wp-content/uploads/sites/31/2013/11/Tim-Tam-Chocolicious.jpg'},
 {'url': 'https://s1.bukalapak.com/img/139947173/large/Biskuit_Tim_Tam_Chocolate_100g_x_3_pcs.jpg'},
 {'url': 'https://media.apnarm.net.au/media/images/2014/02/16/tim_tam_50_years-lre5y1mnzhl8vk6nnh2_fct621x468_ct677x380.JPG'},
 {'url': 'https://www.c-store.com.au/wp-content/uploads/2015/07/Tim-Tam.jpg'}]

In [206]:
r['web_detection']['best_guess_labels']

[{'label': 'tim tam', 'language_code': 'en'}]

### Localized Objects

In [207]:
for o in r['localized_object_annotations']:
    print(f'{o["name"]}, score: {o["score"]}')

Drink, score: 0.5362362265586853
