# Welcome to the AI/ML Meetup - Friday, April 19th, 2019

## I am a fellow Data Scientist & Python Developer in Building #600

# My talk today is focused on how we can use the various ML tools available to us to build more complex and insightful models.
# I will be using the tools developed by Google based on a talk I saw last weekend at PyTexas

In [1]:
import sys
sys.version

'3.6.7 |Anaconda, Inc.| (default, Oct 23 2018, 14:01:38) \n[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]'

# Import Dependencies/Packages

In [15]:
import os
import io
import base64
import json
from google.cloud import pubsub_v1
from google.cloud import storage
from google.cloud import translate
from google.cloud import vision
from google.cloud import language
from google.cloud.vision import types as v_types
from google.cloud.language import enums
from google.cloud.language import types

## To use Google's ML APIs, you will need an account and credentials.

In [16]:
# Build the absolute path of the credentials JSON file, which is expected to
# sit in the current working directory.
CURRENT_DIR = os.getcwd()
GOOGLE_CRED_FILE = "image-to-sentiment-9462fc808e0f.json"
GOOGLE_CRED_FILE_PATH = os.path.join(CURRENT_DIR, GOOGLE_CRED_FILE)
# Publish the path through the process environment.
# NOTE(review): presumably the Google client libraries created in later cells
# read this variable to authenticate - confirm against the auth documentation.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_CRED_FILE_PATH

In [17]:
## Instantiate the necessary Google Cloud client objects

In [18]:
# One client per Google Cloud service imported above: Vision, Translate,
# Pub/Sub (publisher), Storage and Natural Language.
# NOTE(review): `publisher` and `storage_client` are not referenced in the
# cells visible here - confirm whether they are still needed.
vision_client = vision.ImageAnnotatorClient()
translate_client = translate.Client()
publisher = pubsub_v1.PublisherClient()
storage_client = storage.Client()
language_client = language.LanguageServiceClient()

# Read config.json (path is relative to the current working directory) and
# parse its JSON content into `config`.
with open('config.json') as f:
    data = f.read()
config = json.loads(data)

## Let's start with a basic image. I won't tell you or Google what it is. Let's call it "something".

In [19]:
some_image = os.path.join(CURRENT_DIR, "something.jpg")

In [20]:
# Image that will be sent for label detection.
file_name = some_image

# Read the raw bytes and wrap them in the request's image message.
with open(file_name, 'rb') as image_handle:
    content = image_handle.read()
image = v_types.Image(content=content)

# Request label annotations for the image.
response = vision_client.label_detection(image=image)
labels = response.label_annotations

# Header line followed by one label description per line.
print('Labels:', *(label.description for label in labels), sep='\n')

Labels:
Cat
Mammal
Small to medium-sized cats
Felidae
Whiskers
Carnivore
Aegean cat
Domestic short-haired cat
American wirehair
Kitten


## Google Vision is telling us that the image is a cat. Well, let's see how it did.

In [21]:
%%HTML
<img src="something.jpg" width="400"/>

## Great job, Google! Well, let's try another picture...

In [22]:
some_image2 = os.path.join(CURRENT_DIR, "some_image_2.jpg")

In [24]:
# Second image to label.
file_name = some_image2

# Read the raw bytes, then wrap them in the request's image message.
with open(file_name, 'rb') as image_handle:
    content = image_handle.read()
image = v_types.Image(content=content)

# Request label annotations for the image.
response = vision_client.label_detection(image=image)
labels = response.label_annotations

# Header line followed by one label description per line.
print('Labels:', *(label.description for label in labels), sep='\n')

Labels:
Dog
Mammal
Vertebrate
Canidae
Dog breed
Morkie
Schnoodle
Carnivore
Puppy
Companion dog


## Google Vision is telling us that the image is a dog. Well, is it?

In [25]:
%%HTML
<img src="some_image_2.jpg" width="600" style="transform:rotate(90deg);"/>


## Well Google was right again...

## Hmm, how good is Google Vision at reading images with text?

In [26]:
text_image = os.path.join(CURRENT_DIR, "text_image_1.jpg")

In [27]:
def detect_document(path):
    """Print a confidence report for the text detected in an image.

    The image at ``path`` is submitted for document text detection, and the
    confidence of every block, paragraph, word and symbol found in the
    returned full-text annotation is printed.

    Args:
        path: Path of the image file to analyse.
    """
    from google.cloud import vision

    annotator = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as handle:
        raw_bytes = handle.read()

    request_image = vision.types.Image(content=raw_bytes)
    result = annotator.document_text_detection(image=request_image)

    for page in result.full_text_annotation.pages:
        for block in page.blocks:
            print('\nBlock confidence: {}\n'.format(block.confidence))

            for paragraph in block.paragraphs:
                print('Paragraph confidence: {}'.format(paragraph.confidence))

                for word in paragraph.words:
                    # A word is reported as the concatenation of its symbols.
                    spelled = ''.join(symbol.text for symbol in word.symbols)
                    print('Word text: {} (confidence: {})'.format(
                        spelled, word.confidence))

                    for symbol in word.symbols:
                        print('\tSymbol: {} (confidence: {})'.format(
                            symbol.text, symbol.confidence))

In [28]:
detect_document(text_image)


Block confidence: 0.9900000095367432

Paragraph confidence: 0.9900000095367432
Word text: WELCOME (confidence: 0.9900000095367432)
	Symbol: W (confidence: 0.9900000095367432)
	Symbol: E (confidence: 0.9900000095367432)
	Symbol: L (confidence: 0.9900000095367432)
	Symbol: C (confidence: 1.0)
	Symbol: O (confidence: 1.0)
	Symbol: M (confidence: 1.0)
	Symbol: E (confidence: 1.0)

Block confidence: 0.9800000190734863

Paragraph confidence: 0.9800000190734863
Word text: ORDER (confidence: 0.9900000095367432)
	Symbol: O (confidence: 0.9900000095367432)
	Symbol: R (confidence: 0.9900000095367432)
	Symbol: D (confidence: 0.9900000095367432)
	Symbol: E (confidence: 0.9900000095367432)
	Symbol: R (confidence: 1.0)
Word text: # (confidence: 0.9900000095367432)
	Symbol: # (confidence: 0.9900000095367432)
Word text: 064 (confidence: 0.9700000286102295)
	Symbol: 0 (confidence: 0.9700000286102295)
	Symbol: 6 (confidence: 0.9800000190734863)
	Symbol: 4 (confidence: 0.9700000286102295)

Block confiden

## Well, I saw a lot of 99% confidence; it seems Google Vision is pretty confident of its abilities. Let's see this again, but with just the text.


In [29]:
def detect_document_text(path):
    """Print every word that document text detection finds in an image.

    Reads the image at ``path``, submits it for document text detection and
    prints one ``Word text: <word>`` line per detected word, walking pages,
    blocks, paragraphs and words in the order they appear in the response.

    Args:
        path: Path of the image file to analyse.
    """
    client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.types.Image(content=content)

    response = client.document_text_detection(image=image)

    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    # A word is the concatenation of its symbols' text.
                    word_text = ''.join(
                        symbol.text for symbol in word.symbols)
                    # Word only: the template must not end in ")" because
                    # there is no "(confidence: ...)" part to close.
                    print('Word text: {}'.format(word_text))

In [30]:
detect_document_text(text_image)

Word text: WELCOME)
Word text: ORDER)
Word text: #)
Word text: 064)
Word text: Location)
Word text: :)
Word text: VZNIRVTX)
Word text: Device)
Word text: :)
Word text: POS1)
Word text: Origin)
Word text: :)
Word text: POS)
Word text: Order)
Word text: ID)
Word text: :)
Word text: 293073)
Word text: Cashier)
Word text: :)
Word text: Veronica)
Word text: Date)
Word text: :)
Word text: 04)
Word text: /)
Word text: 17)
Word text: /)
Word text: 2019)
Word text: Time)
Word text: :)
Word text: 12)
Word text: :)
Word text: 22)
Word text: PM)
Word text: Dine)
Word text: In)
Word text: EREDE)
Word text: 1)
Word text: Chicken)
Word text: $)
Word text: 7)
Word text: .)
Word text: 52)
Word text: 1)
Word text: Add)
Word text: Quacaniole)
Word text: $)
Word text: 0)
Word text: .)
Word text: 66)
Word text: =)
Word text: =)
Word text: =)
Word text: =)
Word text: =)
Word text: =)
Word text: =)
Word text: =)
Word text: Sub)
Word text: -)
Word text: Total)
Word text: $)
Word text: 8)
Word text: .)
Word te

## Can anyone guess what it was?

In [31]:
%%HTML
<img src="text_image_1.jpg" width="600" style="transform:rotate(90deg);"/>

## Wow, Google Vision got it right again!

# Okay, Google is too smart for this simple ML stuff. Let's really try to trick it now!

In [32]:
text_image_2 = os.path.join(CURRENT_DIR, "IMG_0144.jpg")

In [33]:
%%HTML
<img src="IMG_0144.jpg" width="600"/>

In [34]:
# A separate annotator client is created for this cell; the `vision_client`
# built in the setup cell is not reused here.
client = vision.ImageAnnotatorClient()

# Load the raw bytes of the image referenced by `text_image_2`.
with io.open(text_image_2, 'rb') as image_file:
    content = image_file.read()

image = vision.types.Image(content=content)

# Run document text detection. The following cells walk
# `response.full_text_annotation` page by page.
response = client.document_text_detection(image=image)
document = response.full_text_annotation

In [35]:
# Flatten the OCR hierarchy (page > block > paragraph > word) into a single
# lazy stream of words, then translate each word on its own and print it next
# to its English rendering.
ocr_words = (
    word
    for page in response.full_text_annotation.pages
    for block in page.blocks
    for paragraph in block.paragraphs
    for word in paragraph.words
)
for word in ocr_words:
    word_text = ''.join(symbol.text for symbol in word.symbols)
    translation = translate_client.translate(word_text, target_language="en")
    en_word = translation['translatedText']
    print('Word text: {} = {}'.format(word_text, en_word))

Word text: jax = jax
Word text: tryggði = secured
Word text: sér = a
Word text: sæti = seat
Word text: í = in
Word text: undanúrslitum = semi-final
Word text: Meistaradeildar = Masters
Word text: Evrópu = Europe
Word text: í = in
Word text: kvöld = evening
Word text: með = with
Word text: 2 = 2
Word text: - = -
Word text: 1 = 1
Word text: sigri = victory
Word text: á = on
Word text: Juventus = Juventus
Word text: . = .
Word text: Þetta = this
Word text: er = is
Word text: í = in
Word text: fyrsta = first
Word text: sinn = his
Word text: síðan = ago
Word text: 1997 = 1997
Word text: sem = the
Word text: Ajax = Ajax
Word text: kemst = finds
Word text: í = in
Word text: undanúrslitin = semifinals
Word text: . = .
Word text: Það = it
Word text: var = where
Word text: þriðja = third
Word text: árið = the year
Word text: í = in
Word text: röð = series
Word text: sem = the
Word text: Ajax = Ajax
Word text: komst = found
Word text: í = in
Word text: undanúrslitin = semifinals
Word text: og = a

In [36]:
# Translate every OCR'd word to English individually (one translate call per
# word, in reading order) and stitch the results together with single spaces.
# NOTE(review): translating word by word discards sentence context; sending
# larger spans of text per request may read better - confirm before changing
# the demo.
list_of_words = [
    translate_client.translate(
        ''.join(symbol.text for symbol in word.symbols),
        target_language="en",
    )['translatedText']
    for page in response.full_text_annotation.pages
    for block in page.blocks
    for paragraph in block.paragraphs
    for word in paragraph.words
]
translated_text = ' '.join(list_of_words)
print(translated_text)

jax secured a seat in semi-final Masters Europe in evening with 2 - 1 victory on Juventus . this is in first his ago 1997 the Ajax finds in semifinals . it where third the year in series the Ajax found in semifinals and other the year in series the Juventus batt own on dream Ajax one European title . in because items were players on table the Edwin from the Sar , Danny Blind , of farmer brothers , Frank and Ronald , Marc Overmars and Finger Litmanen the Board Louis from Gaal . ago then had Ajax only one his found in eight items finals , for sixteen years later there to in price when Ajax struck out Real Madrid in 16 - items finals . Ajax followed because by with because to beat out Juventus in evening with 2 - 1 victory on Juventus by to previous game teams onion with 1 - 1 draw in Amsterdam . uncertain is whether Ajax meet Tottenham or Manchester City in semi-final in it comes in light other evening when teams meet on Etihad - court . NOTES


## Okay, so Google can translate a picture into text, then translate that text to English! 
## Now that is cool!

https://www.frettabladid.is/sport/fyrsta-sinn-i-22-ar-sem-ajax-fer-i-undanurslitin/

## But wait, can Google tell us the sentiment of this article?

In [37]:
# Wrap the translated article as a plain-text document and request sentiment
# analysis for it. This rebinds `document`, which the OCR cell earlier set to
# the full-text annotation.
document = types.Document(content=translated_text, type=enums.Document.Type.PLAIN_TEXT)
annotations = language_client.analyze_sentiment(document=document)

In [38]:
def print_result(annotations):
    """Print per-sentence sentiment scores followed by the document summary.

    Args:
        annotations: Sentiment analysis result exposing
            ``document_sentiment`` (with ``score`` and ``magnitude``) and
            ``sentences`` (each with ``sentiment.score``).
    """
    overall = annotations.document_sentiment
    score, magnitude = overall.score, overall.magnitude

    sentence_line = 'Sentence {} has a sentiment score of {}'
    for position, sentence in enumerate(annotations.sentences):
        print(sentence_line.format(position, sentence.sentiment.score))

    summary_line = 'Overall Sentiment: score of {} with magnitude of {}'
    print(summary_line.format(score, magnitude))

In [39]:
print_result(annotations=annotations)

Sentence 0 has a sentiment score of 0.20000000298023224
Sentence 1 has a sentiment score of 0.0
Sentence 2 has a sentiment score of 0.6000000238418579
Sentence 3 has a sentiment score of 0.20000000298023224
Sentence 4 has a sentiment score of 0.0
Sentence 5 has a sentiment score of 0.0
Sentence 6 has a sentiment score of 0.0
Sentence 7 has a sentiment score of 0.10000000149011612
Overall Sentiment: score of 0.10000000149011612 with magnitude of 1.5


## Hopefully this inspired you with some use cases for working with ML

## Thank you