# Install Google Cloud Vision Client

In [2]:
# Install (or upgrade) the Google Cloud Vision client library from inside the notebook.
%pip install --upgrade google-cloud-vision
# Optional installs for other Google Cloud client libraries (currently disabled):
#%pip install --upgrade google-cloud-speech
#%pip install --upgrade google-cloud-language
#%pip install --upgrade google-cloud-texttospeech

Note: you may need to restart the kernel to use updated packages.


In [54]:
import os
import io
import time
import pandas as pd

# Image Locations Paths

In [55]:
# Input folders that hold the images
images_location_original = "../data/raw"        # our own images
images_location_facebook = "../data/external"   # Facebook images

# Output folder where the Google API responses are written
images_location_processed = "../data/processed/google_API_response"


## Get the list of image names for our own images

In [56]:
# Read the CSV index of our own images; it lives in the same folder as the images
path = f'{images_location_original}/our_images.csv'
df = pd.read_csv(path, index_col=0)

# Image file names, as a plain Python list
images_name_list = df["Name"].tolist()

# Matching hate/noHate labels, as a plain Python list
hate_list = df["hate"].tolist()

# Show every image name that was loaded
print(images_name_list)

['10246724_747008182064474_2873398654888745189_n.png', 'obama-meme.jpg', 'alldone.png', 'brown.png', 'patriots.png', 'pending.png', 'mikeFlynn.png', 'precipice.png', 'DUfydpRW4AA5WYF.jpg', 'DUfigmUXUAIc8G5.jpg', 'DUPUIOkXcAAujr0.jpg', 'DVKz6V5WsAE7dlR.jpg', 'hypocrite2.png', 'isaid.png', 'racistVoting.png', 'bush.png', 'pedowood.png', 'bringPain.png', 'twitterJail.jpg', 'treasonNN.jpg', 'PROSECUTION.jpg', 'faithhealer.jpg', 'PrayingMedic.jpg', 'noFlyNancy.jpg', 'maggie.png', 'internalCivilWar.jpg', 'auschwitz.jpg', '14faef5e6164585c084d65fb66d59c78.jpeg', '6fb33123bd09944b92fba03051ca17c9.jpeg', '4dfaaeb44e80b2eba0dfcc7cea091419.png', 'catwithconeears.gif', 'snowwhitesheadexplodes.gif', 'rowanatkinsononkeyboards.gif', 'arielinspired.gif', 'abesimpsoninandout.gif', 'dogstealsbra.gif', 'stuntgolfer.gif', 'wrapitup.gif', 'camerongangsta.jpg', 'aaaahhhh.gif', 'catrollsoverfallsoff.gif', 'moustachesarecool.gif', 'catattack.gif', 'bitchimfabulous.gif', 'yologangnam.gif', 'shockhorror.gif', '

## Get the list of image names for the Facebook images

In [57]:
# TODO: this cell was flagged "TO DO" by its author; the outstanding work is not described here.

# Read the CSV index of the Facebook images; it lives in the same folder as the images
path = f'{images_location_facebook}/facebook_images.csv'
df = pd.read_csv(path, index_col=0)

# Image file names, as a plain Python list
facebook_images_name_list = df["Name"].tolist()

# Matching hate/noHate labels, as a plain Python list
facebook_hate_list = df["hate"].tolist()

# Show every image name that was loaded
print(facebook_images_name_list)

# Google Cloud Vision

In [58]:
# For the Google Cloud Vision to work, specify the location of your key.
# A GOOGLE_APPLICATION_CREDENTIALS value that is already exported (and non-empty)
# takes precedence, so the key location can be configured without editing the
# notebook; otherwise the project-local key file is used.
path_to_key = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    or "./memeproject-335703-018a67736bbc.json"
)

## Create the environment variable pointing to the key file

In [59]:
# Store the key file location in the GOOGLE_APPLICATION_CREDENTIALS environment
# variable, then read it back and print it as a confirmation.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = path_to_key
print(f"Service Account Key: {os.environ['GOOGLE_APPLICATION_CREDENTIALS']}")

Service Account Key: ./memeproject-335703-018a67736bbc.json


## Load the Google Vision Client

In [60]:
from google.cloud import vision

# Instantiates the Vision API client; getTextFromImage and getObjectsFromImage
# send their requests through this module-level object.
# NOTE(review): presumably the client authenticates through the
# GOOGLE_APPLICATION_CREDENTIALS variable set earlier in this notebook — confirm
# against the client library documentation.
client = vision.ImageAnnotatorClient()

# Auxiliary functions

### Function that gets the text from an image

In [61]:
def getTextFromImage(img):
  """Send one image to the Vision API text detection endpoint.

  img: content of a single image, in bytes format.
  Returns: the response object handed back by client.text_detection, unmodified.
  """
  # Wrap the raw bytes in a Vision image and forward the request in one step
  return client.text_detection(image=vision.Image(content=img))

### Function that gets the objects from an image

In [62]:
def getObjectsFromImage(img):
  """Send one image to the Vision API object localization endpoint.

  img: content of a single image, in bytes format.
  Returns: the response object handed back by client.object_localization, unmodified.
  """
  # Wrap the raw bytes in a Vision image and forward the request in one step
  return client.object_localization(image=vision.Image(content=img))

### Process all images

Function that reads the image

In [63]:
def open_img(path_images, name):
  """Read one image file from disk and return its content as bytes.

  path_images: path of the folder that contains the image.
  name: file name of the image to open inside that folder.
  Returns: the complete content of the file, in bytes format.
  """
  # The file is opened in binary mode and closed again when the with-block exits
  with io.open(f'{path_images}/{name}', 'rb') as handle:
    return handle.read()

Function that saves the response

In [64]:
def save_response(name_file,content_names,content_hate,content_text,content_objects):
  """Save the API responses of a whole dataset into one csv file.

  The file is written to '<images_location_processed>/<name_file>.csv' with the
  columns Name, hate, text and object, one row per image and no index column.

  name_file: name of the csv file (without extension) where the content is saved.
  content_names: list of all the names of the images.
  content_hate: list of the categorical classification of all the images (0: No hate | 1: Hate).
  content_text: list of all the responses from the getText API function.
  content_objects: list of all the responses from the getObjects API function.
  Returns: None.
  """
  # Create the output folder when it is missing: without it to_csv fails, and
  # by then every (slow, billed) API response would already have been collected.
  os.makedirs(images_location_processed, exist_ok=True)

  # Create the route for the save file
  path_to_file = f'{images_location_processed}/{name_file}.csv'

  # Create the df: one column per list, all lists are expected to be the same length
  data = {'Name': content_names,
          'hate': content_hate,
          'text': content_text,
          'object': content_objects}
  df = pd.DataFrame(data)

  # Save the dataframe as a csv file
  df.to_csv(path_to_file, index=False)

# Main Loop

In [65]:
def mainGetTextAndObjectsFromImages(origin):
  """Iterate over all the images of one dataset, get their text and objects, and save them.

  Every image is opened with open_img and sent to getTextFromImage and
  getObjectsFromImage; the collected responses are handed to save_response
  under the file name '<origin>_images_API_response'.

  origin: which dataset to use --> 'our' processes our memes, 'facebook'
          processes the facebook memes.
  Returns: 1 when origin is neither 'our' nor 'facebook' (nothing is processed
           or saved); None otherwise.
  """
  # Create Timer to measure time spent
  start = time.time()
  print("Start time: ",start)

  # Check if we are using our own dataset or facebook dataset.
  # The module-level lists are only looked up inside the selected branch, so
  # the other dataset does not need to be loaded.
  if origin == "our":
    dataset = images_name_list
    dataset_hate = hate_list
    path_open = images_location_original
  elif origin == "facebook":
    dataset = facebook_images_name_list
    dataset_hate = facebook_hate_list
    path_open = images_location_facebook
  else:
    print("ERROR: Choose 'our' or 'facebook' dataset")
    return (1)

  print("Path: ",path_open)

  # Lists to save the responses, in the same order as the dataset
  list_text = []
  list_object = []

  print_count = 50  # Progress is printed once every print_count processed images
  processed = 0     # Number of images fully processed so far (stays 0 for an empty dataset)

  # Loop all the Images
  for processed, image_name in enumerate(dataset, start=1):
    # Open Image
    content = open_img(path_open, image_name)

    # Process Image: get the text and the objects, and keep both responses
    list_text.append(getTextFromImage(content))
    list_object.append(getObjectsFromImage(content))

    # Report progress after the image is done, so the average covers exactly
    # the images that have been processed.
    if processed % print_count == 0:
      print("Iteration: " + str(processed) + " | Avg Time/img: " + str((time.time()-start)/processed))

  # Save to file
  save_response(f'{origin}_images_API_response',dataset,dataset_hate,list_text,list_object)

  end = time.time()
  print("End time: ",end)
  print("Time elapsed (sec): ",end - start)
  # Divide by the number of images actually processed (not one more), and
  # avoid a division by zero when the dataset is empty.
  average = (end - start) / processed if processed else 0.0
  print("Avg Time/img: " + str(average))

## Run Everything

In [66]:
# Run the text/object extraction for "our" own memes; the responses are passed
# to save_response under the name 'our_images_API_response'.
mainGetTextAndObjectsFromImages("our")

Start time:  1640699806.246478
Path:  ../data/raw
Iteration: 20 | Avg Time/img: 3.1469215631484984
Iteration: 40 | Avg Time/img: 2.7817333161830904
Iteration: 60 | Avg Time/img: 2.9542929848035175
Iteration: 80 | Avg Time/img: 2.851920560002327
Iteration: 100 | Avg Time/img: 2.850111794471741
Iteration: 120 | Avg Time/img: 2.831601725021998
Iteration: 140 | Avg Time/img: 2.726032326902662
Iteration: 160 | Avg Time/img: 2.5945176064968107
Iteration: 180 | Avg Time/img: 2.4223732577429877
Iteration: 200 | Avg Time/img: 2.404889520406723
Iteration: 220 | Avg Time/img: 2.280617496100339
Iteration: 240 | Avg Time/img: 2.167714564005534
Iteration: 260 | Avg Time/img: 2.067071215923016
Iteration: 280 | Avg Time/img: 1.9852360308170318
Iteration: 300 | Avg Time/img: 1.9272727926572164
Iteration: 320 | Avg Time/img: 1.8645831286907195
Iteration: 340 | Avg Time/img: 1.8311321097261766
Iteration: 360 | Avg Time/img: 1.780944996409946
Iteration: 380 | Avg Time/img: 1.741428926743959
Iteration: 400

In [None]:
# Run the text/object extraction for the "facebook" memes; the responses are
# passed to save_response under the name 'facebook_images_API_response'.
mainGetTextAndObjectsFromImages("facebook")

# Google Cloud Vision Functions:


Text detection: **text_detection()**
https://cloud.google.com/vision/docs/ocr

For documents and handwritten text:  **document_text_detection()**
https://cloud.google.com/vision/docs/handwriting

Detect Faces: **face_detection()** 
https://cloud.google.com/vision/docs/detecting-faces

Get colors: **image_properties()**
https://cloud.google.com/vision/docs/detecting-properties

Detect Tags of an Image: **label_detection()**
https://cloud.google.com/vision/docs/labels


Detect Landmarks: **landmark_detection()**
https://cloud.google.com/vision/docs/detecting-landmarks

Detect Logos: **logo_detection()**
https://cloud.google.com/vision/docs/detecting-logos

Detect Object and position: **object_localization()**
https://cloud.google.com/vision/docs/object-localizer

Detect explicit Content: **safe_search_detection()**
https://cloud.google.com/vision/docs/detecting-safe-search

Detect Web Pages: **web_detection()**
https://cloud.google.com/vision/docs/detecting-web