In [1]:
from google.cloud import vision
from google.cloud import storage
# `from google.cloud.vision import types` throws an error because the
# `types` module was replaced in google-cloud-vision v2; it is imported
# from `vision_v1` instead (next line).
from google.cloud.vision_v1 import types
import os
import io
import numpy as np
import webcolors
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

# Constants
# Service-account key file; the same path is exported through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable below.
path_to_credentials = "dsci551-2480c-4e478b8d0198.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = path_to_credentials

# Google Cloud project settings
project_id = "dsci551-2480c"
region = "us-east1"
bucket_name = "dsci551_storage"

In [3]:
# Vision API client used for the label-detection and object-localization
# calls in the capture loop below. No credentials are passed explicitly;
# presumably the client picks up the key file named in the
# GOOGLE_APPLICATION_CREDENTIALS environment variable set in the constants
# cell -- TODO confirm.
client = vision.ImageAnnotatorClient()

# Maps each raw label name to the canonical clothing class it is replaced
# with. Entries are grouped by canonical class; key order is preserved.
correction_hashmap = {
    **dict.fromkeys(("One-piece garment", "Day dress"), "Dress"),
    **dict.fromkeys(("High heels", "Sandal", "Basic pump"), "Heels"),
    **dict.fromkeys(("Dress shirt",), "Shirt"),
    **dict.fromkeys(("Footwear", "Outdoor shoe", "Walking shoe", "Sneakers"), "Shoe"),
    **dict.fromkeys(("Miniskirt",), "Skirt"),
    **dict.fromkeys(("Active tank", "Undershirt"), "Tank top"),
}
# Labels accepted as a final clothing prediction (the detected labels are
# intersected with this list). It holds both the raw API names and the
# canonical class names that correction_hashmap rewrites them to. The
# canonical names "Heels" and "Tank top" have to be listed too: once a raw
# label such as "High heels" or "Undershirt" has been corrected, only the
# canonical name is left to match against this list.
required_labels = ["One-piece garment", "Day dress", "Dress",
                  "High heels", "Sandal", "Basic pump", "Heels",
                  "Jeans",
                  "Shirt", "Dress shirt",
                  "Shoe", "Footwear", "Outdoor shoe", "Walking shoe", "Sneakers",
                  "Shorts",
                  "Miniskirt", "Skirt",
                  "Active tank", "Undershirt", "Tank top",
                  "T-shirt",
                  "Coat"]

# Object-localization names whose bounding box is used to crop the
# captured frame.
labels_for_cropping = [
    "Top",
    "Dress",
    "Day dress",
    "High heels",
    "Shorts",
    "Miniskirt",
    "Coat",
    "Shoe",
]

# File the captured webcam frame is written to before it is read back and
# sent to the Vision API.
img_name = "pic_for_clothing_detection.png"

# Webcam capture and clothing-detection loop.
#   SPACE - save the current frame, send it to the Vision API (label
#           detection + object localization), show the predicted clothing
#           type(s) and a crop of the localized item.
#   ESC   - leave the loop.
# The camera and all OpenCV windows are released when the loop ends, even if
# an exception (for example a failed API call) interrupts it.
window_title = "Capture Image of Clothing Item"

cam = cv2.VideoCapture(0)
width  = cam.get(cv2.CAP_PROP_FRAME_WIDTH)   # float `width`
height = cam.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float `height`
cam.set(cv2.CAP_PROP_AUTO_EXPOSURE, 3)
# Setting exposure to fixed value
#cam.set(cv2.CAP_PROP_EXPOSURE, 1)
cv2.namedWindow(window_title)
pred_clothing_type = [""]
x1,x2,x3,x4,y1,y2,y3,y4 = 5,5,5,5,5,5,5,5

# required_labels mixes raw API names with canonical class names, so it is
# normalized through correction_hashmap once; detected labels are corrected
# the same way and matched against this set.
accepted_labels = {correction_hashmap.get(label, label) for label in required_labels}

try:
    while True:
        ret, frame = cam.read()
        # Check the read result before touching `frame`: drawing on a frame
        # that was not grabbed would raise instead of reporting the failure.
        if not ret:
            print("failed to grab frame")
            break

        # Draw the overlay on a copy so the text is not baked into the image
        # that gets saved, sent to the API and cropped.
        preview = frame.copy()
        cv2.putText(preview, "Type = " + ", ".join(pred_clothing_type),
                    (220, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 255, 255))
        cv2.imshow(window_title, preview)

        k = cv2.waitKey(1)
        if k%256 == 27:
            # ESC pressed
            print("Escape hit, closing...")
            break
        elif k%256 == 32:
            # SPACE pressed
            cv2.imwrite(img_name, frame)
            print("Picture Captured")
            # Read saved image for vision api call
            with io.open(img_name, 'rb') as image_file:
                content = image_file.read()
            image = types.Image(content=content)
            # Generate labels and (labels + crop hints)
            all_labels = client.label_detection(image=image)
            labels_and_crops = client.object_localization(image=image).localized_object_annotations
            temp_labels_from_crop_list = [detection.name for detection in labels_and_crops]
            temp_label_list = [annotation.description for annotation in all_labels.label_annotations]
            # Combine the labels from both calls and correct every occurrence
            # of an alias (not only the first one) to its canonical class.
            combined_labels = [correction_hashmap.get(label, label)
                               for label in temp_labels_from_crop_list + temp_label_list]
            # Sorted, de-duplicated list of plain strings; also well defined
            # when no label at all was returned.
            pred_clothing_type = sorted(accepted_labels.intersection(combined_labels))

            crop_found = False
            if not pred_clothing_type:
                pred_clothing_type = ["Nothing Detected"]
            else:
                # Scale the normalized vertices by the size of the frame that
                # was actually captured.
                frame_height, frame_width = frame.shape[:2]
                for detection in labels_and_crops:
                    if detection.name in labels_for_cropping:
                        print(detection.name)
                        print(detection.bounding_poly)
                        vertices = detection.bounding_poly.normalized_vertices
                        x1 = int(vertices[0].x * frame_width)
                        y1 = int(vertices[0].y * frame_height)
                        x2 = int(vertices[1].x * frame_width)
                        y2 = int(vertices[1].y * frame_height)
                        x3 = int(vertices[2].x * frame_width)
                        y3 = int(vertices[2].y * frame_height)
                        x4 = int(vertices[3].x * frame_width)
                        y4 = int(vertices[3].y * frame_height)
                        # When several objects match, the last one wins.
                        crop_found = True

            # Only show a crop that comes from this capture and is non-empty;
            # cv2.imshow fails on an empty image.
            if crop_found and y3 > y1 and x3 > x1:
                cv2.imshow("Cropped Image", frame[y1 : y3, x1 : x3])

            print(combined_labels)
            print(pred_clothing_type, "\n\n")
finally:
    cam.release()
    cv2.destroyAllWindows()

Picture Captured
Top
normalized_vertices {
  x: 0.16308198869228363
  y: 0.18126101791858673
}
normalized_vertices {
  x: 0.4218633770942688
  y: 0.18126101791858673
}
normalized_vertices {
  x: 0.4218633770942688
  y: 0.6214043498039246
}
normalized_vertices {
  x: 0.16308198869228363
  y: 0.6214043498039246
}

Top
normalized_vertices {
  x: 0.6470707654953003
  y: 0.541446328163147
}
normalized_vertices {
  x: 0.9973958134651184
  y: 0.541446328163147
}
normalized_vertices {
  x: 0.9973958134651184
  y: 0.9972038269042969
}
normalized_vertices {
  x: 0.6470707654953003
  y: 0.9972038269042969
}

['Person', 'Jeans', 'Person', 'Top', 'Top', 'Luggage & bags', 'Glasses', 'Human', 'Fashion', 'Street fashion', 'Standing', 'Eyewear', 'Gesture', 'T-shirt', 'Waist', 'Travel', 'Fun']
['Jeans', 'T-shirt'] 


Escape hit, closing...


# Experimentation

In [4]:
# Frame size reported by the camera: height first, then width.
print(f"{height} {width}")

480.0 640.0
