<a href="https://colab.research.google.com/github/alexabruck/speculative-datasets/blob/master/Mouths_of_comedians.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install SPARQLWrapper

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Wikidata SPARQL endpoint that get() sends its queries to.
ENDPOINT_URL = "https://query.wikidata.org/sparql"

def get(query):
    """Send a SPARQL query to ENDPOINT_URL and return the converted response.

    Args:
        query: SPARQL query text.

    Returns:
        Whatever SPARQLWrapper yields after converting the JSON response.
    """
    # TODO adjust user agent; see https://w.wiki/CX6
    client = SPARQLWrapper(
        ENDPOINT_URL,
        agent="Speculative Datasets (https://github.com/alexabruck/speculative-datasets)",
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    raw_result = client.query()
    return raw_result.convert()

# Selects every human (P31 = Q5) tagged as comedian (P106 = Q245068) that has
# an image (P18). The BIND chain turns the image value into a 300px thumbnail
# URL on upload.wikimedia.org: strip the Special:FilePath prefix from the
# decoded URI, replace spaces with underscores, MD5-hash that file name, and
# use the first one and the first two characters of the hash as the two
# directory levels of the thumb path. The finished URL is bound to ?pic.
query = """select distinct ?item ?itemLabel ?itemDescription ?pic where {
    ?item wdt:P31 wd:Q5;  # Any instance of a human.
          wdt:P106 wd:Q245068; #comedian
          wdt:P18 ?image_. #has pic
          BIND(REPLACE(wikibase:decodeUri(STR(?image_)), "http://commons.wikimedia.org/wiki/Special:FilePath/", "") AS ?imageFileName_)
          BIND(REPLACE(?imageFileName_, " ", "_") AS ?imageFileNameSafe_)
          BIND(MD5(?imageFileNameSafe_) AS ?imageFileNameHash_)
          BIND(CONCAT("https://upload.wikimedia.org/wikipedia/commons/thumb/", SUBSTR(?imageFileNameHash_, 1 , 1 ), "/", SUBSTR(?imageFileNameHash_, 1 , 2 ), "/", ?imageFileNameSafe_, "/300px-", ?imageFileNameSafe_) AS ?pic)
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en,nl" }
}"""

# Run the query, then pull the thumbnail URL out of every result binding.
response = get(query)
items = response["results"]["bindings"]
pics = list(map(lambda binding: binding['pic']['value'], items))
print(pics)

## Download the pics

In [None]:
!mkdir pics

for pic in pics:
  !wget $pic -P pics

## Import Computer Vision libs

In [None]:
!pip install mediapipe
import cv2
import mediapipe as mp
import os

In [None]:
# Shell command (IPython automagic): recursively delete the results folder
# so the following cells can recreate it from scratch.
rm -rd results

In [None]:
INPUT_FOLDER = 'pics/'
OUTPUT_FOLDER = 'results/'

!mkdir $OUTPUT_FOLDER

mpFaceMesh = mp.solutions.face_mesh
# The inputs are unrelated still pictures, not consecutive video frames.
# static_image_mode=True makes MediaPipe run face detection on every image;
# the default (False) treats the inputs as a video stream and tries to keep
# tracking the face found in an earlier picture.
faceMesh = mpFaceMesh.FaceMesh(static_image_mode=True)

# Landmark keypoints according to https://github.com/tensorflow/tfjs-models/blob/master/facemesh/mesh_map.jpg
# These are indices into the face-mesh landmark list; process_image() uses the
# x of LEFT/RIGHT and the y of UPPER/LOWER as the edges of the mouth cutout.
LANDMARK_MOUTH_LEFT = 57
LANDMARK_MOUTH_RIGHT = 287
LANDMARK_MOUTH_UPPER = 164
LANDMARK_MOUTH_LOWER = 18

def process_image(filename):
  """Crop the mouth region out of one picture and save it as a JPEG.

  Reads INPUT_FOLDER + filename, runs the shared faceMesh detector on it and
  writes the rectangle spanned by the four LANDMARK_MOUTH_* landmarks of the
  first detected face to OUTPUT_FOLDER + filename + "_mouth.jpg".

  The picture is skipped (nothing is written, a short note is printed) when
  it cannot be decoded, when no face is found, or when the mouth rectangle
  is empty after clipping it to the image.

  Args:
    filename: Name of the file inside INPUT_FOLDER.

  Returns:
    None.
  """
  img = cv2.imread(INPUT_FOLDER + filename)
  if img is None:
    # cv2.imread signals a missing or undecodable file by returning None.
    print("  skipped: not a readable image")
    return

  # MediaPipe expects RGB, OpenCV loads BGR.
  results = faceMesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
  if not results.multi_face_landmarks:
    # multi_face_landmarks is None when no face was detected.
    print("  skipped: no face detected")
    return
  landmarks = results.multi_face_landmarks[0].landmark

  mouth_left, mouth_right = landmarks[LANDMARK_MOUTH_LEFT], landmarks[LANDMARK_MOUTH_RIGHT]
  mouth_top, mouth_bottom = landmarks[LANDMARK_MOUTH_UPPER], landmarks[LANDMARK_MOUTH_LOWER]

  #NB: Landmarks are normalized to the image size, now calculating the cutout rectangle in real pixel values
  img_h, img_w = img.shape[:2]

  # sorted() keeps the rectangle valid even if the face is rotated or mirrored
  # and the "left"/"upper" landmark ends up past the "right"/"lower" one.
  left, right = sorted((int(mouth_left.x * img_w), int(mouth_right.x * img_w)))
  top, bottom = sorted((int(mouth_top.y * img_h), int(mouth_bottom.y * img_h)))

  # Landmarks are not guaranteed to stay inside the picture; clip them so a
  # negative index cannot wrap around in the numpy slice below.
  left, right = max(left, 0), min(right, img_w)
  top, bottom = max(top, 0), min(bottom, img_h)
  if right <= left or bottom <= top:
    # cv2.imwrite raises on an empty image, which would abort the whole batch.
    print("  skipped: empty mouth region")
    return

  cropped_image = img[top:bottom, left:right]
  cv2.imwrite(OUTPUT_FOLDER + filename + "_mouth" + ".jpg", cropped_image)

# Run the mouth cutout on every entry of INPUT_FOLDER, printing each file
# name first so the output shows which picture is being processed.
for filename in os.listdir(INPUT_FOLDER):
  print(filename)
  process_image(filename)




In [None]:
# Pack the whole results folder into results.zip.
!zip -r results.zip results
# google.colab is the Colab helper package (presumably unavailable outside
# Colab -- confirm before running elsewhere); files.download is called here
# to hand results.zip to the user.
from google.colab import files
files.download("results.zip")