<a href="https://colab.research.google.com/github/alexabruck/speculative-datasets/blob/master/speculative_datasets_mouths_of_comedians.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install SPARQLWrapper

In [15]:
from SPARQLWrapper import SPARQLWrapper, JSON

ENDPOINT_URL = "https://query.wikidata.org/sparql"

def get(query):
    user_agent = "Speculative Datasets (https://github.com/alexabruck/speculative-datasets)"
    # TODO adjust user agent; see https://w.wiki/CX6
    sparql = SPARQLWrapper(ENDPOINT_URL, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

query = """select distinct ?item ?itemLabel ?itemDescription ?pic where {
    ?item wdt:P31 wd:Q5;  # Any instance of a human.
          wdt:P106 wd:Q245068; #comedian
          wdt:P18 ?image_. #has pic
          BIND(REPLACE(wikibase:decodeUri(STR(?image_)), "http://commons.wikimedia.org/wiki/Special:FilePath/", "") AS ?imageFileName_)
          BIND(REPLACE(?imageFileName_, " ", "_") AS ?imageFileNameSafe_)
          BIND(MD5(?imageFileNameSafe_) AS ?imageFileNameHash_)
          BIND(CONCAT("https://upload.wikimedia.org/wikipedia/commons/thumb/", SUBSTR(?imageFileNameHash_, 1 , 1 ), "/", SUBSTR(?imageFileNameHash_, 1 , 2 ), "/", ?imageFileNameSafe_, "/300px-", ?imageFileNameSafe_) AS ?pic)
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en,nl" }
}"""

response = get(query)
items = response["results"]["bindings"]
pics = [item['pic']['value'] for item in items]
print(pics)

['https://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Hugh_Laurie_@_El_Rey_Theatre8.jpg/300px-Hugh_Laurie_@_El_Rey_Theatre8.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/9/9c/Bourvil2.jpg/300px-Bourvil2.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/8/8d/Loriot_by_Philipp_von_Ostau.jpg/300px-Loriot_by_Philipp_von_Ostau.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/b/b8/Ryan_Higa_by_Gage_Skidmore.jpg/300px-Ryan_Higa_by_Gage_Skidmore.jpg', "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Ed_O'Neill_at_2015_PaleyFest.jpg/300px-Ed_O'Neill_at_2015_PaleyFest.jpg", 'https://upload.wikimedia.org/wikipedia/commons/thumb/c/ce/Janeane_Garofalo_October_2006.jpg/300px-Janeane_Garofalo_October_2006.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/0/07/Kabarett_der_Komiker;_Jacques_Tati_013575a.jpg/300px-Kabarett_der_Komiker;_Jacques_Tati_013575a.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/7/76/Nino_Manfredi,_1990.jpg

## Download the pics

In [None]:
!mkdir pics

for pic in pics:
  !wget $pic -P pics

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

--2021-10-10 09:50:56--  https://upload.wikimedia.org/wikipedia/commons/thumb/2/2c/Soran_Ismail_in_Jan_2013.jpg/300px-Soran_Ismail_in_Jan_2013.jpg
Resolving upload.wikimedia.org (upload.wikimedia.org)... 208.80.154.240, 2620:0:861:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|208.80.154.240|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 32249 (31K) [image/jpeg]
Saving to: ‘pics/300px-Soran_Ismail_in_Jan_2013.jpg’


2021-10-10 09:50:56 (2.71 MB/s) - ‘pics/300px-Soran_Ismail_in_Jan_2013.jpg’ saved [32249/32249]

/bin/bash: _Kimderella_movie_premiere.jpg: command not found
/bin/bash: _Kimderella_movie_premiere.jpg/300px-Gina_Riley_at_Kath_: No such file or directory
--2021-10-10 09:50:56--  https://upload.wikimedia.org/wikipedia/commons/thumb/4/46/Gina_Riley_at_Kath_
Resolving upload.wikimedia.org (upload.wikimedia.org)... 208.80.154.240, 2620:0:861:ed1a::2:b
Connecting to up

## Import Computer Vision libs

In [63]:
!pip install mediapipe
import cv2
import mediapipe as mp
import os



In [79]:
rm -rd results

In [78]:
INPUT_FOLDER = 'pics/'
OUTPUT_FOLDER = 'results/'

!mkdir $OUTPUT_FOLDER

mpFaceMesh = mp.solutions.face_mesh
faceMesh = mpFaceMesh.FaceMesh()

#Landmark keypoints according to https://github.com/tensorflow/tfjs-models/blob/master/facemesh/mesh_map.jpg
LANDMARK_MOUTH_LEFT = 57
LANDMARK_MOUTH_RIGHT = 287
LANDMARK_MOUTH_UPPER = 164
LANDMARK_MOUTH_LOWER = 18

def process_image(filename):
  img = cv2.imread(INPUT_FOLDER + filename)
  imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  results = faceMesh.process(imgRGB)
  try:
    landmarks = results.multi_face_landmarks[0].landmark
  except:
    return
  mouth_left, mouth_right  = landmarks[LANDMARK_MOUTH_LEFT], landmarks[LANDMARK_MOUTH_RIGHT]
  mouth_top, mouth_bottom  = landmarks[LANDMARK_MOUTH_UPPER], landmarks[LANDMARK_MOUTH_LOWER]

  #NB: Landmarks are between 0 and 1, now calculating the cutout rectangle in real pixel values
  img_h, img_w, _ = img.shape

  rect_top_left = (int(mouth_left.x * img_w), int(mouth_top.y * img_h))
  rect_top_right = (int(mouth_right.x * img_w), int(mouth_top.y * img_h))
  rect_bottom_left = (int(mouth_left.x * img_w), int(mouth_bottom.y * img_h))
  rect_bottom_right = (int(mouth_right.x * img_w), int(mouth_bottom.y * img_h))

  cropped_image = img[rect_top_left[1]:rect_bottom_left[1], rect_top_left[0]:rect_top_right[0]]
  cv2.imwrite(OUTPUT_FOLDER + filename + "_mouth"+  ".jpg", cropped_image)

#For all images in the folder
for filename in os.listdir(INPUT_FOLDER):
  print(filename)
  process_image(filename)




mkdir: cannot create directory ‘results/’: File exists
300px-Janeane_Garofalo_October_2006.jpg
300px-Hugh_Laurie_@_El_Rey_Theatre8.jpg
300px-Ryan_Higa_by_Gage_Skidmore.jpg
300px-노홍철.JPG
300px-Loriot_by_Philipp_von_Ostau.jpg
300px-Jackie_Chan_July_2016.jpg
300px-Nino_Manfredi,_1990.jpg
300px-Bourvil2.jpg
300px-Bourvil2.jpg_mouth.jpg
