<a href="https://colab.research.google.com/github/alexabruck/speculative-datasets/blob/master/Mouths_of_comedians.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install SPARQLWrapper

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

ENDPOINT_URL = "https://query.wikidata.org/sparql"

def get(query):
    user_agent = "Speculative Datasets (https://github.com/alexabruck/speculative-datasets)"
    # TODO adjust user agent; see https://w.wiki/CX6
    sparql = SPARQLWrapper(ENDPOINT_URL, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

query = """select distinct ?item ?itemLabel ?itemDescription ?pic where {
    ?item wdt:P31 wd:Q5;  # Any instance of a human.
          wdt:P106 wd:Q245068; #comedian
          wdt:P18 ?image_. #has pic
          BIND(REPLACE(wikibase:decodeUri(STR(?image_)), "http://commons.wikimedia.org/wiki/Special:FilePath/", "") AS ?imageFileName_)
          BIND(REPLACE(?imageFileName_, " ", "_") AS ?imageFileNameSafe_)
          BIND(MD5(?imageFileNameSafe_) AS ?imageFileNameHash_)
          BIND(CONCAT("https://upload.wikimedia.org/wikipedia/commons/thumb/", SUBSTR(?imageFileNameHash_, 1 , 1 ), "/", SUBSTR(?imageFileNameHash_, 1 , 2 ), "/", ?imageFileNameSafe_, "/300px-", ?imageFileNameSafe_) AS ?pic)
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en,nl" }
}"""

response = get(query)
items = response["results"]["bindings"]
pics = [item['pic']['value'] for item in items]
print(pics)

['https://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Hugh_Laurie_@_El_Rey_Theatre8.jpg/300px-Hugh_Laurie_@_El_Rey_Theatre8.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/9/9c/Bourvil2.jpg/300px-Bourvil2.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/8/8d/Loriot_by_Philipp_von_Ostau.jpg/300px-Loriot_by_Philipp_von_Ostau.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/b/b8/Ryan_Higa_by_Gage_Skidmore.jpg/300px-Ryan_Higa_by_Gage_Skidmore.jpg', "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Ed_O'Neill_at_2015_PaleyFest.jpg/300px-Ed_O'Neill_at_2015_PaleyFest.jpg", 'https://upload.wikimedia.org/wikipedia/commons/thumb/c/ce/Janeane_Garofalo_October_2006.jpg/300px-Janeane_Garofalo_October_2006.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/0/07/Kabarett_der_Komiker;_Jacques_Tati_013575a.jpg/300px-Kabarett_der_Komiker;_Jacques_Tati_013575a.jpg', 'https://upload.wikimedia.org/wikipedia/commons/thumb/7/76/Nino_Manfredi,_1990.jpg

## Download the pics

In [None]:
!mkdir pics

for pic in pics:
  !wget $pic -P pics

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
--2021-10-10 09:56:23--  https://upload.wikimedia.org/wikipedia/commons/thumb/2/25/Benji_Lovitt_Headshot.jpg/300px-Benji_Lovitt_Headshot.jpg
Resolving upload.wikimedia.org (upload.wikimedia.org)... 208.80.154.240, 2620:0:861:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|208.80.154.240|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 15748 (15K) [image/jpeg]
Saving to: ‘pics/300px-Benji_Lovitt_Headshot.jpg’


2021-10-10 09:56:23 (6.56 MB/s) - ‘pics/300px-Benji_Lovitt_Headshot.jpg’ saved [15748/15748]

--2021-10-10 09:56:24--  https://upload.wikimedia.org/wikipedia/commons/thumb/f/f2/Biswa_Kalyan_Rath.jpg/300px-Biswa_Kalyan_Rath.jpg
Resolving upload.wikimedia.org (upload.wikimedia.org)... 208.80.154.240, 2620:0:861:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|208.80.154.240|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 21024 (2

## Import Computer Vision libs

In [None]:
!pip install mediapipe
import cv2
import mediapipe as mp
import os



In [None]:
rm -rd results

rm: cannot remove 'results': No such file or directory


In [None]:
INPUT_FOLDER = 'pics/'
OUTPUT_FOLDER = 'results/'

!mkdir $OUTPUT_FOLDER

mpFaceMesh = mp.solutions.face_mesh
faceMesh = mpFaceMesh.FaceMesh()

#Landmark keypoints according to https://github.com/tensorflow/tfjs-models/blob/master/facemesh/mesh_map.jpg
LANDMARK_MOUTH_LEFT = 57
LANDMARK_MOUTH_RIGHT = 287
LANDMARK_MOUTH_UPPER = 164
LANDMARK_MOUTH_LOWER = 18

def process_image(filename):
  img = cv2.imread(INPUT_FOLDER + filename)
  try:
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = faceMesh.process(imgRGB)
    landmarks = results.multi_face_landmarks[0].landmark
  except:
    return
  mouth_left, mouth_right  = landmarks[LANDMARK_MOUTH_LEFT], landmarks[LANDMARK_MOUTH_RIGHT]
  mouth_top, mouth_bottom  = landmarks[LANDMARK_MOUTH_UPPER], landmarks[LANDMARK_MOUTH_LOWER]

  #NB: Landmarks are between 0 and 1, now calculating the cutout rectangle in real pixel values
  img_h, img_w, _ = img.shape

  rect_top_left = (int(mouth_left.x * img_w), int(mouth_top.y * img_h))
  rect_top_right = (int(mouth_right.x * img_w), int(mouth_top.y * img_h))
  rect_bottom_left = (int(mouth_left.x * img_w), int(mouth_bottom.y * img_h))
  rect_bottom_right = (int(mouth_right.x * img_w), int(mouth_bottom.y * img_h))

  cropped_image = img[rect_top_left[1]:rect_bottom_left[1], rect_top_left[0]:rect_top_right[0]]
  cv2.imwrite(OUTPUT_FOLDER + filename + "_mouth"+  ".jpg", cropped_image)

#For all images in the folder
for filename in os.listdir(INPUT_FOLDER):
  print(filename)
  process_image(filename)




mkdir: cannot create directory ‘results/’: File exists
300px-Fayez_Al-Malki_in_Rabah_wa_Al_Dctorah.jpg
300px-Kapil-Sharma-and-Ginni-Chatrath’s-wedding-reception.jpg.1
300px-Tal_Friedman_23032007.jpg
300px-Kanpei_Kagami_aka_Togarashi_Nanami_AUG1959_2.jpg
300px-Kausar_Mohammed.jpg
300px-Park_Hee-Jin.jpg
300px-Jo_Koy_1a.jpg
300px-John_Mahoney.jpg
300px-Jerry_Sadowitz_at_the_Greenock_Arts_Guild_cropped.jpg
300px-USIS_-_Ernst_Waldbrunn_1.jpg
300px-Albert_Algoud_ganache.jpg
300px-Pavol_Seriš_2020.jpg
300px-Ian_Comedy1.jpg
300px-Tom_Rosenthal_2013.jpg
300px-Sophisticating_Duker_drink.jpg
300px-Funny_women_final_2014_group.jpg
300px-Manolo_Viera_2.jpg
300px-John_Belushi_at_the_32nd_Annual_Radio_and_Television_Correspondents_Association_Dinner_-_NARA_-_30805929.jpg
300px-Javier_Cansado.jpg
300px-Lorna_Prichard_crop.jpg
300px-2009_CUN_Award_Party_Oscar_Nuñez_058.JPG
300px-Nicholas_Parsons_2007.png
300px-Florian_Hacke.jpg
300px-Tim_Ferguson_DAAS.jpg
300px-Jamie_Campbell.jpg
300px-Гарик_Бирча2.jpg

In [None]:
!zip -r results.zip results
from google.colab import files
files.download("results.zip")

updating: results/ (stored 0%)
updating: results/300px-Matthew_Holness_as_Merriman_Wier.jpg_mouth.jpg (deflated 17%)
updating: results/300px-Pete_Barbutti_1966.JPG_mouth.jpg (deflated 19%)
updating: results/300px-荒井注.jpg_mouth.jpg (deflated 6%)
updating: results/300px-Joan_Rivers_1.jpg_mouth.jpg (deflated 6%)
updating: results/300px-Whitney_Cummings_2019.png_mouth.jpg (deflated 9%)
updating: results/300px-Martin_Lepperød.png_mouth.jpg (deflated 15%)
updating: results/300px-Bernhard_hoecker_20061130.jpg_mouth.jpg (deflated 8%)
updating: results/300px-2012-05-31_Studio_Hamburg_Nachwuchspreis_DSCF0371.jpg_mouth.jpg (deflated 10%)
updating: results/300px-Adolf_Rakowitsch_1892_Vilimek.jpg_mouth.jpg (deflated 12%)
updating: results/300px-Shane_Dawson_by_Gage_Skidmore.jpg_mouth.jpg (deflated 19%)
updating: results/300px-GKpress.jpg_mouth.jpg (deflated 5%)
updating: results/300px-JoePesci-2009.jpg_mouth.jpg (deflated 7%)
updating: results/300px-Simon_Pegg_by_Gage_Skidmore.jpg_mouth.jpg (deflat

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>