In [1]:
!pip install mediapipe

[31mERROR: Could not find a version that satisfies the requirement mediapipe (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for mediapipe[0m[31m
[0m

In [2]:
import cv2
import numpy
import mediapipe as mp
import os
import random
from PIL import Image
import math
import numpy as np


In [3]:
class HandDetector:
    """
    Finds Hands using the mediapipe library. Exports the landmarks
    in pixel format. Adds extra functionalities like finding how
    many fingers are up or the distance between two fingers. Also
    provides bounding box info of the hand found.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, minTrackCon=0.5):
        """
        :param mode: In static mode, detection is done on each image: slower
        :param maxHands: Maximum number of hands to detect
        :param detectionCon: Minimum Detection Confidence Threshold
        :param minTrackCon: Minimum Tracking Confidence Threshold
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.minTrackCon = minTrackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode, max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.minTrackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]
        self.fingers = []
        self.lmList = []

    def findHands(self, img, draw=True, flipType=True):
        """
        Finds hands in a BGR image.
        :param img: Image to find the hands in.
        :param draw: Flag to draw the output on the image.
        :return: Image with or without drawings
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        allHands = []
        h, w, c = img.shape
        if self.results.multi_hand_landmarks:
            for handType, handLms in zip(self.results.multi_handedness, self.results.multi_hand_landmarks):
                myHand = {}
                ## lmList
                mylmList = []
                xList = []
                yList = []
                for id, lm in enumerate(handLms.landmark):
                    px, py, pz = int(lm.x * w), int(lm.y * h), int(lm.z * w)
                    mylmList.append([px, py, pz])
                    xList.append(px)
                    yList.append(py)

                ## bbox
                xmin, xmax = min(xList), max(xList)
                ymin, ymax = min(yList), max(yList)
                boxW, boxH = xmax - xmin, ymax - ymin
                bbox = xmin, ymin, boxW, boxH
                cx, cy = bbox[0] + (bbox[2] // 2), \
                         bbox[1] + (bbox[3] // 2)

                myHand["lmList"] = mylmList
                myHand["bbox"] = bbox
                myHand["center"] = (cx, cy)

                if flipType:
                    if handType.classification[0].label == "Right":
                        myHand["type"] = "Left"
                    else:
                        myHand["type"] = "Right"
                else:
                    myHand["type"] = handType.classification[0].label
                allHands.append(myHand)

                ## draw
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
                    cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                                  (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                                  (0, 0, 0), 2)
                    cv2.putText(img, myHand["type"], (bbox[0] - 30, bbox[1] - 30), cv2.FONT_HERSHEY_PLAIN,
                                2, (0, 0, 255), 2)
        if draw:
            return allHands, img
        else:
            return allHands

    def findDistance(self, p1, p2, img=None):
        """
        Find the distance between two landmarks based on their
        index numbers.
        :param p1: Point1
        :param p2: Point2
        :param img: Image to draw on.
        :param draw: Flag to draw the output on the image.
        :return: Distance between the points
                 Image with output drawn
                 Line information
        """

        x1, y1 = p1
        x2, y2 = p2
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        length = math.hypot(x2 - x1, y2 - y1)
        info = (x1, y1, x2, y2, cx, cy)
        if img is not None:
            cv2.circle(img, (x1, y1), 15, (154, 50, 168), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (154, 50, 168), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (154, 50, 168), 2)
            cv2.circle(img, (cx, cy), 15, (154, 50, 168), cv2.FILLED)
            return length, info, img
        else:
            return length, info

In [4]:
class DragImg():
    def __init__(self, path, posOrigin, size, imgType):
 
        self.posOrigin = posOrigin
        self.imgType = imgType
        self.path = path
 
        if self.imgType == 'png':
            self.img = cv2.imread(self.path, cv2.IMREAD_UNCHANGED)
        else:
            self.img = cv2.imread(self.path)
 
        self.img = cv2.resize(self.img, (0,0),None,size,size)
 
        self.size = self.img.shape[:2]
        print(self.size)
 
    def update(self, cursor):
        ox, oy = self.posOrigin
        h, w = self.size
 
        # Check if in region
        if ox < cursor[0] < ox + w and oy < cursor[1] < oy + h:
            self.posOrigin = cursor[0] - w // 2, cursor[1] - h // 2

In [5]:
def overlayPNG(imgBack, imgFront, pos=[0, 0]):
    hf, wf, cf = imgFront.shape
    hb, wb, cb = imgBack.shape
    *_, mask = cv2.split(imgFront)
    maskBGRA = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGRA)
    maskBGR = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
    imgRGBA = cv2.bitwise_and(imgFront, maskBGRA)
    imgRGB = cv2.cvtColor(imgRGBA, cv2.COLOR_BGRA2BGR)

    imgMaskFull = np.zeros((hb, wb, cb), np.uint8)
    imgMaskFull[pos[1]:hf + pos[1], pos[0]:wf + pos[0], :] = imgRGB
    imgMaskFull2 = np.ones((hb, wb, cb), np.uint8) * 255
    maskBGRInv = cv2.bitwise_not(maskBGR)
    imgMaskFull2[pos[1]:hf + pos[1], pos[0]:wf + pos[0], :] = maskBGRInv

    imgBack = cv2.bitwise_and(imgBack, imgMaskFull2)
    imgBack = cv2.bitwise_or(imgBack, imgMaskFull)

    return imgBack

In [6]:
path = 'images'
paths = os.listdir(path)

In [9]:
paths.remove('.DS_Store')

ValueError: list.remove(x): x not in list

In [10]:
paths

['sun_bg.png', 'hills_bg.png', 'tree.png', 'sandwich.png', 'beach_bg.png']

In [7]:
images = []
backgrounds = []
for x, img_path in enumerate(paths):
    print(img_path)
    if 'png' in img_path:
        img_type = 'png'
    else:
        img_type = 'jpg'
    if 'bg' in img_path:
        size = 0.2
    else:
        size = 0.1
    images.append(DragImg(f'{path}/{img_path}', [50 + x * 200, 50 + x * 10], size,img_type))

sun_bg.png
(94, 172)
hills_bg.png
(144, 256)
tree.png
(127, 130)
sandwich.png
(86, 97)
beach_bg.png
(140, 172)


In [None]:
cap = cv2.VideoCapture(0)
cap.set(3, 1280)
cap.set(4, 720)
 
# detector = HandDetector(detectionCon=0.8)
detector = HandDetector(detectionCon=0.8, maxHands=2)

while True:
    success, img = cap.read()
    font = cv2.FONT_HERSHEY_SIMPLEX
    
    img = cv2.flip(img, 1)
    hands, img = detector.findHands(img, flipType=False)
    
    rect1 = cv2.rectangle(img, (1200,400), (900, 700), (255, 0, 0), 2)
    print(rect1.shape)
    cv2.putText(img,'Put your objects here',(900,390), font,1,(0, 255, 255), 2, cv2.LINE_4)
    #cv2.putText(img,'Put your objects here',(rect1.shape), font,1,(0, 255, 255), 2, cv2.LINE_4)
    
    rect2 = cv2.rectangle(img, (1200,50), (930, 350), (0, 255, 0), 2)
    cv2.putText(img,'Put your background here',(840, 40), font,1, (0, 255, 255), 2, cv2.LINE_4)
    

    
    if hands:
        lmList = hands[0]['lmList']
#         print(lmList)
#         print(lmList[8][:2])
#         print(lmList[12][:2])
        # Check if clicked
        length, info, img = detector.findDistance(lmList[8][:2], lmList[12][:2], img)
#         print(length)
        if length < 60:
            cursor = lmList[8]
            for img_obj in images:
                img_obj.update(cursor)
#     try:
    foreground, background = None, None
    for imgObject in images:
        print(imgObject)
        # Draw for JPG image
        h, w = imgObject.size
        print(imgObject.size)

        ox, oy = imgObject.posOrigin
        if imgObject.imgType == "png":
            # Draw for PNG Images
            img = overlayPNG(img, imgObject.img, [ox, oy])
        else:
            img[oy:oy + h, ox:ox + w] = imgObject.img

        if 900 <= ox <= 1200 and 400 <= oy <= 700:
#             print('ox,oy',ox,oy)
            
            foreground = imgObject.img
            cv2.imwrite('fg_bg/foreground.png', foreground)
#             fg_bg.remove('.DS_Store')
        if 900 <= ox <= 1200 and 50 <= oy <= 350:
#             print('ox_bg,oy_bg', ox,oy)
            background = imgObject.img
            cv2.imwrite('fg_bg/background.png', background)
#             fg_bg.remove('.DS_Store')
    if background is not None and foreground is not None:
        bg = Image.open('fg_bg/background.png')
        fg = Image.open('fg_bg/foreground.png')
        bg_width, bg_height = bg.size
        bg_width = bg_width + 20
        bg_height = bg_height + 20
        pos_x = random.randint(0,bg_width - 10)
        pos_y = random.randint(0,bg_height - 10)

        fg_resized = fg.resize((int(bg_width/5), int(bg_height/5)), Image.LANCZOS)
        bg.paste(fg_resized,(pos_x,pos_y) , fg_resized)
        bg.save("overlay.png")
        added = cv2.imread('overlay.png',cv2.IMREAD_UNCHANGED)
        added = cv2.resize(added, (500, 300))  
        cv2.imshow('Combined image', added)

#     except:
#         pass

    cv2.imshow("Image", img)
    cv2.waitKey(1)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

(720, 1280, 3)
<__main__.DragImg object at 0x7feaf09e4a30>
(94, 172)
<__main__.DragImg object at 0x7feab0009c40>
(144, 256)
<__main__.DragImg object at 0x7feab0009fd0>
(127, 130)
<__main__.DragImg object at 0x7feab0009ca0>
(86, 97)
<__main__.DragImg object at 0x7feae1382fa0>
(140, 172)
(720, 1280, 3)
<__main__.DragImg object at 0x7feaf09e4a30>
(94, 172)
<__main__.DragImg object at 0x7feab0009c40>
(144, 256)
<__main__.DragImg object at 0x7feab0009fd0>
(127, 130)
<__main__.DragImg object at 0x7feab0009ca0>
(86, 97)
<__main__.DragImg object at 0x7feae1382fa0>
(140, 172)
(720, 1280, 3)
<__main__.DragImg object at 0x7feaf09e4a30>
(94, 172)
<__main__.DragImg object at 0x7feab0009c40>
(144, 256)
<__main__.DragImg object at 0x7feab0009fd0>
(127, 130)
<__main__.DragImg object at 0x7feab0009ca0>
(86, 97)
<__main__.DragImg object at 0x7feae1382fa0>
(140, 172)
(720, 1280, 3)
<__main__.DragImg object at 0x7feaf09e4a30>
(94, 172)
<__main__.DragImg object at 0x7feab0009c40>
(144, 256)
<__main__.DragI