In [1]:
pip install rembg ultralytics opencv-python numpy pillow onnxruntime

Note: you may need to restart the kernel to use updated packages.


In [None]:
import cv2
import numpy as np
from ultralytics import YOLO

# --- 1) Segment sneaker & find its heel pixel ---
def segment_shoe(path, grabcut_iters=3):
    """Cut a sneaker out of a plain-background photo and locate its pivot pixel.

    The background colour is estimated as the per-channel median of a
    10-pixel frame around the image border. Pixels whose colour distance to
    that estimate exceeds 30 seed a GrabCut refinement, and the result is
    cropped to the bounding box of the largest foreground contour.

    Args:
        path: Path of the shoe image, read with ``cv2.imread``.
        grabcut_iters: Number of GrabCut iterations to run.

    Returns:
        A tuple ``(shoe_rgba, pivot)``. ``shoe_rgba`` is the cropped
        4-channel uint8 image; despite the name, the channels are in OpenCV
        order (B, G, R) with the mask as the alpha channel. ``pivot`` is an
        ``(x, y)`` pair in cropped-image coordinates: the mean column of the
        top-most row that contains mask pixels.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If no foreground pixels are found.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read shoe image: {path!r}")
    h, w = img.shape[:2]

    # sample border for bg color
    top    = img[0:10, :].reshape(-1, 3)
    bottom = img[h-10:h, :].reshape(-1, 3)
    left   = img[:, 0:10].reshape(-1, 3)
    right  = img[:, w-10:w].reshape(-1, 3)
    bg     = np.median(np.vstack([top, bottom, left, right]), axis=0)

    # rough mask by color distance
    dist = np.linalg.norm(img.astype(float) - bg[None, None, :], axis=2)
    m0   = (dist > 30).astype('uint8') * 255
    if not m0.any():
        raise ValueError(
            f"No foreground found in {path!r}: every pixel matches the border colour"
        )

    if m0.all():
        # GrabCut needs both background and foreground seeds to fit its
        # colour models; with no background seed, keep the rough mask.
        m2 = m0
    else:
        # refine with GrabCut
        gc   = np.where(m0 > 0, cv2.GC_PR_FGD, cv2.GC_BGD).astype('uint8')
        bgdM = np.zeros((1, 65), np.float64)
        fgdM = np.zeros((1, 65), np.float64)
        cv2.grabCut(img, gc, None, bgdM, fgdM, grabcut_iters, cv2.GC_INIT_WITH_MASK)
        m2 = np.where((gc == cv2.GC_FGD) | (gc == cv2.GC_PR_FGD), 255, 0).astype('uint8')

    if not m2.any():
        # Fail with a clear message instead of a zero-size reduction error below.
        raise ValueError(f"Segmentation of {path!r} produced an empty mask")

    # crop to largest contour
    cnts, _ = cv2.findContours(m2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if cnts:
        x, y, ww, hh = cv2.boundingRect(max(cnts, key=cv2.contourArea))
        img = img[y:y+hh, x:x+ww]
        m2  = m2[y:y+hh, x:x+ww]

    # merge colour channels with the mask as alpha
    b, g, r = cv2.split(img)
    shoe_rgba = cv2.merge((b, g, r, m2))

    # find local heel pixel = top-most mask point
    ys, xs = np.where(m2 > 0)
    y0 = int(ys.min())
    x0 = int(xs[ys == y0].mean())
    pivot = (x0, y0)

    return shoe_rgba, pivot

# --- 2) Build foot trapezoid ---
def estimate_toe(ankle, knee, frac=1.0):
    """Extrapolate a toe position by extending the knee-to-ankle vector.

    The vector from ``knee`` to ``ankle`` is scaled by ``frac`` and added to
    the ankle. Both coordinates are converted with ``int()``, which truncates
    toward zero.

    Args:
        ankle: ``(x, y)`` of the ankle.
        knee: ``(x, y)`` of the knee.
        frac: Multiple of the knee-to-ankle vector to extend past the ankle.

    Returns:
        The estimated toe position as an ``(x, y)`` tuple of ints.
    """
    ankle_x, ankle_y = ankle[0], ankle[1]
    step_x = ankle_x - knee[0]
    step_y = ankle_y - knee[1]
    toe_x = int(ankle_x + step_x * frac)
    toe_y = int(ankle_y + step_y * frac)
    return (toe_x, toe_y)

def foot_box(ankle, knee, width_frac=0.4):
    """Build a four-corner box running from the ankle to the estimated toe.

    The toe comes from ``estimate_toe(ankle, knee)``. Two corners sit at the
    ankle and two at the toe, each offset along the perpendicular of the
    ankle-to-toe axis by ``width_frac`` times the axis length.

    Args:
        ankle: ``(x, y)`` of the ankle.
        knee: ``(x, y)`` of the knee.
        width_frac: Half-width of the box as a fraction of its length.

    Returns:
        A list of four ``(x, y)`` int tuples in the order ankle+, ankle-,
        toe-, toe+ (sign of the perpendicular offset), or ``None`` when the
        ankle-to-toe distance is below one pixel.
    """
    toe = estimate_toe(ankle, knee)
    along_x = toe[0] - ankle[0]
    along_y = toe[1] - ankle[1]
    length = np.hypot(along_x, along_y)
    if length < 1:
        return None

    # Perpendicular offset: unit normal scaled by the half-width.
    half_width = length * width_frac
    off_x = (-along_y / length) * half_width
    off_y = (along_x / length) * half_width

    corners = []
    for base, sign in ((ankle, 1), (ankle, -1), (toe, -1), (toe, 1)):
        corners.append((int(base[0] + sign * off_x), int(base[1] + sign * off_y)))
    return corners

# --- 3) Lift trapezoid along leg axis ---
def lift_box(box, knee, ankle, lift_frac):
    """Shift every corner of ``box`` back along the leg, toward the knee.

    The shift distance is ``lift_frac`` times the knee-to-ankle length, applied
    against the knee-to-ankle direction. Coordinates are converted with
    ``int()``, which truncates toward zero.

    Args:
        box: Iterable of ``(x, y)`` corner points.
        knee: ``(x, y)`` of the knee.
        ankle: ``(x, y)`` of the ankle.
        lift_frac: Fraction of the leg length to move the box by.

    Returns:
        A new list of ``(x, y)`` int tuples, or ``box`` itself (unchanged)
        when knee and ankle are closer than 1e-3.
    """
    leg = np.array(ankle, dtype=float) - np.array(knee, dtype=float)
    leg_len = np.linalg.norm(leg)
    if leg_len < 1e-3:
        # Degenerate leg: no direction to lift along.
        return box

    unit = leg / leg_len
    shift = leg_len * lift_frac
    lifted = []
    for x, y in box:
        lifted.append((int(x - unit[0] * shift), int(y - unit[1] * shift)))
    return lifted

# --- 4) Warp, pivot‐align heel, blend ---
def warp_and_blend(person, shoe_rgba, pivot, box, knee, ankle, lift_frac=0.06):
    """Warp the shoe cut-out onto the foot box and alpha-blend it into ``person``.

    The shoe image corners are mapped onto the lifted foot box with a
    perspective transform. A translation is then added so that ``pivot``
    (a point in shoe-image coordinates) lands exactly on ``ankle``.

    The translation is composed into the homography so the shoe is resampled
    once. Warping first and shifting afterwards would interpolate twice and
    permanently clip any part of the shoe that the first warp placed outside
    the canvas, even if the shift would have brought it back into view.

    Args:
        person: Target image; its first three channels are modified in place.
        shoe_rgba: 4-channel shoe image whose last channel is the alpha mask.
        pivot: ``(x, y)`` in ``shoe_rgba`` coordinates to align with the ankle.
        box: Four ``(x, y)`` corners receiving the shoe's top-left, top-right,
            bottom-right and bottom-left corners, in that order.
        knee: ``(x, y)`` of the knee, used for the lift direction.
        ankle: ``(x, y)`` of the ankle, the alignment target.
        lift_frac: Fraction of the leg length to lift the box toward the knee.

    Returns:
        The same ``person`` array, after blending.
    """
    H, W = person.shape[:2]
    h_s, w_s = shoe_rgba.shape[:2]

    # 1) lift foot box
    box_l = lift_box(box, knee, ankle, lift_frac)

    # 2) perspective transform from shoe corners into the lifted box
    src = np.float32([[0, 0], [w_s, 0], [w_s, h_s], [0, h_s]])
    dst = np.float32(box_l)
    M   = cv2.getPerspectiveTransform(src, dst)

    # 3) find where the pivot lands, and the shift that puts it on the ankle
    px, py = pivot
    pivot_src = np.array([[[px, py]]], dtype=float)
    pivot_dst = cv2.perspectiveTransform(pivot_src, M)[0, 0]
    dx = ankle[0] - pivot_dst[0]
    dy = ankle[1] - pivot_dst[1]

    # 4) fold the shift into the homography and warp once
    shift = np.array([[1.0, 0.0, dx],
                      [0.0, 1.0, dy],
                      [0.0, 0.0, 1.0]], dtype=np.float64)
    M_total = np.dot(shift, M)
    warped = cv2.warpPerspective(
        shoe_rgba, M_total, (W, H),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0, 0)
    )

    # 5) alpha-blend (alpha keeps a trailing axis so it broadcasts over B, G, R)
    alpha = warped[:, :, 3:4] / 255.0
    person[:, :, :3] = (person[:, :, :3].astype(float) * (1 - alpha)
                        + warped[:, :, :3].astype(float) * alpha)
    return person

# --- 5) Main pipeline ---
def create_virtual_tryon(
    person_path="/Users/jonakfir/Downloads/person_image.jpg",
    shoe_path="/Users/jonakfir/Downloads/sneakers.jpg",
    output_path="output_tryon.png",
    show=True,
    min_conf=0.5,
):
    """Run the full try-on pipeline: segment the shoe, find the feet, composite.

    The first person detected by the YOLO pose model is used. For each leg
    the knee and ankle keypoints define a foot box that the segmented shoe is
    warped into.

    Args:
        person_path: Image of the person. Defaults to the original hardcoded
            location; pass your own path when running elsewhere.
        shoe_path: Image of the sneaker on a plain background.
        output_path: Where the composited image is written.
        show: If true, display the result in an OpenCV window and block until
            a key is pressed. Set to ``False`` on headless machines.
        min_conf: Minimum keypoint confidence for a knee/ankle pair to be
            used, when the model reports a confidence column. Legs below the
            threshold are skipped instead of being drawn at unreliable
            coordinates.

    Raises:
        FileNotFoundError: If the person image cannot be read.
        ValueError: If the pose model detects no person.
        IOError: If the output image cannot be written.
    """
    shoe, pivot = segment_shoe(shoe_path)

    person = cv2.imread(person_path)
    if person is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read person image: {person_path!r}")

    model = YOLO("yolov8n-pose.pt")
    res   = model(person)[0]
    if res.keypoints is None or len(res.keypoints.data) == 0:
        raise ValueError(f"No person detected in {person_path!r}")
    kp = res.keypoints.data[0]

    # (knee, ankle) keypoint indices: left leg, then right leg
    for knee_idx, ankle_idx in ((13, 15), (14, 16)):
        knee_kp, ankle_kp = kp[knee_idx], kp[ankle_idx]
        if len(knee_kp) > 2 and min(float(knee_kp[2]), float(ankle_kp[2])) < min_conf:
            continue  # joint not reliably located; skip this foot
        knee  = tuple(map(int, knee_kp[:2]))
        ankle = tuple(map(int, ankle_kp[:2]))
        box = foot_box(ankle, knee, width_frac=0.45)
        if box:
            person = warp_and_blend(person, shoe, pivot, box, knee, ankle, lift_frac=0.06)

    if not cv2.imwrite(output_path, person):
        # cv2.imwrite returns False on failure instead of raising.
        raise IOError(f"Could not write output image: {output_path!r}")

    if show:
        cv2.imshow("Virtual Try-On", person)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

# Run the pipeline when this cell (or script) is executed directly rather than imported.
if __name__ == "__main__":
    create_virtual_tryon()



0: 640x448 2 persons, 41.2ms
Speed: 2.3ms preprocess, 41.2ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 448)
