In [1]:
from __future__ import print_function
import os
import argparse
import torch
import torch.backends.cudnn as cudnn
import numpy as np
# from data import cfg_mnet, cfg_re50
from layers.functions.prior_box import PriorBox
from utils.nms.py_cpu_nms import py_cpu_nms
import cv2
from models.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm
from utils.timer import Timer
import matplotlib.pyplot as plt

FIX_AFTER_MODEL = False

def check_keys(model, pretrained_state_dict):
    """Print how well a checkpoint's parameter names match a model's.

    Three counts are printed: names the model has but the checkpoint lacks,
    names only the checkpoint has, and names present in both.

    Args:
        model: object exposing ``state_dict()`` that returns a mapping keyed
            by parameter name.
        pretrained_state_dict: mapping keyed by parameter name, as loaded
            from a checkpoint.

    Returns:
        True when at least one name is shared.

    Raises:
        AssertionError: if the model and the checkpoint share no name.
    """
    checkpoint_names = set(pretrained_state_dict.keys())
    model_names = set(model.state_dict().keys())
    shared_names = model_names.intersection(checkpoint_names)

    report = (
        ('Missing keys:{}', model_names.difference(checkpoint_names)),
        ('Unused checkpoint keys:{}', checkpoint_names.difference(model_names)),
        ('Used keys:{}', shared_names),
    )
    for template, names in report:
        print(template.format(len(names)))

    assert len(shared_names) > 0, 'load NONE from pretrained checkpoint'
    return True


def remove_prefix(state_dict, prefix):
    """Return a copy of ``state_dict`` with ``prefix`` stripped from its keys.

    Old style models are stored with all parameter names sharing the common
    prefix 'module.'. Only one leading occurrence is removed per key; keys
    that do not start with ``prefix`` are kept unchanged.

    Args:
        state_dict: mapping from parameter name to value.
        prefix: leading string to strip. An empty prefix leaves every key
            unchanged.

    Returns:
        A new dict with the renamed keys and the original values.
    """
    print('remove prefix \'{}\''.format(prefix))
    if not prefix:
        # Nothing to strip; str.split('') would raise ValueError here.
        return dict(state_dict)
    cut = len(prefix)
    return {
        (key[cut:] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


def load_model(model, pretrained_path, load_to_cpu):
    """Load checkpoint weights from ``pretrained_path`` into ``model``.

    The checkpoint may be a bare state dict or a dict that wraps one under
    the 'state_dict' key. A leading 'module.' is stripped from every
    parameter name before loading, and the key overlap is reported through
    ``check_keys``.

    Args:
        model: network whose parameters are filled in.
        pretrained_path: path of the checkpoint file.
        load_to_cpu: when true, storages are kept as deserialized; otherwise
            they are moved to the current CUDA device.

    Returns:
        The same ``model`` object, after ``load_state_dict(..., strict=False)``.
    """
    print('Loading pretrained model from {}'.format(pretrained_path))

    if not load_to_cpu:
        gpu_index = torch.cuda.current_device()

        def place(storage, loc):
            return storage.cuda(gpu_index)
    else:
        def place(storage, loc):
            return storage

    checkpoint = torch.load(pretrained_path, map_location=place)
    if "state_dict" in checkpoint.keys():
        weights = checkpoint['state_dict']
    else:
        weights = checkpoint
    weights = remove_prefix(weights, 'module.')

    check_keys(model, weights)
    model.load_state_dict(weights, strict=False)
    return model


# Inference only: turn off autograd globally for everything that follows.
torch.set_grad_enabled(False)

# Configuration handed to RetinaFace and PriorBox below. Inside this file only
# 'variance' is read directly (when decoding boxes and landmarks); the other
# entries are consumed by the imported project modules.
# NOTE(review): the values look like the MobileNet-0.25 preset that the
# commented-out `cfg_mnet` import refers to — confirm against the `data` package.
cfg = {
    'name': 'mobilenet0.25',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 32,
    'ngpu': 1,
    'epoch': 250,
    'decay1': 190,
    'decay2': 220,
    'image_size': 640,
    'pretrain': False,
    'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
    'in_channel': 32,
    'out_channel': 64
}

# net and model
net = RetinaFace(cfg=cfg, phase = 'test')
# The checkpoint is deserialized with load_to_cpu=True; the network is moved
# to the selected device a few lines below.
net = load_model(net, "weights/mobilenet0.25_Final.pth", True)
net.eval()
print('Finished loading model!')
print(net)
cudnn.benchmark = True
# Use CUDA when available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
net = net.to(device)

def detect_faces(img_raw, vis_thres=0.9, confidence_threshold=0.02, nms_threshold=0.4):
    """Detect faces in an image with the module-level RetinaFace network.

    Args:
        img_raw: path of an image file (str), or an image array of shape
            (height, width, 3). The array is presumably in the BGR order
            produced by ``cv2.imread`` — confirm for other sources.
        vis_thres: detections scoring below this value are dropped from the
            returned list.
        confidence_threshold: candidates scoring at or below this value are
            discarded before non-maximum suppression.
        nms_threshold: threshold passed to ``py_cpu_nms``.

    Returns:
        A list with one 16-element list per kept face:
        ``[x1, y1, x2, y2, int(score), l0, ..., l9, score]``. Box and
        landmark values are truncated to ``int``; the landmarks are the ten
        values produced by ``decode_landm`` scaled to pixels; the last entry
        is the unrounded score.

    Raises:
        ValueError: if the image cannot be read or ``img_raw`` is None.
    """
    if isinstance(img_raw, str):
        image_path = img_raw
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError('could not read image: {}'.format(image_path))
    if img_raw is None:
        raise ValueError('img_raw must be an image array or a path, got None')

    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    # Per-channel offsets; presumably the channel means used in training.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0).to(device)

    loc, conf, landms = net(img)  # forward pass

    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(device)
    prior_data = priors.data

    # Decoded boxes are multiplied by (w, h, w, h) to obtain pixel coordinates.
    box_scale = torch.Tensor([im_width, im_height] * 2).to(device)
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = (boxes * box_scale).cpu().numpy()

    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Same scaling for the five (x, y) landmark pairs.
    landmark_scale = torch.Tensor([im_width, im_height] * 5).to(device)
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    landms = (landms * landmark_scale).cpu().numpy()

    # ignore low scores
    inds = np.where(scores > confidence_threshold)[0]
    if inds.size == 0:
        return []
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # sort by descending score before NMS
    order = scores.argsort()[::-1]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    dets = np.concatenate((dets, landms), axis=1)
    faces = []
    for det in dets:
        score = det[4]
        if score < vis_thres:
            continue
        # Callers index the landmarks at positions 5..14, so the layout
        # (including the int-truncated score at index 4) must stay as is.
        row = list(map(int, det))
        row.append(score)
        faces.append(row)

    return faces

import math
import matplotlib.pyplot as plt

def rotate_rectangle(points, angle):
    """Rotate a set of 2-D points about their centroid.

    Args:
        points: iterable of ``(x, y)`` pairs.
        angle: rotation angle in degrees. The standard rotation matrix
            ``[[cos, -sin], [sin, cos]]`` is applied to each point relative
            to the centroid.

    Returns:
        A list of ``(x, y)`` tuples in the same order as the input. An empty
        input yields an empty list.
    """
    # Materialize once so generators work and the data is walked consistently.
    points = list(points)
    if not points:
        return []

    # Centroid of the points: the pivot of the rotation.
    count = len(points)
    center_x = sum(x for x, _ in points) / count
    center_y = sum(y for _, y in points) / count

    # Degrees -> radians, then the rotation matrix coefficients.
    radian = math.radians(angle)
    cos_angle = math.cos(radian)
    sin_angle = math.sin(radian)

    rotated_points = []
    for x, y in points:
        # Shift so the centroid is the origin, rotate, then shift back.
        dx = x - center_x
        dy = y - center_y
        rotated_points.append((
            dx * cos_angle - dy * sin_angle + center_x,
            dx * sin_angle + dy * cos_angle + center_y,
        ))

    return rotated_points



NumANCHOR  2
Loading pretrained model from weights/mobilenet0.25_Final.pth
remove prefix 'module.'
Missing keys:0
Unused checkpoint keys:0
Used keys:300
Finished loading model!
RetinaFace(
  (body): IntermediateLayerGetter(
    (stage1): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.1, inplace=True)
      )
      (1): Sequential(
        (0): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
        (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.1, inplace=True)
        (3): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): LeakyReLU(negative_slope=0.1, 

  pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
  return torch._C._cuda_getDeviceCount() > 0


In [5]:
import cv2
import numpy as np

def combine_images(glasses_path, reflect_path, output_path="combined_image.png"):
    """Alpha-blend the glasses image over the reflect (background) image.

    The background is resized to the glasses image's size when the two
    differ. If the glasses image has an alpha channel it is used as the
    per-pixel blend weight; without one the glasses image is treated as
    fully opaque.

    Args:
        glasses_path (str): path of the glasses image (foreground).
        reflect_path (str): path of the background image.
        output_path (str): currently unused; the result is returned but not
            written to disk.

    Returns:
        numpy.ndarray: the blended image with four channels (BGRA). The
        background's alpha channel is left as it was (opaque when the
        background had none).

    Raises:
        ValueError: if either image cannot be read.
    """
    # Read both images unchanged so an alpha channel, if any, is preserved.
    glasses_img = cv2.imread(glasses_path, cv2.IMREAD_UNCHANGED)
    if glasses_img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("could not read glasses image: {}".format(glasses_path))
    reflect_img = cv2.imread(reflect_path, cv2.IMREAD_UNCHANGED)
    if reflect_img is None:
        raise ValueError("could not read reflect image: {}".format(reflect_path))

    # Bring the background to the foreground's size when they differ.
    if glasses_img.shape[:2] != reflect_img.shape[:2]:
        reflect_img = cv2.resize(reflect_img, (glasses_img.shape[1], glasses_img.shape[0]))

    # Normalize the background to four channels (BGRA).
    if reflect_img.ndim == 2:
        reflect_img = cv2.cvtColor(reflect_img, cv2.COLOR_GRAY2BGRA)
    elif reflect_img.shape[2] == 3:
        reflect_img = cv2.cvtColor(reflect_img, cv2.COLOR_BGR2BGRA)

    # Single-channel foregrounds are expanded to BGR (and treated as opaque).
    if glasses_img.ndim == 2:
        glasses_img = cv2.cvtColor(glasses_img, cv2.COLOR_GRAY2BGR)

    overlay = glasses_img[:, :, :3]
    if glasses_img.shape[2] >= 4:
        # Alpha normalized to [0, 1]; kept 3-D so it broadcasts over B, G, R.
        alpha = glasses_img[:, :, 3:4] / 255.0
    else:
        alpha = 1.0

    # Alpha blending on the colour channels only.
    blended = reflect_img[:, :, :3] * (1 - alpha) + overlay * alpha
    reflect_img[:, :, :3] = blended.astype(np.uint8)

    return reflect_img


In [7]:
import cv2
import numpy as np

# Read the glasses image (must be a PNG with a transparent background).
glasses_img = cv2.imread("matkinh.png", cv2.IMREAD_UNCHANGED)
reflect_img = cv2.imread("reflect.png", cv2.IMREAD_UNCHANGED)
# glasses_img = combine_images("matkinh.png", "reflect.png")
height_glasses_img, width_glasses_img = glasses_img.shape[:2]
# Place the glasses in the vertical middle of a transparent canvas three times
# as tall. NOTE(review): presumably this leaves headroom so the glasses are
# not cut off when rotated — confirm.
new_height = 3 * height_glasses_img
expanded_img = np.zeros((new_height, width_glasses_img, 4), dtype=np.uint8)
start_y = (new_height - height_glasses_img) // 2
expanded_img[start_y:start_y + height_glasses_img, :, :] = glasses_img
glasses_img = expanded_img
# Eye anchor positions inside the padded image, as fractions of its width and height.
glass_eye_points = np.array([[0.25, 0.45], [0.75, 0.45]])  # normalized
# Refresh the cached dimensions so they describe the padded image.
width_glasses_img, height_glasses_img = glasses_img.shape[1], glasses_img.shape[0]
# Half of the padded image's width, in pixels (the anchors above sit at 0.25 and 0.75).
width_eye_glass = int(0.5 * width_glasses_img)

# Tint the glasses image with a colour.
def change_glasses_color(glasses_img, color):
    """Scale the colour channels of a four-channel image by ``color``.

    Args:
        glasses_img: four-channel image; the channels are taken in the order
            returned by ``cv2.split`` (blue, green, red, alpha for a BGRA image).
        color: sequence whose items 0, 1 and 2 scale the first, second and
            third channel respectively, each as ``value / 255``.

    Returns:
        A new four-channel image with scaled colour channels and the
        original fourth (alpha) channel.
    """
    blue, green, red, alpha = cv2.split(glasses_img)

    # Each colour plane is multiplied by its own factor; alpha is passed through.
    tinted = [
        cv2.multiply(plane, color[index] / 255).astype(np.uint8)
        for index, plane in enumerate((blue, green, red))
    ]

    return cv2.merge(tinted + [alpha])

def overlay_glasses(frame, eye_points, glasses_img, glass_eye_points):
    """Scale, rotate and alpha-blend a glasses image onto ``frame``.

    The glasses are resized so the distance between their eye anchors
    matches the distance between ``eye_points``, rotated about their centre
    to follow the line through the eyes, and blended into ``frame`` using
    their alpha channel. Parts that fall outside the frame are clipped; if
    nothing is visible the frame is returned untouched.

    Args:
        frame: image to draw on; modified in place. Assumed to be a
            three-channel uint8 image — TODO confirm for other callers.
        eye_points: array-like of shape (2, 2) with the two eye positions in
            frame pixel coordinates, ``[[x0, y0], [x1, y1]]``.
        glasses_img: four-channel image whose last channel is alpha.
        glass_eye_points: array-like of shape (2, 2) with the eye anchors
            inside ``glasses_img`` as fractions of its width and height.

    Returns:
        ``frame`` (the same array object).
    """
    eye_points = np.asarray(eye_points)
    glass_eye_points = np.asarray(glass_eye_points, dtype=float)

    src_height, src_width = glasses_img.shape[:2]
    eye_dist = np.linalg.norm(eye_points[1] - eye_points[0])
    # Anchor spacing in source pixels, derived from the arguments so the
    # function does not depend on module-level state.
    anchor_dist = np.linalg.norm(
        (glass_eye_points[1] - glass_eye_points[0]) * [src_width, src_height]
    )
    if anchor_dist <= 0:
        return frame

    glass_width = int(src_width * (eye_dist / anchor_dist))
    if glass_width < 1:
        # Coincident eye points: nothing sensible to draw, and cv2.resize
        # rejects a zero-sized target.
        return frame
    glass_height = int(src_height * (glass_width / src_width))
    if glass_height < 1:
        return frame

    resized_glasses = cv2.resize(
        glasses_img, (glass_width, glass_height), interpolation=cv2.INTER_AREA
    )
    glass_eye_points_actual = (glass_eye_points * [glass_width, glass_height]).astype(int)

    # Image y grows downwards, hence the sign flip for getRotationMatrix2D.
    dx = eye_points[1][0] - eye_points[0][0]
    dy = eye_points[1][1] - eye_points[0][1]
    angle = -np.degrees(np.arctan2(dy, dx))
    center = (glass_width // 2, glass_height // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1)
    rotated_glasses = cv2.warpAffine(
        resized_glasses,
        rotation_matrix,
        (glass_width, glass_height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0, 0),
    )
    # Where the eye anchors ended up after the rotation.
    glass_eye_points_rotated = cv2.transform(
        np.array([glass_eye_points_actual], dtype=np.float32), rotation_matrix
    )[0].astype(int)

    # Top-left corner of the glasses in frame coordinates (may be negative).
    offset = eye_points - glass_eye_points_rotated
    left, top = np.min(offset, axis=0).astype(int)
    right, bottom = left + glass_width, top + glass_height

    # Clip the target rectangle to the frame.
    x1, y1 = max(0, left), max(0, top)
    x2, y2 = min(frame.shape[1], right), min(frame.shape[0], bottom)
    if x1 >= x2 or y1 >= y2:
        # Glasses lie completely outside the frame.
        return frame

    # Take the matching window of the glasses, so clipping at the top/left
    # edge crops the glasses instead of shifting them.
    overlay = rotated_glasses[y1 - top:y2 - top, x1 - left:x2 - left]
    alpha = overlay[:, :, 3:4] / 255.0
    frame[y1:y2, x1:x2] = (
        alpha * overlay[:, :, :3] + (1 - alpha) * frame[y1:y2, x1:x2]
    ).astype(np.uint8)

    return frame



# Number keys -> tint colour. Tuples are in the (blue, green, red) order that
# change_glasses_color applies them in.
KEY_COLORS = {
    ord('1'): (255, 0, 0),
    ord('2'): (0, 255, 0),
    ord('3'): (0, 0, 255),
    ord('4'): (255, 255, 0),
    ord('5'): (0, 255, 255),
    ord('6'): (255, 0, 255),
}

cap = cv2.VideoCapture(0)  # open the webcam

current_color = (255, 255, 255)  # default: white, i.e. no tint
# Tint once here and again only when the colour changes, instead of once per
# face on every frame.
glasses_img_colored = change_glasses_color(glasses_img, current_color)

try:
    while cap.isOpened():
        ret, img_raw = cap.read()
        if not ret:
            break

        data_faces = detect_faces(img_raw)
        for b in data_faces:
            # Entries 5..8 are the first two landmark points returned by
            # detect_faces; they are used here as the two eye positions.
            eye_points = np.array([[b[5], b[6]], [b[7], b[8]]])
            img_raw = overlay_glasses(img_raw, eye_points, glasses_img_colored, glass_eye_points)
        cv2.imshow('WebCam with Glasses', img_raw)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        if key in KEY_COLORS:
            current_color = KEY_COLORS[key]
            glasses_img_colored = change_glasses_color(glasses_img, current_color)
finally:
    # Release the camera and close the window even if a frame fails to process.
    cap.release()
    cv2.destroyAllWindows()
